we (web engine): Experimental web browser project to understand the limits of Claude
at texture-validation 491 lines 17 kB view raw
1//! Script element loading and execution. 2//! 3//! Walks the DOM for `<script>` elements, fetches external scripts, and 4//! executes them in a shared JS VM with DOM access. 5 6use crate::loader::{Resource, ResourceLoader}; 7use we_dom::{Document, NodeId}; 8use we_js::compiler; 9use we_js::parser::Parser; 10use we_js::vm::Vm; 11use we_url::Url; 12 13/// Information about a `<script>` element extracted from the DOM. 14struct ScriptInfo { 15 /// Inline script text (from child text nodes). 16 text: Option<String>, 17 /// `src` attribute value (external script URL). 18 src: Option<String>, 19 /// Whether the `defer` attribute is present. 20 defer: bool, 21 /// Whether the `async` attribute is present. 22 async_attr: bool, 23 /// The `type` attribute value, if any. 24 type_attr: Option<String>, 25} 26 27/// Returns true if a script's `type` attribute indicates it should execute. 28/// 29/// Per spec, scripts execute if there is no type attribute or if the type 30/// is a JavaScript MIME type. Unknown types are not executed. 31fn should_execute(type_attr: &Option<String>) -> bool { 32 match type_attr { 33 None => true, 34 Some(t) => { 35 let t = t.trim().to_ascii_lowercase(); 36 t.is_empty() 37 || t == "text/javascript" 38 || t == "application/javascript" 39 || t == "application/x-javascript" 40 || t == "text/ecmascript" 41 || t == "application/ecmascript" 42 } 43 } 44} 45 46/// Collect text content from child text nodes of an element. 47fn collect_text_content(doc: &Document, node: NodeId) -> String { 48 let mut text = String::new(); 49 for child in doc.children(node) { 50 if let Some(data) = doc.text_content(child) { 51 text.push_str(data); 52 } 53 } 54 text 55} 56 57/// Walk the DOM tree in document order, collecting `<script>` elements. 58fn collect_script_nodes(doc: &Document, node: NodeId, result: &mut Vec<NodeId>) { 59 if doc.tag_name(node) == Some("script") { 60 result.push(node); 61 } 62 for child in doc.children(node) { 63 collect_script_nodes(doc, child, result); 64 } 65} 66 67/// Extract script info from a `<script>` element node. 68fn extract_script_info(doc: &Document, node: NodeId) -> ScriptInfo { 69 let src = doc.get_attribute(node, "src").map(|s| s.to_string()); 70 let defer = doc.get_attribute(node, "defer").is_some(); 71 let async_attr = doc.get_attribute(node, "async").is_some(); 72 let type_attr = doc.get_attribute(node, "type").map(|s| s.to_string()); 73 74 let text = if src.is_none() { 75 let content = collect_text_content(doc, node); 76 if content.is_empty() { 77 None 78 } else { 79 Some(content) 80 } 81 } else { 82 None 83 }; 84 85 ScriptInfo { 86 text, 87 src, 88 defer, 89 async_attr, 90 type_attr, 91 } 92} 93 94/// Fetch the text of an external script. 95fn fetch_script_text(loader: &mut ResourceLoader, src: &str, base_url: &Url) -> Option<String> { 96 match loader.fetch_url(src, Some(base_url)) { 97 Ok(Resource::Script { text, .. }) => Some(text), 98 Ok(Resource::Other { data, .. }) => { 99 // Try decoding as UTF-8 (servers may not set correct MIME type). 100 String::from_utf8(data).ok() 101 } 102 Ok(_) => { 103 eprintln!("[script] unexpected resource type for {src}"); 104 None 105 } 106 Err(e) => { 107 eprintln!("[script] failed to fetch {src}: {e}"); 108 None 109 } 110 } 111} 112 113/// Compile and execute a script in the VM. Errors are logged, not propagated. 114fn execute_script(vm: &mut Vm, source: &str, label: &str) { 115 let ast = match Parser::parse(source) { 116 Ok(ast) => ast, 117 Err(e) => { 118 eprintln!("[script] parse error in {label}: {e}"); 119 return; 120 } 121 }; 122 123 let func = match compiler::compile(&ast) { 124 Ok(f) => f, 125 Err(e) => { 126 eprintln!("[script] compile error in {label}: {e}"); 127 return; 128 } 129 }; 130 131 if let Err(e) = vm.execute(&func) { 132 eprintln!("[script] runtime error in {label}: {e}"); 133 } 134} 135 136/// Execute all `<script>` elements on the page. 137/// 138/// Takes ownership of the DOM `Document`, creates a JS VM with DOM access, 139/// executes scripts in document order (respecting `defer` and `async` 140/// attributes), then returns the (possibly modified) document. 141/// 142/// Script execution order: 143/// 1. Synchronous scripts (no `defer`/`async`): execute in document order, 144/// blocking further script processing. 145/// 2. Deferred scripts (`defer`): execute after all synchronous scripts, 146/// in document order. 147/// 3. Async scripts (`async`): execute as soon as fetched; for synchronous 148/// fetching this is equivalent to document order. 149/// 150/// Scripts with an unrecognized `type` attribute are skipped. 151/// Errors in one script do not prevent other scripts from running. 152pub fn execute_page_scripts( 153 doc: Document, 154 loader: &mut ResourceLoader, 155 base_url: &Url, 156) -> Document { 157 // Find all <script> elements in document order. 158 let mut script_nodes = Vec::new(); 159 let root = doc.root(); 160 collect_script_nodes(&doc, root, &mut script_nodes); 161 162 // Extract script info and classify. 163 let scripts: Vec<(NodeId, ScriptInfo)> = script_nodes 164 .into_iter() 165 .map(|node| (node, extract_script_info(&doc, node))) 166 .filter(|(_, info)| should_execute(&info.type_attr)) 167 .collect(); 168 169 if scripts.is_empty() { 170 return doc; 171 } 172 173 // Create VM with DOM access. 174 let mut vm = Vm::new(); 175 vm.attach_document(doc); 176 177 // Separate into immediate (sync + async) and deferred scripts. 178 let mut deferred_sources: Vec<(String, String)> = Vec::new(); 179 180 for (_node, info) in &scripts { 181 // Resolve the script source text. 182 let (source, label) = if let Some(ref src) = info.src { 183 // External script: fetch it. 184 match fetch_script_text(loader, src, base_url) { 185 Some(text) => (text, src.clone()), 186 None => continue, 187 } 188 } else if let Some(ref text) = info.text { 189 (text.clone(), "<inline>".to_string()) 190 } else { 191 continue; 192 }; 193 194 if info.defer && !info.async_attr && info.src.is_some() { 195 // Deferred external scripts: queue for later execution. 196 // Per spec, defer only applies to external scripts and is 197 // ignored when async is also present. 198 deferred_sources.push((source, label)); 199 } else { 200 // Synchronous and async scripts: execute immediately. 201 // Async scripts have no ordering guarantee, but since we 202 // fetch synchronously they execute in document order. 203 execute_script(&mut vm, &source, &label); 204 } 205 } 206 207 // Execute deferred scripts in document order. 208 for (source, label) in &deferred_sources { 209 execute_script(&mut vm, source, label); 210 } 211 212 // Pump the event loop to handle any pending microtasks/timers. 213 let _ = vm.pump_event_loop(); 214 215 // Take the document back from the VM. 216 vm.detach_document().unwrap_or_default() 217} 218 219#[cfg(test)] 220mod tests { 221 use super::*; 222 use we_html::parse_html; 223 224 /// Helper: parse HTML, execute scripts, return the document. 225 fn run_scripts(html: &str) -> Document { 226 let doc = parse_html(html); 227 let mut loader = ResourceLoader::new(); 228 let base_url = Url::parse("about:blank").unwrap(); 229 execute_page_scripts(doc, &mut loader, &base_url) 230 } 231 232 #[test] 233 fn test_no_scripts() { 234 let doc = run_scripts("<html><body><p>Hello</p></body></html>"); 235 // Should return the document unchanged. 236 assert!(doc.tag_name(doc.root()).is_none()); // root is Document node 237 } 238 239 #[test] 240 fn test_inline_script_executes() { 241 // Script that modifies a DOM element's text content. 242 let html = r#"<html><body> 243 <div id="target">before</div> 244 <script> 245 var el = document.getElementById("target"); 246 el.textContent = "after"; 247 </script> 248 </body></html>"#; 249 let doc = run_scripts(html); 250 251 // Find the div and check its text content was modified. 252 let mut found = false; 253 fn find_div(doc: &Document, node: NodeId, found: &mut bool) { 254 if doc.tag_name(node) == Some("div") { 255 if doc.get_attribute(node, "id") == Some("target") { 256 // Check child text node. 257 for child in doc.children(node) { 258 if let Some(text) = doc.text_content(child) { 259 if text == "after" { 260 *found = true; 261 } 262 } 263 } 264 } 265 } 266 for child in doc.children(node) { 267 find_div(doc, child, found); 268 } 269 } 270 find_div(&doc, doc.root(), &mut found); 271 assert!(found, "script should have modified div text to 'after'"); 272 } 273 274 #[test] 275 fn test_multiple_scripts_share_scope() { 276 let html = r#"<html><body> 277 <script>var x = 42;</script> 278 <script>var y = x + 1;</script> 279 <div id="result">placeholder</div> 280 <script> 281 document.getElementById("result").textContent = String(y); 282 </script> 283 </body></html>"#; 284 let doc = run_scripts(html); 285 286 fn find_result(doc: &Document, node: NodeId) -> Option<String> { 287 if doc.tag_name(node) == Some("div") { 288 if doc.get_attribute(node, "id") == Some("result") { 289 for child in doc.children(node) { 290 if let Some(text) = doc.text_content(child) { 291 return Some(text.to_string()); 292 } 293 } 294 } 295 } 296 for child in doc.children(node) { 297 if let Some(result) = find_result(doc, child) { 298 return Some(result); 299 } 300 } 301 None 302 } 303 304 let result = find_result(&doc, doc.root()); 305 assert_eq!(result.as_deref(), Some("43")); 306 } 307 308 #[test] 309 fn test_unknown_type_not_executed() { 310 let html = r#"<html><body> 311 <div id="target">original</div> 312 <script type="text/template"> 313 document.getElementById("target").textContent = "changed"; 314 </script> 315 </body></html>"#; 316 let doc = run_scripts(html); 317 318 fn find_target(doc: &Document, node: NodeId) -> Option<String> { 319 if doc.tag_name(node) == Some("div") { 320 if doc.get_attribute(node, "id") == Some("target") { 321 for child in doc.children(node) { 322 if let Some(text) = doc.text_content(child) { 323 return Some(text.to_string()); 324 } 325 } 326 } 327 } 328 for child in doc.children(node) { 329 if let Some(result) = find_target(doc, child) { 330 return Some(result); 331 } 332 } 333 None 334 } 335 336 let result = find_target(&doc, doc.root()); 337 assert_eq!(result.as_deref(), Some("original")); 338 } 339 340 #[test] 341 fn test_type_text_javascript_executes() { 342 let html = r#"<html><body> 343 <div id="target">before</div> 344 <script type="text/javascript"> 345 document.getElementById("target").textContent = "after"; 346 </script> 347 </body></html>"#; 348 let doc = run_scripts(html); 349 350 fn find_target(doc: &Document, node: NodeId) -> Option<String> { 351 if doc.tag_name(node) == Some("div") { 352 if doc.get_attribute(node, "id") == Some("target") { 353 for child in doc.children(node) { 354 if let Some(text) = doc.text_content(child) { 355 return Some(text.to_string()); 356 } 357 } 358 } 359 } 360 for child in doc.children(node) { 361 if let Some(result) = find_target(doc, child) { 362 return Some(result); 363 } 364 } 365 None 366 } 367 368 let result = find_target(&doc, doc.root()); 369 assert_eq!(result.as_deref(), Some("after")); 370 } 371 372 #[test] 373 fn test_script_error_does_not_crash() { 374 // A script with a runtime error should not prevent subsequent scripts. 375 let html = r#"<html><body> 376 <div id="target">before</div> 377 <script> 378 undefinedFunction(); 379 </script> 380 <script> 381 document.getElementById("target").textContent = "after"; 382 </script> 383 </body></html>"#; 384 let doc = run_scripts(html); 385 386 fn find_target(doc: &Document, node: NodeId) -> Option<String> { 387 if doc.tag_name(node) == Some("div") { 388 if doc.get_attribute(node, "id") == Some("target") { 389 for child in doc.children(node) { 390 if let Some(text) = doc.text_content(child) { 391 return Some(text.to_string()); 392 } 393 } 394 } 395 } 396 for child in doc.children(node) { 397 if let Some(result) = find_target(doc, child) { 398 return Some(result); 399 } 400 } 401 None 402 } 403 404 let result = find_target(&doc, doc.root()); 405 assert_eq!(result.as_deref(), Some("after")); 406 } 407 408 #[test] 409 fn test_empty_script_no_crash() { 410 let doc = run_scripts("<html><body><script></script></body></html>"); 411 assert!(!doc.is_empty()); 412 } 413 414 #[test] 415 fn test_defer_scripts_run_after_sync() { 416 // In our parse-first model, both sync and defer run after parsing. 417 // defer scripts with src are queued; defer inline scripts are treated 418 // as sync. Without external script loading in tests, we verify that 419 // defer inline scripts still execute (treated as sync per spec — 420 // defer only applies to external scripts). 421 let html = r#"<html><body> 422 <div id="result">0</div> 423 <script defer> 424 var counter = 1; 425 </script> 426 <script> 427 document.getElementById("result").textContent = String(counter); 428 </script> 429 </body></html>"#; 430 let doc = run_scripts(html); 431 432 fn find_result(doc: &Document, node: NodeId) -> Option<String> { 433 if doc.tag_name(node) == Some("div") { 434 if doc.get_attribute(node, "id") == Some("result") { 435 for child in doc.children(node) { 436 if let Some(text) = doc.text_content(child) { 437 return Some(text.to_string()); 438 } 439 } 440 } 441 } 442 for child in doc.children(node) { 443 if let Some(result) = find_result(doc, child) { 444 return Some(result); 445 } 446 } 447 None 448 } 449 450 let result = find_result(&doc, doc.root()); 451 // defer on inline scripts is ignored per spec, so counter=1 runs first 452 assert_eq!(result.as_deref(), Some("1")); 453 } 454 455 #[test] 456 fn test_script_create_element() { 457 let html = r#"<html><body> 458 <div id="container"></div> 459 <script> 460 var div = document.createElement("p"); 461 div.textContent = "dynamic"; 462 document.getElementById("container").appendChild(div); 463 </script> 464 </body></html>"#; 465 let doc = run_scripts(html); 466 467 // Check that a <p> was added inside #container. 468 fn find_dynamic(doc: &Document, node: NodeId) -> bool { 469 if doc.tag_name(node) == Some("div") { 470 if doc.get_attribute(node, "id") == Some("container") { 471 for child in doc.children(node) { 472 if doc.tag_name(child) == Some("p") { 473 return true; 474 } 475 } 476 } 477 } 478 for child in doc.children(node) { 479 if find_dynamic(doc, child) { 480 return true; 481 } 482 } 483 false 484 } 485 486 assert!( 487 find_dynamic(&doc, doc.root()), 488 "script should have appended a <p> to #container" 489 ); 490 } 491}