(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy. All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(* Web Worker entry point for background HTML validation.

   This runs in a separate thread and communicates via postMessage.
   It only does string-based validation since workers can't access the DOM. *)

[@@@warning "-33"] (* Suppress unused open - we only need Jv from Brr *)
open Brr

(* [console_call meth msg] invokes [console.<meth>(msg)] on the global
   [console] object and discards the result. *)
let console_call meth msg =
  ignore (Jv.call (Jv.get Jv.global "console") meth [| Jv.of_string msg |])

(* [console_log msg] writes [msg] through [console.log]. *)
let console_log msg = console_call "log" msg

(* [console_error msg] writes [msg] through [console.error]. *)
let console_error msg = console_call "error" msg

(* [ensure_doctype html] returns [html] unchanged when it already begins with
   a DOCTYPE declaration (matched case-insensitively, after any leading HTML
   whitespace), and otherwise returns [html] prefixed with
   ["<!DOCTYPE html>"].

   Only the nine bytes that could hold ["<!doctype"] are lowercased, rather
   than the whole document. Leading whitespace is skipped because the HTML
   standard ignores it before the DOCTYPE; prepending a second DOCTYPE in
   that case would presumably surface as a spurious validation message. *)
let ensure_doctype html =
  let marker = "<!doctype" in
  let marker_len = String.length marker in
  let len = String.length html in
  (* Index of the first byte that is not HTML whitespace
     (space, tab, LF, FF, CR), or [len] if there is none. *)
  let rec skip_whitespace i =
    if i >= len then i
    else
      match html.[i] with
      | ' ' | '\t' | '\n' | '\012' | '\r' -> skip_whitespace (i + 1)
      | _ -> i
  in
  let start = skip_whitespace 0 in
  let has_doctype =
    len - start >= marker_len
    && String.lowercase_ascii (String.sub html start marker_len) = marker
  in
  if has_doctype then html else "<!DOCTYPE html>" ^ html

(* Debug: dump tree structure to see what parser built.

   [dump_tree_structure html] parses [html] with [Html5rw.parse] and returns
   an indented, one-node-per-line rendering of the resulting tree:
   - ["#text"] nodes are printed with their trimmed data, cut to 30 bytes
     followed by ["..."] when longer; whitespace-only text is omitted;
   - ["#comment"] nodes are omitted;
   - every other node is printed as [<name>].
   Children are only expanded for nodes at depth 0-4, so the deepest nodes
   that appear in the output are at depth 5. *)
let dump_tree_structure html =
  let doc = Html5rw.parse (Bytesrw.Bytes.Reader.of_string html) in
  let root = Html5rw.root doc in
  let buf = Buffer.create 1024 in
  let rec dump indent node =
    let prefix = String.make (indent * 2) ' ' in
    match node.Html5rw.Dom.name with
    | "#text" ->
      let text = String.trim node.Html5rw.Dom.data in
      if String.length text > 0 then begin
        let shown =
          if String.length text > 30 then String.sub text 0 30 ^ "..."
          else text
        in
        Printf.bprintf buf "%s#text: \"%s\"\n" prefix shown
      end
    | "#comment" -> ()
    | name ->
      Printf.bprintf buf "%s<%s>\n" prefix name;
      (* Depth limit keeps the debug output small. *)
      if indent < 5 then
        List.iter (dump (indent + 1)) node.Html5rw.Dom.children
  in
  dump 0 root;
  Buffer.contents buf

(* [message_to_jv msg] converts one validation message into a JS object with
   the keys ["severity"], ["message"] and ["errorCode"], plus
   ["elementName"], ["attribute"], ["line"] and ["column"] when the
   corresponding optional fields of [msg] are present. *)
let message_to_jv msg =
  let obj = Jv.obj [||] in
  Jv.set obj "severity"
    (Jv.of_string (Htmlrw_check.severity_to_string msg.Htmlrw_check.severity));
  Jv.set obj "message" (Jv.of_string msg.Htmlrw_check.text);
  Jv.set obj "errorCode"
    (Jv.of_string
       (Htmlrw_check.error_code_to_string msg.Htmlrw_check.error_code));
  (match msg.Htmlrw_check.element with
   | Some el -> Jv.set obj "elementName" (Jv.of_string el)
   | None -> ());
  (match msg.Htmlrw_check.attribute with
   | Some attr -> Jv.set obj "attribute" (Jv.of_string attr)
   | None -> ());
  (match msg.Htmlrw_check.location with
   | Some loc ->
     Jv.set obj "line" (Jv.of_int loc.line);
     Jv.set obj "column" (Jv.of_int loc.column)
   | None -> ());
  obj

(* [count_severity severity messages] is the number of [messages] whose
   severity equals [severity]; it counts in place instead of building a
   filtered list. *)
let count_severity severity messages =
  List.fold_left
    (fun n m -> if m.Htmlrw_check.severity = severity then n + 1 else n)
    0 messages

(* [set_error_response response ~prefix ~code ~detail_key exn] fills
   [response] with a single synthetic error describing [exn]:
   ["warnings"] holds one object whose message is ["<prefix>: <exn>"] and
   whose error code is [code]; the counters report exactly one error; and
   [detail_key] carries the printed exception on its own. *)
let set_error_response response ~prefix ~code ~detail_key exn =
  let detail = Printexc.to_string exn in
  let error_obj = Jv.obj [||] in
  Jv.set error_obj "severity" (Jv.of_string "error");
  Jv.set error_obj "message"
    (Jv.of_string (Printf.sprintf "%s: %s" prefix detail));
  Jv.set error_obj "errorCode" (Jv.of_string code);
  Jv.set response "warnings" (Jv.of_list Fun.id [ error_obj ]);
  Jv.set response "errorCount" (Jv.of_int 1);
  Jv.set response "warningCount" (Jv.of_int 0);
  Jv.set response "infoCount" (Jv.of_int 0);
  Jv.set response "hasErrors" (Jv.of_bool true);
  Jv.set response detail_key (Jv.of_string detail)

(* [post_response response] sends [response] with [self.postMessage]. *)
let post_response response =
  let self = Jv.get Jv.global "self" in
  ignore (Jv.call self "postMessage" [| response |])

(* [handle_message msg_data] validates one request and posts the result.

   [msg_data] is read for an integer ["id"] and a string ["html"]. The
   response echoes ["id"] and carries ["warnings"], ["errorCount"],
   ["warningCount"], ["infoCount"], ["hasErrors"], ["treeStructure"] and
   ["htmlPreview"].

   Any exception raised while validating or dumping the tree is reported as
   a single ["parse-error"] entry with a ["parseError"] field. Any other
   exception (for example while reading the message) is logged with
   [console.error] and reported as a ["worker-error"] entry with ["id"] set
   to [-1] and a ["fatalError"] field. A response is posted in both cases. *)
let handle_message msg_data =
  console_log "[html5rw worker] Message received";
  let response = Jv.obj [||] in
  try
    let id = Jv.get msg_data "id" |> Jv.to_int in
    let raw_html = Jv.get msg_data "html" |> Jv.to_string in
    let html = ensure_doctype raw_html in
    console_log
      (Printf.sprintf "[html5rw worker] Validating %d bytes (id=%d)"
         (String.length html) id);
    (* Log first 500 chars of HTML for debugging *)
    let preview =
      if String.length html > 500 then String.sub html 0 500 ^ "..."
      else html
    in
    console_log (Printf.sprintf "[html5rw worker] HTML preview:\n%s" preview);
    Jv.set response "id" (Jv.of_int id);
    (try
       (* Run validation *)
       let core_result = Htmlrw_check.check_string html in
       let messages = Htmlrw_check.messages core_result in
       (* Convert messages to JS-friendly format *)
       let warnings = Jv.of_list message_to_jv messages in
       let error_count = count_severity Htmlrw_check.Error messages in
       let warning_count = count_severity Htmlrw_check.Warning messages in
       let info_count = count_severity Htmlrw_check.Info messages in
       Jv.set response "warnings" warnings;
       Jv.set response "errorCount" (Jv.of_int error_count);
       Jv.set response "warningCount" (Jv.of_int warning_count);
       Jv.set response "infoCount" (Jv.of_int info_count);
       Jv.set response "hasErrors" (Jv.of_bool (error_count > 0));
       (* Add tree structure for debugging *)
       let tree_dump = dump_tree_structure html in
       Jv.set response "treeStructure" (Jv.of_string tree_dump);
       Jv.set response "htmlPreview" (Jv.of_string preview);
       console_log
         (Printf.sprintf "[html5rw worker] Tree structure:\n%s" tree_dump)
     with exn ->
       (* Return error on parse failure; this overwrites any counters that
          were already written before the exception. *)
       set_error_response response ~prefix:"Parse error" ~code:"parse-error"
         ~detail_key:"parseError" exn);
    console_log "[html5rw worker] Validation complete, posting response";
    (* Post result back to main thread *)
    post_response response
  with exn ->
    (* Outer error handler - catches message parsing errors *)
    console_error
      (Printf.sprintf "[html5rw worker] Fatal error: %s"
         (Printexc.to_string exn));
    Jv.set response "id" (Jv.of_int (-1));
    set_error_response response ~prefix:"Worker error" ~code:"worker-error"
      ~detail_key:"fatalError" exn;
    post_response response

(* Worker start-up: register [handle_message] as the listener for
   ["message"] events on [self], passing it each event's ["data"]. *)
let () =
  console_log "[html5rw worker] Worker script starting...";
  (* Set up message handler *)
  let self = Jv.get Jv.global "self" in
  let handler =
    Jv.callback ~arity:1 (fun ev -> handle_message (Jv.get ev "data"))
  in
  ignore
    (Jv.call self "addEventListener" [| Jv.of_string "message"; handler |]);
  console_log
    "[html5rw worker] Message handler registered, ready for messages"