(** Structured expected messages from Nu validator. *) type t = { message: string; error_code: Htmlrw_check.Error_code.t option; line: int option; column: int option; element: string option; attribute: string option; severity: [`Error | `Warning | `Info] option; } type match_quality = | Exact_match | Code_match | Message_match | Substring_match | Severity_mismatch | No_match type strictness = { require_exact_message: bool; require_error_code: bool; require_location: bool; require_severity: bool; } let lenient = { require_exact_message = false; require_error_code = false; require_location = false; require_severity = false; } (** Practical strict mode: requires exact message text but not typed error codes *) let exact_message = { require_exact_message = true; require_error_code = false; require_location = false; require_severity = false; } (** Full strict mode: all checks enabled (requires typed error code migration) *) let strict = { require_exact_message = true; require_error_code = true; require_location = true; require_severity = true; } (** Unicode ellipsis character *) let ellipsis = "\xe2\x80\xa6" (** Normalize Unicode curly quotes to ASCII for comparison *) let normalize_quotes s = let buf = Buffer.create (String.length s) in let i = ref 0 in while !i < String.length s do let c = s.[!i] in if !i + 2 < String.length s && c = '\xe2' then begin let c1 = s.[!i + 1] in let c2 = s.[!i + 2] in if c1 = '\x80' && (c2 = '\x9c' || c2 = '\x9d') then begin Buffer.add_char buf '"'; i := !i + 3 end else begin Buffer.add_char buf c; incr i end end else begin Buffer.add_char buf c; incr i end done; Buffer.contents buf (** Unicode curly quotes *) let left_curly_quote = "\xe2\x80\x9c" let right_curly_quote = "\xe2\x80\x9d" (** Check if expected message (with potential ellipsis truncation) matches actual. When expected has ellipsis followed by text in curly quotes, we check if actual has a value that ends with that text. This handles Nu validator's message truncation for long attribute values. *) let truncation_aware_match expected actual = (* Look for pattern: left_curly_quote + ellipsis in expected *) let quote_ellipsis = left_curly_quote ^ ellipsis in try let pos = Str.search_forward (Str.regexp_string quote_ellipsis) expected 0 in (* Found quote+ellipsis pattern - extract what comes after ellipsis until closing curly quote *) let start_after_ellipsis = pos + String.length quote_ellipsis in let end_quote_pos = try Str.search_forward (Str.regexp_string right_curly_quote) expected start_after_ellipsis with Not_found -> String.length expected in let truncated_suffix = String.sub expected start_after_ellipsis (end_quote_pos - start_after_ellipsis) in (* Build expected prefix (everything before the truncated quote) and suffix (everything after) *) let prefix = String.sub expected 0 pos in let suffix_start = end_quote_pos + String.length right_curly_quote in let suffix = if suffix_start < String.length expected then String.sub expected suffix_start (String.length expected - suffix_start) else "" in (* Check if actual starts with prefix and ends with suffix *) let actual_starts_with_prefix = String.length actual >= String.length prefix && String.sub actual 0 (String.length prefix) = prefix in let actual_ends_with_suffix = String.length actual >= String.length suffix && String.sub actual (String.length actual - String.length suffix) (String.length suffix) = suffix in (* If prefix and suffix match, extract the middle (the quoted value in actual) *) if actual_starts_with_prefix && actual_ends_with_suffix then begin (* Find the quoted value in actual at the same position *) let actual_quote_start = String.length prefix in try (* Check actual has left curly quote at expected position *) if String.sub actual actual_quote_start (String.length left_curly_quote) = left_curly_quote then begin let actual_value_start = actual_quote_start + String.length left_curly_quote in let actual_value_end = Str.search_forward (Str.regexp_string right_curly_quote) actual actual_value_start in let actual_value = String.sub actual actual_value_start (actual_value_end - actual_value_start) in (* Check if actual value ends with the truncated suffix from expected *) String.length actual_value >= String.length truncated_suffix && String.sub actual_value (String.length actual_value - String.length truncated_suffix) (String.length truncated_suffix) = truncated_suffix end else false with _ -> false end else false with Not_found -> (* No ellipsis truncation pattern found *) false (** Pattern matchers for Nu validator messages. Each returns (error_code option, element option, attribute option) *) let pattern_element_not_allowed msg = (* "Element "X" not allowed as child of element "Y"..." *) let re = Str.regexp {|Element "\([^"]+\)" not allowed as child of element "\([^"]+\)"|} in if Str.string_match re msg 0 then let child = Str.matched_group 1 msg in let parent = Str.matched_group 2 msg in Some ((`Element (`Not_allowed_as_child (`Child child, `Parent parent)) : Htmlrw_check.Error_code.t), Some child, None) else None let pattern_attr_not_allowed_on_element msg = (* "Attribute "X" not allowed on element "Y"..." *) let re = Str.regexp {|Attribute "\([^"]+\)" not allowed on element "\([^"]+\)"|} in if Str.string_match re msg 0 then let attr = Str.matched_group 1 msg in let element = Str.matched_group 2 msg in Some ((`Attr (`Not_allowed (`Attr attr, `Elem element)) : Htmlrw_check.Error_code.t), Some element, Some attr) else None let pattern_attr_not_allowed_here msg = (* "Attribute "X" not allowed here." *) let re = Str.regexp {|Attribute "\([^"]+\)" not allowed here|} in if Str.string_match re msg 0 then let attr = Str.matched_group 1 msg in Some ((`Attr (`Not_allowed_here (`Attr attr)) : Htmlrw_check.Error_code.t), None, Some attr) else None let pattern_missing_required_attr msg = (* "Element "X" is missing required attribute "Y"." *) let re = Str.regexp {|Element "\([^"]+\)" is missing required attribute "\([^"]+\)"|} in if Str.string_match re msg 0 then let element = Str.matched_group 1 msg in let attr = Str.matched_group 2 msg in Some ((`Attr (`Missing (`Elem element, `Attr attr)) : Htmlrw_check.Error_code.t), Some element, Some attr) else None let pattern_missing_required_child msg = (* "Element "X" is missing required child element "Y"." *) let re = Str.regexp {|Element "\([^"]+\)" is missing required child element "\([^"]+\)"|} in if Str.string_match re msg 0 then let parent = Str.matched_group 1 msg in let child = Str.matched_group 2 msg in Some ((`Element (`Missing_child (`Parent parent, `Child child)) : Htmlrw_check.Error_code.t), Some parent, None) else None let pattern_duplicate_id msg = (* "Duplicate ID "X"." *) let re = Str.regexp {|Duplicate ID "\([^"]+\)"|} in if Str.string_match re msg 0 then let id = Str.matched_group 1 msg in Some ((`Attr (`Duplicate_id (`Id id)) : Htmlrw_check.Error_code.t), None, None) else None let pattern_obsolete_element msg = (* "The "X" element is obsolete." *) let re = Str.regexp {|The "\([^"]+\)" element is obsolete|} in if Str.string_match re msg 0 then let element = Str.matched_group 1 msg in Some ((`Element (`Obsolete (`Elem element, `Suggestion "")) : Htmlrw_check.Error_code.t), Some element, None) else None let pattern_obsolete_attr msg = (* "The "X" attribute on the "Y" element is obsolete." *) let re = Str.regexp {|The "\([^"]+\)" attribute on the "\([^"]+\)" element is obsolete|} in if Str.string_match re msg 0 then let attr = Str.matched_group 1 msg in let element = Str.matched_group 2 msg in Some ((`Element (`Obsolete_attr (`Elem element, `Attr attr, `Suggestion None)) : Htmlrw_check.Error_code.t), Some element, Some attr) else None let pattern_stray_end_tag msg = (* "Stray end tag "X"." *) let re = Str.regexp {|Stray end tag "\([^"]+\)"|} in if Str.string_match re msg 0 then let tag = Str.matched_group 1 msg in Some ((`Tag (`Stray_end (`Tag tag)) : Htmlrw_check.Error_code.t), Some tag, None) else None let pattern_stray_start_tag msg = (* "Stray start tag "X"." *) let re = Str.regexp {|Stray start tag "\([^"]+\)"|} in if Str.string_match re msg 0 then let tag = Str.matched_group 1 msg in Some ((`Tag (`Stray_start (`Tag tag)) : Htmlrw_check.Error_code.t), Some tag, None) else None let pattern_unnecessary_role msg = (* "The "X" role is unnecessary for..." *) let re = Str.regexp {|The "\([^"]+\)" role is unnecessary for \(.*\)|} in if Str.string_match re msg 0 then let role = Str.matched_group 1 msg in let reason = Str.matched_group 2 msg in Some ((`Aria (`Unnecessary_role (`Role role, `Elem "", `Reason reason)) : Htmlrw_check.Error_code.t), None, None) else None let pattern_bad_role msg = (* "Bad value "X" for attribute "role" on element "Y"." *) let re = Str.regexp {|Bad value "\([^"]+\)" for attribute "role" on element "\([^"]+\)"|} in if Str.string_match re msg 0 then let role = Str.matched_group 1 msg in let element = Str.matched_group 2 msg in Some ((`Aria (`Bad_role (`Elem element, `Role role)) : Htmlrw_check.Error_code.t), Some element, Some "role") else None let pattern_aria_must_not_be_specified msg = (* "The "X" attribute must not be specified on any "Y" element unless..." *) let re = Str.regexp {|The "\([^"]+\)" attribute must not be specified on any "\([^"]+\)" element unless \(.*\)|} in if Str.string_match re msg 0 then let attr = Str.matched_group 1 msg in let element = Str.matched_group 2 msg in let condition = Str.matched_group 3 msg in Some ((`Aria (`Must_not_specify (`Attr attr, `Elem element, `Condition condition)) : Htmlrw_check.Error_code.t), Some element, Some attr) else None let pattern_aria_must_not_be_used msg = (* "The "X" attribute must not be used on an "Y" element which has..." *) let re = Str.regexp {|The "\([^"]+\)" attribute must not be used on an "\([^"]+\)" element which has \(.*\)|} in if Str.string_match re msg 0 then let attr = Str.matched_group 1 msg in let element = Str.matched_group 2 msg in let condition = Str.matched_group 3 msg in Some ((`Aria (`Must_not_use (`Attr attr, `Elem element, `Condition condition)) : Htmlrw_check.Error_code.t), Some element, Some attr) else None let pattern_bad_attr_value msg = (* "Bad value "X" for attribute "Y" on element "Z": ..." *) let re = Str.regexp {|Bad value "\([^"]*\)" for attribute "\([^"]+\)" on element "\([^"]+\)"|} in if Str.string_match re msg 0 then let value = Str.matched_group 1 msg in let attr = Str.matched_group 2 msg in let element = Str.matched_group 3 msg in (* Extract reason after the colon if present *) let reason = try let colon_pos = String.index_from msg (Str.match_end ()) ':' in String.trim (String.sub msg (colon_pos + 1) (String.length msg - colon_pos - 1)) with Not_found -> "" in Some ((`Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason)) : Htmlrw_check.Error_code.t), Some element, Some attr) else None let pattern_end_tag_implied msg = (* "End tag "X" implied, but there were open elements." *) let re = Str.regexp {|End tag "\([^"]+\)" implied, but there were open elements|} in if Str.string_match re msg 0 then let tag = Str.matched_group 1 msg in Some ((`Tag (`End_implied_open (`Tag tag)) : Htmlrw_check.Error_code.t), Some tag, None) else None let pattern_no_element_in_scope msg = (* "No "X" element in scope but a "X" end tag seen." *) let re = Str.regexp {|No "\([^"]+\)" element in scope but a "\([^"]+\)" end tag seen|} in if Str.string_match re msg 0 then let tag = Str.matched_group 1 msg in Some ((`Tag (`Not_in_scope (`Tag tag)) : Htmlrw_check.Error_code.t), Some tag, None) else None let pattern_start_tag_in_table msg = (* "Start tag "X" seen in "table"." *) let re = Str.regexp {|Start tag "\([^"]+\)" seen in "table"|} in if Str.string_match re msg 0 then let tag = Str.matched_group 1 msg in Some ((`Tag (`Start_in_table (`Tag tag)) : Htmlrw_check.Error_code.t), Some tag, None) else None (** All pattern matchers in priority order *) let patterns = [ pattern_element_not_allowed; pattern_attr_not_allowed_on_element; pattern_attr_not_allowed_here; pattern_missing_required_attr; pattern_missing_required_child; pattern_duplicate_id; pattern_obsolete_element; pattern_obsolete_attr; pattern_stray_end_tag; pattern_stray_start_tag; pattern_unnecessary_role; pattern_bad_role; pattern_aria_must_not_be_specified; pattern_aria_must_not_be_used; pattern_bad_attr_value; pattern_end_tag_implied; pattern_no_element_in_scope; pattern_start_tag_in_table; ] (** Try to recognize the error code from a message *) let recognize_error_code msg = let normalized = normalize_quotes msg in let rec try_patterns = function | [] -> (None, None, None) | p :: rest -> match p normalized with | Some (code, elem, attr) -> (Some code, elem, attr) | None -> try_patterns rest in try_patterns patterns (** Infer severity from message patterns *) let infer_severity msg = let normalized = String.lowercase_ascii msg in if String.sub normalized 0 (min 8 (String.length normalized)) = "consider" then Some `Info else if String.sub normalized 0 (min 3 (String.length normalized)) = "the" && (try let _ = Str.search_forward (Str.regexp_string "is unnecessary") normalized 0 in true with Not_found -> false) then Some `Warning else Some `Error let parse message = let (error_code, element, attribute) = recognize_error_code message in let severity = infer_severity message in { message; error_code; line = None; column = None; element; attribute; severity; } let parse_json_value ~get_string ~get_int ~message_field = let message = match message_field with | Some m -> m | None -> match get_string "message" with Some m -> m | None -> "" in let base = parse message in { base with line = (match get_int "line" with Some l -> Some l | None -> base.line); column = (match get_int "column" with Some c -> Some c | None -> base.column); element = (match get_string "element" with Some e -> Some e | None -> base.element); attribute = (match get_string "attribute" with Some a -> Some a | None -> base.attribute); } (** Compare error codes for semantic equality *) let error_codes_match code1 code2 = (* Use structural equality for all polymorphic variant error codes *) code1 = code2 let matches ~strictness ~expected ~actual = let expected_norm = normalize_quotes expected.message in let actual_norm = normalize_quotes actual.Htmlrw_check.text in (* Check severity match *) let severity_matches = match (expected.severity, actual.Htmlrw_check.severity) with | (None, _) -> true | (Some `Error, Htmlrw_check.Error) -> true | (Some `Warning, Htmlrw_check.Warning) -> true | (Some `Info, Htmlrw_check.Info) -> true | _ -> false in (* Check location match *) let location_matches = match (expected.line, expected.column, actual.Htmlrw_check.location) with | (None, None, _) -> true | (Some el, Some ec, Some loc) -> loc.line = el && loc.column = ec | (Some el, None, Some loc) -> loc.line = el | _ -> false in (* Check error code match *) let code_matches = match (expected.error_code, actual.Htmlrw_check.error_code) with | (None, _) -> true (* No expected code to match *) | (Some ec, Htmlrw_check.Conformance ac) -> error_codes_match ec ac | (Some _, Htmlrw_check.Parse _) -> false (* Expected conformance but got parse error *) in (* Check message text *) let exact_text_match = actual_norm = expected_norm in (* Truncation-aware match: expected may have ellipsis where actual has full value *) let truncation_match = truncation_aware_match expected.message actual.Htmlrw_check.text in let substring_match = try let _ = Str.search_forward (Str.regexp_string expected_norm) actual_norm 0 in true with Not_found -> false in (* Determine match quality *) if not severity_matches && strictness.require_severity then Severity_mismatch else if exact_text_match && code_matches && (location_matches || not strictness.require_location) then Exact_match else if code_matches && expected.error_code <> None then Code_match else if exact_text_match then Message_match else if truncation_match then Message_match (* Treat truncation match same as message match *) else if substring_match && not strictness.require_exact_message then Substring_match else No_match let is_acceptable ~strictness quality = match quality with | Exact_match -> true | Code_match -> not strictness.require_exact_message | Message_match -> not strictness.require_error_code | Substring_match -> not strictness.require_exact_message | Severity_mismatch -> not strictness.require_severity | No_match -> false let match_quality_to_string = function | Exact_match -> "exact" | Code_match -> "code" | Message_match -> "message" | Substring_match -> "substring" | Severity_mismatch -> "severity-mismatch" | No_match -> "no-match"