OCaml HTML5 parser/serialiser based on Python's JustHTML

more

Changed files
+443 -17
lib
test
+1
dune-project
··· 25 25 (uutf (>= 1.0.0)) 26 26 (uuuu (>= 0.3.0)) 27 27 (uunf (>= 15.0.0)) 28 + (xmlm (>= 1.4.0)) 28 29 (odoc :with-doc) 29 30 (jsont (>= 0.2.0)) 30 31 (cmdliner (>= 1.3.0))))
+1
html5rw.opam
··· 16 16 "uutf" {>= "1.0.0"} 17 17 "uuuu" {>= "0.3.0"} 18 18 "uunf" {>= "15.0.0"} 19 + "xmlm" {>= "1.4.0"} 19 20 "odoc" {with-doc} 20 21 "jsont" {>= "0.2.0"} 21 22 "cmdliner" {>= "1.3.0"}
+1
lib/html5_checker/checker_registry.ml
··· 41 41 Hashtbl.replace reg "table" Table_checker.checker; 42 42 Hashtbl.replace reg "mime-type" Mime_type_checker.checker; 43 43 Hashtbl.replace reg "normalization" Normalization_checker.checker; 44 + Hashtbl.replace reg "svg" Svg_checker.checker; 44 45 (* Hashtbl.replace reg "heading" Heading_checker.checker; *) 45 46 (* Hashtbl.replace reg "content" Content_checker.checker; *) 46 47 reg
+424
lib/html5_checker/specialized/svg_checker.ml
··· 1 + (** SVG attribute and element validation checker. 2 + 3 + Validates SVG elements and attributes according to SVG 1.1/2 specifications. *) 4 + 5 + type state = { 6 + mutable in_svg : bool; 7 + mutable element_stack : string list; 8 + } 9 + 10 + let create () = { in_svg = false; element_stack = [] } 11 + let reset state = state.in_svg <- false; state.element_stack <- [] 12 + 13 + (* SVG namespace - the DOM stores this as "svg" shorthand *) 14 + let svg_ns = "svg" 15 + 16 + (* Full SVG namespace URL for validation *) 17 + let svg_ns_url = "http://www.w3.org/2000/svg" 18 + 19 + (* Global SVG attributes allowed on all elements *) 20 + let global_svg_attrs = [ 21 + "id"; "class"; "style"; "tabindex"; "lang"; "xml:lang"; "xml:space"; 22 + "requiredExtensions"; "requiredFeatures"; "systemLanguage"; 23 + (* XLink attributes *) 24 + "xlink:href"; "xlink:type"; "xlink:role"; "xlink:arcrole"; "xlink:title"; 25 + "xlink:show"; "xlink:actuate"; 26 + (* Event attributes *) 27 + "onload"; "onunload"; "onabort"; "onerror"; "onresize"; "onscroll"; "onzoom"; 28 + "onfocusin"; "onfocusout"; "onactivate"; "onclick"; "onmousedown"; "onmouseup"; 29 + "onmouseover"; "onmousemove"; "onmouseout"; "onbegin"; "onend"; "onrepeat"; 30 + (* Presentation attributes - comprehensive list *) 31 + "alignment-baseline"; "baseline-shift"; 32 + "clip"; "clip-path"; "clip-rule"; "color"; "color-interpolation"; "color-interpolation-filters"; 33 + "color-profile"; "color-rendering"; 34 + "cursor"; "direction"; "display"; "dominant-baseline"; 35 + "enable-background"; 36 + "fill"; "fill-opacity"; "fill-rule"; "filter"; 37 + "flood-color"; "flood-opacity"; "font-family"; "font-size"; "font-size-adjust"; 38 + "font-stretch"; "font-style"; "font-variant"; "font-weight"; 39 + "glyph-orientation-horizontal"; "glyph-orientation-vertical"; 40 + "image-rendering"; 41 + "kerning"; 42 + "letter-spacing"; "lighting-color"; 43 + "marker"; "marker-end"; "marker-mid"; "marker-start"; "mask"; 44 + "opacity"; "overflow"; 45 + "pointer-events"; 46 + "shape-rendering"; 47 + "stop-color"; "stop-opacity"; "stroke"; "stroke-dasharray"; "stroke-dashoffset"; 48 + "stroke-linecap"; "stroke-linejoin"; "stroke-miterlimit"; "stroke-opacity"; 49 + "stroke-width"; 50 + "text-anchor"; "text-decoration"; "text-rendering"; 51 + "transform"; "transform-origin"; 52 + "unicode-bidi"; 53 + "vector-effect"; "visibility"; 54 + "word-spacing"; "writing-mode"; 55 + (* Data attributes *) 56 + "data-*"; 57 + (* ARIA attributes *) 58 + "role"; "aria-activedescendant"; "aria-atomic"; "aria-autocomplete"; "aria-busy"; 59 + "aria-checked"; "aria-colcount"; "aria-colindex"; "aria-colspan"; "aria-controls"; 60 + "aria-current"; "aria-describedby"; "aria-details"; "aria-disabled"; "aria-dropeffect"; 61 + "aria-errormessage"; "aria-expanded"; "aria-flowto"; "aria-grabbed"; "aria-haspopup"; 62 + "aria-hidden"; "aria-invalid"; "aria-keyshortcuts"; "aria-label"; "aria-labelledby"; 63 + "aria-level"; "aria-live"; "aria-modal"; "aria-multiline"; "aria-multiselectable"; 64 + "aria-orientation"; "aria-owns"; "aria-placeholder"; "aria-posinset"; "aria-pressed"; 65 + "aria-readonly"; "aria-relevant"; "aria-required"; "aria-roledescription"; "aria-rowcount"; 66 + "aria-rowindex"; "aria-rowspan"; "aria-selected"; "aria-setsize"; "aria-sort"; 67 + "aria-valuemax"; "aria-valuemin"; "aria-valuenow"; "aria-valuetext"; 68 + ] 69 + 70 + (* Element-specific attributes *) 71 + let element_attrs = [ 72 + ("svg", ["xmlns"; "xmlns:xlink"; "version"; "baseProfile"; "x"; "y"; "width"; "height"; 73 + "viewBox"; "preserveAspectRatio"; "zoomAndPan"; "contentScriptType"; 74 + "contentStyleType"]); 75 + ("g", ["transform"]); 76 + ("defs", []); 77 + ("symbol", ["viewBox"; "preserveAspectRatio"; "refX"; "refY"]); 78 + ("use", ["href"; "xlink:href"; "x"; "y"; "width"; "height"]); 79 + ("image", ["href"; "xlink:href"; "x"; "y"; "width"; "height"; "preserveAspectRatio"; 80 + "crossorigin"; "decoding"]); 81 + ("switch", []); 82 + ("foreignObject", ["x"; "y"; "width"; "height"]); 83 + 84 + (* Shape elements *) 85 + ("circle", ["cx"; "cy"; "r"; "pathLength"]); 86 + ("ellipse", ["cx"; "cy"; "rx"; "ry"; "pathLength"]); 87 + ("line", ["x1"; "y1"; "x2"; "y2"; "pathLength"]); 88 + ("polygon", ["points"; "pathLength"]); 89 + ("polyline", ["points"; "pathLength"]); 90 + ("rect", ["x"; "y"; "width"; "height"; "rx"; "ry"; "pathLength"]); 91 + ("path", ["d"; "pathLength"]); 92 + 93 + (* Text elements *) 94 + ("text", ["x"; "y"; "dx"; "dy"; "rotate"; "textLength"; "lengthAdjust"]); 95 + ("tspan", ["x"; "y"; "dx"; "dy"; "rotate"; "textLength"; "lengthAdjust"]); 96 + ("textPath", ["href"; "xlink:href"; "startOffset"; "method"; "spacing"; "path"; "side"]); 97 + 98 + (* Gradient elements *) 99 + ("linearGradient", ["gradientUnits"; "gradientTransform"; "spreadMethod"; 100 + "href"; "xlink:href"; "x1"; "y1"; "x2"; "y2"]); 101 + ("radialGradient", ["gradientUnits"; "gradientTransform"; "spreadMethod"; 102 + "href"; "xlink:href"; "cx"; "cy"; "r"; "fx"; "fy"; "fr"]); 103 + ("stop", ["offset"]); 104 + 105 + (* Pattern *) 106 + ("pattern", ["patternUnits"; "patternContentUnits"; "patternTransform"; 107 + "href"; "xlink:href"; "x"; "y"; "width"; "height"; "viewBox"; 108 + "preserveAspectRatio"]); 109 + 110 + (* Clipping and masking *) 111 + ("clipPath", ["clipPathUnits"]); 112 + ("mask", ["maskUnits"; "maskContentUnits"; "x"; "y"; "width"; "height"]); 113 + 114 + (* Filter elements *) 115 + ("filter", ["filterUnits"; "primitiveUnits"; "x"; "y"; "width"; "height"; 116 + "href"; "xlink:href"]); 117 + ("feBlend", ["in"; "in2"; "mode"; "x"; "y"; "width"; "height"; "result"]); 118 + ("feColorMatrix", ["in"; "type"; "values"; "x"; "y"; "width"; "height"; "result"]); 119 + ("feComponentTransfer", ["in"; "x"; "y"; "width"; "height"; "result"]); 120 + ("feFuncR", ["type"; "tableValues"; "slope"; "intercept"; "amplitude"; "exponent"; "offset"]); 121 + ("feFuncG", ["type"; "tableValues"; "slope"; "intercept"; "amplitude"; "exponent"; "offset"]); 122 + ("feFuncB", ["type"; "tableValues"; "slope"; "intercept"; "amplitude"; "exponent"; "offset"]); 123 + ("feFuncA", ["type"; "tableValues"; "slope"; "intercept"; "amplitude"; "exponent"; "offset"]); 124 + ("feComposite", ["in"; "in2"; "operator"; "k1"; "k2"; "k3"; "k4"; "x"; "y"; "width"; "height"; "result"]); 125 + ("feConvolveMatrix", ["in"; "order"; "kernelMatrix"; "divisor"; "bias"; "targetX"; "targetY"; 126 + "edgeMode"; "preserveAlpha"; "x"; "y"; "width"; "height"; "result"]); 127 + ("feDiffuseLighting", ["in"; "surfaceScale"; "diffuseConstant"; "x"; "y"; "width"; "height"; "result"]); 128 + ("feDisplacementMap", ["in"; "in2"; "scale"; "xChannelSelector"; "yChannelSelector"; 129 + "x"; "y"; "width"; "height"; "result"]); 130 + ("feDropShadow", ["in"; "dx"; "dy"; "stdDeviation"; "x"; "y"; "width"; "height"; "result"]); 131 + ("feFlood", ["x"; "y"; "width"; "height"; "result"]); 132 + ("feGaussianBlur", ["in"; "stdDeviation"; "edgeMode"; "x"; "y"; "width"; "height"; "result"]); 133 + ("feImage", ["href"; "xlink:href"; "preserveAspectRatio"; "crossorigin"; 134 + "x"; "y"; "width"; "height"; "result"]); 135 + ("feMerge", ["x"; "y"; "width"; "height"; "result"]); 136 + ("feMergeNode", ["in"]); 137 + ("feMorphology", ["in"; "operator"; "radius"; "x"; "y"; "width"; "height"; "result"]); 138 + ("feOffset", ["in"; "dx"; "dy"; "x"; "y"; "width"; "height"; "result"]); 139 + ("fePointLight", ["x"; "y"; "z"]); 140 + ("feSpecularLighting", ["in"; "surfaceScale"; "specularConstant"; "specularExponent"; 141 + "x"; "y"; "width"; "height"; "result"]); 142 + ("feSpotLight", ["x"; "y"; "z"; "pointsAtX"; "pointsAtY"; "pointsAtZ"; 143 + "specularExponent"; "limitingConeAngle"]); 144 + ("feTile", ["in"; "x"; "y"; "width"; "height"; "result"]); 145 + ("feTurbulence", ["type"; "baseFrequency"; "numOctaves"; "seed"; "stitchTiles"; 146 + "x"; "y"; "width"; "height"; "result"]); 147 + 148 + (* Marker *) 149 + ("marker", ["viewBox"; "preserveAspectRatio"; "refX"; "refY"; "markerUnits"; 150 + "markerWidth"; "markerHeight"; "orient"]); 151 + 152 + (* Descriptive elements *) 153 + ("title", []); 154 + ("desc", []); 155 + ("metadata", []); 156 + 157 + (* Animation elements *) 158 + ("animate", ["attributeName"; "attributeType"; "from"; "to"; "by"; "values"; 159 + "begin"; "dur"; "end"; "min"; "max"; "restart"; "repeatCount"; 160 + "repeatDur"; "fill"; "calcMode"; "keyTimes"; "keySplines"; 161 + "additive"; "accumulate"; "href"; "xlink:href"]); 162 + ("animateMotion", ["path"; "keyPoints"; "rotate"; "origin"; 163 + "begin"; "dur"; "end"; "min"; "max"; "restart"; "repeatCount"; 164 + "repeatDur"; "fill"; "calcMode"; "keyTimes"; "keySplines"; 165 + "additive"; "accumulate"; "href"; "xlink:href"]); 166 + ("animateTransform", ["attributeName"; "attributeType"; "type"; "from"; "to"; "by"; "values"; 167 + "begin"; "dur"; "end"; "min"; "max"; "restart"; "repeatCount"; 168 + "repeatDur"; "fill"; "calcMode"; "keyTimes"; "keySplines"; 169 + "additive"; "accumulate"; "href"; "xlink:href"]); 170 + ("set", ["attributeName"; "attributeType"; "to"; 171 + "begin"; "dur"; "end"; "min"; "max"; "restart"; "repeatCount"; 172 + "repeatDur"; "fill"; "href"; "xlink:href"]); 173 + ("mpath", ["href"; "xlink:href"]); 174 + 175 + (* Font elements (deprecated but still valid SVG 1.1) *) 176 + ("font", ["horiz-origin-x"; "horiz-origin-y"; "horiz-adv-x"; "vert-origin-x"; 177 + "vert-origin-y"; "vert-adv-y"]); 178 + ("font-face", ["font-family"; "font-style"; "font-variant"; "font-weight"; 179 + "font-stretch"; "font-size"; "unicode-range"; "units-per-em"; 180 + "panose-1"; "stemv"; "stemh"; "slope"; "cap-height"; "x-height"; 181 + "accent-height"; "ascent"; "descent"; "widths"; "bbox"; 182 + "ideographic"; "alphabetic"; "mathematical"; "hanging"; 183 + "v-ideographic"; "v-alphabetic"; "v-mathematical"; "v-hanging"; 184 + "underline-position"; "underline-thickness"; "strikethrough-position"; 185 + "strikethrough-thickness"; "overline-position"; "overline-thickness"]); 186 + ("font-face-src", []); 187 + ("font-face-uri", ["href"; "xlink:href"]); 188 + ("font-face-format", ["string"]); 189 + ("font-face-name", ["name"]); 190 + ("glyph", ["unicode"; "glyph-name"; "d"; "orientation"; "arabic-form"; "lang"; 191 + "horiz-adv-x"; "vert-origin-x"; "vert-origin-y"; "vert-adv-y"]); 192 + ("missing-glyph", ["d"; "horiz-adv-x"; "vert-origin-x"; "vert-origin-y"; "vert-adv-y"]); 193 + ("hkern", ["u1"; "g1"; "u2"; "g2"; "k"]); 194 + ("vkern", ["u1"; "g1"; "u2"; "g2"; "k"]); 195 + 196 + (* Link *) 197 + ("a", ["href"; "xlink:href"; "target"; "download"; "ping"; "rel"; "hreflang"; "type"; 198 + "referrerpolicy"]); 199 + 200 + (* Script and style *) 201 + ("script", ["href"; "xlink:href"; "type"; "crossorigin"]); 202 + ("style", ["type"; "media"; "title"]); 203 + 204 + (* View *) 205 + ("view", ["viewBox"; "preserveAspectRatio"; "zoomAndPan"; "viewTarget"]); 206 + ] 207 + 208 + (* Required attributes for certain elements *) 209 + let required_attrs = [ 210 + ("feConvolveMatrix", ["order"]); 211 + ("rect", ["width"; "height"]); 212 + ("font", ["horiz-adv-x"]); 213 + ] 214 + 215 + (* Attributes that are NOT allowed on specific elements - overrides global/element attrs *) 216 + (* NOTE: Element names must be lowercase for lookup to work *) 217 + let disallowed_attrs = [ 218 + (* fill/stroke not valid on image *) 219 + ("image", ["fill"; "fill-opacity"; "fill-rule"]); 220 + (* stop-color/stop-opacity only valid on stop element and containers for inheritance *) 221 + ("rect", ["stop-color"; "stop-opacity"]); 222 + (* marker shorthand not valid on container elements *) 223 + ("g", ["marker"]); 224 + ("svg", ["marker"; "contentScriptType"; "contentStyleType"]); 225 + (* x,y not valid on clipPath *) 226 + ("clippath", ["x"; "y"; "width"; "height"]); 227 + ] 228 + 229 + (* Required child elements - for future use *) 230 + let _required_children = [ 231 + ("font", ["missing-glyph"]); 232 + ] 233 + 234 + (* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *) 235 + let matches_pattern attr pattern = 236 + let attr_lower = String.lowercase_ascii attr in 237 + let pattern_lower = String.lowercase_ascii pattern in 238 + if String.ends_with ~suffix:"-*" pattern_lower then 239 + let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in 240 + String.starts_with ~prefix attr_lower 241 + else 242 + attr_lower = pattern_lower 243 + 244 + (* Check if attribute is valid for element *) 245 + let is_valid_attr element attr = 246 + (* First check if this attribute is specifically disallowed on this element *) 247 + (match List.assoc_opt element disallowed_attrs with 248 + | Some disallowed -> 249 + if List.exists (matches_pattern attr) disallowed then false 250 + else true 251 + | None -> true) && 252 + (* Then check global attrs *) 253 + (if List.exists (matches_pattern attr) global_svg_attrs then true 254 + else 255 + (* Check element-specific attrs *) 256 + match List.assoc_opt element element_attrs with 257 + | Some attrs -> List.exists (matches_pattern attr) attrs 258 + | None -> 259 + (* Unknown SVG element - be permissive *) 260 + true) 261 + 262 + (* Validate xmlns attributes *) 263 + let validate_xmlns_attr attr value element collector = 264 + match attr with 265 + | "xmlns" when element = "svg" -> 266 + if value <> svg_ns_url then 267 + Message_collector.add_error collector 268 + ~message:(Printf.sprintf 269 + "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns\xe2\x80\x9d (only \xe2\x80\x9c%s\xe2\x80\x9d permitted here)." 270 + value svg_ns_url) 271 + ~element 272 + ~attribute:attr 273 + () 274 + | "xmlns:xlink" -> 275 + if value <> "http://www.w3.org/1999/xlink" then 276 + Message_collector.add_error collector 277 + ~message:(Printf.sprintf 278 + "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:xlink\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)." 279 + value) 280 + ~element 281 + ~attribute:attr 282 + () 283 + | _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" -> 284 + (* Other xmlns declarations are not allowed in HTML-embedded SVG *) 285 + Message_collector.add_error collector 286 + ~message:(Printf.sprintf "Attribute \xe2\x80\x9c%s\xe2\x80\x9d not allowed here." attr) 287 + ~element 288 + ~attribute:attr 289 + () 290 + | _ -> () 291 + 292 + (* Validate SVG path data *) 293 + let validate_path_data d element collector = 294 + (* Simple path data validation - check for obviously invalid characters *) 295 + let len = String.length d in 296 + let i = ref 0 in 297 + let context_start = ref 0 in 298 + while !i < len do 299 + let c = d.[!i] in 300 + match c with 301 + | 'M' | 'm' | 'L' | 'l' | 'H' | 'h' | 'V' | 'v' | 'C' | 'c' | 'S' | 's' 302 + | 'Q' | 'q' | 'T' | 't' | 'A' | 'a' | 'Z' | 'z' 303 + | '0'..'9' | '.' | '-' | '+' | ',' | ' ' | '\t' | '\n' | '\r' | 'e' | 'E' -> 304 + incr i 305 + | '#' -> 306 + let ctx_end = min (String.length d) (!i + 1) in 307 + let context = String.sub d !context_start (ctx_end - !context_start) in 308 + Message_collector.add_error collector 309 + ~message:(Printf.sprintf 310 + "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected command but found \xe2\x80\x9c#\xe2\x80\x9d (context: \xe2\x80\x9c%s\xe2\x80\x9d)." 311 + d element context) 312 + ~element 313 + ~attribute:"d" 314 + (); 315 + i := len (* Stop processing *) 316 + | _ -> 317 + incr i 318 + done; 319 + (* Check arc command flags - they must be 0 or 1 *) 320 + (* This is a simplified check - look for 'a' or 'A' followed by numbers *) 321 + let arc_re = Str.regexp "[aA][ \t\n]*\\([0-9.+-]+\\)[ \t\n,]*\\([0-9.+-]+\\)[ \t\n,]*\\([0-9.+-]+\\)[ \t\n,]*\\([0-9]+\\)" in 322 + try 323 + let _ = Str.search_forward arc_re d 0 in 324 + let flag = Str.matched_group 4 d in 325 + if flag <> "0" && flag <> "1" then begin 326 + let pos = Str.match_beginning () in 327 + let ctx_end = min (String.length d) (pos + 25) in 328 + let ctx_start = max 0 (pos - 10) in 329 + let context = String.sub d ctx_start (ctx_end - ctx_start) in 330 + Message_collector.add_error collector 331 + ~message:(Printf.sprintf 332 + "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected \xe2\x80\x9c0\xe2\x80\x9d or \xe2\x80\x9c1\xe2\x80\x9d for large-arc-flag for \xe2\x80\x9ca\xe2\x80\x9d command but found \xe2\x80\x9c%s\xe2\x80\x9d instead (context: \xe2\x80\x9c%s\xe2\x80\x9d)." 333 + d element flag context) 334 + ~element 335 + ~attribute:"d" 336 + () 337 + end 338 + with Not_found -> () 339 + 340 + let start_element state ~name ~namespace ~attrs collector = 341 + let is_svg_element = namespace = Some svg_ns in 342 + 343 + (* Track if we're in SVG context *) 344 + if name = "svg" && is_svg_element then 345 + state.in_svg <- true; 346 + 347 + if is_svg_element || state.in_svg then begin 348 + state.element_stack <- name :: state.element_stack; 349 + 350 + let name_lower = String.lowercase_ascii name in 351 + 352 + (* Check each attribute *) 353 + List.iter (fun (attr, value) -> 354 + let attr_lower = String.lowercase_ascii attr in 355 + 356 + (* Validate xmlns attributes *) 357 + if String.starts_with ~prefix:"xmlns" attr_lower then 358 + validate_xmlns_attr attr_lower value name_lower collector 359 + (* Check xml:* attributes - most are not allowed *) 360 + else if attr_lower = "xml:id" || attr_lower = "xml:base" then 361 + Message_collector.add_error collector 362 + ~message:(Printf.sprintf 363 + "Attribute \xe2\x80\x9c%s\xe2\x80\x9d not allowed on element \xe2\x80\x9c%s\xe2\x80\x9d at this point." 364 + attr name_lower) 365 + ~element:name_lower 366 + ~attribute:attr_lower 367 + () 368 + (* Validate path data *) 369 + else if attr_lower = "d" && name_lower = "path" then 370 + validate_path_data value name_lower collector 371 + (* Check if attribute is valid for this element *) 372 + else if not (is_valid_attr name_lower attr_lower) then 373 + Message_collector.add_error collector 374 + ~message:(Printf.sprintf 375 + "Attribute \xe2\x80\x9c%s\xe2\x80\x9d not allowed on element \xe2\x80\x9c%s\xe2\x80\x9d at this point." 376 + attr name_lower) 377 + ~element:name_lower 378 + ~attribute:attr_lower 379 + () 380 + ) attrs; 381 + 382 + (* Check required attributes *) 383 + (match List.assoc_opt name_lower required_attrs with 384 + | Some req_attrs -> 385 + List.iter (fun req_attr -> 386 + if not (List.exists (fun (a, _) -> String.lowercase_ascii a = req_attr) attrs) then 387 + Message_collector.add_error collector 388 + ~message:(Printf.sprintf 389 + "Element \xe2\x80\x9c%s\xe2\x80\x9d is missing required attribute \xe2\x80\x9c%s\xe2\x80\x9d." 390 + name_lower req_attr) 391 + ~element:name_lower 392 + () 393 + ) req_attrs 394 + | None -> ()) 395 + end 396 + 397 + let end_element state ~name ~namespace _collector = 398 + let is_svg_element = namespace = Some svg_ns in 399 + 400 + if is_svg_element || state.in_svg then begin 401 + (* Pop from stack *) 402 + (match state.element_stack with 403 + | _ :: rest -> state.element_stack <- rest 404 + | [] -> ()); 405 + 406 + (* Exit SVG context *) 407 + if name = "svg" && is_svg_element then 408 + state.in_svg <- false 409 + end 410 + 411 + let characters _state _text _collector = () 412 + 413 + let end_document _state _collector = () 414 + 415 + let checker = 416 + (module struct 417 + type nonrec state = state 418 + let create = create 419 + let reset = reset 420 + let start_element = start_element 421 + let end_element = end_element 422 + let characters = characters 423 + let end_document = end_document 424 + end : Checker.S)
+5
lib/html5_checker/specialized/svg_checker.mli
··· 1 + (** SVG attribute and element validation checker. 2 + 3 + Validates SVG elements and attributes according to SVG 1.1/2 specifications. *) 4 + 5 + val checker : Checker.t
+11 -17
test/debug_check.ml
··· 1 1 let () = 2 - let test_file = "validator/tests/xhtml/elements/menu/menu-containing-hr-novalid.xhtml" in 2 + let test_file = "validator/tests/xhtml/elements/progress/002-isvalid.xhtml" in 3 3 let ic = open_in test_file in 4 4 let html = really_input_string ic (in_channel_length ic) in 5 5 close_in ic; 6 6 let reader = Bytesrw.Bytes.Reader.of_string html in 7 7 let doc = Html5rw.parse ~collect_errors:true reader in 8 8 let root = Html5rw.root doc in 9 - print_endline "=== DOM Structure ==="; 9 + print_endline "=== DOM Structure (with namespaces) ==="; 10 10 let rec print_node indent (node : Html5rw.Dom.node) = 11 11 let open Html5rw.Dom in 12 12 match node.name with 13 - | "#text" -> 14 - let text = String.trim node.data in 15 - if String.length text > 0 then 16 - Printf.printf "%sTEXT: %s\n" indent text 13 + | "#text" -> () 17 14 | "#document" | "#document-fragment" -> 18 15 Printf.printf "%s%s\n" indent node.name; 19 16 List.iter (print_node (indent ^ " ")) node.children 20 - | "!doctype" -> Printf.printf "%s<!DOCTYPE>\n" indent 17 + | "!doctype" -> () 21 18 | "#comment" -> () 22 19 | _ -> 23 - Printf.printf "%s<%s>\n" indent node.name; 20 + let ns = match node.namespace with Some ns -> ns | None -> "none" in 21 + Printf.printf "%s<%s ns=%s>\n" indent node.name ns; 22 + List.iter (fun (k, v) -> 23 + if k = "foo" then Printf.printf "%s @%s=%s\n" indent k v 24 + ) node.attrs; 24 25 List.iter (print_node (indent ^ " ")) node.children 25 26 in 26 27 print_node "" root; 27 - print_endline "\n=== Now checking ==="; 28 + print_endline "\n=== Checking... ==="; 28 29 let reader2 = Bytesrw.Bytes.Reader.of_string html in 29 30 let result = Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader2 in 30 31 let errors = Html5_checker.errors result in 31 - let warnings = Html5_checker.warnings result in 32 32 print_endline "=== Errors ==="; 33 33 List.iter (fun e -> print_endline e.Html5_checker.Message.message) errors; 34 - print_endline "=== Warnings ==="; 35 - List.iter (fun e -> print_endline e.Html5_checker.Message.message) warnings; 36 34 print_endline "\n=== Expected ==="; 37 - print_endline "Element \xe2\x80\x9chr\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cmenu\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"; 38 - if List.length errors > 0 then 39 - print_endline "\nPASS (has errors)" 40 - else 41 - print_endline "\nFAIL (no errors)" 35 + print_endline "Element \xe2\x80\x9crect\xe2\x80\x9d is missing required attribute \xe2\x80\x9cheight\xe2\x80\x9d."