OCaml HTML5 parser/serialiser based on Python's JustHTML
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at aceb6815d59edeb1b5d999072b91ec93d1948c44 121 lines 3.7 kB view raw
1(** Ruby element content model validation checker. 2 3 Validates that: 4 - Ruby contains at least one rt element 5 - Ruby contains phrasing content before rt elements *) 6 7type ruby_info = { 8 mutable has_rt : bool; 9 mutable has_content_before_rt : bool; 10 mutable saw_rt : bool; (* Whether we've seen rt yet *) 11 mutable depth : int; (* Track nesting level *) 12} 13 14type state = { 15 mutable ruby_stack : ruby_info list; (* Stack for nested ruby elements *) 16 mutable in_template : int; 17} 18 19let create () = { 20 ruby_stack = []; 21 in_template = 0; 22} 23 24let reset state = 25 state.ruby_stack <- []; 26 state.in_template <- 0 27 28(** Check if element is phrasing content that can appear before rt *) 29let is_phrasing_content tag = 30 match tag with 31 | Tag.Html `Rt | Tag.Html `Rp -> false 32 | _ -> true 33 34let start_element state ~element _collector = 35 match element.Element.tag with 36 | Tag.Html `Template -> 37 state.in_template <- state.in_template + 1 38 39 | Tag.Html `Ruby when state.in_template = 0 -> 40 (* Push new ruby context *) 41 let info = { 42 has_rt = false; 43 has_content_before_rt = false; 44 saw_rt = false; 45 depth = 1; (* Set depth to 1 for the ruby element itself *) 46 } in 47 state.ruby_stack <- info :: state.ruby_stack 48 49 | tag when state.in_template = 0 -> 50 (match state.ruby_stack with 51 | info :: _ -> 52 (* Inside a ruby element *) 53 if info.depth = 1 then begin 54 (* Direct children of ruby *) 55 match tag with 56 | Tag.Html `Rt -> 57 info.has_rt <- true; 58 info.saw_rt <- true 59 | _ when is_phrasing_content tag -> 60 if not info.saw_rt then 61 info.has_content_before_rt <- true 62 | _ -> () 63 end; 64 info.depth <- info.depth + 1 65 | [] -> ()) 66 67 | _ -> () (* In template or non-HTML element *) 68 69let end_element state ~tag collector = 70 match tag with 71 | Tag.Html `Template when state.in_template > 0 -> 72 state.in_template <- state.in_template - 1 73 74 | Tag.Html `Ruby when state.in_template = 0 -> 75 (match state.ruby_stack with 76 | info :: rest -> 77 info.depth <- info.depth - 1; 78 (* Check if this is the closing ruby tag (depth becomes 0 when ruby closes) *) 79 if info.depth <= 0 then begin 80 (* Closing ruby element - validate *) 81 if not info.has_rt then 82 (* Empty ruby or ruby without any rt - needs rp or rt *) 83 Message_collector.add_typed collector 84 (`Element (`Missing_child_one_of (`Parent "ruby", `Children ["rp"; "rt"]))) 85 else if not info.has_content_before_rt then 86 (* Has rt but missing content before it - needs content *) 87 Message_collector.add_typed collector 88 (`Element (`Missing_child (`Parent "ruby", `Child "rt"))); 89 state.ruby_stack <- rest 90 end 91 | [] -> ()) 92 93 | _ when state.in_template = 0 -> 94 (match state.ruby_stack with 95 | info :: _ -> 96 info.depth <- info.depth - 1 97 | [] -> ()) 98 99 | _ -> () (* In template or non-HTML element *) 100 101let characters state text _collector = 102 (* Text content counts as phrasing content before rt *) 103 if state.in_template > 0 then () 104 else begin 105 match state.ruby_stack with 106 | info :: _ -> 107 if info.depth = 1 then begin 108 (* Direct text child of ruby *) 109 let has_non_whitespace = 110 String.exists (fun c -> 111 c <> ' ' && c <> '\t' && c <> '\n' && c <> '\r' 112 ) text 113 in 114 if has_non_whitespace && not info.saw_rt then 115 info.has_content_before_rt <- true 116 end 117 | [] -> () 118 end 119 120let checker = Checker.make ~create ~reset ~start_element ~end_element 121 ~characters ()