OCaml HTML5 parser/serialiser based on Python's JustHTML

pp

+10
lib/html5rw/dom/dom.mli
··· 134 134 system_id : string option; (** System identifier (legacy, rarely used) *) 135 135 } 136 136 137 + val pp_doctype_data : Format.formatter -> doctype_data -> unit 138 + (** Pretty-print DOCTYPE data. *) 139 + 137 140 (** Quirks mode setting for the document. 138 141 139 142 {i Quirks mode} is a browser rendering mode that emulates bugs and ··· 173 176 WHATWG: How the parser determines quirks mode 174 177 *) 175 178 type quirks_mode = Dom_node.quirks_mode = No_quirks | Quirks | Limited_quirks 179 + 180 + val pp_quirks_mode : Format.formatter -> quirks_mode -> unit 181 + (** Pretty-print quirks mode. *) 176 182 177 183 (** A DOM node in the parsed document tree. 178 184 ··· 322 328 323 329 Only doctype nodes use this field; for all other nodes it is [None]. *) 324 330 } 331 + 332 + val pp : Format.formatter -> node -> unit 333 + (** Pretty-print a DOM node. Prints a summary representation showing the 334 + node type and key attributes. Does not recursively print children. *) 325 335 326 336 (** {1 Node Name Constants} 327 337
+31
lib/html5rw/dom/dom_node.ml
(* Context from the preceding definition, which begins above this excerpt:
     ) node.template_content
     end;
     new_node *)

(* Pretty printers *)

(** [pp_doctype_data fmt d] prints [d] using DOCTYPE declaration syntax.

    - The name is printed after a single space when present; when it is
      absent the output is [<!DOCTYPE>] with no stray space, matching the
      fallback used by {!pp} for doctype nodes without data.
    - A public identifier is introduced by the [PUBLIC] keyword, optionally
      followed by the quoted system identifier.
    - A system identifier without a public identifier is introduced by the
      [SYSTEM] keyword, e.g.
      [<!DOCTYPE html SYSTEM "about:legacy-compat">].

    Identifiers are wrapped in double quotes verbatim (no escaping). *)
let pp_doctype_data fmt (d : doctype_data) =
  Format.pp_print_string fmt "<!DOCTYPE";
  Option.iter (Format.fprintf fmt " %s") d.name;
  (match d.public_id, d.system_id with
   | Some pub, Some sys -> Format.fprintf fmt " PUBLIC \"%s\" \"%s\"" pub sys
   | Some pub, None -> Format.fprintf fmt " PUBLIC \"%s\"" pub
   (* Without a public identifier the SYSTEM keyword is required;
      a bare quoted string after the name is not valid DOCTYPE syntax. *)
   | None, Some sys -> Format.fprintf fmt " SYSTEM \"%s\"" sys
   | None, None -> ());
  Format.pp_print_char fmt '>'

(** [pp_quirks_mode fmt m] prints [m] as one of [no-quirks], [quirks] or
    [limited-quirks]. *)
let pp_quirks_mode fmt mode =
  let label =
    match mode with
    | No_quirks -> "no-quirks"
    | Quirks -> "quirks"
    | Limited_quirks -> "limited-quirks"
  in
  Format.pp_print_string fmt label

(** [pp fmt node] prints a one-line summary of [node]; children are not
    visited.

    - text: [#text] followed by the data as an OCaml-escaped string literal
    - comment: the data between [<!--] and [-->]
    - document / document fragment: [#document] / [#document-fragment]
    - doctype: delegated to {!pp_doctype_data}, or [<!DOCTYPE>] when the node
      carries no doctype data
    - anything else: an opening tag with the node name and its attributes,
      each value printed as an OCaml-escaped string literal *)
let pp fmt node =
  if is_text node then
    Format.fprintf fmt "#text %S" node.data
  else if is_comment node then
    Format.fprintf fmt "<!-- %s -->" node.data
  else if is_document node then
    Format.pp_print_string fmt "#document"
  else if is_document_fragment node then
    Format.pp_print_string fmt "#document-fragment"
  else if is_doctype node then
    (match node.doctype with
     | Some d -> pp_doctype_data fmt d
     | None -> Format.pp_print_string fmt "<!DOCTYPE>")
  else begin
    Format.fprintf fmt "<%s" node.name;
    List.iter (fun (k, v) -> Format.fprintf fmt " %s=%S" k v) node.attrs;
    Format.pp_print_char fmt '>'
  end
+10
lib/html5rw/dom/dom_node.mli
··· 134 134 system_id : string option; (** System identifier (legacy, rarely used) *) 135 135 } 136 136 137 + val pp_doctype_data : Format.formatter -> doctype_data -> unit 138 + (** Pretty-print DOCTYPE data. *) 139 + 137 140 (** Quirks mode setting for the document. 138 141 139 142 {i Quirks mode} is a browser rendering mode that emulates bugs and ··· 173 176 WHATWG: How the parser determines quirks mode 174 177 *) 175 178 type quirks_mode = No_quirks | Quirks | Limited_quirks 179 + 180 + val pp_quirks_mode : Format.formatter -> quirks_mode -> unit 181 + (** Pretty-print quirks mode. *) 176 182 177 183 (** A DOM node in the parsed document tree. 178 184 ··· 322 328 323 329 Only doctype nodes use this field; for all other nodes it is [None]. *) 324 330 } 331 + 332 + val pp : Format.formatter -> node -> unit 333 + (** Pretty-print a DOM node. Prints a summary representation showing the 334 + node type and key attributes. Does not recursively print children. *) 325 335 326 336 (** {1 Node Name Constants} 327 337
+2
lib/html5rw/encoding/encoding.ml
(* The closing of a doc comment that begins above this excerpt precedes
   this binding. *)
let decode = Encoding_decode.decode

(** [pp fmt enc] prints the string produced by [encoding_to_string enc]. *)
let pp fmt enc =
  enc |> encoding_to_string |> Format.pp_print_string fmt
+3
lib/html5rw/encoding/encoding.mli
··· 38 38 | Iso_8859_2 (** ISO-8859-2 (Central European) *) 39 39 | Euc_jp (** EUC-JP (Japanese) *) 40 40 41 + val pp : Format.formatter -> encoding -> unit 42 + (** Pretty-print an encoding using its canonical label. *) 43 + 41 44 (** {1 Encoding Utilities} *) 42 45 43 46 val encoding_to_string : encoding -> string
+18
lib/html5rw/html5rw.ml
··· 107 107 (** DOM node type. See {!Dom} for manipulation functions. *) 108 108 type node = Dom.node 109 109 110 + let pp_node = Dom.pp 111 + 110 112 (** Doctype information *) 111 113 type doctype_data = Dom.doctype_data = { 112 114 name : string option; ··· 114 116 system_id : string option; 115 117 } 116 118 119 + let pp_doctype_data = Dom.pp_doctype_data 120 + 117 121 (** Quirks mode as determined during parsing *) 118 122 type quirks_mode = Dom.quirks_mode = No_quirks | Quirks | Limited_quirks 119 123 124 + let pp_quirks_mode = Dom.pp_quirks_mode 125 + 120 126 (** Character encoding detected or specified *) 121 127 type encoding = Encoding.encoding = 122 128 | Utf8 ··· 125 131 | Windows_1252 126 132 | Iso_8859_2 127 133 | Euc_jp 134 + 135 + let pp_encoding = Encoding.pp 128 136 129 137 (** Parse error record *) 130 138 type parse_error = Parser.parse_error ··· 144 152 (** Get the namespace from a fragment context *) 145 153 let fragment_context_namespace = Parser.fragment_context_namespace 146 154 155 + let pp_fragment_context = Parser.pp_fragment_context 156 + 147 157 (** Get the error code *) 148 158 let error_code = Parser.error_code 149 159 ··· 152 162 153 163 (** Get the column number of an error (1-indexed) *) 154 164 let error_column = Parser.error_column 165 + 166 + let pp_parse_error = Parser.pp_parse_error 155 167 156 168 (** Result of parsing an HTML document *) 157 169 type t = { ··· 159 171 errors : parse_error list; 160 172 encoding : encoding option; 161 173 } 174 + 175 + let pp fmt t = 176 + Format.fprintf fmt "{root=%a; errors=%d; encoding=%a}" 177 + pp_node t.root 178 + (List.length t.errors) 179 + (Format.pp_print_option pp_encoding) t.encoding 162 180 163 181 (* Internal: convert Parser.t to our t *) 164 182 let of_parser_result (p : Parser.t) : t =
+22
lib/html5rw/html5rw.mli
··· 241 241 WHATWG: The DOM *) 242 242 type node = Dom.node 243 243 244 + val pp_node : Format.formatter -> node -> unit 245 + (** Pretty-print a DOM node. Prints a summary representation showing the 246 + node type and key attributes. Does not recursively print children. *) 247 + 244 248 (** DOCTYPE information. 245 249 246 250 The DOCTYPE declaration ([<!DOCTYPE html>]) appears at the start of HTML ··· 262 266 (** System identifier (URL) for legacy DOCTYPEs *) 263 267 } 264 268 269 + val pp_doctype_data : Format.formatter -> doctype_data -> unit 270 + (** Pretty-print DOCTYPE data. *) 271 + 265 272 (** Quirks mode as determined during parsing. 266 273 267 274 {i Quirks mode} controls how browsers render CSS and compute layouts. ··· 285 292 @see <https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode> 286 293 WHATWG: How quirks mode is determined *) 287 294 type quirks_mode = Dom.quirks_mode = No_quirks | Quirks | Limited_quirks 295 + 296 + val pp_quirks_mode : Format.formatter -> quirks_mode -> unit 297 + (** Pretty-print quirks mode. *) 288 298 289 299 (** Character encoding detected or specified. 290 300 ··· 322 332 | Euc_jp 323 333 (** EUC-JP: Extended Unix Code for Japanese *) 324 334 335 + val pp_encoding : Format.formatter -> encoding -> unit 336 + (** Pretty-print an encoding using its canonical label. *) 337 + 325 338 (** A parse error encountered during HTML5 parsing. 326 339 327 340 HTML5 parsing {b never fails} - the specification defines error recovery ··· 371 384 372 385 Column numbers count from 1 and reset at each newline. *) 373 386 val error_column : parse_error -> int 387 + 388 + val pp_parse_error : Format.formatter -> parse_error -> unit 389 + (** Pretty-print a parse error with location information. *) 374 390 375 391 (** {1 Error Handling} *) 376 392 ··· 524 540 (** Get the namespace of a fragment context. 
*) 525 541 val fragment_context_namespace : fragment_context -> string option 526 542 543 + val pp_fragment_context : Format.formatter -> fragment_context -> unit 544 + (** Pretty-print a fragment context. *) 545 + 527 546 (** Result of parsing an HTML document. 528 547 529 548 This record contains everything produced by parsing: ··· 557 576 encoding detection, and [None] when using {!parse} (which expects 558 577 pre-decoded UTF-8 input). *) 559 578 } 579 + 580 + val pp : Format.formatter -> t -> unit 581 + (** Pretty-print a parse result summary. *) 560 582 561 583 (** {1 Parsing Functions} *) 562 584
+2
lib/html5rw/parse_error_code.ml
(** [is_whatwg_standard code] is [false] for [Tree_construction_error _]
    and [true] for every other code. *)
let is_whatwg_standard code =
  match code with
  | Tree_construction_error _ -> false
  | _ -> true

(** [pp fmt code] prints the string produced by [to_string code]. *)
let pp fmt code =
  let text = to_string code in
  Format.pp_print_string fmt text
+3
lib/html5rw/parse_error_code.mli
··· 328 328 (** Check if an error code is defined in the WHATWG specification. 329 329 330 330 Returns [false] for [Tree_construction_error _], [true] for all others. *) 331 + 332 + val pp : Format.formatter -> t -> unit 333 + (** Pretty-print an error code using the WHATWG specification string format. *)
+15
lib/html5rw/parser/parser.ml
let root t = t.Parser_impl.root
let errors t = t.Parser_impl.errors
let encoding t = t.Parser_impl.encoding

(* Pretty printers *)

(** [pp_parse_error fmt e] prints [e] as [(line,column): code], where the
    code is rendered by [Parse_error_code.pp]. *)
let pp_parse_error fmt (e : parse_error) =
  Format.fprintf fmt "(%d,%d): " e.line e.column;
  Parse_error_code.pp fmt e.code

(** [pp_fragment_context fmt ctx] prints the context tag name in angle
    brackets, followed by [ xmlns=NS] when a namespace is set. *)
let pp_fragment_context fmt (ctx : fragment_context) =
  Format.fprintf fmt "<%s" ctx.tag_name;
  (match ctx.namespace with
   | Some ns -> Format.fprintf fmt " xmlns=%s" ns
   | None -> ());
  Format.pp_print_char fmt '>'

(** [pp fmt t] prints a summary of a parse result: the root node (via
    [Dom.pp]), the number of errors, and the encoding when present. *)
let pp fmt t =
  let error_count = List.length (errors t) in
  let pp_encoding_opt = Format.pp_print_option Encoding.pp in
  Format.fprintf fmt "{root=%a; errors=%d; encoding=%a}"
    Dom.pp (root t)
    error_count
    pp_encoding_opt (encoding t)
+9
lib/html5rw/parser/parser.mli
··· 283 283 not bytes or grapheme clusters. *) 284 284 val error_column : parse_error -> int 285 285 286 + val pp_parse_error : Format.formatter -> parse_error -> unit 287 + (** Pretty-print a parse error with location information. *) 288 + 286 289 (** Context element for HTML fragment parsing. 287 290 288 291 When parsing HTML fragments (the content that would be assigned to ··· 349 352 350 353 (** Get the namespace of a fragment context ([None] for HTML). *) 351 354 val fragment_context_namespace : fragment_context -> string option 355 + 356 + val pp_fragment_context : Format.formatter -> fragment_context -> unit 357 + (** Pretty-print a fragment context. *) 352 358 353 359 (** Result of parsing an HTML document or fragment. 354 360 ··· 485 491 @see <https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding> 486 492 WHATWG: Determining the character encoding *) 487 493 val encoding : t -> Encoding.encoding option 494 + 495 + val pp : Format.formatter -> t -> unit 496 + (** Pretty-print a parse result summary. *) 488 497 489 498 (** {1 Querying} *) 490 499
+21
lib/html5rw/selector/selector.mli
··· 107 107 108 108 val to_human_string : t -> string 109 109 (** Convert to a human-readable error message. *) 110 + 111 + val pp : Format.formatter -> t -> unit 112 + (** Pretty-print a selector error code. *) 110 113 end 111 114 112 115 (** {1 Exceptions} *) ··· 169 172 val make_compound : simple_selector list -> compound_selector 170 173 val make_complex : (string option * compound_selector) list -> complex_selector 171 174 val make_list : complex_selector list -> selector_list 175 + 176 + val pp_simple_selector_type : Format.formatter -> simple_selector_type -> unit 177 + (** Pretty-print a simple selector type. *) 178 + 179 + val pp_simple_selector : Format.formatter -> simple_selector -> unit 180 + (** Pretty-print a simple selector. *) 181 + 182 + val pp_compound_selector : Format.formatter -> compound_selector -> unit 183 + (** Pretty-print a compound selector. *) 184 + 185 + val pp_complex_selector : Format.formatter -> complex_selector -> unit 186 + (** Pretty-print a complex selector. *) 187 + 188 + val pp_selector_list : Format.formatter -> selector_list -> unit 189 + (** Pretty-print a selector list. *) 190 + 191 + val pp : Format.formatter -> selector -> unit 192 + (** Pretty-print a selector. *) 172 193 end 173 194 174 195 (** Token types for the selector lexer. *)
+37
lib/html5rw/selector/selector_ast.ml
let make_complex parts : complex_selector = { parts }

let make_list (selectors : complex_selector list) : selector_list = { selectors }

(* Pretty printers *)

(** [pp_simple_selector_type fmt ty] prints a short lowercase label for the
    kind of simple selector. *)
let pp_simple_selector_type fmt ty =
  let label =
    match ty with
    | Type_tag -> "tag"
    | Type_id -> "id"
    | Type_class -> "class"
    | Type_universal -> "universal"
    | Type_attr -> "attr"
    | Type_pseudo -> "pseudo"
  in
  Format.pp_print_string fmt label

(** [pp_simple_selector fmt s] prints the selector type followed by each
    optional component that is present, in this order: [(name)],
    [[operator]], [=value] (as an OCaml-escaped string literal), [(arg)]. *)
let pp_simple_selector fmt s =
  let when_some template = function
    | Some text -> Format.fprintf fmt template text
    | None -> ()
  in
  pp_simple_selector_type fmt s.selector_type;
  when_some "(%s)" s.name;
  when_some "[%s]" s.operator;
  when_some "=%S" s.value;
  when_some "(%s)" s.arg

(** [pp_compound_selector fmt c] prints the simple selectors of [c]
    separated by single spaces. *)
let pp_compound_selector fmt (c : compound_selector) =
  let space ppf () = Format.pp_print_char ppf ' ' in
  Format.pp_print_list ~pp_sep:space pp_simple_selector fmt c.selectors

(** [pp_complex_selector fmt c] prints each part of [c] in order; a part
    that carries a combinator is preceded by that combinator surrounded by
    spaces. *)
let pp_complex_selector fmt c =
  List.iter
    (fun (combinator, compound) ->
      (match combinator with
       | Some comb -> Format.fprintf fmt " %s " comb
       | None -> ());
      pp_compound_selector fmt compound)
    c.parts

(** [pp_selector_list fmt l] prints the complex selectors of [l] separated
    by a comma and a space. *)
let pp_selector_list fmt l =
  let comma ppf () = Format.pp_print_string ppf ", " in
  Format.pp_print_list ~pp_sep:comma pp_complex_selector fmt l.selectors

(** [pp fmt sel] dispatches to the printer matching the constructor of
    [sel]. *)
let pp fmt sel =
  match sel with
  | Simple s -> pp_simple_selector fmt s
  | Compound c -> pp_compound_selector fmt c
  | Complex c -> pp_complex_selector fmt c
  | List l -> pp_selector_list fmt l
+2
lib/html5rw/selector/selector_error_code.ml
(* Context: final arms of a match that begins above this excerpt:
     | Expected_selector_after_combinator -> "Expected selector after combinator"
     | Unexpected_token -> "Unexpected token"
     | Expected_end_of_selector -> "Expected end of selector" *)

(** [pp fmt code] prints the string produced by [to_string code]. *)
let pp fmt code =
  code |> to_string |> Format.pp_print_string fmt
+3
lib/html5rw/selector/selector_error_code.mli
··· 60 60 - [to_human_string Empty_selector] returns ["Empty selector"] 61 61 - [to_human_string Expected_closing_bracket] returns ["Expected \]"] 62 62 *) 63 + 64 + val pp : Format.formatter -> t -> unit 65 + (** Pretty-print a selector error code using its kebab-case string form. *)
+18
lib/html5rw/tokenizer/tokenizer.mli
··· 50 50 val make_comment : string -> t 51 51 val make_character : string -> t 52 52 val eof : t 53 + 54 + val pp_tag_kind : Format.formatter -> tag_kind -> unit 55 + (** Pretty-print a tag kind (Start or End). *) 56 + 57 + val pp_doctype : Format.formatter -> doctype -> unit 58 + (** Pretty-print a DOCTYPE token. *) 59 + 60 + val pp_tag : Format.formatter -> tag -> unit 61 + (** Pretty-print a tag token. *) 62 + 63 + val pp : Format.formatter -> t -> unit 64 + (** Pretty-print a token. *) 53 65 end 54 66 55 67 (** Tokenizer states. *) ··· 135 147 | Hexadecimal_character_reference 136 148 | Decimal_character_reference 137 149 | Numeric_character_reference_end 150 + 151 + val pp : Format.formatter -> t -> unit 152 + (** Pretty-print a tokenizer state. *) 138 153 end 139 154 140 155 (** Parse error types. *) ··· 153 168 (** Create an error with a typed error code. *) 154 169 155 170 val to_string : t -> string 171 + 172 + val pp : Format.formatter -> t -> unit 173 + (** Pretty-print a tokenizer error. *) 156 174 end 157 175 158 176 (** Input stream with position tracking. *)
+4
lib/html5rw/tokenizer/tokenizer_errors.ml
(** [to_string err] renders [err] as [(line,column): code], with the code
    rendered by [Parse_error_code.to_string]. *)
let to_string err =
  let code = Parse_error_code.to_string err.code in
  Printf.sprintf "(%d,%d): %s" err.line err.column code

(** [pp fmt err] prints [err] as [(line,column): code], with the code
    rendered by [Parse_error_code.pp]. *)
let pp fmt err =
  Format.fprintf fmt "(%d,%d): " err.line err.column;
  Parse_error_code.pp fmt err.code
+85
lib/html5rw/tokenizer/tokenizer_state.ml
(* Context: final constructors of a type definition that begins above this
   excerpt:
     | Hexadecimal_character_reference
     | Decimal_character_reference
     | Numeric_character_reference_end *)

(** [pp fmt state] prints the name of [state], spelled exactly like its
    constructor. *)
let pp fmt state =
  let name_of = function
    | Data -> "Data"
    | Rcdata -> "Rcdata"
    | Rawtext -> "Rawtext"
    | Script_data -> "Script_data"
    | Plaintext -> "Plaintext"
    | Tag_open -> "Tag_open"
    | End_tag_open -> "End_tag_open"
    | Tag_name -> "Tag_name"
    | Rcdata_less_than_sign -> "Rcdata_less_than_sign"
    | Rcdata_end_tag_open -> "Rcdata_end_tag_open"
    | Rcdata_end_tag_name -> "Rcdata_end_tag_name"
    | Rawtext_less_than_sign -> "Rawtext_less_than_sign"
    | Rawtext_end_tag_open -> "Rawtext_end_tag_open"
    | Rawtext_end_tag_name -> "Rawtext_end_tag_name"
    | Script_data_less_than_sign -> "Script_data_less_than_sign"
    | Script_data_end_tag_open -> "Script_data_end_tag_open"
    | Script_data_end_tag_name -> "Script_data_end_tag_name"
    | Script_data_escape_start -> "Script_data_escape_start"
    | Script_data_escape_start_dash -> "Script_data_escape_start_dash"
    | Script_data_escaped -> "Script_data_escaped"
    | Script_data_escaped_dash -> "Script_data_escaped_dash"
    | Script_data_escaped_dash_dash -> "Script_data_escaped_dash_dash"
    | Script_data_escaped_less_than_sign -> "Script_data_escaped_less_than_sign"
    | Script_data_escaped_end_tag_open -> "Script_data_escaped_end_tag_open"
    | Script_data_escaped_end_tag_name -> "Script_data_escaped_end_tag_name"
    | Script_data_double_escape_start -> "Script_data_double_escape_start"
    | Script_data_double_escaped -> "Script_data_double_escaped"
    | Script_data_double_escaped_dash -> "Script_data_double_escaped_dash"
    | Script_data_double_escaped_dash_dash -> "Script_data_double_escaped_dash_dash"
    | Script_data_double_escaped_less_than_sign -> "Script_data_double_escaped_less_than_sign"
    | Script_data_double_escape_end -> "Script_data_double_escape_end"
    | Before_attribute_name -> "Before_attribute_name"
    | Attribute_name -> "Attribute_name"
    | After_attribute_name -> "After_attribute_name"
    | Before_attribute_value -> "Before_attribute_value"
    | Attribute_value_double_quoted -> "Attribute_value_double_quoted"
    | Attribute_value_single_quoted -> "Attribute_value_single_quoted"
    | Attribute_value_unquoted -> "Attribute_value_unquoted"
    | After_attribute_value_quoted -> "After_attribute_value_quoted"
    | Self_closing_start_tag -> "Self_closing_start_tag"
    | Bogus_comment -> "Bogus_comment"
    | Markup_declaration_open -> "Markup_declaration_open"
    | Comment_start -> "Comment_start"
    | Comment_start_dash -> "Comment_start_dash"
    | Comment -> "Comment"
    | Comment_less_than_sign -> "Comment_less_than_sign"
    | Comment_less_than_sign_bang -> "Comment_less_than_sign_bang"
    | Comment_less_than_sign_bang_dash -> "Comment_less_than_sign_bang_dash"
    | Comment_less_than_sign_bang_dash_dash -> "Comment_less_than_sign_bang_dash_dash"
    | Comment_end_dash -> "Comment_end_dash"
    | Comment_end -> "Comment_end"
    | Comment_end_bang -> "Comment_end_bang"
    | Doctype -> "Doctype"
    | Before_doctype_name -> "Before_doctype_name"
    | Doctype_name -> "Doctype_name"
    | After_doctype_name -> "After_doctype_name"
    | After_doctype_public_keyword -> "After_doctype_public_keyword"
    | Before_doctype_public_identifier -> "Before_doctype_public_identifier"
    | Doctype_public_identifier_double_quoted -> "Doctype_public_identifier_double_quoted"
    | Doctype_public_identifier_single_quoted -> "Doctype_public_identifier_single_quoted"
    | After_doctype_public_identifier -> "After_doctype_public_identifier"
    | Between_doctype_public_and_system_identifiers -> "Between_doctype_public_and_system_identifiers"
    | After_doctype_system_keyword -> "After_doctype_system_keyword"
    | Before_doctype_system_identifier -> "Before_doctype_system_identifier"
    | Doctype_system_identifier_double_quoted -> "Doctype_system_identifier_double_quoted"
    | Doctype_system_identifier_single_quoted -> "Doctype_system_identifier_single_quoted"
    | After_doctype_system_identifier -> "After_doctype_system_identifier"
    | Bogus_doctype -> "Bogus_doctype"
    | Cdata_section -> "Cdata_section"
    | Cdata_section_bracket -> "Cdata_section_bracket"
    | Cdata_section_end -> "Cdata_section_end"
    | Character_reference -> "Character_reference"
    | Named_character_reference -> "Named_character_reference"
    | Ambiguous_ampersand -> "Ambiguous_ampersand"
    | Numeric_character_reference -> "Numeric_character_reference"
    | Hexadecimal_character_reference_start -> "Hexadecimal_character_reference_start"
    | Decimal_character_reference_start -> "Decimal_character_reference_start"
    | Hexadecimal_character_reference -> "Hexadecimal_character_reference"
    | Decimal_character_reference -> "Decimal_character_reference"
    | Numeric_character_reference_end -> "Numeric_character_reference_end"
  in
  Format.pp_print_string fmt (name_of state)
+27
lib/html5rw/tokenizer/tokenizer_token.ml
let make_character data = Character data

let eof = EOF

(* Pretty printers *)

(** [pp_tag_kind fmt kind] prints [Start] or [End]. *)
let pp_tag_kind fmt kind =
  let label = match kind with Start -> "Start" | End -> "End" in
  Format.pp_print_string fmt label

(** [pp_doctype fmt d] prints the fields of a DOCTYPE token in record-like
    form. Optional fields that are absent print as nothing. *)
let pp_doctype fmt (d : doctype) =
  let pp_opt_string = Format.pp_print_option Format.pp_print_string in
  Format.fprintf fmt
    "DOCTYPE{name=%a; public_id=%a; system_id=%a; force_quirks=%b}"
    pp_opt_string d.name
    pp_opt_string d.public_id
    pp_opt_string d.system_id
    d.force_quirks

(** [pp_tag fmt t] prints [t] in tag syntax: a leading [/] for end tags, the
    name, each attribute with its value as an OCaml-escaped string literal,
    and a trailing [ /] when the tag is self-closing. *)
let pp_tag fmt (t : tag) =
  let slash = match t.kind with Start -> "" | End -> "/" in
  Format.fprintf fmt "<%s%s" slash t.name;
  let pp_attr (key, value) = Format.fprintf fmt " %s=%S" key value in
  List.iter pp_attr t.attrs;
  if t.self_closing then Format.pp_print_string fmt " /";
  Format.pp_print_char fmt '>'

(** [pp fmt token] prints [token]: tags and doctypes via their dedicated
    printers, character and comment data as OCaml-escaped string literals,
    and [EOF] for end of file. *)
let pp fmt token =
  match token with
  | Tag t -> pp_tag fmt t
  | Character s -> Format.fprintf fmt "Character %S" s
  | Comment s -> Format.fprintf fmt "Comment %S" s
  | Doctype d -> pp_doctype fmt d
  | EOF -> Format.pp_print_string fmt "EOF"