OCaml HTML5 parser/serialiser based on Python's JustHTML
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5
6(* html5rw.parser - HTML5 parser with bytesrw-only API *)
7
(* Re-exports: each alias below makes a sibling compilation unit reachable
   through this module, so clients can write e.g. [<this module>.Dom]. *)
module Dom = Dom
module Tokenizer = Tokenizer
module Encoding = Encoding
module Parse_error_code = Parse_error_code
(* The [Parser_*] units are exposed under names without the prefix. *)
module Constants = Parser_constants
module Insertion_mode = Parser_insertion_mode
module Tree_builder = Parser_tree_builder
15
(** A parse error. Equal to [Parser_impl.parse_error]; the code in this file
    reads its [code], [line] and [column] fields. *)
type parse_error = Parser_impl.parse_error

(** Context for fragment parsing. Equal to [Parser_impl.fragment_context];
    the code in this file uses its [tag_name] and [namespace] fields. *)
type fragment_context = Parser_impl.fragment_context

(** The result of a parse. Equal to [Parser_impl.t]; the code in this file
    reads its [root], [errors] and [encoding] fields. *)
type t = Parser_impl.t
19
20(* parse_error accessors *)
(** [error_code e] is the [code] field of [e]. *)
let error_code ({ Parser_tree_builder.code; _ } : parse_error) = code

(** [error_line e] is the [line] field of [e]. *)
let error_line ({ Parser_tree_builder.line; _ } : parse_error) = line

(** [error_column e] is the [column] field of [e]. *)
let error_column ({ Parser_tree_builder.column; _ } : parse_error) = column
24
25(* fragment_context constructor and accessors *)
(** [make_fragment_context ~tag_name ?namespace ()] builds a fragment context
    record from [tag_name] and [namespace].

    [namespace] is itself an [option] value and defaults to [None], so a
    caller supplying one writes [~namespace:(Some ns)]. The trailing [unit]
    argument lets the optional argument be omitted. *)
let make_fragment_context ~tag_name ?(namespace = None) () : fragment_context =
  { Parser_tree_builder.tag_name = tag_name; namespace = namespace }
28
(** [fragment_context_tag ctx] is the [tag_name] field of [ctx]. *)
let fragment_context_tag
    ({ Parser_tree_builder.tag_name; _ } : fragment_context) =
  tag_name

(** [fragment_context_namespace ctx] is the [namespace] field of [ctx]. *)
let fragment_context_namespace
    ({ Parser_tree_builder.namespace; _ } : fragment_context) =
  namespace
31
(* Entry points. Each name is bound directly to the value of the same name in
   [Parser_impl] (no wrapper function), so its type -- including any labelled
   or optional arguments -- is exactly the one declared there.
   NOTE(review): behaviour is defined entirely in [Parser_impl]; consult that
   module for argument and result details. *)
let parse = Parser_impl.parse
let parse_bytes = Parser_impl.parse_bytes
let query = Parser_impl.query
let to_writer = Parser_impl.to_writer
let to_string = Parser_impl.to_string
let to_text = Parser_impl.to_text
let to_test_format = Parser_impl.to_test_format
39
(** [root t] is the [root] field of the parse result [t]. *)
let root { Parser_impl.root = node; _ } = node

(** [errors t] is the [errors] field of the parse result [t]. *)
let errors { Parser_impl.errors = errs; _ } = errs

(** [encoding t] is the [encoding] field of the parse result [t]. *)
let encoding { Parser_impl.encoding = enc; _ } = enc
43
44(* Pretty printers *)
(** [pp_parse_error fmt e] prints [e] on [fmt] as [(line,column): code],
    where the code is rendered with [Parse_error_code.pp]. *)
let pp_parse_error fmt
    ({ Parser_tree_builder.code; line; column; _ } : parse_error) =
  Format.fprintf fmt "(%d,%d): %a" line column Parse_error_code.pp code
47
(** [pp_fragment_context fmt ctx] prints [ctx] on [fmt] as [<tag>] when it has
    no namespace, or [<tag xmlns=ns>] when its namespace is [Some ns]. *)
let pp_fragment_context fmt
    ({ Parser_tree_builder.tag_name; namespace; _ } : fragment_context) =
  (* Text inserted between the tag name and the closing bracket. *)
  let xmlns_suffix =
    match namespace with
    | None -> ""
    | Some ns -> " xmlns=" ^ ns
  in
  Format.fprintf fmt "<%s%s>" tag_name xmlns_suffix
52
(** [pp fmt t] prints a one-record summary of the parse result [t] on [fmt]:
    the root node (via [Dom.pp]), the number of recorded errors, and the
    encoding (via [Encoding.pp]; nothing is printed for it when it is
    [None]). *)
let pp fmt { Parser_impl.root = node; errors = errs; encoding = enc; _ } =
  let error_count = List.length errs in
  let pp_encoding = Format.pp_print_option Encoding.pp in
  Format.fprintf fmt "{root=%a; errors=%d; encoding=%a}"
    Dom.pp node
    error_count
    pp_encoding enc