(* OCaml HTML5 parser/serialiser based on Python's JustHTML *)
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(* html5rw.parser - HTML5 parser with bytesrw-only API *)

(* Sub-modules re-exported under the names used by this facade. *)
module Dom = Dom
module Tokenizer = Tokenizer
module Encoding = Encoding
module Parse_error_code = Parse_error_code
module Constants = Parser_constants
module Insertion_mode = Parser_insertion_mode
module Tree_builder = Parser_tree_builder

(* The three public types are plain aliases of the ones in [Parser_impl]. *)
type parse_error = Parser_impl.parse_error
type fragment_context = Parser_impl.fragment_context
type t = Parser_impl.t

(* parse_error accessors *)

(** [error_code e] is the [code] field of [e]. *)
let error_code (e : parse_error) = e.Parser_tree_builder.code

(** [error_line e] is the [line] field of [e]. *)
let error_line (e : parse_error) = e.Parser_tree_builder.line

(** [error_column e] is the [column] field of [e]. *)
let error_column (e : parse_error) = e.Parser_tree_builder.column

(* fragment_context constructor and accessors *)

(** [make_fragment_context ~tag_name ?namespace ()] builds a fragment context
    record from [tag_name] and [namespace].

    The optional argument is declared with the default [None], so the value
    supplied by a caller is itself an option: pass [~namespace:(Some ns)] to
    set a namespace, or omit the argument to get [None]. The trailing [()]
    lets the optional argument be erased. *)
let make_fragment_context ~tag_name ?(namespace = None) () : fragment_context =
  { Parser_tree_builder.tag_name; namespace }

(** [fragment_context_tag ctx] is the [tag_name] field of [ctx]. *)
let fragment_context_tag (ctx : fragment_context) =
  ctx.Parser_tree_builder.tag_name

(** [fragment_context_namespace ctx] is the [namespace] field of [ctx]. *)
let fragment_context_namespace (ctx : fragment_context) =
  ctx.Parser_tree_builder.namespace

(* Entry points re-exported unchanged from [Parser_impl]. They are kept as
   plain aliases (no eta-expansion) so their types, including any optional or
   labelled arguments, are exactly those declared in [Parser_impl]. *)
let parse = Parser_impl.parse
let parse_bytes = Parser_impl.parse_bytes
let query = Parser_impl.query
let to_writer = Parser_impl.to_writer
let to_string = Parser_impl.to_string
let to_text = Parser_impl.to_text
let to_test_format = Parser_impl.to_test_format

(* Field accessors for a parse result. *)

(** [root t] is the [root] field of the parse result [t]. *)
let root t = t.Parser_impl.root

(** [errors t] is the [errors] field of the parse result [t]. *)
let errors t = t.Parser_impl.errors

(** [encoding t] is the [encoding] field of the parse result [t]. *)
let encoding t = t.Parser_impl.encoding

(* Pretty printers *)

(** [pp_parse_error fmt e] prints [e] as [(line,column): code], formatting the
    code with [Parse_error_code.pp]. *)
let pp_parse_error fmt (e : parse_error) =
  Format.fprintf fmt "(%d,%d): %a"
    (error_line e) (error_column e)
    Parse_error_code.pp (error_code e)

(** [pp_fragment_context fmt ctx] prints the tag name of [ctx] between angle
    brackets, followed by an [xmlns=] suffix when a namespace is present.
    The namespace text is appended as-is, without quoting or escaping. *)
let pp_fragment_context fmt (ctx : fragment_context) =
  (* The separator must stay a single space: a line break inside this literal
     would be emitted in the middle of the printed tag. *)
  let xmlns_suffix =
    match fragment_context_namespace ctx with
    | Some ns -> " xmlns=" ^ ns
    | None -> ""
  in
  Format.fprintf fmt "<%s%s>" (fragment_context_tag ctx) xmlns_suffix

(** [pp fmt t] prints a one-line summary of the parse result [t]: the root
    via [Dom.pp], the number of recorded errors, and the encoding via
    [Format.pp_print_option Encoding.pp] (which prints nothing for [None]). *)
let pp fmt t =
  Format.fprintf fmt "{root=%a; errors=%d; encoding=%a}"
    Dom.pp (root t)
    (List.length (errors t))
    (Format.pp_print_option Encoding.pp) (encoding t)