+45
lib/html5rw/html5rw.ml
+45
lib/html5rw/html5rw.ml
···
34
34
]}
35
35
*)
36
36
37
+
(** {1 Error Handling} *)
38
+
39
+
(** Global error type that wraps every error produced by the Html5rw library.
40
+
41
+
This provides a unified error type for all parsing and selector errors,
42
+
along with printers for display and debugging.
43
+
*)
44
+
module Error = struct
45
+
(** The unified error type for the Html5rw library. *)
46
+
type t =
47
+
| Parse_error of {
48
+
code : Parse_error_code.t;
49
+
line : int;
50
+
column : int;
51
+
}
52
+
(** An HTML parse error, including location information. *)
53
+
| Selector_error of Selector.Error_code.t
54
+
(** A CSS selector parse error. *)
55
+
56
+
let of_parse_error (err : Parser.parse_error) : t =
57
+
Parse_error {
58
+
code = Parser.error_code err;
59
+
line = Parser.error_line err;
60
+
column = Parser.error_column err;
61
+
}
62
+
63
+
let of_selector_error (code : Selector.Error_code.t) : t =
64
+
Selector_error code
65
+
66
+
let to_string = function
67
+
| Parse_error { code; line; column } ->
68
+
Printf.sprintf "Parse error at %d:%d: %s" line column
69
+
(Parse_error_code.to_string code)
70
+
| Selector_error code ->
71
+
Printf.sprintf "Selector error: %s"
72
+
(Selector.Error_code.to_human_string code)
73
+
74
+
(** Print the same message that [to_string] builds onto the given formatter. *)
let pp ppf error = error |> to_string |> Format.pp_print_string ppf
75
+
76
+
(** Get the error code as a kebab-case string. *)
77
+
let code_string = function
78
+
| Parse_error { code; _ } -> Parse_error_code.to_string code
79
+
| Selector_error code -> Selector.Error_code.to_string code
80
+
end
81
+
37
82
(** {1 Sub-modules} *)
38
83
39
84
(** Parse error code types *)
+89
lib/html5rw/html5rw.mli
+89
lib/html5rw/html5rw.mli
···
372
372
Column numbers count from 1 and reset at each newline. *)
373
373
val error_column : parse_error -> int
374
374
375
+
(** {1 Error Handling} *)
376
+
377
+
(** Global error type that wraps every error produced by the Html5rw library.
378
+
379
+
This module provides a unified error type for all parsing and selector
380
+
errors, along with printers and conversion functions. Use this when you
381
+
want to handle all possible errors from the library in a uniform way.
382
+
383
+
{2 Usage}
384
+
385
+
{[
386
+
(* Converting parse errors *)
387
+
let errors = Html5rw.errors result in
388
+
List.iter (fun err ->
389
+
let unified = Html5rw.Error.of_parse_error err in
390
+
Printf.eprintf "%s\n" (Html5rw.Error.to_string unified)
391
+
) errors
392
+
393
+
(* Catching selector errors *)
394
+
match Html5rw.query result selector with
395
+
| _nodes -> () (* success *)
396
+
| exception Html5rw.Selector.Selector_error code ->
397
+
let unified = Html5rw.Error.of_selector_error code in
398
+
Printf.eprintf "%s\n" (Html5rw.Error.to_string unified)
399
+
]}
400
+
*)
401
+
module Error : sig
402
+
(** The unified error type for the Html5rw library. *)
403
+
type t =
404
+
| Parse_error of {
405
+
code : Parse_error_code.t;
406
+
line : int;
407
+
column : int;
408
+
}
409
+
(** An HTML parse error, including location information.
410
+
411
+
Parse errors occur during HTML tokenization and tree construction.
412
+
The location indicates where in the input the error was detected.
413
+
414
+
@see <https://html.spec.whatwg.org/multipage/parsing.html#parse-errors>
415
+
WHATWG: Parse errors *)
416
+
417
+
| Selector_error of Selector.Error_code.t
418
+
(** A CSS selector parse error.
419
+
420
+
Selector errors occur when parsing malformed CSS selectors passed
421
+
to {!query} or {!matches}. *)
422
+
423
+
val of_parse_error : parse_error -> t
424
+
(** Convert a parse error to the unified error type.
425
+
426
+
{[
427
+
let errors = Html5rw.errors result in
428
+
let unified_errors = List.map Html5rw.Error.of_parse_error errors
429
+
]} *)
430
+
431
+
val of_selector_error : Selector.Error_code.t -> t
432
+
(** Convert a selector error code to the unified error type.
433
+
434
+
{[
435
+
match Html5rw.query result "invalid[" with
436
+
| _ -> ()
437
+
| exception Html5rw.Selector.Selector_error code ->
438
+
let err = Html5rw.Error.of_selector_error code in
439
+
Printf.eprintf "%s\n" (Html5rw.Error.to_string err)
440
+
]} *)
441
+
442
+
val to_string : t -> string
443
+
(** Convert to a human-readable error message; parse errors include their [line:column] location.
444
+
445
+
Examples:
446
+
- ["Parse error at 5:12: unexpected-null-character"]
447
+
- ["Selector error: Expected \]"] *)
448
+
449
+
val pp : Format.formatter -> t -> unit
450
+
(** Pretty-printer for use with [Format] functions. *)
451
+
452
+
val code_string : t -> string
453
+
(** Get just the error code as a kebab-case string (without location).
454
+
455
+
This is useful for programmatic error handling or logging.
456
+
457
+
Examples:
458
+
- ["unexpected-null-character"]
459
+
- ["expected-closing-bracket"] *)
460
+
end
461
+
462
+
(** {1 Fragment Parsing} *)
463
+
375
464
(** Context element for HTML fragment parsing (innerHTML).
376
465
377
466
When parsing HTML fragments (like the [innerHTML] of an element), you
+6
-1
lib/html5rw/selector/selector.ml
+6
-1
lib/html5rw/selector/selector.ml
···
58
58
]}
59
59
*)
60
60
61
+
(** {1 Error Types} *)
62
+
63
+
(** CSS selector error codes. *)
64
+
module Error_code = Selector_error_code
65
+
61
66
(** {1 Exceptions} *)
62
67
63
68
(** Raised when a selector string is malformed.
64
69
65
-
The exception contains an error message describing the parse error.
70
+
The exception contains a typed error code describing the parse error.
66
71
*)
67
72
exception Selector_error = Selector_lexer.Selector_error
68
73
+55
-2
lib/html5rw/selector/selector.mli
+55
-2
lib/html5rw/selector/selector.mli
···
58
58
]}
59
59
*)
60
60
61
+
(** {1 Error Types} *)
62
+
63
+
(** CSS selector error codes.
64
+
65
+
This module provides the {!Error_code.t} variant type that represents
66
+
all possible errors when parsing CSS selectors.
67
+
*)
68
+
module Error_code : sig
69
+
type t =
70
+
| Empty_selector
71
+
(** The selector string was empty or contained only whitespace. *)
72
+
| Unterminated_string
73
+
(** A quoted string was not closed before end of input. *)
74
+
| Unterminated_escape
75
+
(** An escape sequence was not completed before end of input. *)
76
+
| Expected_identifier_after_hash
77
+
(** Expected an identifier after [#] for ID selector. *)
78
+
| Expected_identifier_after_dot
79
+
(** Expected an identifier after [.] for class selector. *)
80
+
| Expected_attribute_name
81
+
(** Expected an attribute name inside an attribute selector. *)
82
+
| Expected_closing_bracket
83
+
(** Expected [\]] to close an attribute selector. *)
84
+
| Expected_equals_after_operator of char
85
+
(** Expected [=] after an attribute operator like [~], [|], [^], [$], or [*]. The payload is the operator character that was read. *)
86
+
| Unexpected_character_in_attribute_selector
87
+
(** Found an unexpected character inside an attribute selector. *)
88
+
| Expected_pseudo_class_name
89
+
(** Expected a pseudo-class name after [:]. *)
90
+
| Expected_closing_paren
91
+
(** Expected [)] to close a pseudo-class argument. *)
92
+
| Unexpected_character of char
93
+
(** Found an unexpected character in the selector. The payload is the offending character. *)
94
+
| Expected_attribute_value
95
+
(** Expected a value after the attribute operator. *)
96
+
| Expected_closing_bracket_or_operator
97
+
(** Expected [\]] or an attribute operator like [=]. *)
98
+
| Expected_selector_after_combinator
99
+
(** Expected a selector after a combinator ([>], [+], [~], or space). *)
100
+
| Unexpected_token
101
+
(** Found an unexpected token in the selector. *)
102
+
| Expected_end_of_selector
103
+
(** Expected end of selector but found more tokens. *)
104
+
105
+
val to_string : t -> string
106
+
(** Convert to a kebab-case string identifier suitable for programmatic use. *)
107
+
108
+
val to_human_string : t -> string
109
+
(** Convert to a human-readable error message. *)
110
+
end
111
+
61
112
(** {1 Exceptions} *)
62
113
63
-
exception Selector_error of string
114
+
exception Selector_error of Error_code.t
64
115
(** Raised when a selector string is malformed.
65
116
66
-
The exception contains an error message describing the parse error.
117
+
The exception contains a typed error code describing the parse error.
118
+
Use {!Error_code.to_string} or {!Error_code.to_human_string} to get
119
+
a string representation.
67
120
*)
68
121
69
122
(** {1 Sub-modules} *)
+13
-13
lib/html5rw/selector/selector_lexer.ml
+13
-13
lib/html5rw/selector/selector_lexer.ml
···
1
1
(* CSS selector lexer *)
2
2
3
-
exception Selector_error of string
3
+
exception Selector_error of Selector_error_code.t
4
4
5
5
type t = {
6
6
input : string;
···
47
47
let buf = Buffer.create 32 in
48
48
let rec loop () =
49
49
match peek t with
50
-
| None -> raise (Selector_error "Unterminated string")
50
+
| None -> raise (Selector_error Selector_error_code.Unterminated_string)
51
51
| Some c when c = quote -> advance t
52
52
| Some '\\' ->
53
53
advance t;
54
54
(match peek t with
55
55
| Some c -> Buffer.add_char buf c; advance t; loop ()
56
-
| None -> raise (Selector_error "Unterminated escape"))
56
+
| None -> raise (Selector_error Selector_error_code.Unterminated_escape))
57
57
| Some c ->
58
58
Buffer.add_char buf c;
59
59
advance t;
···
99
99
| '#' ->
100
100
advance t;
101
101
let name = read_name t in
102
-
if name = "" then raise (Selector_error "Expected identifier after #");
102
+
if name = "" then raise (Selector_error Selector_error_code.Expected_identifier_after_hash);
103
103
tokens := Selector_token.Id name :: !tokens
104
104
| '.' ->
105
105
advance t;
106
106
let name = read_name t in
107
-
if name = "" then raise (Selector_error "Expected identifier after .");
107
+
if name = "" then raise (Selector_error Selector_error_code.Expected_identifier_after_dot);
108
108
tokens := Selector_token.Class name :: !tokens
109
109
| '[' ->
110
110
advance t;
111
111
tokens := Selector_token.Attr_start :: !tokens;
112
112
skip_whitespace t;
113
113
let attr_name = read_name t in
114
-
if attr_name = "" then raise (Selector_error "Expected attribute name");
114
+
if attr_name = "" then raise (Selector_error Selector_error_code.Expected_attribute_name);
115
115
tokens := Selector_token.Tag attr_name :: !tokens;
116
116
skip_whitespace t;
117
117
···
130
130
in
131
131
tokens := Selector_token.String value :: !tokens;
132
132
skip_whitespace t;
133
-
if peek t <> Some ']' then raise (Selector_error "Expected ]");
133
+
if peek t <> Some ']' then raise (Selector_error Selector_error_code.Expected_closing_bracket);
134
134
advance t;
135
135
tokens := Selector_token.Attr_end :: !tokens
136
136
| Some ('~' | '|' | '^' | '$' | '*') as op_char ->
137
137
let op_c = Option.get op_char in
138
138
advance t;
139
139
if peek t <> Some '=' then
140
-
raise (Selector_error ("Expected = after " ^ String.make 1 op_c));
140
+
raise (Selector_error (Selector_error_code.Expected_equals_after_operator op_c));
141
141
advance t;
142
142
tokens := Selector_token.Attr_op (String.make 1 op_c ^ "=") :: !tokens;
143
143
skip_whitespace t;
···
148
148
in
149
149
tokens := Selector_token.String value :: !tokens;
150
150
skip_whitespace t;
151
-
if peek t <> Some ']' then raise (Selector_error "Expected ]");
151
+
if peek t <> Some ']' then raise (Selector_error Selector_error_code.Expected_closing_bracket);
152
152
advance t;
153
153
tokens := Selector_token.Attr_end :: !tokens
154
-
| _ -> raise (Selector_error "Unexpected character in attribute selector"))
154
+
| _ -> raise (Selector_error Selector_error_code.Unexpected_character_in_attribute_selector))
155
155
156
156
| ',' ->
157
157
advance t;
···
161
161
advance t;
162
162
tokens := Selector_token.Colon :: !tokens;
163
163
let name = read_name t in
164
-
if name = "" then raise (Selector_error "Expected pseudo-class name");
164
+
if name = "" then raise (Selector_error Selector_error_code.Expected_pseudo_class_name);
165
165
tokens := Selector_token.Tag name :: !tokens;
166
166
167
167
if peek t = Some '(' then begin
···
179
179
done;
180
180
let arg = String.trim (String.sub t.input start (t.pos - start)) in
181
181
if arg <> "" then tokens := Selector_token.String arg :: !tokens;
182
-
if peek t <> Some ')' then raise (Selector_error "Expected )");
182
+
if peek t <> Some ')' then raise (Selector_error Selector_error_code.Expected_closing_paren);
183
183
advance t;
184
184
tokens := Selector_token.Paren_close :: !tokens
185
185
end
···
187
187
let name = read_name t in
188
188
tokens := Selector_token.Tag (String.lowercase_ascii name) :: !tokens
189
189
| _ ->
190
-
raise (Selector_error ("Unexpected character: " ^ String.make 1 c))
190
+
raise (Selector_error (Selector_error_code.Unexpected_character c))
191
191
end
192
192
done;
193
193
+14
-13
lib/html5rw/selector/selector_parser.ml
+14
-13
lib/html5rw/selector/selector_parser.ml
···
3
3
open Selector_ast
4
4
open Selector_token
5
5
6
-
exception Parse_error of string
6
+
(* Re-use the Selector_error exception from the lexer for consistency *)
7
+
let raise_error code = Selector_lexer.Selector_error code |> raise
7
8
8
9
type t = {
9
10
tokens : Selector_token.t list;
···
29
30
let expect t expected =
30
31
let tok = peek t in
31
32
if tok <> expected then
32
-
raise (Parse_error ("Expected " ^ (match expected with EOF -> "EOF" | _ -> "token")))
33
+
raise_error (match expected with EOF -> Selector_error_code.Expected_end_of_selector | _ -> Selector_error_code.Unexpected_token)
33
34
else
34
35
advance t
35
36
···
51
52
advance t;
52
53
let attr_name = match peek t with
53
54
| Tag name -> advance t; name
54
-
| _ -> raise (Parse_error "Expected attribute name")
55
+
| _ -> raise_error Selector_error_code.Expected_attribute_name
55
56
in
56
57
(match peek t with
57
58
| Attr_end ->
···
61
62
advance t;
62
63
let value = match peek t with
63
64
| String v -> advance t; v
64
-
| _ -> raise (Parse_error "Expected attribute value")
65
+
| _ -> raise_error Selector_error_code.Expected_attribute_value
65
66
in
66
67
(match peek t with
67
68
| Attr_end -> advance t
68
-
| _ -> raise (Parse_error "Expected ]"));
69
+
| _ -> raise_error Selector_error_code.Expected_closing_bracket);
69
70
Some (make_simple Type_attr ~name:attr_name ~operator:op ~value ())
70
-
| _ -> raise (Parse_error "Expected ] or attribute operator"))
71
+
| _ -> raise_error Selector_error_code.Expected_closing_bracket_or_operator)
71
72
| Colon ->
72
73
advance t;
73
74
let name = match peek t with
74
75
| Tag n -> advance t; n
75
-
| _ -> raise (Parse_error "Expected pseudo-class name")
76
+
| _ -> raise_error Selector_error_code.Expected_pseudo_class_name
76
77
in
77
78
let arg = match peek t with
78
79
| Paren_open ->
···
84
85
in
85
86
(match peek t with
86
87
| Paren_close -> advance t
87
-
| _ -> raise (Parse_error "Expected )"));
88
+
| _ -> raise_error Selector_error_code.Expected_closing_paren);
88
89
a
89
90
| _ -> None
90
91
in
···
111
112
| Combinator comb ->
112
113
advance t;
113
114
(match parse_compound_selector t with
114
-
| None -> raise (Parse_error "Expected selector after combinator")
115
+
| None -> raise_error Selector_error_code.Expected_selector_after_combinator
115
116
| Some compound ->
116
117
parts := (Some comb, compound) :: !parts;
117
118
loop ())
···
131
132
advance t;
132
133
loop (sel :: acc)
133
134
| EOF -> sel :: acc
134
-
| _ -> raise (Parse_error "Unexpected token"))
135
+
| _ -> raise_error Selector_error_code.Unexpected_token)
135
136
in
136
137
let selectors = List.rev (loop []) in
137
138
(match peek t with
138
139
| EOF -> ()
139
-
| _ -> raise (Parse_error "Expected end of selector"));
140
+
| _ -> raise_error Selector_error_code.Expected_end_of_selector);
140
141
match selectors with
141
-
| [] -> raise (Parse_error "Empty selector")
142
+
| [] -> raise_error Selector_error_code.Empty_selector
142
143
| [sel] -> Complex sel
143
144
| sels -> List (make_list sels)
144
145
145
146
let parse_selector input =
146
147
if String.trim input = "" then
147
-
raise (Selector_lexer.Selector_error "Empty selector");
148
+
raise_error Selector_error_code.Empty_selector;
148
149
let tokens = Selector_lexer.tokenize input in
149
150
parse tokens