+34
-11
lib/htmlrw_check/htmlrw_check.ml
+34
-11
lib/htmlrw_check/htmlrw_check.ml
···
5
6
module Error_code = Error_code
7
8
-
(* Public types - defined here to avoid re-exporting internal modules *)
9
10
type severity = Error | Warning | Info
11
···
17
system_id : string option;
18
}
19
20
type message = {
21
severity : severity;
22
text : string;
23
-
code : string;
24
-
error_code : Error_code.t option;
25
location : location option;
26
element : string option;
27
attribute : string option;
···
49
system_id = loc.system_id;
50
}
51
52
let convert_message (m : Message.t) : message = {
53
severity = convert_severity m.severity;
54
text = m.message;
55
-
code = m.code;
56
-
error_code = m.error_code;
57
location = Option.map convert_location m.location;
58
element = m.element;
59
attribute = m.attribute;
···
77
78
match Xhtml_parser.parse_xhtml content with
79
| Ok root ->
80
-
(* Run all registered checkers via DOM traversal *)
81
let registry = Checker_registry.default () in
82
Dom_walker.walk_registry registry collector root;
83
let dummy_doc = Html5rw.parse (Bytesrw.Bytes.Reader.of_string "") in
···
111
{ doc; msgs; sys_id = system_id }
112
end
113
114
let check_parsed ?(collect_parse_errors = true) ?system_id doc =
115
let collector = Message_collector.create () in
116
···
138
let infos t =
139
List.filter (fun msg -> msg.severity = Info) t.msgs
140
141
let has_errors t =
142
List.exists (fun msg -> msg.severity = Error) t.msgs
143
···
163
system_id = loc.system_id;
164
}
165
166
let unconvert_message (m : message) : Message.t = {
167
severity = unconvert_severity m.severity;
168
message = m.text;
169
-
code = m.code;
170
-
error_code = m.error_code;
171
location = Option.map unconvert_location m.location;
172
element = m.element;
173
attribute = m.attribute;
···
193
| Warning -> "warning"
194
| Info -> "info"
195
196
let pp_severity fmt sev =
197
Format.pp_print_string fmt (severity_to_string sev)
198
···
207
match msg.location with
208
| Some loc -> Format.fprintf fmt " (at %a)" pp_location loc
209
| None -> ()
210
-
211
-
let message_to_string msg =
212
-
Format.asprintf "%a" pp_message msg
···
5
6
module Error_code = Error_code
7
8
+
(* Public types *)
9
10
type severity = Error | Warning | Info
11
···
17
system_id : string option;
18
}
19
20
+
type error_code =
21
+
| Parse of Html5rw.Parse_error_code.t
22
+
| Conformance of Error_code.t
23
+
24
type message = {
25
severity : severity;
26
text : string;
27
+
error_code : error_code;
28
location : location option;
29
element : string option;
30
attribute : string option;
···
52
system_id = loc.system_id;
53
}
54
55
+
let convert_error_code = function
56
+
| Message.Parse_error code -> Parse code
57
+
| Message.Conformance_error code -> Conformance code
58
+
59
let convert_message (m : Message.t) : message = {
60
severity = convert_severity m.severity;
61
text = m.message;
62
+
error_code = convert_error_code m.error_code;
63
location = Option.map convert_location m.location;
64
element = m.element;
65
attribute = m.attribute;
···
83
84
match Xhtml_parser.parse_xhtml content with
85
| Ok root ->
86
let registry = Checker_registry.default () in
87
Dom_walker.walk_registry registry collector root;
88
let dummy_doc = Html5rw.parse (Bytesrw.Bytes.Reader.of_string "") in
···
116
{ doc; msgs; sys_id = system_id }
117
end
118
119
+
let check_string ?system_id html =
120
+
let reader = Bytesrw.Bytes.Reader.of_string html in
121
+
check ?system_id reader
122
+
123
let check_parsed ?(collect_parse_errors = true) ?system_id doc =
124
let collector = Message_collector.create () in
125
···
147
let infos t =
148
List.filter (fun msg -> msg.severity = Info) t.msgs
149
150
+
let parse_errors t =
151
+
List.filter (fun msg ->
152
+
match msg.error_code with Parse _ -> true | Conformance _ -> false
153
+
) t.msgs
154
+
155
+
let conformance_errors t =
156
+
List.filter (fun msg ->
157
+
match msg.error_code with Parse _ -> false | Conformance _ -> true
158
+
) t.msgs
159
+
160
let has_errors t =
161
List.exists (fun msg -> msg.severity = Error) t.msgs
162
···
182
system_id = loc.system_id;
183
}
184
185
+
let unconvert_error_code = function
186
+
| Parse code -> Message.Parse_error code
187
+
| Conformance code -> Message.Conformance_error code
188
+
189
let unconvert_message (m : message) : Message.t = {
190
severity = unconvert_severity m.severity;
191
message = m.text;
192
+
error_code = unconvert_error_code m.error_code;
193
location = Option.map unconvert_location m.location;
194
element = m.element;
195
attribute = m.attribute;
···
215
| Warning -> "warning"
216
| Info -> "info"
217
218
+
let error_code_to_string = function
219
+
| Parse code -> Html5rw.Parse_error_code.to_string code
220
+
| Conformance code -> Error_code.code_string code
221
+
222
let pp_severity fmt sev =
223
Format.pp_print_string fmt (severity_to_string sev)
224
···
233
match msg.location with
234
| Some loc -> Format.fprintf fmt " (at %a)" pp_location loc
235
| None -> ()
+135
-247
lib/htmlrw_check/htmlrw_check.mli
+135
-247
lib/htmlrw_check/htmlrw_check.mli
···
5
6
(** HTML5 Conformance Checker
7
8
-
This module validates HTML5 documents against the
9
-
{{:https://html.spec.whatwg.org/} WHATWG HTML Living Standard},
10
-
reporting conformance errors, warnings, and suggestions.
11
12
{2 Quick Start}
13
14
{[
15
-
(* Validate HTML from a string *)
16
-
let html = "<html><body><img></body></html>" in
17
-
let reader = Bytesrw.Bytes.Reader.of_string html in
18
-
let result = Htmlrw_check.check reader in
19
20
-
if Htmlrw_check.has_errors result then begin
21
-
List.iter (fun msg ->
22
-
Printf.printf "%s: %s\n"
23
-
(Htmlrw_check.severity_to_string msg.Htmlrw_check.severity)
24
-
msg.Htmlrw_check.text
25
-
) (Htmlrw_check.errors result)
26
-
end
27
]}
28
29
-
{2 What Gets Checked}
30
31
-
The checker validates:
32
33
-
- {b Parse errors}: Malformed HTML syntax (missing end tags, invalid
34
-
nesting, etc.) per the WHATWG parsing specification
35
-
- {b Content model}: Elements appearing in contexts where they're not
36
-
allowed (e.g., [<div>] inside [<p>])
37
-
- {b Attributes}: Missing required attributes, disallowed attributes,
38
-
and invalid attribute values
39
-
- {b Accessibility}: ARIA role/attribute misuse, missing alt text on
40
-
images, form labeling issues
41
-
- {b Document structure}: Missing DOCTYPE, duplicate IDs, heading
42
-
hierarchy issues
43
-
- {b Internationalization}: Missing or mismatched lang attributes
44
45
-
{2 Output Formats}
46
47
-
Results can be formatted as:
48
-
- {b Text}: Human-readable messages for terminal output
49
-
- {b JSON}: Machine-readable format compatible with Nu HTML Validator
50
-
- {b GNU}: Error format for IDE integration
51
52
-
@see <https://html.spec.whatwg.org/>
53
-
WHATWG HTML Living Standard
54
-
@see <https://validator.w3.org/nu/>
55
-
Nu HTML Checker (reference validator) *)
56
57
-
(** {1:types Types} *)
58
59
-
(** Message severity level.
60
61
-
- [Error]: Conformance error - the document violates the HTML5 spec
62
-
- [Warning]: Likely problem - should be reviewed but may be intentional
63
-
- [Info]: Suggestion - best practice recommendation *)
64
-
type severity = Error | Warning | Info
65
66
-
(** Source location of a validation issue.
67
68
-
Locations use 1-based line and column numbers matching typical editor
69
-
conventions. The [system_id] field contains the file path or URL if one
70
-
was provided to the checker. *)
71
type location = {
72
line : int;
73
-
(** Line number (1-indexed) where the issue was found. *)
74
-
75
column : int;
76
-
(** Column number (1-indexed) within the line. *)
77
-
78
end_line : int option;
79
-
(** End line for issues spanning multiple lines. *)
80
-
81
end_column : int option;
82
-
(** End column for range-based issues. *)
83
-
84
-
system_id : string option;
85
-
(** File path or URL, if provided to the checker. *)
86
}
87
88
-
(** A validation message describing a conformance issue.
89
90
-
Each message contains:
91
-
- The {!field-severity} indicating how serious the issue is
92
-
- Human-readable {!field-text} explaining the problem
93
-
- Machine-readable {!field-code} for programmatic handling
94
-
- Optional {!field-error_code} for fine-grained pattern matching
95
-
- Source {!field-location} when available
96
-
- Context ({!field-element}, {!field-attribute}) when relevant *)
97
type message = {
98
severity : severity;
99
-
(** Severity level of this message. *)
100
-
101
-
text : string;
102
-
(** Human-readable description of the issue.
103
-
104
-
The text follows Nu HTML Validator message conventions, using
105
-
Unicode quotes around element/attribute names:
106
-
["Element \xe2\x80\x9cdiv\xe2\x80\x9d not allowed as child..."] *)
107
-
108
-
code : string;
109
-
(** Machine-readable error code in kebab-case.
110
-
111
-
Examples: ["missing-alt"], ["duplicate-id"], ["unexpected-end-tag"].
112
-
Useful for filtering or categorizing errors programmatically. *)
113
-
114
-
error_code : Error_code.t option;
115
-
(** Typed error code for pattern matching.
116
-
117
-
When present, allows fine-grained handling of specific errors:
118
-
{[
119
-
match msg.error_code with
120
-
| Some (`Img `Missing_alt) -> suggest_alt_text ()
121
-
| Some (`Attr (`Duplicate_id (`Id id))) -> highlight_duplicate id
122
-
| _ -> show_generic_error msg
123
-
]} *)
124
-
125
-
location : location option;
126
-
(** Source location where the issue was detected.
127
-
128
-
[None] for document-level issues or when location tracking is
129
-
unavailable (e.g., for some content model errors). *)
130
-
131
-
element : string option;
132
-
(** Element name relevant to this message (e.g., ["img"], ["div"]).
133
-
134
-
Lowercase, without angle brackets. *)
135
-
136
-
attribute : string option;
137
-
(** Attribute name relevant to this message (e.g., ["alt"], ["href"]).
138
-
139
-
Lowercase. Only present for attribute-related errors. *)
140
-
141
-
extract : string option;
142
-
(** Source excerpt showing context around the error.
143
-
144
-
Typically a few characters before and after the problematic location.
145
-
Useful for displaying the error in context. *)
146
}
147
148
-
(** Validation result containing all messages and the parsed document.
149
-
150
-
Use {!messages}, {!errors}, {!warnings}, and {!infos} to access
151
-
the validation messages. Use {!document} to access the parsed DOM. *)
152
type t
153
154
-
(** {1:validation Validation Functions} *)
155
156
-
(** Validate HTML from a reader.
157
158
-
Parses the HTML input and runs all conformance checks, returning
159
-
a result containing any validation messages.
160
161
-
{b Example:}
162
{[
163
-
let ic = open_in "page.html" in
164
-
let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
165
-
let result = Htmlrw_check.check ~system_id:"page.html" reader in
166
-
close_in ic;
167
-
168
if Htmlrw_check.has_errors result then
169
-
print_endline (Htmlrw_check.to_text result)
170
]}
171
172
-
@param collect_parse_errors If [true] (default), include HTML parse
173
-
errors in the results. Set to [false] to only get conformance
174
-
checker errors (content model, attributes, etc.).
175
-
@param system_id File path or URL for the document. Used in error
176
-
messages and the {!location} field. Does not affect validation. *)
177
val check :
178
?collect_parse_errors:bool ->
179
?system_id:string ->
180
Bytesrw.Bytes.Reader.t ->
181
t
182
183
-
(** Validate an already-parsed HTML document.
184
-
185
-
Runs conformance checks on an existing {!Html5rw.t} parse result.
186
-
Useful when you've already parsed the document and want to validate
187
-
it without re-parsing.
188
-
189
-
{b Example:}
190
-
{[
191
-
let doc = Html5rw.parse reader in
192
-
(* ... manipulate the DOM ... *)
193
-
let result = Htmlrw_check.check_parsed doc in
194
-
]}
195
196
-
@param collect_parse_errors If [true] (default), include any parse
197
-
errors that were collected during the original parse.
198
-
@param system_id File path or URL for error reporting. *)
199
val check_parsed :
200
?collect_parse_errors:bool ->
201
?system_id:string ->
202
Html5rw.t ->
203
t
204
205
-
(** {1:results Result Accessors} *)
206
207
-
(** Get all validation messages.
208
209
-
Returns messages in the order they were generated, which roughly
210
-
corresponds to document order for element-related errors. *)
211
val messages : t -> message list
212
213
-
(** Get only error messages.
214
-
215
-
Errors indicate conformance violations - the document does not
216
-
comply with the HTML5 specification. *)
217
val errors : t -> message list
218
219
-
(** Get only warning messages.
220
-
221
-
Warnings indicate likely problems that may be intentional in
222
-
some cases (e.g., deprecated features still in use). *)
223
val warnings : t -> message list
224
225
-
(** Get only informational messages.
226
-
227
-
Info messages are suggestions for best practices that don't
228
-
affect conformance. *)
229
val infos : t -> message list
230
231
-
(** Test if any errors were found.
232
233
-
Equivalent to [errors result <> []] but more efficient. *)
234
-
val has_errors : t -> bool
235
236
-
(** Test if any warnings were found.
237
238
-
Equivalent to [warnings result <> []] but more efficient. *)
239
val has_warnings : t -> bool
240
241
-
(** Get the parsed document.
242
-
243
-
Returns the DOM tree that was validated. For {!check}, this is the
244
-
newly parsed document. For {!check_parsed}, this is the document
245
-
that was passed in. *)
246
val document : t -> Html5rw.t
247
248
-
(** Get the system identifier.
249
-
250
-
Returns the file path or URL that was passed to {!check} or
251
-
{!check_parsed}, or [None] if not provided. *)
252
val system_id : t -> string option
253
254
(** {1:formatting Output Formatting} *)
255
256
-
(** Format messages as human-readable text.
257
258
-
Produces multi-line output suitable for terminal display:
259
{v
260
-
Error: Element "img" is missing required attribute "alt".
261
-
At line 5, column 3
262
-
<img src="photo.jpg">
263
-
v}
264
-
265
-
Messages are formatted with severity, description, location,
266
-
and source excerpt when available. *)
267
val to_text : t -> string
268
269
-
(** Format messages as JSON.
270
271
-
Produces JSON output compatible with the Nu HTML Validator format:
272
{v
273
-
{
274
-
"messages": [
275
-
{
276
-
"type": "error",
277
-
"message": "Element \"img\" is missing required attribute \"alt\".",
278
-
"lastLine": 5,
279
-
"lastColumn": 3
280
-
}
281
-
]
282
-
}
283
-
v}
284
-
285
-
Useful for machine processing and integration with other tools. *)
286
val to_json : t -> string
287
288
-
(** Format messages in GNU error format.
289
290
-
Produces one-line-per-error output for IDE integration:
291
{v
292
-
page.html:5:3: error: Element "img" is missing required attribute "alt".
293
-
v}
294
-
295
-
This format is recognized by many editors and build tools. *)
296
val to_gnu : t -> string
297
298
-
(** {1:utilities Utility Functions} *)
299
300
-
(** Convert severity to lowercase string.
301
302
-
Returns ["error"], ["warning"], or ["info"]. *)
303
val severity_to_string : severity -> string
304
305
-
(** Pretty-print a severity value. *)
306
val pp_severity : Format.formatter -> severity -> unit
307
308
-
(** Pretty-print a location. *)
309
val pp_location : Format.formatter -> location -> unit
310
311
-
(** Pretty-print a message.
312
-
313
-
Includes severity, text, and location if available. *)
314
val pp_message : Format.formatter -> message -> unit
315
316
-
(** Convert a message to a single-line string.
317
318
-
Includes severity and message text. *)
319
-
val message_to_string : message -> string
320
-
321
-
(** {1:error_codes Error Codes}
322
323
-
The {!Error_code} module provides typed error codes for programmatic
324
-
handling of validation issues. Use pattern matching to handle specific
325
-
errors:
326
327
{[
328
-
let handle_message msg =
329
-
match msg.Htmlrw_check.error_code with
330
-
| Some (`Img `Missing_alt) ->
331
-
(* Image accessibility issue *)
332
-
suggest_alt_text msg
333
-
| Some (`Attr (`Duplicate_id (`Id id))) ->
334
-
(* Duplicate ID found *)
335
-
highlight_all_with_id id
336
-
| Some (`Aria _) ->
337
-
(* Any ARIA-related error *)
338
-
show_aria_help ()
339
-
| _ ->
340
-
(* Generic handling *)
341
-
display_error msg
342
-
]}
343
-
344
-
The error codes are organized into categories:
345
-
- [`Attr _]: Attribute errors (missing, invalid, duplicate)
346
-
- [`Element _]: Element/content model errors
347
-
- [`Aria _]: ARIA accessibility errors
348
-
- [`Img _]: Image-related errors
349
-
- [`Table _]: Table structure errors
350
-
- And more...
351
-
352
-
See {!Error_code} for the complete type definition. *)
353
module Error_code = Error_code
···
5
6
(** HTML5 Conformance Checker
7
8
+
Validates HTML5 documents against the
9
+
{{:https://html.spec.whatwg.org/} WHATWG HTML Living Standard}.
10
11
{2 Quick Start}
12
13
{[
14
+
let result = Htmlrw_check.check_string "<html><body><img></body></html>" in
15
16
+
if Htmlrw_check.has_errors result then
17
+
print_endline (Htmlrw_check.to_text result)
18
+
else
19
+
print_endline "Valid HTML5!"
20
]}
21
22
+
{2 Handling Specific Errors}
23
24
+
Use pattern matching on {!field-message.error_code} for fine-grained control:
25
26
+
{[
27
+
List.iter (fun msg ->
28
+
match msg.Htmlrw_check.error_code with
29
+
| Parse code ->
30
+
Printf.printf "Syntax error: %s\n"
31
+
(Html5rw.Parse_error_code.to_string code)
32
+
| Conformance code ->
33
+
match code with
34
+
| `Img `Missing_alt ->
35
+
Printf.printf "Accessibility: %s needs alt text\n"
36
+
(Option.value ~default:"image" msg.element)
37
+
| `Attr (`Duplicate_id _) ->
38
+
Printf.printf "Duplicate ID found\n"
39
+
| _ ->
40
+
Printf.printf "Error: %s\n" msg.text
41
+
) (Htmlrw_check.errors result)
42
+
]}
43
44
+
{2 CI Integration}
45
46
+
{[
47
+
let validate_file path =
48
+
let ic = open_in path in
49
+
let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
50
+
let result = Htmlrw_check.check ~system_id:path reader in
51
+
close_in ic;
52
+
if Htmlrw_check.has_errors result then begin
53
+
print_string (Htmlrw_check.to_gnu result);
54
+
exit 1
55
+
end
56
+
]}
57
58
+
{2 What Gets Checked}
59
60
+
- {b Parse errors}: Malformed syntax per WHATWG parsing specification
61
+
- {b Content model}: Invalid element nesting (e.g., [<div>] inside [<p>])
62
+
- {b Attributes}: Missing required, disallowed, or invalid attributes
63
+
- {b Accessibility}: ARIA misuse, missing alt text, form labeling
64
+
- {b Structure}: Missing DOCTYPE, duplicate IDs, heading hierarchy
65
+
- {b Internationalization}: Missing or mismatched lang attributes
66
67
+
@see <https://html.spec.whatwg.org/> WHATWG HTML Living Standard
68
+
@see <https://validator.w3.org/nu/> Nu HTML Checker *)
69
70
71
+
(** {1:types Types} *)
72
+
73
+
(** Message severity level. *)
74
+
type severity =
75
+
| Error (** Conformance violation - document is invalid *)
76
+
| Warning (** Likely problem - may be intentional *)
77
+
| Info (** Suggestion for improvement *)
78
79
+
(** Source location in the document. Line and column are 1-indexed. *)
80
type location = {
81
line : int;
82
column : int;
83
end_line : int option;
84
end_column : int option;
85
+
system_id : string option; (** File path or URL if provided *)
86
}
87
88
+
(** Typed error code. Pattern match to handle specific errors.
89
90
+
{[
91
+
match msg.error_code with
92
+
| Parse Html5rw.Parse_error_code.Eof_in_tag ->
93
+
(* Unclosed tag at end of file *)
94
+
| Conformance (`Img `Missing_alt) ->
95
+
(* Image without alt attribute *)
96
+
| _ -> ()
97
+
]} *)
98
+
type error_code =
99
+
| Parse of Html5rw.Parse_error_code.t
100
+
(** Syntax error from the HTML5 parser.
101
+
@see <https://html.spec.whatwg.org/multipage/parsing.html#parse-errors> *)
102
+
| Conformance of Error_code.t
103
+
(** Semantic error from conformance checking. *)
104
+
105
+
(** A validation message. *)
106
type message = {
107
severity : severity;
108
+
text : string; (** Human-readable description *)
109
+
error_code : error_code; (** Typed code for pattern matching *)
110
+
location : location option; (** Source location if available *)
111
+
element : string option; (** Relevant element (lowercase) *)
112
+
attribute : string option; (** Relevant attribute (lowercase) *)
113
+
extract : string option; (** Source excerpt for context *)
114
}
115
116
+
(** Validation result. Use accessors below to inspect. *)
117
type t
118
119
120
+
(** {1:validation Validation} *)
121
122
+
(** Validate HTML from a string.
123
124
{[
125
+
let result = Htmlrw_check.check_string html in
126
if Htmlrw_check.has_errors result then
127
+
prerr_endline (Htmlrw_check.to_text result)
128
]}
129
130
+
@param system_id File path or URL for error messages. *)
131
+
val check_string : ?system_id:string -> string -> t
132
+
133
+
(** Validate HTML from a reader.
134
+
135
+
@param collect_parse_errors Include syntax errors (default: [true]).
136
+
@param system_id File path or URL for error messages. *)
137
val check :
138
?collect_parse_errors:bool ->
139
?system_id:string ->
140
Bytesrw.Bytes.Reader.t ->
141
t
142
143
+
(** Validate an already-parsed document.
144
145
+
Useful when you've parsed the HTML separately and want to run
146
+
conformance checks without re-parsing. *)
147
val check_parsed :
148
?collect_parse_errors:bool ->
149
?system_id:string ->
150
Html5rw.t ->
151
t
152
153
154
+
(** {1:results Results} *)
155
156
+
(** All messages in document order. *)
157
val messages : t -> message list
158
159
+
(** Only error-severity messages. *)
160
val errors : t -> message list
161
162
+
(** Only warning-severity messages. *)
163
val warnings : t -> message list
164
165
+
(** Only info-severity messages. *)
166
val infos : t -> message list
167
168
+
(** Only syntax errors from the parser. *)
169
+
val parse_errors : t -> message list
170
171
+
(** Only semantic errors from conformance checking. *)
172
+
val conformance_errors : t -> message list
173
174
+
(** [true] if any errors were found. *)
175
+
val has_errors : t -> bool
176
177
+
(** [true] if any warnings were found. *)
178
val has_warnings : t -> bool
179
180
+
(** The parsed document. *)
181
val document : t -> Html5rw.t
182
183
+
(** The system identifier (file path or URL) if provided. *)
184
val system_id : t -> string option
185
186
+
187
(** {1:formatting Output Formatting} *)
188
189
+
(** Human-readable text format.
190
191
{v
192
+
file.html:5.3: error [missing-alt]: Element "img" is missing required attribute "alt".
193
+
v} *)
194
val to_text : t -> string
195
196
+
(** JSON format compatible with Nu HTML Validator.
197
198
{v
199
+
{"messages":[{"type":"error","message":"...","firstLine":5,"firstColumn":3}]}
200
+
v} *)
201
val to_json : t -> string
202
203
+
(** GNU error format for IDE integration.
204
205
{v
206
+
file.html:5:3: error: Element "img" is missing required attribute "alt".
207
+
v} *)
208
val to_gnu : t -> string
209
210
211
+
(** {1:utilities Utilities} *)
212
213
+
(** ["error"], ["warning"], or ["info"]. *)
214
val severity_to_string : severity -> string
215
216
+
(** String representation of an error code. *)
217
+
val error_code_to_string : error_code -> string
218
+
219
+
(** Pretty-printer for severity. *)
220
val pp_severity : Format.formatter -> severity -> unit
221
222
+
(** Pretty-printer for location. *)
223
val pp_location : Format.formatter -> location -> unit
224
225
+
(** Pretty-printer for message. *)
226
val pp_message : Format.formatter -> message -> unit
227
228
229
+
(** {1:error_codes Error Code Types}
230
231
+
For pattern matching on conformance errors. Parse errors use
232
+
{!Html5rw.Parse_error_code}.
233
234
{[
235
+
match code with
236
+
| `Attr (`Missing_required_attr _) -> ...
237
+
| `Img `Missing_alt -> ...
238
+
| `Aria _ -> ... (* Any ARIA error *)
239
+
| _ -> ...
240
+
]} *)
241
module Error_code = Error_code
+23
-18
lib/htmlrw_check/message.ml
+23
-18
lib/htmlrw_check/message.ml
···
10
system_id : string option;
11
}
12
13
type t = {
14
severity : severity;
15
message : string;
16
-
code : string;
17
-
error_code : Error_code.t option;
18
location : location option;
19
element : string option;
20
attribute : string option;
···
24
let make_location ~line ~column ?end_line ?end_column ?system_id () =
25
{ line; column; end_line; end_column; system_id }
26
27
-
(** Create a message from a typed error code *)
28
-
let of_error_code ?location ?element ?attribute ?extract error_code =
29
let severity = match Error_code.severity error_code with
30
| Error_code.Error -> Error
31
| Error_code.Warning -> Warning
···
34
{
35
severity;
36
message = Error_code.to_message error_code;
37
-
code = Error_code.code_string error_code;
38
-
error_code = Some error_code;
39
location;
40
element;
41
attribute;
42
extract;
43
}
44
45
-
(** Create a message with manual message text (for backwards compatibility during migration) *)
46
-
let make ~severity ~message ?(code="generic") ?location ?element ?attribute ?extract () =
47
-
{ severity; message; code; error_code = None; location; element; attribute; extract }
48
-
49
-
let error ~message ?(code="generic") ?location ?element ?attribute ?extract () =
50
-
make ~severity:Error ~message ~code ?location ?element ?attribute ?extract ()
51
-
52
-
let warning ~message ?(code="generic") ?location ?element ?attribute ?extract () =
53
-
make ~severity:Warning ~message ~code ?location ?element ?attribute ?extract ()
54
55
-
let info ~message ?(code="generic") ?location ?element ?attribute ?extract () =
56
-
make ~severity:Info ~message ~code ?location ?element ?attribute ?extract ()
57
58
let severity_to_string = function
59
| Error -> "error"
···
82
Format.fprintf fmt ": "
83
| None -> ());
84
pp_severity fmt msg.severity;
85
-
Format.fprintf fmt " [%s]" msg.code;
86
Format.fprintf fmt ": %s" msg.message;
87
(match msg.element with
88
| Some elem -> Format.fprintf fmt " (element: %s)" elem
···
10
system_id : string option;
11
}
12
13
+
type error_code =
14
+
| Parse_error of Html5rw.Parse_error_code.t
15
+
| Conformance_error of Error_code.t
16
+
17
type t = {
18
severity : severity;
19
message : string;
20
+
error_code : error_code;
21
location : location option;
22
element : string option;
23
attribute : string option;
···
27
let make_location ~line ~column ?end_line ?end_column ?system_id () =
28
{ line; column; end_line; end_column; system_id }
29
30
+
(** Create a message from a conformance error code *)
31
+
let of_conformance_error ?location ?element ?attribute ?extract error_code =
32
let severity = match Error_code.severity error_code with
33
| Error_code.Error -> Error
34
| Error_code.Warning -> Warning
···
37
{
38
severity;
39
message = Error_code.to_message error_code;
40
+
error_code = Conformance_error error_code;
41
location;
42
element;
43
attribute;
44
extract;
45
}
46
47
+
(** Create a message from a parse error code *)
48
+
let of_parse_error ?location ?element ?attribute ?extract ~message code =
49
+
{
50
+
severity = Error; (* Parse errors are always errors *)
51
+
message;
52
+
error_code = Parse_error code;
53
+
location;
54
+
element;
55
+
attribute;
56
+
extract;
57
+
}
58
59
+
let error_code_to_string = function
60
+
| Parse_error code -> Html5rw.Parse_error_code.to_string code
61
+
| Conformance_error code -> Error_code.code_string code
62
63
let severity_to_string = function
64
| Error -> "error"
···
87
Format.fprintf fmt ": "
88
| None -> ());
89
pp_severity fmt msg.severity;
90
+
Format.fprintf fmt " [%s]" (error_code_to_string msg.error_code);
91
Format.fprintf fmt ": %s" msg.message;
92
(match msg.element with
93
| Some elem -> Format.fprintf fmt " (element: %s)" elem
+18
-42
lib/htmlrw_check/message.mli
+18
-42
lib/htmlrw_check/message.mli
···
18
system_id : string option; (** File path or URL *)
19
}
20
21
(** A validation message. *)
22
type t = {
23
severity : severity;
24
message : string; (** Human-readable description *)
25
-
code : string; (** Machine-readable error code *)
26
-
error_code : Error_code.t option; (** Typed error code if available *)
27
location : location option;
28
element : string option; (** Element name if relevant *)
29
attribute : string option; (** Attribute name if relevant *)
···
32
33
(** {1 Constructors} *)
34
35
-
(** Create a message from a typed error code (preferred method). *)
36
-
val of_error_code :
37
?location:location ->
38
?element:string ->
39
?attribute:string ->
···
41
Error_code.t ->
42
t
43
44
-
(** Create a validation message with specified severity (legacy). *)
45
-
val make :
46
-
severity:severity ->
47
-
message:string ->
48
-
?code:string ->
49
-
?location:location ->
50
-
?element:string ->
51
-
?attribute:string ->
52
-
?extract:string ->
53
-
unit ->
54
-
t
55
-
56
-
(** Create an error message (legacy). *)
57
-
val error :
58
-
message:string ->
59
-
?code:string ->
60
?location:location ->
61
?element:string ->
62
?attribute:string ->
63
?extract:string ->
64
-
unit ->
65
-
t
66
-
67
-
(** Create a warning message (legacy). *)
68
-
val warning :
69
message:string ->
70
-
?code:string ->
71
-
?location:location ->
72
-
?element:string ->
73
-
?attribute:string ->
74
-
?extract:string ->
75
-
unit ->
76
-
t
77
-
78
-
(** Create an informational message (legacy). *)
79
-
val info :
80
-
message:string ->
81
-
?code:string ->
82
-
?location:location ->
83
-
?element:string ->
84
-
?attribute:string ->
85
-
?extract:string ->
86
-
unit ->
87
t
88
89
(** Create a location record. *)
···
95
?system_id:string ->
96
unit ->
97
location
98
99
(** {1 Formatting} *)
100
···
18
system_id : string option; (** File path or URL *)
19
}
20
21
+
(** Unified error code type covering both parse errors and conformance errors. *)
22
+
type error_code =
23
+
| Parse_error of Html5rw.Parse_error_code.t
24
+
(** Parse error from the HTML5 tokenizer/parser *)
25
+
| Conformance_error of Error_code.t
26
+
(** Conformance error from semantic validation *)
27
+
28
(** A validation message. *)
29
type t = {
30
severity : severity;
31
message : string; (** Human-readable description *)
32
+
error_code : error_code; (** Typed error code *)
33
location : location option;
34
element : string option; (** Element name if relevant *)
35
attribute : string option; (** Attribute name if relevant *)
···
38
39
(** {1 Constructors} *)
40
41
+
(** Create a message from a conformance error code. *)
42
+
val of_conformance_error :
43
?location:location ->
44
?element:string ->
45
?attribute:string ->
···
47
Error_code.t ->
48
t
49
50
+
(** Create a message from a parse error code. *)
51
+
val of_parse_error :
52
?location:location ->
53
?element:string ->
54
?attribute:string ->
55
?extract:string ->
56
message:string ->
57
+
Html5rw.Parse_error_code.t ->
58
t
59
60
(** Create a location record. *)
···
66
?system_id:string ->
67
unit ->
68
location
69
+
70
+
(** {1 Utilities} *)
71
+
72
+
(** Get the string representation of an error code. *)
73
+
val error_code_to_string : error_code -> string
74
75
(** {1 Formatting} *)
76
+2
-27
lib/htmlrw_check/message_collector.ml
+2
-27
lib/htmlrw_check/message_collector.ml
···
13
14
let add t msg = t.messages <- msg :: t.messages
15
16
-
(** Add a message from a typed error code *)
17
let add_typed t ?location ?element ?attribute ?extract error_code =
18
(* Use provided location, or fall back to current_location *)
19
let loc = match location with
20
| Some _ -> location
21
| None -> t.current_location
22
in
23
-
let msg = Message.of_error_code ?location:loc ?element ?attribute ?extract error_code in
24
-
add t msg
25
-
26
-
(** Add an error from a typed error code *)
27
-
let add_error_code t ?location ?element ?attribute ?extract error_code =
28
-
add_typed t ?location ?element ?attribute ?extract error_code
29
-
30
-
(** Legacy: Add an error with manual message text *)
31
-
let add_error t ~message ?code ?location ?element ?attribute ?extract () =
32
-
let msg =
33
-
Message.error ~message ?code ?location ?element ?attribute ?extract ()
34
-
in
35
-
add t msg
36
-
37
-
(** Legacy: Add a warning with manual message text *)
38
-
let add_warning t ~message ?code ?location ?element ?attribute ?extract () =
39
-
let msg =
40
-
Message.warning ~message ?code ?location ?element ?attribute ?extract ()
41
-
in
42
-
add t msg
43
-
44
-
(** Legacy: Add an info message with manual message text *)
45
-
let add_info t ~message ?code ?location ?element ?attribute ?extract () =
46
-
let msg =
47
-
Message.info ~message ?code ?location ?element ?attribute ?extract ()
48
-
in
49
add t msg
50
51
let messages t = List.rev t.messages
···
13
14
let add t msg = t.messages <- msg :: t.messages
15
16
+
(** Add a message from a typed conformance error code *)
17
let add_typed t ?location ?element ?attribute ?extract error_code =
18
(* Use provided location, or fall back to current_location *)
19
let loc = match location with
20
| Some _ -> location
21
| None -> t.current_location
22
in
23
+
let msg = Message.of_conformance_error ?location:loc ?element ?attribute ?extract error_code in
24
add t msg
25
26
let messages t = List.rev t.messages
+5
-52
lib/htmlrw_check/message_collector.mli
+5
-52
lib/htmlrw_check/message_collector.mli
···
20
(** Get the current location. *)
21
val get_current_location : t -> Message.location option
22
23
-
(** {1 Adding Messages - Typed Error Codes (Preferred)} *)
24
-
25
-
(** Add a message from a typed error code. *)
26
-
val add_typed :
27
-
t ->
28
-
?location:Message.location ->
29
-
?element:string ->
30
-
?attribute:string ->
31
-
?extract:string ->
32
-
Error_code.t ->
33
-
unit
34
-
35
-
(** Add an error from a typed error code. Alias for add_typed. *)
36
-
val add_error_code :
37
-
t ->
38
-
?location:Message.location ->
39
-
?element:string ->
40
-
?attribute:string ->
41
-
?extract:string ->
42
-
Error_code.t ->
43
-
unit
44
-
45
-
(** {1 Adding Messages - Legacy (for migration)} *)
46
47
(** Add a message to the collector. *)
48
val add : t -> Message.t -> unit
49
50
-
(** Add an error message to the collector (legacy). *)
51
-
val add_error :
52
t ->
53
-
message:string ->
54
-
?code:string ->
55
-
?location:Message.location ->
56
-
?element:string ->
57
-
?attribute:string ->
58
-
?extract:string ->
59
-
unit ->
60
-
unit
61
-
62
-
(** Add a warning message to the collector (legacy). *)
63
-
val add_warning :
64
-
t ->
65
-
message:string ->
66
-
?code:string ->
67
?location:Message.location ->
68
?element:string ->
69
?attribute:string ->
70
?extract:string ->
71
-
unit ->
72
-
unit
73
-
74
-
(** Add an info message to the collector (legacy). *)
75
-
val add_info :
76
-
t ->
77
-
message:string ->
78
-
?code:string ->
79
-
?location:Message.location ->
80
-
?element:string ->
81
-
?attribute:string ->
82
-
?extract:string ->
83
-
unit ->
84
unit
85
86
(** {1 Retrieving Messages} *)
···
20
(** Get the current location. *)
21
val get_current_location : t -> Message.location option
22
23
+
(** {1 Adding Messages} *)
24
25
(** Add a message to the collector. *)
26
val add : t -> Message.t -> unit
27
28
+
(** Add a message from a typed conformance error code.
29
+
Uses the current location if no explicit location is provided. *)
30
+
val add_typed :
31
t ->
32
?location:Message.location ->
33
?element:string ->
34
?attribute:string ->
35
?extract:string ->
36
+
Error_code.t ->
37
unit
38
39
(** {1 Retrieving Messages} *)
+3
-3
lib/htmlrw_check/message_format.ml
+3
-3
lib/htmlrw_check/message_format.ml
···
24
match system_id with Some s -> s | None -> "input")
25
in
26
let severity_str = Message.severity_to_string msg.Message.severity in
27
-
let code_str = " [" ^ msg.Message.code ^ "]" in
28
let elem_str =
29
match msg.Message.element with
30
| Some e -> " (element: " ^ e ^ ")"
···
59
match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
60
in
61
let severity_str = Message.severity_to_string msg.Message.severity in
62
-
let code_str = " [" ^ msg.Message.code ^ "]" in
63
Buffer.add_string buf
64
(Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
65
msg.Message.message))
···
72
let message_text = String (msg.Message.message, Meta.none) in
73
let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
74
let with_code =
75
-
(("subType", Meta.none), String (msg.Message.code, Meta.none)) :: base
76
in
77
let with_location =
78
match msg.Message.location with
···
24
match system_id with Some s -> s | None -> "input")
25
in
26
let severity_str = Message.severity_to_string msg.Message.severity in
27
+
let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
28
let elem_str =
29
match msg.Message.element with
30
| Some e -> " (element: " ^ e ^ ")"
···
59
match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
60
in
61
let severity_str = Message.severity_to_string msg.Message.severity in
62
+
let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
63
Buffer.add_string buf
64
(Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
65
msg.Message.message))
···
72
let message_text = String (msg.Message.message, Meta.none) in
73
let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
74
let with_code =
75
+
(("subType", Meta.none), String (Message.error_code_to_string msg.Message.error_code, Meta.none)) :: base
76
in
77
let with_location =
78
match msg.Message.location with
+79
-84
lib/htmlrw_check/parse_error_bridge.ml
+79
-84
lib/htmlrw_check/parse_error_bridge.ml
···
3
SPDX-License-Identifier: MIT
4
---------------------------------------------------------------------------*)
5
6
let of_parse_error ?system_id err =
7
let code = Html5rw.error_code err in
8
let line = Html5rw.error_line err in
9
let column = Html5rw.error_column err in
10
-
let location =
11
-
Message.make_location ~line ~column ?system_id ()
12
-
in
13
-
let code_str = Html5rw.Parse_error_code.to_string code in
14
-
let (message, final_code) = match code with
15
-
| Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
16
-
("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str)
17
-
| Html5rw.Parse_error_code.Null_character_reference ->
18
-
("Character reference expands to zero.", "null-character-reference")
19
-
| Html5rw.Parse_error_code.Tree_construction_error s ->
20
-
(* Check for control-character/noncharacter/surrogate with codepoint info *)
21
-
(try
22
-
if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then
23
-
let colon_pos = String.index s ':' in
24
-
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25
-
let cp = int_of_string ("0x" ^ cp_str) in
26
-
(Printf.sprintf "Forbidden code point U+%04x." cp, "forbidden-codepoint")
27
-
else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then
28
-
let colon_pos = String.index s ':' in
29
-
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30
-
let cp = int_of_string ("0x" ^ cp_str) in
31
-
(Printf.sprintf "Forbidden code point U+%04x." cp, "forbidden-codepoint")
32
-
else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then
33
-
let colon_pos = String.index s ':' in
34
-
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
35
-
let cp = int_of_string ("0x" ^ cp_str) in
36
-
(Printf.sprintf "Forbidden code point U+%04x." cp, "forbidden-codepoint")
37
-
(* Character reference errors *)
38
-
else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then
39
-
let cp_str = String.sub s 28 (String.length s - 28) in
40
-
let cp = int_of_string ("0x" ^ cp_str) in
41
-
if cp = 0x0D then
42
-
("A numeric character reference expanded to carriage return.", "control-character-reference")
43
-
else
44
-
(Printf.sprintf "Character reference expands to a control character (U+%04x)." cp, "control-character-reference")
45
-
else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then
46
-
let colon_pos = String.index s ':' in
47
-
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
48
-
let cp = int_of_string ("0x" ^ cp_str) in
49
-
(* U+FDD0-U+FDEF are "permanently unassigned" *)
50
-
if cp >= 0xFDD0 && cp <= 0xFDEF then
51
-
("Character reference expands to a permanently unassigned code point.", "noncharacter-character-reference")
52
-
(* Astral noncharacters (planes 1-16) *)
53
-
else if cp >= 0x10000 then
54
-
(Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp, "noncharacter-character-reference")
55
-
else
56
-
(Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp, "noncharacter-character-reference")
57
-
else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then
58
-
let colon_pos = String.index s ':' in
59
-
let _ = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
60
-
("Character reference outside the permissible Unicode range.", "character-reference-outside-unicode-range")
61
-
else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then
62
-
let colon_pos = String.index s ':' in
63
-
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
64
-
let cp = int_of_string ("0x" ^ cp_str) in
65
-
(Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp, "surrogate-character-reference")
66
-
else if s = "no-p-element-in-scope" then
67
-
("No \xe2\x80\x9cp\xe2\x80\x9d element in scope but a \xe2\x80\x9cp\xe2\x80\x9d end tag seen.", "no-p-element-in-scope")
68
-
else if s = "end-tag-p-implied-but-open-elements" then
69
-
("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied")
70
-
else if s = "end-tag-br" then
71
-
("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br")
72
-
else if s = "expected-closing-tag-but-got-eof" then
73
-
("End of file seen and there were open elements.", "eof-in-open-element")
74
-
else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then
75
-
let colon_pos = String.index s ':' in
76
-
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
77
-
(Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element, "bad-start-tag-in-head-noscript")
78
-
else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
79
-
let element = String.sub s 19 (String.length s - 19) in
80
-
(Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag")
81
-
else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
82
-
let tag = String.sub s 19 (String.length s - 19) in
83
-
(Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag, "start-tag-in-table")
84
-
else
85
-
(Printf.sprintf "Parse error: %s" s, s)
86
-
with _ -> (Printf.sprintf "Parse error: %s" s, s))
87
-
| _ -> (Printf.sprintf "Parse error: %s" code_str, code_str)
88
-
in
89
-
Message.error
90
-
~message
91
-
~code:final_code
92
-
~location
93
-
()
94
95
let collect_parse_errors ?system_id result =
96
let errors = Html5rw.errors result in
···
3
SPDX-License-Identifier: MIT
4
---------------------------------------------------------------------------*)
5
6
+
(** Generate human-readable message for a parse error code *)
7
+
let message_of_parse_error code =
8
+
let code_str = Html5rw.Parse_error_code.to_string code in
9
+
match code with
10
+
| Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
11
+
"Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag."
12
+
| Html5rw.Parse_error_code.Null_character_reference ->
13
+
"Character reference expands to zero."
14
+
| Html5rw.Parse_error_code.Tree_construction_error s ->
15
+
(* Check for control-character/noncharacter/surrogate with codepoint info *)
16
+
(try
17
+
if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then
18
+
let colon_pos = String.index s ':' in
19
+
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
20
+
let cp = int_of_string ("0x" ^ cp_str) in
21
+
Printf.sprintf "Forbidden code point U+%04x." cp
22
+
else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then
23
+
let colon_pos = String.index s ':' in
24
+
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25
+
let cp = int_of_string ("0x" ^ cp_str) in
26
+
Printf.sprintf "Forbidden code point U+%04x." cp
27
+
else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then
28
+
let colon_pos = String.index s ':' in
29
+
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30
+
let cp = int_of_string ("0x" ^ cp_str) in
31
+
Printf.sprintf "Forbidden code point U+%04x." cp
32
+
(* Character reference errors *)
33
+
else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then
34
+
let cp_str = String.sub s 28 (String.length s - 28) in
35
+
let cp = int_of_string ("0x" ^ cp_str) in
36
+
if cp = 0x0D then
37
+
"A numeric character reference expanded to carriage return."
38
+
else
39
+
Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
40
+
else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then
41
+
let colon_pos = String.index s ':' in
42
+
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
43
+
let cp = int_of_string ("0x" ^ cp_str) in
44
+
(* U+FDD0-U+FDEF are "permanently unassigned" *)
45
+
if cp >= 0xFDD0 && cp <= 0xFDEF then
46
+
"Character reference expands to a permanently unassigned code point."
47
+
(* Astral noncharacters (planes 1-16) *)
48
+
else if cp >= 0x10000 then
49
+
Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
50
+
else
51
+
Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
52
+
else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then
53
+
"Character reference outside the permissible Unicode range."
54
+
else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then
55
+
let colon_pos = String.index s ':' in
56
+
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
57
+
let cp = int_of_string ("0x" ^ cp_str) in
58
+
Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
59
+
else if s = "no-p-element-in-scope" then
60
+
"No \xe2\x80\x9cp\xe2\x80\x9d element in scope but a \xe2\x80\x9cp\xe2\x80\x9d end tag seen."
61
+
else if s = "end-tag-p-implied-but-open-elements" then
62
+
"End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements."
63
+
else if s = "end-tag-br" then
64
+
"End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65
+
else if s = "expected-closing-tag-but-got-eof" then
66
+
"End of file seen and there were open elements."
67
+
else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then
68
+
let colon_pos = String.index s ':' in
69
+
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70
+
Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71
+
else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
72
+
let element = String.sub s 19 (String.length s - 19) in
73
+
Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74
+
else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
75
+
let tag = String.sub s 19 (String.length s - 19) in
76
+
Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77
+
else
78
+
Printf.sprintf "Parse error: %s" s
79
+
with _ -> Printf.sprintf "Parse error: %s" s)
80
+
| _ -> Printf.sprintf "Parse error: %s" code_str
81
+
82
let of_parse_error ?system_id err =
83
let code = Html5rw.error_code err in
84
let line = Html5rw.error_line err in
85
let column = Html5rw.error_column err in
86
+
let location = Message.make_location ~line ~column ?system_id () in
87
+
let message = message_of_parse_error code in
88
+
Message.of_parse_error ~location ~message code
89
90
let collect_parse_errors ?system_id result =
91
let errors = Html5rw.errors result in
+2
-2
test/expected_message.ml
+2
-2
test/expected_message.ml
···
360
let code_matches =
361
match (expected.error_code, actual.Htmlrw_check.error_code) with
362
| (None, _) -> true (* No expected code to match *)
363
-
| (Some ec, Some ac) -> error_codes_match ec ac
364
-
| (Some _, None) -> false (* Expected typed but got untyped *)
365
in
366
367
(* Check message text *)
···
360
let code_matches =
361
match (expected.error_code, actual.Htmlrw_check.error_code) with
362
| (None, _) -> true (* No expected code to match *)
363
+
| (Some ec, Htmlrw_check.Conformance ac) -> error_codes_match ec ac
364
+
| (Some _, Htmlrw_check.Parse _) -> false (* Expected conformance but got parse error *)
365
in
366
367
(* Check message text *)