+3
-1
lib/htmlrw_check/datatype/dt_language.ml
+3
-1
lib/htmlrw_check/datatype/dt_language.ml
···
1
1
(** Helper functions for language tag validation *)
2
2
3
+
let q = Error_code.q
4
+
3
5
let is_lower_alpha c = c >= 'a' && c <= 'z'
4
6
let is_upper_alpha c = c >= 'A' && c <= 'Z'
5
7
let is_alpha c = is_lower_alpha c || is_upper_alpha c
···
123
125
if is_valid_extlang first_lower second_lower then
124
126
Ok ()
125
127
else
126
-
Error (Printf.sprintf "Bad extlang subtag \xe2\x80\x9c%s\xe2\x80\x9d" second_lower)
128
+
Error (Printf.sprintf "Bad extlang subtag %s" (q second_lower))
127
129
else
128
130
Ok () (* Not an extlang pattern, continue *)
129
131
| [] -> Ok ())
+8
-6
lib/htmlrw_check/parse_error_bridge.ml
+8
-6
lib/htmlrw_check/parse_error_bridge.ml
···
3
3
SPDX-License-Identifier: MIT
4
4
---------------------------------------------------------------------------*)
5
5
6
+
let q = Error_code.q
7
+
6
8
(** Generate human-readable message for a parse error code *)
7
9
let message_of_parse_error code =
8
10
let code_str = Html5rw.Parse_error_code.to_string code in
···
57
59
let cp = int_of_string ("0x" ^ cp_str) in
58
60
Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
59
61
else if s = "no-p-element-in-scope" then
60
-
"No \xe2\x80\x9cp\xe2\x80\x9d element in scope but a \xe2\x80\x9cp\xe2\x80\x9d end tag seen."
62
+
Printf.sprintf "No %s element in scope but a %s end tag seen." (q "p") (q "p")
61
63
else if s = "end-tag-p-implied-but-open-elements" then
62
-
"End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements."
64
+
Printf.sprintf "End tag %s implied, but there were open elements." (q "p")
63
65
else if s = "end-tag-br" then
64
-
"End tag \xe2\x80\x9cbr\xe2\x80\x9d."
66
+
Printf.sprintf "End tag %s." (q "br")
65
67
else if s = "expected-closing-tag-but-got-eof" then
66
68
"End of file seen and there were open elements."
67
69
else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
68
70
let colon_pos = String.index s ':' in
69
71
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70
-
Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
72
+
Printf.sprintf "Bad start tag in %s in %s in %s." (q element) (q "noscript") (q "head")
71
73
else if String.starts_with ~prefix:"unexpected-end-tag:" s then
72
74
let element = String.sub s 19 (String.length s - 19) in
73
-
Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
75
+
Printf.sprintf "Stray end tag %s." (q element)
74
76
else if String.starts_with ~prefix:"start-tag-in-table:" s then
75
77
let tag = String.sub s 19 (String.length s - 19) in
76
-
Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
78
+
Printf.sprintf "Start tag %s seen in %s." (q tag) (q "table")
77
79
else
78
80
Printf.sprintf "Parse error: %s" s
79
81
with _ -> Printf.sprintf "Parse error: %s" s)
+4
-2
lib/htmlrw_check/semantic/obsolete_checker.ml
+4
-2
lib/htmlrw_check/semantic/obsolete_checker.ml
···
1
+
let q = Error_code.q
2
+
1
3
(** Obsolete elements map: element name -> suggestion message *)
2
4
let obsolete_elements =
3
5
let tbl = Hashtbl.create 32 in
···
131
133
"Use the HTTP OPTIONS feature instead.";
132
134
133
135
register "name" ["a"]
134
-
"Consider putting an \xe2\x80\x9cid\xe2\x80\x9d attribute on the nearest container instead.";
136
+
(Printf.sprintf "Consider putting an %s attribute on the nearest container instead." (q "id"));
135
137
136
138
register "name" ["embed"; "img"; "option"]
137
-
"Use the \xe2\x80\x9cid\xe2\x80\x9d attribute instead.";
139
+
(Printf.sprintf "Use the %s attribute instead." (q "id"));
138
140
139
141
register "nohref" ["area"]
140
142
"Omitting the \"href\" attribute is sufficient.";
+2
-4
lib/htmlrw_check/semantic/required_attr_checker.ml
+2
-4
lib/htmlrw_check/semantic/required_attr_checker.ml
···
1
1
(** Required attribute checker implementation. *)
2
2
3
+
let q = Error_code.q
4
+
3
5
type state = {
4
6
mutable _in_figure : bool;
5
7
(** Track if we're inside a <figure> element (alt is more critical there) *)
···
81
83
in
82
84
83
85
if not valid then
84
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
85
86
Message_collector.add_typed collector
86
87
(`Generic (Printf.sprintf "A %s element must have either a %s attribute, a %s attribute with a %s attribute, or an %s attribute with a %s attribute."
87
88
(q "meta") (q "charset") (q "name")
···
122
123
let value_lower = String.lowercase_ascii value in
123
124
(* Valid values: empty string, auto, manual, hint *)
124
125
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
125
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
126
126
Message_collector.add_typed collector
127
127
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s."
128
128
(q value) (q "popover") (q element_name)))))
···
141
141
let value = float_of_string value_str in
142
142
let min_val = float_of_string min_str in
143
143
if min_val > value then
144
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
145
144
Message_collector.add_typed collector
146
145
(`Generic (Printf.sprintf "The value of the %s attribute must be less than or equal to the value of the %s attribute."
147
146
(q "min") (q "value")))
···
161
160
| Some max_str -> (try float_of_string max_str with _ -> 1.0)
162
161
in
163
162
if value > max_val then
164
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
165
163
(* Check which message to use based on whether max is present *)
166
164
if Attr_utils.has_attr "max" attrs then
167
165
Message_collector.add_typed collector
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
···
586
586
if aria_checked <> None then
587
587
Message_collector.add_typed collector
588
588
(`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
589
-
`Condition "a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9ccheckbox\xe2\x80\x9d")))
589
+
`Condition (Printf.sprintf "a %s attribute whose value is %s" (q "type") (q "checkbox")))))
590
590
| _ -> ()
591
591
end;
592
592
+24
-22
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
+24
-22
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
···
1
1
(** Attribute restrictions checker - validates that certain attributes
2
2
are not used on elements where they're not allowed. *)
3
3
4
+
let q = Error_code.q
5
+
4
6
(** List of (element, [disallowed attributes]) pairs for HTML elements. *)
5
7
let disallowed_attrs_html = [
6
8
(* Elements that cannot have href attribute (RDFa misuses) *)
···
174
176
if attr_value = "#" then
175
177
Message_collector.add_typed collector
176
178
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
177
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad hash-name reference: A hash-name reference must have at least one character after \xe2\x80\x9c#\xe2\x80\x9d."
178
-
attr_value attr_name name))))
179
+
"Bad value %s for attribute %s on element %s: Bad hash-name reference: A hash-name reference must have at least one character after %s."
180
+
(q attr_value) (q attr_name) (q name) (q "#")))))
179
181
end
180
182
) attrs
181
183
end;
···
190
192
| Error msg ->
191
193
Message_collector.add_typed collector
192
194
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
193
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: %s"
194
-
attr_value attr_name name msg))))
195
+
"Bad value %s for attribute %s on element %s: Bad MIME type: %s"
196
+
(q attr_value) (q attr_name) (q name) msg))))
195
197
end
196
198
) attrs
197
199
end;
···
213
215
(* Determine specific error message *)
214
216
let error_msg =
215
217
if String.length attr_value = 0 then
216
-
Printf.sprintf "Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: The empty string is not a valid non-negative integer."
217
-
attr_name name
218
+
Printf.sprintf "Bad value %s for attribute %s on element %s: The empty string is not a valid non-negative integer."
219
+
(q "") (q attr_name) (q name)
218
220
else if String.contains attr_value '%' then
219
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%%\xe2\x80\x9d instead."
220
-
attr_value attr_name name
221
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
222
+
(q attr_value) (q attr_name) (q name) (q "%")
221
223
else if String.length attr_value > 0 && attr_value.[0] = '-' then
222
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c-\xe2\x80\x9d instead."
223
-
attr_value attr_name name
224
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
225
+
(q attr_value) (q attr_name) (q name) (q "-")
224
226
else
225
227
(* Find first non-digit character *)
226
228
let bad_char =
···
234
236
in
235
237
match bad_char with
236
238
| Some c ->
237
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead."
238
-
attr_value attr_name name c
239
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
240
+
(q attr_value) (q attr_name) (q name) (q (String.make 1 c))
239
241
| None ->
240
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit."
241
-
attr_value attr_name name
242
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit."
243
+
(q attr_value) (q attr_name) (q name)
242
244
in
243
245
Message_collector.add_typed collector
244
246
(`Attr (`Bad_value_generic (`Message error_msg)))
···
377
379
if count_codepoints key > 1 then
378
380
Message_collector.add_typed collector
379
381
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
380
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Key label has multiple characters. Each key label must be a single character."
381
-
attr_value attr_name name))))
382
+
"Bad value %s for attribute %s on element %s: Bad key label list: Key label has multiple characters. Each key label must be a single character."
383
+
(q attr_value) (q attr_name) (q name)))))
382
384
) keys;
383
385
(* Check for duplicate keys *)
384
386
let rec find_duplicates seen = function
···
387
389
if List.mem k seen then
388
390
Message_collector.add_typed collector
389
391
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
390
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Duplicate key label. Each key label must be unique."
391
-
attr_value attr_name name))))
392
+
"Bad value %s for attribute %s on element %s: Bad key label list: Duplicate key label. Each key label must be unique."
393
+
(q attr_value) (q attr_name) (q name)))))
392
394
else
393
395
find_duplicates (k :: seen) rest
394
396
in
···
405
407
if has_command && has_aria_expanded then
406
408
Message_collector.add_typed collector
407
409
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
408
-
`Condition "a \xe2\x80\x9ccommand\xe2\x80\x9d attribute")));
410
+
`Condition (Printf.sprintf "a %s attribute" (q "command")))));
409
411
410
412
if has_popovertarget && has_aria_expanded then
411
413
Message_collector.add_typed collector
412
414
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
413
-
`Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
415
+
`Condition (Printf.sprintf "a %s attribute" (q "popovertarget")))))
414
416
end;
415
417
416
418
(* Note: data-* uppercase check requires XML parsing which preserves case.
···
432
434
| Error msg ->
433
435
Message_collector.add_typed collector
434
436
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
435
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad media query: %s"
436
-
attr_value attr_name name msg))))
437
+
"Bad value %s for attribute %s on element %s: Bad media query: %s"
438
+
(q attr_value) (q attr_name) (q name) msg))))
437
439
end
438
440
end
439
441
) attrs
+18
-16
lib/htmlrw_check/specialized/datetime_checker.ml
+18
-16
lib/htmlrw_check/specialized/datetime_checker.ml
···
1
1
(** Datetime attribute validation checker *)
2
2
3
+
let q = Error_code.q
4
+
3
5
(** Elements that have datetime attribute *)
4
6
let datetime_elements = ["del"; "ins"; "time"]
5
7
···
346
348
if value <> String.trim value then begin
347
349
let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in
348
350
let date_msg = "Bad date: The literal did not satisfy the date format." in
349
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
350
-
value attr_name element_name tz_msg date_msg)
351
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
352
+
(q value) (q attr_name) (q element_name) tz_msg date_msg)
351
353
end
352
354
else
353
355
(* Try datetime with timezone first *)
···
355
357
| DtOk -> Ok (* Valid datetime with timezone *)
356
358
| DtWarning w ->
357
359
(* Valid but with warning - format matches Nu validator *)
358
-
Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
359
-
value attr_name element_name w)
360
+
Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
361
+
(q value) (q attr_name) (q element_name) w)
360
362
| DtError tz_error ->
361
363
(* Try just date - valid for all elements *)
362
364
match validate_date value with
···
365
367
if has_suspicious_year value || has_old_year value then begin
366
368
let date_msg = "Bad date: Year may be mistyped." in
367
369
let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
368
-
Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
369
-
value attr_name element_name date_msg tz_msg)
370
+
Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
371
+
(q value) (q attr_name) (q element_name) date_msg tz_msg)
370
372
end else
371
373
Ok (* Valid date with normal year *)
372
374
| (false, date_error) ->
···
394
396
| (true, _) -> Ok (* Valid duration P... *)
395
397
| (false, _) ->
396
398
(* Use simplified message for time element matching Nu validator format *)
397
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad time-datetime: The literal did not satisfy the time-datetime format."
398
-
value attr_name element_name)
399
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad time-datetime: The literal did not satisfy the time-datetime format."
400
+
(q value) (q attr_name) (q element_name))
399
401
end
400
402
else begin
401
403
(* del/ins only allow date or datetime-with-timezone *)
···
426
428
(* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427
429
Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428
430
if is_month_less_than_error then
429
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430
-
value attr_name element_name date_msg tz_msg)
431
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
432
+
(q value) (q attr_name) (q element_name) date_msg tz_msg)
431
433
else if is_tz_minutes_error || is_fraction_error then
432
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433
-
value attr_name element_name date_msg tz_msg)
434
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
435
+
(q value) (q attr_name) (q element_name) date_msg tz_msg)
434
436
else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436
-
value attr_name element_name tz_msg date_msg)
437
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
438
+
(q value) (q attr_name) (q element_name) tz_msg date_msg)
437
439
else
438
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439
-
value attr_name element_name tz_msg date_msg)
440
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
441
+
(q value) (q attr_name) (q element_name) tz_msg date_msg)
440
442
end
441
443
442
444
(** Checker state *)
+9
-6
lib/htmlrw_check/specialized/microdata_checker.ml
+9
-6
lib/htmlrw_check/specialized/microdata_checker.ml
···
2
2
3
3
Validates HTML5 microdata attributes. *)
4
4
5
+
(** Quote helper for consistent message formatting. *)
6
+
let q = Error_code.q
7
+
5
8
(** Information about an itemscope. *)
6
9
type item_scope = {
7
10
element : string;
···
74
77
let url_trimmed = String.trim url in
75
78
if String.length url_trimmed = 0 then
76
79
Some (Printf.sprintf
77
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
78
-
original_value attr_name element)
80
+
"Bad value %s for attribute %s on element %s: Bad absolute URL: Must be non-empty."
81
+
(q original_value) (q attr_name) (q element))
79
82
else
80
83
(* First check if it has a scheme (required for absolute URL) *)
81
84
match Url_checker.extract_scheme url_trimmed with
82
85
| None ->
83
86
Some (Printf.sprintf
84
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
85
-
original_value attr_name element url)
87
+
"Bad value %s for attribute %s on element %s: Bad absolute URL: The string %s is not an absolute URL."
88
+
(q original_value) (q attr_name) (q element) (q url))
86
89
| Some _ ->
87
90
(* Has a scheme - do comprehensive URL validation *)
88
91
match Url_checker.validate_url url element attr_name with
···
94
97
(* Escape backslashes in replacement string for Str.global_replace *)
95
98
let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
96
99
let error_msg = Str.global_replace
97
-
(Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
98
-
(Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
100
+
(Str.regexp_string (Printf.sprintf "%s for attribute" (q url)))
101
+
(Printf.sprintf "%s for attribute" (q escaped_original))
99
102
error_msg in
100
103
Some error_msg
101
104
+32
-30
lib/htmlrw_check/specialized/mime_type_checker.ml
+32
-30
lib/htmlrw_check/specialized/mime_type_checker.ml
···
2
2
3
3
Validates MIME type values in type attributes. *)
4
4
5
+
let q = Error_code.q
6
+
5
7
(** Validate a MIME type value. Returns error message or None. *)
6
8
let validate_mime_type value element attr_name =
7
9
let len = String.length value in
8
10
if len = 0 then
9
11
Some (Printf.sprintf
10
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
11
-
value attr_name element)
12
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Empty value."
13
+
(q value) (q attr_name) (q element))
12
14
else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
13
15
(* Check if this is a semicolon followed by only whitespace *)
14
16
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···
18
20
let params_trimmed = String.trim params in
19
21
if params_trimmed = "" then
20
22
Some (Printf.sprintf
21
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
22
-
value attr_name element)
23
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
24
+
(q value) (q attr_name) (q element))
23
25
else
24
26
Some (Printf.sprintf
25
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
26
-
value attr_name element)
27
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
28
+
(q value) (q attr_name) (q element))
27
29
| None ->
28
30
Some (Printf.sprintf
29
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
30
-
value attr_name element)
31
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
32
+
(q value) (q attr_name) (q element))
31
33
end
32
34
else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
33
35
Some (Printf.sprintf
34
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
35
-
value attr_name element)
36
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Expected a token character but saw %s instead."
37
+
(q value) (q attr_name) (q element) (q " "))
36
38
else
37
39
(* Parse type/subtype *)
38
40
let slash_pos = try Some (String.index value '/') with Not_found -> None in
···
43
45
(match semicolon_pos with
44
46
| Some _ ->
45
47
Some (Printf.sprintf
46
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
47
-
value attr_name element)
48
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
49
+
(q value) (q attr_name) (q element))
48
50
| None ->
49
51
Some (Printf.sprintf
50
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
51
-
value attr_name element))
52
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
53
+
(q value) (q attr_name) (q element)))
52
54
| Some slash_pos ->
53
55
(* Check for empty subtype *)
54
56
let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
···
60
62
let subtype_trimmed = String.trim subtype in
61
63
if subtype_trimmed = "" then
62
64
Some (Printf.sprintf
63
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
64
-
value attr_name element)
65
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
66
+
(q value) (q attr_name) (q element))
65
67
else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
66
68
(* Space before semicolon - also check parameter format *)
67
69
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···
72
74
let params_trimmed = String.trim params in
73
75
if params_trimmed = "" then
74
76
Some (Printf.sprintf
75
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
76
-
value attr_name element)
77
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
78
+
(q value) (q attr_name) (q element))
77
79
else
78
80
(* Check for param_name=value format *)
79
81
let eq_pos = try Some (String.index params '=') with Not_found -> None in
80
82
(match eq_pos with
81
83
| None ->
82
84
Some (Printf.sprintf
83
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
84
-
value attr_name element)
85
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
86
+
(q value) (q attr_name) (q element))
85
87
| Some _ -> None)
86
88
| None -> None)
87
89
else
···
94
96
let params_trimmed = String.trim params in
95
97
if params_trimmed = "" then
96
98
Some (Printf.sprintf
97
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
98
-
value attr_name element)
99
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
100
+
(q value) (q attr_name) (q element))
99
101
else
100
102
(* Check for param_name=value format *)
101
103
let eq_pos = try Some (String.index params '=') with Not_found -> None in
102
104
(match eq_pos with
103
105
| None ->
104
106
Some (Printf.sprintf
105
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
106
-
value attr_name element)
107
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
108
+
(q value) (q attr_name) (q element))
107
109
| Some eq_pos ->
108
110
let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
109
111
let param_value_trimmed = String.trim param_value in
110
112
if param_value_trimmed = "" then
111
113
Some (Printf.sprintf
112
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
113
-
value attr_name element)
114
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
115
+
(q value) (q attr_name) (q element))
114
116
else if param_value_trimmed.[0] = '"' then
115
117
(* Quoted string - check for closing quote *)
116
118
let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
···
127
129
in
128
130
if has_backslash_at_end then
129
131
Some (Printf.sprintf
130
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
131
-
value attr_name element)
132
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
133
+
(q value) (q attr_name) (q element))
132
134
else
133
135
Some (Printf.sprintf
134
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
135
-
value attr_name element))
136
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
137
+
(q value) (q attr_name) (q element)))
136
138
else
137
139
None))
138
140
+10
-8
lib/htmlrw_check/specialized/svg_checker.ml
+10
-8
lib/htmlrw_check/specialized/svg_checker.ml
···
2
2
3
3
Validates SVG elements and attributes according to SVG 1.1/2 specifications. *)
4
4
5
+
let q = Error_code.q
6
+
5
7
type font_state = {
6
8
mutable has_missing_glyph : bool;
7
9
}
···
292
294
if value <> svg_ns_url then
293
295
Message_collector.add_typed collector
294
296
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
295
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns\xe2\x80\x9d (only \xe2\x80\x9c%s\xe2\x80\x9d permitted here)."
296
-
value svg_ns_url))))
297
+
"Bad value %s for the attribute %s (only %s permitted here)."
298
+
(q value) (q "xmlns") (q svg_ns_url)))))
297
299
| "xmlns:xlink" ->
298
300
if value <> "http://www.w3.org/1999/xlink" then
299
301
Message_collector.add_typed collector
300
302
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
301
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:link\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)."
302
-
value))))
303
+
"Bad value %s for the attribute %s (only %s permitted here)."
304
+
(q value) (q "xmlns:link") (q "http://www.w3.org/1999/xlink")))))
303
305
| _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
304
306
(* Other xmlns declarations are not allowed in HTML-embedded SVG *)
305
307
Message_collector.add_typed collector
···
324
326
let context = String.sub d !context_start (ctx_end - !context_start) in
325
327
Message_collector.add_typed collector
326
328
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
327
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected command but found \xe2\x80\x9c#\xe2\x80\x9d (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
328
-
d element context))));
329
+
"Bad value %s for attribute %s on element %s: Bad SVG path data: Expected command but found %s (context: %s)."
330
+
(q d) (q "d") (q element) (q "#") (q context)))));
329
331
i := len (* Stop processing *)
330
332
| _ ->
331
333
incr i
···
344
346
let context = String.sub d ctx_start (flag_end - ctx_start) in
345
347
Message_collector.add_typed collector
346
348
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
347
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected \xe2\x80\x9c0\xe2\x80\x9d or \xe2\x80\x9c1\xe2\x80\x9d for large-arc-flag for \xe2\x80\x9ca\xe2\x80\x9d command but found \xe2\x80\x9c%s\xe2\x80\x9d instead (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
348
-
d element flag context))))
349
+
"Bad value %s for attribute %s on element %s: Bad SVG path data: Expected %s or %s for large-arc-flag for %s command but found %s instead (context: %s)."
350
+
(q d) (q "d") (q element) (q "0") (q "1") (q "a") (q flag) (q context)))))
349
351
end
350
352
with Not_found -> ()
351
353
+73
-70
lib/htmlrw_check/specialized/url_checker.ml
+73
-70
lib/htmlrw_check/specialized/url_checker.ml
···
1
1
(** URL validation checker for href, src, action, and other URL attributes. *)
2
2
3
+
(** Quote helper for consistent message formatting. *)
4
+
let q = Error_code.q
5
+
3
6
(** Attributes that contain URLs and should be validated.
4
7
Note: srcset uses special microsyntax, not validated as URL here.
5
8
Note: input[value] is only checked for type="url", handled specially below. *)
···
44
47
let validate_ipv6_host host url attr_name element_name =
45
48
(* Host should be in format [xxxx:...] *)
46
49
if String.length host < 3 then
47
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
48
-
url attr_name element_name)
50
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
51
+
(q url) (q attr_name) (q element_name))
49
52
else begin
50
53
(* Check if all characters are valid IPv6 chars *)
51
54
let invalid_char = String.exists (fun c -> not (is_valid_ipv6_char c)) host in
52
55
if invalid_char then
53
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
54
-
url attr_name element_name)
56
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
57
+
(q url) (q attr_name) (q element_name))
55
58
else
56
59
None
57
60
end
···
239
242
let _ = contains_invalid_unicode decoded in
240
243
None
241
244
with Exit ->
242
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
243
-
url attr_name element_name)
245
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
246
+
(q url) (q attr_name) (q element_name))
244
247
245
248
(** Check if string contains a character (checking both ASCII and UTF-8 fullwidth variants). *)
246
249
let contains_percent_char s =
···
258
261
let decoded = percent_decode host in
259
262
(* Check for % character in decoded host - this catches fullwidth percent signs etc. *)
260
263
if contains_percent_char decoded then
261
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%%\xe2\x80\x9d is not allowed."
262
-
url attr_name element_name)
264
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
265
+
(q url) (q attr_name) (q element_name) (q "%"))
263
266
else
264
267
None
265
268
···
275
278
) port;
276
279
match !invalid_char with
277
280
| Some c ->
278
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in port: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
279
-
url attr_name element_name c)
281
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in port: %s is not allowed."
282
+
(q url) (q attr_name) (q element_name) (q (String.make 1 c)))
280
283
| None ->
281
284
(* Check port range *)
282
285
try
283
286
let port_num = int_of_string port in
284
287
if port_num >= 65536 then
285
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Port number must be less than 65536."
286
-
url attr_name element_name)
288
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Port number must be less than 65536."
289
+
(q url) (q attr_name) (q element_name))
287
290
else
288
291
None
289
292
with _ -> None
···
297
300
(* Check for empty host *)
298
301
let requires_host = List.mem scheme special_schemes in
299
302
if host = "" && requires_host && scheme <> "file" then
300
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: empty host."
301
-
url attr_name element_name)
303
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: empty host."
304
+
(q url) (q attr_name) (q element_name))
302
305
else
303
306
(* Check for invalid chars *)
304
307
let invalid_char =
···
306
309
in
307
310
match invalid_char with
308
311
| Some c ->
309
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
310
-
url attr_name element_name c)
312
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
313
+
(q url) (q attr_name) (q element_name) (q (String.make 1 c)))
311
314
| None ->
312
315
(* Check for | *)
313
316
if String.contains host '|' && not (is_pipe_allowed_in_host url host) then
314
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
315
-
url attr_name element_name)
317
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
318
+
(q url) (q attr_name) (q element_name) (q "|"))
316
319
(* Check for backslash in host *)
317
320
else if String.contains host '\\' then
318
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
319
-
url attr_name element_name)
321
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
322
+
(q url) (q attr_name) (q element_name) (q "\\"))
320
323
(* Check for space in host *)
321
324
else if String.contains host ' ' then
322
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
323
-
url attr_name element_name)
325
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
326
+
(q url) (q attr_name) (q element_name))
324
327
(* Check for invalid percent-encoded Unicode in host *)
325
328
else begin
326
329
match check_invalid_percent_encoded_unicode host url attr_name element_name with
···
342
345
let colon_pos = String.index url ':' in
343
346
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
344
347
if String.length after_colon < 2 || after_colon.[0] <> '/' || after_colon.[1] <> '/' then
345
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")."
346
-
url attr_name element_name)
348
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a slash (\"/\")."
349
+
(q url) (q attr_name) (q element_name))
347
350
else
348
351
None
349
352
end else
···
357
360
| Some scheme ->
358
361
if scheme = "data" && String.contains url '#' then
359
362
let url_type = if is_absolute_url then "Bad absolute URL:" else "Bad URL:" in
360
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s Fragment is not allowed for data: URIs according to RFC 2397."
361
-
url attr_name element_name url_type)
363
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: %s Fragment is not allowed for data: URIs according to RFC 2397."
364
+
(q url) (q attr_name) (q element_name) url_type)
362
365
else
363
366
None
364
367
···
375
378
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
376
379
(* data: URLs should NOT start with / - format is data:[mediatype][;base64],data *)
377
380
if String.length after_colon > 0 && after_colon.[0] = '/' then
378
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a token character or a semicolon but saw \xe2\x80\x9c/\xe2\x80\x9d instead."
379
-
url attr_name element_name)
381
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a token character or a semicolon but saw %s instead."
382
+
(q url) (q attr_name) (q element_name) (q "/"))
380
383
else
381
384
None
382
385
end else
···
393
396
let scheme_data = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
394
397
(* Check for tab in scheme data *)
395
398
if String.contains scheme_data '\t' then
396
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: tab is not allowed."
397
-
url attr_name element_name)
399
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: tab is not allowed."
400
+
(q url) (q attr_name) (q element_name))
398
401
(* Check for newline in scheme data *)
399
402
else if String.contains scheme_data '\n' then
400
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
401
-
url attr_name element_name)
403
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
404
+
(q url) (q attr_name) (q element_name))
402
405
(* Check for carriage return in scheme data *)
403
406
else if String.contains scheme_data '\r' then
404
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
405
-
url attr_name element_name)
407
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
408
+
(q url) (q attr_name) (q element_name))
406
409
(* Check for space in scheme data *)
407
410
else if String.contains scheme_data ' ' then
408
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: space is not allowed."
409
-
url attr_name element_name)
411
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: space is not allowed."
412
+
(q url) (q attr_name) (q element_name))
410
413
else
411
414
None
412
415
end else
···
449
452
let path = remove_query_fragment raw_path in
450
453
(* Check for space in path (not allowed) *)
451
454
if String.contains path ' ' then
452
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: space is not allowed."
453
-
url attr_name element_name)
455
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: space is not allowed."
456
+
(q url) (q attr_name) (q element_name))
454
457
(* Check for pipe in path (not allowed except in file:// authority) *)
455
458
else if String.contains path '|' then
456
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
457
-
url attr_name element_name)
459
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
460
+
(q url) (q attr_name) (q element_name) (q "|"))
458
461
(* Check for unescaped square brackets in path *)
459
462
else if String.contains path '[' then
460
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
461
-
url attr_name element_name)
463
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
464
+
(q url) (q attr_name) (q element_name) (q "["))
462
465
else
463
466
None
464
467
···
470
473
| None ->
471
474
(* Check for square brackets at start (not IPv6 - that requires scheme) *)
472
475
if String.length url > 0 && url.[0] = '[' then
473
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
474
-
url attr_name element_name)
476
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
477
+
(q url) (q attr_name) (q element_name) (q "["))
475
478
else
476
479
None
477
480
···
489
492
if i + 2 < len && is_hex_digit url.[i + 1] && is_hex_digit url.[i + 2] then
490
493
find_bare_percent (i + 3) (* Valid percent encoding, continue *)
491
494
else
492
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Percentage (\xe2\x80\x9c%%\xe2\x80\x9d) is not followed by two hexadecimal digits."
493
-
url attr_name element_name)
495
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Percentage (%s) is not followed by two hexadecimal digits."
496
+
(q url) (q attr_name) (q element_name) (q "%"))
494
497
end else
495
498
find_bare_percent (i + 1)
496
499
in
···
511
514
let query = String.sub url (query_start + 1) (query_end - query_start - 1) in
512
515
(* Check for unescaped space in query *)
513
516
if String.contains query ' ' then
514
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in query: space is not allowed."
515
-
url attr_name element_name)
517
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in query: space is not allowed."
518
+
(q url) (q attr_name) (q element_name))
516
519
else
517
520
None
518
521
with Not_found -> None (* No query string *)
···
524
527
let fragment = String.sub url (fragment_start + 1) (String.length url - fragment_start - 1) in
525
528
(* Check for backslash in fragment *)
526
529
if String.contains fragment '\\' then
527
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
528
-
url attr_name element_name)
530
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
531
+
(q url) (q attr_name) (q element_name) (q "\\"))
529
532
(* Check for second hash in fragment *)
530
533
else if String.contains fragment '#' then
531
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c#\xe2\x80\x9d is not allowed."
532
-
url attr_name element_name)
534
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
535
+
(q url) (q attr_name) (q element_name) (q "#"))
533
536
(* Check for space in fragment *)
534
537
else if String.contains fragment ' ' then
535
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: space is not allowed."
536
-
url attr_name element_name)
538
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: space is not allowed."
539
+
(q url) (q attr_name) (q element_name))
537
540
else
538
541
None
539
542
with Not_found -> None (* No fragment *)
···
572
575
let userinfo = String.sub authority 0 at in
573
576
(* Check for @ in userinfo (should be percent-encoded) *)
574
577
if String.contains userinfo '@' then
575
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: User or password contains an at symbol (\xe2\x80\x9c@\xe2\x80\x9d) not percent-encoded."
576
-
url attr_name element_name)
578
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: User or password contains an at symbol (%s) not percent-encoded."
579
+
(q url) (q attr_name) (q element_name) (q "@"))
577
580
(* Check for space *)
578
581
else if String.contains userinfo ' ' then
579
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: space is not allowed."
580
-
url attr_name element_name)
582
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: space is not allowed."
583
+
(q url) (q attr_name) (q element_name))
581
584
else begin
582
585
(* Check for non-ASCII characters (like emoji) using UTF-8 decoding *)
583
586
let find_non_ascii_char userinfo =
···
600
603
in
601
604
match find_non_ascii_char userinfo with
602
605
| Some bad_char ->
603
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%s\xe2\x80\x9d is not allowed."
604
-
url attr_name element_name bad_char)
606
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
607
+
(q url) (q attr_name) (q element_name) (q bad_char))
605
608
| None ->
606
609
(* Check for other invalid chars *)
607
610
let invalid = List.find_opt (fun c -> String.contains userinfo c) invalid_userinfo_chars in
608
611
match invalid with
609
612
| Some c ->
610
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
611
-
url attr_name element_name c)
613
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
614
+
(q url) (q attr_name) (q element_name) (q (String.make 1 c)))
612
615
| None -> None
613
616
end
614
617
with _ -> None
···
634
637
let attr_lower = String.lowercase_ascii attr_name in
635
638
if List.mem attr_lower must_be_non_empty ||
636
639
List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
637
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Must be non-empty."
638
-
original_url attr_name element_name)
640
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
641
+
(q original_url) (q attr_name) (q element_name))
639
642
else
640
643
None
641
644
end
···
647
650
let last = original_url.[String.length original_url - 1] in
648
651
last = ' ' || last = '\t' in
649
652
if has_leading || has_trailing then
650
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character: leading/trailing ASCII whitespace."
651
-
original_url attr_name element_name)
653
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character: leading/trailing ASCII whitespace."
654
+
(q original_url) (q attr_name) (q element_name))
652
655
else None
653
656
(* Check scheme data for non-special schemes FIRST - handles tab/newline/CR in scheme data *)
654
657
else begin
···
657
660
| None ->
658
661
(* Check for newlines/tabs in special scheme URLs *)
659
662
if String.contains url '\n' || String.contains url '\r' || String.contains url '\t' then
660
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Tab, new line or carriage return found."
661
-
url attr_name element_name)
663
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Tab, new line or carriage return found."
664
+
(q url) (q attr_name) (q element_name))
662
665
else begin
663
666
(* Check for relative URL issues first *)
664
667
match check_relative_url url attr_name element_name with
···
697
700
698
701
(* Check for backslash AFTER special scheme check *)
699
702
if String.contains url '\\' then
700
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Backslash (\"\\\") used as path segment delimiter."
701
-
url attr_name element_name)
703
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Backslash (\"\\\") used as path segment delimiter."
704
+
(q url) (q attr_name) (q element_name))
702
705
else
703
706
704
707
(* Check path segment for illegal characters *)