+3
-1
lib/htmlrw_check/datatype/dt_language.ml
+3
-1
lib/htmlrw_check/datatype/dt_language.ml
···
1
(** Helper functions for language tag validation *)
2
3
let is_lower_alpha c = c >= 'a' && c <= 'z'
4
let is_upper_alpha c = c >= 'A' && c <= 'Z'
5
let is_alpha c = is_lower_alpha c || is_upper_alpha c
···
123
if is_valid_extlang first_lower second_lower then
124
Ok ()
125
else
126
-
Error (Printf.sprintf "Bad extlang subtag \xe2\x80\x9c%s\xe2\x80\x9d" second_lower)
127
else
128
Ok () (* Not an extlang pattern, continue *)
129
| [] -> Ok ())
···
1
(** Helper functions for language tag validation *)
2
3
+
let q = Error_code.q
4
+
5
let is_lower_alpha c = c >= 'a' && c <= 'z'
6
let is_upper_alpha c = c >= 'A' && c <= 'Z'
7
let is_alpha c = is_lower_alpha c || is_upper_alpha c
···
125
if is_valid_extlang first_lower second_lower then
126
Ok ()
127
else
128
+
Error (Printf.sprintf "Bad extlang subtag %s" (q second_lower))
129
else
130
Ok () (* Not an extlang pattern, continue *)
131
| [] -> Ok ())
+8
-6
lib/htmlrw_check/parse_error_bridge.ml
+8
-6
lib/htmlrw_check/parse_error_bridge.ml
···
3
SPDX-License-Identifier: MIT
4
---------------------------------------------------------------------------*)
5
6
(** Generate human-readable message for a parse error code *)
7
let message_of_parse_error code =
8
let code_str = Html5rw.Parse_error_code.to_string code in
···
57
let cp = int_of_string ("0x" ^ cp_str) in
58
Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
59
else if s = "no-p-element-in-scope" then
60
-
"No \xe2\x80\x9cp\xe2\x80\x9d element in scope but a \xe2\x80\x9cp\xe2\x80\x9d end tag seen."
61
else if s = "end-tag-p-implied-but-open-elements" then
62
-
"End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements."
63
else if s = "end-tag-br" then
64
-
"End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65
else if s = "expected-closing-tag-but-got-eof" then
66
"End of file seen and there were open elements."
67
else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
68
let colon_pos = String.index s ':' in
69
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70
-
Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71
else if String.starts_with ~prefix:"unexpected-end-tag:" s then
72
let element = String.sub s 19 (String.length s - 19) in
73
-
Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74
else if String.starts_with ~prefix:"start-tag-in-table:" s then
75
let tag = String.sub s 19 (String.length s - 19) in
76
-
Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77
else
78
Printf.sprintf "Parse error: %s" s
79
with _ -> Printf.sprintf "Parse error: %s" s)
···
3
SPDX-License-Identifier: MIT
4
---------------------------------------------------------------------------*)
5
6
+
let q = Error_code.q
7
+
8
(** Generate human-readable message for a parse error code *)
9
let message_of_parse_error code =
10
let code_str = Html5rw.Parse_error_code.to_string code in
···
59
let cp = int_of_string ("0x" ^ cp_str) in
60
Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp
61
else if s = "no-p-element-in-scope" then
62
+
Printf.sprintf "No %s element in scope but a %s end tag seen." (q "p") (q "p")
63
else if s = "end-tag-p-implied-but-open-elements" then
64
+
Printf.sprintf "End tag %s implied, but there were open elements." (q "p")
65
else if s = "end-tag-br" then
66
+
Printf.sprintf "End tag %s." (q "br")
67
else if s = "expected-closing-tag-but-got-eof" then
68
"End of file seen and there were open elements."
69
else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
70
let colon_pos = String.index s ':' in
71
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
72
+
Printf.sprintf "Bad start tag in %s in %s in %s." (q element) (q "noscript") (q "head")
73
else if String.starts_with ~prefix:"unexpected-end-tag:" s then
74
let element = String.sub s 19 (String.length s - 19) in
75
+
Printf.sprintf "Stray end tag %s." (q element)
76
else if String.starts_with ~prefix:"start-tag-in-table:" s then
77
let tag = String.sub s 19 (String.length s - 19) in
78
+
Printf.sprintf "Start tag %s seen in %s." (q tag) (q "table")
79
else
80
Printf.sprintf "Parse error: %s" s
81
with _ -> Printf.sprintf "Parse error: %s" s)
+4
-2
lib/htmlrw_check/semantic/obsolete_checker.ml
+4
-2
lib/htmlrw_check/semantic/obsolete_checker.ml
···
1
(** Obsolete elements map: element name -> suggestion message *)
2
let obsolete_elements =
3
let tbl = Hashtbl.create 32 in
···
131
"Use the HTTP OPTIONS feature instead.";
132
133
register "name" ["a"]
134
-
"Consider putting an \xe2\x80\x9cid\xe2\x80\x9d attribute on the nearest container instead.";
135
136
register "name" ["embed"; "img"; "option"]
137
-
"Use the \xe2\x80\x9cid\xe2\x80\x9d attribute instead.";
138
139
register "nohref" ["area"]
140
"Omitting the \"href\" attribute is sufficient.";
···
1
+
let q = Error_code.q
2
+
3
(** Obsolete elements map: element name -> suggestion message *)
4
let obsolete_elements =
5
let tbl = Hashtbl.create 32 in
···
133
"Use the HTTP OPTIONS feature instead.";
134
135
register "name" ["a"]
136
+
(Printf.sprintf "Consider putting an %s attribute on the nearest container instead." (q "id"));
137
138
register "name" ["embed"; "img"; "option"]
139
+
(Printf.sprintf "Use the %s attribute instead." (q "id"));
140
141
register "nohref" ["area"]
142
"Omitting the \"href\" attribute is sufficient.";
+2
-4
lib/htmlrw_check/semantic/required_attr_checker.ml
+2
-4
lib/htmlrw_check/semantic/required_attr_checker.ml
···
1
(** Required attribute checker implementation. *)
2
3
type state = {
4
mutable _in_figure : bool;
5
(** Track if we're inside a <figure> element (alt is more critical there) *)
···
81
in
82
83
if not valid then
84
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
85
Message_collector.add_typed collector
86
(`Generic (Printf.sprintf "A %s element must have either a %s attribute, a %s attribute with a %s attribute, or an %s attribute with a %s attribute."
87
(q "meta") (q "charset") (q "name")
···
122
let value_lower = String.lowercase_ascii value in
123
(* Valid values: empty string, auto, manual, hint *)
124
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
125
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
126
Message_collector.add_typed collector
127
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s."
128
(q value) (q "popover") (q element_name)))))
···
141
let value = float_of_string value_str in
142
let min_val = float_of_string min_str in
143
if min_val > value then
144
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
145
Message_collector.add_typed collector
146
(`Generic (Printf.sprintf "The value of the %s attribute must be less than or equal to the value of the %s attribute."
147
(q "min") (q "value")))
···
161
| Some max_str -> (try float_of_string max_str with _ -> 1.0)
162
in
163
if value > max_val then
164
-
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
165
(* Check which message to use based on whether max is present *)
166
if Attr_utils.has_attr "max" attrs then
167
Message_collector.add_typed collector
···
1
(** Required attribute checker implementation. *)
2
3
+
let q = Error_code.q
4
+
5
type state = {
6
mutable _in_figure : bool;
7
(** Track if we're inside a <figure> element (alt is more critical there) *)
···
83
in
84
85
if not valid then
86
Message_collector.add_typed collector
87
(`Generic (Printf.sprintf "A %s element must have either a %s attribute, a %s attribute with a %s attribute, or an %s attribute with a %s attribute."
88
(q "meta") (q "charset") (q "name")
···
123
let value_lower = String.lowercase_ascii value in
124
(* Valid values: empty string, auto, manual, hint *)
125
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
126
Message_collector.add_typed collector
127
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s."
128
(q value) (q "popover") (q element_name)))))
···
141
let value = float_of_string value_str in
142
let min_val = float_of_string min_str in
143
if min_val > value then
144
Message_collector.add_typed collector
145
(`Generic (Printf.sprintf "The value of the %s attribute must be less than or equal to the value of the %s attribute."
146
(q "min") (q "value")))
···
160
| Some max_str -> (try float_of_string max_str with _ -> 1.0)
161
in
162
if value > max_val then
163
(* Check which message to use based on whether max is present *)
164
if Attr_utils.has_attr "max" attrs then
165
Message_collector.add_typed collector
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
+24
-22
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
+24
-22
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
···
1
(** Attribute restrictions checker - validates that certain attributes
2
are not used on elements where they're not allowed. *)
3
4
(** List of (element, [disallowed attributes]) pairs for HTML elements. *)
5
let disallowed_attrs_html = [
6
(* Elements that cannot have href attribute (RDFa misuses) *)
···
174
if attr_value = "#" then
175
Message_collector.add_typed collector
176
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
177
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad hash-name reference: A hash-name reference must have at least one character after \xe2\x80\x9c#\xe2\x80\x9d."
178
-
attr_value attr_name name))))
179
end
180
) attrs
181
end;
···
190
| Error msg ->
191
Message_collector.add_typed collector
192
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
193
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: %s"
194
-
attr_value attr_name name msg))))
195
end
196
) attrs
197
end;
···
213
(* Determine specific error message *)
214
let error_msg =
215
if String.length attr_value = 0 then
216
-
Printf.sprintf "Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: The empty string is not a valid non-negative integer."
217
-
attr_name name
218
else if String.contains attr_value '%' then
219
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%%\xe2\x80\x9d instead."
220
-
attr_value attr_name name
221
else if String.length attr_value > 0 && attr_value.[0] = '-' then
222
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c-\xe2\x80\x9d instead."
223
-
attr_value attr_name name
224
else
225
(* Find first non-digit character *)
226
let bad_char =
···
234
in
235
match bad_char with
236
| Some c ->
237
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead."
238
-
attr_value attr_name name c
239
| None ->
240
-
Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad non-negative integer: Expected a digit."
241
-
attr_value attr_name name
242
in
243
Message_collector.add_typed collector
244
(`Attr (`Bad_value_generic (`Message error_msg)))
···
377
if count_codepoints key > 1 then
378
Message_collector.add_typed collector
379
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
380
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Key label has multiple characters. Each key label must be a single character."
381
-
attr_value attr_name name))))
382
) keys;
383
(* Check for duplicate keys *)
384
let rec find_duplicates seen = function
···
387
if List.mem k seen then
388
Message_collector.add_typed collector
389
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
390
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad key label list: Duplicate key label. Each key label must be unique."
391
-
attr_value attr_name name))))
392
else
393
find_duplicates (k :: seen) rest
394
in
···
405
if has_command && has_aria_expanded then
406
Message_collector.add_typed collector
407
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
408
-
`Condition "a \xe2\x80\x9ccommand\xe2\x80\x9d attribute")));
409
410
if has_popovertarget && has_aria_expanded then
411
Message_collector.add_typed collector
412
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
413
-
`Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
414
end;
415
416
(* Note: data-* uppercase check requires XML parsing which preserves case.
···
432
| Error msg ->
433
Message_collector.add_typed collector
434
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
435
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad media query: %s"
436
-
attr_value attr_name name msg))))
437
end
438
end
439
) attrs
···
1
(** Attribute restrictions checker - validates that certain attributes
2
are not used on elements where they're not allowed. *)
3
4
+
let q = Error_code.q
5
+
6
(** List of (element, [disallowed attributes]) pairs for HTML elements. *)
7
let disallowed_attrs_html = [
8
(* Elements that cannot have href attribute (RDFa misuses) *)
···
176
if attr_value = "#" then
177
Message_collector.add_typed collector
178
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
179
+
"Bad value %s for attribute %s on element %s: Bad hash-name reference: A hash-name reference must have at least one character after %s."
180
+
(q attr_value) (q attr_name) (q name) (q "#")))))
181
end
182
) attrs
183
end;
···
192
| Error msg ->
193
Message_collector.add_typed collector
194
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
195
+
"Bad value %s for attribute %s on element %s: Bad MIME type: %s"
196
+
(q attr_value) (q attr_name) (q name) msg))))
197
end
198
) attrs
199
end;
···
215
(* Determine specific error message *)
216
let error_msg =
217
if String.length attr_value = 0 then
218
+
Printf.sprintf "Bad value %s for attribute %s on element %s: The empty string is not a valid non-negative integer."
219
+
(q "") (q attr_name) (q name)
220
else if String.contains attr_value '%' then
221
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
222
+
(q attr_value) (q attr_name) (q name) (q "%")
223
else if String.length attr_value > 0 && attr_value.[0] = '-' then
224
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
225
+
(q attr_value) (q attr_name) (q name) (q "-")
226
else
227
(* Find first non-digit character *)
228
let bad_char =
···
236
in
237
match bad_char with
238
| Some c ->
239
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit but saw %s instead."
240
+
(q attr_value) (q attr_name) (q name) (q (String.make 1 c))
241
| None ->
242
+
Printf.sprintf "Bad value %s for attribute %s on element %s: Bad non-negative integer: Expected a digit."
243
+
(q attr_value) (q attr_name) (q name)
244
in
245
Message_collector.add_typed collector
246
(`Attr (`Bad_value_generic (`Message error_msg)))
···
379
if count_codepoints key > 1 then
380
Message_collector.add_typed collector
381
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
382
+
"Bad value %s for attribute %s on element %s: Bad key label list: Key label has multiple characters. Each key label must be a single character."
383
+
(q attr_value) (q attr_name) (q name)))))
384
) keys;
385
(* Check for duplicate keys *)
386
let rec find_duplicates seen = function
···
389
if List.mem k seen then
390
Message_collector.add_typed collector
391
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
392
+
"Bad value %s for attribute %s on element %s: Bad key label list: Duplicate key label. Each key label must be unique."
393
+
(q attr_value) (q attr_name) (q name)))))
394
else
395
find_duplicates (k :: seen) rest
396
in
···
407
if has_command && has_aria_expanded then
408
Message_collector.add_typed collector
409
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
410
+
`Condition (Printf.sprintf "a %s attribute" (q "command")))));
411
412
if has_popovertarget && has_aria_expanded then
413
Message_collector.add_typed collector
414
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
415
+
`Condition (Printf.sprintf "a %s attribute" (q "popovertarget")))))
416
end;
417
418
(* Note: data-* uppercase check requires XML parsing which preserves case.
···
434
| Error msg ->
435
Message_collector.add_typed collector
436
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
437
+
"Bad value %s for attribute %s on element %s: Bad media query: %s"
438
+
(q attr_value) (q attr_name) (q name) msg))))
439
end
440
end
441
) attrs
+18
-16
lib/htmlrw_check/specialized/datetime_checker.ml
+18
-16
lib/htmlrw_check/specialized/datetime_checker.ml
···
1
(** Datetime attribute validation checker *)
2
3
(** Elements that have datetime attribute *)
4
let datetime_elements = ["del"; "ins"; "time"]
5
···
346
if value <> String.trim value then begin
347
let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in
348
let date_msg = "Bad date: The literal did not satisfy the date format." in
349
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
350
-
value attr_name element_name tz_msg date_msg)
351
end
352
else
353
(* Try datetime with timezone first *)
···
355
| DtOk -> Ok (* Valid datetime with timezone *)
356
| DtWarning w ->
357
(* Valid but with warning - format matches Nu validator *)
358
-
Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
359
-
value attr_name element_name w)
360
| DtError tz_error ->
361
(* Try just date - valid for all elements *)
362
match validate_date value with
···
365
if has_suspicious_year value || has_old_year value then begin
366
let date_msg = "Bad date: Year may be mistyped." in
367
let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
368
-
Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
369
-
value attr_name element_name date_msg tz_msg)
370
end else
371
Ok (* Valid date with normal year *)
372
| (false, date_error) ->
···
394
| (true, _) -> Ok (* Valid duration P... *)
395
| (false, _) ->
396
(* Use simplified message for time element matching Nu validator format *)
397
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad time-datetime: The literal did not satisfy the time-datetime format."
398
-
value attr_name element_name)
399
end
400
else begin
401
(* del/ins only allow date or datetime-with-timezone *)
···
426
(* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427
Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428
if is_month_less_than_error then
429
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430
-
value attr_name element_name date_msg tz_msg)
431
else if is_tz_minutes_error || is_fraction_error then
432
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433
-
value attr_name element_name date_msg tz_msg)
434
else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436
-
value attr_name element_name tz_msg date_msg)
437
else
438
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439
-
value attr_name element_name tz_msg date_msg)
440
end
441
442
(** Checker state *)
···
1
(** Datetime attribute validation checker *)
2
3
+
let q = Error_code.q
4
+
5
(** Elements that have datetime attribute *)
6
let datetime_elements = ["del"; "ins"; "time"]
7
···
348
if value <> String.trim value then begin
349
let tz_msg = "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format." in
350
let date_msg = "Bad date: The literal did not satisfy the date format." in
351
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
352
+
(q value) (q attr_name) (q element_name) tz_msg date_msg)
353
end
354
else
355
(* Try datetime with timezone first *)
···
357
| DtOk -> Ok (* Valid datetime with timezone *)
358
| DtWarning w ->
359
(* Valid but with warning - format matches Nu validator *)
360
+
Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad datetime with timezone: %s Bad date: The literal did not satisfy the date format."
361
+
(q value) (q attr_name) (q element_name) w)
362
| DtError tz_error ->
363
(* Try just date - valid for all elements *)
364
match validate_date value with
···
367
if has_suspicious_year value || has_old_year value then begin
368
let date_msg = "Bad date: Year may be mistyped." in
369
let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
370
+
Warning (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
371
+
(q value) (q attr_name) (q element_name) date_msg tz_msg)
372
end else
373
Ok (* Valid date with normal year *)
374
| (false, date_error) ->
···
396
| (true, _) -> Ok (* Valid duration P... *)
397
| (false, _) ->
398
(* Use simplified message for time element matching Nu validator format *)
399
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad time-datetime: The literal did not satisfy the time-datetime format."
400
+
(q value) (q attr_name) (q element_name))
401
end
402
else begin
403
(* del/ins only allow date or datetime-with-timezone *)
···
428
(* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
429
Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
430
if is_month_less_than_error then
431
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
432
+
(q value) (q attr_name) (q element_name) date_msg tz_msg)
433
else if is_tz_minutes_error || is_fraction_error then
434
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
435
+
(q value) (q attr_name) (q element_name) date_msg tz_msg)
436
else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
437
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
438
+
(q value) (q attr_name) (q element_name) tz_msg date_msg)
439
else
440
+
Error (Printf.sprintf "Bad value %s for attribute %s on element %s: %s %s"
441
+
(q value) (q attr_name) (q element_name) tz_msg date_msg)
442
end
443
444
(** Checker state *)
+9
-6
lib/htmlrw_check/specialized/microdata_checker.ml
+9
-6
lib/htmlrw_check/specialized/microdata_checker.ml
···
2
3
Validates HTML5 microdata attributes. *)
4
5
(** Information about an itemscope. *)
6
type item_scope = {
7
element : string;
···
74
let url_trimmed = String.trim url in
75
if String.length url_trimmed = 0 then
76
Some (Printf.sprintf
77
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
78
-
original_value attr_name element)
79
else
80
(* First check if it has a scheme (required for absolute URL) *)
81
match Url_checker.extract_scheme url_trimmed with
82
| None ->
83
Some (Printf.sprintf
84
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
85
-
original_value attr_name element url)
86
| Some _ ->
87
(* Has a scheme - do comprehensive URL validation *)
88
match Url_checker.validate_url url element attr_name with
···
94
(* Escape backslashes in replacement string for Str.global_replace *)
95
let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
96
let error_msg = Str.global_replace
97
-
(Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
98
-
(Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
99
error_msg in
100
Some error_msg
101
···
2
3
Validates HTML5 microdata attributes. *)
4
5
+
(** Quote helper for consistent message formatting. *)
6
+
let q = Error_code.q
7
+
8
(** Information about an itemscope. *)
9
type item_scope = {
10
element : string;
···
77
let url_trimmed = String.trim url in
78
if String.length url_trimmed = 0 then
79
Some (Printf.sprintf
80
+
"Bad value %s for attribute %s on element %s: Bad absolute URL: Must be non-empty."
81
+
(q original_value) (q attr_name) (q element))
82
else
83
(* First check if it has a scheme (required for absolute URL) *)
84
match Url_checker.extract_scheme url_trimmed with
85
| None ->
86
Some (Printf.sprintf
87
+
"Bad value %s for attribute %s on element %s: Bad absolute URL: The string %s is not an absolute URL."
88
+
(q original_value) (q attr_name) (q element) (q url))
89
| Some _ ->
90
(* Has a scheme - do comprehensive URL validation *)
91
match Url_checker.validate_url url element attr_name with
···
97
(* Escape backslashes in replacement string for Str.global_replace *)
98
let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
99
let error_msg = Str.global_replace
100
+
(Str.regexp_string (Printf.sprintf "%s for attribute" (q url)))
101
+
(Printf.sprintf "%s for attribute" (q escaped_original))
102
error_msg in
103
Some error_msg
104
+32
-30
lib/htmlrw_check/specialized/mime_type_checker.ml
+32
-30
lib/htmlrw_check/specialized/mime_type_checker.ml
···
2
3
Validates MIME type values in type attributes. *)
4
5
(** Validate a MIME type value. Returns error message or None. *)
6
let validate_mime_type value element attr_name =
7
let len = String.length value in
8
if len = 0 then
9
Some (Printf.sprintf
10
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
11
-
value attr_name element)
12
else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
13
(* Check if this is a semicolon followed by only whitespace *)
14
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···
18
let params_trimmed = String.trim params in
19
if params_trimmed = "" then
20
Some (Printf.sprintf
21
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
22
-
value attr_name element)
23
else
24
Some (Printf.sprintf
25
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
26
-
value attr_name element)
27
| None ->
28
Some (Printf.sprintf
29
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
30
-
value attr_name element)
31
end
32
else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
33
Some (Printf.sprintf
34
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
35
-
value attr_name element)
36
else
37
(* Parse type/subtype *)
38
let slash_pos = try Some (String.index value '/') with Not_found -> None in
···
43
(match semicolon_pos with
44
| Some _ ->
45
Some (Printf.sprintf
46
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
47
-
value attr_name element)
48
| None ->
49
Some (Printf.sprintf
50
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
51
-
value attr_name element))
52
| Some slash_pos ->
53
(* Check for empty subtype *)
54
let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
···
60
let subtype_trimmed = String.trim subtype in
61
if subtype_trimmed = "" then
62
Some (Printf.sprintf
63
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
64
-
value attr_name element)
65
else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
66
(* Space before semicolon - also check parameter format *)
67
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···
72
let params_trimmed = String.trim params in
73
if params_trimmed = "" then
74
Some (Printf.sprintf
75
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
76
-
value attr_name element)
77
else
78
(* Check for param_name=value format *)
79
let eq_pos = try Some (String.index params '=') with Not_found -> None in
80
(match eq_pos with
81
| None ->
82
Some (Printf.sprintf
83
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
84
-
value attr_name element)
85
| Some _ -> None)
86
| None -> None)
87
else
···
94
let params_trimmed = String.trim params in
95
if params_trimmed = "" then
96
Some (Printf.sprintf
97
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
98
-
value attr_name element)
99
else
100
(* Check for param_name=value format *)
101
let eq_pos = try Some (String.index params '=') with Not_found -> None in
102
(match eq_pos with
103
| None ->
104
Some (Printf.sprintf
105
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
106
-
value attr_name element)
107
| Some eq_pos ->
108
let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
109
let param_value_trimmed = String.trim param_value in
110
if param_value_trimmed = "" then
111
Some (Printf.sprintf
112
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
113
-
value attr_name element)
114
else if param_value_trimmed.[0] = '"' then
115
(* Quoted string - check for closing quote *)
116
let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
···
127
in
128
if has_backslash_at_end then
129
Some (Printf.sprintf
130
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
131
-
value attr_name element)
132
else
133
Some (Printf.sprintf
134
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
135
-
value attr_name element))
136
else
137
None))
138
···
2
3
Validates MIME type values in type attributes. *)
4
5
+
let q = Error_code.q
6
+
7
(** Validate a MIME type value. Returns error message or None. *)
8
let validate_mime_type value element attr_name =
9
let len = String.length value in
10
if len = 0 then
11
Some (Printf.sprintf
12
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Empty value."
13
+
(q value) (q attr_name) (q element))
14
else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then begin
15
(* Check if this is a semicolon followed by only whitespace *)
16
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···
20
let params_trimmed = String.trim params in
21
if params_trimmed = "" then
22
Some (Printf.sprintf
23
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
24
+
(q value) (q attr_name) (q element))
25
else
26
Some (Printf.sprintf
27
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
28
+
(q value) (q attr_name) (q element))
29
| None ->
30
Some (Printf.sprintf
31
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Extraneous trailing whitespace."
32
+
(q value) (q attr_name) (q element))
33
end
34
else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
35
Some (Printf.sprintf
36
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Expected a token character but saw %s instead."
37
+
(q value) (q attr_name) (q element) (q " "))
38
else
39
(* Parse type/subtype *)
40
let slash_pos = try Some (String.index value '/') with Not_found -> None in
···
45
(match semicolon_pos with
46
| Some _ ->
47
Some (Printf.sprintf
48
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
49
+
(q value) (q attr_name) (q element))
50
| None ->
51
Some (Printf.sprintf
52
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
53
+
(q value) (q attr_name) (q element)))
54
| Some slash_pos ->
55
(* Check for empty subtype *)
56
let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
···
62
let subtype_trimmed = String.trim subtype in
63
if subtype_trimmed = "" then
64
Some (Printf.sprintf
65
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Subtype missing."
66
+
(q value) (q attr_name) (q element))
67
else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
68
(* Space before semicolon - also check parameter format *)
69
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
···
74
let params_trimmed = String.trim params in
75
if params_trimmed = "" then
76
Some (Printf.sprintf
77
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
78
+
(q value) (q attr_name) (q element))
79
else
80
(* Check for param_name=value format *)
81
let eq_pos = try Some (String.index params '=') with Not_found -> None in
82
(match eq_pos with
83
| None ->
84
Some (Printf.sprintf
85
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
86
+
(q value) (q attr_name) (q element))
87
| Some _ -> None)
88
| None -> None)
89
else
···
96
let params_trimmed = String.trim params in
97
if params_trimmed = "" then
98
Some (Printf.sprintf
99
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Semicolon seen but there was no parameter following it."
100
+
(q value) (q attr_name) (q element))
101
else
102
(* Check for param_name=value format *)
103
let eq_pos = try Some (String.index params '=') with Not_found -> None in
104
(match eq_pos with
105
| None ->
106
Some (Printf.sprintf
107
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
108
+
(q value) (q attr_name) (q element))
109
| Some eq_pos ->
110
let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
111
let param_value_trimmed = String.trim param_value in
112
if param_value_trimmed = "" then
113
Some (Printf.sprintf
114
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Parameter value missing."
115
+
(q value) (q attr_name) (q element))
116
else if param_value_trimmed.[0] = '"' then
117
(* Quoted string - check for closing quote *)
118
let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
···
129
in
130
if has_backslash_at_end then
131
Some (Printf.sprintf
132
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
133
+
(q value) (q attr_name) (q element))
134
else
135
Some (Printf.sprintf
136
+
"Bad value %s for attribute %s on element %s: Bad MIME type: Unfinished quoted string."
137
+
(q value) (q attr_name) (q element)))
138
else
139
None))
140
+10
-8
lib/htmlrw_check/specialized/svg_checker.ml
+10
-8
lib/htmlrw_check/specialized/svg_checker.ml
···
2
3
Validates SVG elements and attributes according to SVG 1.1/2 specifications. *)
4
5
type font_state = {
6
mutable has_missing_glyph : bool;
7
}
···
292
if value <> svg_ns_url then
293
Message_collector.add_typed collector
294
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
295
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns\xe2\x80\x9d (only \xe2\x80\x9c%s\xe2\x80\x9d permitted here)."
296
-
value svg_ns_url))))
297
| "xmlns:xlink" ->
298
if value <> "http://www.w3.org/1999/xlink" then
299
Message_collector.add_typed collector
300
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
301
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for the attribute \xe2\x80\x9cxmlns:link\xe2\x80\x9d (only \xe2\x80\x9chttp://www.w3.org/1999/xlink\xe2\x80\x9d permitted here)."
302
-
value))))
303
| _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
304
(* Other xmlns declarations are not allowed in HTML-embedded SVG *)
305
Message_collector.add_typed collector
···
324
let context = String.sub d !context_start (ctx_end - !context_start) in
325
Message_collector.add_typed collector
326
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
327
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected command but found \xe2\x80\x9c#\xe2\x80\x9d (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
328
-
d element context))));
329
i := len (* Stop processing *)
330
| _ ->
331
incr i
···
344
let context = String.sub d ctx_start (flag_end - ctx_start) in
345
Message_collector.add_typed collector
346
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
347
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9cd\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad SVG path data: Expected \xe2\x80\x9c0\xe2\x80\x9d or \xe2\x80\x9c1\xe2\x80\x9d for large-arc-flag for \xe2\x80\x9ca\xe2\x80\x9d command but found \xe2\x80\x9c%s\xe2\x80\x9d instead (context: \xe2\x80\x9c%s\xe2\x80\x9d)."
348
-
d element flag context))))
349
end
350
with Not_found -> ()
351
···
2
3
Validates SVG elements and attributes according to SVG 1.1/2 specifications. *)
4
5
+
let q = Error_code.q
6
+
7
type font_state = {
8
mutable has_missing_glyph : bool;
9
}
···
294
if value <> svg_ns_url then
295
Message_collector.add_typed collector
296
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
297
+
"Bad value %s for the attribute %s (only %s permitted here)."
298
+
(q value) (q "xmlns") (q svg_ns_url)))))
299
| "xmlns:xlink" ->
300
if value <> "http://www.w3.org/1999/xlink" then
301
Message_collector.add_typed collector
302
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
303
+
"Bad value %s for the attribute %s (only %s permitted here)."
304
+
(q value) (q "xmlns:link") (q "http://www.w3.org/1999/xlink")))))
305
| _ when String.starts_with ~prefix:"xmlns:" attr && attr <> "xmlns:xlink" ->
306
(* Other xmlns declarations are not allowed in HTML-embedded SVG *)
307
Message_collector.add_typed collector
···
326
let context = String.sub d !context_start (ctx_end - !context_start) in
327
Message_collector.add_typed collector
328
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
329
+
"Bad value %s for attribute %s on element %s: Bad SVG path data: Expected command but found %s (context: %s)."
330
+
(q d) (q "d") (q element) (q "#") (q context)))));
331
i := len (* Stop processing *)
332
| _ ->
333
incr i
···
346
let context = String.sub d ctx_start (flag_end - ctx_start) in
347
Message_collector.add_typed collector
348
(`Attr (`Bad_value_generic (`Message (Printf.sprintf
349
+
"Bad value %s for attribute %s on element %s: Bad SVG path data: Expected %s or %s for large-arc-flag for %s command but found %s instead (context: %s)."
350
+
(q d) (q "d") (q element) (q "0") (q "1") (q "a") (q flag) (q context)))))
351
end
352
with Not_found -> ()
353
+73
-70
lib/htmlrw_check/specialized/url_checker.ml
+73
-70
lib/htmlrw_check/specialized/url_checker.ml
···
1
(** URL validation checker for href, src, action, and other URL attributes. *)
2
3
(** Attributes that contain URLs and should be validated.
4
Note: srcset uses special microsyntax, not validated as URL here.
5
Note: input[value] is only checked for type="url", handled specially below. *)
···
44
let validate_ipv6_host host url attr_name element_name =
45
(* Host should be in format [xxxx:...] *)
46
if String.length host < 3 then
47
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
48
-
url attr_name element_name)
49
else begin
50
(* Check if all characters are valid IPv6 chars *)
51
let invalid_char = String.exists (fun c -> not (is_valid_ipv6_char c)) host in
52
if invalid_char then
53
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character."
54
-
url attr_name element_name)
55
else
56
None
57
end
···
239
let _ = contains_invalid_unicode decoded in
240
None
241
with Exit ->
242
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
243
-
url attr_name element_name)
244
245
(** Check if string contains a character (checking both ASCII and UTF-8 fullwidth variants). *)
246
let contains_percent_char s =
···
258
let decoded = percent_decode host in
259
(* Check for % character in decoded host - this catches fullwidth percent signs etc. *)
260
if contains_percent_char decoded then
261
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%%\xe2\x80\x9d is not allowed."
262
-
url attr_name element_name)
263
else
264
None
265
···
275
) port;
276
match !invalid_char with
277
| Some c ->
278
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in port: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
279
-
url attr_name element_name c)
280
| None ->
281
(* Check port range *)
282
try
283
let port_num = int_of_string port in
284
if port_num >= 65536 then
285
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Port number must be less than 65536."
286
-
url attr_name element_name)
287
else
288
None
289
with _ -> None
···
297
(* Check for empty host *)
298
let requires_host = List.mem scheme special_schemes in
299
if host = "" && requires_host && scheme <> "file" then
300
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: empty host."
301
-
url attr_name element_name)
302
else
303
(* Check for invalid chars *)
304
let invalid_char =
···
306
in
307
match invalid_char with
308
| Some c ->
309
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
310
-
url attr_name element_name c)
311
| None ->
312
(* Check for | *)
313
if String.contains host '|' && not (is_pipe_allowed_in_host url host) then
314
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
315
-
url attr_name element_name)
316
(* Check for backslash in host *)
317
else if String.contains host '\\' then
318
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
319
-
url attr_name element_name)
320
(* Check for space in host *)
321
else if String.contains host ' ' then
322
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
323
-
url attr_name element_name)
324
(* Check for invalid percent-encoded Unicode in host *)
325
else begin
326
match check_invalid_percent_encoded_unicode host url attr_name element_name with
···
342
let colon_pos = String.index url ':' in
343
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
344
if String.length after_colon < 2 || after_colon.[0] <> '/' || after_colon.[1] <> '/' then
345
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")."
346
-
url attr_name element_name)
347
else
348
None
349
end else
···
357
| Some scheme ->
358
if scheme = "data" && String.contains url '#' then
359
let url_type = if is_absolute_url then "Bad absolute URL:" else "Bad URL:" in
360
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s Fragment is not allowed for data: URIs according to RFC 2397."
361
-
url attr_name element_name url_type)
362
else
363
None
364
···
375
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
376
(* data: URLs should NOT start with / - format is data:[mediatype][;base64],data *)
377
if String.length after_colon > 0 && after_colon.[0] = '/' then
378
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a token character or a semicolon but saw \xe2\x80\x9c/\xe2\x80\x9d instead."
379
-
url attr_name element_name)
380
else
381
None
382
end else
···
393
let scheme_data = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
394
(* Check for tab in scheme data *)
395
if String.contains scheme_data '\t' then
396
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: tab is not allowed."
397
-
url attr_name element_name)
398
(* Check for newline in scheme data *)
399
else if String.contains scheme_data '\n' then
400
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
401
-
url attr_name element_name)
402
(* Check for carriage return in scheme data *)
403
else if String.contains scheme_data '\r' then
404
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: line break is not allowed."
405
-
url attr_name element_name)
406
(* Check for space in scheme data *)
407
else if String.contains scheme_data ' ' then
408
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in scheme data: space is not allowed."
409
-
url attr_name element_name)
410
else
411
None
412
end else
···
449
let path = remove_query_fragment raw_path in
450
(* Check for space in path (not allowed) *)
451
if String.contains path ' ' then
452
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: space is not allowed."
453
-
url attr_name element_name)
454
(* Check for pipe in path (not allowed except in file:// authority) *)
455
else if String.contains path '|' then
456
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c|\xe2\x80\x9d is not allowed."
457
-
url attr_name element_name)
458
(* Check for unescaped square brackets in path *)
459
else if String.contains path '[' then
460
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
461
-
url attr_name element_name)
462
else
463
None
464
···
470
| None ->
471
(* Check for square brackets at start (not IPv6 - that requires scheme) *)
472
if String.length url > 0 && url.[0] = '[' then
473
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in path segment: \xe2\x80\x9c[\xe2\x80\x9d is not allowed."
474
-
url attr_name element_name)
475
else
476
None
477
···
489
if i + 2 < len && is_hex_digit url.[i + 1] && is_hex_digit url.[i + 2] then
490
find_bare_percent (i + 3) (* Valid percent encoding, continue *)
491
else
492
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Percentage (\xe2\x80\x9c%%\xe2\x80\x9d) is not followed by two hexadecimal digits."
493
-
url attr_name element_name)
494
end else
495
find_bare_percent (i + 1)
496
in
···
511
let query = String.sub url (query_start + 1) (query_end - query_start - 1) in
512
(* Check for unescaped space in query *)
513
if String.contains query ' ' then
514
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in query: space is not allowed."
515
-
url attr_name element_name)
516
else
517
None
518
with Not_found -> None (* No query string *)
···
524
let fragment = String.sub url (fragment_start + 1) (String.length url - fragment_start - 1) in
525
(* Check for backslash in fragment *)
526
if String.contains fragment '\\' then
527
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c\\\xe2\x80\x9d is not allowed."
528
-
url attr_name element_name)
529
(* Check for second hash in fragment *)
530
else if String.contains fragment '#' then
531
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: \xe2\x80\x9c#\xe2\x80\x9d is not allowed."
532
-
url attr_name element_name)
533
(* Check for space in fragment *)
534
else if String.contains fragment ' ' then
535
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in fragment: space is not allowed."
536
-
url attr_name element_name)
537
else
538
None
539
with Not_found -> None (* No fragment *)
···
572
let userinfo = String.sub authority 0 at in
573
(* Check for @ in userinfo (should be percent-encoded) *)
574
if String.contains userinfo '@' then
575
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: User or password contains an at symbol (\xe2\x80\x9c@\xe2\x80\x9d) not percent-encoded."
576
-
url attr_name element_name)
577
(* Check for space *)
578
else if String.contains userinfo ' ' then
579
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: space is not allowed."
580
-
url attr_name element_name)
581
else begin
582
(* Check for non-ASCII characters (like emoji) using UTF-8 decoding *)
583
let find_non_ascii_char userinfo =
···
600
in
601
match find_non_ascii_char userinfo with
602
| Some bad_char ->
603
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%s\xe2\x80\x9d is not allowed."
604
-
url attr_name element_name bad_char)
605
| None ->
606
(* Check for other invalid chars *)
607
let invalid = List.find_opt (fun c -> String.contains userinfo c) invalid_userinfo_chars in
608
match invalid with
609
| Some c ->
610
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character in user or password: \xe2\x80\x9c%c\xe2\x80\x9d is not allowed."
611
-
url attr_name element_name c)
612
| None -> None
613
end
614
with _ -> None
···
634
let attr_lower = String.lowercase_ascii attr_name in
635
if List.mem attr_lower must_be_non_empty ||
636
List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
637
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Must be non-empty."
638
-
original_url attr_name element_name)
639
else
640
None
641
end
···
647
let last = original_url.[String.length original_url - 1] in
648
last = ' ' || last = '\t' in
649
if has_leading || has_trailing then
650
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Illegal character: leading/trailing ASCII whitespace."
651
-
original_url attr_name element_name)
652
else None
653
(* Check scheme data for non-special schemes FIRST - handles tab/newline/CR in scheme data *)
654
else begin
···
657
| None ->
658
(* Check for newlines/tabs in special scheme URLs *)
659
if String.contains url '\n' || String.contains url '\r' || String.contains url '\t' then
660
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Tab, new line or carriage return found."
661
-
url attr_name element_name)
662
else begin
663
(* Check for relative URL issues first *)
664
match check_relative_url url attr_name element_name with
···
697
698
(* Check for backslash AFTER special scheme check *)
699
if String.contains url '\\' then
700
-
Some (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Backslash (\"\\\") used as path segment delimiter."
701
-
url attr_name element_name)
702
else
703
704
(* Check path segment for illegal characters *)
···
1
(** URL validation checker for href, src, action, and other URL attributes. *)
2
3
+
(** Quote helper for consistent message formatting. *)
4
+
let q = Error_code.q
5
+
6
(** Attributes that contain URLs and should be validated.
7
Note: srcset uses special microsyntax, not validated as URL here.
8
Note: input[value] is only checked for type="url", handled specially below. *)
···
47
let validate_ipv6_host host url attr_name element_name =
48
(* Host should be in format [xxxx:...] *)
49
if String.length host < 3 then
50
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
51
+
(q url) (q attr_name) (q element_name))
52
else begin
53
(* Check if all characters are valid IPv6 chars *)
54
let invalid_char = String.exists (fun c -> not (is_valid_ipv6_char c)) host in
55
if invalid_char then
56
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character."
57
+
(q url) (q attr_name) (q element_name))
58
else
59
None
60
end
···
242
let _ = contains_invalid_unicode decoded in
243
None
244
with Exit ->
245
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: A label or domain name contains disallowed characters.."
246
+
(q url) (q attr_name) (q element_name))
247
248
(** Check if string contains a character (checking both ASCII and UTF-8 fullwidth variants). *)
249
let contains_percent_char s =
···
261
let decoded = percent_decode host in
262
(* Check for % character in decoded host - this catches fullwidth percent signs etc. *)
263
if contains_percent_char decoded then
264
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
265
+
(q url) (q attr_name) (q element_name) (q "%"))
266
else
267
None
268
···
278
) port;
279
match !invalid_char with
280
| Some c ->
281
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in port: %s is not allowed."
282
+
(q url) (q attr_name) (q element_name) (q (String.make 1 c)))
283
| None ->
284
(* Check port range *)
285
try
286
let port_num = int_of_string port in
287
if port_num >= 65536 then
288
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Port number must be less than 65536."
289
+
(q url) (q attr_name) (q element_name))
290
else
291
None
292
with _ -> None
···
300
(* Check for empty host *)
301
let requires_host = List.mem scheme special_schemes in
302
if host = "" && requires_host && scheme <> "file" then
303
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: empty host."
304
+
(q url) (q attr_name) (q element_name))
305
else
306
(* Check for invalid chars *)
307
let invalid_char =
···
309
in
310
match invalid_char with
311
| Some c ->
312
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
313
+
(q url) (q attr_name) (q element_name) (q (String.make 1 c)))
314
| None ->
315
(* Check for | *)
316
if String.contains host '|' && not (is_pipe_allowed_in_host url host) then
317
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
318
+
(q url) (q attr_name) (q element_name) (q "|"))
319
(* Check for backslash in host *)
320
else if String.contains host '\\' then
321
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: %s is not allowed."
322
+
(q url) (q attr_name) (q element_name) (q "\\"))
323
(* Check for space in host *)
324
else if String.contains host ' ' then
325
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Invalid host: Illegal character in domain: space is not allowed."
326
+
(q url) (q attr_name) (q element_name))
327
(* Check for invalid percent-encoded Unicode in host *)
328
else begin
329
match check_invalid_percent_encoded_unicode host url attr_name element_name with
···
345
let colon_pos = String.index url ':' in
346
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
347
if String.length after_colon < 2 || after_colon.[0] <> '/' || after_colon.[1] <> '/' then
348
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a slash (\"/\")."
349
+
(q url) (q attr_name) (q element_name))
350
else
351
None
352
end else
···
360
| Some scheme ->
361
if scheme = "data" && String.contains url '#' then
362
let url_type = if is_absolute_url then "Bad absolute URL:" else "Bad URL:" in
363
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: %s Fragment is not allowed for data: URIs according to RFC 2397."
364
+
(q url) (q attr_name) (q element_name) url_type)
365
else
366
None
367
···
378
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
379
(* data: URLs should NOT start with / - format is data:[mediatype][;base64],data *)
380
if String.length after_colon > 0 && after_colon.[0] = '/' then
381
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Expected a token character or a semicolon but saw %s instead."
382
+
(q url) (q attr_name) (q element_name) (q "/"))
383
else
384
None
385
end else
···
396
let scheme_data = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
397
(* Check for tab in scheme data *)
398
if String.contains scheme_data '\t' then
399
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: tab is not allowed."
400
+
(q url) (q attr_name) (q element_name))
401
(* Check for newline in scheme data *)
402
else if String.contains scheme_data '\n' then
403
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
404
+
(q url) (q attr_name) (q element_name))
405
(* Check for carriage return in scheme data *)
406
else if String.contains scheme_data '\r' then
407
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: line break is not allowed."
408
+
(q url) (q attr_name) (q element_name))
409
(* Check for space in scheme data *)
410
else if String.contains scheme_data ' ' then
411
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in scheme data: space is not allowed."
412
+
(q url) (q attr_name) (q element_name))
413
else
414
None
415
end else
···
452
let path = remove_query_fragment raw_path in
453
(* Check for space in path (not allowed) *)
454
if String.contains path ' ' then
455
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: space is not allowed."
456
+
(q url) (q attr_name) (q element_name))
457
(* Check for pipe in path (not allowed except in file:// authority) *)
458
else if String.contains path '|' then
459
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
460
+
(q url) (q attr_name) (q element_name) (q "|"))
461
(* Check for unescaped square brackets in path *)
462
else if String.contains path '[' then
463
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
464
+
(q url) (q attr_name) (q element_name) (q "["))
465
else
466
None
467
···
473
| None ->
474
(* Check for square brackets at start (not IPv6 - that requires scheme) *)
475
if String.length url > 0 && url.[0] = '[' then
476
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in path segment: %s is not allowed."
477
+
(q url) (q attr_name) (q element_name) (q "["))
478
else
479
None
480
···
492
if i + 2 < len && is_hex_digit url.[i + 1] && is_hex_digit url.[i + 2] then
493
find_bare_percent (i + 3) (* Valid percent encoding, continue *)
494
else
495
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Percentage (%s) is not followed by two hexadecimal digits."
496
+
(q url) (q attr_name) (q element_name) (q "%"))
497
end else
498
find_bare_percent (i + 1)
499
in
···
514
let query = String.sub url (query_start + 1) (query_end - query_start - 1) in
515
(* Check for unescaped space in query *)
516
if String.contains query ' ' then
517
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in query: space is not allowed."
518
+
(q url) (q attr_name) (q element_name))
519
else
520
None
521
with Not_found -> None (* No query string *)
···
527
let fragment = String.sub url (fragment_start + 1) (String.length url - fragment_start - 1) in
528
(* Check for backslash in fragment *)
529
if String.contains fragment '\\' then
530
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
531
+
(q url) (q attr_name) (q element_name) (q "\\"))
532
(* Check for second hash in fragment *)
533
else if String.contains fragment '#' then
534
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: %s is not allowed."
535
+
(q url) (q attr_name) (q element_name) (q "#"))
536
(* Check for space in fragment *)
537
else if String.contains fragment ' ' then
538
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in fragment: space is not allowed."
539
+
(q url) (q attr_name) (q element_name))
540
else
541
None
542
with Not_found -> None (* No fragment *)
···
575
let userinfo = String.sub authority 0 at in
576
(* Check for @ in userinfo (should be percent-encoded) *)
577
if String.contains userinfo '@' then
578
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: User or password contains an at symbol (%s) not percent-encoded."
579
+
(q url) (q attr_name) (q element_name) (q "@"))
580
(* Check for space *)
581
else if String.contains userinfo ' ' then
582
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: space is not allowed."
583
+
(q url) (q attr_name) (q element_name))
584
else begin
585
(* Check for non-ASCII characters (like emoji) using UTF-8 decoding *)
586
let find_non_ascii_char userinfo =
···
603
in
604
match find_non_ascii_char userinfo with
605
| Some bad_char ->
606
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
607
+
(q url) (q attr_name) (q element_name) (q bad_char))
608
| None ->
609
(* Check for other invalid chars *)
610
let invalid = List.find_opt (fun c -> String.contains userinfo c) invalid_userinfo_chars in
611
match invalid with
612
| Some c ->
613
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character in user or password: %s is not allowed."
614
+
(q url) (q attr_name) (q element_name) (q (String.make 1 c)))
615
| None -> None
616
end
617
with _ -> None
···
637
let attr_lower = String.lowercase_ascii attr_name in
638
if List.mem attr_lower must_be_non_empty ||
639
List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
640
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
641
+
(q original_url) (q attr_name) (q element_name))
642
else
643
None
644
end
···
650
let last = original_url.[String.length original_url - 1] in
651
last = ' ' || last = '\t' in
652
if has_leading || has_trailing then
653
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Illegal character: leading/trailing ASCII whitespace."
654
+
(q original_url) (q attr_name) (q element_name))
655
else None
656
(* Check scheme data for non-special schemes FIRST - handles tab/newline/CR in scheme data *)
657
else begin
···
660
| None ->
661
(* Check for newlines/tabs in special scheme URLs *)
662
if String.contains url '\n' || String.contains url '\r' || String.contains url '\t' then
663
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Tab, new line or carriage return found."
664
+
(q url) (q attr_name) (q element_name))
665
else begin
666
(* Check for relative URL issues first *)
667
match check_relative_url url attr_name element_name with
···
700
701
(* Check for backslash AFTER special scheme check *)
702
if String.contains url '\\' then
703
+
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Backslash (\"\\\") used as path segment delimiter."
704
+
(q url) (q attr_name) (q element_name))
705
else
706
707
(* Check path segment for illegal characters *)