+3
-1
lib/html5_checker/checker_registry.ml
+3
-1
lib/html5_checker/checker_registry.ml
···
37
37
Hashtbl.replace reg "option" Option_checker.checker;
38
38
Hashtbl.replace reg "language" Language_checker.checker;
39
39
Hashtbl.replace reg "microdata" Microdata_checker.checker;
40
-
(* Hashtbl.replace reg "table" Table_checker.checker; *)
40
+
Hashtbl.replace reg "importmap" Importmap_checker.checker;
41
+
Hashtbl.replace reg "table" Table_checker.checker;
42
+
Hashtbl.replace reg "mime-type" Mime_type_checker.checker;
41
43
(* Hashtbl.replace reg "heading" Heading_checker.checker; *)
42
44
(* Hashtbl.replace reg "content" Content_checker.checker; *)
43
45
reg
+336
lib/html5_checker/specialized/importmap_checker.ml
+336
lib/html5_checker/specialized/importmap_checker.ml
···
1
+
(** Importmap validation checker.
2
+
3
+
Validates that <script type="importmap"> elements contain valid JSON
4
+
and conform to importmap structural requirements. *)
5
+
6
+
type state = {
7
+
mutable in_importmap : bool;
8
+
content : Buffer.t;
9
+
}
10
+
11
+
let create () = {
12
+
in_importmap = false;
13
+
content = Buffer.create 256;
14
+
}
15
+
16
+
let reset state =
17
+
state.in_importmap <- false;
18
+
Buffer.clear state.content
19
+
20
+
(** Simple JSON value representation *)
21
+
type json =
22
+
| JNull
23
+
| JBool of bool
24
+
| JNumber of float
25
+
| JString of string
26
+
| JArray of json list
27
+
| JObject of (string * json) list
28
+
29
+
(** Simple JSON parser *)
30
+
let parse_json s_orig =
31
+
let s = String.trim s_orig in
32
+
let len = String.length s in
33
+
if len = 0 then Error "Empty JSON"
34
+
else
35
+
let pos = ref 0 in
36
+
37
+
let skip_ws () =
38
+
while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t' || s.[!pos] = '\n' || s.[!pos] = '\r') do
39
+
incr pos
40
+
done
41
+
in
42
+
43
+
let peek () = if !pos < len then Some s.[!pos] else None in
44
+
let consume () = let c = s.[!pos] in incr pos; c in
45
+
46
+
let rec parse_value () =
47
+
skip_ws ();
48
+
match peek () with
49
+
| None -> Error "Unexpected end of input"
50
+
| Some '{' -> parse_object ()
51
+
| Some '[' -> parse_array ()
52
+
| Some '"' -> parse_string ()
53
+
| Some 't' -> parse_true ()
54
+
| Some 'f' -> parse_false ()
55
+
| Some 'n' -> parse_null ()
56
+
| Some c when c = '-' || (c >= '0' && c <= '9') -> parse_number ()
57
+
| Some _ -> Error "Unexpected character"
58
+
59
+
and parse_object () =
60
+
ignore (consume ()); (* consume { *)
61
+
skip_ws ();
62
+
match peek () with
63
+
| Some '}' -> ignore (consume ()); Ok (JObject [])
64
+
| _ ->
65
+
let rec parse_members acc =
66
+
skip_ws ();
67
+
match parse_string () with
68
+
| Error e -> Error e
69
+
| Ok (JString key) ->
70
+
skip_ws ();
71
+
(match peek () with
72
+
| Some ':' ->
73
+
ignore (consume ());
74
+
(match parse_value () with
75
+
| Error e -> Error e
76
+
| Ok value ->
77
+
skip_ws ();
78
+
let acc' = (key, value) :: acc in
79
+
match peek () with
80
+
| Some ',' -> ignore (consume ()); parse_members acc'
81
+
| Some '}' -> ignore (consume ()); Ok (JObject (List.rev acc'))
82
+
| _ -> Error "Expected ',' or '}'")
83
+
| _ -> Error "Expected ':'")
84
+
| Ok _ -> Error "Expected string key"
85
+
in
86
+
parse_members []
87
+
88
+
and parse_array () =
89
+
ignore (consume ()); (* consume [ *)
90
+
skip_ws ();
91
+
match peek () with
92
+
| Some ']' -> ignore (consume ()); Ok (JArray [])
93
+
| _ ->
94
+
let rec parse_elements acc =
95
+
match parse_value () with
96
+
| Error e -> Error e
97
+
| Ok value ->
98
+
skip_ws ();
99
+
let acc' = value :: acc in
100
+
match peek () with
101
+
| Some ',' -> ignore (consume ()); parse_elements acc'
102
+
| Some ']' -> ignore (consume ()); Ok (JArray (List.rev acc'))
103
+
| _ -> Error "Expected ',' or ']'"
104
+
in
105
+
parse_elements []
106
+
107
+
and parse_string () =
108
+
skip_ws ();
109
+
match peek () with
110
+
| Some '"' ->
111
+
ignore (consume ());
112
+
let buf = Buffer.create 32 in
113
+
let rec read () =
114
+
match peek () with
115
+
| None -> Error "Unterminated string"
116
+
| Some '"' -> ignore (consume ()); Ok (JString (Buffer.contents buf))
117
+
| Some '\\' ->
118
+
ignore (consume ());
119
+
(match peek () with
120
+
| None -> Error "Unterminated escape"
121
+
| Some c -> ignore (consume ()); Buffer.add_char buf c; read ())
122
+
| Some c -> ignore (consume ()); Buffer.add_char buf c; read ()
123
+
in
124
+
read ()
125
+
| _ -> Error "Expected string"
126
+
127
+
and parse_number () =
128
+
let start = !pos in
129
+
if peek () = Some '-' then incr pos;
130
+
while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done;
131
+
if !pos < len && s.[!pos] = '.' then begin
132
+
incr pos;
133
+
while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done
134
+
end;
135
+
if !pos < len && (s.[!pos] = 'e' || s.[!pos] = 'E') then begin
136
+
incr pos;
137
+
if !pos < len && (s.[!pos] = '+' || s.[!pos] = '-') then incr pos;
138
+
while !pos < len && s.[!pos] >= '0' && s.[!pos] <= '9' do incr pos done
139
+
end;
140
+
let num_str = String.sub s start (!pos - start) in
141
+
match float_of_string_opt num_str with
142
+
| Some f -> Ok (JNumber f)
143
+
| None -> Error "Invalid number"
144
+
145
+
and parse_true () =
146
+
if !pos + 4 <= len && String.sub s !pos 4 = "true" then
147
+
(pos := !pos + 4; Ok (JBool true))
148
+
else Error "Expected 'true'"
149
+
150
+
and parse_false () =
151
+
if !pos + 5 <= len && String.sub s !pos 5 = "false" then
152
+
(pos := !pos + 5; Ok (JBool false))
153
+
else Error "Expected 'false'"
154
+
155
+
and parse_null () =
156
+
if !pos + 4 <= len && String.sub s !pos 4 = "null" then
157
+
(pos := !pos + 4; Ok JNull)
158
+
else Error "Expected 'null'"
159
+
in
160
+
161
+
match parse_value () with
162
+
| Error e -> Error e
163
+
| Ok v ->
164
+
skip_ws ();
165
+
if !pos = len then Ok v
166
+
else Error "Unexpected content after JSON"
167
+
168
+
(** Validate importmap structure *)
169
+
type importmap_error =
170
+
| InvalidJSON of string
171
+
| EmptyKey of string (* property name where empty key was found *)
172
+
| NotObject of string (* property name that should be object but isn't *)
173
+
| NotString of string (* property name that should be string but isn't *)
174
+
| ForbiddenProperty of string
175
+
| SlashKeyWithoutSlashValue of string (* property name where slash key doesn't have slash value *)
176
+
| InvalidScopeKey (* scope key is not a valid URL *)
177
+
| InvalidScopeValue of string (* scope value is not a valid URL *)
178
+
179
+
(** Check if a string looks like a valid URL-like specifier for importmaps *)
180
+
let is_valid_url_like s =
181
+
if String.length s = 0 then false
182
+
else
183
+
(* Valid URL-like: starts with /, ./, ../, or has a scheme followed by :// or : *)
184
+
let starts_with_slash = s.[0] = '/' in
185
+
let starts_with_dot_slash = String.length s >= 2 && s.[0] = '.' && s.[1] = '/' in
186
+
let starts_with_dot_dot_slash = String.length s >= 3 && s.[0] = '.' && s.[1] = '.' && s.[2] = '/' in
187
+
let has_scheme =
188
+
match String.index_opt s ':' with
189
+
| None -> false
190
+
| Some pos when pos > 0 ->
191
+
(* Check that characters before : are valid scheme characters *)
192
+
let scheme = String.sub s 0 pos in
193
+
String.length scheme > 0 &&
194
+
String.for_all (fun c ->
195
+
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
196
+
(c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.'
197
+
) scheme
198
+
| _ -> false
199
+
in
200
+
starts_with_slash || starts_with_dot_slash || starts_with_dot_dot_slash || has_scheme
201
+
202
+
let validate_importmap s =
203
+
match parse_json s with
204
+
| Error msg -> [InvalidJSON msg]
205
+
| Ok json ->
206
+
let errors = ref [] in
207
+
let add_error e = errors := e :: !errors in
208
+
209
+
(match json with
210
+
| JObject members ->
211
+
List.iter (fun (key, value) ->
212
+
(* Check for forbidden top-level properties *)
213
+
if key <> "imports" && key <> "scopes" && key <> "integrity" then
214
+
add_error (ForbiddenProperty key);
215
+
216
+
(* Check imports *)
217
+
if key = "imports" then begin
218
+
match value with
219
+
| JObject import_members ->
220
+
List.iter (fun (ikey, ivalue) ->
221
+
if ikey = "" then add_error (EmptyKey "imports");
222
+
(* Check slash-ending consistency *)
223
+
let key_ends_with_slash = String.length ikey > 0 && ikey.[String.length ikey - 1] = '/' in
224
+
match ivalue with
225
+
| JString v ->
226
+
if key_ends_with_slash then begin
227
+
let val_ends_with_slash = String.length v > 0 && v.[String.length v - 1] = '/' in
228
+
if not val_ends_with_slash then
229
+
add_error (SlashKeyWithoutSlashValue "imports")
230
+
end
231
+
| JNull -> () (* null is allowed *)
232
+
| _ -> add_error (NotString ("imports[" ^ ikey ^ "]"))
233
+
) import_members
234
+
| _ -> add_error (NotObject "imports")
235
+
end;
236
+
237
+
(* Check scopes *)
238
+
if key = "scopes" then begin
239
+
match value with
240
+
| JObject scope_members ->
241
+
List.iter (fun (skey, svalue) ->
242
+
if skey = "" then add_error (EmptyKey "scopes");
243
+
(* Check that scope key is a valid URL *)
244
+
if skey <> "" && not (is_valid_url_like skey) then
245
+
add_error InvalidScopeKey;
246
+
match svalue with
247
+
| JObject scope_imports ->
248
+
List.iter (fun (sikey, sivalue) ->
249
+
if sikey = "" then add_error (EmptyKey ("scopes[" ^ skey ^ "]"));
250
+
match sivalue with
251
+
| JString v ->
252
+
(* Check that scope value is a valid URL *)
253
+
if not (is_valid_url_like v) then
254
+
add_error (InvalidScopeValue sikey)
255
+
| JNull -> ()
256
+
| _ -> add_error (NotString ("scopes[" ^ skey ^ "][" ^ sikey ^ "]"))
257
+
) scope_imports
258
+
| _ -> add_error (NotObject ("scopes[" ^ skey ^ "]"))
259
+
) scope_members
260
+
| _ -> add_error (NotObject "scopes")
261
+
end
262
+
) members
263
+
| _ -> add_error (NotObject "root"));
264
+
265
+
List.rev !errors
266
+
267
+
let start_element state ~name ~namespace ~attrs _collector =
268
+
if namespace <> None then ()
269
+
else begin
270
+
let name_lower = String.lowercase_ascii name in
271
+
if name_lower = "script" then begin
272
+
(* Check if type="importmap" *)
273
+
let type_attr = List.find_opt (fun (n, _) ->
274
+
String.lowercase_ascii n = "type"
275
+
) attrs in
276
+
match type_attr with
277
+
| Some (_, v) when String.lowercase_ascii v = "importmap" ->
278
+
state.in_importmap <- true;
279
+
Buffer.clear state.content
280
+
| _ -> ()
281
+
end
282
+
end
283
+
284
+
let error_to_message = function
285
+
| InvalidJSON _ ->
286
+
"A script \xe2\x80\x9cscript\xe2\x80\x9d with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must have valid JSON content."
287
+
| EmptyKey prop ->
288
+
Printf.sprintf "A specifier map defined in a \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must only contain non-empty keys." prop
289
+
| NotObject prop ->
290
+
Printf.sprintf "The value of the \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must be a JSON object." prop
291
+
| NotString _ ->
292
+
"A specifier map defined in a \xe2\x80\x9cimports\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must only contain string values."
293
+
| ForbiddenProperty prop ->
294
+
Printf.sprintf "The \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d is not an allowed property." prop
295
+
| SlashKeyWithoutSlashValue prop ->
296
+
Printf.sprintf "A specifier map defined in a \xe2\x80\x9c%s\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must have values that end with \xe2\x80\x9c/\xe2\x80\x9d when its corresponding key ends with \xe2\x80\x9c/\xe2\x80\x9d." prop
297
+
| InvalidScopeKey ->
298
+
"The value of the \xe2\x80\x9cscopes\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must be a JSON object whose keys are valid URL strings."
299
+
| InvalidScopeValue _ ->
300
+
"A specifier map defined in a \xe2\x80\x9cscopes\xe2\x80\x9d property within the content of a \xe2\x80\x9cscript\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9cimportmap\xe2\x80\x9d must only contain valid URL values."
301
+
302
+
let end_element state ~name ~namespace collector =
303
+
if namespace <> None then ()
304
+
else begin
305
+
let name_lower = String.lowercase_ascii name in
306
+
if name_lower = "script" && state.in_importmap then begin
307
+
let content = Buffer.contents state.content in
308
+
let errors = validate_importmap content in
309
+
List.iter (fun err ->
310
+
Message_collector.add_error collector
311
+
~message:(error_to_message err)
312
+
~code:"importmap-invalid"
313
+
~element:"script"
314
+
~attribute:"type"
315
+
()
316
+
) errors;
317
+
state.in_importmap <- false
318
+
end
319
+
end
320
+
321
+
let characters state text _collector =
322
+
if state.in_importmap then
323
+
Buffer.add_string state.content text
324
+
325
+
let end_document _state _collector = ()
326
+
327
+
let checker =
328
+
(module struct
329
+
type nonrec state = state
330
+
let create = create
331
+
let reset = reset
332
+
let start_element = start_element
333
+
let end_element = end_element
334
+
let characters = characters
335
+
let end_document = end_document
336
+
end : Checker.S)
+5
lib/html5_checker/specialized/importmap_checker.mli
+5
lib/html5_checker/specialized/importmap_checker.mli
+60
-46
lib/html5_checker/specialized/microdata_checker.ml
+60
-46
lib/html5_checker/specialized/microdata_checker.ml
···
67
67
let is_url s =
68
68
String.contains s ':'
69
69
70
-
(** Validate that a URL is a valid absolute URL for itemtype.
71
-
itemtype must be an absolute URL per the HTML5 spec.
72
-
http/https URLs require :// but other schemes like mailto:, data:, javascript: don't. *)
73
-
let validate_itemtype_url url =
74
-
let url = String.trim url in
75
-
if String.length url = 0 then
76
-
Error "itemtype must not be empty"
70
+
(** Validate that a URL is a valid absolute URL for itemtype/itemid.
71
+
Uses the comprehensive URL validation from Url_checker. *)
72
+
let validate_microdata_url url element attr_name =
73
+
let url_trimmed = String.trim url in
74
+
if String.length url_trimmed = 0 then
75
+
Some (Printf.sprintf
76
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
77
+
url attr_name element)
77
78
else
78
-
match String.index_opt url ':' with
79
-
| None -> Error "Expected a slash (\"/\")."
80
-
| Some colon_pos ->
81
-
if colon_pos = 0 then
82
-
Error "Expected a slash (\"/\")."
83
-
else
84
-
let scheme = String.lowercase_ascii (String.sub url 0 colon_pos) in
85
-
(* Schemes that require :// for itemtype validation
86
-
Note: The Nu validator only enforces :// for http, https, and ftp *)
87
-
let special_schemes = [
88
-
"http"; "https"; "ftp"
89
-
] in
90
-
if List.mem scheme special_schemes then begin
91
-
if colon_pos + 2 >= String.length url then
92
-
Error "Expected a slash (\"/\")."
93
-
else if url.[colon_pos + 1] <> '/' || url.[colon_pos + 2] <> '/' then
94
-
Error "Expected a slash (\"/\")."
95
-
else
96
-
Ok ()
97
-
end else
98
-
(* Other schemes (mailto:, data:, javascript:, etc.) are valid as-is *)
99
-
Ok ()
79
+
(* First check if it has a scheme (required for absolute URL) *)
80
+
match Url_checker.extract_scheme url_trimmed with
81
+
| None ->
82
+
Some (Printf.sprintf
83
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
84
+
url attr_name element url)
85
+
| Some _ ->
86
+
(* Has a scheme - do comprehensive URL validation *)
87
+
match Url_checker.validate_url url element attr_name with
88
+
| None -> None
89
+
| Some error_msg ->
90
+
(* Replace "Bad URL:" with "Bad absolute URL:" for microdata *)
91
+
let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
92
+
Some error_msg
100
93
101
94
(** Check if itemprop value is valid. *)
102
95
let validate_itemprop_value value =
···
125
118
let itemref_opt = get_attr attrs "itemref" in
126
119
let itemprop_opt = get_attr attrs "itemprop" in
127
120
128
-
(* Check itemid requires itemscope and itemtype *)
121
+
(* Check itemid requires itemscope and itemtype, and validate URL *)
129
122
begin match itemid_opt with
130
-
| Some _itemid ->
123
+
| Some itemid ->
131
124
if not has_itemscope then
132
125
Message_collector.add_error collector
133
126
~message:"itemid attribute requires itemscope attribute"
···
143
136
?location
144
137
~element
145
138
~attribute:"itemid"
146
-
()
139
+
();
140
+
(* Validate itemid as URL (note: itemid can be relative, unlike itemtype) *)
141
+
(match Url_checker.validate_url itemid element "itemid" with
142
+
| None -> ()
143
+
| Some error_msg ->
144
+
Message_collector.add_error collector
145
+
~message:error_msg
146
+
~code:"microdata-invalid-itemid"
147
+
?location
148
+
~element
149
+
~attribute:"itemid"
150
+
())
147
151
| None -> ()
148
152
end;
149
153
···
184
188
else begin
185
189
(* Validate each itemtype URL (can be space-separated) *)
186
190
let types = split_whitespace itemtype in
187
-
List.iter (fun url ->
188
-
match validate_itemtype_url url with
189
-
| Ok () -> ()
190
-
| Error msg ->
191
-
Message_collector.add_error collector
192
-
~message:(Printf.sprintf
193
-
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9citemtype\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: %s"
194
-
url element msg)
195
-
~code:"microdata-invalid-itemtype"
196
-
?location
197
-
~element
198
-
~attribute:"itemtype"
199
-
()
200
-
) types
191
+
if types = [] then
192
+
(* Empty itemtype is an error *)
193
+
Message_collector.add_error collector
194
+
~message:(Printf.sprintf
195
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9citemtype\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
196
+
itemtype element)
197
+
~code:"microdata-invalid-itemtype"
198
+
?location
199
+
~element
200
+
~attribute:"itemtype"
201
+
()
202
+
else
203
+
List.iter (fun url ->
204
+
match validate_microdata_url url element "itemtype" with
205
+
| None -> ()
206
+
| Some error_msg ->
207
+
Message_collector.add_error collector
208
+
~message:error_msg
209
+
~code:"microdata-invalid-itemtype"
210
+
?location
211
+
~element
212
+
~attribute:"itemtype"
213
+
()
214
+
) types
201
215
end
202
216
| None -> ()
203
217
end;
+189
lib/html5_checker/specialized/mime_type_checker.ml
+189
lib/html5_checker/specialized/mime_type_checker.ml
···
1
+
(** MIME type validation checker.
2
+
3
+
Validates MIME type values in type attributes. *)
4
+
5
+
(** Validate a MIME type value. Returns error message or None. *)
6
+
let validate_mime_type value element attr_name =
7
+
let len = String.length value in
8
+
if len = 0 then
9
+
Some (Printf.sprintf
10
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Empty value."
11
+
value attr_name element)
12
+
else if value.[len - 1] = ' ' || value.[len - 1] = '\t' then
13
+
Some (Printf.sprintf
14
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Extraneous trailing whitespace."
15
+
value attr_name element)
16
+
else if len > 0 && (value.[0] = ' ' || value.[0] = '\t') then
17
+
Some (Printf.sprintf
18
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Expected a token character but saw \xe2\x80\x9c \xe2\x80\x9d instead."
19
+
value attr_name element)
20
+
else
21
+
(* Parse type/subtype *)
22
+
let slash_pos = try Some (String.index value '/') with Not_found -> None in
23
+
match slash_pos with
24
+
| None ->
25
+
(* No slash found - check if it looks like a type without subtype *)
26
+
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
27
+
(match semicolon_pos with
28
+
| Some _ ->
29
+
Some (Printf.sprintf
30
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
31
+
value attr_name element)
32
+
| None ->
33
+
Some (Printf.sprintf
34
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
35
+
value attr_name element))
36
+
| Some slash_pos ->
37
+
(* Check for empty subtype *)
38
+
let after_slash = String.sub value (slash_pos + 1) (len - slash_pos - 1) in
39
+
let subtype_end =
40
+
try String.index after_slash ';'
41
+
with Not_found -> String.length after_slash
42
+
in
43
+
let subtype = String.sub after_slash 0 subtype_end in
44
+
let subtype_trimmed = String.trim subtype in
45
+
if subtype_trimmed = "" then
46
+
Some (Printf.sprintf
47
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Subtype missing."
48
+
value attr_name element)
49
+
else if String.length subtype > 0 && subtype.[String.length subtype - 1] = ' ' then
50
+
(* Space before semicolon - also check parameter format *)
51
+
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
52
+
(match semicolon_pos with
53
+
| Some semi_pos ->
54
+
(* Check what comes after semicolon *)
55
+
let params = String.sub value (semi_pos + 1) (len - semi_pos - 1) in
56
+
let params_trimmed = String.trim params in
57
+
if params_trimmed = "" then
58
+
Some (Printf.sprintf
59
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
60
+
value attr_name element)
61
+
else
62
+
(* Check for param_name=value format *)
63
+
let eq_pos = try Some (String.index params '=') with Not_found -> None in
64
+
(match eq_pos with
65
+
| None ->
66
+
Some (Printf.sprintf
67
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
68
+
value attr_name element)
69
+
| Some _ -> None)
70
+
| None -> None)
71
+
else
72
+
(* Check parameters after semicolon *)
73
+
let semicolon_pos = try Some (String.index value ';') with Not_found -> None in
74
+
(match semicolon_pos with
75
+
| None -> None (* No parameters - OK *)
76
+
| Some semi_pos ->
77
+
let params = String.sub value (semi_pos + 1) (len - semi_pos - 1) in
78
+
let params_trimmed = String.trim params in
79
+
if params_trimmed = "" then
80
+
Some (Printf.sprintf
81
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Semicolon seen but there was no parameter following it."
82
+
value attr_name element)
83
+
else
84
+
(* Check for param_name=value format *)
85
+
let eq_pos = try Some (String.index params '=') with Not_found -> None in
86
+
(match eq_pos with
87
+
| None ->
88
+
Some (Printf.sprintf
89
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
90
+
value attr_name element)
91
+
| Some eq_pos ->
92
+
let param_value = String.sub params (eq_pos + 1) (String.length params - eq_pos - 1) in
93
+
let param_value_trimmed = String.trim param_value in
94
+
if param_value_trimmed = "" then
95
+
Some (Printf.sprintf
96
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Parameter value missing."
97
+
value attr_name element)
98
+
else if param_value_trimmed.[0] = '"' then
99
+
(* Quoted string - check for closing quote *)
100
+
let quote_end = try Some (String.index_from param_value_trimmed 1 '"') with
101
+
| Not_found -> None
102
+
| Invalid_argument _ -> None
103
+
in
104
+
(match quote_end with
105
+
| Some _ -> None (* Properly quoted *)
106
+
| None ->
107
+
(* Check for escaped quote at end *)
108
+
let has_backslash_at_end =
109
+
String.length param_value_trimmed >= 2 &&
110
+
param_value_trimmed.[String.length param_value_trimmed - 1] = '\\'
111
+
in
112
+
if has_backslash_at_end then
113
+
Some (Printf.sprintf
114
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
115
+
value attr_name element)
116
+
else
117
+
Some (Printf.sprintf
118
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad MIME type: Unfinished quoted string."
119
+
value attr_name element))
120
+
else
121
+
None))
122
+
123
+
(** Elements and attributes that contain MIME types. *)
124
+
let mime_type_attrs = [
125
+
("link", ["type"]);
126
+
("style", ["type"]);
127
+
("script", ["type"]);
128
+
("source", ["type"]);
129
+
("embed", ["type"]);
130
+
("object", ["type"]);
131
+
]
132
+
133
+
type state = unit
134
+
135
+
let create () = ()
136
+
let reset _state = ()
137
+
138
+
let get_attr_value name attrs =
139
+
List.find_map (fun (k, v) ->
140
+
if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None
141
+
) attrs
142
+
143
+
let start_element _state ~name ~namespace ~attrs collector =
144
+
if namespace <> None then ()
145
+
else begin
146
+
let name_lower = String.lowercase_ascii name in
147
+
match List.assoc_opt name_lower mime_type_attrs with
148
+
| None -> ()
149
+
| Some type_attrs ->
150
+
List.iter (fun attr_name ->
151
+
match get_attr_value attr_name attrs with
152
+
| None -> ()
153
+
| Some value ->
154
+
(* Don't validate empty type attributes or special script types *)
155
+
if value = "" then ()
156
+
else if name_lower = "script" then
157
+
(* script type can be module, importmap, etc. - skip validation for non-MIME types *)
158
+
let value_lower = String.lowercase_ascii value in
159
+
if value_lower = "module" || value_lower = "importmap" ||
160
+
not (String.contains value '/') then ()
161
+
else
162
+
match validate_mime_type value name attr_name with
163
+
| None -> ()
164
+
| Some err ->
165
+
Message_collector.add_error collector
166
+
~message:err ~code:"bad-mime-type" ~element:name ~attribute:attr_name ()
167
+
else
168
+
match validate_mime_type value name attr_name with
169
+
| None -> ()
170
+
| Some err ->
171
+
Message_collector.add_error collector
172
+
~message:err ~code:"bad-mime-type" ~element:name ~attribute:attr_name ()
173
+
) type_attrs
174
+
end
175
+
176
+
let end_element _state ~name:_ ~namespace:_ _collector = ()
177
+
let characters _state _text _collector = ()
178
+
let end_document _state _collector = ()
179
+
180
+
let checker =
181
+
(module struct
182
+
type nonrec state = state
183
+
let create = create
184
+
let reset = reset
185
+
let start_element = start_element
186
+
let end_element = end_element
187
+
let characters = characters
188
+
let end_document = end_document
189
+
end : Checker.S)
+5
lib/html5_checker/specialized/mime_type_checker.mli
+5
lib/html5_checker/specialized/mime_type_checker.mli
+42
-40
lib/html5_checker/specialized/table_checker.ml
+42
-40
lib/html5_checker/specialized/table_checker.ml
···
767
767
768
768
let reset state = state.tables := []
769
769
770
+
let is_html_namespace = function
771
+
| None -> true (* HTML mode - no namespace specified *)
772
+
| Some ns -> ns = html_ns (* XHTML mode - check namespace *)
773
+
770
774
let start_element state ~name ~namespace ~attrs collector =
771
-
match namespace with
772
-
| Some ns when ns = html_ns -> (
773
-
match name with
774
-
| "table" ->
775
-
(* Push a new table onto the stack *)
776
-
state.tables := make_table () :: !(state.tables)
777
-
| _ -> (
778
-
match !(state.tables) with
779
-
| [] -> ()
780
-
| table :: _ -> (
781
-
match name with
782
-
| "td" -> start_cell table false attrs collector
783
-
| "th" -> start_cell table true attrs collector
784
-
| "tr" -> start_row table collector
785
-
| "tbody" | "thead" | "tfoot" -> start_row_group table name collector
786
-
| "col" -> start_col table attrs collector
787
-
| "colgroup" -> start_colgroup table attrs collector
788
-
| _ -> ())))
789
-
| _ -> ()
775
+
if is_html_namespace namespace then (
776
+
let name_lower = String.lowercase_ascii name in
777
+
match name_lower with
778
+
| "table" ->
779
+
(* Push a new table onto the stack *)
780
+
state.tables := make_table () :: !(state.tables)
781
+
| _ -> (
782
+
match !(state.tables) with
783
+
| [] -> ()
784
+
| table :: _ -> (
785
+
match name_lower with
786
+
| "td" -> start_cell table false attrs collector
787
+
| "th" -> start_cell table true attrs collector
788
+
| "tr" -> start_row table collector
789
+
| "tbody" | "thead" | "tfoot" -> start_row_group table name collector
790
+
| "col" -> start_col table attrs collector
791
+
| "colgroup" -> start_colgroup table attrs collector
792
+
| _ -> ())))
790
793
791
794
let end_element state ~name ~namespace collector =
792
-
match namespace with
793
-
| Some ns when ns = html_ns -> (
794
-
match name with
795
-
| "table" -> (
796
-
match !(state.tables) with
797
-
| [] -> failwith "Bug: end table but no table on stack"
798
-
| table :: rest ->
799
-
end_table table collector;
800
-
state.tables := rest)
801
-
| _ -> (
802
-
match !(state.tables) with
803
-
| [] -> ()
804
-
| table :: _ -> (
805
-
match name with
806
-
| "td" | "th" -> end_cell table
807
-
| "tr" -> end_row table collector
808
-
| "tbody" | "thead" | "tfoot" -> end_row_group_handler table collector
809
-
| "col" -> end_col table
810
-
| "colgroup" -> end_colgroup table
811
-
| _ -> ())))
812
-
| _ -> ()
795
+
if is_html_namespace namespace then (
796
+
let name_lower = String.lowercase_ascii name in
797
+
match name_lower with
798
+
| "table" -> (
799
+
match !(state.tables) with
800
+
| [] -> () (* End tag without start - ignore *)
801
+
| table :: rest ->
802
+
end_table table collector;
803
+
state.tables := rest)
804
+
| _ -> (
805
+
match !(state.tables) with
806
+
| [] -> ()
807
+
| table :: _ -> (
808
+
match name_lower with
809
+
| "td" | "th" -> end_cell table
810
+
| "tr" -> end_row table collector
811
+
| "tbody" | "thead" | "tfoot" -> end_row_group_handler table collector
812
+
| "col" -> end_col table
813
+
| "colgroup" -> end_colgroup table
814
+
| _ -> ())))
813
815
814
816
let characters _state _text _collector = ()
815
817
+1
-1
test/debug_check.ml
+1
-1
test/debug_check.ml
···
1
1
let () =
2
-
let test_file = "validator/tests/html/microdata/itemtype/scheme-https-no-slash-novalid.html" in
2
+
let test_file = "validator/tests/html/mime-types/004-novalid.html" in
3
3
let ic = open_in test_file in
4
4
let html = really_input_string ic (in_channel_length ic) in
5
5
close_in ic;