+4
-4
lib/check/semantic/lang_detecting_checker.ml
+4
-4
lib/check/semantic/lang_detecting_checker.ml
···
13
13
mutable char_count : int;
14
14
}
15
15
16
-
let max_chars = 30720
16
+
(** Upper character limit for language detection. Presumably the cap on how
    much text is accumulated (cf. the [char_count] field) before detection
    runs — NOTE(review): confirm against the code that reads this value. *)
let max_chars = 8192 (* Reduced from 30720 to avoid slow language detection *)
17
17
(** Lower character limit paired with [max_chars]. Presumably the minimum
    amount of collected text required before language detection is attempted
    — TODO confirm against the code that reads this value. *)
let min_chars = 1024
18
18
19
19
(* Tag names whose text content is left out of language detection. The names
   are loaded into a hash table so that membership checks are O(1). *)
let skip_elements =
  [
    "a"; "button"; "code"; "details"; "figcaption"; "form"; "kbd"; "li";
    "nav"; "pre"; "samp"; "script"; "select"; "span"; "style"; "summary";
    "td"; "textarea"; "th"; "tr"; "var"; "xmp";
  ]
  |> Attr_utils.hashtbl_of_list
26
26
27
27
(** [is_skip_element name] is [true] when [name] is present as a key in
    [skip_elements], i.e. when the element's text content is excluded from
    language detection. *)
let is_skip_element name = Hashtbl.mem skip_elements name
+20
-6
lib/check/specialized/normalization_checker.ml
+20
-6
lib/check/specialized/normalization_checker.ml
···
2
2
3
3
Validates that text content is in Unicode Normalization Form C (NFC). *)
4
4
5
-
type state = unit [@@warning "-34"]
5
+
(** Mutable state carried by the normalization checker between events. *)
type state = {
6
+
mutable in_raw_text : int; (** Number of currently open style/script/xmp/textarea elements; character data is skipped while this is positive *)
7
+
}
6
8
7
-
let create () = ()
8
-
let reset _state = ()
9
+
(** [create ()] returns a fresh state whose raw-text nesting counter is zero. *)
let create () = { in_raw_text = 0 }
10
+
(** [reset state] sets the raw-text nesting counter of [state] back to zero,
    mutating [state] in place. *)
let reset state = state.in_raw_text <- 0
11
+
12
+
(** [is_raw_text_element name] is [true] exactly when [name] is one of
    ["style"], ["script"], ["xmp"] or ["textarea"] — the elements whose text
    content this checker skips. The match is an exact string comparison. *)
let is_raw_text_element = function
  | "style" | "script" | "xmp" | "textarea" -> true
  | _ -> false
9
15
10
16
(** Normalize a string to NFC form using uunf. *)
11
17
let normalize_nfc text =
···
40
46
if end_pos = len then s
41
47
else String.sub s 0 end_pos
42
48
43
-
let start_element _state ~element:_ _collector = ()
49
+
(** [start_element state ~element _collector] records the opening of
    [element]: when its tag name satisfies [is_raw_text_element], the
    raw-text nesting counter in [state] is incremented. The collector is not
    used. *)
let start_element state ~element _collector =
  let opens_raw_text =
    element.Element.tag |> Tag.tag_to_string |> is_raw_text_element
  in
  if opens_raw_text then state.in_raw_text <- succ state.in_raw_text
44
53
45
-
let end_element _state ~tag:_ _collector = ()
54
+
(** [end_element state ~tag _collector] records the closing of [tag]: when its
    name satisfies [is_raw_text_element] and the raw-text nesting counter is
    positive, the counter is decremented. The positivity guard keeps the
    counter from going below zero. The collector is not used. *)
let end_element state ~tag _collector =
  let closes_raw_text = tag |> Tag.tag_to_string |> is_raw_text_element in
  let depth = state.in_raw_text in
  if closes_raw_text && depth > 0 then state.in_raw_text <- pred depth
46
58
47
-
let characters _state text collector =
59
+
let characters state text collector =
60
+
(* Skip text inside raw text elements like style/script *)
61
+
if state.in_raw_text > 0 then () else
48
62
(* Skip empty text or whitespace-only text *)
49
63
let text_trimmed = String.trim text in
50
64
if String.length text_trimmed = 0 then ()
+6
-1
test/test_roundtrip.ml
+6
-1
test/test_roundtrip.ml
···
129
129
Printf.printf "Running roundtrip tests...\n%!";
130
130
131
131
(* Run tests *)
132
-
let results = List.map test_file test_files in
132
+
let total = List.length test_files in
133
+
let results = List.mapi (fun i path ->
134
+
Printf.printf "\r[%d/%d] %s%!" (i + 1) total (Filename.basename path);
135
+
test_file path
136
+
) test_files in
137
+
Printf.printf "\n%!";
133
138
134
139
(* Categorize results *)
135
140
let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
+6
-1
test/test_validator.ml
+6
-1
test/test_validator.ml
···
426
426
Printf.printf "Found %d test files\n%!" (List.length tests);
427
427
428
428
Printf.printf "Running tests...\n%!";
429
-
let results = List.map (run_test messages) tests in
429
+
let total = List.length tests in
430
+
let results = List.mapi (fun i test ->
431
+
Printf.printf "\r[%d/%d] %s%!" (i + 1) total test.relative_path;
432
+
run_test messages test
433
+
) tests in
434
+
Printf.printf "\n%!";
430
435
431
436
(* Print failing isvalid tests *)
432
437
let failing_isvalid = List.filter (fun r ->