+1
-1
dune-project
+1
-1
dune-project
+1
-1
gen/dune
+1
-1
gen/dune
+53
-37
gen/gen_entities.ml
+53
-37
gen/gen_entities.ml
···
1
1
(* Entity table generator for html5rw.
2
2
Reads WHATWG entities.json and generates OCaml code. *)
3
3
4
+
(* Helper functions for jsont *)
5
+
let json_object = function
6
+
| Jsont.Object (obj, _) -> obj
7
+
| _ -> failwith "Expected JSON object"
8
+
9
+
let json_array = function
10
+
| Jsont.Array (arr, _) -> arr
11
+
| _ -> failwith "Expected JSON array"
12
+
13
+
let json_number = function
14
+
| Jsont.Number (n, _) -> int_of_float n
15
+
| _ -> failwith "Expected JSON number"
16
+
17
+
let json_mem name obj =
18
+
match List.find_opt (fun ((n, _), _) -> n = name) obj with
19
+
| Some (_, v) -> Some v
20
+
| None -> None
21
+
4
22
let () =
5
23
let json_file = Sys.argv.(1) in
6
24
let out_file = Sys.argv.(2) in
···
10
28
let s = really_input_string ic n in
11
29
close_in ic;
12
30
13
-
let json = Yojson.Basic.from_string s in
31
+
let json = match Jsont_bytesrw.decode_string Jsont.json s with
32
+
| Ok j -> j
33
+
| Error e -> failwith (Printf.sprintf "JSON parse error: %s" e)
34
+
in
14
35
15
36
let oc = open_out out_file in
16
37
···
24
45
let entities = ref [] in
25
46
let legacy = ref [] in
26
47
27
-
(match json with
28
-
| `Assoc entries ->
29
-
List.iter (fun (name, value) ->
30
-
(* name is like "&" or "&" *)
31
-
let name_without_amp =
32
-
if String.length name > 0 && name.[0] = '&' then
33
-
String.sub name 1 (String.length name - 1)
34
-
else name
35
-
in
36
-
let has_semicolon =
37
-
String.length name_without_amp > 0 &&
38
-
name_without_amp.[String.length name_without_amp - 1] = ';'
39
-
in
40
-
let key =
41
-
if has_semicolon then
42
-
String.sub name_without_amp 0 (String.length name_without_amp - 1)
43
-
else
44
-
name_without_amp
45
-
in
46
-
(match value with
47
-
| `Assoc fields ->
48
-
let codepoints =
49
-
match List.assoc_opt "codepoints" fields with
50
-
| Some (`List cps) ->
51
-
List.map (function `Int i -> i | _ -> 0) cps
52
-
| _ -> []
53
-
in
54
-
if codepoints <> [] then begin
55
-
entities := (key, codepoints, has_semicolon) :: !entities;
56
-
(* Legacy entities are those that appear without semicolon in the JSON *)
57
-
if not has_semicolon then
58
-
legacy := key :: !legacy
59
-
end
60
-
| _ -> ())
61
-
) entries
62
-
| _ -> failwith "Expected JSON object");
48
+
let entries = json_object json in
49
+
List.iter (fun ((name, _), value) ->
50
+
(* name is like "&" or "&" *)
51
+
let name_without_amp =
52
+
if String.length name > 0 && name.[0] = '&' then
53
+
String.sub name 1 (String.length name - 1)
54
+
else name
55
+
in
56
+
let has_semicolon =
57
+
String.length name_without_amp > 0 &&
58
+
name_without_amp.[String.length name_without_amp - 1] = ';'
59
+
in
60
+
let key =
61
+
if has_semicolon then
62
+
String.sub name_without_amp 0 (String.length name_without_amp - 1)
63
+
else
64
+
name_without_amp
65
+
in
66
+
let fields = json_object value in
67
+
let codepoints =
68
+
match json_mem "codepoints" fields with
69
+
| Some arr -> List.map json_number (json_array arr)
70
+
| None -> []
71
+
in
72
+
if codepoints <> [] then begin
73
+
entities := (key, codepoints, has_semicolon) :: !entities;
74
+
(* Legacy entities are those that appear without semicolon in the JSON *)
75
+
if not has_semicolon then
76
+
legacy := key :: !legacy
77
+
end
78
+
) entries;
63
79
64
80
(* Remove duplicates - prefer semicolon version *)
65
81
let seen = Hashtbl.create 2500 in