OCaml HTML5 parser/serialiser based on Python's JustHTML

use jsont instead of yojson for the entity gen

+1 -1
dune-project
··· 25 25 (uutf (>= 1.0.0)) 26 26 (uuuu (>= 0.3.0)) 27 27 (odoc :with-doc) 28 - (jsont (and :with-test (>= 0.2.0))))) 28 + (jsont (>= 0.2.0))))
+1 -1
gen/dune
··· 1 1 (executable 2 2 (name gen_entities) 3 - (libraries yojson)) 3 + (libraries jsont jsont.bytesrw))
+53 -37
gen/gen_entities.ml
··· 1 1 (* Entity table generator for html5rw. 2 2 Reads WHATWG entities.json and generates OCaml code. *) 3 3 4 + (* Helper functions for jsont *) 5 + let json_object = function 6 + | Jsont.Object (obj, _) -> obj 7 + | _ -> failwith "Expected JSON object" 8 + 9 + let json_array = function 10 + | Jsont.Array (arr, _) -> arr 11 + | _ -> failwith "Expected JSON array" 12 + 13 + let json_number = function 14 + | Jsont.Number (n, _) -> int_of_float n 15 + | _ -> failwith "Expected JSON number" 16 + 17 + let json_mem name obj = 18 + match List.find_opt (fun ((n, _), _) -> n = name) obj with 19 + | Some (_, v) -> Some v 20 + | None -> None 21 + 4 22 let () = 5 23 let json_file = Sys.argv.(1) in 6 24 let out_file = Sys.argv.(2) in ··· 10 28 let s = really_input_string ic n in 11 29 close_in ic; 12 30 13 - let json = Yojson.Basic.from_string s in 31 + let json = match Jsont_bytesrw.decode_string Jsont.json s with 32 + | Ok j -> j 33 + | Error e -> failwith (Printf.sprintf "JSON parse error: %s" e) 34 + in 14 35 15 36 let oc = open_out out_file in 16 37 ··· 24 45 let entities = ref [] in 25 46 let legacy = ref [] in 26 47 27 - (match json with 28 - | `Assoc entries -> 29 - List.iter (fun (name, value) -> 30 - (* name is like "&amp;" or "&amp" *) 31 - let name_without_amp = 32 - if String.length name > 0 && name.[0] = '&' then 33 - String.sub name 1 (String.length name - 1) 34 - else name 35 - in 36 - let has_semicolon = 37 - String.length name_without_amp > 0 && 38 - name_without_amp.[String.length name_without_amp - 1] = ';' 39 - in 40 - let key = 41 - if has_semicolon then 42 - String.sub name_without_amp 0 (String.length name_without_amp - 1) 43 - else 44 - name_without_amp 45 - in 46 - (match value with 47 - | `Assoc fields -> 48 - let codepoints = 49 - match List.assoc_opt "codepoints" fields with 50 - | Some (`List cps) -> 51 - List.map (function `Int i -> i | _ -> 0) cps 52 - | _ -> [] 53 - in 54 - if codepoints <> [] then begin 55 - entities := (key, codepoints, has_semicolon) :: !entities; 56 - (* Legacy entities are those that appear without semicolon in the JSON *) 57 - if not has_semicolon then 58 - legacy := key :: !legacy 59 - end 60 - | _ -> ()) 61 - ) entries 62 - | _ -> failwith "Expected JSON object"); 48 + let entries = json_object json in 49 + List.iter (fun ((name, _), value) -> 50 + (* name is like "&amp;" or "&amp" *) 51 + let name_without_amp = 52 + if String.length name > 0 && name.[0] = '&' then 53 + String.sub name 1 (String.length name - 1) 54 + else name 55 + in 56 + let has_semicolon = 57 + String.length name_without_amp > 0 && 58 + name_without_amp.[String.length name_without_amp - 1] = ';' 59 + in 60 + let key = 61 + if has_semicolon then 62 + String.sub name_without_amp 0 (String.length name_without_amp - 1) 63 + else 64 + name_without_amp 65 + in 66 + let fields = json_object value in 67 + let codepoints = 68 + match json_mem "codepoints" fields with 69 + | Some arr -> List.map json_number (json_array arr) 70 + | None -> [] 71 + in 72 + if codepoints <> [] then begin 73 + entities := (key, codepoints, has_semicolon) :: !entities; 74 + (* Legacy entities are those that appear without semicolon in the JSON *) 75 + if not has_semicolon then 76 + legacy := key :: !legacy 77 + end 78 + ) entries; 63 79 64 80 (* Remove duplicates - prefer semicolon version *) 65 81 let seen = Hashtbl.create 2500 in
+1 -1
html5rw.opam
··· 16 16 "uutf" {>= "1.0.0"} 17 17 "uuuu" {>= "0.3.0"} 18 18 "odoc" {with-doc} 19 - "jsont" {with-test & >= "0.2.0"} 19 + "jsont" {>= "0.2.0"} 20 20 ] 21 21 build: [ 22 22 ["dune" "subst"] {dev}