OCaml HTML5 parser/serialiser based on Python's JustHTML

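Fixes: give each executable in examples/dune an explicit modules field, remove the is_header record field and rename the start_row collector parameter to _collector in table_checker.ml, and add the ~line and ~column arguments to process in the test token collectors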

Changed files
+11 -6
examples
lib
html5_checker
specialized
test
+8
examples/dune
··· 1 1 (executable 2 2 (name basic_parsing) 3 + (modules basic_parsing) 3 4 (libraries bytesrw html5rw)) 4 5 5 6 (executable 6 7 (name css_selectors) 8 + (modules css_selectors) 7 9 (libraries bytesrw html5rw)) 8 10 9 11 (executable 10 12 (name dom_manipulation) 13 + (modules dom_manipulation) 11 14 (libraries bytesrw html5rw)) 12 15 13 16 (executable 14 17 (name text_extraction) 18 + (modules text_extraction) 15 19 (libraries bytesrw html5rw)) 16 20 17 21 (executable 18 22 (name error_handling) 23 + (modules error_handling) 19 24 (libraries bytesrw html5rw)) 20 25 21 26 (executable 22 27 (name fragment_parsing) 28 + (modules fragment_parsing) 23 29 (libraries bytesrw html5rw)) 24 30 25 31 (executable 26 32 (name encoding_detection) 33 + (modules encoding_detection) 27 34 (libraries bytesrw html5rw)) 28 35 29 36 (executable 30 37 (name web_scraper) 38 + (modules web_scraper) 31 39 (libraries bytesrw html5rw))
+1 -4
lib/html5_checker/specialized/table_checker.ml
··· 27 27 (** First row onto which this cell does not span (or rowspan_zero_magic) *) 28 28 headers : string list; 29 29 (** IDs referenced by the headers attribute *) 30 - is_header : bool; 31 - (** Whether this is a th element *) 32 30 element_name : string; 33 31 (** "td" or "th" *) 34 32 } ··· 62 60 right = colspan; 63 61 bottom = (if rowspan = 0 then rowspan_zero_magic else rowspan); 64 62 headers; 65 - is_header; 66 63 element_name = (if is_header then "th" else "td"); 67 64 } 68 65 ··· 468 465 | _ -> failwith "Bug: end_row_group in wrong state" 469 466 470 467 (** Start a row *) 471 - let start_row table collector = 468 + let start_row table _collector = 472 469 if need_suppress_start table then () 473 470 else 474 471 match table.state with
+1 -1
test/test_all.ml
··· 195 195 module TokenCollector = struct 196 196 type t = { mutable tokens : Html5rw.Tokenizer.Token.t list } 197 197 let create () = { tokens = [] } 198 - let process t token = t.tokens <- token :: t.tokens; `Continue 198 + let process t token ~line:_ ~column:_ = t.tokens <- token :: t.tokens; `Continue 199 199 let adjusted_current_node_in_html_namespace _ = true 200 200 let get_tokens t = List.rev t.tokens 201 201 end
+1 -1
test/test_tokenizer.ml
··· 13 13 14 14 let create () = { tokens = [] } 15 15 16 - let process t token = 16 + let process t token ~line:_ ~column:_ = 17 17 t.tokens <- token :: t.tokens; 18 18 `Continue 19 19