(* OCaml HTML5 parser/serialiser based on Python's JustHTML *)
open Bytesrw

(* Basic HTML parsing example *)

(* Sample HTML5 document used as the parser input in this example.
   The quoted-string literal is taken verbatim, so the value starts and
   ends with a newline character. *)
let html = {|
<!DOCTYPE html>
<html>
<head>
 <title>Hello World</title>
</head>
<body>
 <h1>Welcome</h1>
 <p>This is a <strong>simple</strong> example.</p>
</body>
</html>
|}

(* Entry point: parse the sample [html], then print the root node's name,
   the re-serialized markup and the plain-text content to stdout. *)
let () =
  (* Wrap the string in a byte reader and hand it to the parser. *)
  let reader = Bytes.Reader.of_string html in
  let parsed = Html5rw.parse reader in

  (* Report the name of the root document node. *)
  let root = Html5rw.root parsed in
  Printf.printf "Root node: %s\n" root.Html5rw.Dom.name;

  (* Serialize the parsed result back to HTML. *)
  Printf.printf "\nParsed and serialized:\n%s\n" (Html5rw.to_string parsed);

  (* Extract the plain-text content of the same result. *)
  Printf.printf "\nText content: %s\n" (Html5rw.to_text parsed)