OCaml HTML5 parser/serialiser based on Python's JustHTML
open Bytesrw

(* Basic HTML parsing example *)

(* Sample document fed to the parser below. A quoted string ({| ... |}) is
   used so the markup needs no escaping; the literal therefore starts and
   ends with a newline. *)
let html = {|
<!DOCTYPE html>
<html>
<head>
  <title>Hello World</title>
</head>
<body>
  <h1>Welcome</h1>
  <p>This is a <strong>simple</strong> example.</p>
</body>
</html>
|}

(* Entry point: parse [html], then print the root node's name, the
   re-serialized document, and its plain-text content to stdout. *)
let () =
  (* Parse HTML string. [Bytes.Reader] is reached through [open Bytesrw]
     above; the string is wrapped in a reader before being parsed. *)
  let result = Html5rw.parse (Bytes.Reader.of_string html) in

  (* Access the root document node *)
  let doc = Html5rw.root result in
  Printf.printf "Root node: %s\n" doc.Html5rw.Dom.name;

  (* Convert back to HTML *)
  let output = Html5rw.to_string result in
  Printf.printf "\nParsed and serialized:\n%s\n" output;

  (* Extract plain text *)
  let text = Html5rw.to_text result in
  Printf.printf "\nText content: %s\n" text