open Bytesrw

(* Text extraction example *)

(* Sample input handed to the parser below.
   NOTE(review): as written, this literal contains no element tags, so the
   "article", "h1" and "li" selector queries further down presumably match
   nothing -- confirm the markup was not lost before relying on the output. *)
let html = {| Article

The Great HTML5 Parser

This is the introduction to an article about HTML parsing in OCaml.

The parser follows the WHATWG specification and handles all kinds of malformed HTML gracefully.

|}

(* Entry point: parses [html] once, then prints four sections to stdout:
   the document text via [Html5rw.to_text] with its defaults, the same with
   ["\n"] passed as [~separator], the text content of every node returned by
   the "article" query, and finally a title/features listing built from the
   "h1" and "li" queries. *)
let () =
  let doc = Html5rw.parse (Bytes.Reader.of_string html) in
  (* Run a selector query against the parsed document. *)
  let select selector = Html5rw.query doc selector in
  (* Print the text content of each node using the given one-string format. *)
  let print_each fmt nodes =
    List.iter
      (fun node -> Printf.printf fmt (Html5rw.get_text_content node))
      nodes
  in

  (* Whole-document text, default options. *)
  Printf.printf "=== All Text (default) ===\n";
  let plain = Html5rw.to_text doc in
  Printf.printf "%s\n\n" plain;

  (* Whole-document text, explicit separator. *)
  Printf.printf "=== Text with Newline Separator ===\n";
  let separated = Html5rw.to_text ~separator:"\n" doc in
  Printf.printf "%s\n\n" separated;

  (* Text restricted to the nodes matched by "article". *)
  Printf.printf "=== Article Text Only ===\n";
  select "article" |> print_each "%s\n";

  (* Structured output: headings first, then list items. *)
  Printf.printf "\n=== Structured Extraction ===\n";
  select "h1" |> print_each "Title: %s\n";
  (* The "li" query runs before the "Features:" header is printed, so a
     failing query leaves no dangling header. *)
  let features = select "li" in
  Printf.printf "Features:\n";
  print_each " - %s\n" features