(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy . All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** CSS Selector Engine

    This module provides CSS selector parsing and matching for querying the
    HTML5 DOM. It supports a subset of CSS3 selectors suitable for common web
    scraping and DOM manipulation tasks.

    The module is a thin facade: every item defined below is an alias for a
    definition in one of the [Selector_*] implementation modules, gathered
    here under a single entry point.

    {2 Supported Selectors}

    {3 Simple Selectors}
    - Tag: [div], [p], [span]
    - ID: [#myid]
    - Class: [.myclass]
    - Universal: [*]

    {3 Attribute Selectors}
    - Presence: [[attr]]
    - Exact match: [[attr="value"]]
    - Contains word: [[attr~="value"]]
    - Starts with: [[attr^="value"]]
    - Ends with: [[attr$="value"]]
    - Contains: [[attr*="value"]]
    - Hyphen-separated: [[attr|="value"]]

    {3 Pseudo-classes}
    - [:first-child], [:last-child]
    - [:nth-child(n)], [:nth-last-child(n)]
    - [:only-child]
    - [:empty]
    - [:not(selector)]

    {3 Combinators}
    - Descendant: [div p] (p anywhere inside div)
    - Child: [div > p] (p direct child of div)
    - Adjacent sibling: [div + p] (p immediately after div)
    - General sibling: [div ~ p] (p after div, same parent)

    {2 Usage}

    {[
      let doc = Html5rw.parse reader in

      (* Find all paragraphs *)
      let paragraphs = Html5rw.query doc "p" in

      (* Find links with specific class *)
      let links = Html5rw.query doc "a.external" in

      (* Find table cells in rows *)
      let cells = Html5rw.query doc "tr > td" in

      (* Check if a node matches *)
      let is_active = Html5rw.matches node ".active"
    ]} *)

(** {1 Error Types} *)

(** CSS selector error codes.

    Alias for [Selector_error_code]; the individual codes are defined there
    and are not visible from this file. *)
module Error_code = Selector_error_code

(** {1 Exceptions} *)

(** Raised when a selector string is malformed.

    The exception contains a typed error code describing the parse error.

    This is a rebinding of [Selector_lexer.Selector_error], so both names
    denote the same exception and a handler written against either one
    catches it. *)
exception Selector_error = Selector_lexer.Selector_error

(** {1 Sub-modules} *)

(** Abstract syntax tree for parsed selectors.

    Alias for [Selector_ast]. *)
module Ast = Selector_ast

(** Token types for the selector lexer.

    Alias for [Selector_token]. *)
module Token = Selector_token

(** {1 Functions} *)

(** Parse a CSS selector string.

    Alias for [Selector_parser.parse_selector]. The exact signature is not
    visible from this file; presumably the result is a value described by
    {!Ast} (to be confirmed against [Selector_parser]).

    @raise Selector_error if the selector is malformed. *)
let parse = Selector_parser.parse_selector

(** Query the DOM tree with a CSS selector.

    Returns all nodes matching the selector in document order.

    Alias for [Selector_match.query]. As the example shows, the node to
    search from is passed first and the selector string second.

    @raise Selector_error if the selector is malformed.

    {[
      let divs = query root_node "div.content > p"
    ]} *)
let query = Selector_match.query

(** Check if a node matches a CSS selector.

    Alias for [Selector_match.matches]. As the example shows, the node is
    passed first and the selector string second.

    @raise Selector_error if the selector is malformed.

    {[
      if matches node ".active" then
        (* node has class "active" *)
    ]} *)
let matches = Selector_match.matches