(** RichText handling for AT Protocol. This module provides facilities for working with rich text in Bluesky posts, including facets for mentions, links, and hashtags. Facets are byte-indexed annotations that mark up portions of text. *) (** {1 Types} *) type byte_slice = { byte_start : int; byte_end : int } (** A byte range within text *) type mention = { did : string } (** Mention facet feature - links to a user *) type link = { uri : string } (** Link facet feature - external URL *) type tag = { tag : string } (** Tag/hashtag facet feature *) type feature = | Mention of mention | Link of link | Tag of tag (** Facet feature types *) type facet = { index : byte_slice; features : feature list } (** A facet annotation on text *) type t = { text : string; facets : facet list } (** Rich text with facets *) (** {1 Construction} *) (** Create plain text with no facets *) let of_string text = { text; facets = [] } (** Create rich text with facets *) let create ~text ~facets = { text; facets } (** Get the plain text *) let text t = t.text (** Get the facets *) let facets t = t.facets (** {1 Facet Creation} *) (** Create a byte slice *) let byte_slice ~start ~end_ = { byte_start = start; byte_end = end_ } (** Create a mention facet *) let mention_facet ~start ~end_ ~did = { index = byte_slice ~start ~end_; features = [ Mention { did } ] } (** Create a link facet *) let link_facet ~start ~end_ ~uri = { index = byte_slice ~start ~end_; features = [ Link { uri } ] } (** Create a tag facet *) let tag_facet ~start ~end_ ~tag = { index = byte_slice ~start ~end_; features = [ Tag { tag } ] } (** Add a facet to rich text *) let add_facet t facet = { t with facets = facet :: t.facets } (** {1 Facet Detection} *) (** Check if character is valid in a handle *) let is_handle_char c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c = '.' || c = '-' (** Check if character is valid in a hashtag *) let is_tag_char c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c = '_' (** Check if character is whitespace or punctuation (word boundary for URLs) *) let is_url_boundary c = c = ' ' || c = '\n' || c = '\t' || c = '\r' || c = ',' || c = '!' || c = '?' || c = ';' || c = ')' || c = ']' || c = '>' (** Find mentions (@handle.domain) in text. Returns list of (byte_start, byte_end, handle) *) let find_mentions text = let len = String.length text in let rec scan i acc = if i >= len then List.rev acc else if text.[i] = '@' then (* Found @ - look for handle *) let start = i in let rec read_handle j = if j >= len then j else if is_handle_char text.[j] then read_handle (j + 1) else j in let end_ = read_handle (i + 1) in if end_ > start + 1 then begin let handle = String.sub text (start + 1) (end_ - start - 1) in (* Basic validation: must contain a dot for domain *) if String.contains handle '.' then scan end_ ((start, end_, handle) :: acc) else scan end_ acc end else scan (i + 1) acc else scan (i + 1) acc in scan 0 [] (** Find URLs (http:// or https://) in text. Returns list of (byte_start, byte_end, url) *) let find_urls text = let len = String.length text in let rec scan i acc = if i >= len - 7 then List.rev acc (* Need at least "http://" *) else let is_http = i + 7 <= len && String.sub text i 7 = "http://" in let is_https = i + 8 <= len && String.sub text i 8 = "https://" in if is_http || is_https then let start = i in let rec read_url j = if j >= len then j else if is_url_boundary text.[j] then j else read_url (j + 1) in let end_ = read_url (if is_https then i + 8 else i + 7) in let url = String.sub text start (end_ - start) in scan end_ ((start, end_, url) :: acc) else scan (i + 1) acc in scan 0 [] (** Find hashtags (#tag) in text. Returns list of (byte_start, byte_end, tag) *) let find_tags text = let len = String.length text in let rec scan i acc = if i >= len then List.rev acc else if text.[i] = '#' then let start = i in let rec read_tag j = if j >= len then j else if is_tag_char text.[j] then read_tag (j + 1) else j in let end_ = read_tag (i + 1) in if end_ > start + 1 then begin let tag = String.sub text (start + 1) (end_ - start - 1) in scan end_ ((start, end_, tag) :: acc) end else scan (i + 1) acc else scan (i + 1) acc in scan 0 [] (** Detect all facets in text (mentions, links, tags). Note: Mentions require DID resolution which is not done here - they are returned with placeholder DIDs. *) let detect_facets text = let mentions = find_mentions text in let urls = find_urls text in let tags = find_tags text in let facets = List.map (fun (start, end_, _handle) -> (* In real usage, you'd resolve handle -> DID here *) mention_facet ~start ~end_ ~did:"did:plc:placeholder") mentions @ List.map (fun (start, end_, uri) -> link_facet ~start ~end_ ~uri) urls @ List.map (fun (start, end_, tag) -> tag_facet ~start ~end_ ~tag) tags in { text; facets } (** {1 JSON Encoding} *) type json = Atproto_json.t (** Encode byte slice to JSON *) let byte_slice_to_json slice : json = Atproto_json.object_ [ ("byteStart", Atproto_json.int slice.byte_start); ("byteEnd", Atproto_json.int slice.byte_end); ] (** Encode feature to JSON *) let feature_to_json = function | Mention { did } -> Atproto_json.object_ [ ("$type", Atproto_json.string "app.bsky.richtext.facet#mention"); ("did", Atproto_json.string did); ] | Link { uri } -> Atproto_json.object_ [ ("$type", Atproto_json.string "app.bsky.richtext.facet#link"); ("uri", Atproto_json.string uri); ] | Tag { tag } -> Atproto_json.object_ [ ("$type", Atproto_json.string "app.bsky.richtext.facet#tag"); ("tag", Atproto_json.string tag); ] (** Encode facet to JSON *) let facet_to_json facet : json = Atproto_json.object_ [ ("index", byte_slice_to_json facet.index); ("features", Atproto_json.array (List.map feature_to_json facet.features)); ] (** Encode rich text to JSON (for post record) *) let to_json t : json = if t.facets = [] then Atproto_json.object_ [ ("text", Atproto_json.string t.text) ] else Atproto_json.object_ [ ("text", Atproto_json.string t.text); ("facets", Atproto_json.array (List.map facet_to_json t.facets)); ] (** {1 JSON Decoding} *) let int_of_json ?(default = 0) (json : json) : int = match Atproto_json.to_int64_opt json with | Some i -> if i > Int64.of_int max_int || i < Int64.of_int min_int then default else Int64.to_int i | None -> default (** Decode byte slice from JSON *) let byte_slice_of_json json = match Atproto_json.to_object_opt json with | Some pairs -> let byte_start = match Atproto_json.get "byteStart" pairs with | Some v -> int_of_json v | None -> 0 in let byte_end = match Atproto_json.get "byteEnd" pairs with | Some v -> int_of_json v | None -> 0 in Some { byte_start; byte_end } | None -> None (** Decode feature from JSON *) let feature_of_json json = match Atproto_json.to_object_opt json with | Some pairs -> let type_ = match Atproto_json.get_string_opt "$type" pairs with | Some s -> s | None -> "" in if type_ = "app.bsky.richtext.facet#mention" then match Atproto_json.get_string_opt "did" pairs with | Some did -> Some (Mention { did }) | None -> None else if type_ = "app.bsky.richtext.facet#link" then match Atproto_json.get_string_opt "uri" pairs with | Some uri -> Some (Link { uri }) | None -> None else if type_ = "app.bsky.richtext.facet#tag" then match Atproto_json.get_string_opt "tag" pairs with | Some tag -> Some (Tag { tag }) | None -> None else None | None -> None (** Decode facet from JSON *) let facet_of_json json = match Atproto_json.to_object_opt json with | Some pairs -> ( let index = match Atproto_json.get "index" pairs with | Some idx -> byte_slice_of_json idx | None -> None in let features = match Atproto_json.get "features" pairs with | Some items -> ( match Atproto_json.to_array_opt items with | Some items -> List.filter_map feature_of_json items | None -> []) | None -> [] in match index with Some index -> Some { index; features } | None -> None) | None -> None (** Decode rich text from JSON *) let of_json json = match Atproto_json.to_object_opt json with | Some pairs -> let text = match Atproto_json.get_string_opt "text" pairs with | Some s -> s | None -> "" in let facets = match Atproto_json.get "facets" pairs with | Some items -> ( match Atproto_json.to_array_opt items with | Some items -> List.filter_map facet_of_json items | None -> []) | None -> [] in Some { text; facets } | None -> None (** {1 Utilities} *) (** Get the length of text in bytes *) let byte_length t = String.length t.text (** Get the length of text in Unicode graphemes (approximate) *) let grapheme_length t = (* Simple approximation - counts UTF-8 start bytes *) let count = ref 0 in String.iter (fun c -> let code = Char.code c in if code < 0x80 || code >= 0xC0 then incr count) t.text; !count (** Check if text exceeds Bluesky's limit (300 graphemes) *) let exceeds_limit ?(limit = 300) t = grapheme_length t > limit (** Truncate text to fit within grapheme limit *) let truncate ?(limit = 300) t = if not (exceeds_limit ~limit t) then t else (* Simple truncation - doesn't preserve facets properly *) let text = t.text in let len = String.length text in let rec find_cutoff i graphemes = if i >= len || graphemes >= limit then i else let code = Char.code text.[i] in if code < 0x80 then find_cutoff (i + 1) (graphemes + 1) else if code < 0xC0 then find_cutoff (i + 1) graphemes (* continuation byte *) else if code < 0xE0 then find_cutoff (i + 2) (graphemes + 1) else if code < 0xF0 then find_cutoff (i + 3) (graphemes + 1) else find_cutoff (i + 4) (graphemes + 1) in let cutoff = find_cutoff 0 0 in let new_text = String.sub text 0 cutoff in (* Filter facets that are still within bounds *) let new_facets = List.filter (fun f -> f.index.byte_end <= cutoff) t.facets in { text = new_text; facets = new_facets }