atproto libraries implementation in ocaml
at main 11 kB view raw
1(** RichText handling for AT Protocol. 2 3 This module provides facilities for working with rich text in Bluesky posts, 4 including facets for mentions, links, and hashtags. 5 6 Facets are byte-indexed annotations that mark up portions of text. *) 7 8(** {1 Types} *) 9 10type byte_slice = { byte_start : int; byte_end : int } 11(** A byte range within text *) 12 13type mention = { did : string } 14(** Mention facet feature - links to a user *) 15 16type link = { uri : string } 17(** Link facet feature - external URL *) 18 19type tag = { tag : string } 20(** Tag/hashtag facet feature *) 21 22type feature = 23 | Mention of mention 24 | Link of link 25 | Tag of tag (** Facet feature types *) 26 27type facet = { index : byte_slice; features : feature list } 28(** A facet annotation on text *) 29 30type t = { text : string; facets : facet list } 31(** Rich text with facets *) 32 33(** {1 Construction} *) 34 35(** Create plain text with no facets *) 36let of_string text = { text; facets = [] } 37 38(** Create rich text with facets *) 39let create ~text ~facets = { text; facets } 40 41(** Get the plain text *) 42let text t = t.text 43 44(** Get the facets *) 45let facets t = t.facets 46 47(** {1 Facet Creation} *) 48 49(** Create a byte slice *) 50let byte_slice ~start ~end_ = { byte_start = start; byte_end = end_ } 51 52(** Create a mention facet *) 53let mention_facet ~start ~end_ ~did = 54 { index = byte_slice ~start ~end_; features = [ Mention { did } ] } 55 56(** Create a link facet *) 57let link_facet ~start ~end_ ~uri = 58 { index = byte_slice ~start ~end_; features = [ Link { uri } ] } 59 60(** Create a tag facet *) 61let tag_facet ~start ~end_ ~tag = 62 { index = byte_slice ~start ~end_; features = [ Tag { tag } ] } 63 64(** Add a facet to rich text *) 65let add_facet t facet = { t with facets = facet :: t.facets } 66 67(** {1 Facet Detection} *) 68 69(** Check if character is valid in a handle *) 70let is_handle_char c = 71 (c >= 'a' && c <= 'z') 72 || (c >= 'A' && c <= 'Z') 73 || (c >= '0' && c <= '9') 74 || c = '.' || c = '-' 75 76(** Check if character is valid in a hashtag *) 77let is_tag_char c = 78 (c >= 'a' && c <= 'z') 79 || (c >= 'A' && c <= 'Z') 80 || (c >= '0' && c <= '9') 81 || c = '_' 82 83(** Check if character is whitespace or punctuation (word boundary for URLs) *) 84let is_url_boundary c = 85 c = ' ' || c = '\n' || c = '\t' || c = '\r' || c = ',' || c = '!' || c = '?' 86 || c = ';' || c = ')' || c = ']' || c = '>' 87 88(** Find mentions (@handle.domain) in text. Returns list of (byte_start, 89 byte_end, handle) *) 90let find_mentions text = 91 let len = String.length text in 92 let rec scan i acc = 93 if i >= len then List.rev acc 94 else if text.[i] = '@' then 95 (* Found @ - look for handle *) 96 let start = i in 97 let rec read_handle j = 98 if j >= len then j 99 else if is_handle_char text.[j] then read_handle (j + 1) 100 else j 101 in 102 let end_ = read_handle (i + 1) in 103 if end_ > start + 1 then begin 104 let handle = String.sub text (start + 1) (end_ - start - 1) in 105 (* Basic validation: must contain a dot for domain *) 106 if String.contains handle '.' then 107 scan end_ ((start, end_, handle) :: acc) 108 else scan end_ acc 109 end 110 else scan (i + 1) acc 111 else scan (i + 1) acc 112 in 113 scan 0 [] 114 115(** Find URLs (http:// or https://) in text. Returns list of (byte_start, 116 byte_end, url) *) 117let find_urls text = 118 let len = String.length text in 119 let rec scan i acc = 120 if i >= len - 7 then List.rev acc (* Need at least "http://" *) 121 else 122 let is_http = i + 7 <= len && String.sub text i 7 = "http://" in 123 let is_https = i + 8 <= len && String.sub text i 8 = "https://" in 124 if is_http || is_https then 125 let start = i in 126 let rec read_url j = 127 if j >= len then j 128 else if is_url_boundary text.[j] then j 129 else read_url (j + 1) 130 in 131 let end_ = read_url (if is_https then i + 8 else i + 7) in 132 let url = String.sub text start (end_ - start) in 133 scan end_ ((start, end_, url) :: acc) 134 else scan (i + 1) acc 135 in 136 scan 0 [] 137 138(** Find hashtags (#tag) in text. Returns list of (byte_start, byte_end, tag) *) 139let find_tags text = 140 let len = String.length text in 141 let rec scan i acc = 142 if i >= len then List.rev acc 143 else if text.[i] = '#' then 144 let start = i in 145 let rec read_tag j = 146 if j >= len then j 147 else if is_tag_char text.[j] then read_tag (j + 1) 148 else j 149 in 150 let end_ = read_tag (i + 1) in 151 if end_ > start + 1 then begin 152 let tag = String.sub text (start + 1) (end_ - start - 1) in 153 scan end_ ((start, end_, tag) :: acc) 154 end 155 else scan (i + 1) acc 156 else scan (i + 1) acc 157 in 158 scan 0 [] 159 160(** Detect all facets in text (mentions, links, tags). Note: Mentions require 161 DID resolution which is not done here - they are returned with placeholder 162 DIDs. *) 163let detect_facets text = 164 let mentions = find_mentions text in 165 let urls = find_urls text in 166 let tags = find_tags text in 167 let facets = 168 List.map 169 (fun (start, end_, _handle) -> 170 (* In real usage, you'd resolve handle -> DID here *) 171 mention_facet ~start ~end_ ~did:"did:plc:placeholder") 172 mentions 173 @ List.map (fun (start, end_, uri) -> link_facet ~start ~end_ ~uri) urls 174 @ List.map (fun (start, end_, tag) -> tag_facet ~start ~end_ ~tag) tags 175 in 176 { text; facets } 177 178(** {1 JSON Encoding} *) 179 180type json = Atproto_json.t 181 182(** Encode byte slice to JSON *) 183let byte_slice_to_json slice : json = 184 Atproto_json.object_ 185 [ 186 ("byteStart", Atproto_json.int slice.byte_start); 187 ("byteEnd", Atproto_json.int slice.byte_end); 188 ] 189 190(** Encode feature to JSON *) 191let feature_to_json = function 192 | Mention { did } -> 193 Atproto_json.object_ 194 [ 195 ("$type", Atproto_json.string "app.bsky.richtext.facet#mention"); 196 ("did", Atproto_json.string did); 197 ] 198 | Link { uri } -> 199 Atproto_json.object_ 200 [ 201 ("$type", Atproto_json.string "app.bsky.richtext.facet#link"); 202 ("uri", Atproto_json.string uri); 203 ] 204 | Tag { tag } -> 205 Atproto_json.object_ 206 [ 207 ("$type", Atproto_json.string "app.bsky.richtext.facet#tag"); 208 ("tag", Atproto_json.string tag); 209 ] 210 211(** Encode facet to JSON *) 212let facet_to_json facet : json = 213 Atproto_json.object_ 214 [ 215 ("index", byte_slice_to_json facet.index); 216 ("features", Atproto_json.array (List.map feature_to_json facet.features)); 217 ] 218 219(** Encode rich text to JSON (for post record) *) 220let to_json t : json = 221 if t.facets = [] then 222 Atproto_json.object_ [ ("text", Atproto_json.string t.text) ] 223 else 224 Atproto_json.object_ 225 [ 226 ("text", Atproto_json.string t.text); 227 ("facets", Atproto_json.array (List.map facet_to_json t.facets)); 228 ] 229 230(** {1 JSON Decoding} *) 231 232let int_of_json ?(default = 0) (json : json) : int = 233 match Atproto_json.to_int64_opt json with 234 | Some i -> 235 if i > Int64.of_int max_int || i < Int64.of_int min_int then default 236 else Int64.to_int i 237 | None -> default 238 239(** Decode byte slice from JSON *) 240let byte_slice_of_json json = 241 match Atproto_json.to_object_opt json with 242 | Some pairs -> 243 let byte_start = 244 match Atproto_json.get "byteStart" pairs with 245 | Some v -> int_of_json v 246 | None -> 0 247 in 248 let byte_end = 249 match Atproto_json.get "byteEnd" pairs with 250 | Some v -> int_of_json v 251 | None -> 0 252 in 253 Some { byte_start; byte_end } 254 | None -> None 255 256(** Decode feature from JSON *) 257let feature_of_json json = 258 match Atproto_json.to_object_opt json with 259 | Some pairs -> 260 let type_ = 261 match Atproto_json.get_string_opt "$type" pairs with 262 | Some s -> s 263 | None -> "" 264 in 265 if type_ = "app.bsky.richtext.facet#mention" then 266 match Atproto_json.get_string_opt "did" pairs with 267 | Some did -> Some (Mention { did }) 268 | None -> None 269 else if type_ = "app.bsky.richtext.facet#link" then 270 match Atproto_json.get_string_opt "uri" pairs with 271 | Some uri -> Some (Link { uri }) 272 | None -> None 273 else if type_ = "app.bsky.richtext.facet#tag" then 274 match Atproto_json.get_string_opt "tag" pairs with 275 | Some tag -> Some (Tag { tag }) 276 | None -> None 277 else None 278 | None -> None 279 280(** Decode facet from JSON *) 281let facet_of_json json = 282 match Atproto_json.to_object_opt json with 283 | Some pairs -> ( 284 let index = 285 match Atproto_json.get "index" pairs with 286 | Some idx -> byte_slice_of_json idx 287 | None -> None 288 in 289 let features = 290 match Atproto_json.get "features" pairs with 291 | Some items -> ( 292 match Atproto_json.to_array_opt items with 293 | Some items -> List.filter_map feature_of_json items 294 | None -> []) 295 | None -> [] 296 in 297 match index with Some index -> Some { index; features } | None -> None) 298 | None -> None 299 300(** Decode rich text from JSON *) 301let of_json json = 302 match Atproto_json.to_object_opt json with 303 | Some pairs -> 304 let text = 305 match Atproto_json.get_string_opt "text" pairs with 306 | Some s -> s 307 | None -> "" 308 in 309 let facets = 310 match Atproto_json.get "facets" pairs with 311 | Some items -> ( 312 match Atproto_json.to_array_opt items with 313 | Some items -> List.filter_map facet_of_json items 314 | None -> []) 315 | None -> [] 316 in 317 Some { text; facets } 318 | None -> None 319 320(** {1 Utilities} *) 321 322(** Get the length of text in bytes *) 323let byte_length t = String.length t.text 324 325(** Get the length of text in Unicode graphemes (approximate) *) 326let grapheme_length t = 327 (* Simple approximation - counts UTF-8 start bytes *) 328 let count = ref 0 in 329 String.iter 330 (fun c -> 331 let code = Char.code c in 332 if code < 0x80 || code >= 0xC0 then incr count) 333 t.text; 334 !count 335 336(** Check if text exceeds Bluesky's limit (300 graphemes) *) 337let exceeds_limit ?(limit = 300) t = grapheme_length t > limit 338 339(** Truncate text to fit within grapheme limit *) 340let truncate ?(limit = 300) t = 341 if not (exceeds_limit ~limit t) then t 342 else 343 (* Simple truncation - doesn't preserve facets properly *) 344 let text = t.text in 345 let len = String.length text in 346 let rec find_cutoff i graphemes = 347 if i >= len || graphemes >= limit then i 348 else 349 let code = Char.code text.[i] in 350 if code < 0x80 then find_cutoff (i + 1) (graphemes + 1) 351 else if code < 0xC0 then 352 find_cutoff (i + 1) graphemes (* continuation byte *) 353 else if code < 0xE0 then find_cutoff (i + 2) (graphemes + 1) 354 else if code < 0xF0 then find_cutoff (i + 3) (graphemes + 1) 355 else find_cutoff (i + 4) (graphemes + 1) 356 in 357 let cutoff = find_cutoff 0 0 in 358 let new_text = String.sub text 0 cutoff in 359 (* Filter facets that are still within bounds *) 360 let new_facets = 361 List.filter (fun f -> f.index.byte_end <= cutoff) t.facets 362 in 363 { text = new_text; facets = new_facets }