atproto libraries implementation in ocaml
1(** RichText handling for AT Protocol.
2
3 This module provides facilities for working with rich text in Bluesky posts,
4 including facets for mentions, links, and hashtags.
5
6 Facets are byte-indexed annotations that mark up portions of text. *)
7
8(** {1 Types} *)
9
10type byte_slice = { byte_start : int; byte_end : int }
11(** A byte range within text *)
12
13type mention = { did : string }
14(** Mention facet feature - links to a user *)
15
16type link = { uri : string }
17(** Link facet feature - external URL *)
18
19type tag = { tag : string }
20(** Tag/hashtag facet feature *)
21
22type feature =
23 | Mention of mention
24 | Link of link
25 | Tag of tag (** Facet feature types *)
26
27type facet = { index : byte_slice; features : feature list }
28(** A facet annotation on text *)
29
30type t = { text : string; facets : facet list }
31(** Rich text with facets *)
32
33(** {1 Construction} *)
34
35(** Create plain text with no facets *)
36let of_string text = { text; facets = [] }
37
38(** Create rich text with facets *)
39let create ~text ~facets = { text; facets }
40
41(** Get the plain text *)
42let text t = t.text
43
44(** Get the facets *)
45let facets t = t.facets
46
47(** {1 Facet Creation} *)
48
49(** Create a byte slice *)
50let byte_slice ~start ~end_ = { byte_start = start; byte_end = end_ }
51
52(** Create a mention facet *)
53let mention_facet ~start ~end_ ~did =
54 { index = byte_slice ~start ~end_; features = [ Mention { did } ] }
55
56(** Create a link facet *)
57let link_facet ~start ~end_ ~uri =
58 { index = byte_slice ~start ~end_; features = [ Link { uri } ] }
59
60(** Create a tag facet *)
61let tag_facet ~start ~end_ ~tag =
62 { index = byte_slice ~start ~end_; features = [ Tag { tag } ] }
63
64(** Add a facet to rich text *)
65let add_facet t facet = { t with facets = facet :: t.facets }
66
67(** {1 Facet Detection} *)
68
69(** Check if character is valid in a handle *)
70let is_handle_char c =
71 (c >= 'a' && c <= 'z')
72 || (c >= 'A' && c <= 'Z')
73 || (c >= '0' && c <= '9')
74 || c = '.' || c = '-'
75
76(** Check if character is valid in a hashtag *)
77let is_tag_char c =
78 (c >= 'a' && c <= 'z')
79 || (c >= 'A' && c <= 'Z')
80 || (c >= '0' && c <= '9')
81 || c = '_'
82
83(** Check if character is whitespace or punctuation (word boundary for URLs) *)
84let is_url_boundary c =
85 c = ' ' || c = '\n' || c = '\t' || c = '\r' || c = ',' || c = '!' || c = '?'
86 || c = ';' || c = ')' || c = ']' || c = '>'
87
88(** Find mentions (@handle.domain) in text. Returns list of (byte_start,
89 byte_end, handle) *)
90let find_mentions text =
91 let len = String.length text in
92 let rec scan i acc =
93 if i >= len then List.rev acc
94 else if text.[i] = '@' then
95 (* Found @ - look for handle *)
96 let start = i in
97 let rec read_handle j =
98 if j >= len then j
99 else if is_handle_char text.[j] then read_handle (j + 1)
100 else j
101 in
102 let end_ = read_handle (i + 1) in
103 if end_ > start + 1 then begin
104 let handle = String.sub text (start + 1) (end_ - start - 1) in
105 (* Basic validation: must contain a dot for domain *)
106 if String.contains handle '.' then
107 scan end_ ((start, end_, handle) :: acc)
108 else scan end_ acc
109 end
110 else scan (i + 1) acc
111 else scan (i + 1) acc
112 in
113 scan 0 []
114
115(** Find URLs (http:// or https://) in text. Returns list of (byte_start,
116 byte_end, url) *)
117let find_urls text =
118 let len = String.length text in
119 let rec scan i acc =
120 if i >= len - 7 then List.rev acc (* Need at least "http://" *)
121 else
122 let is_http = i + 7 <= len && String.sub text i 7 = "http://" in
123 let is_https = i + 8 <= len && String.sub text i 8 = "https://" in
124 if is_http || is_https then
125 let start = i in
126 let rec read_url j =
127 if j >= len then j
128 else if is_url_boundary text.[j] then j
129 else read_url (j + 1)
130 in
131 let end_ = read_url (if is_https then i + 8 else i + 7) in
132 let url = String.sub text start (end_ - start) in
133 scan end_ ((start, end_, url) :: acc)
134 else scan (i + 1) acc
135 in
136 scan 0 []
137
138(** Find hashtags (#tag) in text. Returns list of (byte_start, byte_end, tag) *)
139let find_tags text =
140 let len = String.length text in
141 let rec scan i acc =
142 if i >= len then List.rev acc
143 else if text.[i] = '#' then
144 let start = i in
145 let rec read_tag j =
146 if j >= len then j
147 else if is_tag_char text.[j] then read_tag (j + 1)
148 else j
149 in
150 let end_ = read_tag (i + 1) in
151 if end_ > start + 1 then begin
152 let tag = String.sub text (start + 1) (end_ - start - 1) in
153 scan end_ ((start, end_, tag) :: acc)
154 end
155 else scan (i + 1) acc
156 else scan (i + 1) acc
157 in
158 scan 0 []
159
160(** Detect all facets in text (mentions, links, tags). Note: Mentions require
161 DID resolution which is not done here - they are returned with placeholder
162 DIDs. *)
163let detect_facets text =
164 let mentions = find_mentions text in
165 let urls = find_urls text in
166 let tags = find_tags text in
167 let facets =
168 List.map
169 (fun (start, end_, _handle) ->
170 (* In real usage, you'd resolve handle -> DID here *)
171 mention_facet ~start ~end_ ~did:"did:plc:placeholder")
172 mentions
173 @ List.map (fun (start, end_, uri) -> link_facet ~start ~end_ ~uri) urls
174 @ List.map (fun (start, end_, tag) -> tag_facet ~start ~end_ ~tag) tags
175 in
176 { text; facets }
177
178(** {1 JSON Encoding} *)
179
180type json = Atproto_json.t
181
182(** Encode byte slice to JSON *)
183let byte_slice_to_json slice : json =
184 Atproto_json.object_
185 [
186 ("byteStart", Atproto_json.int slice.byte_start);
187 ("byteEnd", Atproto_json.int slice.byte_end);
188 ]
189
190(** Encode feature to JSON *)
191let feature_to_json = function
192 | Mention { did } ->
193 Atproto_json.object_
194 [
195 ("$type", Atproto_json.string "app.bsky.richtext.facet#mention");
196 ("did", Atproto_json.string did);
197 ]
198 | Link { uri } ->
199 Atproto_json.object_
200 [
201 ("$type", Atproto_json.string "app.bsky.richtext.facet#link");
202 ("uri", Atproto_json.string uri);
203 ]
204 | Tag { tag } ->
205 Atproto_json.object_
206 [
207 ("$type", Atproto_json.string "app.bsky.richtext.facet#tag");
208 ("tag", Atproto_json.string tag);
209 ]
210
211(** Encode facet to JSON *)
212let facet_to_json facet : json =
213 Atproto_json.object_
214 [
215 ("index", byte_slice_to_json facet.index);
216 ("features", Atproto_json.array (List.map feature_to_json facet.features));
217 ]
218
219(** Encode rich text to JSON (for post record) *)
220let to_json t : json =
221 if t.facets = [] then
222 Atproto_json.object_ [ ("text", Atproto_json.string t.text) ]
223 else
224 Atproto_json.object_
225 [
226 ("text", Atproto_json.string t.text);
227 ("facets", Atproto_json.array (List.map facet_to_json t.facets));
228 ]
229
230(** {1 JSON Decoding} *)
231
232let int_of_json ?(default = 0) (json : json) : int =
233 match Atproto_json.to_int64_opt json with
234 | Some i ->
235 if i > Int64.of_int max_int || i < Int64.of_int min_int then default
236 else Int64.to_int i
237 | None -> default
238
239(** Decode byte slice from JSON *)
240let byte_slice_of_json json =
241 match Atproto_json.to_object_opt json with
242 | Some pairs ->
243 let byte_start =
244 match Atproto_json.get "byteStart" pairs with
245 | Some v -> int_of_json v
246 | None -> 0
247 in
248 let byte_end =
249 match Atproto_json.get "byteEnd" pairs with
250 | Some v -> int_of_json v
251 | None -> 0
252 in
253 Some { byte_start; byte_end }
254 | None -> None
255
256(** Decode feature from JSON *)
257let feature_of_json json =
258 match Atproto_json.to_object_opt json with
259 | Some pairs ->
260 let type_ =
261 match Atproto_json.get_string_opt "$type" pairs with
262 | Some s -> s
263 | None -> ""
264 in
265 if type_ = "app.bsky.richtext.facet#mention" then
266 match Atproto_json.get_string_opt "did" pairs with
267 | Some did -> Some (Mention { did })
268 | None -> None
269 else if type_ = "app.bsky.richtext.facet#link" then
270 match Atproto_json.get_string_opt "uri" pairs with
271 | Some uri -> Some (Link { uri })
272 | None -> None
273 else if type_ = "app.bsky.richtext.facet#tag" then
274 match Atproto_json.get_string_opt "tag" pairs with
275 | Some tag -> Some (Tag { tag })
276 | None -> None
277 else None
278 | None -> None
279
280(** Decode facet from JSON *)
281let facet_of_json json =
282 match Atproto_json.to_object_opt json with
283 | Some pairs -> (
284 let index =
285 match Atproto_json.get "index" pairs with
286 | Some idx -> byte_slice_of_json idx
287 | None -> None
288 in
289 let features =
290 match Atproto_json.get "features" pairs with
291 | Some items -> (
292 match Atproto_json.to_array_opt items with
293 | Some items -> List.filter_map feature_of_json items
294 | None -> [])
295 | None -> []
296 in
297 match index with Some index -> Some { index; features } | None -> None)
298 | None -> None
299
300(** Decode rich text from JSON *)
301let of_json json =
302 match Atproto_json.to_object_opt json with
303 | Some pairs ->
304 let text =
305 match Atproto_json.get_string_opt "text" pairs with
306 | Some s -> s
307 | None -> ""
308 in
309 let facets =
310 match Atproto_json.get "facets" pairs with
311 | Some items -> (
312 match Atproto_json.to_array_opt items with
313 | Some items -> List.filter_map facet_of_json items
314 | None -> [])
315 | None -> []
316 in
317 Some { text; facets }
318 | None -> None
319
320(** {1 Utilities} *)
321
322(** Get the length of text in bytes *)
323let byte_length t = String.length t.text
324
325(** Get the length of text in Unicode graphemes (approximate) *)
326let grapheme_length t =
327 (* Simple approximation - counts UTF-8 start bytes *)
328 let count = ref 0 in
329 String.iter
330 (fun c ->
331 let code = Char.code c in
332 if code < 0x80 || code >= 0xC0 then incr count)
333 t.text;
334 !count
335
336(** Check if text exceeds Bluesky's limit (300 graphemes) *)
337let exceeds_limit ?(limit = 300) t = grapheme_length t > limit
338
339(** Truncate text to fit within grapheme limit *)
340let truncate ?(limit = 300) t =
341 if not (exceeds_limit ~limit t) then t
342 else
343 (* Simple truncation - doesn't preserve facets properly *)
344 let text = t.text in
345 let len = String.length text in
346 let rec find_cutoff i graphemes =
347 if i >= len || graphemes >= limit then i
348 else
349 let code = Char.code text.[i] in
350 if code < 0x80 then find_cutoff (i + 1) (graphemes + 1)
351 else if code < 0xC0 then
352 find_cutoff (i + 1) graphemes (* continuation byte *)
353 else if code < 0xE0 then find_cutoff (i + 2) (graphemes + 1)
354 else if code < 0xF0 then find_cutoff (i + 3) (graphemes + 1)
355 else find_cutoff (i + 4) (graphemes + 1)
356 in
357 let cutoff = find_cutoff 0 0 in
358 let new_text = String.sub text 0 cutoff in
359 (* Filter facets that are still within bounds *)
360 let new_facets =
361 List.filter (fun f -> f.index.byte_end <= cutoff) t.facets
362 in
363 { text = new_text; facets = new_facets }