atproto libraries implementation in ocaml
1(** Base32-sortable encoding for AT Protocol TIDs.
2
3 This is a non-standard base32 encoding using the alphabet:
4 "234567abcdefghijklmnopqrstuvwxyz"
5
6 This alphabet is designed to produce lexicographically sortable strings when
7 encoding timestamps, which is essential for TID ordering.
8
9 Note: This is NOT the same as RFC 4648 base32 or multibase base32. *)
10
11(** The sortable base32 alphabet used by AT Protocol for TIDs *)
12let alphabet = "234567abcdefghijklmnopqrstuvwxyz"
13
14(** Lookup table for decoding: char code -> value (or -1 if invalid) *)
15let decode_table =
16 let tbl = Array.make 256 (-1) in
17 String.iteri (fun i c -> tbl.(Char.code c) <- i) alphabet;
18 tbl
19
20(** Encode an int64 value to base32-sortable string. Returns the shortest
21 representation (no padding). *)
22let encode_int64 (n : int64) : string =
23 if n = 0L then "2" (* '2' is the zero character in this alphabet *)
24 else
25 let buf = Buffer.create 13 in
26 let rec loop n =
27 if n = 0L then ()
28 else begin
29 let idx = Int64.to_int (Int64.unsigned_rem n 32L) in
30 Buffer.add_char buf alphabet.[idx];
31 loop (Int64.unsigned_div n 32L)
32 end
33 in
34 loop n;
35 (* Reverse the buffer contents *)
36 let s = Buffer.contents buf in
37 let len = String.length s in
38 String.init len (fun i -> s.[len - 1 - i])
39
40(** Encode an int64 value with left-padding to specified length. Uses '2' (the
41 zero character) for padding. *)
42let encode_int64_padded (n : int64) (len : int) : string =
43 let s = encode_int64 n in
44 let slen = String.length s in
45 if slen >= len then s else String.make (len - slen) '2' ^ s
46
47(** Decode a base32-sortable string to int64. Returns Error if the string
48 contains invalid characters. *)
49let decode_int64 (s : string) : (int64, [ `Invalid_char of char ]) result =
50 let len = String.length s in
51 let rec loop acc i =
52 if i >= len then Ok acc
53 else
54 let c = s.[i] in
55 let v = decode_table.(Char.code c) in
56 if v < 0 then Error (`Invalid_char c)
57 else
58 let acc' = Int64.add (Int64.mul acc 32L) (Int64.of_int v) in
59 loop acc' (i + 1)
60 in
61 loop 0L 0
62
63(** Decode a base32-sortable string to int64. Raises Invalid_argument if the
64 string contains invalid characters. *)
65let decode_int64_exn (s : string) : int64 =
66 match decode_int64 s with
67 | Ok n -> n
68 | Error (`Invalid_char c) ->
69 invalid_arg (Printf.sprintf "invalid base32-sortable character: %c" c)
70
71(** Check if a string contains only valid base32-sortable characters *)
72let is_valid (s : string) : bool =
73 String.for_all (fun c -> decode_table.(Char.code c) >= 0) s
74
75(** Encode raw bytes to base32-sortable string. This treats the bytes as a
76 big-endian unsigned integer. *)
77let encode_bytes (b : bytes) : string =
78 let len = Bytes.length b in
79 if len = 0 then "2"
80 else if len <= 8 then begin
81 (* Fits in int64 *)
82 let n = ref 0L in
83 for i = 0 to len - 1 do
84 n :=
85 Int64.add (Int64.shift_left !n 8)
86 (Int64.of_int (Char.code (Bytes.get b i)))
87 done;
88 encode_int64 !n
89 end
90 else begin
91 (* For larger values, process in chunks *)
92 let buf = Buffer.create ((len * 8 / 5) + 1) in
93 (* Simple implementation: convert to base32 digit by digit *)
94 let digits = Array.make ((len * 8 / 5) + 1) 0 in
95 let num_digits = ref 0 in
96 for byte_idx = 0 to len - 1 do
97 let byte = Char.code (Bytes.get b byte_idx) in
98 (* Multiply existing digits by 256 and add new byte *)
99 let carry = ref byte in
100 for i = 0 to !num_digits - 1 do
101 let v = (digits.(i) * 256) + !carry in
102 digits.(i) <- v mod 32;
103 carry := v / 32
104 done;
105 while !carry > 0 do
106 digits.(!num_digits) <- !carry mod 32;
107 carry := !carry / 32;
108 incr num_digits
109 done
110 done;
111 (* Convert digits to characters (in reverse order) *)
112 for i = !num_digits - 1 downto 0 do
113 Buffer.add_char buf alphabet.[digits.(i)]
114 done;
115 if Buffer.length buf = 0 then "2" else Buffer.contents buf
116 end
117
118(** Decode base32-sortable string to bytes. Returns the minimal byte
119 representation (no leading zeros). *)
120let decode_bytes (s : string) : (bytes, [ `Invalid_char of char ]) result =
121 let len = String.length s in
122 if len = 0 then Ok (Bytes.create 0)
123 else begin
124 (* Decode to array of digits first *)
125 let digits = Array.make len 0 in
126 let valid = ref true in
127 let invalid_char = ref '\x00' in
128 for i = 0 to len - 1 do
129 let c = s.[i] in
130 let v = decode_table.(Char.code c) in
131 if v < 0 then begin
132 valid := false;
133 invalid_char := c
134 end
135 else digits.(i) <- v
136 done;
137 if not !valid then Error (`Invalid_char !invalid_char)
138 else begin
139 (* Convert from base32 to bytes *)
140 let bytes_arr = Array.make ((len * 5 / 8) + 1) 0 in
141 let num_bytes = ref 0 in
142 for digit_idx = 0 to len - 1 do
143 (* Multiply existing bytes by 32 and add new digit *)
144 let carry = ref digits.(digit_idx) in
145 for i = 0 to !num_bytes - 1 do
146 let v = (bytes_arr.(i) * 32) + !carry in
147 bytes_arr.(i) <- v land 0xff;
148 carry := v lsr 8
149 done;
150 while !carry > 0 do
151 bytes_arr.(!num_bytes) <- !carry land 0xff;
152 carry := !carry lsr 8;
153 incr num_bytes
154 done
155 done;
156 (* Create bytes in correct order (reverse) *)
157 let result = Bytes.create !num_bytes in
158 for i = 0 to !num_bytes - 1 do
159 Bytes.set result i (Char.chr bytes_arr.(!num_bytes - 1 - i))
160 done;
161 Ok result
162 end
163 end