fork
Configure Feed
Select the types of activity you want to include in your feed.
Live video on the AT Protocol
fork
Configure Feed
Select the types of activity you want to include in your feed.
// Taken from atcute's richtext-segmenter:
// https://github.com/mary-ext/atcute/blob/trunk/packages/bluesky/richtext-segmenter/lib/index.ts
// Copied into this repo because we need to import types from @atproto/api, not @atcute/bsky.
4import { Main } from "@atproto/api/dist/client/types/app/bsky/richtext/facet";
5
/**
 * Extracts the element type of an array type.
 * Resolves to `never` when `T` is not an array.
 */
type UnwrapArray<T> = T extends (infer V)[] ? V : never;
7
/**
 * A rich-text facet. Alias of the `Main` type imported from the
 * `@atproto/api` `app/bsky/richtext/facet` types module.
 */
export type Facet = Main;
/** The element type of a facet's `features` array. */
export type FacetFeature = UnwrapArray<Facet["features"]>;
10
/** One contiguous piece of rich text together with the facet features that apply to it. */
export interface RichtextSegment {
  /** The slice of the source string covered by this segment. */
  text: string;
  /** Features decorating `text`, or `undefined` when the segment is undecorated. */
  features: FacetFeature[] | undefined;
}
15
/**
 * Creates a {@link RichtextSegment}. `features` is kept only when `text` is
 * non-empty; an empty segment always carries `undefined` features.
 */
const segment = (
  text: string,
  features: FacetFeature[] | undefined,
): RichtextSegment => {
  return { text, features: text.length > 0 ? features : undefined };
};

/**
 * Splits `text` into an ordered list of segments, attaching each facet's
 * features to the slice of text the facet covers and `undefined` to the text
 * in between.
 *
 * Facet `byteStart`/`byteEnd` values are interpreted as UTF-8 byte offsets and
 * are converted to UTF-16 string indices while scanning. Facets are consumed
 * in array order with a forward-only cursor, so they are expected to be sorted
 * by `byteStart`. Facets with no features, with an empty or inverted byte
 * range, or whose range ends at or before text that was already emitted are
 * skipped and decorate nothing.
 *
 * @param text - The full rich-text string.
 * @param facets - Facets describing the decorated ranges of `text`, if any.
 * @returns The segments in text order. When `facets` is `undefined` or empty,
 *   a single undecorated segment holding all of `text`.
 */
export const segmentize = (
  text: string,
  facets: Facet[] | undefined,
): RichtextSegment[] => {
  if (facets === undefined || facets.length === 0) {
    return [segment(text, undefined)];
  }

  const segments: RichtextSegment[] = [];
  const utf16Length = text.length;
  let utf16Cursor = 0;
  let utf8Cursor = 0;

  /**
   * Walks forward from UTF-16 index `startUtf16` until the shared
   * `utf8Cursor` reaches `endUtf8` or the text ends, and returns the new
   * UTF-16 index. Updates `utf8Cursor` as a side effect.
   */
  const advanceCursor = (startUtf16: number, endUtf8: number): number => {
    let curs = startUtf16;

    // Fast path for ASCII runs. The byte-target check must come first:
    // without it a call whose target is already reached (empty or
    // overlapping facet) would still swallow one ASCII character.
    if (utf8Cursor < endUtf8 && text.charCodeAt(curs) < 0x80) {
      curs += 1;
      utf8Cursor += 1;

      // Check eight code units at a time; OR-ing them together stays below
      // 0x80 only if every one of them is ASCII.
      while (utf8Cursor + 8 <= endUtf8 && curs + 8 <= utf16Length) {
        const combined =
          text.charCodeAt(curs) |
          text.charCodeAt(curs + 1) |
          text.charCodeAt(curs + 2) |
          text.charCodeAt(curs + 3) |
          text.charCodeAt(curs + 4) |
          text.charCodeAt(curs + 5) |
          text.charCodeAt(curs + 6) |
          text.charCodeAt(curs + 7);

        if (combined >= 0x80) {
          break;
        }

        curs += 8;
        utf8Cursor += 8;
      }
    }

    // Process the remaining characters one at a time, counting how many
    // UTF-8 bytes each UTF-16 code unit (or surrogate pair) occupies.
    while (utf8Cursor < endUtf8 && curs < utf16Length) {
      const code = text.charCodeAt(curs);

      if (code < 0x80) {
        curs += 1;
        utf8Cursor += 1;
      } else if (code < 0x800) {
        curs += 1;
        utf8Cursor += 2;
      } else if (code < 0xd800 || code > 0xdbff) {
        curs += 1;
        utf8Cursor += 3;
      } else {
        // High surrogate: consume the pair as one 4-byte code point.
        curs += 2;
        utf8Cursor += 4;
      }
    }

    return curs;
  };

  for (const facet of facets) {
    const { byteStart, byteEnd } = facet.index;
    const features = facet.features;

    // Nothing to decorate: empty/inverted range, no features, or a range
    // that lies entirely inside text that has already been emitted.
    if (
      byteStart >= byteEnd ||
      features.length === 0 ||
      byteEnd <= utf8Cursor
    ) {
      continue;
    }

    // Undecorated text between the previous facet and this one.
    if (utf8Cursor < byteStart) {
      const nextUtf16Cursor = advanceCursor(utf16Cursor, byteStart);
      if (nextUtf16Cursor > utf16Cursor) {
        segments.push(
          segment(text.slice(utf16Cursor, nextUtf16Cursor), undefined),
        );
      }

      utf16Cursor = nextUtf16Cursor;
    }

    // The text covered by the facet itself.
    const facetEndUtf16 = advanceCursor(utf16Cursor, byteEnd);
    if (facetEndUtf16 > utf16Cursor) {
      segments.push(segment(text.slice(utf16Cursor, facetEndUtf16), features));
    }

    utf16Cursor = facetEndUtf16;
  }

  // Trailing undecorated text after the last facet.
  if (utf16Cursor < utf16Length) {
    segments.push(segment(text.slice(utf16Cursor), undefined));
  }

  return segments;
};