replies timeline only, appview-less bluesky client
at main 349 lines 7.0 kB view raw
// taken and modified from: https://github.com/mary-ext/atcute/blob/trunk/packages/bluesky/richtext-parser/lib/index.ts

// Every pattern below is anchored with `^` because the tokenizer always matches
// against the start of the not-yet-consumed remainder of the input.

/** A backslash followed by one character that is not alphanumeric and not whitespace. */
const ESCAPE_RE = /^\\([^0-9A-Za-z\s])/;

/**
 * `@handle` (ASCII `@` or fullwidth U+FF20). Group 1 is the handle, group 2 is
 * the delimiter that follows it (end of input, whitespace or punctuation).
 */
const MENTION_RE = /^[@\uff20]([a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*(?:\.[a-zA-Z]{2,}))($|\s|\p{P})/u;

/** A bare `did:<method>:<id>`. Group 1 is the whole DID, group 4 the trailing delimiter. */
const DID_RE = /^(did:([a-z0-9]+):([A-Za-z0-9.\-_%:]+))($|\s|\p{P})/u;

/**
 * `#topic` (ASCII `#` not followed by U+FE0F / U+20E3, or fullwidth U+FF03).
 * Group 1 is the topic name, which must contain at least one non-digit
 * character; group 2 is the trailing delimiter.
 *
 * The prefix is mandatory: with an empty alternative in the prefix group every
 * plain word would be tokenized as a topic.
 */
const TOPIC_RE =
  /^(?:#(?!\ufe0f|\u20e3)|\uff03)([\p{N}]*[\p{L}\p{M}\p{Pc}][\p{L}\p{M}\p{Pc}\p{N}]*)($|\s|\p{P})/u;

/** `:name:` */
const EMOTE_RE = /^:([\w-]+):/;

/** A bare `http://` or `https://` URL running up to the next whitespace. */
const AUTOLINK_RE = /^https?:\/\/[\S]+/;
/** Trailing `.`, `,`, `;` and an unbalanced closing parenthesis, removed from an autolink. */
const AUTOLINK_BACKPEDAL_RE = /(?:(?<!\(.*)\))?[.,;]*$/;

/** `[label](url "optional title")`. Group 1 is the label, group 2 the URL. */
const LINK_RE =
  /^\[((?:\[[^\]]*\]|[^[\]]|\](?=[^[]*\]))*)\]\(\s*<?((?:\([^)]*\)|[^\s\\]|\\.)*?)>?(?:\s+['"]([^]*?)['"])?\s*\)/;
/** Backslash escapes inside a link URL. */
const UNESCAPE_URL_RE = /\\([^0-9A-Za-z\s])/g;

/** `_text_` (group 1) or `*text*` (group 2). */
const EMPHASIS_RE =
  /^\b_((?:__|\\[^]|[^\\_])+?)_\b|^\*(?=\S)((?:\*\*|\\[^]|\s+(?:\\[^]|[^\s*\\]|\*\*)|[^\s*\\])+?)\*(?!\*)/;

/** `**text**` */
const STRONG_RE = /^\*\*((?:\\[^]|[^\\])+?)\*\*(?!\*)/;

/** `__text__` */
const UNDERLINE_RE = /^__((?:\\[^]|~(?!~)|[^~\\]|\s(?!~~))+?)__(?!_)/;

/** `~~text~~` */
const DELETE_RE = /^~~((?:\\[^]|~(?!~)|[^~\\]|\s(?!~~))+?)~~/;

/** A code span fenced by an equal number of backticks on both sides. */
const CODE_RE = /^(`+)([^]*?[^`])\1(?!`)/;
/** Single padding space separating the fence from a backtick at either end of the content. */
const CODE_ESCAPE_BACKTICKS_RE = /^ (?= *`)|(` *) $/g;

/**
 * Plain text: the shortest non-empty run that ends at the end of input, right
 * before a character that may start another token, or, when preceded by
 * whitespace or punctuation, right before a mention sign, topic sign or DID.
 */
const TEXT_RE =
  /^[^]+?(?:(?=$|[~*_`:\\[]|https?:\/\/)|(?<=\s|[(){}/\\[\]\-|:;'".,=+])(?=[@\uff20#\uff03]|did:[a-z0-9]+:))/;

/** A backslash-escaped character; `escaped` is the character without the backslash. */
export interface EscapeToken {
  type: 'escape';
  raw: string;
  escaped: string;
}

/** A mention, written either as `@handle` (sets `handle`) or as a bare DID (sets `did`). */
export interface MentionToken {
  type: 'mention';
  raw: string;
  handle?: string;
  did?: string;
}

/** A topic (hashtag); `name` excludes the leading sign. */
export interface TopicToken {
  type: 'topic';
  raw: string;
  name: string;
}

/** An emote written as `:name:`. */
export interface EmoteToken {
  type: 'emote';
  raw: string;
  name: string;
}

/** A bare URL found in the text. */
export interface AutolinkToken {
  type: 'autolink';
  raw: string;
  url: string;
}

/** A `[label](url)` link; `children` are the tokens of the label. */
export interface LinkToken {
  type: 'link';
  raw: string;
  url: string;
  children: Token[];
}

/** `__underlined__` content. */
export interface UnderlineToken {
  type: 'underline';
  raw: string;
  children: Token[];
}

/** `**strong**` content. */
export interface StrongToken {
  type: 'strong';
  raw: string;
  children: Token[];
}

/** `_emphasised_` or `*emphasised*` content. */
export interface EmphasisToken {
  type: 'emphasis';
  raw: string;
  children: Token[];
}

/** `~~deleted~~` content. */
export interface DeleteToken {
  type: 'delete';
  raw: string;
  children: Token[];
}

/** A backtick-fenced code span; `content` has the fence and padding removed. */
export interface CodeToken {
  type: 'code';
  raw: string;
  content: string;
}

/** A run of plain text. */
export interface TextToken {
  type: 'text';
  raw: string;
  content: string;
}

/** Any token produced by {@link tokenize}; `raw` is the exact source slice it consumed. */
export type Token =
  | EscapeToken
  | MentionToken
  | TopicToken
  | EmoteToken
  | AutolinkToken
  | LinkToken
  | StrongToken
  | EmphasisToken
  | UnderlineToken
  | DeleteToken
  | CodeToken
  | TextToken;

/**
 * Removes the captured trailing delimiter from a full regex match. The mention,
 * DID and topic patterns need the delimiter to validate the match, but it must
 * stay in the input for the next token.
 */
const dropTrailingDelimiter = (matched: string, delimiter: string): string =>
  delimiter.length > 0 ? matched.slice(0, -delimiter.length) : matched;

/** Matches a backslash escape at the start of `src`. */
const tokenizeEscape = (src: string): EscapeToken | undefined => {
  const match = ESCAPE_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'escape',
    raw: match[0],
    escaped: match[1]
  };
};

/** Matches an `@handle` mention or, failing that, a bare DID at the start of `src`. */
const tokenizeMention = (src: string): MentionToken | undefined => {
  const match = MENTION_RE.exec(src);
  // A handle immediately followed by another `@` is not treated as a mention.
  if (match && match[2] !== '@') {
    return {
      type: 'mention',
      raw: dropTrailingDelimiter(match[0], match[2]),
      handle: match[1]
    };
  }

  const didMatch = DID_RE.exec(src);
  if (didMatch) {
    return {
      type: 'mention',
      raw: dropTrailingDelimiter(didMatch[0], didMatch[4]),
      did: didMatch[1]
    };
  }

  return undefined;
};

/** Matches a `#topic` at the start of `src`. */
const tokenizeTopic = (src: string): TopicToken | undefined => {
  const match = TOPIC_RE.exec(src);
  // A topic immediately followed by another `#` is not treated as a topic.
  if (match && match[2] !== '#') {
    return {
      type: 'topic',
      raw: dropTrailingDelimiter(match[0], match[2]),
      name: match[1]
    };
  }

  return undefined;
};

/** Matches a `:name:` emote at the start of `src`. */
const tokenizeEmote = (src: string): EmoteToken | undefined => {
  const match = EMOTE_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'emote',
    raw: match[0],
    name: match[1]
  };
};

/** Matches a bare URL at the start of `src`, leaving trailing punctuation unconsumed. */
const tokenizeAutolink = (src: string): AutolinkToken | undefined => {
  const match = AUTOLINK_RE.exec(src);
  if (!match) {
    return undefined;
  }

  // `raw` is the trimmed URL so the stripped punctuation is tokenized as text.
  const url = match[0].replace(AUTOLINK_BACKPEDAL_RE, '');

  return {
    type: 'autolink',
    raw: url,
    url: url
  };
};

/** Matches a `[label](url)` link at the start of `src`; the label is tokenized recursively. */
const tokenizeLink = (src: string): LinkToken | undefined => {
  const match = LINK_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'link',
    raw: match[0],
    url: match[2].replace(UNESCAPE_URL_RE, '$1'),
    children: tokenize(match[1])
  };
};

const _tokenizeEmphasis = (src: string): EmphasisToken | undefined => {
  const match = EMPHASIS_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'emphasis',
    raw: match[0],
    // Group 2 is the `*…*` form, group 1 the `_…_` form.
    children: tokenize(match[2] || match[1])
  };
};

const _tokenizeStrong = (src: string): StrongToken | undefined => {
  const match = STRONG_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'strong',
    raw: match[0],
    children: tokenize(match[1])
  };
};

const _tokenizeUnderline = (src: string): UnderlineToken | undefined => {
  const match = UNDERLINE_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'underline',
    raw: match[0],
    children: tokenize(match[1])
  };
};

/**
 * Tries emphasis, strong and underline at the start of `src` and returns the
 * candidate that consumes the most input. On a tie the earlier candidate in
 * that order wins.
 */
const tokenizeEmStrongU = (
  src: string
): EmphasisToken | StrongToken | UnderlineToken | undefined => {
  const candidates = [_tokenizeEmphasis(src), _tokenizeStrong(src), _tokenizeUnderline(src)];

  let longest: EmphasisToken | StrongToken | UnderlineToken | undefined;
  for (const candidate of candidates) {
    if (candidate && (!longest || candidate.raw.length > longest.raw.length)) {
      longest = candidate;
    }
  }

  return longest;
};

/** Matches `~~deleted~~` content at the start of `src`. */
const tokenizeDelete = (src: string): DeleteToken | undefined => {
  const match = DELETE_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'delete',
    raw: match[0],
    children: tokenize(match[1])
  };
};

/** Matches a backtick-fenced code span at the start of `src`. */
const tokenizeCode = (src: string): CodeToken | undefined => {
  const match = CODE_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'code',
    raw: match[0],
    content: match[2].replace(CODE_ESCAPE_BACKTICKS_RE, '$1')
  };
};

/** Matches a run of plain text at the start of `src`. */
const tokenizeText = (src: string): TextToken | undefined => {
  const match = TEXT_RE.exec(src);
  if (!match) {
    return undefined;
  }

  return {
    type: 'text',
    raw: match[0],
    content: match[0]
  };
};

/**
 * Splits rich text into a flat list of tokens; tokens with `children` hold the
 * recursively tokenized inner content.
 *
 * At each position the structured tokenizers are tried in a fixed order and the
 * first match wins; plain text is the fallback. Consecutive text matches are
 * merged into a single {@link TextToken}.
 *
 * @param src the text to tokenize
 * @returns the tokens in source order; empty for an empty string
 * @throws Error if no tokenizer can consume the remaining input
 */
export const tokenize = (src: string): Token[] => {
  const tokens: Token[] = [];

  let last: Token | undefined;
  let token: Token | undefined;

  while (src) {
    last = token;

    token =
      tokenizeEscape(src) ||
      tokenizeMention(src) ||
      tokenizeAutolink(src) ||
      tokenizeTopic(src) ||
      tokenizeEmote(src) ||
      tokenizeLink(src) ||
      tokenizeEmStrongU(src) ||
      tokenizeDelete(src) ||
      tokenizeCode(src);

    if (token) {
      src = src.slice(token.raw.length);
      tokens.push(token);
      continue;
    }

    token = tokenizeText(src);
    if (!token) {
      // Nothing consumed any input; bail out instead of spinning forever.
      throw new Error(`infinite loop encountered`);
    }

    src = src.slice(token.raw.length);

    if (last && last.type === 'text') {
      // Extend the previous text token instead of emitting adjacent fragments.
      last.raw += token.raw;
      last.content += token.content;
      token = last;
    } else {
      tokens.push(token);
    }
  }

  return tokens;
};