Bluesky app fork with some witchin' additions 💫
at feat/markdown-basic 270 lines 7.9 kB view raw
import {type AppBskyRichtextFacet} from '@atproto/api'
import {parse, postprocess, preprocess} from 'micromark'

type FormatKind = 'bold' | 'italic' | 'underline' | 'link'

/** A formatting span expressed in UTF-16 offsets, `[start, end)`. */
interface FormatRange {
  kind: FormatKind
  start: number
  end: number
  uri?: string
}

// A backslash followed by ASCII punctuation is a CommonMark backslash escape;
// the capture group is the character that should survive.
const ESCAPED_ASCII_PUNCTUATION = /\\([!-\/:-@\[-`{-~])/g

/**
 * Returns the ranges of the given `kind`, sorted by start, with overlapping
 * or touching ranges fused into a single range. The input objects are not
 * mutated (each output range is a copy).
 */
function mergeOverlapping(
  ranges: FormatRange[],
  kind: FormatKind,
): FormatRange[] {
  const filtered = ranges
    .filter(f => f.kind === kind)
    .sort((a, b) => a.start - b.start)
  const out: FormatRange[] = []
  for (const r of filtered) {
    const prev = out[out.length - 1]
    if (prev && r.start <= prev.end) {
      prev.end = Math.max(prev.end, r.end)
    } else {
      out.push({...r})
    }
  }
  return out
}

/**
 * Runs micromark's preprocess → parse → postprocess pipeline over `text` and
 * returns the resulting event list.
 *
 * @returns The events, or `null` if any stage throws (callers treat that as
 * "leave the text untouched").
 */
export function parseMarkdownEvents(text: string) {
  try {
    const chunks = preprocess()(text, undefined, true)
    return postprocess(parse().document().write(chunks))
  } catch {
    return null
  }
}

/**
 * Converts inline markdown (emphasis, strong, `__underline__`, links and
 * backslash escapes) into plain text plus rich-text facets.
 *
 * @param text Raw text that may contain markdown.
 * @returns
 *  - `text`: the input with markdown delimiters, link destinations and
 *    escape backslashes removed;
 *  - `facets`: non-overlapping facets whose `index` values are UTF-8 byte
 *    offsets into the returned `text`;
 *  - `escapedByteStarts`: UTF-8 byte offsets (into the returned `text`) of
 *    characters that were backslash-escaped in the input.
 *
 * If parsing fails or nothing needs stripping, the input is returned
 * unchanged with no facets.
 */
export function parseMarkdownRichText(text: string): {
  text: string
  facets: AppBskyRichtextFacet.Main[]
  escapedByteStarts: Set<number>
} {
  const events = parseMarkdownEvents(text)
  if (!events) return {text, facets: [], escapedByteStarts: new Set()}

  const stripRanges: {start: number; end: number}[] = []
  const formatRanges: FormatRange[] = []
  const escapedCharOrigPositions: number[] = []

  // Links: labelText comes before resource in the event stream, so we
  // buffer link state per nesting level until we can emit the full range.
  const linkStack: {
    labelTextStart: number
    labelTextEnd: number
    uri: string
  }[] = []
  // Track whether each strong span uses _ (underline) or * (bold).
  const strongKindStack: ('bold' | 'underline')[] = []
  let awaitingStrongKind = false

  for (const [eventType, token] of events) {
    const {type} = token
    const start = token.start.offset
    const end = token.end.offset

    if (eventType === 'enter') {
      switch (type) {
        case 'emphasisSequence':
        case 'labelMarker': // [ and ]
        case 'resource': // (url)
          stripRanges.push({start, end})
          break
        case 'strongSequence':
          // The first sequence after entering `strong` is the opener; its
          // character decides bold vs. underline for the whole span.
          if (awaitingStrongKind) {
            strongKindStack.push(text[start] === '_' ? 'underline' : 'bold')
            awaitingStrongKind = false
          }
          stripRanges.push({start, end})
          break
        case 'strong':
          awaitingStrongKind = true
          break
        case 'characterEscape':
          stripRanges.push({start, end: start + 1}) // strip only the backslash
          escapedCharOrigPositions.push(start + 1) // position of the kept char
          break
        case 'link':
          linkStack.push({labelTextStart: -1, labelTextEnd: -1, uri: ''})
          break
        case 'labelText': {
          const top = linkStack[linkStack.length - 1]
          if (top) {
            top.labelTextStart = start
            top.labelTextEnd = end
          }
          break
        }
        case 'resourceDestinationString': {
          const top = linkStack[linkStack.length - 1]
          if (top) {
            // Backslash escapes are valid inside a destination; drop the
            // backslashes so the facet carries the intended URL.
            let uri = text
              .slice(start, end)
              .replace(ESCAPED_ASCII_PUNCTUATION, '$1')
            if (
              !uri.startsWith('http://') &&
              !uri.startsWith('https://') &&
              !uri.startsWith('mailto:')
            ) {
              uri = `https://${uri}`
            }
            top.uri = uri
          }
          break
        }
        case 'emphasisText':
          formatRanges.push({kind: 'italic', start, end})
          break
        case 'strongText':
          formatRanges.push({
            kind: strongKindStack[strongKindStack.length - 1] ?? 'bold',
            start,
            end,
          })
          break
      }
    } else {
      if (type === 'strong') {
        strongKindStack.pop()
      } else if (type === 'link') {
        const info = linkStack.pop()
        if (info && info.labelTextStart >= 0 && info.uri) {
          formatRanges.push({
            kind: 'link',
            start: info.labelTextStart,
            end: info.labelTextEnd,
            uri: info.uri,
          })
        }
      }
    }
  }

  if (stripRanges.length === 0) {
    return {text, facets: [], escapedByteStarts: new Set()}
  }

  // Strip ranges may nest (e.g. a characterEscape reported inside a link's
  // `resource`), so mark removed characters individually instead of walking
  // the ranges in order: a per-char mask is immune to overlap and ordering.
  const removed = new Uint8Array(text.length)
  for (const {start, end} of stripRanges) {
    removed.fill(1, Math.max(0, start), Math.min(end, text.length))
  }

  // removedBefore[i] = number of chars stripped from positions [0, i).
  const removedBefore = new Int32Array(text.length + 1)
  let stripped = ''
  for (let i = 0; i < text.length; i++) {
    removedBefore[i + 1] = removedBefore[i] + removed[i]
    if (!removed[i]) stripped += text[i]
  }

  const toStripped = (orig: number): number => orig - removedBefore[orig]

  // Build char-index → UTF-8-byte-offset map once so facet byte positions
  // don't require O(n) UnicodeString construction per boundary.
  const charToUtf8Byte = new Int32Array(stripped.length + 1)
  {
    let b = 0
    for (let c = 0; c < stripped.length; ) {
      charToUtf8Byte[c] = b
      const cp = stripped.codePointAt(c)!
      b += cp <= 0x7f ? 1 : cp <= 0x7ff ? 2 : cp <= 0xffff ? 3 : 4
      c += cp > 0xffff ? 2 : 1
    }
    charToUtf8Byte[stripped.length] = b
  }

  const escapedByteStarts = new Set<number>()
  for (const orig of escapedCharOrigPositions) {
    // An escape that sits inside a removed span has no surviving character
    // to point at, so it must not contribute an offset.
    if (removed[orig]) continue
    escapedByteStarts.add(charToUtf8Byte[toStripped(orig)])
  }

  if (formatRanges.length === 0) {
    return {text: stripped, facets: [], escapedByteStarts}
  }

  // Map format ranges to stripped char positions, discard empty ranges
  const mapped = formatRanges
    .map(f => ({
      kind: f.kind,
      start: toStripped(f.start),
      end: toStripped(f.end),
      uri: f.uri,
    }))
    .filter(f => f.start < f.end)

  if (mapped.length === 0) {
    return {text: stripped, facets: [], escapedByteStarts}
  }

  // Merge overlapping ranges of the same kind (bold or italic) so that nested
  // same-kind emphasis (e.g. *emph *with emph* in it*) doesn't lose the outer
  // range when the inner one closes first in the sweep below.
  const mergedMapped = [
    ...(['bold', 'italic', 'underline'] as const).flatMap(k =>
      mergeOverlapping(mapped, k),
    ),
    ...mapped.filter(f => f.kind === 'link'),
  ]

  // Interval sweep to produce non-overlapping facets with merged features.
  // This correctly handles bold+italic on the same range (e.g. ***text***).
  type SweepEvent = {
    pos: number
    isOpen: boolean
    kind: FormatKind
    uri?: string
  }
  const sweep: SweepEvent[] = []
  for (const f of mergedMapped) {
    sweep.push({pos: f.start, isOpen: true, kind: f.kind, uri: f.uri})
    sweep.push({pos: f.end, isOpen: false, kind: f.kind, uri: f.uri})
  }
  // At equal positions process closes before opens. `active` holds one entry
  // per kind, so when two links touch ([a](x)[b](y)) handling the open first
  // would let the earlier link's close delete the link that just opened.
  // Empty ranges were filtered above, so a close never precedes its own open.
  sweep.sort((a, b) => a.pos - b.pos || Number(a.isOpen) - Number(b.isOpen))

  const active = new Map<FormatKind, string | undefined>()
  let curPos = 0
  const facets: AppBskyRichtextFacet.Main[] = []

  for (const ev of sweep) {
    // Emit the segment that ends at this event using the features that were
    // active across it, then apply the event.
    if (ev.pos > curPos && active.size > 0) {
      const features: AppBskyRichtextFacet.Main['features'] = []
      for (const kind of ['bold', 'italic', 'underline'] as const) {
        if (active.has(kind))
          // These $type values are not part of the imported facet feature
          // union, hence the cast.
          features.push({$type: `app.bsky.richtext.facet#${kind}`} as any)
      }
      if (active.has('link'))
        features.push({
          $type: 'app.bsky.richtext.facet#link',
          uri: active.get('link')!,
        })
      facets.push({
        index: {
          byteStart: charToUtf8Byte[curPos],
          byteEnd: charToUtf8Byte[ev.pos],
        },
        features,
      })
    }
    curPos = ev.pos
    if (ev.isOpen) {
      active.set(ev.kind, ev.uri)
    } else {
      active.delete(ev.kind)
    }
  }

  return {text: stripped, facets, escapedByteStarts}
}