Bluesky app fork with some witchin' additions 💫 witchsky.app
bluesky fork

Render original HTML text of posts bridged from the Fediverse or Wafrn #26

open opened by maxine.puppykitty.racing targeting main
Labels

None yet.

assignee

None yet.

Participants 4
AT URI
at://did:plc:nmc77zslrwafxn75j66mep6o/sh.tangled.repo.pull/3m7io6kv5sl22
+310 -417
Diff #1
+310 -61
src/components/Post/MastodonHtmlContent.tsx
··· 1 - import {useMemo} from 'react' 2 - import {type StyleProp, type TextStyle, View, ViewStyle} from 'react-native' 3 import {type AppBskyFeedPost} from '@atproto/api' 4 5 - import {sanitizeHtml} from '#/lib/strings/html-sanitizer' 6 import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html' 7 - import { atoms } from '#/alf' 8 import {InlineLinkText} from '#/components/Link' 9 10 11 ··· 17 18 19 20 21 22 23 ··· 27 28 29 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 }: MastodonHtmlContentProps) { 39 const renderMastodonHtml = useRenderMastodonHtml() 40 41 - const htmlContent = useMemo(() => { 42 if (!renderMastodonHtml) return null 43 44 - const fullText = (record as any).fullText as string | undefined 45 - 46 - 47 - 48 49 50 51 if (!rawHtml) return null 52 53 - return sanitizeHtml(rawHtml) 54 - }, [record, renderMastodonHtml]) 55 - 56 - const renderedContent = useMemo(() => { 57 - if (!htmlContent) return null 58 - 59 - // Parse and render with React components on all platforms 60 - return renderHtmlAsReact(htmlContent, numberOfLines, textStyle) 61 - }, [htmlContent, numberOfLines, textStyle]) 62 63 if (!renderedContent) return null 64 65 - return <View style={style}>{renderedContent}</View> 66 } 67 68 - function renderHtmlAsReact( 69 html: string, 70 _numberOfLines?: number, 71 inputTextStyle?: StyleProp<TextStyle>, 72 ): React.ReactNode { 73 const parser = new DOMParser() 74 const doc = parser.parseFromString(html, 'text/html') 75 76 - 77 - 78 - 79 inputTextStyle, 80 ] 81 82 - const renderNode = (node: Node, key: number, insideLink = false): React.ReactNode => { 83 if (node.nodeType === Node.TEXT_NODE) { 84 // Don't wrap text in styled Text component if inside a link 85 - 86 87 88 ··· 92 93 if (node.nodeType === Node.ELEMENT_NODE) { 94 const element = node as Element 95 const children = Array.from(element.childNodes).map((child, i) => 96 - renderNode(child, i, insideLink || element.tagName.toLowerCase() === 'a'), 97 ) 98 99 - switch 
(element.tagName.toLowerCase()) { 100 case 'p': 101 return <P key={key} style={textStyle}>{children}</P> 102 case 'blockquote': ··· 108 case 'pre': 109 return ( 110 <View key={key} style={{backgroundColor: '#f5f5f5', padding: 8, borderRadius: 4, marginVertical: 4}}> 111 - <P style={{...textStyle, fontFamily: 'monospace'}}>{children}</P> 112 </View> 113 ) 114 case 'code': 115 return ( 116 - <Text key={key} style={{...textStyle, fontFamily: 'monospace', backgroundColor: '#f5f5f5', paddingHorizontal: 4, borderRadius: 2}}> 117 {children} 118 </Text> 119 ) 120 case 'strong': 121 case 'b': 122 return ( 123 - <Text key={key} style={{...textStyle, fontWeight: 'bold'}}> 124 {children} 125 </Text> 126 ) 127 case 'em': 128 case 'i': 129 return ( 130 - <Text key={key} style={{...textStyle, fontStyle: 'italic'}}> 131 {children} 132 </Text> 133 ) 134 case 'u': 135 return ( 136 - <Text key={key} style={{...textStyle, textDecorationLine: 'underline'}}> 137 {children} 138 </Text> 139 ) 140 case 'del': 141 return ( 142 - <Text key={key} style={{...textStyle, textDecorationLine: 'line-through'}}> 143 {children} 144 </Text> 145 ) ··· 151 ) 152 case 'ol': 153 const start = element.getAttribute('start') 154 - const reversed = element.getAttribute('reversed') !== null 155 return ( 156 - <View key={key} style={{marginVertical: 4}} data-start={start} data-reversed={reversed}> 157 - {children} 158 </View> 159 ) 160 case 'li': 161 - const value = element.getAttribute('value') 162 - const parentIsOl = element.parentElement?.tagName.toLowerCase() === 'ol' 163 return ( 164 <View key={key} style={{flexDirection: 'row', marginVertical: 2}}> 165 - <Text style={{...textStyle, marginRight: 8}}>{parentIsOl ? (value || '螕脟贸') : '螕脟贸'}</Text> 166 - <Text style={{...textStyle, flex: 1}}>{children}</Text> 167 </View> 168 ) 169 case 'a': 170 const href = element.getAttribute('href') 171 if (href) { ··· 179 to={href} 180 label={linkText} 181 shouldProxy 182 - style={isInvisible ? 
{width: 0, height: 0, position: 'absolute'} : textStyle}> 183 {children} 184 </InlineLinkText> 185 ) ··· 191 const spanClass = element.getAttribute('class') 192 // Handle invisible/ellipsis classes for link formatting 193 if (spanClass?.includes('invisible')) { 194 - return null 195 } 196 if (spanClass?.includes('ellipsis')) { 197 // If inside a link, return plain text, otherwise wrapped 198 - 199 - 200 - 201 - 202 - 203 - 204 - 205 206 207 ··· 213 return children 214 } 215 return <Text key={key} style={textStyle}>{children}</Text> 216 - case 'div': 217 - return <P key={key} style={textStyle}>{children}</P> 218 default: 219 return <Text key={key} style={textStyle}>{children}</Text> 220 } 221 222 223 224 225 226 227 228 229 230 231 232 233 - </View> 234 - ) 235 }
··· 1 + import {useMemo, useState} from 'react' 2 + import { 3 + type LayoutChangeEvent, 4 + type StyleProp, 5 + type TextStyle, 6 + View, 7 + type ViewStyle, 8 + } from 'react-native' 9 import {type AppBskyFeedPost} from '@atproto/api' 10 + import {msg, Trans} from '@lingui/macro' 11 + import {useLingui} from '@lingui/react' 12 13 import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html' 14 + import {atoms as a} from '#/alf' 15 + import {Button, ButtonText} from '#/components/Button' 16 import {InlineLinkText} from '#/components/Link' 17 + import {P, Text} from '#/components/Typography' 18 19 20 ··· 26 27 28 29 + return useMemo(() => { 30 + if (!renderMastodonHtml) return false 31 32 + const fullText = record.fullText as string | undefined 33 + const bridgyOriginalText = record.bridgyOriginalText as 34 + | string 35 + | undefined 36 37 38 ··· 42 43 44 45 + numberOfLines, 46 }: MastodonHtmlContentProps) { 47 const renderMastodonHtml = useRenderMastodonHtml() 48 + const {_} = useLingui() 49 + const [isExpanded, setIsExpanded] = useState(false) 50 + const [contentHeight, setContentHeight] = useState<number | null>(null) 51 + const [isTall, setIsTall] = useState(false) 52 53 + const renderedContent = useMemo(() => { 54 if (!renderMastodonHtml) return null 55 56 + const fullText = record.fullText as string | undefined 57 + const bridgyOriginalText = record.bridgyOriginalText as 58 + | string 59 + | undefined 60 61 62 63 if (!rawHtml) return null 64 65 + // Parse HTML once and sanitize/render in a single pass 66 + return sanitizeAndRenderHtml(rawHtml, numberOfLines, textStyle) 67 + }, [record, renderMastodonHtml, numberOfLines, textStyle]) 68 + 69 + const handleLayout = (event: LayoutChangeEvent) => { 70 + const height = event.nativeEvent.layout.height 71 + if (contentHeight === null) { 72 + setContentHeight(height) 73 + // Consider content "tall" if it's taller than 150px 74 + setIsTall(height > 150) 75 + } 76 + } 77 78 if (!renderedContent) return 
null 79 80 + const shouldCollapse = isTall && !isExpanded 81 + 82 + return ( 83 + <View style={style}> 84 + <View 85 + style={shouldCollapse ? {maxHeight: 150, overflow: 'hidden'} : undefined} 86 + onLayout={handleLayout}> 87 + {renderedContent} 88 + </View> 89 + {shouldCollapse && ( 90 + <Button 91 + label={_(msg`Show more`)} 92 + onPress={() => setIsExpanded(true)} 93 + variant="ghost" 94 + color="primary" 95 + size="small" 96 + style={[a.mt_xs]}> 97 + <ButtonText> 98 + <Trans>Show more</Trans> 99 + </ButtonText> 100 + </Button> 101 + )} 102 + </View> 103 + ) 104 } 105 106 + const LINK_PROTOCOLS = [ 107 + 'http', 108 + 'https', 109 + 'dat', 110 + 'dweb', 111 + 'ipfs', 112 + 'ipns', 113 + 'ssb', 114 + 'gopher', 115 + 'xmpp', 116 + 'magnet', 117 + 'gemini', 118 + ] 119 + 120 + const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i 121 + 122 + const ALLOWED_ELEMENTS = [ 123 + 'p', 124 + 'br', 125 + 'span', 126 + 'a', 127 + 'del', 128 + 's', 129 + 'pre', 130 + 'blockquote', 131 + 'code', 132 + 'b', 133 + 'strong', 134 + 'u', 135 + 'i', 136 + 'em', 137 + 'ul', 138 + 'ol', 139 + 'li', 140 + 'ruby', 141 + 'rt', 142 + 'rp', 143 + ] 144 + 145 + function sanitizeAndRenderHtml( 146 html: string, 147 _numberOfLines?: number, 148 inputTextStyle?: StyleProp<TextStyle>, 149 ): React.ReactNode { 150 + if (typeof DOMParser === 'undefined') { 151 + // Fallback for environments without DOMParser 152 + return html.replace(/<[^>]*>/g, '') 153 + } 154 + 155 const parser = new DOMParser() 156 const doc = parser.parseFromString(html, 'text/html') 157 158 + const textStyle: StyleProp<TextStyle> = [ 159 + a.leading_snug, 160 + a.text_md, 161 inputTextStyle, 162 ] 163 164 + // Sanitize and render in a single pass 165 + const renderNode = (node: Node, key: string, insideLink = false, listItemIndex?: number): React.ReactNode => { 166 if (node.nodeType === Node.TEXT_NODE) { 167 // Don't wrap text in styled Text component if inside a link 168 + if (insideLink) { 169 170 171 ··· 175 176 if 
(node.nodeType === Node.ELEMENT_NODE) { 177 const element = node as Element 178 + const tagName = element.tagName.toLowerCase() 179 + 180 + // Handle unsupported elements (h1-h6) - convert to <strong> wrapped in <p> 181 + if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { 182 + const children = Array.from(element.childNodes).map((child, i) => 183 + renderNode(child, String(i), insideLink), 184 + ) 185 + return ( 186 + <P key={key} style={textStyle}> 187 + <Text style={{...textStyle, fontWeight: 'bold'}}>{children}</Text> 188 + </P> 189 + ) 190 + } 191 + 192 + // Handle math elements - extract annotation text 193 + if (tagName === 'math') { 194 + const mathText = extractMathAnnotation(element) 195 + if (mathText) { 196 + return <Text key={key} style={textStyle}>{mathText}</Text> 197 + } 198 + return null 199 + } 200 + 201 + // Remove elements not in allowlist - replace with text content 202 + if (!ALLOWED_ELEMENTS.includes(tagName)) { 203 + return element.textContent ? ( 204 + <Text key={key} style={textStyle}>{element.textContent}</Text> 205 + ) : null 206 + } 207 + 208 + // Sanitize and process element 209 + sanitizeElementAttributes(element) 210 + 211 const children = Array.from(element.childNodes).map((child, i) => 212 + renderNode(child, String(i), insideLink || tagName === 'a'), 213 ) 214 215 + switch (tagName) { 216 case 'p': 217 return <P key={key} style={textStyle}>{children}</P> 218 case 'blockquote': ··· 224 case 'pre': 225 return ( 226 <View key={key} style={{backgroundColor: '#f5f5f5', padding: 8, borderRadius: 4, marginVertical: 4}}> 227 + <P style={[textStyle, { fontFamily: 'monospace'}]}>{children}</P> 228 </View> 229 ) 230 case 'code': 231 return ( 232 + <Text key={key} style={[textStyle, { fontFamily: 'monospace', backgroundColor: '#f5f5f5', paddingHorizontal: 4, borderRadius: 2}]}> 233 {children} 234 </Text> 235 ) 236 case 'strong': 237 case 'b': 238 return ( 239 + <Text key={key} style={[textStyle, { fontWeight: 'bold'}]}> 240 
{children} 241 </Text> 242 ) 243 case 'em': 244 case 'i': 245 return ( 246 + <Text key={key} style={[textStyle, { fontStyle: 'italic'}]}> 247 {children} 248 </Text> 249 ) 250 case 'u': 251 return ( 252 + <Text key={key} style={[textStyle, { textDecorationLine: 'underline'}]}> 253 {children} 254 </Text> 255 ) 256 case 'del': 257 + case 's': 258 return ( 259 + <Text key={key} style={[textStyle, { textDecorationLine: 'line-through'}]}> 260 {children} 261 </Text> 262 ) ··· 268 ) 269 case 'ol': 270 const start = element.getAttribute('start') 271 + const startNum = start ? parseInt(start, 10) : 1 272 return ( 273 + <View key={key} style={{marginVertical: 4}}> 274 + {Array.from(element.childNodes) 275 + .filter(child => child.nodeType === Node.ELEMENT_NODE && (child as Element).tagName.toLowerCase() === 'li') 276 + .map((child, i) => renderNode(child, `${key}-${i}`, insideLink, startNum + i))} 277 </View> 278 ) 279 case 'li': 280 + const marker = listItemIndex !== undefined ? `${listItemIndex}.` : '\u2022' 281 return ( 282 <View key={key} style={{flexDirection: 'row', marginVertical: 2}}> 283 + <Text style={[textStyle, { marginRight: 8 }]}>{marker}</Text> 284 + <Text style={[textStyle, { flex: 1 }]}>{children}</Text> 285 </View> 286 ) 287 + case 'ruby': 288 + return <Text key={key} style={textStyle}>{children}</Text> 289 + case 'rt': 290 + case 'rp': 291 + return null // TODO support ruby text rendering 292 case 'a': 293 const href = element.getAttribute('href') 294 if (href) { ··· 302 to={href} 303 label={linkText} 304 shouldProxy 305 + style={isInvisible ? 
{display: 'none'} : textStyle}> 306 {children} 307 </InlineLinkText> 308 ) ··· 314 const spanClass = element.getAttribute('class') 315 // Handle invisible/ellipsis classes for link formatting 316 if (spanClass?.includes('invisible')) { 317 + return <Text key={key} style={{ display: 'none' }}>{children}</Text> 318 } 319 if (spanClass?.includes('ellipsis')) { 320 // If inside a link, return plain text, otherwise wrapped 321 + if (insideLink) { 322 + return '\u2026' 323 + } 324 + return <Text key={key} style={textStyle}>{'\u2026'}</Text> 325 + } 326 + // Handle mentions and hashtags 327 + if (spanClass?.includes('mention') || spanClass?.includes('hashtag')) { 328 329 330 ··· 336 return children 337 } 338 return <Text key={key} style={textStyle}>{children}</Text> 339 default: 340 return <Text key={key} style={textStyle}>{children}</Text> 341 } 342 343 344 345 + } 346 + 347 + const content = Array.from(doc.body.childNodes).map((node, i) => 348 + renderNode(node, String(i)), 349 + ) 350 + 351 + return ( 352 353 354 + </View> 355 + ) 356 + } 357 358 + function sanitizeElementAttributes(element: Element): void { 359 + const tagName = element.tagName.toLowerCase() 360 + const allowedAttrs: Record<string, string[]> = { 361 + a: ['href', 'rel', 'class', 'translate'], 362 + span: ['class', 'translate'], 363 + ol: ['start', 'reversed'], 364 + li: ['value'], 365 + p: ['class'], 366 + } 367 + 368 + const allowed = allowedAttrs[tagName] || [] 369 + const attrs = Array.from(element.attributes) 370 + 371 + // Remove non-allowed attributes 372 + for (const attr of attrs) { 373 + const attrName = attr.name.toLowerCase() 374 + const isAllowed = allowed.some(allowedAttr => { 375 + if (allowedAttr.endsWith('*')) { 376 + return attrName.startsWith(allowedAttr.slice(0, -1)) 377 + } 378 + return allowedAttr === attrName 379 + }) 380 381 + if (!isAllowed) { 382 + element.removeAttribute(attr.name) 383 + } 384 + } 385 + 386 + // Process specific attributes 387 + if (tagName === 'a') { 388 + 
processAnchorElement(element) 389 + } 390 + 391 + // Process class whitelist 392 + if (element.hasAttribute('class')) { 393 + processClassWhitelist(element) 394 + } 395 + 396 + // Process translate attribute - remove unless it's "no" 397 + if (element.hasAttribute('translate')) { 398 + const translate = element.getAttribute('translate') 399 + if (translate !== 'no') { 400 + element.removeAttribute('translate') 401 + } 402 + } 403 + } 404 405 + function processAnchorElement(element: Element): void { 406 + // Check if href has unsupported protocol 407 + const href = element.getAttribute('href') 408 + if (href) { 409 + const scheme = getScheme(href) 410 + if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) { 411 + // Remove the href to disable the link 412 + element.removeAttribute('href') 413 + } 414 + } 415 + } 416 417 + function processClassWhitelist(element: Element): void { 418 + const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean) 419 + const whitelisted = classList.filter(className => { 420 + // microformats classes 421 + if (/^[hpuedt]-/.test(className)) return true 422 + // semantic classes 423 + if (/^(mention|hashtag)$/.test(className)) return true 424 + // link formatting classes 425 + if (/^(ellipsis|invisible)$/.test(className)) return true 426 + // quote inline class 427 + if (className === 'quote-inline') return true 428 + return false 429 + }) 430 + 431 + if (whitelisted.length > 0) { 432 + element.className = whitelisted.join(' ') 433 + } else { 434 + element.removeAttribute('class') 435 + } 436 + } 437 438 + function getScheme(url: string): string | null { 439 + const match = url.match(PROTOCOL_REGEX) 440 + if (match) { 441 + return match[1].toLowerCase() 442 + } 443 + // Check if it's a relative URL 444 + if (url.startsWith('/') || url.startsWith('.')) { 445 + return 'relative' 446 + } 447 + return null 448 + } 449 450 + function extractMathAnnotation(mathElement: Element): string | null { 451 + 
const semantics = Array.from(mathElement.children).find( 452 + child => child.tagName.toLowerCase() === 'semantics', 453 + ) as Element | undefined 454 + 455 + if (!semantics) return null 456 + 457 + // Look for LaTeX annotation (application/x-tex) 458 + const latexAnnotation = Array.from(semantics.children).find(child => { 459 + return ( 460 + child.tagName.toLowerCase() === 'annotation' && 461 + child.getAttribute('encoding') === 'application/x-tex' 462 + ) 463 + }) 464 + 465 + if (latexAnnotation) { 466 + const display = mathElement.getAttribute('display') 467 + const text = latexAnnotation.textContent || '' 468 + return display === 'block' ? `$$${text}$$` : `$${text}$` 469 + } 470 + 471 + // Look for plain text annotation 472 + const plainAnnotation = Array.from(semantics.children).find(child => { 473 + return ( 474 + child.tagName.toLowerCase() === 'annotation' && 475 + child.getAttribute('encoding') === 'text/plain' 476 + ) 477 + }) 478 + 479 + if (plainAnnotation) { 480 + return plainAnnotation.textContent || null 481 + } 482 483 + return null 484 }
-356
src/lib/strings/html-sanitizer.ts
··· 1 - /** 2 - * HTML sanitizer inspired by Mastodon's Sanitize::Config 3 - * Sanitizes HTML content to prevent XSS while preserving safe formatting 4 - */ 5 - 6 - const HTTP_PROTOCOLS = ['http', 'https'] 7 - 8 - const LINK_PROTOCOLS = [ 9 - 'http', 10 - 'https', 11 - 'dat', 12 - 'dweb', 13 - 'ipfs', 14 - 'ipns', 15 - 'ssb', 16 - 'gopher', 17 - 'xmpp', 18 - 'magnet', 19 - 'gemini', 20 - ] 21 - 22 - const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i 23 - 24 - interface SanitizeOptions { 25 - allowOembed?: boolean 26 - } 27 - 28 - /** 29 - * Sanitizes HTML content following Mastodon's strict rules 30 - */ 31 - export function sanitizeHtml( 32 - html: string, 33 - options: SanitizeOptions = {}, 34 - ): string { 35 - if (typeof DOMParser === 'undefined') { 36 - // Fallback for environments without DOMParser 37 - return sanitizeTextOnly(html) 38 - } 39 - 40 - const parser = new DOMParser() 41 - const doc = parser.parseFromString(html, 'text/html') 42 - const body = doc.body 43 - 44 - sanitizeNode(body, options) 45 - 46 - return body.innerHTML 47 - } 48 - 49 - function sanitizeNode(node: Node, options: SanitizeOptions): void { 50 - const childNodes = Array.from(node.childNodes) 51 - 52 - for (const child of childNodes) { 53 - if (child.nodeType === Node.ELEMENT_NODE) { 54 - const element = child as HTMLElement 55 - const tagName = element.tagName.toLowerCase() 56 - 57 - // Define allowed elements 58 - const allowedElements = options.allowOembed 59 - ? 
[ 60 - 'p', 61 - 'br', 62 - 'span', 63 - 'a', 64 - 'del', 65 - 's', 66 - 'pre', 67 - 'blockquote', 68 - 'code', 69 - 'b', 70 - 'strong', 71 - 'u', 72 - 'i', 73 - 'em', 74 - 'ul', 75 - 'ol', 76 - 'li', 77 - 'ruby', 78 - 'rt', 79 - 'rp', 80 - 'audio', 81 - 'iframe', 82 - 'source', 83 - 'video', 84 - ] 85 - : [ 86 - 'p', 87 - 'br', 88 - 'span', 89 - 'a', 90 - 'del', 91 - 's', 92 - 'pre', 93 - 'blockquote', 94 - 'code', 95 - 'b', 96 - 'strong', 97 - 'u', 98 - 'i', 99 - 'em', 100 - 'ul', 101 - 'ol', 102 - 'li', 103 - 'ruby', 104 - 'rt', 105 - 'rp', 106 - ] 107 - 108 - // Handle unsupported elements (h1-h6) - convert to <strong> wrapped in <p> 109 - if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) { 110 - const strong = element.ownerDocument!.createElement('strong') 111 - while (element.firstChild) { 112 - strong.appendChild(element.firstChild) 113 - } 114 - const p = element.ownerDocument!.createElement('p') 115 - p.appendChild(strong) 116 - element.replaceWith(p) 117 - sanitizeNode(p, options) 118 - continue 119 - } 120 - 121 - // Handle math elements - extract annotation text 122 - if (tagName === 'math') { 123 - const mathText = extractMathAnnotation(element) 124 - if (mathText) { 125 - const textNode = element.ownerDocument!.createTextNode(mathText) 126 - element.replaceWith(textNode) 127 - } else { 128 - element.remove() 129 - } 130 - continue 131 - } 132 - 133 - if (tagName === 'li') { 134 - // Keep li elements but sanitize their children 135 - sanitizeNode(element, options) 136 - continue 137 - } 138 - 139 - // Remove elements not in allowlist 140 - if (!allowedElements.includes(tagName)) { 141 - // Replace with text content 142 - const textNode = element.ownerDocument!.createTextNode( 143 - element.textContent || '', 144 - ) 145 - element.replaceWith(textNode) 146 - continue 147 - } 148 - 149 - // Sanitize attributes 150 - sanitizeAttributes(element, options) 151 - 152 - // Recursively sanitize children 153 - sanitizeNode(element, options) 154 - } 155 
- } 156 - } 157 - 158 - function sanitizeAttributes( 159 - element: HTMLElement, 160 - options: SanitizeOptions, 161 - ): void { 162 - const tagName = element.tagName.toLowerCase() 163 - const allowedAttrs: Record<string, string[]> = { 164 - a: ['href', 'rel', 'class', 'translate'], 165 - span: ['class', 'translate'], 166 - ol: ['start', 'reversed'], 167 - li: ['value'], 168 - p: ['class'], 169 - } 170 - 171 - if (options.allowOembed) { 172 - allowedAttrs.audio = ['controls'] 173 - allowedAttrs.iframe = [ 174 - 'allowfullscreen', 175 - 'frameborder', 176 - 'height', 177 - 'scrolling', 178 - 'src', 179 - 'width', 180 - ] 181 - allowedAttrs.source = ['src', 'type'] 182 - allowedAttrs.video = ['controls', 'height', 'loop', 'width'] 183 - } 184 - 185 - const allowed = allowedAttrs[tagName] || [] 186 - const attrs = Array.from(element.attributes) 187 - 188 - // Remove non-allowed attributes 189 - for (const attr of attrs) { 190 - const attrName = attr.name.toLowerCase() 191 - const isAllowed = allowed.some(a => { 192 - if (a.endsWith('*')) { 193 - return attrName.startsWith(a.slice(0, -1)) 194 - } 195 - return a === attrName 196 - }) 197 - 198 - if (!isAllowed) { 199 - element.removeAttribute(attr.name) 200 - } 201 - } 202 - 203 - // Process specific attributes 204 - if (tagName === 'a') { 205 - processAnchorElement(element) 206 - } 207 - 208 - // Process class whitelist 209 - if (element.hasAttribute('class')) { 210 - processClassWhitelist(element) 211 - } 212 - 213 - // Process translate attribute - remove unless it's "no" 214 - if (element.hasAttribute('translate')) { 215 - const translate = element.getAttribute('translate') 216 - if (translate !== 'no') { 217 - element.removeAttribute('translate') 218 - } 219 - } 220 - 221 - // Validate protocols for elements with src/href 222 - if (element.hasAttribute('href') || element.hasAttribute('src')) { 223 - validateProtocols(element, options) 224 - } 225 - } 226 - 227 - function processAnchorElement(element: HTMLElement): 
void { 228 - // Add required attributes 229 - element.setAttribute('rel', 'nofollow noopener') 230 - element.setAttribute('target', '_blank') 231 - 232 - // Check if href has unsupported protocol 233 - const href = element.getAttribute('href') 234 - if (href) { 235 - const scheme = getScheme(href) 236 - if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) { 237 - // Replace element with its text content 238 - const textNode = element.ownerDocument!.createTextNode( 239 - element.textContent || '', 240 - ) 241 - element.replaceWith(textNode) 242 - } 243 - } 244 - } 245 - 246 - function processClassWhitelist(element: HTMLElement): void { 247 - const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean) 248 - const whitelisted = classList.filter(className => { 249 - // microformats classes 250 - if (/^[hpuedt]-/.test(className)) return true 251 - // semantic classes 252 - if (/^(mention|hashtag)$/.test(className)) return true 253 - // link formatting classes 254 - if (/^(ellipsis|invisible)$/.test(className)) return true 255 - // quote inline class 256 - if (className === 'quote-inline') return true 257 - return false 258 - }) 259 - 260 - if (whitelisted.length > 0) { 261 - element.className = whitelisted.join(' ') 262 - } else { 263 - element.removeAttribute('class') 264 - } 265 - } 266 - 267 - function validateProtocols( 268 - element: HTMLElement, 269 - options: SanitizeOptions, 270 - ): void { 271 - const tagName = element.tagName.toLowerCase() 272 - const src = element.getAttribute('src') 273 - const href = element.getAttribute('href') 274 - const url = src || href 275 - 276 - if (!url) return 277 - 278 - const scheme = getScheme(url) 279 - 280 - // For oembed elements, only allow HTTP protocols for src 281 - if ( 282 - options.allowOembed && 283 - src && 284 - ['iframe', 'source'].includes(tagName) 285 - ) { 286 - if (scheme !== null && !HTTP_PROTOCOLS.includes(scheme)) { 287 - element.removeAttribute('src') 288 - } 289 
- // Add sandbox attribute to iframes 290 - if (tagName === 'iframe') { 291 - element.setAttribute( 292 - 'sandbox', 293 - 'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms', 294 - ) 295 - } 296 - } 297 - } 298 - 299 - function getScheme(url: string): string | null { 300 - const match = url.match(PROTOCOL_REGEX) 301 - if (match) { 302 - return match[1].toLowerCase() 303 - } 304 - // Check if it's a relative URL 305 - if (url.startsWith('/') || url.startsWith('.')) { 306 - return 'relative' 307 - } 308 - return null 309 - } 310 - 311 - /** 312 - * Extract math annotation from MathML element 313 - * Follows FEP-dc88 spec for math element representation 314 - */ 315 - function extractMathAnnotation(mathElement: HTMLElement): string | null { 316 - const semantics = Array.from(mathElement.children).find( 317 - child => child.tagName.toLowerCase() === 'semantics', 318 - ) as HTMLElement | undefined 319 - 320 - if (!semantics) return null 321 - 322 - // Look for LaTeX annotation (application/x-tex) 323 - const latexAnnotation = Array.from(semantics.children).find(child => { 324 - return ( 325 - child.tagName.toLowerCase() === 'annotation' && 326 - child.getAttribute('encoding') === 'application/x-tex' 327 - ) 328 - }) 329 - 330 - if (latexAnnotation) { 331 - const display = mathElement.getAttribute('display') 332 - const text = latexAnnotation.textContent || '' 333 - return display === 'block' ? 
`$$${text}$$` : `$${text}$` 334 - } 335 - 336 - // Look for plain text annotation 337 - const plainAnnotation = Array.from(semantics.children).find(child => { 338 - return ( 339 - child.tagName.toLowerCase() === 'annotation' && 340 - child.getAttribute('encoding') === 'text/plain' 341 - ) 342 - }) 343 - 344 - if (plainAnnotation) { 345 - return plainAnnotation.textContent || null 346 - } 347 - 348 - return null 349 - } 350 - 351 - /** 352 - * Fallback sanitizer that strips all HTML tags 353 - */ 354 - function sanitizeTextOnly(html: string): string { 355 - return html.replace(/<[^>]*>/g, '') 356 - }
···

History

2 rounds 5 comments
sign up or login to add to the discussion
5 commits
expand
e7e78fad
fix: don't duplicate work in MastodonHtmlContent
3e5262ab
chore: remove any casts
265f3ab4
chore: replace unicode ellipsis with escaped version
eff00beb
feat/MastodonHtml: render as ordered lists (with numeric prefixes)
a28c6d3f
feat/MastodonHtml: collapse posts taller than 150px
merge conflicts detected
expand
  • src/screens/PostThread/components/ThreadItemPost.tsx:301
  • src/state/persisted/schema.ts:166
  • src/state/preferences/index.tsx:33
  • src/view/com/posts/PostFeedItem.tsx:460
expand 5 comments

i am like 99% sure this would be considered a license violation if merged as mastodon is licensed under AGPL while witchsky is MIT

Good point, I will rewrite the sanitizer from scratch

Hey Maxine! Did you get this done? I’d like to see if we can merge it once the conflicts are resolved.

Sorry ewan, haven't had the time, also this PR has some weird bugs (sometimes the render crashes and I never diagnosed it), you might want to close this one for the meanwhile

I might look into writing a non-vibe-coded version of this at some point, it'd be a fun way to cut my teeth on webdev again

2 commits
expand
6e85dcd3
feat: render full post contents for posts bridged from mastodon or wafrn
e7e78fad
fix: don't duplicate work in MastodonHtmlContent
expand 0 comments