This one was mostly written by Copilot, so it's not great. For posts containing bridgyOriginalText or fullText, it renders the HTML in place of the bridged RichText. The sanitizer was translated directly from Mastodon's at https://github.com/mastodon/mastodon/blob/main/lib/sanitize_ext/sanitize_config.rb. Ruby text is unimplemented, as I have no idea what a good way to do that in React Native would be.
+310
-61
src/components/Post/MastodonHtmlContent.tsx
+310
-61
src/components/Post/MastodonHtmlContent.tsx
···
1
-
import {useMemo} from 'react'
2
-
import {type StyleProp, type TextStyle, View, ViewStyle} from 'react-native'
3
import {type AppBskyFeedPost} from '@atproto/api'
4
5
-
import {sanitizeHtml} from '#/lib/strings/html-sanitizer'
6
import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html'
7
-
import { atoms } from '#/alf'
8
import {InlineLinkText} from '#/components/Link'
9
10
11
···
17
18
19
20
21
22
23
···
27
28
29
30
-
31
-
32
-
33
-
34
-
35
-
36
-
37
-
38
}: MastodonHtmlContentProps) {
39
const renderMastodonHtml = useRenderMastodonHtml()
40
41
-
const htmlContent = useMemo(() => {
42
if (!renderMastodonHtml) return null
43
44
-
const fullText = (record as any).fullText as string | undefined
45
-
46
-
47
-
48
49
50
51
if (!rawHtml) return null
52
53
-
return sanitizeHtml(rawHtml)
54
-
}, [record, renderMastodonHtml])
55
-
56
-
const renderedContent = useMemo(() => {
57
-
if (!htmlContent) return null
58
-
59
-
// Parse and render with React components on all platforms
60
-
return renderHtmlAsReact(htmlContent, numberOfLines, textStyle)
61
-
}, [htmlContent, numberOfLines, textStyle])
62
63
if (!renderedContent) return null
64
65
-
return <View style={style}>{renderedContent}</View>
66
}
67
68
-
function renderHtmlAsReact(
69
html: string,
70
_numberOfLines?: number,
71
inputTextStyle?: StyleProp<TextStyle>,
72
): React.ReactNode {
73
const parser = new DOMParser()
74
const doc = parser.parseFromString(html, 'text/html')
75
76
-
77
-
78
-
79
inputTextStyle,
80
]
81
82
-
const renderNode = (node: Node, key: number, insideLink = false): React.ReactNode => {
83
if (node.nodeType === Node.TEXT_NODE) {
84
// Don't wrap text in styled Text component if inside a link
85
-
86
87
88
···
92
93
if (node.nodeType === Node.ELEMENT_NODE) {
94
const element = node as Element
95
const children = Array.from(element.childNodes).map((child, i) =>
96
-
renderNode(child, i, insideLink || element.tagName.toLowerCase() === 'a'),
97
)
98
99
-
switch (element.tagName.toLowerCase()) {
100
case 'p':
101
return <P key={key} style={textStyle}>{children}</P>
102
case 'blockquote':
···
108
case 'pre':
109
return (
110
<View key={key} style={{backgroundColor: '#f5f5f5', padding: 8, borderRadius: 4, marginVertical: 4}}>
111
-
<P style={{...textStyle, fontFamily: 'monospace'}}>{children}</P>
112
</View>
113
)
114
case 'code':
115
return (
116
-
<Text key={key} style={{...textStyle, fontFamily: 'monospace', backgroundColor: '#f5f5f5', paddingHorizontal: 4, borderRadius: 2}}>
117
{children}
118
</Text>
119
)
120
case 'strong':
121
case 'b':
122
return (
123
-
<Text key={key} style={{...textStyle, fontWeight: 'bold'}}>
124
{children}
125
</Text>
126
)
127
case 'em':
128
case 'i':
129
return (
130
-
<Text key={key} style={{...textStyle, fontStyle: 'italic'}}>
131
{children}
132
</Text>
133
)
134
case 'u':
135
return (
136
-
<Text key={key} style={{...textStyle, textDecorationLine: 'underline'}}>
137
{children}
138
</Text>
139
)
140
case 'del':
141
return (
142
-
<Text key={key} style={{...textStyle, textDecorationLine: 'line-through'}}>
143
{children}
144
</Text>
145
)
···
151
)
152
case 'ol':
153
const start = element.getAttribute('start')
154
-
const reversed = element.getAttribute('reversed') !== null
155
return (
156
-
<View key={key} style={{marginVertical: 4}} data-start={start} data-reversed={reversed}>
157
-
{children}
158
</View>
159
)
160
case 'li':
161
-
const value = element.getAttribute('value')
162
-
const parentIsOl = element.parentElement?.tagName.toLowerCase() === 'ol'
163
return (
164
<View key={key} style={{flexDirection: 'row', marginVertical: 2}}>
165
-
<Text style={{...textStyle, marginRight: 8}}>{parentIsOl ? (value || '螕脟贸') : '螕脟贸'}</Text>
166
-
<Text style={{...textStyle, flex: 1}}>{children}</Text>
167
</View>
168
)
169
case 'a':
170
const href = element.getAttribute('href')
171
if (href) {
···
179
to={href}
180
label={linkText}
181
shouldProxy
182
-
style={isInvisible ? {width: 0, height: 0, position: 'absolute'} : textStyle}>
183
{children}
184
</InlineLinkText>
185
)
···
191
const spanClass = element.getAttribute('class')
192
// Handle invisible/ellipsis classes for link formatting
193
if (spanClass?.includes('invisible')) {
194
-
return null
195
}
196
if (spanClass?.includes('ellipsis')) {
197
// If inside a link, return plain text, otherwise wrapped
198
-
199
-
200
-
201
-
202
-
203
-
204
-
205
206
207
···
213
return children
214
}
215
return <Text key={key} style={textStyle}>{children}</Text>
216
-
case 'div':
217
-
return <P key={key} style={textStyle}>{children}</P>
218
default:
219
return <Text key={key} style={textStyle}>{children}</Text>
220
}
221
222
223
224
225
226
227
228
229
230
231
232
233
-
</View>
234
-
)
235
}
···
1
+
import {useMemo, useState} from 'react'
2
+
import {
3
+
type LayoutChangeEvent,
4
+
type StyleProp,
5
+
type TextStyle,
6
+
View,
7
+
type ViewStyle,
8
+
} from 'react-native'
9
import {type AppBskyFeedPost} from '@atproto/api'
10
+
import {msg, Trans} from '@lingui/macro'
11
+
import {useLingui} from '@lingui/react'
12
13
import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html'
14
+
import {atoms as a} from '#/alf'
15
+
import {Button, ButtonText} from '#/components/Button'
16
import {InlineLinkText} from '#/components/Link'
17
+
import {P, Text} from '#/components/Typography'
18
19
20
···
26
27
28
29
+
return useMemo(() => {
30
+
if (!renderMastodonHtml) return false
31
32
+
const fullText = record.fullText as string | undefined
33
+
const bridgyOriginalText = record.bridgyOriginalText as
34
+
| string
35
+
| undefined
36
37
38
···
42
43
44
45
+
numberOfLines,
46
}: MastodonHtmlContentProps) {
47
const renderMastodonHtml = useRenderMastodonHtml()
48
+
const {_} = useLingui()
49
+
const [isExpanded, setIsExpanded] = useState(false)
50
+
const [contentHeight, setContentHeight] = useState<number | null>(null)
51
+
const [isTall, setIsTall] = useState(false)
52
53
+
const renderedContent = useMemo(() => {
54
if (!renderMastodonHtml) return null
55
56
+
const fullText = record.fullText as string | undefined
57
+
const bridgyOriginalText = record.bridgyOriginalText as
58
+
| string
59
+
| undefined
60
61
62
63
if (!rawHtml) return null
64
65
+
// Parse HTML once and sanitize/render in a single pass
66
+
return sanitizeAndRenderHtml(rawHtml, numberOfLines, textStyle)
67
+
}, [record, renderMastodonHtml, numberOfLines, textStyle])
68
+
69
+
const handleLayout = (event: LayoutChangeEvent) => {
70
+
const height = event.nativeEvent.layout.height
71
+
if (contentHeight === null) {
72
+
setContentHeight(height)
73
+
// Consider content "tall" if it's taller than 150px
74
+
setIsTall(height > 150)
75
+
}
76
+
}
77
78
if (!renderedContent) return null
79
80
+
const shouldCollapse = isTall && !isExpanded
81
+
82
+
return (
83
+
<View style={style}>
84
+
<View
85
+
style={shouldCollapse ? {maxHeight: 150, overflow: 'hidden'} : undefined}
86
+
onLayout={handleLayout}>
87
+
{renderedContent}
88
+
</View>
89
+
{shouldCollapse && (
90
+
<Button
91
+
label={_(msg`Show more`)}
92
+
onPress={() => setIsExpanded(true)}
93
+
variant="ghost"
94
+
color="primary"
95
+
size="small"
96
+
style={[a.mt_xs]}>
97
+
<ButtonText>
98
+
<Trans>Show more</Trans>
99
+
</ButtonText>
100
+
</Button>
101
+
)}
102
+
</View>
103
+
)
104
}
105
106
+
const LINK_PROTOCOLS = [
107
+
'http',
108
+
'https',
109
+
'dat',
110
+
'dweb',
111
+
'ipfs',
112
+
'ipns',
113
+
'ssb',
114
+
'gopher',
115
+
'xmpp',
116
+
'magnet',
117
+
'gemini',
118
+
]
119
+
120
+
const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i
121
+
122
+
const ALLOWED_ELEMENTS = [
123
+
'p',
124
+
'br',
125
+
'span',
126
+
'a',
127
+
'del',
128
+
's',
129
+
'pre',
130
+
'blockquote',
131
+
'code',
132
+
'b',
133
+
'strong',
134
+
'u',
135
+
'i',
136
+
'em',
137
+
'ul',
138
+
'ol',
139
+
'li',
140
+
'ruby',
141
+
'rt',
142
+
'rp',
143
+
]
144
+
145
+
function sanitizeAndRenderHtml(
146
html: string,
147
_numberOfLines?: number,
148
inputTextStyle?: StyleProp<TextStyle>,
149
): React.ReactNode {
150
+
if (typeof DOMParser === 'undefined') {
151
+
// Fallback for environments without DOMParser
152
+
return html.replace(/<[^>]*>/g, '')
153
+
}
154
+
155
const parser = new DOMParser()
156
const doc = parser.parseFromString(html, 'text/html')
157
158
+
const textStyle: StyleProp<TextStyle> = [
159
+
a.leading_snug,
160
+
a.text_md,
161
inputTextStyle,
162
]
163
164
+
// Sanitize and render in a single pass
165
+
const renderNode = (node: Node, key: string, insideLink = false, listItemIndex?: number): React.ReactNode => {
166
if (node.nodeType === Node.TEXT_NODE) {
167
// Don't wrap text in styled Text component if inside a link
168
+
if (insideLink) {
169
170
171
···
175
176
if (node.nodeType === Node.ELEMENT_NODE) {
177
const element = node as Element
178
+
const tagName = element.tagName.toLowerCase()
179
+
180
+
// Handle unsupported elements (h1-h6) - convert to <strong> wrapped in <p>
181
+
if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) {
182
+
const children = Array.from(element.childNodes).map((child, i) =>
183
+
renderNode(child, String(i), insideLink),
184
+
)
185
+
return (
186
+
<P key={key} style={textStyle}>
187
+
<Text style={{...textStyle, fontWeight: 'bold'}}>{children}</Text>
188
+
</P>
189
+
)
190
+
}
191
+
192
+
// Handle math elements - extract annotation text
193
+
if (tagName === 'math') {
194
+
const mathText = extractMathAnnotation(element)
195
+
if (mathText) {
196
+
return <Text key={key} style={textStyle}>{mathText}</Text>
197
+
}
198
+
return null
199
+
}
200
+
201
+
// Remove elements not in allowlist - replace with text content
202
+
if (!ALLOWED_ELEMENTS.includes(tagName)) {
203
+
return element.textContent ? (
204
+
<Text key={key} style={textStyle}>{element.textContent}</Text>
205
+
) : null
206
+
}
207
+
208
+
// Sanitize and process element
209
+
sanitizeElementAttributes(element)
210
+
211
const children = Array.from(element.childNodes).map((child, i) =>
212
+
renderNode(child, String(i), insideLink || tagName === 'a'),
213
)
214
215
+
switch (tagName) {
216
case 'p':
217
return <P key={key} style={textStyle}>{children}</P>
218
case 'blockquote':
···
224
case 'pre':
225
return (
226
<View key={key} style={{backgroundColor: '#f5f5f5', padding: 8, borderRadius: 4, marginVertical: 4}}>
227
+
<P style={[textStyle, { fontFamily: 'monospace'}]}>{children}</P>
228
</View>
229
)
230
case 'code':
231
return (
232
+
<Text key={key} style={[textStyle, { fontFamily: 'monospace', backgroundColor: '#f5f5f5', paddingHorizontal: 4, borderRadius: 2}]}>
233
{children}
234
</Text>
235
)
236
case 'strong':
237
case 'b':
238
return (
239
+
<Text key={key} style={[textStyle, { fontWeight: 'bold'}]}>
240
{children}
241
</Text>
242
)
243
case 'em':
244
case 'i':
245
return (
246
+
<Text key={key} style={[textStyle, { fontStyle: 'italic'}]}>
247
{children}
248
</Text>
249
)
250
case 'u':
251
return (
252
+
<Text key={key} style={[textStyle, { textDecorationLine: 'underline'}]}>
253
{children}
254
</Text>
255
)
256
case 'del':
257
+
case 's':
258
return (
259
+
<Text key={key} style={[textStyle, { textDecorationLine: 'line-through'}]}>
260
{children}
261
</Text>
262
)
···
268
)
269
case 'ol':
270
const start = element.getAttribute('start')
271
+
const startNum = start ? parseInt(start, 10) : 1
272
return (
273
+
<View key={key} style={{marginVertical: 4}}>
274
+
{Array.from(element.childNodes)
275
+
.filter(child => child.nodeType === Node.ELEMENT_NODE && (child as Element).tagName.toLowerCase() === 'li')
276
+
.map((child, i) => renderNode(child, `${key}-${i}`, insideLink, startNum + i))}
277
</View>
278
)
279
case 'li':
280
+
const marker = listItemIndex !== undefined ? `${listItemIndex}.` : '\u2022'
281
return (
282
<View key={key} style={{flexDirection: 'row', marginVertical: 2}}>
283
+
<Text style={[textStyle, { marginRight: 8 }]}>{marker}</Text>
284
+
<Text style={[textStyle, { flex: 1 }]}>{children}</Text>
285
</View>
286
)
287
+
case 'ruby':
288
+
return <Text key={key} style={textStyle}>{children}</Text>
289
+
case 'rt':
290
+
case 'rp':
291
+
return null // TODO support ruby text rendering
292
case 'a':
293
const href = element.getAttribute('href')
294
if (href) {
···
302
to={href}
303
label={linkText}
304
shouldProxy
305
+
style={isInvisible ? {display: 'none'} : textStyle}>
306
{children}
307
</InlineLinkText>
308
)
···
314
const spanClass = element.getAttribute('class')
315
// Handle invisible/ellipsis classes for link formatting
316
if (spanClass?.includes('invisible')) {
317
+
return <Text key={key} style={{ display: 'none' }}>{children}</Text>
318
}
319
if (spanClass?.includes('ellipsis')) {
320
// If inside a link, return plain text, otherwise wrapped
321
+
if (insideLink) {
322
+
return '\u2026'
323
+
}
324
+
return <Text key={key} style={textStyle}>{'\u2026'}</Text>
325
+
}
326
+
// Handle mentions and hashtags
327
+
if (spanClass?.includes('mention') || spanClass?.includes('hashtag')) {
328
329
330
···
336
return children
337
}
338
return <Text key={key} style={textStyle}>{children}</Text>
339
default:
340
return <Text key={key} style={textStyle}>{children}</Text>
341
}
342
343
344
345
+
}
346
+
347
+
const content = Array.from(doc.body.childNodes).map((node, i) =>
348
+
renderNode(node, String(i)),
349
+
)
350
+
351
+
return (
352
353
354
+
</View>
355
+
)
356
+
}
357
358
+
/**
 * Strips every attribute that is not on the per-tag allowlist from `element`
 * (mutating it in place), then applies the follow-up passes: anchor href
 * checking, class-name filtering and `translate` normalisation.
 *
 * Tags without an allowlist entry keep no attributes at all.
 */
function sanitizeElementAttributes(element: Element): void {
  const tagName = element.tagName.toLowerCase()
  const allowedAttrs: Record<string, string[]> = {
    a: ['href', 'rel', 'class', 'translate'],
    span: ['class', 'translate'],
    ol: ['start', 'reversed'],
    li: ['value'],
    p: ['class'],
  }
  const permitted = allowedAttrs[tagName] || []

  // An allowlist entry ending in `*` is a prefix pattern; anything else must
  // match the attribute name exactly.
  const matchesPattern = (pattern: string, name: string): boolean =>
    pattern.endsWith('*')
      ? name.startsWith(pattern.slice(0, -1))
      : pattern === name

  // Snapshot the attribute list first: removing entries while walking the
  // live NamedNodeMap would skip attributes.
  Array.from(element.attributes).forEach(attribute => {
    const lowered = attribute.name.toLowerCase()
    if (!permitted.some(pattern => matchesPattern(pattern, lowered))) {
      element.removeAttribute(attribute.name)
    }
  })

  // Anchors get their href checked against the protocol allowlist.
  if (tagName === 'a') {
    processAnchorElement(element)
  }

  // Any surviving class attribute is reduced to the whitelisted class names.
  if (element.hasAttribute('class')) {
    processClassWhitelist(element)
  }

  // `translate` is only meaningful as translate="no"; drop any other value.
  if (
    element.hasAttribute('translate') &&
    element.getAttribute('translate') !== 'no'
  ) {
    element.removeAttribute('translate')
  }
}
404
405
+
/**
 * Disables an anchor whose href uses a protocol outside LINK_PROTOCOLS by
 * removing the href attribute. Anchors without an href, with a relative href,
 * or with an href that has no detectable scheme are left untouched.
 */
function processAnchorElement(element: Element): void {
  const target = element.getAttribute('href')
  if (!target) return

  const protocol = getScheme(target)
  if (
    protocol === null ||
    protocol === 'relative' ||
    LINK_PROTOCOLS.includes(protocol)
  ) {
    return
  }

  // Unsupported protocol: drop the href so the link is rendered inert.
  element.removeAttribute('href')
}
416
417
+
/**
 * Rewrites the element's class attribute so that only whitelisted class names
 * remain; removes the attribute entirely when none survive.
 *
 * Whitelisted: microformats prefixes (h-, p-, u-, e-, d-, t-), `mention`,
 * `hashtag`, `ellipsis`, `invisible` and `quote-inline`.
 */
function processClassWhitelist(element: Element): void {
  const kept: string[] = []

  for (const token of element.className.split(/[\t\n\f\r ]+/)) {
    // Splitting can yield empty strings at the edges; ignore them.
    if (!token) continue

    const isMicroformat = /^[hpuedt]-/.test(token)
    const isSemantic = /^(mention|hashtag)$/.test(token)
    const isLinkFormatting = /^(ellipsis|invisible)$/.test(token)
    const isQuoteInline = token === 'quote-inline'

    if (isMicroformat || isSemantic || isLinkFormatting || isQuoteInline) {
      kept.push(token)
    }
  }

  if (kept.length === 0) {
    element.removeAttribute('class')
    return
  }
  element.className = kept.join(' ')
}
437
438
+
/**
 * Extracts the scheme of a URL so the caller can check it against a protocol
 * allowlist.
 *
 * @param url Raw href value taken from untrusted HTML.
 * @returns The lower-cased scheme (e.g. `https`, `javascript`, `magnet`),
 *   the literal `'relative'` for hrefs starting with `/` or `.`, or `null`
 *   when no scheme can be identified (e.g. `example.com/path` or an empty
 *   string).
 */
function getScheme(url: string): string | null {
  // URL parsers drop ASCII tab/newline anywhere in the input and ignore
  // leading control characters and spaces, so "java\nscript:..." and
  // "  javascript:..." both resolve to the javascript scheme. Normalise the
  // same way before matching so such hrefs cannot slip past the allowlist.
  const normalized = url
    .replace(/[\t\n\r]/g, '')
    .replace(/^[\u0000-\u0020]+/, '')

  // A scheme is terminated by ":" alone. Requiring "://" (as the module-level
  // PROTOCOL_REGEX does) fails to recognise `javascript:`, `data:`, `magnet:`
  // and `xmpp:` URLs, which made dangerous schemes look scheme-less and
  // therefore allowed. Note that "host:port/path" without a scheme is read as
  // scheme "host", matching how URL parsers interpret it.
  const match = /^([a-z][a-z0-9.+-]*):/i.exec(normalized)
  if (match) {
    return match[1].toLowerCase()
  }

  // Check if it's a relative URL
  if (normalized.startsWith('/') || normalized.startsWith('.')) {
    return 'relative'
  }
  return null
}
449
450
+
/**
 * Produces a plain-text stand-in for a MathML `<math>` element from the
 * annotations inside its direct `<semantics>` child.
 *
 * A LaTeX annotation (`application/x-tex`) is preferred and is wrapped in
 * `$$…$$` when the math element has display="block", otherwise in `$…$`.
 * Failing that, a `text/plain` annotation is returned as-is. Returns null
 * when there is no `<semantics>` child, no usable annotation, or the plain
 * annotation is empty.
 */
function extractMathAnnotation(mathElement: Element): string | null {
  // Only a direct child named <semantics> counts.
  let semantics: Element | undefined
  for (const child of Array.from(mathElement.children)) {
    if (child.tagName.toLowerCase() === 'semantics') {
      semantics = child
      break
    }
  }
  if (!semantics) return null

  const candidates = Array.from(semantics.children)
  const findAnnotation = (encoding: string): Element | undefined =>
    candidates.find(
      node =>
        node.tagName.toLowerCase() === 'annotation' &&
        node.getAttribute('encoding') === encoding,
    )

  // Look for LaTeX annotation (application/x-tex)
  const tex = findAnnotation('application/x-tex')
  if (tex) {
    const body = tex.textContent || ''
    const isBlock = mathElement.getAttribute('display') === 'block'
    return isBlock ? `$$${body}$$` : `$${body}$`
  }

  // Look for plain text annotation
  const plain = findAnnotation('text/plain')
  return plain ? plain.textContent || null : null
}
-356
src/lib/strings/html-sanitizer.ts
-356
src/lib/strings/html-sanitizer.ts
···
1
-
/**
2
-
* HTML sanitizer inspired by Mastodon's Sanitize::Config
3
-
* Sanitizes HTML content to prevent XSS while preserving safe formatting
4
-
*/
5
-
6
-
const HTTP_PROTOCOLS = ['http', 'https']
7
-
8
-
const LINK_PROTOCOLS = [
9
-
'http',
10
-
'https',
11
-
'dat',
12
-
'dweb',
13
-
'ipfs',
14
-
'ipns',
15
-
'ssb',
16
-
'gopher',
17
-
'xmpp',
18
-
'magnet',
19
-
'gemini',
20
-
]
21
-
22
-
const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i
23
-
24
-
interface SanitizeOptions {
25
-
allowOembed?: boolean
26
-
}
27
-
28
-
/**
29
-
* Sanitizes HTML content following Mastodon's strict rules
30
-
*/
31
-
export function sanitizeHtml(
32
-
html: string,
33
-
options: SanitizeOptions = {},
34
-
): string {
35
-
if (typeof DOMParser === 'undefined') {
36
-
// Fallback for environments without DOMParser
37
-
return sanitizeTextOnly(html)
38
-
}
39
-
40
-
const parser = new DOMParser()
41
-
const doc = parser.parseFromString(html, 'text/html')
42
-
const body = doc.body
43
-
44
-
sanitizeNode(body, options)
45
-
46
-
return body.innerHTML
47
-
}
48
-
49
-
function sanitizeNode(node: Node, options: SanitizeOptions): void {
50
-
const childNodes = Array.from(node.childNodes)
51
-
52
-
for (const child of childNodes) {
53
-
if (child.nodeType === Node.ELEMENT_NODE) {
54
-
const element = child as HTMLElement
55
-
const tagName = element.tagName.toLowerCase()
56
-
57
-
// Define allowed elements
58
-
const allowedElements = options.allowOembed
59
-
? [
60
-
'p',
61
-
'br',
62
-
'span',
63
-
'a',
64
-
'del',
65
-
's',
66
-
'pre',
67
-
'blockquote',
68
-
'code',
69
-
'b',
70
-
'strong',
71
-
'u',
72
-
'i',
73
-
'em',
74
-
'ul',
75
-
'ol',
76
-
'li',
77
-
'ruby',
78
-
'rt',
79
-
'rp',
80
-
'audio',
81
-
'iframe',
82
-
'source',
83
-
'video',
84
-
]
85
-
: [
86
-
'p',
87
-
'br',
88
-
'span',
89
-
'a',
90
-
'del',
91
-
's',
92
-
'pre',
93
-
'blockquote',
94
-
'code',
95
-
'b',
96
-
'strong',
97
-
'u',
98
-
'i',
99
-
'em',
100
-
'ul',
101
-
'ol',
102
-
'li',
103
-
'ruby',
104
-
'rt',
105
-
'rp',
106
-
]
107
-
108
-
// Handle unsupported elements (h1-h6) - convert to <strong> wrapped in <p>
109
-
if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) {
110
-
const strong = element.ownerDocument!.createElement('strong')
111
-
while (element.firstChild) {
112
-
strong.appendChild(element.firstChild)
113
-
}
114
-
const p = element.ownerDocument!.createElement('p')
115
-
p.appendChild(strong)
116
-
element.replaceWith(p)
117
-
sanitizeNode(p, options)
118
-
continue
119
-
}
120
-
121
-
// Handle math elements - extract annotation text
122
-
if (tagName === 'math') {
123
-
const mathText = extractMathAnnotation(element)
124
-
if (mathText) {
125
-
const textNode = element.ownerDocument!.createTextNode(mathText)
126
-
element.replaceWith(textNode)
127
-
} else {
128
-
element.remove()
129
-
}
130
-
continue
131
-
}
132
-
133
-
if (tagName === 'li') {
134
-
// Keep li elements but sanitize their children
135
-
sanitizeNode(element, options)
136
-
continue
137
-
}
138
-
139
-
// Remove elements not in allowlist
140
-
if (!allowedElements.includes(tagName)) {
141
-
// Replace with text content
142
-
const textNode = element.ownerDocument!.createTextNode(
143
-
element.textContent || '',
144
-
)
145
-
element.replaceWith(textNode)
146
-
continue
147
-
}
148
-
149
-
// Sanitize attributes
150
-
sanitizeAttributes(element, options)
151
-
152
-
// Recursively sanitize children
153
-
sanitizeNode(element, options)
154
-
}
155
-
}
156
-
}
157
-
158
-
function sanitizeAttributes(
159
-
element: HTMLElement,
160
-
options: SanitizeOptions,
161
-
): void {
162
-
const tagName = element.tagName.toLowerCase()
163
-
const allowedAttrs: Record<string, string[]> = {
164
-
a: ['href', 'rel', 'class', 'translate'],
165
-
span: ['class', 'translate'],
166
-
ol: ['start', 'reversed'],
167
-
li: ['value'],
168
-
p: ['class'],
169
-
}
170
-
171
-
if (options.allowOembed) {
172
-
allowedAttrs.audio = ['controls']
173
-
allowedAttrs.iframe = [
174
-
'allowfullscreen',
175
-
'frameborder',
176
-
'height',
177
-
'scrolling',
178
-
'src',
179
-
'width',
180
-
]
181
-
allowedAttrs.source = ['src', 'type']
182
-
allowedAttrs.video = ['controls', 'height', 'loop', 'width']
183
-
}
184
-
185
-
const allowed = allowedAttrs[tagName] || []
186
-
const attrs = Array.from(element.attributes)
187
-
188
-
// Remove non-allowed attributes
189
-
for (const attr of attrs) {
190
-
const attrName = attr.name.toLowerCase()
191
-
const isAllowed = allowed.some(a => {
192
-
if (a.endsWith('*')) {
193
-
return attrName.startsWith(a.slice(0, -1))
194
-
}
195
-
return a === attrName
196
-
})
197
-
198
-
if (!isAllowed) {
199
-
element.removeAttribute(attr.name)
200
-
}
201
-
}
202
-
203
-
// Process specific attributes
204
-
if (tagName === 'a') {
205
-
processAnchorElement(element)
206
-
}
207
-
208
-
// Process class whitelist
209
-
if (element.hasAttribute('class')) {
210
-
processClassWhitelist(element)
211
-
}
212
-
213
-
// Process translate attribute - remove unless it's "no"
214
-
if (element.hasAttribute('translate')) {
215
-
const translate = element.getAttribute('translate')
216
-
if (translate !== 'no') {
217
-
element.removeAttribute('translate')
218
-
}
219
-
}
220
-
221
-
// Validate protocols for elements with src/href
222
-
if (element.hasAttribute('href') || element.hasAttribute('src')) {
223
-
validateProtocols(element, options)
224
-
}
225
-
}
226
-
227
-
function processAnchorElement(element: HTMLElement): void {
228
-
// Add required attributes
229
-
element.setAttribute('rel', 'nofollow noopener')
230
-
element.setAttribute('target', '_blank')
231
-
232
-
// Check if href has unsupported protocol
233
-
const href = element.getAttribute('href')
234
-
if (href) {
235
-
const scheme = getScheme(href)
236
-
if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) {
237
-
// Replace element with its text content
238
-
const textNode = element.ownerDocument!.createTextNode(
239
-
element.textContent || '',
240
-
)
241
-
element.replaceWith(textNode)
242
-
}
243
-
}
244
-
}
245
-
246
-
function processClassWhitelist(element: HTMLElement): void {
247
-
const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean)
248
-
const whitelisted = classList.filter(className => {
249
-
// microformats classes
250
-
if (/^[hpuedt]-/.test(className)) return true
251
-
// semantic classes
252
-
if (/^(mention|hashtag)$/.test(className)) return true
253
-
// link formatting classes
254
-
if (/^(ellipsis|invisible)$/.test(className)) return true
255
-
// quote inline class
256
-
if (className === 'quote-inline') return true
257
-
return false
258
-
})
259
-
260
-
if (whitelisted.length > 0) {
261
-
element.className = whitelisted.join(' ')
262
-
} else {
263
-
element.removeAttribute('class')
264
-
}
265
-
}
266
-
267
-
function validateProtocols(
268
-
element: HTMLElement,
269
-
options: SanitizeOptions,
270
-
): void {
271
-
const tagName = element.tagName.toLowerCase()
272
-
const src = element.getAttribute('src')
273
-
const href = element.getAttribute('href')
274
-
const url = src || href
275
-
276
-
if (!url) return
277
-
278
-
const scheme = getScheme(url)
279
-
280
-
// For oembed elements, only allow HTTP protocols for src
281
-
if (
282
-
options.allowOembed &&
283
-
src &&
284
-
['iframe', 'source'].includes(tagName)
285
-
) {
286
-
if (scheme !== null && !HTTP_PROTOCOLS.includes(scheme)) {
287
-
element.removeAttribute('src')
288
-
}
289
-
// Add sandbox attribute to iframes
290
-
if (tagName === 'iframe') {
291
-
element.setAttribute(
292
-
'sandbox',
293
-
'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms',
294
-
)
295
-
}
296
-
}
297
-
}
298
-
299
-
function getScheme(url: string): string | null {
300
-
const match = url.match(PROTOCOL_REGEX)
301
-
if (match) {
302
-
return match[1].toLowerCase()
303
-
}
304
-
// Check if it's a relative URL
305
-
if (url.startsWith('/') || url.startsWith('.')) {
306
-
return 'relative'
307
-
}
308
-
return null
309
-
}
310
-
311
-
/**
312
-
* Extract math annotation from MathML element
313
-
* Follows FEP-dc88 spec for math element representation
314
-
*/
315
-
function extractMathAnnotation(mathElement: HTMLElement): string | null {
316
-
const semantics = Array.from(mathElement.children).find(
317
-
child => child.tagName.toLowerCase() === 'semantics',
318
-
) as HTMLElement | undefined
319
-
320
-
if (!semantics) return null
321
-
322
-
// Look for LaTeX annotation (application/x-tex)
323
-
const latexAnnotation = Array.from(semantics.children).find(child => {
324
-
return (
325
-
child.tagName.toLowerCase() === 'annotation' &&
326
-
child.getAttribute('encoding') === 'application/x-tex'
327
-
)
328
-
})
329
-
330
-
if (latexAnnotation) {
331
-
const display = mathElement.getAttribute('display')
332
-
const text = latexAnnotation.textContent || ''
333
-
return display === 'block' ? `$$${text}$$` : `$${text}$`
334
-
}
335
-
336
-
// Look for plain text annotation
337
-
const plainAnnotation = Array.from(semantics.children).find(child => {
338
-
return (
339
-
child.tagName.toLowerCase() === 'annotation' &&
340
-
child.getAttribute('encoding') === 'text/plain'
341
-
)
342
-
})
343
-
344
-
if (plainAnnotation) {
345
-
return plainAnnotation.textContent || null
346
-
}
347
-
348
-
return null
349
-
}
350
-
351
-
/**
352
-
* Fallback sanitizer that strips all HTML tags
353
-
*/
354
-
function sanitizeTextOnly(html: string): string {
355
-
return html.replace(/<[^>]*>/g, '')
356
-
}
···
History
2 rounds
5 comments
maxine.puppykitty.racing
submitted
#1
5 commits
expand
collapse
e7e78fad
fix: don't duplicate work in MastodonHtmlContent
3e5262ab
chore: remove any casts
265f3ab4
chore: replace unicode ellipsis with escaped version
eff00beb
feat/MastodonHtml: render as ordered lists (with numeric prefixes)
a28c6d3f
feat/MastodonHtml: collapse posts taller than 150px
merge conflicts detected
expand
collapse
expand
collapse
- src/screens/PostThread/components/ThreadItemPost.tsx:301
- src/state/persisted/schema.ts:166
- src/state/preferences/index.tsx:33
- src/view/com/posts/PostFeedItem.tsx:460
expand 5 comments
Good point, I will rewrite the sanitizer from scratch
Hey Maxine! Did you get this done? I’d like to see if we can merge it once the conflicts are resolved.
Sorry ewan, haven't had the time, also this PR has some weird bugs (sometimes the render crashes and I never diagnosed it), you might want to close this one for the meanwhile
I might look into writing a non-vibe-coded version of this at some point, it'd be a fun way to cut my teeth on webdev again
maxine.puppykitty.racing
submitted
#0
2 commits
expand
collapse
6e85dcd3
feat: render full post contents for posts bridged from mastodon or wafrn
e7e78fad
fix: don't duplicate work in MastodonHtmlContent
i am like 99% sure this would be considered a license violation if merged as mastodon is licensed under AGPL while witchsky is MIT