This one was mostly written by Copilot, so it's not great. For posts containing `bridgyOriginalText` or `fullText`, it renders the HTML in place of the bridged RichText. The sanitizer was translated directly from Mastodon's sanitizer config at https://github.com/mastodon/mastodon/blob/main/lib/sanitize_ext/sanitize_config.rb. Ruby text is not implemented, as I don't know of a good way to do that in React Native.
+310
-417
Diff
round #1
+310
-61
src/components/Post/MastodonHtmlContent.tsx
+310
-61
src/components/Post/MastodonHtmlContent.tsx
···
1
-
import {useMemo} from 'react'
2
-
import {type StyleProp, type TextStyle, View, ViewStyle} from 'react-native'
1
+
import {useMemo, useState} from 'react'
2
+
import {
3
+
type LayoutChangeEvent,
4
+
type StyleProp,
5
+
type TextStyle,
6
+
View,
7
+
type ViewStyle,
8
+
} from 'react-native'
3
9
import {type AppBskyFeedPost} from '@atproto/api'
10
+
import {msg, Trans} from '@lingui/macro'
11
+
import {useLingui} from '@lingui/react'
4
12
5
-
import {sanitizeHtml} from '#/lib/strings/html-sanitizer'
6
13
import {useRenderMastodonHtml} from '#/state/preferences/render-mastodon-html'
7
-
import { atoms } from '#/alf'
14
+
import {atoms as a} from '#/alf'
15
+
import {Button, ButtonText} from '#/components/Button'
8
16
import {InlineLinkText} from '#/components/Link'
17
+
import {P, Text} from '#/components/Typography'
9
18
10
19
11
20
···
17
26
18
27
19
28
29
+
return useMemo(() => {
30
+
if (!renderMastodonHtml) return false
20
31
32
+
const fullText = record.fullText as string | undefined
33
+
const bridgyOriginalText = record.bridgyOriginalText as
34
+
| string
35
+
| undefined
21
36
22
37
23
38
···
27
42
28
43
29
44
30
-
31
-
32
-
33
-
34
-
35
-
36
-
37
-
45
+
numberOfLines,
38
46
}: MastodonHtmlContentProps) {
39
47
const renderMastodonHtml = useRenderMastodonHtml()
48
+
const {_} = useLingui()
49
+
const [isExpanded, setIsExpanded] = useState(false)
50
+
const [contentHeight, setContentHeight] = useState<number | null>(null)
51
+
const [isTall, setIsTall] = useState(false)
40
52
41
-
const htmlContent = useMemo(() => {
53
+
const renderedContent = useMemo(() => {
42
54
if (!renderMastodonHtml) return null
43
55
44
-
const fullText = (record as any).fullText as string | undefined
45
-
46
-
47
-
56
+
const fullText = record.fullText as string | undefined
57
+
const bridgyOriginalText = record.bridgyOriginalText as
58
+
| string
59
+
| undefined
48
60
49
61
50
62
51
63
if (!rawHtml) return null
52
64
53
-
return sanitizeHtml(rawHtml)
54
-
}, [record, renderMastodonHtml])
55
-
56
-
const renderedContent = useMemo(() => {
57
-
if (!htmlContent) return null
58
-
59
-
// Parse and render with React components on all platforms
60
-
return renderHtmlAsReact(htmlContent, numberOfLines, textStyle)
61
-
}, [htmlContent, numberOfLines, textStyle])
65
+
// Parse HTML once and sanitize/render in a single pass
66
+
return sanitizeAndRenderHtml(rawHtml, numberOfLines, textStyle)
67
+
}, [record, renderMastodonHtml, numberOfLines, textStyle])
68
+
69
+
const handleLayout = (event: LayoutChangeEvent) => {
70
+
const height = event.nativeEvent.layout.height
71
+
if (contentHeight === null) {
72
+
setContentHeight(height)
73
+
// Consider content "tall" if it's taller than 150px
74
+
setIsTall(height > 150)
75
+
}
76
+
}
62
77
63
78
if (!renderedContent) return null
64
79
65
-
return <View style={style}>{renderedContent}</View>
80
+
const shouldCollapse = isTall && !isExpanded
81
+
82
+
return (
83
+
<View style={style}>
84
+
<View
85
+
style={shouldCollapse ? {maxHeight: 150, overflow: 'hidden'} : undefined}
86
+
onLayout={handleLayout}>
87
+
{renderedContent}
88
+
</View>
89
+
{shouldCollapse && (
90
+
<Button
91
+
label={_(msg`Show more`)}
92
+
onPress={() => setIsExpanded(true)}
93
+
variant="ghost"
94
+
color="primary"
95
+
size="small"
96
+
style={[a.mt_xs]}>
97
+
<ButtonText>
98
+
<Trans>Show more</Trans>
99
+
</ButtonText>
100
+
</Button>
101
+
)}
102
+
</View>
103
+
)
66
104
}
67
105
68
-
function renderHtmlAsReact(
106
+
const LINK_PROTOCOLS = [
107
+
'http',
108
+
'https',
109
+
'dat',
110
+
'dweb',
111
+
'ipfs',
112
+
'ipns',
113
+
'ssb',
114
+
'gopher',
115
+
'xmpp',
116
+
'magnet',
117
+
'gemini',
118
+
]
119
+
120
+
const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i
121
+
122
+
const ALLOWED_ELEMENTS = [
123
+
'p',
124
+
'br',
125
+
'span',
126
+
'a',
127
+
'del',
128
+
's',
129
+
'pre',
130
+
'blockquote',
131
+
'code',
132
+
'b',
133
+
'strong',
134
+
'u',
135
+
'i',
136
+
'em',
137
+
'ul',
138
+
'ol',
139
+
'li',
140
+
'ruby',
141
+
'rt',
142
+
'rp',
143
+
]
144
+
145
+
function sanitizeAndRenderHtml(
69
146
html: string,
70
147
_numberOfLines?: number,
71
148
inputTextStyle?: StyleProp<TextStyle>,
72
149
): React.ReactNode {
150
+
if (typeof DOMParser === 'undefined') {
151
+
// Fallback for environments without DOMParser
152
+
return html.replace(/<[^>]*>/g, '')
153
+
}
154
+
73
155
const parser = new DOMParser()
74
156
const doc = parser.parseFromString(html, 'text/html')
75
157
76
-
77
-
78
-
158
+
const textStyle: StyleProp<TextStyle> = [
159
+
a.leading_snug,
160
+
a.text_md,
79
161
inputTextStyle,
80
162
]
81
163
82
-
const renderNode = (node: Node, key: number, insideLink = false): React.ReactNode => {
164
+
// Sanitize and render in a single pass
165
+
const renderNode = (node: Node, key: string, insideLink = false, listItemIndex?: number): React.ReactNode => {
83
166
if (node.nodeType === Node.TEXT_NODE) {
84
167
// Don't wrap text in styled Text component if inside a link
85
-
168
+
if (insideLink) {
86
169
87
170
88
171
···
92
175
93
176
if (node.nodeType === Node.ELEMENT_NODE) {
94
177
const element = node as Element
178
+
const tagName = element.tagName.toLowerCase()
179
+
180
+
// Handle unsupported elements (h1-h6) - convert to <strong> wrapped in <p>
181
+
if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) {
182
+
const children = Array.from(element.childNodes).map((child, i) =>
183
+
renderNode(child, String(i), insideLink),
184
+
)
185
+
return (
186
+
<P key={key} style={textStyle}>
187
+
<Text style={{...textStyle, fontWeight: 'bold'}}>{children}</Text>
188
+
</P>
189
+
)
190
+
}
191
+
192
+
// Handle math elements - extract annotation text
193
+
if (tagName === 'math') {
194
+
const mathText = extractMathAnnotation(element)
195
+
if (mathText) {
196
+
return <Text key={key} style={textStyle}>{mathText}</Text>
197
+
}
198
+
return null
199
+
}
200
+
201
+
// Remove elements not in allowlist - replace with text content
202
+
if (!ALLOWED_ELEMENTS.includes(tagName)) {
203
+
return element.textContent ? (
204
+
<Text key={key} style={textStyle}>{element.textContent}</Text>
205
+
) : null
206
+
}
207
+
208
+
// Sanitize and process element
209
+
sanitizeElementAttributes(element)
210
+
95
211
const children = Array.from(element.childNodes).map((child, i) =>
96
-
renderNode(child, i, insideLink || element.tagName.toLowerCase() === 'a'),
212
+
renderNode(child, String(i), insideLink || tagName === 'a'),
97
213
)
98
214
99
-
switch (element.tagName.toLowerCase()) {
215
+
switch (tagName) {
100
216
case 'p':
101
217
return <P key={key} style={textStyle}>{children}</P>
102
218
case 'blockquote':
···
108
224
case 'pre':
109
225
return (
110
226
<View key={key} style={{backgroundColor: '#f5f5f5', padding: 8, borderRadius: 4, marginVertical: 4}}>
111
-
<P style={{...textStyle, fontFamily: 'monospace'}}>{children}</P>
227
+
<P style={[textStyle, { fontFamily: 'monospace'}]}>{children}</P>
112
228
</View>
113
229
)
114
230
case 'code':
115
231
return (
116
-
<Text key={key} style={{...textStyle, fontFamily: 'monospace', backgroundColor: '#f5f5f5', paddingHorizontal: 4, borderRadius: 2}}>
232
+
<Text key={key} style={[textStyle, { fontFamily: 'monospace', backgroundColor: '#f5f5f5', paddingHorizontal: 4, borderRadius: 2}]}>
117
233
{children}
118
234
</Text>
119
235
)
120
236
case 'strong':
121
237
case 'b':
122
238
return (
123
-
<Text key={key} style={{...textStyle, fontWeight: 'bold'}}>
239
+
<Text key={key} style={[textStyle, { fontWeight: 'bold'}]}>
124
240
{children}
125
241
</Text>
126
242
)
127
243
case 'em':
128
244
case 'i':
129
245
return (
130
-
<Text key={key} style={{...textStyle, fontStyle: 'italic'}}>
246
+
<Text key={key} style={[textStyle, { fontStyle: 'italic'}]}>
131
247
{children}
132
248
</Text>
133
249
)
134
250
case 'u':
135
251
return (
136
-
<Text key={key} style={{...textStyle, textDecorationLine: 'underline'}}>
252
+
<Text key={key} style={[textStyle, { textDecorationLine: 'underline'}]}>
137
253
{children}
138
254
</Text>
139
255
)
140
256
case 'del':
257
+
case 's':
141
258
return (
142
-
<Text key={key} style={{...textStyle, textDecorationLine: 'line-through'}}>
259
+
<Text key={key} style={[textStyle, { textDecorationLine: 'line-through'}]}>
143
260
{children}
144
261
</Text>
145
262
)
···
151
268
)
152
269
case 'ol':
153
270
const start = element.getAttribute('start')
154
-
const reversed = element.getAttribute('reversed') !== null
271
+
const startNum = start ? parseInt(start, 10) : 1
155
272
return (
156
-
<View key={key} style={{marginVertical: 4}} data-start={start} data-reversed={reversed}>
157
-
{children}
273
+
<View key={key} style={{marginVertical: 4}}>
274
+
{Array.from(element.childNodes)
275
+
.filter(child => child.nodeType === Node.ELEMENT_NODE && (child as Element).tagName.toLowerCase() === 'li')
276
+
.map((child, i) => renderNode(child, `${key}-${i}`, insideLink, startNum + i))}
158
277
</View>
159
278
)
160
279
case 'li':
161
-
const value = element.getAttribute('value')
162
-
const parentIsOl = element.parentElement?.tagName.toLowerCase() === 'ol'
280
+
const marker = listItemIndex !== undefined ? `${listItemIndex}.` : '\u2022'
163
281
return (
164
282
<View key={key} style={{flexDirection: 'row', marginVertical: 2}}>
165
-
<Text style={{...textStyle, marginRight: 8}}>{parentIsOl ? (value || '螕脟贸') : '螕脟贸'}</Text>
166
-
<Text style={{...textStyle, flex: 1}}>{children}</Text>
283
+
<Text style={[textStyle, { marginRight: 8 }]}>{marker}</Text>
284
+
<Text style={[textStyle, { flex: 1 }]}>{children}</Text>
167
285
</View>
168
286
)
287
+
case 'ruby':
288
+
return <Text key={key} style={textStyle}>{children}</Text>
289
+
case 'rt':
290
+
case 'rp':
291
+
return null // TODO support ruby text rendering
169
292
case 'a':
170
293
const href = element.getAttribute('href')
171
294
if (href) {
···
179
302
to={href}
180
303
label={linkText}
181
304
shouldProxy
182
-
style={isInvisible ? {width: 0, height: 0, position: 'absolute'} : textStyle}>
305
+
style={isInvisible ? {display: 'none'} : textStyle}>
183
306
{children}
184
307
</InlineLinkText>
185
308
)
···
191
314
const spanClass = element.getAttribute('class')
192
315
// Handle invisible/ellipsis classes for link formatting
193
316
if (spanClass?.includes('invisible')) {
194
-
return null
317
+
return <Text key={key} style={{ display: 'none' }}>{children}</Text>
195
318
}
196
319
if (spanClass?.includes('ellipsis')) {
197
320
// If inside a link, return plain text, otherwise wrapped
198
-
199
-
200
-
201
-
202
-
203
-
204
-
321
+
if (insideLink) {
322
+
return '\u2026'
323
+
}
324
+
return <Text key={key} style={textStyle}>{'\u2026'}</Text>
325
+
}
326
+
// Handle mentions and hashtags
327
+
if (spanClass?.includes('mention') || spanClass?.includes('hashtag')) {
205
328
206
329
207
330
···
213
336
return children
214
337
}
215
338
return <Text key={key} style={textStyle}>{children}</Text>
216
-
case 'div':
217
-
return <P key={key} style={textStyle}>{children}</P>
218
339
default:
219
340
return <Text key={key} style={textStyle}>{children}</Text>
220
341
}
221
342
222
343
223
344
345
+
}
346
+
347
+
const content = Array.from(doc.body.childNodes).map((node, i) =>
348
+
renderNode(node, String(i)),
349
+
)
350
+
351
+
return (
224
352
225
353
354
+
</View>
355
+
)
356
+
}
226
357
358
+
function sanitizeElementAttributes(element: Element): void {
359
+
const tagName = element.tagName.toLowerCase()
360
+
const allowedAttrs: Record<string, string[]> = {
361
+
a: ['href', 'rel', 'class', 'translate'],
362
+
span: ['class', 'translate'],
363
+
ol: ['start', 'reversed'],
364
+
li: ['value'],
365
+
p: ['class'],
366
+
}
367
+
368
+
const allowed = allowedAttrs[tagName] || []
369
+
const attrs = Array.from(element.attributes)
370
+
371
+
// Remove non-allowed attributes
372
+
for (const attr of attrs) {
373
+
const attrName = attr.name.toLowerCase()
374
+
const isAllowed = allowed.some(allowedAttr => {
375
+
if (allowedAttr.endsWith('*')) {
376
+
return attrName.startsWith(allowedAttr.slice(0, -1))
377
+
}
378
+
return allowedAttr === attrName
379
+
})
227
380
381
+
if (!isAllowed) {
382
+
element.removeAttribute(attr.name)
383
+
}
384
+
}
385
+
386
+
// Process specific attributes
387
+
if (tagName === 'a') {
388
+
processAnchorElement(element)
389
+
}
390
+
391
+
// Process class whitelist
392
+
if (element.hasAttribute('class')) {
393
+
processClassWhitelist(element)
394
+
}
395
+
396
+
// Process translate attribute - remove unless it's "no"
397
+
if (element.hasAttribute('translate')) {
398
+
const translate = element.getAttribute('translate')
399
+
if (translate !== 'no') {
400
+
element.removeAttribute('translate')
401
+
}
402
+
}
403
+
}
228
404
405
+
function processAnchorElement(element: Element): void {
406
+
// Check if href has unsupported protocol
407
+
const href = element.getAttribute('href')
408
+
if (href) {
409
+
const scheme = getScheme(href)
410
+
if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) {
411
+
// Remove the href to disable the link
412
+
element.removeAttribute('href')
413
+
}
414
+
}
415
+
}
229
416
417
+
function processClassWhitelist(element: Element): void {
418
+
const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean)
419
+
const whitelisted = classList.filter(className => {
420
+
// microformats classes
421
+
if (/^[hpuedt]-/.test(className)) return true
422
+
// semantic classes
423
+
if (/^(mention|hashtag)$/.test(className)) return true
424
+
// link formatting classes
425
+
if (/^(ellipsis|invisible)$/.test(className)) return true
426
+
// quote inline class
427
+
if (className === 'quote-inline') return true
428
+
return false
429
+
})
430
+
431
+
if (whitelisted.length > 0) {
432
+
element.className = whitelisted.join(' ')
433
+
} else {
434
+
element.removeAttribute('class')
435
+
}
436
+
}
230
437
438
+
function getScheme(url: string): string | null {
439
+
const match = url.match(PROTOCOL_REGEX)
440
+
if (match) {
441
+
return match[1].toLowerCase()
442
+
}
443
+
// Check if it's a relative URL
444
+
if (url.startsWith('/') || url.startsWith('.')) {
445
+
return 'relative'
446
+
}
447
+
return null
448
+
}
231
449
450
+
function extractMathAnnotation(mathElement: Element): string | null {
451
+
const semantics = Array.from(mathElement.children).find(
452
+
child => child.tagName.toLowerCase() === 'semantics',
453
+
) as Element | undefined
454
+
455
+
if (!semantics) return null
456
+
457
+
// Look for LaTeX annotation (application/x-tex)
458
+
const latexAnnotation = Array.from(semantics.children).find(child => {
459
+
return (
460
+
child.tagName.toLowerCase() === 'annotation' &&
461
+
child.getAttribute('encoding') === 'application/x-tex'
462
+
)
463
+
})
464
+
465
+
if (latexAnnotation) {
466
+
const display = mathElement.getAttribute('display')
467
+
const text = latexAnnotation.textContent || ''
468
+
return display === 'block' ? `$$${text}$$` : `$${text}$`
469
+
}
470
+
471
+
// Look for plain text annotation
472
+
const plainAnnotation = Array.from(semantics.children).find(child => {
473
+
return (
474
+
child.tagName.toLowerCase() === 'annotation' &&
475
+
child.getAttribute('encoding') === 'text/plain'
476
+
)
477
+
})
478
+
479
+
if (plainAnnotation) {
480
+
return plainAnnotation.textContent || null
481
+
}
232
482
233
-
</View>
234
-
)
483
+
return null
235
484
}
-356
src/lib/strings/html-sanitizer.ts
-356
src/lib/strings/html-sanitizer.ts
···
1
-
/**
2
-
* HTML sanitizer inspired by Mastodon's Sanitize::Config
3
-
* Sanitizes HTML content to prevent XSS while preserving safe formatting
4
-
*/
5
-
6
-
const HTTP_PROTOCOLS = ['http', 'https']
7
-
8
-
const LINK_PROTOCOLS = [
9
-
'http',
10
-
'https',
11
-
'dat',
12
-
'dweb',
13
-
'ipfs',
14
-
'ipns',
15
-
'ssb',
16
-
'gopher',
17
-
'xmpp',
18
-
'magnet',
19
-
'gemini',
20
-
]
21
-
22
-
const PROTOCOL_REGEX = /^([a-z][a-z0-9.+-]*):\/\//i
23
-
24
-
interface SanitizeOptions {
25
-
allowOembed?: boolean
26
-
}
27
-
28
-
/**
29
-
* Sanitizes HTML content following Mastodon's strict rules
30
-
*/
31
-
export function sanitizeHtml(
32
-
html: string,
33
-
options: SanitizeOptions = {},
34
-
): string {
35
-
if (typeof DOMParser === 'undefined') {
36
-
// Fallback for environments without DOMParser
37
-
return sanitizeTextOnly(html)
38
-
}
39
-
40
-
const parser = new DOMParser()
41
-
const doc = parser.parseFromString(html, 'text/html')
42
-
const body = doc.body
43
-
44
-
sanitizeNode(body, options)
45
-
46
-
return body.innerHTML
47
-
}
48
-
49
-
function sanitizeNode(node: Node, options: SanitizeOptions): void {
50
-
const childNodes = Array.from(node.childNodes)
51
-
52
-
for (const child of childNodes) {
53
-
if (child.nodeType === Node.ELEMENT_NODE) {
54
-
const element = child as HTMLElement
55
-
const tagName = element.tagName.toLowerCase()
56
-
57
-
// Define allowed elements
58
-
const allowedElements = options.allowOembed
59
-
? [
60
-
'p',
61
-
'br',
62
-
'span',
63
-
'a',
64
-
'del',
65
-
's',
66
-
'pre',
67
-
'blockquote',
68
-
'code',
69
-
'b',
70
-
'strong',
71
-
'u',
72
-
'i',
73
-
'em',
74
-
'ul',
75
-
'ol',
76
-
'li',
77
-
'ruby',
78
-
'rt',
79
-
'rp',
80
-
'audio',
81
-
'iframe',
82
-
'source',
83
-
'video',
84
-
]
85
-
: [
86
-
'p',
87
-
'br',
88
-
'span',
89
-
'a',
90
-
'del',
91
-
's',
92
-
'pre',
93
-
'blockquote',
94
-
'code',
95
-
'b',
96
-
'strong',
97
-
'u',
98
-
'i',
99
-
'em',
100
-
'ul',
101
-
'ol',
102
-
'li',
103
-
'ruby',
104
-
'rt',
105
-
'rp',
106
-
]
107
-
108
-
// Handle unsupported elements (h1-h6) - convert to <strong> wrapped in <p>
109
-
if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName)) {
110
-
const strong = element.ownerDocument!.createElement('strong')
111
-
while (element.firstChild) {
112
-
strong.appendChild(element.firstChild)
113
-
}
114
-
const p = element.ownerDocument!.createElement('p')
115
-
p.appendChild(strong)
116
-
element.replaceWith(p)
117
-
sanitizeNode(p, options)
118
-
continue
119
-
}
120
-
121
-
// Handle math elements - extract annotation text
122
-
if (tagName === 'math') {
123
-
const mathText = extractMathAnnotation(element)
124
-
if (mathText) {
125
-
const textNode = element.ownerDocument!.createTextNode(mathText)
126
-
element.replaceWith(textNode)
127
-
} else {
128
-
element.remove()
129
-
}
130
-
continue
131
-
}
132
-
133
-
if (tagName === 'li') {
134
-
// Keep li elements but sanitize their children
135
-
sanitizeNode(element, options)
136
-
continue
137
-
}
138
-
139
-
// Remove elements not in allowlist
140
-
if (!allowedElements.includes(tagName)) {
141
-
// Replace with text content
142
-
const textNode = element.ownerDocument!.createTextNode(
143
-
element.textContent || '',
144
-
)
145
-
element.replaceWith(textNode)
146
-
continue
147
-
}
148
-
149
-
// Sanitize attributes
150
-
sanitizeAttributes(element, options)
151
-
152
-
// Recursively sanitize children
153
-
sanitizeNode(element, options)
154
-
}
155
-
}
156
-
}
157
-
158
-
function sanitizeAttributes(
159
-
element: HTMLElement,
160
-
options: SanitizeOptions,
161
-
): void {
162
-
const tagName = element.tagName.toLowerCase()
163
-
const allowedAttrs: Record<string, string[]> = {
164
-
a: ['href', 'rel', 'class', 'translate'],
165
-
span: ['class', 'translate'],
166
-
ol: ['start', 'reversed'],
167
-
li: ['value'],
168
-
p: ['class'],
169
-
}
170
-
171
-
if (options.allowOembed) {
172
-
allowedAttrs.audio = ['controls']
173
-
allowedAttrs.iframe = [
174
-
'allowfullscreen',
175
-
'frameborder',
176
-
'height',
177
-
'scrolling',
178
-
'src',
179
-
'width',
180
-
]
181
-
allowedAttrs.source = ['src', 'type']
182
-
allowedAttrs.video = ['controls', 'height', 'loop', 'width']
183
-
}
184
-
185
-
const allowed = allowedAttrs[tagName] || []
186
-
const attrs = Array.from(element.attributes)
187
-
188
-
// Remove non-allowed attributes
189
-
for (const attr of attrs) {
190
-
const attrName = attr.name.toLowerCase()
191
-
const isAllowed = allowed.some(a => {
192
-
if (a.endsWith('*')) {
193
-
return attrName.startsWith(a.slice(0, -1))
194
-
}
195
-
return a === attrName
196
-
})
197
-
198
-
if (!isAllowed) {
199
-
element.removeAttribute(attr.name)
200
-
}
201
-
}
202
-
203
-
// Process specific attributes
204
-
if (tagName === 'a') {
205
-
processAnchorElement(element)
206
-
}
207
-
208
-
// Process class whitelist
209
-
if (element.hasAttribute('class')) {
210
-
processClassWhitelist(element)
211
-
}
212
-
213
-
// Process translate attribute - remove unless it's "no"
214
-
if (element.hasAttribute('translate')) {
215
-
const translate = element.getAttribute('translate')
216
-
if (translate !== 'no') {
217
-
element.removeAttribute('translate')
218
-
}
219
-
}
220
-
221
-
// Validate protocols for elements with src/href
222
-
if (element.hasAttribute('href') || element.hasAttribute('src')) {
223
-
validateProtocols(element, options)
224
-
}
225
-
}
226
-
227
-
function processAnchorElement(element: HTMLElement): void {
228
-
// Add required attributes
229
-
element.setAttribute('rel', 'nofollow noopener')
230
-
element.setAttribute('target', '_blank')
231
-
232
-
// Check if href has unsupported protocol
233
-
const href = element.getAttribute('href')
234
-
if (href) {
235
-
const scheme = getScheme(href)
236
-
if (scheme !== null && scheme !== 'relative' && !LINK_PROTOCOLS.includes(scheme)) {
237
-
// Replace element with its text content
238
-
const textNode = element.ownerDocument!.createTextNode(
239
-
element.textContent || '',
240
-
)
241
-
element.replaceWith(textNode)
242
-
}
243
-
}
244
-
}
245
-
246
-
function processClassWhitelist(element: HTMLElement): void {
247
-
const classList = element.className.split(/[\t\n\f\r ]+/).filter(Boolean)
248
-
const whitelisted = classList.filter(className => {
249
-
// microformats classes
250
-
if (/^[hpuedt]-/.test(className)) return true
251
-
// semantic classes
252
-
if (/^(mention|hashtag)$/.test(className)) return true
253
-
// link formatting classes
254
-
if (/^(ellipsis|invisible)$/.test(className)) return true
255
-
// quote inline class
256
-
if (className === 'quote-inline') return true
257
-
return false
258
-
})
259
-
260
-
if (whitelisted.length > 0) {
261
-
element.className = whitelisted.join(' ')
262
-
} else {
263
-
element.removeAttribute('class')
264
-
}
265
-
}
266
-
267
-
function validateProtocols(
268
-
element: HTMLElement,
269
-
options: SanitizeOptions,
270
-
): void {
271
-
const tagName = element.tagName.toLowerCase()
272
-
const src = element.getAttribute('src')
273
-
const href = element.getAttribute('href')
274
-
const url = src || href
275
-
276
-
if (!url) return
277
-
278
-
const scheme = getScheme(url)
279
-
280
-
// For oembed elements, only allow HTTP protocols for src
281
-
if (
282
-
options.allowOembed &&
283
-
src &&
284
-
['iframe', 'source'].includes(tagName)
285
-
) {
286
-
if (scheme !== null && !HTTP_PROTOCOLS.includes(scheme)) {
287
-
element.removeAttribute('src')
288
-
}
289
-
// Add sandbox attribute to iframes
290
-
if (tagName === 'iframe') {
291
-
element.setAttribute(
292
-
'sandbox',
293
-
'allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox allow-forms',
294
-
)
295
-
}
296
-
}
297
-
}
298
-
299
-
function getScheme(url: string): string | null {
300
-
const match = url.match(PROTOCOL_REGEX)
301
-
if (match) {
302
-
return match[1].toLowerCase()
303
-
}
304
-
// Check if it's a relative URL
305
-
if (url.startsWith('/') || url.startsWith('.')) {
306
-
return 'relative'
307
-
}
308
-
return null
309
-
}
310
-
311
-
/**
312
-
* Extract math annotation from MathML element
313
-
* Follows FEP-dc88 spec for math element representation
314
-
*/
315
-
function extractMathAnnotation(mathElement: HTMLElement): string | null {
316
-
const semantics = Array.from(mathElement.children).find(
317
-
child => child.tagName.toLowerCase() === 'semantics',
318
-
) as HTMLElement | undefined
319
-
320
-
if (!semantics) return null
321
-
322
-
// Look for LaTeX annotation (application/x-tex)
323
-
const latexAnnotation = Array.from(semantics.children).find(child => {
324
-
return (
325
-
child.tagName.toLowerCase() === 'annotation' &&
326
-
child.getAttribute('encoding') === 'application/x-tex'
327
-
)
328
-
})
329
-
330
-
if (latexAnnotation) {
331
-
const display = mathElement.getAttribute('display')
332
-
const text = latexAnnotation.textContent || ''
333
-
return display === 'block' ? `$$${text}$$` : `$${text}$`
334
-
}
335
-
336
-
// Look for plain text annotation
337
-
const plainAnnotation = Array.from(semantics.children).find(child => {
338
-
return (
339
-
child.tagName.toLowerCase() === 'annotation' &&
340
-
child.getAttribute('encoding') === 'text/plain'
341
-
)
342
-
})
343
-
344
-
if (plainAnnotation) {
345
-
return plainAnnotation.textContent || null
346
-
}
347
-
348
-
return null
349
-
}
350
-
351
-
/**
352
-
* Fallback sanitizer that strips all HTML tags
353
-
*/
354
-
function sanitizeTextOnly(html: string): string {
355
-
return html.replace(/<[^>]*>/g, '')
356
-
}
History
2 rounds
5 comments
maxine.puppykitty.racing
submitted
#1
5 commits
expand
collapse
e7e78fad
fix: don't duplicate work in MastodonHtmlContent
3e5262ab
chore: remove any casts
265f3ab4
chore: replace unicode ellipsis with escaped version
eff00beb
feat/MastodonHtml: render as ordered lists (with numeric prefixes)
a28c6d3f
feat/MastodonHtml: collapse posts taller than 150px
expand 5 comments
Good point, I will rewrite the sanitizer from scratch
Hey Maxine! Did you get this done? I’d like to see if we can merge it once the conflicts are resolved.
Sorry Ewan, I haven't had the time. Also, this PR has some weird bugs (sometimes the render crashes, and I never diagnosed it), so you might want to close this one in the meantime.
I might look into writing a non-vibe-coded version of this at some point, it'd be a fun way to cut my teeth on webdev again
closed without merging
maxine.puppykitty.racing
submitted
#0
2 commits
expand
collapse
6e85dcd3
feat: render full post contents for posts bridged from mastodon or wafrn
e7e78fad
fix: don't duplicate work in MastodonHtmlContent
I am like 99% sure this would be considered a license violation if merged, as Mastodon is licensed under AGPL while Witchsky is MIT.