Barazo AppView backend
barazo.forum
1import { describe, it, expect } from 'vitest'
2import { sanitizeHtml, sanitizeText } from '../../../src/lib/sanitize.js'
3
// Test suite for the two sanitizer entry points imported from src/lib/sanitize.js:
//   - sanitizeHtml: exercised with HTML markup (allowed tags/attributes kept,
//     dangerous tags/attributes removed, Unicode normalized, bidi controls removed)
//   - sanitizeText: exercised with plain-text fields such as titles (all markup removed)
describe('sanitize', () => {
  // "café" written as U+0065 (e) + U+0301 (combining acute accent), i.e. NFD form.
  const CAFE_NFD = 'caf\u0065\u0301'
  // "café" written with the precomposed U+00E9 (e-acute), i.e. NFC form.
  const CAFE_NFC = 'caf\u00E9'

  // Asserts, in order, that every fragment occurs in `output`.
  const expectAllPresent = (output, fragments) => {
    for (const fragment of fragments) {
      expect(output).toContain(fragment)
    }
  }

  // Asserts, in order, that no fragment occurs in `output`.
  const expectAllAbsent = (output, fragments) => {
    for (const fragment of fragments) {
      expect(output).not.toContain(fragment)
    }
  }

  describe('sanitizeHtml', () => {
    it('returns empty string for empty input', () => {
      const cleaned = sanitizeHtml('')
      expect(cleaned).toBe('')
    })

    it('preserves valid markdown-rendered HTML tags', () => {
      // One fixture covering the tag families a markdown renderer emits.
      const markup = [
        '<p>Hello <strong>bold</strong> and <em>italic</em></p>',
        '<blockquote>A quote</blockquote>',
        '<ul><li>Item</li></ul>',
        '<ol><li>Numbered</li></ol>',
        '<pre><code>code block</code></pre>',
        '<h1>Heading</h1><h2>H2</h2><h3>H3</h3><h4>H4</h4><h5>H5</h5><h6>H6</h6>',
        '<hr>',
        '<table><thead><tr><th>Col</th></tr></thead><tbody><tr><td>Cell</td></tr></tbody></table>',
        '<del>strikethrough</del><sup>sup</sup><sub>sub</sub><br>',
      ].join('')

      const cleaned = sanitizeHtml(markup)

      // Each of these opening tags is expected to survive sanitization.
      expectAllPresent(cleaned, [
        '<strong>',
        '<em>',
        '<blockquote>',
        '<ul>',
        '<ol>',
        '<li>',
        '<pre>',
        '<code>',
        '<h1>',
        '<table>',
        '<del>',
        '<sup>',
        '<sub>',
        '<br>',
        '<hr>',
      ])
    })

    it('preserves allowed attributes on links', () => {
      const cleaned = sanitizeHtml(
        '<a href="https://example.com" rel="noopener noreferrer">Link</a>',
      )
      expectAllPresent(cleaned, ['href="https://example.com"', 'rel="noopener noreferrer"'])
    })

    it('preserves img tags with src and alt', () => {
      const cleaned = sanitizeHtml('<img src="https://example.com/img.png" alt="Photo">')
      expectAllPresent(cleaned, ['src="https://example.com/img.png"', 'alt="Photo"'])
    })

    it('strips script tags', () => {
      const cleaned = sanitizeHtml('<p>Hello</p><script>alert("xss")</script>')
      // Both the tag and its inline payload must be gone; the sibling paragraph stays.
      expectAllAbsent(cleaned, ['<script>', 'alert'])
      expect(cleaned).toContain('<p>Hello</p>')
    })

    it('strips onerror attributes from img tags', () => {
      const cleaned = sanitizeHtml('<img src="x" onerror="alert(1)">')
      expectAllAbsent(cleaned, ['onerror', 'alert'])
    })

    it('strips javascript: protocol from href', () => {
      const cleaned = sanitizeHtml('<a href="javascript:alert(1)">click</a>')
      expect(cleaned).not.toContain('javascript:')
    })

    it('strips iframe tags', () => {
      const cleaned = sanitizeHtml('<iframe src="https://evil.com"></iframe><p>Safe</p>')
      expect(cleaned).not.toContain('<iframe')
      expect(cleaned).toContain('<p>Safe</p>')
    })

    it('strips style tags', () => {
      const cleaned = sanitizeHtml('<style>body { display: none }</style><p>Visible</p>')
      expect(cleaned).not.toContain('<style')
      expect(cleaned).toContain('<p>Visible</p>')
    })

    it('strips data attributes', () => {
      const cleaned = sanitizeHtml('<p data-tracking="abc123">Text</p>')
      expect(cleaned).not.toContain('data-tracking')
      expect(cleaned).toContain('Text')
    })

    it('strips on* event handler attributes', () => {
      const cleaned = sanitizeHtml('<p onclick="alert(1)" onmouseover="alert(2)">Text</p>')
      expectAllAbsent(cleaned, ['onclick', 'onmouseover'])
    })

    it('strips form and input tags', () => {
      const cleaned = sanitizeHtml(
        '<form action="/steal"><input type="text"><button>Submit</button></form>',
      )
      expectAllAbsent(cleaned, ['<form', '<input'])
    })

    it('applies NFC normalization', () => {
      // Decomposed input wrapped in a paragraph should come back precomposed.
      const cleaned = sanitizeHtml(`<p>${CAFE_NFD}</p>`)
      expect(cleaned).toContain(CAFE_NFC)
    })

    it('strips bidirectional override characters', () => {
      // Embeddings/overrides: U+202A (LRE), U+202B (RLE), U+202C (PDF), U+202D (LRO), U+202E (RLO)
      // Isolates:             U+2066 (LRI), U+2067 (RLI), U+2068 (FSI), U+2069 (PDI)
      // Marks:                U+200E (LRM), U+200F (RLM)
      const bidiControls = '\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069\u200E\u200F'
      const cleaned = sanitizeHtml(`<p>Hello${bidiControls}World</p>`)
      expect(cleaned).not.toMatch(/[\u202A-\u202E\u2066-\u2069\u200E\u200F]/)
      // With the controls removed the two words end up adjacent.
      expect(cleaned).toContain('HelloWorld')
    })

    it('handles combined bidi + script injection', () => {
      const cleaned = sanitizeHtml('<p>\u202EHello</p><script>alert("xss")</script>')
      expectAllAbsent(cleaned, ['<script>', '\u202E'])
    })

    it('handles very large input without throwing', () => {
      const hugeParagraph = `<p>${'A'.repeat(100_000)}</p>`
      const cleaned = sanitizeHtml(hugeParagraph)
      expect(cleaned).toContain('<p>')
      expect(cleaned.length).toBeGreaterThan(0)
    })

    it('preserves plain text without modification (after normalization)', () => {
      const cleaned = sanitizeHtml('Just plain text with no HTML')
      expect(cleaned).toBe('Just plain text with no HTML')
    })
  })

  describe('sanitizeText', () => {
    it('returns empty string for empty input', () => {
      const cleaned = sanitizeText('')
      expect(cleaned).toBe('')
    })

    it('strips all HTML tags', () => {
      const cleaned = sanitizeText('<b>Bold</b> and <script>evil()</script>')
      // No angle brackets may remain at all.
      expectAllAbsent(cleaned, ['<', '>'])
      expect(cleaned).toContain('Bold')
      expect(cleaned).not.toContain('evil')
    })

    it('preserves plain text', () => {
      const question = 'How to configure PostgreSQL?'
      const cleaned = sanitizeText(question)
      expect(cleaned).toBe('How to configure PostgreSQL?')
    })

    it('applies NFC normalization', () => {
      const cleaned = sanitizeText(CAFE_NFD)
      expect(cleaned).toBe(CAFE_NFC)
    })

    it('strips bidirectional override characters', () => {
      const cleaned = sanitizeText('\u202AHello\u202E World\u200F')
      expect(cleaned).toBe('Hello World')
    })

    it('strips HTML from titles with injection attempts', () => {
      const cleaned = sanitizeText('Topic <img src=x onerror=alert(1)> Title')
      expectAllAbsent(cleaned, ['<img', 'onerror'])
      // The text on either side of the injected tag is kept.
      expectAllPresent(cleaned, ['Topic', 'Title'])
    })

    it('handles very large input without throwing', () => {
      const hugeText = 'A'.repeat(100_000)
      const cleaned = sanitizeText(hugeText)
      expect(cleaned.length).toBe(100_000)
    })

    it('strips nested HTML tags', () => {
      const cleaned = sanitizeText('<div><p><b>Nested</b></p></div>')
      expect(cleaned).not.toContain('<')
      expect(cleaned).toContain('Nested')
    })

    it('handles homoglyph-style text (NFC normalization of composed chars)', () => {
      // Latin Small Letter A with Ring Above: U+0061 + U+030A -> U+00E5
      const ringADecomposed = '\u0061\u030A'
      const ringAComposed = '\u00E5'
      const cleaned = sanitizeText(ringADecomposed)
      expect(cleaned).toBe(ringAComposed)
    })
  })
})