this repo has no description
/**
 * Tag names the sanitizer lets through. Lookups are made with the element's
 * lower-cased tag name.
 */
type AllowedTags = Set<string>;
2
/**
 * Per-tag attribute allow-list: maps a tag name to the set of attribute names
 * that may be kept on elements with that tag.
 */
interface AllowedAttributes {
  [tagName: string]: Set<string>;
}
6
/** Options accepted by the HTML sanitizer. */
export interface SanitizeHtmlOptions {
  /**
   * Complete list of tag names to allow. When provided it is used instead of
   * DEFAULT_SAFE_TAGS. Must not be combined with `extraAllowedTags`.
   */
  allowedTags?: string[];

  /**
   * Tag names to allow in addition to DEFAULT_SAFE_TAGS. Must not be combined
   * with `allowedTags`.
   */
  extraAllowedTags?: string[];

  /**
   * When true, a disallowed element is removed but its children are still
   * sanitized and kept in its place. Otherwise the element is dropped together
   * with everything inside it.
   */
  keepChildrenWhenRemovingParent?: boolean;

  /**
   * When true, replaces all `&nbsp;` entities with regular spaces
   * to prevent unwanted line breaks in the rendered HTML
   */
  removeNbsp?: boolean;

  /**
   * allowedAttributes should be an object with tag name keys and array values
   * containing all of the attributes allowed for that tag:
   *
   *   { 'p': ['class'], 'div': ['role', 'aria-hidden'] }
   *
   * The above allows ONLY the class attribute for <p> and ONLY the role and
   * aria-hidden attributes for <div>. Tags without an entry keep no attributes.
   */
  allowedAttributes?: {
    [tagName: string]: string[];
  };
}
31
/**
 * Tags allowed when the caller does not pass `allowedTags`: basic inline
 * formatting plus line breaks.
 */
export const DEFAULT_SAFE_TAGS: string[] = [
  'strong',
  'em',
  'b',
  'i',
  'u',
  'br',
];
/**
 * Attribute allow-list used when the caller does not pass `allowedAttributes`.
 * It is empty, so by default no attribute is kept on any tag.
 */
const DEFAULT_SAFE_ATTRS: Record<string, string[]> = {};
41
/**
 * Sanitizes HTML by removing all tags and attributes that aren't explicitly allowed.
 *
 * Each child of `unsafeNode` is run through `sanitizeNode`; the surviving nodes
 * are appended to a fresh `<div>` created from `unsafeDocument`, and that
 * container's `innerHTML` is returned.
 *
 * Note: text and CDATA nodes come back from `sanitizeNode` as the original
 * node instances, so appending them to the container detaches them from
 * `unsafeNode`.
 *
 * @param unsafeDocument - Document used to create the temporary container.
 * @param unsafeNode - Node or fragment whose children are sanitized.
 * @param options - `allowedTags` replaces DEFAULT_SAFE_TAGS while
 *   `extraAllowedTags` extends it (the two are mutually exclusive);
 *   `allowedAttributes` defaults to DEFAULT_SAFE_ATTRS.
 * @returns The sanitized markup serialized as a string.
 * @throws Error when both `allowedTags` and `extraAllowedTags` are given.
 */
export function sanitizeDocument(
  unsafeDocument: Document,
  unsafeNode: Node | DocumentFragment,
  {
    allowedTags,
    extraAllowedTags,
    allowedAttributes = DEFAULT_SAFE_ATTRS,
    keepChildrenWhenRemovingParent = false,
    removeNbsp,
  }: SanitizeHtmlOptions = {},
): string {
  if (allowedTags && extraAllowedTags) {
    throw new Error(
      'sanitizeHtml got both allowedTags and extraAllowedTags',
    );
  }

  const allowedTagsSet = new Set<string>([
    ...(extraAllowedTags || []),
    ...(allowedTags || DEFAULT_SAFE_TAGS),
  ]);

  // Null-prototype dictionary: a lookup by tag name can then never resolve to
  // an inherited Object.prototype member such as "constructor".
  const allowedAttributeSets: Record<string, Set<string>> = Object.create(null);
  for (const [tag, attributes] of Object.entries(allowedAttributes)) {
    allowedAttributeSets[tag] = new Set(attributes);
  }

  const sanitizedContainer = unsafeDocument.createElement('div');

  // Snapshot the live child list before iterating: appending a returned text
  // node to the container removes it from unsafeNode.
  for (const child of [...unsafeNode.childNodes]) {
    const sanitizedNodes = sanitizeNode(
      child as Element,
      allowedTagsSet,
      allowedAttributeSets,
      keepChildrenWhenRemovingParent,
    );
    for (const sanitizedNode of sanitizedNodes) {
      sanitizedContainer.appendChild(sanitizedNode);
    }
  }

  const html = sanitizedContainer.innerHTML;

  // Replace &nbsp; with regular spaces if removeNbsp option is enabled.
  // HTML serialization writes U+00A0 as the "&nbsp;" entity, so the entity
  // text is what has to be matched; the raw character is covered as well.
  return removeNbsp ? html.replace(/&nbsp;|\u00A0/g, ' ') : html;
}
95
/**
 * Sanitizes a single DOM node against the tag and attribute allow-lists.
 *
 * - Text and CDATA nodes are returned unchanged (the same node instance).
 * - A node whose lower-cased tag name is not in `allowedTags` is dropped; when
 *   `keepChildrenWhenRemovingParent` is true its sanitized children are
 *   returned in its place.
 * - An allowed element is rebuilt with `ownerDocument.createElement`, keeping
 *   only the attributes allow-listed for its tag, with its sanitized children
 *   appended.
 *
 * Attribute values are copied verbatim; this function does not inspect them
 * (for example, it performs no URL-scheme filtering).
 *
 * @param node - Node to sanitize. Typed as Element, but callers also pass
 *   text/CDATA and other node kinds, which is why `tagName` may be missing.
 * @param allowedTags - Lower-case tag names that may be kept.
 * @param allowedAttributes - Per-tag sets of attribute names that may be kept.
 * @param keepChildrenWhenRemovingParent - Whether children of a removed
 *   element survive.
 * @returns Zero or more nodes to insert in place of `node`.
 */
function sanitizeNode(
  node: Element,
  allowedTags: AllowedTags,
  allowedAttributes: AllowedAttributes,
  keepChildrenWhenRemovingParent: boolean,
): Node[] {
  // Plain text is safe as is
  // NOTE: The lowercase node (instead of Node) is intentional. Node is only
  //       accessible in browser. In Node.js, it depends on jsdom (which we
  //       avoid importing to exclude from the clientside vendor bundle).
  //       Instead of passing down window.Node or jsdom.Node depending on
  //       context, we rely on the fact that instances of Node (of which node
  //       will be one) will also have these constants set on them.
  const { nodeType } = node;
  if (nodeType === node.TEXT_NODE || nodeType === node.CDATA_SECTION_NODE) {
    return [node];
  }

  // Refuse anything that isn't a tag or one of the allowed tags
  const tagName = (node.tagName || '').toLowerCase();

  if (!allowedTags.has(tagName)) {
    // when keepChildrenWhenRemovingParent is true
    // we check children for valid nodes as well
    return keepChildrenWhenRemovingParent
      ? sanitizeChildren(
          node,
          allowedTags,
          allowedAttributes,
          keepChildrenWhenRemovingParent,
        )
      : [];
  }

  // Reconstruct node with only the allowedAttributes and sanitize its children
  const sanitized = node.ownerDocument.createElement(tagName);

  // Own-property lookup only: a tag name such as "constructor" must not
  // resolve to an inherited Object.prototype member, which is not a Set.
  const attributesForTag = Object.prototype.hasOwnProperty.call(
    allowedAttributes,
    tagName,
  )
    ? allowedAttributes[tagName]
    : undefined;

  if (attributesForTag) {
    for (const { name, value } of [...node.attributes]) {
      if (attributesForTag.has(name)) {
        sanitized.setAttribute(name, value);
      }
    }
  }

  const children = sanitizeChildren(
    node,
    allowedTags,
    allowedAttributes,
    keepChildrenWhenRemovingParent,
  );
  for (const child of children) {
    sanitized.appendChild(child);
  }

  return [sanitized];
}
157
/**
 * Sanitizes every child of `node` with `sanitizeNode` and returns the results
 * as a single flat list, in document order.
 *
 * @param node - Parent whose `childNodes` are sanitized.
 * @param allowedTags - Lower-case tag names that may be kept.
 * @param allowedAttributes - Per-tag sets of attribute names that may be kept.
 * @param keepChildrenWhenRemovingParent - Passed through to `sanitizeNode`;
 *   whether children of a removed element survive.
 * @returns The sanitized replacements for all children (possibly empty).
 */
const sanitizeChildren = (
  node: Element,
  allowedTags: AllowedTags,
  allowedAttributes: AllowedAttributes,
  keepChildrenWhenRemovingParent: boolean,
): Node[] =>
  // Spread turns the NodeList into a real array so array methods are
  // available; each child can yield zero, one or several nodes, hence flatMap.
  [...node.childNodes].flatMap((childNode) =>
    sanitizeNode(
      childNode as Element,
      allowedTags,
      allowedAttributes,
      keepChildrenWhenRemovingParent,
    ),
  );