this repo has no description
at main 176 lines 5.0 kB view raw
/** Lowercase tag names that survive sanitization. */
type AllowedTags = ReadonlySet<string>;

/**
 * Per-tag attribute allow-list, keyed by lowercase tag name.
 *
 * A Map is used instead of a plain object so that tag names which collide
 * with Object.prototype members (e.g. `constructor`) cannot resolve to
 * something that is not a Set.
 */
type AllowedAttributes = ReadonlyMap<string, ReadonlySet<string>>;

export interface SanitizeHtmlOptions {
  /**
   * Tags to keep, replacing DEFAULT_SAFE_TAGS entirely. Names are compared
   * against the lowercased tag name of each element. Mutually exclusive with
   * extraAllowedTags.
   */
  allowedTags?: string[];

  /**
   * Tags to keep in addition to DEFAULT_SAFE_TAGS. Mutually exclusive with
   * allowedTags.
   */
  extraAllowedTags?: string[];

  /**
   * When true, the sanitized children of a disallowed element are kept in
   * its place; when false or omitted, the element is dropped together with
   * everything inside it.
   */
  keepChildrenWhenRemovingParent?: boolean;

  /**
   * When true, replaces all &nbsp; entities with regular spaces
   * to prevent unwanted line breaks in the rendered HTML
   */
  removeNbsp?: boolean;

  /**
   * AllowedAttributes should be an object with tag name keys and array values
   * containing all of the attributes allowed for that tag:
   *
   * { 'p': ['class'], 'div': ['role', 'aria-hidden'] }
   *
   * The above allows ONLY the class attribute for <p> and ONLY the role and
   * aria-hidden attributes for <div>.
   */
  allowedAttributes?: {
    [tagName: string]: string[];
  };
}

export const DEFAULT_SAFE_TAGS: string[] = [
  'strong',
  'em',
  'b',
  'i',
  'u',
  'br',
];
const DEFAULT_SAFE_ATTRS: Record<string, string[]> = {};

/** Shared empty allow-list for tags that have no configured attributes. */
const NO_ALLOWED_ATTRIBUTES: ReadonlySet<string> = new Set<string>();

/**
 * Sanitizes HTML by removing all tags and attributes that aren't explicitly allowed.
 *
 * The children of `unsafeNode` are rebuilt into a fresh `<div>` created from
 * `unsafeDocument`, and that container's `innerHTML` is returned. The input
 * tree is left untouched.
 *
 * @param unsafeDocument Document used to create the container element.
 * @param unsafeNode Node (or fragment) whose children are sanitized.
 * @param options See {@link SanitizeHtmlOptions}.
 * @returns The serialized, sanitized HTML.
 * @throws Error if both `allowedTags` and `extraAllowedTags` are provided.
 */
export function sanitizeDocument(
  unsafeDocument: Document,
  unsafeNode: Node | DocumentFragment,
  {
    allowedTags,
    extraAllowedTags,
    allowedAttributes = DEFAULT_SAFE_ATTRS,
    keepChildrenWhenRemovingParent = false,
    removeNbsp = false,
  }: SanitizeHtmlOptions = {},
): string {
  if (allowedTags && extraAllowedTags) {
    throw new Error(
      'sanitizeHtml got both allowedTags and extraAllowedTags',
    );
  }

  const allowedTagsSet = new Set<string>([
    ...(extraAllowedTags ?? []),
    ...(allowedTags ?? DEFAULT_SAFE_TAGS),
  ]);

  const allowedAttributeSets = new Map<string, ReadonlySet<string>>();
  for (const [tag, attributes] of Object.entries(allowedAttributes)) {
    allowedAttributeSets.set(tag, new Set(attributes));
  }

  const sanitizedContainer = unsafeDocument.createElement('div');
  const sanitizedChildren = sanitizeChildren(
    unsafeNode,
    allowedTagsSet,
    allowedAttributeSets,
    keepChildrenWhenRemovingParent,
  );
  for (const sanitizedChild of sanitizedChildren) {
    sanitizedContainer.appendChild(sanitizedChild);
  }

  const html = sanitizedContainer.innerHTML;

  // Replace &nbsp; with regular spaces if removeNbsp option is enabled
  return removeNbsp ? html.replace(/&nbsp;/g, ' ') : html;
}

/**
 * Runtime check for element nodes.
 *
 * NOTE: The lowercase node (instead of Node) is intentional. Node is only
 * accessible in browser. In Node.js, it depends on jsdom (which we
 * avoid importing to exclude from the clientside vendor bundle).
 * Instead of passing down window.Node or jsdom.Node depending on
 * context, we rely on the fact that instances of Node (of which node
 * will be one) will also have these constants set on them.
 */
function isElement(node: Node): node is Element {
  return node.nodeType === node.ELEMENT_NODE;
}

/**
 * Sanitizes a single node.
 *
 * @returns Zero or more freshly created nodes to insert in place of `node`:
 *   a copy of the text for text/CDATA nodes, a rebuilt element for allowed
 *   tags, the sanitized children for disallowed tags when
 *   `keepChildrenWhenRemovingParent` is set, and nothing otherwise.
 */
function sanitizeNode(
  node: Node,
  allowedTags: AllowedTags,
  allowedAttributes: AllowedAttributes,
  keepChildrenWhenRemovingParent: boolean,
): Node[] {
  // Plain text is safe as is. It is cloned rather than returned directly
  // because appending the original node to the sanitized tree would move it
  // out of the caller's DOM.
  if (
    node.nodeType === node.TEXT_NODE ||
    node.nodeType === node.CDATA_SECTION_NODE
  ) {
    return [node.cloneNode(false)];
  }

  // Refuse anything that isn't a tag (comments, processing instructions, ...)
  if (!isElement(node)) {
    return [];
  }

  const tagName = (node.tagName || '').toLowerCase();

  // Refuse anything that isn't one of the allowed tags
  if (!allowedTags.has(tagName)) {
    // when keepChildrenWhenRemovingParent is true
    // we check children for valid nodes as well
    return keepChildrenWhenRemovingParent
      ? sanitizeChildren(
          node,
          allowedTags,
          allowedAttributes,
          keepChildrenWhenRemovingParent,
        )
      : [];
  }

  // Reconstruct node with only the allowedAttributes and sanitize its children
  const sanitized = node.ownerDocument.createElement(tagName);
  const currentlyAllowedAttributes =
    allowedAttributes.get(tagName) ?? NO_ALLOWED_ATTRIBUTES;

  for (const { name, value } of Array.from(node.attributes)) {
    if (currentlyAllowedAttributes.has(name)) {
      sanitized.setAttribute(name, value);
    }
  }

  const children = sanitizeChildren(
    node,
    allowedTags,
    allowedAttributes,
    keepChildrenWhenRemovingParent,
  );
  for (const child of children) {
    sanitized.appendChild(child);
  }

  return [sanitized];
}

/**
 * Sanitizes every child of `node` and returns the results flattened into a
 * single list, in document order.
 */
function sanitizeChildren(
  node: Node,
  allowedTags: AllowedTags,
  allowedAttributes: AllowedAttributes,
  keepChildrenWhenRemovingParent: boolean,
): Node[] {
  // Array.from snapshots the live NodeList before any recursion happens.
  return Array.from(node.childNodes).flatMap((childNode) =>
    sanitizeNode(
      childNode,
      allowedTags,
      allowedAttributes,
      keepChildrenWhenRemovingParent,
    ),
  );
}