Process HTML for better web typography.
at main 4.4 kB view raw
/**
 * @import {Root} from 'hast'
 */

import { SKIP, visit } from 'unist-util-visit'

/**
 * @typedef {Object} QuotesOptions
 */
/**
 * @typedef {Object} PunctuationOptions
 * @property {'double' | 'triple'} [em-dash-replacement] Which hyphen
 * sequence becomes an em-dash glyph. With `double` (the default) `--`
 * becomes an em-dash. With `triple`, `---` becomes an em-dash and `--`
 * becomes an en-dash.
 */
/**
 * @typedef {Object} SpacesOptions
 * @property {'open' | 'closed'} [en-dash-spacing] Whether en-dashes
 * between digits are surrounded by (hair) spaces. `open` (the default)
 * pads them; `closed` sets them tight against the digits.
 */
/**
 * @typedef {Object} PluginOptions
 * @property {QuotesOptions | false} [quotes]
 * @property {PunctuationOptions | false} [punctuation]
 * @property {SpacesOptions | false} [spaces]
 */

const HAIR_SPACE = '\u200A'
const NBSP = '\u00A0'
const EN_DASH = '\u2013'
const EM_DASH = '\u2014'
const ELLIPSIS = '\u2026'

/** Elements whose subtree is skipped entirely. */
const IGNORED_ELEMENTS = new Set(['title', 'script', 'style', 'pre', 'code'])

/** @type {PluginOptions} */
const DEFAULT_OPTIONS = {
  quotes: {},
  punctuation: {
    'em-dash-replacement': 'double',
  },
  spaces: {
    'en-dash-spacing': 'open',
  },
}

/**
 * Process HTML content for better web typography.
 *
 * Every `text` node that is a direct child of a visited element is
 * rewritten in place; each text node is processed independently of its
 * siblings. Elements listed in `IGNORED_ELEMENTS` are skipped together
 * with their descendants.
 *
 * @param {PluginOptions} [options] Per-pass settings. A pass runs unless
 * its key is set to a falsy value such as `false`; keys that are omitted
 * fall back to `DEFAULT_OPTIONS`.
 */
export default function rehypeTypeset(options = DEFAULT_OPTIONS) {
  // Merge over the defaults so that a partial object such as
  // `{ quotes: false }` only overrides the keys it names instead of
  // silently disabling every pass it does not mention.
  const settings = { ...DEFAULT_OPTIONS, ...options }

  /**
   * @param {Root} tree
   * @return {undefined}
   */
  return function (tree) {
    visit(tree, 'element', (node) => {
      if (IGNORED_ELEMENTS.has(node.tagName)) return SKIP

      for (const child of node.children) {
        if (child.type !== 'text') continue

        let value = child.value
        if (settings.quotes) value = replaceQuotes(value)
        if (settings.punctuation) {
          value = replacePunctuation(value, settings.punctuation)
        }
        if (settings.spaces) value = replaceSpaces(value, settings.spaces)
        child.value = value
      }
    })
  }
}

/**
 * Replace straight quotes with typographic quotes, apostrophes and primes.
 *
 * The replacements are order-dependent: each step relies on the glyphs
 * produced by the steps before it, so do not reorder the chain.
 *
 * @param {string} text
 * @returns {string}
 */
function replaceQuotes(text) {
  // Normalise entity spellings of straight quotes that survived as literal
  // text. NOTE(review): hast text values are normally already decoded, so
  // this presumably only matters for double-escaped input - confirm.
  text = text.replaceAll('&#39;', "'").replaceAll('&quot;', '"')

  text = text
    .replaceAll(/(\W|^)"([^\s!?:;.,‽»])/g, '$1\u201C$2') // Beginning "
    .replaceAll(/(\u201C[^"]*)"([^"]*$|[^\u201C"]*\u201C)/g, '$1\u201D$2') // Ending "
    .replaceAll(/([^0-9])"/g, '$1\u201D') // Remaining " at end of word
    .replaceAll(/(\W|^)'(\S)/g, '$1\u2018$2') // Beginning '
    .replaceAll(/([a-z])'([a-z])/gi, '$1\u2019$2') // Conjunction's possession
    .replaceAll(/((\u2018[^']*)|[a-z])'([^0-9]|$)/gi, '$1\u2019$3') // Ending '
    .replaceAll(
      /(\u2018)([0-9]{2}[^\u2019]*)(\u2018([^0-9]|$)|$|\u2019[a-z])/gi,
      '\u2019$2$3',
    ) // Abbrev. years like '93
    .replaceAll(
      /(\B|^)\u2018(?=([^\u2019]*\u2019\b)*([^\u2019\u2018]*\W[\u2019\u2018]\b|[^\u2019\u2018]*$))/gi,
      '$1\u2019',
    ) // Backwards apostrophe
    .replaceAll("'''", '\u2034') // Triple prime
    .replaceAll(/("|'')/g, '\u2033') // Double prime
    .replaceAll("'", '\u2032') // Prime

  // Allow escaped quotes: a backslash before a curly quote restores the
  // straight quote the author asked for.
  text = text
    .replaceAll(String.raw`\“`, '"')
    .replaceAll(String.raw`\”`, '"')
    .replaceAll(String.raw`\’`, "'")
    .replaceAll(String.raw`\‘`, "'")

  return text
}

/**
 * Replace ASCII punctuation sequences with their typographic glyphs.
 *
 * @param {string} text
 * @param {PunctuationOptions} [options] Only `em-dash-replacement` is
 * read; it defaults to `'double'`.
 * @returns {string}
 */
function replacePunctuation(text, options = {}) {
  const emDashReplacement = options['em-dash-replacement'] ?? 'double'

  /**
   * Replace hyphens and em-dashes that sit between digits (one optional
   * whitespace character allowed on each side) with an en-dash glyph.
   * Lookarounds leave the digits unconsumed, so every dash in a chained
   * range such as `1-2-3` is converted, and a dash with a digit on only
   * one side is left alone.
   * @see https://en.wikipedia.org/wiki/Dash#En_dash
   */
  text = text.replaceAll(/(?<=\d\s?)[-\u2014](?=\s?\d)/g, EN_DASH)

  /**
   * @see https://en.wikipedia.org/wiki/Dash#Em_dash
   */
  if (emDashReplacement === 'triple') {
    // Longest sequence first so `---` is not consumed as `--` plus `-`.
    text = text.replaceAll('---', EM_DASH).replaceAll('--', EN_DASH)
  } else {
    text = text.replaceAll('--', EM_DASH)
  }

  /** @see https://en.wikipedia.org/wiki/Ellipsis */
  text = text.replaceAll('...', ELLIPSIS)

  /**
   * Bind opening punctuation to the word after it, and closing
   * punctuation to the word before it, with a non-breaking space.
   * @see https://en.wikipedia.org/wiki/Non-breaking_space
   */
  text = text.replaceAll(/([«¿¡]) /g, `$1${NBSP}`)
  text = text.replaceAll(/ ([!?:;.,‽»])/g, `${NBSP}$1`)

  return text
}

/**
 * Tighten the spacing around dashes and operators using hair spaces.
 *
 * @param {string} text
 * @param {SpacesOptions} [options] Only `en-dash-spacing` is read; it
 * defaults to `'open'`.
 * @returns {string}
 */
function replaceSpaces(text, options) {
  const enDashSpacing = options?.['en-dash-spacing'] ?? 'open'
  const enDashPadding = enDashSpacing === 'closed' ? '' : HAIR_SPACE

  // En-dash between digits: normalise whatever single whitespace character
  // surrounds it to the configured padding. Lookarounds keep the digits
  // unconsumed so chained ranges are handled consistently.
  text = text.replaceAll(
    /(?<=\d)\s?\u2013\s?(?=\d)/g,
    `${enDashPadding}${EN_DASH}${enDashPadding}`,
  )

  text = text.replaceAll(
    ` ${EM_DASH} `,
    `${HAIR_SPACE}${EM_DASH}${HAIR_SPACE}`,
  )

  text = text.replaceAll(' × ', `${HAIR_SPACE}×${HAIR_SPACE}`)
  text = text.replaceAll(' / ', `${HAIR_SPACE}/${HAIR_SPACE}`)

  return text
}