Process HTML for better web typography.
1/**
2 * @import {Root} from 'hast'
3 */
4
5import { SKIP, visit } from 'unist-util-visit'
6
7/**
8 * @typedef {Object} QuotesOptions
9 */
10/**
11 * @typedef {Object} PunctuationOptions
 * @property {'double' | 'triple'} em-dash-replacement Replace hyphen
 * sequences with an em-dash glyph. If `triple` is set, two hyphens are
 * replaced with an en-dash instead.
15 */
16/**
17 * @typedef {Object} SpacesOptions
 * @property {'open' | 'closed'} [en-dash-spacing] Whether en-dashes are surrounded by spaces.
19 */
20/**
21 * @typedef {Object} PluginOptions
22 * @property {QuotesOptions | false} [quotes]
23 * @property {PunctuationOptions | false} [punctuation]
24 * @property {SpacesOptions | false} [spaces]
25 */
26
// Glyphs inserted by the replacement helpers, written as escapes so the
// (partly invisible) characters stay recognisable in source.
const HAIR_SPACE = '\u200A' // U+200A
const EN_DASH = '\u2013' // U+2013
const EM_DASH = '\u2014' // U+2014
const ELLIPSIS = '\u2026' // U+2026

// Tag names for which the visitor returns `SKIP` instead of rewriting text.
const IGNORED_ELEMENTS = new Set([
  'title',
  'script',
  'style',
  'pre',
  'code',
])
33
/**
 * Options used when the plugin is called without an options argument.
 * Every entry is a truthy object, so all three processing steps run.
 *
 * @type {PluginOptions}
 */
const DEFAULT_OPTIONS = {
  quotes: {},
  punctuation: { 'em-dash-replacement': 'double' },
  spaces: { 'en-dash-spacing': 'open' },
}
44
/**
 * Process HTML content for better web typography.
 *
 * Returns a transformer that walks every element of the tree and rewrites, in
 * place, the `value` of each text node that is a direct child of that element.
 * Elements listed in `IGNORED_ELEMENTS` make the visitor return `SKIP`.
 * The steps run in a fixed order: quotes, then punctuation, then spaces.
 *
 * Each option that is left out (or `undefined`) falls back to its entry in
 * `DEFAULT_OPTIONS`, so `{ quotes: false }` turns off the quotes step only.
 * Any falsy value disables the corresponding step.
 *
 * @param {PluginOptions} [options]
 */
export default function rehypeTypeset(options = DEFAULT_OPTIONS) {
  // Resolve defaults per key: a partial options object must not silently
  // disable the steps it does not mention. `?? {}` also tolerates `null`.
  const {
    quotes = DEFAULT_OPTIONS.quotes,
    punctuation = DEFAULT_OPTIONS.punctuation,
    spaces = DEFAULT_OPTIONS.spaces,
  } = options ?? {}

  /**
   * @param {Root} tree
   * @return {undefined}
   */
  return function (tree) {
    visit(tree, 'element', (node) => {
      if (IGNORED_ELEMENTS.has(node.tagName)) return SKIP

      for (const child of node.children) {
        if (child.type !== 'text') continue

        let value = child.value
        if (quotes) value = replaceQuotes(value)
        // The step's own options are handed to the helper, as for spaces.
        if (punctuation) value = replacePunctuation(value, punctuation)
        if (spaces) value = replaceSpaces(value, spaces)
        child.value = value
      }
    })
  }
}
70
/**
 * Replace straight quotes with typographic quotes, apostrophes and primes.
 *
 * The HTML-encoded forms `&#39;` and `&quot;` are first turned into straight
 * quotes so that the patterns below also apply to them. The replacements are
 * order-dependent: later patterns rely on the curly glyphs inserted by
 * earlier ones. Straight quotes that no pattern claimed become primes.
 * Finally, a curly quote that ended up directly after a backslash is emitted
 * as a straight quote, which lets authors escape a quote with `\`.
 *
 * @param {string} text
 * @returns {string}
 */
function replaceQuotes(text) {
  // Entity literals must stay encoded in source: written as bare quote
  // characters the first call is a syntax error and the second a no-op.
  text = text.replaceAll('&#39;', "'").replaceAll('&quot;', '"')

  text = text
    .replaceAll(/(\W|^)"([^\s!?:;.,‽»])/g, '$1\u201C$2') // Beginning "
    .replaceAll(/(\u201C[^"]*)"([^"]*$|[^\u201C"]*\u201C)/g, '$1\u201D$2') // Ending "
    .replaceAll(/([^0-9])"/g, '$1\u201D') // Remaining " at end of word
    .replaceAll(/(\W|^)'(\S)/g, '$1\u2018$2') // Beginning '
    .replaceAll(/([a-z])'([a-z])/gi, '$1\u2019$2') // Conjunction's possession
    .replaceAll(/((\u2018[^']*)|[a-z])'([^0-9]|$)/gi, '$1\u2019$3') // Ending '
    .replaceAll(
      /(\u2018)([0-9]{2}[^\u2019]*)(\u2018([^0-9]|$)|$|\u2019[a-z])/gi,
      '\u2019$2$3',
    ) // Abbrev. years like '93
    .replaceAll(
      /(\B|^)\u2018(?=([^\u2019]*\u2019\b)*([^\u2019\u2018]*\W[\u2019\u2018]\b|[^\u2019\u2018]*$))/gi,
      '$1\u2019',
    ) // Backwards apostrophe
    .replaceAll("'''", '\u2034') // Triple prime
    .replaceAll(/("|'')/g, '\u2033') // Double prime
    .replaceAll("'", '\u2032') // Prime

  // Allow escaped quotes: backslash + curly quote becomes the straight quote
  text = text
    .replaceAll(String.raw`\“`, '"')
    .replaceAll(String.raw`\”`, '"')
    .replaceAll(String.raw`\’`, "'")
    .replaceAll(String.raw`\‘`, "'")

  return text
}
107
/**
 * Replace ASCII punctuation sequences with typographic glyphs.
 *
 * Steps, in order:
 * 1. dashes next to digits become en-dashes;
 * 2. hyphen runs become dashes (see `options`);
 * 3. three dots become an ellipsis;
 * 4. the space after `«¿¡` and before `!?:;.,‽»` becomes a non-breaking space.
 *
 * @param {string} text
 * @param {PunctuationOptions} [options] With `em-dash-replacement` set to
 *   `'triple'`, `---` becomes an em-dash and `--` an en-dash. Any other value
 *   (or no options) keeps the default: `--` becomes an em-dash.
 * @returns {string}
 *
 * @todo Add option for "open" or "closed" dashes.
 */
function replacePunctuation(text, options = {}) {
  const useTripleHyphen = options?.['em-dash-replacement'] === 'triple'

  /**
   * Replace hyphens, en-dashes, and em-dashes next to digits with an en-dash
   * glyph.
   * @see https://en.wikipedia.org/wiki/Dash#En_dash
   */
  text = text.replaceAll(/(\d+\s?)-(\s?\d+)/g, `$1${EN_DASH}$2`)
  // NOTE(review): this swaps an en-dash for an en-dash, so it is currently a
  // no-op. The pattern may have been meant to match the encoded `&ndash;`
  // form; confirm before changing.
  text = text.replaceAll(/(\d+\s?)\u2013(\s?\d+)/g, `$1${EN_DASH}$2`)
  // Alternation: an em-dash with digits on either side matches. The group
  // that did not take part expands to an empty string in the replacement.
  text = text.replaceAll(/(\d+\s?)\u2014|\u2014(\s?\d+)/g, `$1${EN_DASH}$2`)

  /**
   * @see https://en.wikipedia.org/wiki/Dash#Em_dash
   */
  if (useTripleHyphen) {
    // Longest run first, otherwise `---` would be consumed as `--` plus `-`.
    text = text.replaceAll('---', EM_DASH).replaceAll('--', EN_DASH)
  } else {
    text = text.replaceAll('--', EM_DASH)
  }

  /** @see https://en.wikipedia.org/wiki/Ellipsis */
  text = text.replaceAll('...', ELLIPSIS)

  /**
   * Written as an escape so the invisible character cannot be mistaken for,
   * or silently converted into, an ordinary space.
   * @see https://en.wikipedia.org/wiki/Non-breaking_space
   */
  const NBSP = '\u00A0'
  const NBSP_PUNCTUATION_START = /([«¿¡]) /g
  const NBSP_PUNCTUATION_END = / ([!?:;.,‽»])/g

  text = text.replaceAll(NBSP_PUNCTUATION_START, `$1${NBSP}`)
  text = text.replaceAll(NBSP_PUNCTUATION_END, `${NBSP}$1`)

  return text
}
145
/**
 * Replace the spacing around dashes, `×` and `/` with hair spaces.
 *
 * - An en-dash between digits (with at most one whitespace character on each
 *   side) is re-spaced according to `en-dash-spacing`.
 * - ` — `, ` × ` and ` / ` (a single ordinary space on both sides) get a hair
 *   space on both sides instead.
 *
 * @param {string} text
 * @param {SpacesOptions} [_options] With `en-dash-spacing` set to `'closed'`
 *   the en-dash is set without surrounding spaces. Any other value (or no
 *   options) keeps the default `'open'` behaviour: a hair space on each side.
 * @returns {string}
 */
function replaceSpaces(text, _options) {
  const enDashGap = _options?.['en-dash-spacing'] === 'closed' ? '' : HAIR_SPACE

  text = text.replaceAll(
    /(\d+)\s?\u2013\s?(\d+)/g,
    `$1${enDashGap}${EN_DASH}${enDashGap}$2`,
  )

  text = text.replaceAll(' \u2014 ', `${HAIR_SPACE}${EM_DASH}${HAIR_SPACE}`)

  text = text.replaceAll(' × ', `${HAIR_SPACE}×${HAIR_SPACE}`)
  text = text.replaceAll(' / ', `${HAIR_SPACE}/${HAIR_SPACE}`)

  return text
}