Diffdown is a real-time collaborative Markdown editor/previewer built on the AT Protocol
diffdown.com
1import {InlineContext, BlockContext, MarkdownConfig,
2 LeafBlockParser, LeafBlock, Line, Element, space, Punctuation} from "./markdown"
3import {tags as t} from "@lezer/highlight"
4
// Delimiter descriptor passed to `cx.addDelimiter` below. Its `resolve`
// and `mark` values are the two node names declared in `defineNodes`.
const StrikethroughDelim = {resolve: "Strikethrough", mark: "StrikethroughMark"}

/// Extension implementing
/// [GFM-style](https://github.github.com/gfm/#strikethrough-extension-)
/// strikethrough, delimited by `~~`.
export const Strikethrough: MarkdownConfig = {
  defineNodes: [
    {name: "Strikethrough", style: {"Strikethrough/...": t.strikethrough}},
    {name: "StrikethroughMark", style: t.processingInstruction}
  ],
  parseInline: [{
    name: "Strikethrough",
    parse(cx, next, pos) {
      // Only a run of exactly two tildes counts: bail out on anything
      // that is not `~~`, and on `~~~` or longer.
      if (next != 126 /* '~' */) return -1
      if (cx.char(pos + 1) != 126) return -1
      if (cx.char(pos + 2) == 126) return -1

      // One character of context on each side; the slices are empty at
      // the edges of the inline content, which `^$` treats like whitespace.
      let prev = cx.slice(pos - 1, pos)
      let following = cx.slice(pos + 2, pos + 3)
      let blankBefore = /\s|^$/.test(prev)
      let blankAfter = /\s|^$/.test(following)
      let punctBefore = Punctuation.test(prev)
      let punctAfter = Punctuation.test(following)

      // Same flanking tests CommonMark uses for emphasis delimiter runs.
      let leftFlanking = !blankAfter && (!punctAfter || blankBefore || punctBefore)
      let rightFlanking = !blankBefore && (!punctBefore || blankAfter || punctAfter)
      // NOTE(review): the two booleans are presumably the "may open" /
      // "may close" flags of addDelimiter; confirm against InlineContext.
      return cx.addDelimiter(StrikethroughDelim, pos, pos + 2, leftFlanking, rightFlanking)
    },
    after: "Emphasis"
  }]
}
32
// Scan `line`, starting at index `startI`, as a table row and return
// the number of cells it contains. Pipes split cells unless they are
// preceded by an unescaped backslash; spaces and tabs around a cell
// are trimmed, and a leading pipe does not open an empty cell.
//
// When `elts` is given, a `TableDelimiter` element is pushed for every
// splitting pipe and a `TableCell` element (with inline-parsed
// children) for every non-empty cell. Element positions are the
// indices into `line` shifted by `offset`.
function parseRow(cx: BlockContext, line: string, startI = 0, elts?: Element[], offset = 0) {
  let cells = 0
  let sawPipe = false
  let from = -1, to = -1
  let escaped = false

  // Emit the cell currently spanning [from, to). Only called when
  // `elts` is present and a cell is open.
  const emitCell = () => {
    let inline = cx.parser.parseInline(line.slice(from, to), offset + from)
    elts!.push(cx.elt("TableCell", offset + from, offset + to, inline))
  }

  for (let idx = startI; idx < line.length; idx++) {
    let code = line.charCodeAt(idx)
    let splitsCells = code == 124 /* '|' */ && !escaped
    if (splitsCells) {
      // The very first pipe only counts when content preceded it.
      if (sawPipe || from >= 0) cells++
      sawPipe = true
      if (elts) {
        if (from >= 0) emitCell()
        elts.push(cx.elt("TableDelimiter", idx + offset, idx + offset + 1))
      }
      from = to = -1
    } else if (escaped || (code != 32 && code != 9)) {
      // Non-blank (or escaped) character: open or extend the cell.
      if (from < 0) from = idx
      to = idx + 1
    }
    // A backslash escapes the next character unless it is itself escaped.
    escaped = !escaped && code == 92
  }

  // Trailing cell that was not closed by a pipe.
  if (from >= 0) {
    cells++
    if (elts) emitCell()
  }
  return cells
}
64
// Tell whether `str` contains a `|` at or after index `start` that is
// not directly preceded by a backslash. The character following any
// backslash is skipped without being inspected.
function hasPipe(str: string, start: number) {
  let idx = start
  while (idx < str.length) {
    let code = str.charCodeAt(idx)
    if (code == 124 /* '|' */) return true
    // Step over the backslash and the character it escapes.
    idx += code == 92 /* '\\' */ ? 2 : 1
  }
  return false
}
73
// Matches a whole table delimiter row: an optional leading pipe, one
// or more `:?-+:?` columns each closed by a pipe, and optionally a
// final column without a closing pipe.
const delimiterLine = /^\|?(\s*:?-+:?\s*\|)+(\s*:?-+:?\s*)?$/

// Leaf block parser that turns a header line, a delimiter line and any
// following lines into a `Table` element.
class TableParser implements LeafBlockParser {
  // Null means we haven't seen the second line yet, false means this
  // isn't a table, and an array means this is a table and we've
  // parsed the given rows so far.
  rows: false | null | Element[] = null

  // Feed the next line of the leaf block to the parser. The first call
  // decides whether this is a table (the line must be a delimiter row
  // with as many cells as the header); later calls append a `TableRow`.
  // Always returns false.
  nextLine(cx: BlockContext, line: Line, leaf: LeafBlock) {
    if (this.rows == null) { // Second line
      this.rows = false
      if ((line.next == 45 || line.next == 58 || line.next == 124 /* '-:|' */) &&
          delimiterLine.test(line.text.slice(line.pos))) {
        let firstRow: Element[] = [], firstCount = parseRow(cx, leaf.content, 0, firstRow, leaf.start)
        // Count the delimiter cells on the full line text starting at
        // `line.pos`. Passing the already-sliced text together with
        // `line.pos` would skip the first `line.pos` characters of the
        // row itself and miscount whenever `line.pos` is greater than 0.
        if (firstCount == parseRow(cx, line.text, line.pos))
          this.rows = [cx.elt("TableHeader", leaf.start, leaf.start + leaf.content.length, firstRow),
                       cx.elt("TableDelimiter", cx.lineStart + line.pos, cx.lineStart + line.text.length)]
      }
    } else if (this.rows) { // Line after the second
      let content: Element[] = []
      parseRow(cx, line.text, line.pos, content, cx.lineStart)
      this.rows.push(cx.elt("TableRow", cx.lineStart + line.pos, cx.lineStart + line.text.length, content))
    }
    return false
  }

  // Called when the leaf block ends. Adds a `Table` element wrapping
  // the collected rows and returns true, or returns false when the
  // block never qualified as a table.
  finish(cx: BlockContext, leaf: LeafBlock) {
    if (!this.rows) return false
    cx.addLeafElement(leaf, cx.elt("Table", leaf.start, leaf.start + leaf.content.length, this.rows as readonly Element[]))
    return true
  }
}
107
/// Extension adding
/// [GFM-style](https://github.github.com/gfm/#tables-extension-)
/// tables. The supported syntax looks like this:
///
/// ```
/// | head 1 | head 2 |
/// | ---    | ---    |
/// | cell 1 | cell 2 |
/// ```
export const Table: MarkdownConfig = {
  defineNodes: [
    {name: "Table", block: true},
    {name: "TableHeader", style: {"TableHeader/...": t.heading}},
    "TableRow",
    {name: "TableCell", style: t.content},
    {name: "TableDelimiter", style: t.processingInstruction},
  ],
  parseBlock: [{
    name: "Table",
    // Attach a table parser to any leaf block whose content has an
    // unescaped pipe.
    leaf(_, leaf) {
      if (!hasPipe(leaf.content, 0)) return null
      return new TableParser()
    },
    // Returns true only when the current line has a pipe, no table
    // parser is attached to the leaf yet, the next line matches the
    // delimiter pattern, and both lines yield the same cell count
    // when scanned from `line.basePos`.
    endLeaf(cx, line, leaf) {
      let alreadyTable = leaf.parsers.some(p => p instanceof TableParser)
      if (alreadyTable) return false
      if (!hasPipe(line.text, line.basePos)) return false
      let upcoming = cx.peekLine()
      if (!delimiterLine.test(upcoming)) return false
      let headerCells = parseRow(cx, line.text, line.basePos)
      return headerCells == parseRow(cx, upcoming, line.basePos)
    },
    before: "SetextHeading"
  }]
}
136
// Leaf block parser for task list items. It never reacts to follow-up
// lines; when the leaf ends it wraps the content in a `Task` element
// whose first child is the three-character `TaskMarker`.
class TaskParser implements LeafBlockParser {
  nextLine() { return false }

  // Builds `Task(TaskMarker, ...inline content)` for the leaf, hands it
  // to `cx.addLeafElement`, and returns true.
  finish(cx: BlockContext, leaf: LeafBlock) {
    let from = leaf.start
    let markerEnd = from + 3
    let marker = cx.elt("TaskMarker", from, markerEnd)
    // Everything after the marker is parsed as inline content.
    let inline = cx.parser.parseInline(leaf.content.slice(3), markerEnd)
    let task = cx.elt("Task", from, from + leaf.content.length, [marker, ...inline])
    cx.addLeafElement(leaf, task)
    return true
  }
}
148
/// Extension adding
/// [GFM-style](https://github.github.com/gfm/#task-list-items-extension-)
/// task list items: a list item that starts with `[ ]` or `[x]`
/// gets a checkbox marker.
export const TaskList: MarkdownConfig = {
  defineNodes: [
    {name: "Task", block: true, style: t.list},
    {name: "TaskMarker", style: t.atom}
  ],
  parseBlock: [{
    name: "TaskList",
    // A leaf becomes a task only when it starts with `[ ]`, `[x]` or
    // `[X]` followed by a space or tab, and its parent is a `ListItem`.
    leaf(cx, leaf) {
      if (!/^\[[ xX]\][ \t]/.test(leaf.content)) return null
      if (cx.parentType().name != "ListItem") return null
      return new TaskParser()
    },
    after: "SetextHeading"
  }]
}
166
// Patterns used by the autolink helpers. The `gy` ones are sticky, so
// callers set `lastIndex` before each `exec` to match at one position.

// Start of an autolink. Capture groups: 1 = `www.`, 2 = `http://` or
// `https://`, 3 = local part of an email address up to and including
// `@`, 4 = `mailto:` or `xmpp:`.
const autolinkRE = /(www\.)|(https?:\/\/)|([\w.+-]{1,100}@)|(mailto:|xmpp:)/gy
// Dotted domain name, optionally followed by a path that runs up to
// the next whitespace or `<`.
const urlRE = /[\w-]+(\.[\w-]+)+(\/[^\s<]*)?/gy
// First `word.word` pair that is followed by the end of the string or
// by a slash.
const lastTwoDomainWords = /[\w-]+\.[\w-]+($|\/)/
// Email address: local part, `@`, and a domain with at least one dot.
const emailRE = /[\w.+-]+@[\w-]+(\.[\w.-]+)+/gy
// `/resource` suffix accepted after an `xmpp:` address.
const xmppResourceRE = /\/[a-zA-Z\d@.]+/gy
172
// Number of positions `i` with `from <= i < to` at which `str[i]`
// equals `ch`.
function count(str: string, from: number, to: number, ch: string) {
  let hits = 0
  let idx = from
  while (idx < to) {
    if (str[idx] == ch) hits += 1
    idx += 1
  }
  return hits
}
178
// Find where a URL autolink whose domain starts at `from` ends.
// Returns -1 when no dotted domain starts at `from`, or when the
// `word.word` pair picked out by `lastTwoDomainWords` contains an
// underscore. Otherwise returns the end index after trimming, from the
// right: trailing `?!.,:*_~` characters, closing parentheses that
// outnumber the opening ones, and a trailing `&...;` entity.
function autolinkURLEnd(text: string, from: number) {
  urlRE.lastIndex = from
  let urlMatch = urlRE.exec(text)
  if (!urlMatch) return -1
  let domainTail = lastTwoDomainWords.exec(urlMatch[0])![0]
  if (domainTail.indexOf("_") > -1) return -1

  let end = from + urlMatch[0].length
  while (true) {
    let last = text[end - 1]
    if (/[?!.,:*_~]/.test(last)) {
      end--
      continue
    }
    if (last == ")") {
      // Keep a closing parenthesis only while it has a partner.
      if (count(text, from, end, ")") > count(text, from, end, "(")) {
        end--
        continue
      }
      break
    }
    if (last == ";") {
      let entity = /&(?:#\d+|#x[a-f\d]+|\w+);$/.exec(text.slice(from, end))
      if (entity) {
        // `entity.index` is relative to the slice that starts at `from`.
        end = from + entity.index
        continue
      }
    }
    break
  }
  return end
}
196
// Find where an email address starting at `from` ends. Returns -1
// when no address starts there or when the match ends in `_` or `-`;
// a trailing `.` is left out of the returned end index.
function autolinkEmailEnd(text: string, from: number) {
  emailRE.lastIndex = from
  let found = emailRE.exec(text)
  if (found === null) return -1
  let address = found[0]
  let finalChar = address.charAt(address.length - 1)
  if (finalChar == "_" || finalChar == "-") return -1
  let end = from + address.length
  return finalChar == "." ? end - 1 : end
}
204
/// Extension that turns `www.`, `http://`, `https://`, `mailto:` and
/// `xmpp:` URLs, as well as bare email addresses, into `URL` nodes.
export const Autolink: MarkdownConfig = {
  parseInline: [{
    name: "Autolink",
    parse(cx, next, absPos) {
      // `absPos` is a document position; `cx.text` is indexed from
      // `cx.offset`.
      let pos = absPos - cx.offset
      // Never start a link in the middle of a word.
      if (pos && /\w/.test(cx.text[pos - 1])) return -1

      autolinkRE.lastIndex = pos
      let prefix = autolinkRE.exec(cx.text)
      if (!prefix) return -1
      let [lead, www, protocol, emailUser] = prefix

      let end = -1
      if (www || protocol) { // www., http://, https://
        end = autolinkURLEnd(cx.text, pos + lead.length)
        if (end > -1 && cx.hasOpenLink) {
          // Cut the URL at the first bracket that is not part of a
          // balanced `[...]` pair.
          let unbracketed = /([^\[\]]|\[[^\]]*\])*/.exec(cx.text.slice(pos, end))
          end = pos + unbracketed![0].length
        }
      } else if (emailUser) { // bare email address
        end = autolinkEmailEnd(cx.text, pos)
      } else { // mailto: / xmpp:
        end = autolinkEmailEnd(cx.text, pos + lead.length)
        if (end > -1 && lead == "xmpp:") {
          // An xmpp address may carry a `/resource` suffix.
          xmppResourceRE.lastIndex = end
          let resource = xmppResourceRE.exec(cx.text)
          if (resource) end = resource.index + resource[0].length
        }
      }

      if (end < 0) return -1
      cx.addElement(cx.elt("URL", absPos, end + cx.offset))
      return end + cx.offset
    }
  }]
}
239
/// Bundle of the GitHub Flavored Markdown extensions defined in this
/// file: [`Table`](#Table), [`TaskList`](#TaskList),
/// [`Strikethrough`](#Strikethrough), and [`Autolink`](#Autolink).
export const GFM = [
  Table,
  TaskList,
  Strikethrough,
  Autolink
]
244
// Build an inline parse function for a construct delimited by a
// single `ch` character on both sides. The returned function yields
// -1 unless `next` is `ch`, the following character is not `ch`, and
// a closing `ch` is found before any whitespace or the end of the
// inline content. On success it adds a `node` element whose children
// are the opening `mark`, an `Escape` element per backslash pair, and
// the closing `mark`, and returns the result of `cx.addElement`.
function parseSubSuper(ch: number, node: string, mark: string) {
  return (cx: InlineContext, next: number, pos: number) => {
    if (next != ch) return -1
    // A doubled marker does not open this construct.
    if (cx.char(pos + 1) == ch) return -1

    let children = [cx.elt(mark, pos, pos + 1)]
    let at = pos + 1
    while (at < cx.end) {
      let code = cx.char(at)
      if (code == ch) {
        let closing = cx.elt(mark, at, at + 1)
        return cx.addElement(cx.elt(node, pos, at + 1, children.concat(closing)))
      }
      if (code == 92 /* '\\' */) {
        // Record the two-character escape and step over the escaped
        // character so it is never inspected.
        children.push(cx.elt("Escape", at, at + 2))
        at++
      }
      if (space(code)) break
      at++
    }
    return -1
  }
}
260
/// Extension adding
/// [Pandoc-style](https://pandoc.org/MANUAL.html#superscripts-and-subscripts)
/// superscript, written between a pair of `^` markers.
export const Superscript: MarkdownConfig = {
  defineNodes: [
    {
      name: "Superscript",
      style: t.special(t.content)
    },
    {
      name: "SuperscriptMark",
      style: t.processingInstruction
    }
  ],
  parseInline: [
    {
      name: "Superscript",
      parse: parseSubSuper(94 /* '^' */, "Superscript", "SuperscriptMark")
    }
  ]
}
274
/// Extension adding
/// [Pandoc-style](https://pandoc.org/MANUAL.html#superscripts-and-subscripts)
/// subscript, written between a pair of `~` markers.
export const Subscript: MarkdownConfig = {
  defineNodes: [
    {
      name: "Subscript",
      style: t.special(t.content)
    },
    {
      name: "SubscriptMark",
      style: t.processingInstruction
    }
  ],
  parseInline: [
    {
      name: "Subscript",
      parse: parseSubSuper(126 /* '~' */, "Subscript", "SubscriptMark")
    }
  ]
}
288
/// Extension that recognizes a pair of colons enclosing one or more
/// ASCII letters, digits, or underscores, and emits the whole span
/// as an `Emoji` node.
export const Emoji: MarkdownConfig = {
  defineNodes: [{name: "Emoji", style: t.character}],
  parseInline: [{
    name: "Emoji",
    parse(cx, next, pos) {
      if (next != 58 /* ':' */) return -1
      // The name must start right after the opening colon and be
      // closed by a second colon.
      let shortcode = /^[a-zA-Z_0-9]+:/.exec(cx.slice(pos + 1, cx.end))
      if (!shortcode) return -1
      // Span covers the opening colon plus the matched `name:`.
      return cx.addElement(cx.elt("Emoji", pos, pos + 1 + shortcode[0].length))
    }
  }]
}