Markdown parser fork with extended syntax for personal use.
1// To regenerate, run the following from the repository root:
2//
3// ```sh
4// cargo run --manifest-path generate/Cargo.toml
5// ```
6
7use regex::Regex;
8use std::fs;
9
10#[tokio::main]
11async fn main() {
12 commonmark().await;
13 punctuation().await;
14}
15
16async fn commonmark() {
17 let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.31.2/spec.txt";
18 let data_url = "commonmark-data.txt";
19 let code_url = "tests/commonmark.rs";
20
21 let value = if let Ok(value) = fs::read_to_string(data_url) {
22 value
23 } else {
24 let value = reqwest::get(url).await.unwrap().text().await.unwrap();
25
26 fs::write(data_url, value.clone()).unwrap();
27
28 value
29 };
30
31 let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
32 let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
33 let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();
34 let mut current_heading = None;
35 let mut number = 1;
36
37 let value = Regex::new(r"<!-- END TESTS -->[\s\S]*")
38 .unwrap()
39 .replace(&value, "");
40 let value = Regex::new(r"→").unwrap().replace_all(&value, "\t");
41 let mut cases = vec![];
42
43 for mat in re.find_iter(&value) {
44 let mut lines = mat.as_str().lines().collect::<Vec<_>>();
45
46 if lines.len() == 1 {
47 current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
48 } else {
49 lines.remove(0);
50 lines.pop();
51 let section = current_heading.as_ref().unwrap();
52 let case = lines.join("\n");
53 let parts = re_in_out.split(&case).collect::<Vec<_>>();
54 let input = format!("{}\n", parts[0]);
55 let output = if parts[1].is_empty() {
56 "".into()
57 } else {
58 format!("{}\n", parts[1])
59 };
60
61 let test = format!(" assert_eq!(\n to_html_with_options(\n r###\"{}\"###,\n &danger\n )?,\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, number);
62
63 cases.push(test);
64
65 number += 1;
66 }
67 }
68
69 let doc = format!(
70 "//! `CommonMark` test suite.
71
72// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
73// > It is generate from the latest CommonMark website.
74
75use markdown::{{message, to_html_with_options, CompileOptions, Options}};
76use pretty_assertions::assert_eq;
77
78#[rustfmt::skip]
79#[test]
80fn commonmark() -> Result<(), message::Message> {{
81 let danger = Options {{
82 compile: CompileOptions {{
83 allow_dangerous_html: true,
84 allow_dangerous_protocol: true,
85 ..CompileOptions::default()
86 }},
87 ..Options::default()
88 }};
89
90{}
91
92 Ok(())
93}}
94",
95 cases.join("\n\n")
96 );
97
98 fs::write(code_url, doc).unwrap();
99}
100
101async fn punctuation() {
102 let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
103 let data_url = "unicode-data.txt";
104 let code_url = "src/util/unicode.rs";
105
106 let value = if let Ok(value) = fs::read_to_string(data_url) {
107 value
108 } else {
109 let value = reqwest::get(url).await.unwrap().text().await.unwrap();
110
111 fs::write(data_url, value.clone()).unwrap();
112
113 value
114 };
115
116 let search = [
117 "Pc", // Punctuation, Connector
118 "Pd", // Punctuation, Dash
119 "Pe", // Punctuation, Close
120 "Pf", // Punctuation, FinalQuote
121 "Pi", // Punctuation, InitialQuote
122 "Po", // Punctuation, Other
123 "Ps", // Punctuation, Open
124 "Sc", // Symbol, Currency
125 "Sk", // Symbol, Modifier
126 "Sm", // Symbol, Math
127 "So", // Symbol, Other
128 ];
129
130 let found = value
131 .lines()
132 .map(|line| line.split(';').collect::<Vec<_>>())
133 .map(|cells| (cells[0], cells[2]))
134 .filter(|c| search.contains(&c.1))
135 .map(|c| c.0)
136 .collect::<Vec<_>>();
137
138 let doc = format!(
139 "//! Info on Unicode.
140
141/// List of characters that are considered punctuation.
142///
143/// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
144/// > It is generate from the latest Unicode data.
145///
146/// Rust does not contain an `is_punctuation` method on `char`, while it does
147/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
148///
149/// `CommonMark` handles attention (emphasis, strong) markers based on what
150/// comes before or after them.
151/// One such difference is if those characters are Unicode punctuation.
152///
153/// ## References
154///
155/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.31.2/#unicode-punctuation-character)
156pub static PUNCTUATION: [char; {}] = [
157{}
158];
159",
160 found.len(),
161 found.iter().map(|d| format!(" '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n")
162 );
163
164 fs::write(code_url, doc).unwrap();
165}