generate/src/main.rs at main · crashkeys.dev/markdown-rs

crashkeys.dev / markdown-rs
fork atom
Markdown parser fork with extended syntax for personal use.
fork atom
markdown-rs / generate / src / main.rs
at main 165 lines 4.9 kB view raw
wrap content
Titus Wormer Refactor docs 11mo ago
e0ca3f6c
  1// To regenerate, run the following from the repository root:
  2//
  3// ```sh
  4// cargo run --manifest-path generate/Cargo.toml
  5// ```
  6
  7use regex::Regex;
  8use std::fs;
  9
 10#[tokio::main]
 11async fn main() {
 12    commonmark().await;
 13    punctuation().await;
 14}
 15
 16async fn commonmark() {
 17    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.31.2/spec.txt";
 18    let data_url = "commonmark-data.txt";
 19    let code_url = "tests/commonmark.rs";
 20
 21    let value = if let Ok(value) = fs::read_to_string(data_url) {
 22        value
 23    } else {
 24        let value = reqwest::get(url).await.unwrap().text().await.unwrap();
 25
 26        fs::write(data_url, value.clone()).unwrap();
 27
 28        value
 29    };
 30
 31    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
 32    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
 33    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();
 34    let mut current_heading = None;
 35    let mut number = 1;
 36
 37    let value = Regex::new(r"<!-- END TESTS -->[\s\S]*")
 38        .unwrap()
 39        .replace(&value, "");
 40    let value = Regex::new(r"→").unwrap().replace_all(&value, "\t");
 41    let mut cases = vec![];
 42
 43    for mat in re.find_iter(&value) {
 44        let mut lines = mat.as_str().lines().collect::<Vec<_>>();
 45
 46        if lines.len() == 1 {
 47            current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
 48        } else {
 49            lines.remove(0);
 50            lines.pop();
 51            let section = current_heading.as_ref().unwrap();
 52            let case = lines.join("\n");
 53            let parts = re_in_out.split(&case).collect::<Vec<_>>();
 54            let input = format!("{}\n", parts[0]);
 55            let output = if parts[1].is_empty() {
 56                "".into()
 57            } else {
 58                format!("{}\n", parts[1])
 59            };
 60
 61            let test = format!("    assert_eq!(\n        to_html_with_options(\n            r###\"{}\"###,\n            &danger\n        )?,\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n);", input, output, section, number);
 62
 63            cases.push(test);
 64
 65            number += 1;
 66        }
 67    }
 68
 69    let doc = format!(
 70        "//! `CommonMark` test suite.
 71
 72// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
 73// > It is generate from the latest CommonMark website.
 74
 75use markdown::{{message, to_html_with_options, CompileOptions, Options}};
 76use pretty_assertions::assert_eq;
 77
 78#[rustfmt::skip]
 79#[test]
 80fn commonmark() -> Result<(), message::Message> {{
 81    let danger = Options {{
 82        compile: CompileOptions {{
 83            allow_dangerous_html: true,
 84            allow_dangerous_protocol: true,
 85            ..CompileOptions::default()
 86        }},
 87        ..Options::default()
 88    }};
 89
 90{}
 91
 92    Ok(())
 93}}
 94",
 95        cases.join("\n\n")
 96    );
 97
 98    fs::write(code_url, doc).unwrap();
 99}
100
101async fn punctuation() {
102    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
103    let data_url = "unicode-data.txt";
104    let code_url = "src/util/unicode.rs";
105
106    let value = if let Ok(value) = fs::read_to_string(data_url) {
107        value
108    } else {
109        let value = reqwest::get(url).await.unwrap().text().await.unwrap();
110
111        fs::write(data_url, value.clone()).unwrap();
112
113        value
114    };
115
116    let search = [
117        "Pc", // Punctuation, Connector
118        "Pd", // Punctuation, Dash
119        "Pe", // Punctuation, Close
120        "Pf", // Punctuation, FinalQuote
121        "Pi", // Punctuation, InitialQuote
122        "Po", // Punctuation, Other
123        "Ps", // Punctuation, Open
124        "Sc", // Symbol, Currency
125        "Sk", // Symbol, Modifier
126        "Sm", // Symbol, Math
127        "So", // Symbol, Other
128    ];
129
130    let found = value
131        .lines()
132        .map(|line| line.split(';').collect::<Vec<_>>())
133        .map(|cells| (cells[0], cells[2]))
134        .filter(|c| search.contains(&c.1))
135        .map(|c| c.0)
136        .collect::<Vec<_>>();
137
138    let doc = format!(
139        "//! Info on Unicode.
140
141/// List of characters that are considered punctuation.
142///
143/// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
144/// > It is generate from the latest Unicode data.
145///
146/// Rust does not contain an `is_punctuation` method on `char`, while it does
147/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
148///
149/// `CommonMark` handles attention (emphasis, strong) markers based on what
150/// comes before or after them.
151/// One such difference is if those characters are Unicode punctuation.
152///
153/// ## References
154///
155/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.31.2/#unicode-punctuation-character)
156pub static PUNCTUATION: [char; {}] = [
157{}
158];
159",
160    found.len(),
161    found.iter().map(|d| format!("    '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n")
162    );
163
164    fs::write(code_url, doc).unwrap();
165}