Markdown parser fork with extended syntax for personal use.

Add `allow_any_img_src` option

Closes GH-164.
Closes GH-165.

Reviewed-by: Titus Wormer <tituswormer@gmail.com>

authored by

Ophir LOJKINE and committed by
GitHub
e923a3ca 283db1af

+122 -2
+12 -1
readme.md
··· 285 285 The typical security aspect discussed for markdown is [cross-site scripting 286 286 (XSS)][xss] attacks. 287 287 Markdown itself is safe if it does not include embedded HTML or dangerous 288 - protocols in links/images (such as `javascript:` or `data:`). 288 + protocols in links/images (such as `javascript:`). 289 289 `markdown-rs` makes any markdown safe by default, even if HTML is embedded or 290 290 dangerous protocols are used, as it encodes or drops them. 291 + 291 292 Turning on the `allow_dangerous_html` or `allow_dangerous_protocol` options for 292 293 user-provided markdown opens you up to XSS attacks. 294 + 295 + Additionally, you should be able to set `allow_any_img_src` safely. 296 + The default is to allow only `http:`, `https:`, and relative images, 297 + which is what GitHub does. But it should be safe to allow any value on `src`. 298 + 299 + The [HTML specification][whatwg-html-image] prohibits dangerous scripts in 300 + images and all modern browsers respect this and are thus safe. 301 + Opera 12 (from 2012) is a notable browser that did not respect this. 293 302 294 303 An aspect related to XSS for security is syntax errors: markdown itself has no 295 304 syntax errors. ··· 413 422 [support]: .github/support.md 414 423 415 424 [coc]: .github/code-of-conduct.md 425 + 426 + [whatwg-html-image]: https://html.spec.whatwg.org/multipage/images.html#images-processing-model
+59
src/configuration.rs
··· 522 522 /// `ircs`, `mailto`, `xmpp`), are safe. 523 523 /// All other URLs are dangerous and dropped. 524 524 /// 525 + /// When the option `allow_any_img_src` is enabled, 526 + /// `allow_dangerous_protocol` only applies to links. 527 + /// 528 + /// This is safe because the 529 + /// [HTML specification][whatwg-html-image-processing] 530 + /// does not allow executable code in images. 531 + /// All modern browsers respect this. 532 + /// 533 + /// [whatwg-html-image-processing]: https://html.spec.whatwg.org/multipage/images.html#images-processing-model 534 + /// 525 535 /// ## Examples 526 536 /// 527 537 /// ``` ··· 552 562 /// # } 553 563 /// ``` 554 564 pub allow_dangerous_protocol: bool, 565 + 566 + /// Whether to allow all values in images. 567 + /// 568 + /// The default is `false`, 569 + /// which lets `allow_dangerous_protocol` control protocol safety for 570 + /// both links and images. 571 + /// 572 + /// Pass `true` to allow all values as `src` on images, 573 + /// regardless of `allow_dangerous_protocol`. 574 + /// This is safe because the 575 + /// [HTML specification][whatwg-html-image-processing] 576 + /// does not allow executable code in images. 577 + /// 578 + /// [whatwg-html-image-processing]: https://html.spec.whatwg.org/multipage/images.html#images-processing-model 579 + /// 580 + /// ## Examples 581 + /// 582 + /// ``` 583 + /// use markdown::{to_html_with_options, CompileOptions, Options}; 584 + /// # fn main() -> Result<(), markdown::message::Message> { 585 + /// 586 + /// // By default, some protocols in image sources are dropped: 587 + /// assert_eq!( 588 + /// to_html_with_options( 589 + /// "![](data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==)", 590 + /// &Options::default() 591 + /// )?, 592 + /// "<p><img src=\"\" alt=\"\" /></p>" 593 + /// ); 594 + /// 595 + /// // Turn `allow_any_img_src` on to allow all values as `src` on images. 
596 + /// // This is safe because browsers do not execute code in images. 597 + /// assert_eq!( 598 + /// to_html_with_options( 599 + /// "![](javascript:alert(1))", 600 + /// &Options { 601 + /// compile: CompileOptions { 602 + /// allow_any_img_src: true, 603 + /// ..CompileOptions::default() 604 + /// }, 605 + /// ..Options::default() 606 + /// } 607 + /// )?, 608 + /// "<p><img src=\"javascript:alert(1)\" alt=\"\" /></p>" 609 + /// ); 610 + /// # Ok(()) 611 + /// # } 612 + /// ``` 613 + pub allow_any_img_src: bool, 555 614 556 615 // To do: `doc_markdown` is broken. 557 616 #[allow(clippy::doc_markdown)]
+4 -1
src/to_html.rs
··· 1457 1457 }; 1458 1458 1459 1459 if let Some(destination) = destination { 1460 - let url = if context.options.allow_dangerous_protocol { 1460 + let allow_dangerous_protocol = context.options.allow_dangerous_protocol 1461 + || (context.options.allow_any_img_src && media.image); 1462 + 1463 + let url = if allow_dangerous_protocol { 1461 1464 sanitize(destination) 1462 1465 } else { 1463 1466 sanitize_with_protocols(
+16
tests/image.rs
··· 236 236 ); 237 237 238 238 assert_eq!( 239 + to_html_with_options( 240 + "![](javascript:alert(1))", 241 + &Options { 242 + compile: CompileOptions { 243 + allow_dangerous_protocol: false, 244 + allow_any_img_src: true, 245 + ..Default::default() 246 + }, 247 + ..Default::default() 248 + } 249 + )?, 250 + "<p><img src=\"javascript:alert(1)\" alt=\"\" /></p>", 251 + "should allow non-http protocols with the `allow_any_img_src` option" 252 + ); 253 + 254 + assert_eq!( 239 255 to_mdast( 240 256 "a ![alpha]() b ![bravo](charlie 'delta') c.", 241 257 &Default::default()
+31
tests/misc_dangerous_protocol.rs
··· 195 195 "should allow a colon in a path" 196 196 ); 197 197 } 198 + 199 + #[test] 200 + fn dangerous_protocol_image_with_option() { 201 + use markdown::{to_html_with_options, CompileOptions, Options}; 202 + 203 + let options = Options { 204 + compile: CompileOptions { 205 + allow_any_img_src: true, 206 + ..Default::default() 207 + }, 208 + ..Default::default() 209 + }; 210 + 211 + let result = to_html_with_options("![](javascript:alert(1))", &options).unwrap(); 212 + assert_eq!( 213 + result, "<p><img src=\"javascript:alert(1)\" alt=\"\" /></p>", 214 + "should allow javascript protocol with allow_any_img_src option" 215 + ); 216 + 217 + let result = to_html_with_options("![](irc:///help)", &options).unwrap(); 218 + assert_eq!( 219 + result, "<p><img src=\"irc:///help\" alt=\"\" /></p>", 220 + "should allow irc protocol with allow_any_img_src option" 221 + ); 222 + 223 + let result = to_html_with_options("![](mailto:a)", &options).unwrap(); 224 + assert_eq!( 225 + result, "<p><img src=\"mailto:a\" alt=\"\" /></p>", 226 + "should allow mailto protocol with allow_any_img_src option" 227 + ); 228 + }