this repo has no description
at main 1505 lines 60 kB view raw
1(* This module is a recursive descent parser for the ocamldoc syntax. The parser 2 consumes a token stream of type [Token.t Stream.t], provided by the lexer, 3 and produces a comment AST of the type defined in [Parser_.Ast]. 4 5 The AST has two main levels: inline elements, which can appear inside 6 paragraphs, and are spaced horizontally when presented, and block elements, 7 such as paragraphs and lists, which are spaced vertically when presented. 8 Block elements contain inline elements, but not vice versa. 9 10 Corresponding to this, the parser has three "main" functions: 11 12 - [delimited_inline_element_list] parses a run of inline elements that is 13 delimited by curly brace markup ([{...}]). 14 - [paragraph] parses a run of inline elements that make up a paragraph, and 15 is not explicitly delimited with curly braces. 16 - [block_element_list] parses a sequence of block elements. A comment is a 17 sequence of block elements, so [block_element_list] is the top-level 18 parser. It is also used for list item and tag content. *) 19 20open! Compat 21 22type 'a with_location = 'a Loc.with_location 23 24(* {2 Input} *) 25 26type input = { 27 tokens : Token.t Loc.with_location Stream.t; 28 warnings : Warning.t list ref; 29} 30 31(* {2 Output} *) 32 33let add_warning input warning = input.warnings := warning :: !(input.warnings) 34let junk input = Stream.junk input.tokens 35 36let peek input = 37 match Stream.peek input.tokens with 38 | Some token -> token 39 | None -> assert false 40 41module Table = struct 42 module Light_syntax = struct 43 let valid_align = function 44 | [ { Loc.value = `Word w; _ } ] -> ( 45 match String.length w with 46 | 0 -> `Valid None 47 | 1 -> ( 48 match w with 49 | "-" -> `Valid None 50 | ":" -> `Valid (Some `Center) 51 | _ -> `Invalid) 52 | len -> 53 if String.for_all (Char.equal '-') (String.sub w 1 (len - 2)) then 54 match (String.get w 0, String.get w (len - 1)) with 55 | ':', ':' -> `Valid (Some `Center) 56 | ':', '-' -> `Valid (Some `Left) 57 | '-', ':' -> `Valid (Some `Right) 58 | '-', '-' -> `Valid None 59 | _ -> `Invalid 60 else `Invalid) 61 | _ -> `Invalid 62 63 let valid_align_row lx = 64 let rec loop acc = function 65 | [] -> Some (List.rev acc) 66 | x :: q -> ( 67 match valid_align x with 68 | `Invalid -> None 69 | `Valid alignment -> loop (alignment :: acc) q) 70 in 71 loop [] lx 72 73 let create ~grid ~align : Ast.table = 74 let cell_to_block (x, k) = 75 let whole_loc = Loc.span (List.map (fun x -> x.Loc.location) x) in 76 match x with 77 | [] -> ([], k) 78 | _ -> ([ Loc.at whole_loc (`Paragraph x) ], k) 79 in 80 let row_to_block = List.map cell_to_block in 81 let grid_to_block = List.map row_to_block in 82 ((grid_to_block grid, align), `Light) 83 84 let with_kind kind : 'a with_location list list -> 'a Ast.row = 85 List.map (fun c -> (c, kind)) 86 87 let from_raw_data grid : Ast.table = 88 match grid with 89 | [] -> create ~grid:[] ~align:None 90 | row1 :: rows2_N -> ( 91 match valid_align_row row1 with 92 (* If the first line is the align row, everything else is data. *) 93 | Some _ as align -> 94 create ~grid:(List.map (with_kind `Data) rows2_N) ~align 95 | None -> ( 96 match rows2_N with 97 (* Only 1 line, if this is not the align row this is data. *) 98 | [] -> create ~grid:[ with_kind `Data row1 ] ~align:None 99 | row2 :: rows3_N -> ( 100 match valid_align_row row2 with 101 (* If the second line is the align row, the first one is the 102 header and the rest is data. *) 103 | Some _ as align -> 104 let header = with_kind `Header row1 in 105 let data = List.map (with_kind `Data) rows3_N in 106 create ~grid:(header :: data) ~align 107 (* No align row in the first 2 lines, everything is considered 108 data. *) 109 | None -> 110 create ~grid:(List.map (with_kind `Data) grid) ~align:None 111 ))) 112 end 113 114 module Heavy_syntax = struct 115 let create ~grid : Ast.table = ((grid, None), `Heavy) 116 let from_grid grid : Ast.table = create ~grid 117 end 118end 119 120module Reader = struct 121 let until_rbrace_or_eof input acc = 122 let rec consume () = 123 let next_token = peek input in 124 match next_token.value with 125 | `Right_brace -> 126 junk input; 127 `End (acc, next_token.location) 128 | `End -> 129 Parse_error.end_not_allowed next_token.location ~in_what:"table" 130 |> add_warning input; 131 junk input; 132 `End (acc, next_token.location) 133 | `Space _ | `Single_newline _ | `Blank_line _ -> 134 junk input; 135 consume () 136 | _ -> `Token next_token 137 in 138 consume () 139 140 module Infix = struct 141 let ( >>> ) consume if_token = 142 match consume with 143 | `End (ret, loc) -> (ret, loc) 144 | `Token t -> if_token t 145 end 146end 147 148open Reader.Infix 149 150(* The last token in the stream is always [`End], and it is never consumed by 151 the parser, so the [None] case is impossible. *) 152 153let npeek n input = Stream.npeek n input.tokens 154 155(* {2 Non-link inline elements} *) 156type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ] 157 158(* Convenient abbreviation for use in patterns. *) 159type token_that_always_begins_an_inline_element = 160 [ `Word of string 161 | `Code_span of string 162 | `Raw_markup of string option * string 163 | `Begin_style of style 164 | `Simple_reference of string 165 | `Begin_reference_with_replacement_text of string 166 | `Simple_link of string 167 | `Begin_link_with_replacement_text of string 168 | `Math_span of string ] 169 170(* Check that the token constructors above actually are all in [Token.t]. *) 171let _check_subset : token_that_always_begins_an_inline_element -> Token.t = 172 fun t -> (t :> Token.t) 173 174(* Consumes tokens that make up a single non-link inline element: 175 176 - a horizontal space ([`Space], significant in inline elements), 177 - a word (see [word]), 178 - a code span ([...], [`Code_span _]), or 179 - styled text ({e ...}). 180 181 The latter requires a recursive call to [delimited_inline_element_list], 182 defined below. 183 184 This should be part of [delimited_inline_element_list]; however, it is also 185 called by function [paragraph]. As a result, it is factored out, and made 186 mutually-recursive with [delimited_inline_element_list]. 187 188 This is called only when it is known that the first token in the list is the 189 beginning of an inline element. In the case of [`Minus] and [`Plus], that 190 means the caller has determined that they are not a list bullet (i.e., not 191 the first non-whitespace tokens on their line). 192 193 This function consumes exactly the tokens that make up the element. *) 194let rec inline_element : 195 input -> Loc.span -> _ -> Ast.inline_element with_location = 196 fun input location next_token -> 197 match next_token with 198 | `Space _ as token -> 199 junk input; 200 Loc.at location token 201 | `Word _ as token -> 202 junk input; 203 Loc.at location token 204 (* This is actually the same memory representation as the token, complete 205 with location, and is probably the most common case. Perhaps the token 206 can be reused somehow. The same is true of [`Space], [`Code_span]. *) 207 | `Minus -> 208 junk input; 209 Loc.at location (`Word "-") 210 | `Plus -> 211 junk input; 212 Loc.at location (`Word "+") 213 | `Bar -> 214 junk input; 215 Loc.at location (`Word "|") 216 | (`Code_span _ | `Math_span _ | `Raw_markup _) as token -> 217 junk input; 218 Loc.at location token 219 | `Begin_style s as parent_markup -> 220 junk input; 221 222 let requires_leading_whitespace = 223 match s with 224 | `Bold | `Italic | `Emphasis -> true 225 | `Superscript | `Subscript -> false 226 in 227 let content, brace_location = 228 delimited_inline_element_list ~parent_markup 229 ~parent_markup_location:location ~requires_leading_whitespace input 230 in 231 232 let location = Loc.span [ location; brace_location ] in 233 234 if content = [] then 235 Parse_error.should_not_be_empty 236 ~what:(Token.describe parent_markup) 237 location 238 |> add_warning input; 239 240 Loc.at location (`Styled (s, content)) 241 | `Simple_reference r -> 242 junk input; 243 244 let r_location = Loc.nudge_start (String.length "{!") location in 245 let r = Loc.at r_location r in 246 247 Loc.at location (`Reference (`Simple, r, [])) 248 | `Begin_reference_with_replacement_text r as parent_markup -> 249 junk input; 250 251 let r_location = Loc.nudge_start (String.length "{{!") location in 252 let r = Loc.at r_location r in 253 254 let content, brace_location = 255 delimited_inline_element_list ~parent_markup 256 ~parent_markup_location:location ~requires_leading_whitespace:false 257 input 258 in 259 260 let location = Loc.span [ location; brace_location ] in 261 262 if content = [] then 263 Parse_error.should_not_be_empty 264 ~what:(Token.describe parent_markup) 265 location 266 |> add_warning input; 267 268 Loc.at location (`Reference (`With_text, r, content)) 269 | `Simple_link u -> 270 junk input; 271 272 let u = String.trim u in 273 274 if u = "" then 275 Parse_error.should_not_be_empty 276 ~what:(Token.describe next_token) 277 location 278 |> add_warning input; 279 280 Loc.at location (`Link (u, [])) 281 | `Begin_link_with_replacement_text u as parent_markup -> 282 junk input; 283 284 let u = String.trim u in 285 286 if u = "" then 287 Parse_error.should_not_be_empty 288 ~what:(Token.describe parent_markup) 289 location 290 |> add_warning input; 291 292 let content, brace_location = 293 delimited_inline_element_list ~parent_markup 294 ~parent_markup_location:location ~requires_leading_whitespace:false 295 input 296 in 297 298 `Link (u, content) |> Loc.at (Loc.span [ location; brace_location ]) 299 300(* Consumes tokens that make up a sequence of inline elements that is ended by 301 a '}', a [`Right_brace] token. The brace token is also consumed. 302 303 The sequences are also preceded by some markup like '{b'. Some of these 304 markup tokens require whitespace immediately after the token, and others not. 305 The caller indicates which way that is through the 306 [~requires_leading_whitespace] argument. 307 308 Whitespace is significant in inline element lists. In particular, "foo [bar]" 309 is represented as [`Word "foo"; `Space; `Code_span "bar"], while "foo[bar]" 310 is [`Word "foo"; `Code_span "bar"]. It doesn't matter how much whitespace is 311 there, just whether it is present or not. Single newlines and horizontal 312 space in any amount are allowed. Blank lines are not, as these are separators 313 for {e block} elements. 314 315 In correct input, the first and last elements emitted will not be [`Space], 316 i.e. [`Space] appears only between other non-link inline elements. In 317 incorrect input, there might be [`Space] followed immediately by something 318 like an @author tag. 319 320 The [~parent_markup] and [~parent_markup_location] arguments are used for 321 generating error messages. *) 322and delimited_inline_element_list : 323 parent_markup:[< Token.t ] -> 324 parent_markup_location:Loc.span -> 325 requires_leading_whitespace:bool -> 326 input -> 327 Ast.inline_element with_location list * Loc.span = 328 fun ~parent_markup ~parent_markup_location ~requires_leading_whitespace 329 input -> 330 (* [~at_start_of_line] is used to interpret [`Minus] and [`Plus]. These are 331 word tokens if not the first non-whitespace tokens on their line. Then, 332 they are allowed in a non-link element list. *) 333 let rec consume_elements : 334 at_start_of_line:bool -> 335 Ast.inline_element with_location list -> 336 Ast.inline_element with_location list * Loc.span = 337 fun ~at_start_of_line acc -> 338 let next_token = peek input in 339 match next_token.value with 340 | `Right_brace -> 341 junk input; 342 (List.rev acc, next_token.location) 343 (* The [`Space] token is not space at the beginning or end of line, because 344 that is combined into [`Single_newline] or [`Blank_line] tokens. It is 345 also not at the beginning of markup (after e.g. '{b'), because that is 346 handled separately before calling 347 [consume_non_link_inline_elements], and not immediately before '}', 348 because that is combined into the [`Right_brace] token by the lexer. So, 349 it is an internal space, and we want to add it to the non-link inline 350 element list. *) 351 | (`Space _ | #token_that_always_begins_an_inline_element) as token -> 352 let acc = inline_element input next_token.location token :: acc in 353 consume_elements ~at_start_of_line:false acc 354 | `Single_newline ws -> 355 junk input; 356 let element = Loc.same next_token (`Space ws) in 357 consume_elements ~at_start_of_line:true (element :: acc) 358 | `Blank_line ws as blank -> 359 Parse_error.not_allowed ~what:(Token.describe blank) 360 ~in_what:(Token.describe parent_markup) 361 next_token.location 362 |> add_warning input; 363 364 junk input; 365 let element = Loc.same next_token (`Space ws) in 366 consume_elements ~at_start_of_line:true (element :: acc) 367 | `Bar as token -> 368 let acc = inline_element input next_token.location token :: acc in 369 consume_elements ~at_start_of_line:false acc 370 | (`Minus | `Plus) as bullet -> 371 (if at_start_of_line then 372 let suggestion = 373 Printf.sprintf "move %s so it isn't the first thing on the line." 374 (Token.print bullet) 375 in 376 Parse_error.not_allowed ~what:(Token.describe bullet) 377 ~in_what:(Token.describe parent_markup) 378 ~suggestion next_token.location 379 |> add_warning input); 380 381 let acc = inline_element input next_token.location bullet :: acc in 382 consume_elements ~at_start_of_line:false acc 383 | other_token -> 384 Parse_error.not_allowed 385 ~what:(Token.describe other_token) 386 ~in_what:(Token.describe parent_markup) 387 next_token.location 388 |> add_warning input; 389 390 let last_location = 391 match acc with 392 | last_token :: _ -> last_token.location 393 | [] -> parent_markup_location 394 in 395 396 (List.rev acc, last_location) 397 in 398 399 let first_token = peek input in 400 match first_token.value with 401 | `Space _ -> 402 junk input; 403 consume_elements ~at_start_of_line:false [] 404 (* [~at_start_of_line] is [false] here because the preceding token was some 405 some markup like '{b', and we didn't move to the next line, so the next 406 token will not be the first non-whitespace token on its line. *) 407 | `Single_newline _ -> 408 junk input; 409 consume_elements ~at_start_of_line:true [] 410 | `Blank_line _ as blank -> 411 (* In case the markup is immediately followed by a blank line, the error 412 message printed by the catch-all case below can be confusing, as it will 413 suggest that the markup must be followed by a newline (which it is). It 414 just must not be followed by two newlines. To explain that clearly, 415 handle that case specifically. *) 416 Parse_error.not_allowed ~what:(Token.describe blank) 417 ~in_what:(Token.describe parent_markup) 418 first_token.location 419 |> add_warning input; 420 421 junk input; 422 consume_elements ~at_start_of_line:true [] 423 | `Right_brace -> 424 junk input; 425 ([], first_token.location) 426 | _ -> 427 if requires_leading_whitespace then 428 Parse_error.should_be_followed_by_whitespace 429 ~what:(Token.print parent_markup) 430 parent_markup_location 431 |> add_warning input; 432 consume_elements ~at_start_of_line:false [] 433 434(* {2 Paragraphs} *) 435 436(* Consumes tokens that make up a paragraph. 437 438 A paragraph is a sequence of inline elements that ends on a blank line, or 439 explicit block markup such as a verbatim block on a new line. 440 441 Because of the significance of newlines, paragraphs are parsed line-by-line. 442 The function [paragraph] is called only when the current token is the first 443 non-whitespace token on its line, and begins an inline element. [paragraph] 444 then parses a line of inline elements. Afterwards, it looks ahead to the next 445 line. If that line also begins with an inline element, it parses that line, 446 and so on. *) 447let paragraph : input -> Ast.nestable_block_element with_location = 448 fun input -> 449 (* Parses a single line of a paragraph, consisting of inline elements. The 450 only valid ways to end a paragraph line are with [`End], [`Single_newline], 451 [`Blank_line], and [`Right_brace]. Everything else either belongs in the 452 paragraph, or signifies an attempt to begin a block element inside a 453 paragraph line, which is an error. These errors are caught elsewhere; the 454 paragraph parser just stops. *) 455 let rec paragraph_line : 456 Ast.inline_element with_location list -> 457 Ast.inline_element with_location list = 458 fun acc -> 459 let next_token = peek input in 460 match next_token.value with 461 | ( `Space _ | `Minus | `Plus | `Bar 462 | #token_that_always_begins_an_inline_element ) as token -> 463 let element = inline_element input next_token.location token in 464 paragraph_line (element :: acc) 465 | _ -> acc 466 in 467 468 (* After each line is parsed, decides whether to parse more lines. *) 469 let rec additional_lines : 470 Ast.inline_element with_location list -> 471 Ast.inline_element with_location list = 472 fun acc -> 473 match npeek 2 input with 474 | { value = `Single_newline ws; location } 475 :: { value = #token_that_always_begins_an_inline_element | `Bar; _ } 476 :: _ -> 477 junk input; 478 let acc = Loc.at location (`Space ws) :: acc in 479 let acc = paragraph_line acc in 480 additional_lines acc 481 | _ -> List.rev acc 482 in 483 484 let elements = paragraph_line [] |> additional_lines in 485 `Paragraph elements |> Loc.at (Loc.span (List.map Loc.location elements)) 486 487(* {2 Block elements} *) 488 489(* {3 Helper types} *) 490 491(* The interpretation of tokens in the block parser depends on where on a line 492 each token appears. The six possible "locations" are: 493 494 - [`At_start_of_line], when only whitespace has been read on the current 495 line. 496 - [`After_tag], when a valid tag token, such as [@deprecated], has been read, 497 and only whitespace has been read since. 498 - [`After_shorthand_bullet], when a valid shorthand list item bullet, such as 499 [-], has been read, and only whitespace has been read since. 500 - [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li], 501 has been read, and only whitespace has been read since. 502 - [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read. 503 - [`After_text], when any other valid non-whitespace token has already been 504 read on the current line. 505 506 Here are some examples of how this affects the interpretation of tokens: 507 508 - A paragraph can start anywhere except [`After_text] (two paragraphs cannot 509 be on the same line, but paragraphs can be nested in just about anything). 510 - [`Minus] is interpreted as a list item bullet [`At_start_of_line], 511 [`After_tag], and [`After_explicit_list_bullet]. 512 - Tags are only allowed [`At_start_of_line]. 513 514 To track the location accurately, the functions that make up the block parser 515 pass explicit [where_in_line] values around and return them. 516 517 In a few cases, [where_in_line] can be inferred from what helper was called. 518 For example, the [paragraph] parser always stops on the same line as the last 519 significant token that is in the paragraph it consumed, so the location must 520 be [`After_text]. *) 521type where_in_line = 522 [ `At_start_of_line 523 | `After_tag 524 | `After_shorthand_bullet 525 | `After_explicit_list_bullet 526 | `After_table_cell 527 | `After_text ] 528 529(* The block parsing loop, function [block_element_list], stops when it 530 encounters certain tokens. 531 532 When it is called for the whole comment, or for in explicit list item 533 ([{li foo}]), it can only stop on end of input or a right brace. 534 535 When it is called inside a shorthand list item ([- foo]), it stops on end of 536 input, right brace, a blank line (indicating end of shorthand list), plus or 537 minus (indicating the start of the next list item), or a section heading or 538 tag, which cannot be nested in list markup. 539 540 The block parser [block_element_list] explicitly returns the token that 541 stopped it, with a type more precise than [Token.t stream_head]: if it was 542 called for the whole comment or an explicit list item, the stop token will 543 have type [stops_at_delimiters stream_head], and if it was called for a 544 shorthand list item, the stop token will have type 545 [implicit_stop stream_head]. This allows the calling parsers to write precise 546 cases for exactly the tokens that might be at the front of the stream after 547 the block parser returns. *) 548type stops_at_delimiters = [ `End | `Right_brace ] 549type code_stop = [ `End | `Right_code_delimiter ] 550 551type stopped_implicitly = 552 [ `End 553 | `Blank_line of string 554 | `Right_brace 555 | `Minus 556 | `Plus 557 | Token.section_heading 558 | Token.media_markup 559 | Token.tag ] 560 561(* Ensure that the above two types are really subsets of [Token.t]. *) 562let _check_subset : stops_at_delimiters -> Token.t = fun t -> (t :> Token.t) 563let _check_subset : stopped_implicitly -> Token.t = fun t -> (t :> Token.t) 564 565(* The different contexts in which the block parser [block_element_list] can be 566 called. The block parser's behavior depends somewhat on the context. For 567 example, while paragraphs are allowed anywhere, shorthand lists are not 568 allowed immediately inside other shorthand lists, while tags are not allowed 569 anywhere except at the comment top level. 570 571 Besides telling the block parser how to behave, each context also carries two 572 types, which determine the return type of the block parser: 573 574 - The type of blocks the parser returns. Note that [nestable_block_element] 575 is included in [block_element]. However, the extra block kinds in 576 [block_element] are only allowed at the comment top level. 577 - The type of token that the block parser stops at. See discussion above. *) 578type ('block, 'stops_at_which_tokens) context = 579 | Top_level : (Ast.block_element, stops_at_delimiters) context 580 | In_implicitly_ended : 581 [ `Tag | `Shorthand_list ] 582 -> (Ast.nestable_block_element, stopped_implicitly) context 583 | In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context 584 | In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context 585 | In_code_results : (Ast.nestable_block_element, code_stop) context 586 587(* This is a no-op. It is needed to prove to the type system that nestable block 588 elements are acceptable block elements in all contexts. *) 589let accepted_in_all_contexts : type block stops_at_which_tokens. 590 (block, stops_at_which_tokens) context -> 591 Ast.nestable_block_element -> 592 block = 593 fun context block -> 594 match context with 595 | Top_level -> (block :> Ast.block_element) 596 | In_implicitly_ended (`Tag | `Shorthand_list) -> block 597 | In_explicit_list -> block 598 | In_table_cell -> block 599 | In_code_results -> block 600 601(* Converts a tag to a series of words. This is used in error recovery, when a 602 tag cannot be generated. *) 603let tag_to_words = function 604 | `Author s -> [ `Word "@author"; `Space " "; `Word s ] 605 | `Before s -> [ `Word "@before"; `Space " "; `Word s ] 606 | `Canonical s -> [ `Word "@canonical"; `Space " "; `Word s ] 607 | `Deprecated -> [ `Word "@deprecated" ] 608 | `Inline -> [ `Word "@inline" ] 609 | `Open -> [ `Word "@open" ] 610 | `Closed -> [ `Word "@closed" ] 611 | `Hidden -> [ `Word "@hidden" ] 612 | `Param s -> [ `Word "@param"; `Space " "; `Word s ] 613 | `Raise s -> [ `Word "@raise"; `Space " "; `Word s ] 614 | `Return -> [ `Word "@return" ] 615 | `See (`Document, s) -> [ `Word "@see"; `Space " "; `Word ("\"" ^ s ^ "\"") ] 616 | `See (`File, s) -> [ `Word "@see"; `Space " "; `Word ("'" ^ s ^ "'") ] 617 | `See (`Url, s) -> [ `Word "@see"; `Space " "; `Word ("<" ^ s ^ ">") ] 618 | `Since s -> [ `Word "@since"; `Space " "; `Word s ] 619 | `Version s -> [ `Word "@version"; `Space " "; `Word s ] 620 | `Children_order -> [ `Word "@children_order" ] 621 | `Toc_status -> [ `Word "@toc_status" ] 622 | `Order_category -> [ `Word "@order_category" ] 623 | `Short_title -> [ `Word "@short_title" ] 624 | `Custom tag -> [ `Word ("@" ^ tag) ] 625 626(* {3 Block element lists} *) 627 628(* Consumes tokens making up a sequence of block elements. These are: 629 630 - paragraphs, 631 - code blocks, 632 - verbatim text blocks, 633 - tables, 634 - lists, and 635 - section headings. *) 636let rec block_element_list : type block stops_at_which_tokens. 637 (block, stops_at_which_tokens) context -> 638 parent_markup:[< Token.t | `Comment ] -> 639 input -> 640 block with_location list 641 * stops_at_which_tokens with_location 642 * where_in_line = 643 fun context ~parent_markup input -> 644 let rec consume_block_elements : 645 where_in_line -> 646 block with_location list -> 647 block with_location list 648 * stops_at_which_tokens with_location 649 * where_in_line = 650 fun where_in_line acc -> 651 let describe token = 652 match token with 653 | #token_that_always_begins_an_inline_element -> "paragraph" 654 | _ -> Token.describe token 655 in 656 657 let warn_if_after_text { Loc.location; value = token } = 658 if where_in_line = `After_text then 659 Parse_error.should_begin_on_its_own_line ~what:(describe token) location 660 |> add_warning input 661 in 662 663 let warn_because_not_at_top_level { Loc.location; value = token } = 664 let suggestion = 665 Printf.sprintf "move %s outside of any other markup." 666 (Token.print token) 667 in 668 Parse_error.not_allowed ~what:(Token.describe token) 669 ~in_what:(Token.describe parent_markup) 670 ~suggestion location 671 |> add_warning input 672 in 673 674 match peek input with 675 (* Terminators: the two tokens that terminate anything. *) 676 | { value = `End; _ } as next_token -> ( 677 match context with 678 | Top_level -> (List.rev acc, next_token, where_in_line) 679 | In_implicitly_ended (`Tag | `Shorthand_list) -> 680 (List.rev acc, next_token, where_in_line) 681 | In_explicit_list -> (List.rev acc, next_token, where_in_line) 682 | In_table_cell -> (List.rev acc, next_token, where_in_line) 683 | In_code_results -> (List.rev acc, next_token, where_in_line)) 684 | { value = `Right_brace; _ } as next_token -> ( 685 (* This little absurdity is needed to satisfy the type system. Without it, 686 OCaml is unable to prove that [stream_head] has the right type for all 687 possible values of [context]. *) 688 match context with 689 | Top_level -> (List.rev acc, next_token, where_in_line) 690 | In_implicitly_ended (`Tag | `Shorthand_list) -> 691 (List.rev acc, next_token, where_in_line) 692 | In_explicit_list -> (List.rev acc, next_token, where_in_line) 693 | In_table_cell -> (List.rev acc, next_token, where_in_line) 694 | In_code_results -> 695 junk input; 696 consume_block_elements where_in_line acc) 697 | { value = `Right_code_delimiter; _ } as next_token -> ( 698 match context with 699 | In_code_results -> (List.rev acc, next_token, where_in_line) 700 | _ -> 701 junk input; 702 consume_block_elements where_in_line acc) 703 (* Whitespace. This can terminate some kinds of block elements. It is also 704 necessary to track it to interpret [`Minus] and [`Plus] correctly, as 705 well as to ensure that all block elements begin on their own line. *) 706 | { value = `Space _; _ } -> 707 junk input; 708 consume_block_elements where_in_line acc 709 | { value = `Single_newline _; _ } -> 710 junk input; 711 consume_block_elements `At_start_of_line acc 712 | { value = `Blank_line _; _ } as next_token -> ( 713 match context with 714 (* Blank lines terminate shorthand lists ([- foo]) and tags. They also 715 terminate paragraphs, but the paragraph parser is aware of that 716 internally. *) 717 | In_implicitly_ended (`Tag | `Shorthand_list) -> 718 (List.rev acc, next_token, where_in_line) 719 (* Otherwise, blank lines are pretty much like single newlines. *) 720 | _ -> 721 junk input; 722 consume_block_elements `At_start_of_line acc) 723 (* Explicit list items ([{li ...}] and [{- ...}]) can never appear directly 724 in block content. They can only appear inside [{ul ...}] and [{ol ...}]. 725 So, catch those. *) 726 | { value = `Begin_list_item _ as token; location } -> 727 let suggestion = 728 Printf.sprintf "move %s into %s, or use %s." (Token.print token) 729 (Token.describe (`Begin_list `Unordered)) 730 (Token.describe `Minus) 731 in 732 Parse_error.not_allowed ~what:(Token.describe token) 733 ~in_what:(Token.describe parent_markup) 734 ~suggestion location 735 |> add_warning input; 736 737 junk input; 738 consume_block_elements where_in_line acc 739 (* Table rows ([{tr ...}]) can never appear directly 740 in block content. They can only appear inside [{table ...}]. *) 741 | { value = `Begin_table_row as token; location } -> 742 let suggestion = 743 Printf.sprintf "move %s into %s." (Token.print token) 744 (Token.describe `Begin_table_heavy) 745 in 746 Parse_error.not_allowed ~what:(Token.describe token) 747 ~in_what:(Token.describe parent_markup) 748 ~suggestion location 749 |> add_warning input; 750 junk input; 751 consume_block_elements where_in_line acc 752 (* Table cells ([{th ...}] and [{td ...}]) can never appear directly 753 in block content. They can only appear inside [{tr ...}]. *) 754 | { value = `Begin_table_cell _ as token; location } -> 755 let suggestion = 756 Printf.sprintf "move %s into %s." (Token.print token) 757 (Token.describe `Begin_table_row) 758 in 759 Parse_error.not_allowed ~what:(Token.describe token) 760 ~in_what:(Token.describe parent_markup) 761 ~suggestion location 762 |> add_warning input; 763 junk input; 764 consume_block_elements where_in_line acc 765 (* Tags. These can appear at the top level only. *) 766 | { value = `Tag tag as token; location } as next_token -> ( 767 let recover_when_not_at_top_level context = 768 warn_because_not_at_top_level next_token; 769 junk input; 770 let words = List.map (Loc.at location) (tag_to_words tag) in 771 let paragraph = 772 `Paragraph words 773 |> accepted_in_all_contexts context 774 |> Loc.at location 775 in 776 consume_block_elements `At_start_of_line (paragraph :: acc) 777 in 778 779 match context with 780 (* Tags cannot make sense in an explicit list ([{ul {li ...}}]). *) 781 | In_explicit_list -> recover_when_not_at_top_level context 782 (* If a tag starts at the beginning of a line, it terminates the preceding 783 tag and/or the current shorthand list. In this case, return to the 784 caller, and let the caller decide how to interpret the tag token. *) 785 | In_implicitly_ended (`Tag | `Shorthand_list) -> 786 if where_in_line = `At_start_of_line then 787 (List.rev acc, next_token, where_in_line) 788 else recover_when_not_at_top_level context 789 | In_table_cell -> recover_when_not_at_top_level context 790 | In_code_results -> recover_when_not_at_top_level context 791 (* If this is the top-level call to [block_element_list], parse the 792 tag. *) 793 | Top_level -> ( 794 if where_in_line <> `At_start_of_line then 795 Parse_error.should_begin_on_its_own_line 796 ~what:(Token.describe token) location 797 |> add_warning input; 798 799 junk input; 800 801 match tag with 802 | (`Author s | `Since s | `Version s | `Canonical s) as tag -> 803 let s = String.trim s in 804 if s = "" then 805 Parse_error.should_not_be_empty ~what:(Token.describe token) 806 location 807 |> add_warning input; 808 let tag = 809 match tag with 810 | `Author _ -> `Author s 811 | `Since _ -> `Since s 812 | `Version _ -> `Version s 813 | `Canonical _ -> 814 (* TODO The location is only approximate, as we need lexer 815 cooperation to get the real location. *) 816 let r_location = 817 Loc.nudge_start (String.length "@canonical ") location 818 in 819 `Canonical (Loc.at r_location s) 820 in 821 822 let tag = Loc.at location (`Tag tag) in 823 consume_block_elements `After_text (tag :: acc) 824 | ( `Deprecated | `Return | `Children_order | `Short_title 825 | `Toc_status | `Order_category | `Custom _ ) as tag -> 826 let content, _stream_head, where_in_line = 827 block_element_list (In_implicitly_ended `Tag) 828 ~parent_markup:token input 829 in 830 let tag = 831 match tag with 832 | `Deprecated -> `Deprecated content 833 | `Toc_status -> `Toc_status content 834 | `Return -> `Return content 835 | `Children_order -> `Children_order content 836 | `Short_title -> `Short_title content 837 | `Order_category -> `Order_category content 838 | `Custom s -> `Custom (s, content) 839 in 840 let location = 841 location :: List.map Loc.location content |> Loc.span 842 in 843 let tag = Loc.at location (`Tag tag) in 844 consume_block_elements where_in_line (tag :: acc) 845 | (`Param _ | `Raise _ | `Before _) as tag -> 846 let content, _stream_head, where_in_line = 847 block_element_list (In_implicitly_ended `Tag) 848 ~parent_markup:token input 849 in 850 let tag = 851 match tag with 852 | `Param s -> `Param (s, content) 853 | `Raise s -> `Raise (s, content) 854 | `Before s -> `Before (s, content) 855 in 856 let location = 857 location :: List.map Loc.location content |> Loc.span 858 in 859 let tag = Loc.at location (`Tag tag) in 860 consume_block_elements where_in_line (tag :: acc) 861 | `See (kind, target) -> 862 let content, _next_token, where_in_line = 863 block_element_list (In_implicitly_ended `Tag) 864 ~parent_markup:token input 865 in 866 let location = 867 location :: List.map Loc.location content |> Loc.span 868 in 869 let tag = `Tag (`See (kind, target, content)) in 870 let tag = Loc.at location tag in 871 consume_block_elements where_in_line (tag :: acc) 872 | (`Inline | `Open | `Closed | `Hidden) as tag -> 873 let tag = Loc.at location (`Tag tag) in 874 consume_block_elements `After_text (tag :: acc))) 875 | ( { value = #token_that_always_begins_an_inline_element; _ } 876 | { value = `Bar; _ } ) as next_token -> 877 warn_if_after_text next_token; 878 879 let block = paragraph input in 880 let block = Loc.map (accepted_in_all_contexts context) block in 881 let acc = block :: acc in 882 consume_block_elements `After_text acc 883 | { value = `Verbatim s as token; location } as next_token -> 884 warn_if_after_text next_token; 885 if s = "" then 886 Parse_error.should_not_be_empty ~what:(Token.describe token) location 887 |> add_warning input; 888 889 junk input; 890 let block = accepted_in_all_contexts context token in 891 let block = Loc.at location block in 892 let acc = block :: acc in 893 consume_block_elements `After_text acc 894 | { value = `Math_block s as token; location } as next_token -> 895 warn_if_after_text next_token; 896 if s = "" then 897 Parse_error.should_not_be_empty ~what:(Token.describe token) location 898 |> add_warning input; 899 900 junk input; 901 let block = accepted_in_all_contexts context token in 902 let block = Loc.at location block in 903 let acc = block :: acc in 904 consume_block_elements `After_text acc 905 | { 906 value = 907 `Code_block (meta, delim, { value = s; location = v_loc }, has_outputs) 908 as token; 909 location; 910 } as next_token -> 911 warn_if_after_text next_token; 912 junk input; 913 let delimiter = if delim = "" then None else Some delim in 914 let output, location = 915 if not has_outputs then (None, location) 916 else 917 let content, next_token, _where_in_line = 918 block_element_list In_code_results ~parent_markup:token input 919 in 920 junk input; 921 let locations = 922 location :: List.map (fun content -> content.Loc.location) content 923 in 924 let location = Loc.span locations in 925 let location = { location with end_ = next_token.location.end_ } in 926 (Some content, location) 927 in 928 929 if s = "" then 930 Parse_error.should_not_be_empty ~what:(Token.describe token) location 931 |> add_warning input; 932 933 let meta = 934 match meta with 935 | None -> None 936 | Some (language, tags) -> Some { Ast.language; tags } 937 in 938 let block = 939 accepted_in_all_contexts context 940 (`Code_block 941 { 942 Ast.meta; 943 delimiter; 944 content = { value = s; location = v_loc }; 945 output; 946 }) 947 in 948 let block = Loc.at location block in 949 let acc = block :: acc in 950 consume_block_elements `After_text acc 951 | { value = `Modules s as token; location } as next_token -> 952 warn_if_after_text next_token; 953 954 junk input; 955 956 (* TODO Use some library for a splitting function, or move this out into a 957 Util module. *) 958 let split_string delimiters s = 959 let rec scan_delimiters acc index = 960 if index >= String.length s then List.rev acc 961 else if String.contains delimiters s.[index] then 962 scan_delimiters acc (index + 1) 963 else scan_word acc index (index + 1) 964 and scan_word acc start_index index = 965 if index >= String.length s then 966 let word = String.sub s start_index (index - start_index) in 967 List.rev (word :: acc) 968 else if String.contains delimiters s.[index] then 969 let word = String.sub s start_index (index - start_index) in 970 scan_delimiters (word :: acc) (index + 1) 971 else scan_word acc start_index (index + 1) 972 in 973 974 scan_delimiters [] 0 975 in 976 977 (* TODO Correct locations await a full implementation of {!modules} 978 parsing. *) 979 let modules = 980 split_string " \t\r\n" s |> List.map (fun r -> Loc.at location r) 981 in 982 983 if modules = [] then 984 Parse_error.should_not_be_empty ~what:(Token.describe token) location 985 |> add_warning input; 986 987 let block = accepted_in_all_contexts context (`Modules modules) in 988 let block = Loc.at location block in 989 let acc = block :: acc in 990 consume_block_elements `After_text acc 991 | { value = `Begin_list kind as token; location } as next_token -> 992 warn_if_after_text next_token; 993 994 junk input; 995 996 let items, brace_location = 997 explicit_list_items ~parent_markup:token input 998 in 999 if items = [] then 1000 Parse_error.should_not_be_empty ~what:(Token.describe token) location 1001 |> add_warning input; 1002 1003 let location = Loc.span [ location; brace_location ] in 1004 let block = `List (kind, `Heavy, items) in 1005 let block = accepted_in_all_contexts context block in 1006 let block = Loc.at location block in 1007 let acc = block :: acc in 1008 consume_block_elements `After_text acc 1009 | { value = (`Begin_table_light | `Begin_table_heavy) as token; location } 1010 as next_token -> 1011 warn_if_after_text next_token; 1012 junk input; 1013 let block, brace_location = 1014 let parent_markup = token in 1015 let parent_markup_location = location in 1016 match token with 1017 | `Begin_table_light -> 1018 light_table input ~parent_markup ~parent_markup_location 1019 | `Begin_table_heavy -> 1020 heavy_table input ~parent_markup ~parent_markup_location 1021 in 1022 let location = Loc.span [ location; brace_location ] in 1023 let block = accepted_in_all_contexts context (`Table block) in 1024 let block = Loc.at location block in 1025 let acc = block :: acc in 1026 consume_block_elements `After_text acc 1027 | { value = (`Minus | `Plus) as token; location } as next_token -> ( 1028 (match where_in_line with 1029 | `After_text | `After_shorthand_bullet -> 1030 Parse_error.should_begin_on_its_own_line 1031 ~what:(Token.describe token) location 1032 |> add_warning input 1033 | _ -> ()); 1034 1035 match context with 1036 | In_implicitly_ended `Shorthand_list -> 1037 (List.rev acc, next_token, where_in_line) 1038 | _ -> 1039 let items, where_in_line = 1040 shorthand_list_items next_token where_in_line input 1041 in 1042 let kind = 1043 match token with `Minus -> `Unordered | `Plus -> `Ordered 1044 in 1045 let location = 1046 location :: List.map Loc.location (List.flatten items) |> Loc.span 1047 in 1048 let block = `List (kind, `Light, items) in 1049 let block = accepted_in_all_contexts context block in 1050 let block = Loc.at location block in 1051 let acc = block :: acc in 1052 consume_block_elements where_in_line acc) 1053 | { value = `Begin_section_heading (level, label) as token; location } as 1054 next_token -> ( 1055 let recover_when_not_at_top_level context = 1056 warn_because_not_at_top_level next_token; 1057 junk input; 1058 let content, brace_location = 1059 delimited_inline_element_list ~parent_markup:token 1060 ~parent_markup_location:location ~requires_leading_whitespace:true 1061 input 1062 in 1063 let location = Loc.span [ location; brace_location ] in 1064 let paragraph = 1065 `Paragraph content 1066 |> accepted_in_all_contexts context 1067 |> Loc.at location 1068 in 1069 consume_block_elements `At_start_of_line (paragraph :: acc) 1070 in 1071 1072 match context with 1073 | In_implicitly_ended (`Tag | `Shorthand_list) -> 1074 if where_in_line = `At_start_of_line then 1075 (List.rev acc, next_token, where_in_line) 1076 else recover_when_not_at_top_level context 1077 | In_explicit_list -> recover_when_not_at_top_level context 1078 | In_table_cell -> recover_when_not_at_top_level context 1079 | In_code_results -> recover_when_not_at_top_level context 1080 | Top_level -> 1081 if where_in_line <> `At_start_of_line then 1082 Parse_error.should_begin_on_its_own_line 1083 ~what:(Token.describe token) location 1084 |> add_warning input; 1085 1086 let label = 1087 match label with 1088 | Some "" -> 1089 Parse_error.should_not_be_empty ~what:"heading label" location 1090 |> add_warning input; 1091 None 1092 | _ -> label 1093 in 1094 1095 junk input; 1096 1097 let content, brace_location = 1098 delimited_inline_element_list ~parent_markup:token 1099 ~parent_markup_location:location 1100 ~requires_leading_whitespace:true input 1101 in 1102 if content = [] then 1103 Parse_error.should_not_be_empty ~what:(Token.describe token) 1104 location 1105 |> add_warning input; 1106 1107 let location = Loc.span [ location; brace_location ] in 1108 let heading = `Heading (level, label, content) in 1109 let heading = Loc.at location heading in 1110 let acc = heading :: acc in 1111 consume_block_elements `After_text acc) 1112 | { value = `Begin_paragraph_style _ as token; location } -> 1113 junk input; 1114 let content, brace_location = 1115 delimited_inline_element_list ~parent_markup:token 1116 ~parent_markup_location:location ~requires_leading_whitespace:true 1117 input 1118 in 1119 let location = Loc.span [ location; brace_location ] in 1120 1121 Parse_error.markup_should_not_be_used ~what:(Token.describe token) 1122 location 1123 |> add_warning input; 1124 1125 let paragraph = 1126 `Paragraph content 1127 |> accepted_in_all_contexts context 1128 |> Loc.at location 1129 in 1130 consume_block_elements `At_start_of_line (paragraph :: acc) 1131 | { 1132 location; 1133 value = `Media_with_replacement_text (href, media, content) as token; 1134 } -> 1135 junk input; 1136 1137 let r_location = 1138 Loc.nudge_start 1139 (String.length @@ Token.s_of_media `Replaced media) 1140 location 1141 |> Loc.nudge_end (String.length content + 1) 1142 (* +1 for closing character *) 1143 in 1144 let c_location = 1145 Loc.nudge_start 1146 (String.length (Token.s_of_media `Replaced media) 1147 + String.length (match href with `Reference s | `Link s -> s)) 1148 location 1149 |> Loc.nudge_end 1 1150 in 1151 let content = String.trim content in 1152 let href = href |> Loc.at r_location in 1153 1154 if content = "" then 1155 Parse_error.should_not_be_empty ~what:(Token.describe token) 1156 c_location 1157 |> add_warning input; 1158 1159 let block = `Media (`Simple, href, content, media) in 1160 let block = accepted_in_all_contexts context block in 1161 let block = Loc.at location block in 1162 let acc = block :: acc in 1163 consume_block_elements `After_text acc 1164 | { location; value = `Simple_media (href, media) } -> 1165 junk input; 1166 1167 let r_location = 1168 Loc.nudge_start 1169 (String.length @@ Token.s_of_media `Simple media) 1170 location 1171 |> Loc.nudge_end 1 1172 in 1173 let href = href |> Loc.at r_location in 1174 let block = `Media (`Simple, href, "", media) in 1175 let block = accepted_in_all_contexts context block in 1176 let block = Loc.at location block in 1177 let acc = block :: acc in 1178 consume_block_elements `After_text acc 1179 in 1180 1181 let where_in_line = 1182 match context with 1183 | Top_level -> `At_start_of_line 1184 | In_implicitly_ended `Shorthand_list -> `After_shorthand_bullet 1185 | In_explicit_list -> `After_explicit_list_bullet 1186 | In_table_cell -> `After_table_cell 1187 | In_code_results -> `After_tag 1188 | In_implicitly_ended `Tag -> `After_tag 1189 in 1190 1191 consume_block_elements where_in_line [] 1192 1193(* {3 Lists} *) 1194 1195(* Consumes a sequence of implicit list items. Each one consists of a [`Minus] 1196 or [`Plus] token, followed by block elements until: 1197 1198 - a blank line, or 1199 - a list bullet of the opposite kind (e.g. [`Plus] for a [`Minus] list). 1200 1201 This function is called when the next token is known to be [`Minus] or 1202 [`Plus]. It consumes that token, and calls the block element parser (see 1203 above). That parser returns to [implicit_list_items] only on [`Blank_line], 1204 [`End], [`Minus] or [`Plus] at the start of a line, or [`Right_brace]. *) 1205and shorthand_list_items : 1206 [ `Minus | `Plus ] with_location -> 1207 where_in_line -> 1208 input -> 1209 Ast.nestable_block_element with_location list list * where_in_line = 1210 fun first_token where_in_line input -> 1211 let bullet_token = first_token.value in 1212 1213 let rec consume_list_items : 1214 [> ] with_location -> 1215 where_in_line -> 1216 Ast.nestable_block_element with_location list list -> 1217 Ast.nestable_block_element with_location list list * where_in_line = 1218 fun next_token where_in_line acc -> 1219 match next_token.value with 1220 | `End | `Right_brace | `Blank_line _ | `Tag _ | `Begin_section_heading _ 1221 | `Simple_media _ | `Media_with_replacement_text _ -> 1222 (List.rev acc, where_in_line) 1223 | (`Minus | `Plus) as bullet -> 1224 if bullet = bullet_token then ( 1225 junk input; 1226 1227 let content, stream_head, where_in_line = 1228 block_element_list (In_implicitly_ended `Shorthand_list) 1229 ~parent_markup:bullet input 1230 in 1231 if content = [] then 1232 Parse_error.should_not_be_empty ~what:(Token.describe bullet) 1233 next_token.location 1234 |> add_warning input; 1235 1236 let acc = content :: acc in 1237 consume_list_items stream_head where_in_line acc) 1238 else (List.rev acc, where_in_line) 1239 in 1240 1241 consume_list_items 1242 (first_token :> stopped_implicitly with_location) 1243 where_in_line [] 1244 1245(* Consumes a sequence of explicit list items (starting with '{li ...}' and 1246 '{-...}', which are represented by [`Begin_list_item _] tokens). 1247 1248 This function is called immediately after '{ul' or '{ol' ([`Begin_list _]) is 1249 read. The only "valid" way to exit is by reading a [`Right_brace] token, 1250 which is consumed. 1251 1252 Whitespace inside the list, but outside list items, is not significant – this 1253 parsing function consumes all of it. Otherwise, only list item start tokens 1254 are accepted. Everything else is an error. *) 1255and explicit_list_items : 1256 parent_markup:[< Token.t ] -> 1257 input -> 1258 Ast.nestable_block_element with_location list list * Loc.span = 1259 fun ~parent_markup input -> 1260 let rec consume_list_items : 1261 Ast.nestable_block_element with_location list list -> 1262 Ast.nestable_block_element with_location list list * Loc.span = 1263 fun acc -> 1264 let next_token = peek input in 1265 match next_token.value with 1266 | `End -> 1267 Parse_error.end_not_allowed next_token.location 1268 ~in_what:(Token.describe parent_markup) 1269 |> add_warning input; 1270 (List.rev acc, next_token.location) 1271 | `Right_brace -> 1272 junk input; 1273 (List.rev acc, next_token.location) 1274 | `Space _ | `Single_newline _ | `Blank_line _ -> 1275 junk input; 1276 consume_list_items acc 1277 | `Begin_list_item kind as token -> 1278 junk input; 1279 1280 (* '{li', represented by [`Begin_list_item `Li], must be followed by 1281 whitespace. *) 1282 (if kind = `Li then 1283 match (peek input).value with 1284 | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace -> 1285 () 1286 (* The presence of [`Right_brace] above requires some explanation: 1287 1288 - It is better to be silent about missing whitespace if the next 1289 token is [`Right_brace], because the error about an empty list 1290 item will be generated below, and that error is more important to 1291 the user. 1292 - The [`Right_brace] token also happens to include all whitespace 1293 before it, as a convenience for the rest of the parser. As a 1294 result, not ignoring it could be wrong: there could in fact be 1295 whitespace in the concrete syntax immediately after '{li', just 1296 it is not represented as [`Space], [`Single_newline], or 1297 [`Blank_line]. *) 1298 | _ -> 1299 Parse_error.should_be_followed_by_whitespace next_token.location 1300 ~what:(Token.print token) 1301 |> add_warning input); 1302 1303 let content, token_after_list_item, _where_in_line = 1304 block_element_list In_explicit_list ~parent_markup:token input 1305 in 1306 1307 if content = [] then 1308 Parse_error.should_not_be_empty next_token.location 1309 ~what:(Token.describe token) 1310 |> add_warning input; 1311 1312 (match token_after_list_item.value with 1313 | `Right_brace -> junk input 1314 | `End -> 1315 Parse_error.end_not_allowed token_after_list_item.location 1316 ~in_what:(Token.describe token) 1317 |> add_warning input); 1318 1319 let acc = content :: acc in 1320 consume_list_items acc 1321 | token -> 1322 let suggestion = 1323 match token with 1324 | `Begin_section_heading _ | `Tag _ -> 1325 Printf.sprintf "move %s outside the list." (Token.describe token) 1326 | _ -> 1327 Printf.sprintf "move %s into a list item, %s or %s." 1328 (Token.describe token) 1329 (Token.print (`Begin_list_item `Li)) 1330 (Token.print (`Begin_list_item `Dash)) 1331 in 1332 Parse_error.not_allowed next_token.location ~what:(Token.describe token) 1333 ~in_what:(Token.describe parent_markup) 1334 ~suggestion 1335 |> add_warning input; 1336 1337 junk input; 1338 consume_list_items acc 1339 in 1340 1341 consume_list_items [] 1342 1343(* Consumes a sequence of table rows that might start with [`Bar]. 1344 1345 This function is called immediately after '{t' ([`Begin_table `Light]) is 1346 read. The only "valid" way to exit is by reading a [`Right_brace] token, 1347 which is consumed. *) 1348and light_table ~parent_markup ~parent_markup_location input = 1349 let rec consume_rows acc ~last_loc = 1350 Reader.until_rbrace_or_eof input acc >>> fun next_token -> 1351 match next_token.Loc.value with 1352 | `Bar | #token_that_always_begins_an_inline_element -> ( 1353 let next, row, last_loc = 1354 light_table_row ~parent_markup ~last_loc input 1355 in 1356 match next with 1357 | `Continue -> consume_rows (row :: acc) ~last_loc 1358 | `Stop -> (row :: acc, last_loc)) 1359 | other_token -> 1360 Parse_error.not_allowed next_token.location 1361 ~what:(Token.describe other_token) 1362 ~in_what:(Token.describe parent_markup) 1363 |> add_warning input; 1364 junk input; 1365 consume_rows acc ~last_loc 1366 in 1367 let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in 1368 let grid = List.rev rows in 1369 (Table.Light_syntax.from_raw_data grid, brace_location) 1370 1371(* Consumes a table row that might start with [`Bar]. *) 1372and light_table_row ~parent_markup ~last_loc input = 1373 let rec consume_row acc_row acc_cell acc_space ~new_line ~last_loc = 1374 let push_cells row cell = 1375 match cell with [] -> row | _ -> List.rev cell :: row 1376 in 1377 let return row cell = List.rev (push_cells row cell) in 1378 let next_token = peek input in 1379 match next_token.value with 1380 | `End -> 1381 Parse_error.end_not_allowed next_token.location ~in_what:"table" 1382 |> add_warning input; 1383 junk input; 1384 (`Stop, return acc_row acc_cell, next_token.location) 1385 | `Right_brace -> 1386 junk input; 1387 (`Stop, return acc_row acc_cell, next_token.location) 1388 | `Space _ as token -> 1389 junk input; 1390 let i = Loc.at next_token.location token in 1391 consume_row acc_row acc_cell (i :: acc_space) ~new_line ~last_loc 1392 | `Single_newline _ | `Blank_line _ -> 1393 junk input; 1394 (`Continue, return acc_row acc_cell, last_loc) 1395 | `Bar -> 1396 junk input; 1397 let acc_row = if new_line then [] else List.rev acc_cell :: acc_row in 1398 consume_row acc_row [] [] ~new_line:false ~last_loc 1399 | #token_that_always_begins_an_inline_element as token -> 1400 let i = inline_element input next_token.location token in 1401 if Loc.spans_multiple_lines i then 1402 Parse_error.not_allowed 1403 ~what:(Token.describe (`Single_newline "")) 1404 ~in_what:(Token.describe `Begin_table_light) 1405 i.location 1406 |> add_warning input; 1407 let acc_cell = 1408 if acc_cell = [] then [ i ] else (i :: acc_space) @ acc_cell 1409 in 1410 consume_row acc_row acc_cell [] ~new_line:false 1411 ~last_loc:next_token.location 1412 | other_token -> 1413 Parse_error.not_allowed next_token.location 1414 ~what:(Token.describe other_token) 1415 ~in_what:(Token.describe parent_markup) 1416 |> add_warning input; 1417 junk input; 1418 consume_row acc_row acc_cell acc_space ~new_line ~last_loc 1419 in 1420 consume_row [] [] [] ~new_line:true ~last_loc 1421 1422(* Consumes a sequence of table rows (starting with '{tr ...}', which are 1423 represented by [`Begin_table_row] tokens). 1424 1425 This function is called immediately after '{table' ([`Begin_table `Heavy]) is 1426 read. The only "valid" way to exit is by reading a [`Right_brace] token, 1427 which is consumed. *) 1428and heavy_table ~parent_markup ~parent_markup_location input = 1429 let rec consume_rows acc ~last_loc = 1430 Reader.until_rbrace_or_eof input acc >>> fun next_token -> 1431 match next_token.Loc.value with 1432 | `Begin_table_row as token -> 1433 junk input; 1434 let items, last_loc = heavy_table_row ~parent_markup:token input in 1435 consume_rows (List.rev items :: acc) ~last_loc 1436 | token -> 1437 Parse_error.not_allowed next_token.location ~what:(Token.describe token) 1438 ~in_what:(Token.describe parent_markup) 1439 ~suggestion:"Move outside of {table ...}, or inside {tr ...}" 1440 |> add_warning input; 1441 junk input; 1442 consume_rows acc ~last_loc 1443 in 1444 let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in 1445 let grid = List.rev rows in 1446 (Table.Heavy_syntax.from_grid grid, brace_location) 1447 1448(* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }', 1449 which are represented by [`Begin_table_cell] tokens). 1450 1451 This function is called immediately after '{tr' ([`Begin_table_row]) is 1452 read. The only "valid" way to exit is by reading a [`Right_brace] token, 1453 which is consumed. *) 1454and heavy_table_row ~parent_markup input = 1455 let rec consume_cell_items acc = 1456 Reader.until_rbrace_or_eof input acc >>> fun next_token -> 1457 match next_token.Loc.value with 1458 | `Begin_table_cell kind as token -> 1459 junk input; 1460 let content, token_after_list_item, _where_in_line = 1461 block_element_list In_table_cell ~parent_markup:token input 1462 in 1463 (match token_after_list_item.value with 1464 | `Right_brace -> junk input 1465 | `End -> 1466 Parse_error.not_allowed token_after_list_item.location 1467 ~what:(Token.describe `End) ~in_what:(Token.describe token) 1468 |> add_warning input); 1469 consume_cell_items ((content, kind) :: acc) 1470 | token -> 1471 Parse_error.not_allowed next_token.location ~what:(Token.describe token) 1472 ~in_what:(Token.describe parent_markup) 1473 ~suggestion: 1474 "Move outside of {table ...}, or inside {td ...} or {th ...}" 1475 |> add_warning input; 1476 junk input; 1477 consume_cell_items acc 1478 in 1479 consume_cell_items [] 1480 1481(* {2 Entry point} *) 1482 1483let parse warnings tokens = 1484 let input : input = { tokens; warnings } in 1485 1486 let rec parse_block_elements () = 1487 let elements, last_token, _where_in_line = 1488 block_element_list Top_level ~parent_markup:`Comment input 1489 in 1490 1491 match last_token.value with 1492 | `End -> elements 1493 | `Right_brace -> 1494 Parse_error.unpaired_right_brace last_token.location 1495 |> add_warning input; 1496 1497 let block = 1498 Loc.same last_token (`Paragraph [ Loc.same last_token (`Word "}") ]) 1499 in 1500 1501 junk input; 1502 elements @ (block :: parse_block_elements ()) 1503 in 1504 let ast = parse_block_elements () in 1505 (ast, List.rev !(input.warnings))