this repo has no description
1(* This module is a recursive descent parser for the ocamldoc syntax. The parser
2 consumes a token stream of type [Token.t Stream.t], provided by the lexer,
3 and produces a comment AST of the type defined in [Parser_.Ast].
4
5 The AST has two main levels: inline elements, which can appear inside
6 paragraphs, and are spaced horizontally when presented, and block elements,
7 such as paragraphs and lists, which are spaced vertically when presented.
8 Block elements contain inline elements, but not vice versa.
9
10 Corresponding to this, the parser has three "main" functions:
11
12 - [delimited_inline_element_list] parses a run of inline elements that is
13 delimited by curly brace markup ([{...}]).
14 - [paragraph] parses a run of inline elements that make up a paragraph, and
15 is not explicitly delimited with curly braces.
16 - [block_element_list] parses a sequence of block elements. A comment is a
17 sequence of block elements, so [block_element_list] is the top-level
18 parser. It is also used for list item and tag content. *)
19
20open! Compat
21
22type 'a with_location = 'a Loc.with_location
23
24(* {2 Input} *)
25
26type input = {
27 tokens : Token.t Loc.with_location Stream.t;
28 warnings : Warning.t list ref;
29}
30
31(* {2 Output} *)
32
33let add_warning input warning = input.warnings := warning :: !(input.warnings)
34let junk input = Stream.junk input.tokens
35
36let peek input =
37 match Stream.peek input.tokens with
38 | Some token -> token
39 | None -> assert false
40
41module Table = struct
42 module Light_syntax = struct
43 let valid_align = function
44 | [ { Loc.value = `Word w; _ } ] -> (
45 match String.length w with
46 | 0 -> `Valid None
47 | 1 -> (
48 match w with
49 | "-" -> `Valid None
50 | ":" -> `Valid (Some `Center)
51 | _ -> `Invalid)
52 | len ->
53 if String.for_all (Char.equal '-') (String.sub w 1 (len - 2)) then
54 match (String.get w 0, String.get w (len - 1)) with
55 | ':', ':' -> `Valid (Some `Center)
56 | ':', '-' -> `Valid (Some `Left)
57 | '-', ':' -> `Valid (Some `Right)
58 | '-', '-' -> `Valid None
59 | _ -> `Invalid
60 else `Invalid)
61 | _ -> `Invalid
62
63 let valid_align_row lx =
64 let rec loop acc = function
65 | [] -> Some (List.rev acc)
66 | x :: q -> (
67 match valid_align x with
68 | `Invalid -> None
69 | `Valid alignment -> loop (alignment :: acc) q)
70 in
71 loop [] lx
72
73 let create ~grid ~align : Ast.table =
74 let cell_to_block (x, k) =
75 let whole_loc = Loc.span (List.map (fun x -> x.Loc.location) x) in
76 match x with
77 | [] -> ([], k)
78 | _ -> ([ Loc.at whole_loc (`Paragraph x) ], k)
79 in
80 let row_to_block = List.map cell_to_block in
81 let grid_to_block = List.map row_to_block in
82 ((grid_to_block grid, align), `Light)
83
84 let with_kind kind : 'a with_location list list -> 'a Ast.row =
85 List.map (fun c -> (c, kind))
86
87 let from_raw_data grid : Ast.table =
88 match grid with
89 | [] -> create ~grid:[] ~align:None
90 | row1 :: rows2_N -> (
91 match valid_align_row row1 with
92 (* If the first line is the align row, everything else is data. *)
93 | Some _ as align ->
94 create ~grid:(List.map (with_kind `Data) rows2_N) ~align
95 | None -> (
96 match rows2_N with
97 (* Only 1 line, if this is not the align row this is data. *)
98 | [] -> create ~grid:[ with_kind `Data row1 ] ~align:None
99 | row2 :: rows3_N -> (
100 match valid_align_row row2 with
101 (* If the second line is the align row, the first one is the
102 header and the rest is data. *)
103 | Some _ as align ->
104 let header = with_kind `Header row1 in
105 let data = List.map (with_kind `Data) rows3_N in
106 create ~grid:(header :: data) ~align
107 (* No align row in the first 2 lines, everything is considered
108 data. *)
109 | None ->
110 create ~grid:(List.map (with_kind `Data) grid) ~align:None
111 )))
112 end
113
114 module Heavy_syntax = struct
115 let create ~grid : Ast.table = ((grid, None), `Heavy)
116 let from_grid grid : Ast.table = create ~grid
117 end
118end
119
120module Reader = struct
121 let until_rbrace_or_eof input acc =
122 let rec consume () =
123 let next_token = peek input in
124 match next_token.value with
125 | `Right_brace ->
126 junk input;
127 `End (acc, next_token.location)
128 | `End ->
129 Parse_error.end_not_allowed next_token.location ~in_what:"table"
130 |> add_warning input;
131 junk input;
132 `End (acc, next_token.location)
133 | `Space _ | `Single_newline _ | `Blank_line _ ->
134 junk input;
135 consume ()
136 | _ -> `Token next_token
137 in
138 consume ()
139
140 module Infix = struct
141 let ( >>> ) consume if_token =
142 match consume with
143 | `End (ret, loc) -> (ret, loc)
144 | `Token t -> if_token t
145 end
146end
147
148open Reader.Infix
149
150(* The last token in the stream is always [`End], and it is never consumed by
151 the parser, so the [None] case is impossible. *)
152
153let npeek n input = Stream.npeek n input.tokens
154
155(* {2 Non-link inline elements} *)
156type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ]
157
158(* Convenient abbreviation for use in patterns. *)
159type token_that_always_begins_an_inline_element =
160 [ `Word of string
161 | `Code_span of string
162 | `Raw_markup of string option * string
163 | `Begin_style of style
164 | `Simple_reference of string
165 | `Begin_reference_with_replacement_text of string
166 | `Simple_link of string
167 | `Begin_link_with_replacement_text of string
168 | `Math_span of string ]
169
170(* Check that the token constructors above actually are all in [Token.t]. *)
171let _check_subset : token_that_always_begins_an_inline_element -> Token.t =
172 fun t -> (t :> Token.t)
173
174(* Consumes tokens that make up a single non-link inline element:
175
176 - a horizontal space ([`Space], significant in inline elements),
177 - a word (see [word]),
178 - a code span ([...], [`Code_span _]), or
179 - styled text ({e ...}).
180
181 The latter requires a recursive call to [delimited_inline_element_list],
182 defined below.
183
184 This should be part of [delimited_inline_element_list]; however, it is also
185 called by function [paragraph]. As a result, it is factored out, and made
186 mutually-recursive with [delimited_inline_element_list].
187
188 This is called only when it is known that the first token in the list is the
189 beginning of an inline element. In the case of [`Minus] and [`Plus], that
190 means the caller has determined that they are not a list bullet (i.e., not
191 the first non-whitespace tokens on their line).
192
193 This function consumes exactly the tokens that make up the element. *)
194let rec inline_element :
195 input -> Loc.span -> _ -> Ast.inline_element with_location =
196 fun input location next_token ->
197 match next_token with
198 | `Space _ as token ->
199 junk input;
200 Loc.at location token
201 | `Word _ as token ->
202 junk input;
203 Loc.at location token
204 (* This is actually the same memory representation as the token, complete
205 with location, and is probably the most common case. Perhaps the token
206 can be reused somehow. The same is true of [`Space], [`Code_span]. *)
207 | `Minus ->
208 junk input;
209 Loc.at location (`Word "-")
210 | `Plus ->
211 junk input;
212 Loc.at location (`Word "+")
213 | `Bar ->
214 junk input;
215 Loc.at location (`Word "|")
216 | (`Code_span _ | `Math_span _ | `Raw_markup _) as token ->
217 junk input;
218 Loc.at location token
219 | `Begin_style s as parent_markup ->
220 junk input;
221
222 let requires_leading_whitespace =
223 match s with
224 | `Bold | `Italic | `Emphasis -> true
225 | `Superscript | `Subscript -> false
226 in
227 let content, brace_location =
228 delimited_inline_element_list ~parent_markup
229 ~parent_markup_location:location ~requires_leading_whitespace input
230 in
231
232 let location = Loc.span [ location; brace_location ] in
233
234 if content = [] then
235 Parse_error.should_not_be_empty
236 ~what:(Token.describe parent_markup)
237 location
238 |> add_warning input;
239
240 Loc.at location (`Styled (s, content))
241 | `Simple_reference r ->
242 junk input;
243
244 let r_location = Loc.nudge_start (String.length "{!") location in
245 let r = Loc.at r_location r in
246
247 Loc.at location (`Reference (`Simple, r, []))
248 | `Begin_reference_with_replacement_text r as parent_markup ->
249 junk input;
250
251 let r_location = Loc.nudge_start (String.length "{{!") location in
252 let r = Loc.at r_location r in
253
254 let content, brace_location =
255 delimited_inline_element_list ~parent_markup
256 ~parent_markup_location:location ~requires_leading_whitespace:false
257 input
258 in
259
260 let location = Loc.span [ location; brace_location ] in
261
262 if content = [] then
263 Parse_error.should_not_be_empty
264 ~what:(Token.describe parent_markup)
265 location
266 |> add_warning input;
267
268 Loc.at location (`Reference (`With_text, r, content))
269 | `Simple_link u ->
270 junk input;
271
272 let u = String.trim u in
273
274 if u = "" then
275 Parse_error.should_not_be_empty
276 ~what:(Token.describe next_token)
277 location
278 |> add_warning input;
279
280 Loc.at location (`Link (u, []))
281 | `Begin_link_with_replacement_text u as parent_markup ->
282 junk input;
283
284 let u = String.trim u in
285
286 if u = "" then
287 Parse_error.should_not_be_empty
288 ~what:(Token.describe parent_markup)
289 location
290 |> add_warning input;
291
292 let content, brace_location =
293 delimited_inline_element_list ~parent_markup
294 ~parent_markup_location:location ~requires_leading_whitespace:false
295 input
296 in
297
298 `Link (u, content) |> Loc.at (Loc.span [ location; brace_location ])
299
300(* Consumes tokens that make up a sequence of inline elements that is ended by
301 a '}', a [`Right_brace] token. The brace token is also consumed.
302
303 The sequences are also preceded by some markup like '{b'. Some of these
304 markup tokens require whitespace immediately after the token, and others not.
305 The caller indicates which way that is through the
306 [~requires_leading_whitespace] argument.
307
308 Whitespace is significant in inline element lists. In particular, "foo [bar]"
309 is represented as [`Word "foo"; `Space; `Code_span "bar"], while "foo[bar]"
310 is [`Word "foo"; `Code_span "bar"]. It doesn't matter how much whitespace is
311 there, just whether it is present or not. Single newlines and horizontal
312 space in any amount are allowed. Blank lines are not, as these are separators
313 for {e block} elements.
314
315 In correct input, the first and last elements emitted will not be [`Space],
316 i.e. [`Space] appears only between other non-link inline elements. In
317 incorrect input, there might be [`Space] followed immediately by something
318 like an @author tag.
319
320 The [~parent_markup] and [~parent_markup_location] arguments are used for
321 generating error messages. *)
322and delimited_inline_element_list :
323 parent_markup:[< Token.t ] ->
324 parent_markup_location:Loc.span ->
325 requires_leading_whitespace:bool ->
326 input ->
327 Ast.inline_element with_location list * Loc.span =
328 fun ~parent_markup ~parent_markup_location ~requires_leading_whitespace
329 input ->
330 (* [~at_start_of_line] is used to interpret [`Minus] and [`Plus]. These are
331 word tokens if not the first non-whitespace tokens on their line. Then,
332 they are allowed in a non-link element list. *)
333 let rec consume_elements :
334 at_start_of_line:bool ->
335 Ast.inline_element with_location list ->
336 Ast.inline_element with_location list * Loc.span =
337 fun ~at_start_of_line acc ->
338 let next_token = peek input in
339 match next_token.value with
340 | `Right_brace ->
341 junk input;
342 (List.rev acc, next_token.location)
343 (* The [`Space] token is not space at the beginning or end of line, because
344 that is combined into [`Single_newline] or [`Blank_line] tokens. It is
345 also not at the beginning of markup (after e.g. '{b'), because that is
346 handled separately before calling
347 [consume_non_link_inline_elements], and not immediately before '}',
348 because that is combined into the [`Right_brace] token by the lexer. So,
349 it is an internal space, and we want to add it to the non-link inline
350 element list. *)
351 | (`Space _ | #token_that_always_begins_an_inline_element) as token ->
352 let acc = inline_element input next_token.location token :: acc in
353 consume_elements ~at_start_of_line:false acc
354 | `Single_newline ws ->
355 junk input;
356 let element = Loc.same next_token (`Space ws) in
357 consume_elements ~at_start_of_line:true (element :: acc)
358 | `Blank_line ws as blank ->
359 Parse_error.not_allowed ~what:(Token.describe blank)
360 ~in_what:(Token.describe parent_markup)
361 next_token.location
362 |> add_warning input;
363
364 junk input;
365 let element = Loc.same next_token (`Space ws) in
366 consume_elements ~at_start_of_line:true (element :: acc)
367 | `Bar as token ->
368 let acc = inline_element input next_token.location token :: acc in
369 consume_elements ~at_start_of_line:false acc
370 | (`Minus | `Plus) as bullet ->
371 (if at_start_of_line then
372 let suggestion =
373 Printf.sprintf "move %s so it isn't the first thing on the line."
374 (Token.print bullet)
375 in
376 Parse_error.not_allowed ~what:(Token.describe bullet)
377 ~in_what:(Token.describe parent_markup)
378 ~suggestion next_token.location
379 |> add_warning input);
380
381 let acc = inline_element input next_token.location bullet :: acc in
382 consume_elements ~at_start_of_line:false acc
383 | other_token ->
384 Parse_error.not_allowed
385 ~what:(Token.describe other_token)
386 ~in_what:(Token.describe parent_markup)
387 next_token.location
388 |> add_warning input;
389
390 let last_location =
391 match acc with
392 | last_token :: _ -> last_token.location
393 | [] -> parent_markup_location
394 in
395
396 (List.rev acc, last_location)
397 in
398
399 let first_token = peek input in
400 match first_token.value with
401 | `Space _ ->
402 junk input;
403 consume_elements ~at_start_of_line:false []
404 (* [~at_start_of_line] is [false] here because the preceding token was some
405 some markup like '{b', and we didn't move to the next line, so the next
406 token will not be the first non-whitespace token on its line. *)
407 | `Single_newline _ ->
408 junk input;
409 consume_elements ~at_start_of_line:true []
410 | `Blank_line _ as blank ->
411 (* In case the markup is immediately followed by a blank line, the error
412 message printed by the catch-all case below can be confusing, as it will
413 suggest that the markup must be followed by a newline (which it is). It
414 just must not be followed by two newlines. To explain that clearly,
415 handle that case specifically. *)
416 Parse_error.not_allowed ~what:(Token.describe blank)
417 ~in_what:(Token.describe parent_markup)
418 first_token.location
419 |> add_warning input;
420
421 junk input;
422 consume_elements ~at_start_of_line:true []
423 | `Right_brace ->
424 junk input;
425 ([], first_token.location)
426 | _ ->
427 if requires_leading_whitespace then
428 Parse_error.should_be_followed_by_whitespace
429 ~what:(Token.print parent_markup)
430 parent_markup_location
431 |> add_warning input;
432 consume_elements ~at_start_of_line:false []
433
434(* {2 Paragraphs} *)
435
436(* Consumes tokens that make up a paragraph.
437
438 A paragraph is a sequence of inline elements that ends on a blank line, or
439 explicit block markup such as a verbatim block on a new line.
440
441 Because of the significance of newlines, paragraphs are parsed line-by-line.
442 The function [paragraph] is called only when the current token is the first
443 non-whitespace token on its line, and begins an inline element. [paragraph]
444 then parses a line of inline elements. Afterwards, it looks ahead to the next
445 line. If that line also begins with an inline element, it parses that line,
446 and so on. *)
447let paragraph : input -> Ast.nestable_block_element with_location =
448 fun input ->
449 (* Parses a single line of a paragraph, consisting of inline elements. The
450 only valid ways to end a paragraph line are with [`End], [`Single_newline],
451 [`Blank_line], and [`Right_brace]. Everything else either belongs in the
452 paragraph, or signifies an attempt to begin a block element inside a
453 paragraph line, which is an error. These errors are caught elsewhere; the
454 paragraph parser just stops. *)
455 let rec paragraph_line :
456 Ast.inline_element with_location list ->
457 Ast.inline_element with_location list =
458 fun acc ->
459 let next_token = peek input in
460 match next_token.value with
461 | ( `Space _ | `Minus | `Plus | `Bar
462 | #token_that_always_begins_an_inline_element ) as token ->
463 let element = inline_element input next_token.location token in
464 paragraph_line (element :: acc)
465 | _ -> acc
466 in
467
468 (* After each line is parsed, decides whether to parse more lines. *)
469 let rec additional_lines :
470 Ast.inline_element with_location list ->
471 Ast.inline_element with_location list =
472 fun acc ->
473 match npeek 2 input with
474 | { value = `Single_newline ws; location }
475 :: { value = #token_that_always_begins_an_inline_element | `Bar; _ }
476 :: _ ->
477 junk input;
478 let acc = Loc.at location (`Space ws) :: acc in
479 let acc = paragraph_line acc in
480 additional_lines acc
481 | _ -> List.rev acc
482 in
483
484 let elements = paragraph_line [] |> additional_lines in
485 `Paragraph elements |> Loc.at (Loc.span (List.map Loc.location elements))
486
487(* {2 Block elements} *)
488
489(* {3 Helper types} *)
490
491(* The interpretation of tokens in the block parser depends on where on a line
492 each token appears. The six possible "locations" are:
493
494 - [`At_start_of_line], when only whitespace has been read on the current
495 line.
496 - [`After_tag], when a valid tag token, such as [@deprecated], has been read,
497 and only whitespace has been read since.
498 - [`After_shorthand_bullet], when a valid shorthand list item bullet, such as
499 [-], has been read, and only whitespace has been read since.
500 - [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li],
501 has been read, and only whitespace has been read since.
502 - [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read.
503 - [`After_text], when any other valid non-whitespace token has already been
504 read on the current line.
505
506 Here are some examples of how this affects the interpretation of tokens:
507
508 - A paragraph can start anywhere except [`After_text] (two paragraphs cannot
509 be on the same line, but paragraphs can be nested in just about anything).
510 - [`Minus] is interpreted as a list item bullet [`At_start_of_line],
511 [`After_tag], and [`After_explicit_list_bullet].
512 - Tags are only allowed [`At_start_of_line].
513
514 To track the location accurately, the functions that make up the block parser
515 pass explicit [where_in_line] values around and return them.
516
517 In a few cases, [where_in_line] can be inferred from what helper was called.
518 For example, the [paragraph] parser always stops on the same line as the last
519 significant token that is in the paragraph it consumed, so the location must
520 be [`After_text]. *)
521type where_in_line =
522 [ `At_start_of_line
523 | `After_tag
524 | `After_shorthand_bullet
525 | `After_explicit_list_bullet
526 | `After_table_cell
527 | `After_text ]
528
529(* The block parsing loop, function [block_element_list], stops when it
530 encounters certain tokens.
531
532 When it is called for the whole comment, or for in explicit list item
533 ([{li foo}]), it can only stop on end of input or a right brace.
534
535 When it is called inside a shorthand list item ([- foo]), it stops on end of
536 input, right brace, a blank line (indicating end of shorthand list), plus or
537 minus (indicating the start of the next list item), or a section heading or
538 tag, which cannot be nested in list markup.
539
540 The block parser [block_element_list] explicitly returns the token that
541 stopped it, with a type more precise than [Token.t stream_head]: if it was
542 called for the whole comment or an explicit list item, the stop token will
543 have type [stops_at_delimiters stream_head], and if it was called for a
544 shorthand list item, the stop token will have type
545 [implicit_stop stream_head]. This allows the calling parsers to write precise
546 cases for exactly the tokens that might be at the front of the stream after
547 the block parser returns. *)
548type stops_at_delimiters = [ `End | `Right_brace ]
549type code_stop = [ `End | `Right_code_delimiter ]
550
551type stopped_implicitly =
552 [ `End
553 | `Blank_line of string
554 | `Right_brace
555 | `Minus
556 | `Plus
557 | Token.section_heading
558 | Token.media_markup
559 | Token.tag ]
560
561(* Ensure that the above two types are really subsets of [Token.t]. *)
562let _check_subset : stops_at_delimiters -> Token.t = fun t -> (t :> Token.t)
563let _check_subset : stopped_implicitly -> Token.t = fun t -> (t :> Token.t)
564
565(* The different contexts in which the block parser [block_element_list] can be
566 called. The block parser's behavior depends somewhat on the context. For
567 example, while paragraphs are allowed anywhere, shorthand lists are not
568 allowed immediately inside other shorthand lists, while tags are not allowed
569 anywhere except at the comment top level.
570
571 Besides telling the block parser how to behave, each context also carries two
572 types, which determine the return type of the block parser:
573
574 - The type of blocks the parser returns. Note that [nestable_block_element]
575 is included in [block_element]. However, the extra block kinds in
576 [block_element] are only allowed at the comment top level.
577 - The type of token that the block parser stops at. See discussion above. *)
578type ('block, 'stops_at_which_tokens) context =
579 | Top_level : (Ast.block_element, stops_at_delimiters) context
580 | In_implicitly_ended :
581 [ `Tag | `Shorthand_list ]
582 -> (Ast.nestable_block_element, stopped_implicitly) context
583 | In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context
584 | In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context
585 | In_code_results : (Ast.nestable_block_element, code_stop) context
586
587(* This is a no-op. It is needed to prove to the type system that nestable block
588 elements are acceptable block elements in all contexts. *)
589let accepted_in_all_contexts : type block stops_at_which_tokens.
590 (block, stops_at_which_tokens) context ->
591 Ast.nestable_block_element ->
592 block =
593 fun context block ->
594 match context with
595 | Top_level -> (block :> Ast.block_element)
596 | In_implicitly_ended (`Tag | `Shorthand_list) -> block
597 | In_explicit_list -> block
598 | In_table_cell -> block
599 | In_code_results -> block
600
601(* Converts a tag to a series of words. This is used in error recovery, when a
602 tag cannot be generated. *)
603let tag_to_words = function
604 | `Author s -> [ `Word "@author"; `Space " "; `Word s ]
605 | `Before s -> [ `Word "@before"; `Space " "; `Word s ]
606 | `Canonical s -> [ `Word "@canonical"; `Space " "; `Word s ]
607 | `Deprecated -> [ `Word "@deprecated" ]
608 | `Inline -> [ `Word "@inline" ]
609 | `Open -> [ `Word "@open" ]
610 | `Closed -> [ `Word "@closed" ]
611 | `Hidden -> [ `Word "@hidden" ]
612 | `Param s -> [ `Word "@param"; `Space " "; `Word s ]
613 | `Raise s -> [ `Word "@raise"; `Space " "; `Word s ]
614 | `Return -> [ `Word "@return" ]
615 | `See (`Document, s) -> [ `Word "@see"; `Space " "; `Word ("\"" ^ s ^ "\"") ]
616 | `See (`File, s) -> [ `Word "@see"; `Space " "; `Word ("'" ^ s ^ "'") ]
617 | `See (`Url, s) -> [ `Word "@see"; `Space " "; `Word ("<" ^ s ^ ">") ]
618 | `Since s -> [ `Word "@since"; `Space " "; `Word s ]
619 | `Version s -> [ `Word "@version"; `Space " "; `Word s ]
620 | `Children_order -> [ `Word "@children_order" ]
621 | `Toc_status -> [ `Word "@toc_status" ]
622 | `Order_category -> [ `Word "@order_category" ]
623 | `Short_title -> [ `Word "@short_title" ]
624 | `Custom tag -> [ `Word ("@" ^ tag) ]
625
626(* {3 Block element lists} *)
627
628(* Consumes tokens making up a sequence of block elements. These are:
629
630 - paragraphs,
631 - code blocks,
632 - verbatim text blocks,
633 - tables,
634 - lists, and
635 - section headings. *)
636let rec block_element_list : type block stops_at_which_tokens.
637 (block, stops_at_which_tokens) context ->
638 parent_markup:[< Token.t | `Comment ] ->
639 input ->
640 block with_location list
641 * stops_at_which_tokens with_location
642 * where_in_line =
643 fun context ~parent_markup input ->
644 let rec consume_block_elements :
645 where_in_line ->
646 block with_location list ->
647 block with_location list
648 * stops_at_which_tokens with_location
649 * where_in_line =
650 fun where_in_line acc ->
651 let describe token =
652 match token with
653 | #token_that_always_begins_an_inline_element -> "paragraph"
654 | _ -> Token.describe token
655 in
656
657 let warn_if_after_text { Loc.location; value = token } =
658 if where_in_line = `After_text then
659 Parse_error.should_begin_on_its_own_line ~what:(describe token) location
660 |> add_warning input
661 in
662
663 let warn_because_not_at_top_level { Loc.location; value = token } =
664 let suggestion =
665 Printf.sprintf "move %s outside of any other markup."
666 (Token.print token)
667 in
668 Parse_error.not_allowed ~what:(Token.describe token)
669 ~in_what:(Token.describe parent_markup)
670 ~suggestion location
671 |> add_warning input
672 in
673
674 match peek input with
675 (* Terminators: the two tokens that terminate anything. *)
676 | { value = `End; _ } as next_token -> (
677 match context with
678 | Top_level -> (List.rev acc, next_token, where_in_line)
679 | In_implicitly_ended (`Tag | `Shorthand_list) ->
680 (List.rev acc, next_token, where_in_line)
681 | In_explicit_list -> (List.rev acc, next_token, where_in_line)
682 | In_table_cell -> (List.rev acc, next_token, where_in_line)
683 | In_code_results -> (List.rev acc, next_token, where_in_line))
684 | { value = `Right_brace; _ } as next_token -> (
685 (* This little absurdity is needed to satisfy the type system. Without it,
686 OCaml is unable to prove that [stream_head] has the right type for all
687 possible values of [context]. *)
688 match context with
689 | Top_level -> (List.rev acc, next_token, where_in_line)
690 | In_implicitly_ended (`Tag | `Shorthand_list) ->
691 (List.rev acc, next_token, where_in_line)
692 | In_explicit_list -> (List.rev acc, next_token, where_in_line)
693 | In_table_cell -> (List.rev acc, next_token, where_in_line)
694 | In_code_results ->
695 junk input;
696 consume_block_elements where_in_line acc)
697 | { value = `Right_code_delimiter; _ } as next_token -> (
698 match context with
699 | In_code_results -> (List.rev acc, next_token, where_in_line)
700 | _ ->
701 junk input;
702 consume_block_elements where_in_line acc)
703 (* Whitespace. This can terminate some kinds of block elements. It is also
704 necessary to track it to interpret [`Minus] and [`Plus] correctly, as
705 well as to ensure that all block elements begin on their own line. *)
706 | { value = `Space _; _ } ->
707 junk input;
708 consume_block_elements where_in_line acc
709 | { value = `Single_newline _; _ } ->
710 junk input;
711 consume_block_elements `At_start_of_line acc
712 | { value = `Blank_line _; _ } as next_token -> (
713 match context with
714 (* Blank lines terminate shorthand lists ([- foo]) and tags. They also
715 terminate paragraphs, but the paragraph parser is aware of that
716 internally. *)
717 | In_implicitly_ended (`Tag | `Shorthand_list) ->
718 (List.rev acc, next_token, where_in_line)
719 (* Otherwise, blank lines are pretty much like single newlines. *)
720 | _ ->
721 junk input;
722 consume_block_elements `At_start_of_line acc)
723 (* Explicit list items ([{li ...}] and [{- ...}]) can never appear directly
724 in block content. They can only appear inside [{ul ...}] and [{ol ...}].
725 So, catch those. *)
726 | { value = `Begin_list_item _ as token; location } ->
727 let suggestion =
728 Printf.sprintf "move %s into %s, or use %s." (Token.print token)
729 (Token.describe (`Begin_list `Unordered))
730 (Token.describe `Minus)
731 in
732 Parse_error.not_allowed ~what:(Token.describe token)
733 ~in_what:(Token.describe parent_markup)
734 ~suggestion location
735 |> add_warning input;
736
737 junk input;
738 consume_block_elements where_in_line acc
739 (* Table rows ([{tr ...}]) can never appear directly
740 in block content. They can only appear inside [{table ...}]. *)
741 | { value = `Begin_table_row as token; location } ->
742 let suggestion =
743 Printf.sprintf "move %s into %s." (Token.print token)
744 (Token.describe `Begin_table_heavy)
745 in
746 Parse_error.not_allowed ~what:(Token.describe token)
747 ~in_what:(Token.describe parent_markup)
748 ~suggestion location
749 |> add_warning input;
750 junk input;
751 consume_block_elements where_in_line acc
752 (* Table cells ([{th ...}] and [{td ...}]) can never appear directly
753 in block content. They can only appear inside [{tr ...}]. *)
754 | { value = `Begin_table_cell _ as token; location } ->
755 let suggestion =
756 Printf.sprintf "move %s into %s." (Token.print token)
757 (Token.describe `Begin_table_row)
758 in
759 Parse_error.not_allowed ~what:(Token.describe token)
760 ~in_what:(Token.describe parent_markup)
761 ~suggestion location
762 |> add_warning input;
763 junk input;
764 consume_block_elements where_in_line acc
765 (* Tags. These can appear at the top level only. *)
766 | { value = `Tag tag as token; location } as next_token -> (
767 let recover_when_not_at_top_level context =
768 warn_because_not_at_top_level next_token;
769 junk input;
770 let words = List.map (Loc.at location) (tag_to_words tag) in
771 let paragraph =
772 `Paragraph words
773 |> accepted_in_all_contexts context
774 |> Loc.at location
775 in
776 consume_block_elements `At_start_of_line (paragraph :: acc)
777 in
778
779 match context with
780 (* Tags cannot make sense in an explicit list ([{ul {li ...}}]). *)
781 | In_explicit_list -> recover_when_not_at_top_level context
782 (* If a tag starts at the beginning of a line, it terminates the preceding
783 tag and/or the current shorthand list. In this case, return to the
784 caller, and let the caller decide how to interpret the tag token. *)
785 | In_implicitly_ended (`Tag | `Shorthand_list) ->
786 if where_in_line = `At_start_of_line then
787 (List.rev acc, next_token, where_in_line)
788 else recover_when_not_at_top_level context
789 | In_table_cell -> recover_when_not_at_top_level context
790 | In_code_results -> recover_when_not_at_top_level context
791 (* If this is the top-level call to [block_element_list], parse the
792 tag. *)
793 | Top_level -> (
794 if where_in_line <> `At_start_of_line then
795 Parse_error.should_begin_on_its_own_line
796 ~what:(Token.describe token) location
797 |> add_warning input;
798
799 junk input;
800
801 match tag with
802 | (`Author s | `Since s | `Version s | `Canonical s) as tag ->
803 let s = String.trim s in
804 if s = "" then
805 Parse_error.should_not_be_empty ~what:(Token.describe token)
806 location
807 |> add_warning input;
808 let tag =
809 match tag with
810 | `Author _ -> `Author s
811 | `Since _ -> `Since s
812 | `Version _ -> `Version s
813 | `Canonical _ ->
814 (* TODO The location is only approximate, as we need lexer
815 cooperation to get the real location. *)
816 let r_location =
817 Loc.nudge_start (String.length "@canonical ") location
818 in
819 `Canonical (Loc.at r_location s)
820 in
821
822 let tag = Loc.at location (`Tag tag) in
823 consume_block_elements `After_text (tag :: acc)
824 | ( `Deprecated | `Return | `Children_order | `Short_title
825 | `Toc_status | `Order_category | `Custom _ ) as tag ->
826 let content, _stream_head, where_in_line =
827 block_element_list (In_implicitly_ended `Tag)
828 ~parent_markup:token input
829 in
830 let tag =
831 match tag with
832 | `Deprecated -> `Deprecated content
833 | `Toc_status -> `Toc_status content
834 | `Return -> `Return content
835 | `Children_order -> `Children_order content
836 | `Short_title -> `Short_title content
837 | `Order_category -> `Order_category content
838 | `Custom s -> `Custom (s, content)
839 in
840 let location =
841 location :: List.map Loc.location content |> Loc.span
842 in
843 let tag = Loc.at location (`Tag tag) in
844 consume_block_elements where_in_line (tag :: acc)
845 | (`Param _ | `Raise _ | `Before _) as tag ->
846 let content, _stream_head, where_in_line =
847 block_element_list (In_implicitly_ended `Tag)
848 ~parent_markup:token input
849 in
850 let tag =
851 match tag with
852 | `Param s -> `Param (s, content)
853 | `Raise s -> `Raise (s, content)
854 | `Before s -> `Before (s, content)
855 in
856 let location =
857 location :: List.map Loc.location content |> Loc.span
858 in
859 let tag = Loc.at location (`Tag tag) in
860 consume_block_elements where_in_line (tag :: acc)
861 | `See (kind, target) ->
862 let content, _next_token, where_in_line =
863 block_element_list (In_implicitly_ended `Tag)
864 ~parent_markup:token input
865 in
866 let location =
867 location :: List.map Loc.location content |> Loc.span
868 in
869 let tag = `Tag (`See (kind, target, content)) in
870 let tag = Loc.at location tag in
871 consume_block_elements where_in_line (tag :: acc)
872 | (`Inline | `Open | `Closed | `Hidden) as tag ->
873 let tag = Loc.at location (`Tag tag) in
874 consume_block_elements `After_text (tag :: acc)))
875 | ( { value = #token_that_always_begins_an_inline_element; _ }
876 | { value = `Bar; _ } ) as next_token ->
877 warn_if_after_text next_token;
878
879 let block = paragraph input in
880 let block = Loc.map (accepted_in_all_contexts context) block in
881 let acc = block :: acc in
882 consume_block_elements `After_text acc
883 | { value = `Verbatim s as token; location } as next_token ->
884 warn_if_after_text next_token;
885 if s = "" then
886 Parse_error.should_not_be_empty ~what:(Token.describe token) location
887 |> add_warning input;
888
889 junk input;
890 let block = accepted_in_all_contexts context token in
891 let block = Loc.at location block in
892 let acc = block :: acc in
893 consume_block_elements `After_text acc
894 | { value = `Math_block s as token; location } as next_token ->
895 warn_if_after_text next_token;
896 if s = "" then
897 Parse_error.should_not_be_empty ~what:(Token.describe token) location
898 |> add_warning input;
899
900 junk input;
901 let block = accepted_in_all_contexts context token in
902 let block = Loc.at location block in
903 let acc = block :: acc in
904 consume_block_elements `After_text acc
905 | {
906 value =
907 `Code_block (meta, delim, { value = s; location = v_loc }, has_outputs)
908 as token;
909 location;
910 } as next_token ->
911 warn_if_after_text next_token;
912 junk input;
913 let delimiter = if delim = "" then None else Some delim in
914 let output, location =
915 if not has_outputs then (None, location)
916 else
917 let content, next_token, _where_in_line =
918 block_element_list In_code_results ~parent_markup:token input
919 in
920 junk input;
921 let locations =
922 location :: List.map (fun content -> content.Loc.location) content
923 in
924 let location = Loc.span locations in
925 let location = { location with end_ = next_token.location.end_ } in
926 (Some content, location)
927 in
928
929 if s = "" then
930 Parse_error.should_not_be_empty ~what:(Token.describe token) location
931 |> add_warning input;
932
933 let meta =
934 match meta with
935 | None -> None
936 | Some (language, tags) -> Some { Ast.language; tags }
937 in
938 let block =
939 accepted_in_all_contexts context
940 (`Code_block
941 {
942 Ast.meta;
943 delimiter;
944 content = { value = s; location = v_loc };
945 output;
946 })
947 in
948 let block = Loc.at location block in
949 let acc = block :: acc in
950 consume_block_elements `After_text acc
951 | { value = `Modules s as token; location } as next_token ->
952 warn_if_after_text next_token;
953
954 junk input;
955
956 (* TODO Use some library for a splitting function, or move this out into a
957 Util module. *)
958 let split_string delimiters s =
959 let rec scan_delimiters acc index =
960 if index >= String.length s then List.rev acc
961 else if String.contains delimiters s.[index] then
962 scan_delimiters acc (index + 1)
963 else scan_word acc index (index + 1)
964 and scan_word acc start_index index =
965 if index >= String.length s then
966 let word = String.sub s start_index (index - start_index) in
967 List.rev (word :: acc)
968 else if String.contains delimiters s.[index] then
969 let word = String.sub s start_index (index - start_index) in
970 scan_delimiters (word :: acc) (index + 1)
971 else scan_word acc start_index (index + 1)
972 in
973
974 scan_delimiters [] 0
975 in
976
977 (* TODO Correct locations await a full implementation of {!modules}
978 parsing. *)
979 let modules =
980 split_string " \t\r\n" s |> List.map (fun r -> Loc.at location r)
981 in
982
983 if modules = [] then
984 Parse_error.should_not_be_empty ~what:(Token.describe token) location
985 |> add_warning input;
986
987 let block = accepted_in_all_contexts context (`Modules modules) in
988 let block = Loc.at location block in
989 let acc = block :: acc in
990 consume_block_elements `After_text acc
991 | { value = `Begin_list kind as token; location } as next_token ->
992 warn_if_after_text next_token;
993
994 junk input;
995
996 let items, brace_location =
997 explicit_list_items ~parent_markup:token input
998 in
999 if items = [] then
1000 Parse_error.should_not_be_empty ~what:(Token.describe token) location
1001 |> add_warning input;
1002
1003 let location = Loc.span [ location; brace_location ] in
1004 let block = `List (kind, `Heavy, items) in
1005 let block = accepted_in_all_contexts context block in
1006 let block = Loc.at location block in
1007 let acc = block :: acc in
1008 consume_block_elements `After_text acc
1009 | { value = (`Begin_table_light | `Begin_table_heavy) as token; location }
1010 as next_token ->
1011 warn_if_after_text next_token;
1012 junk input;
1013 let block, brace_location =
1014 let parent_markup = token in
1015 let parent_markup_location = location in
1016 match token with
1017 | `Begin_table_light ->
1018 light_table input ~parent_markup ~parent_markup_location
1019 | `Begin_table_heavy ->
1020 heavy_table input ~parent_markup ~parent_markup_location
1021 in
1022 let location = Loc.span [ location; brace_location ] in
1023 let block = accepted_in_all_contexts context (`Table block) in
1024 let block = Loc.at location block in
1025 let acc = block :: acc in
1026 consume_block_elements `After_text acc
1027 | { value = (`Minus | `Plus) as token; location } as next_token -> (
1028 (match where_in_line with
1029 | `After_text | `After_shorthand_bullet ->
1030 Parse_error.should_begin_on_its_own_line
1031 ~what:(Token.describe token) location
1032 |> add_warning input
1033 | _ -> ());
1034
1035 match context with
1036 | In_implicitly_ended `Shorthand_list ->
1037 (List.rev acc, next_token, where_in_line)
1038 | _ ->
1039 let items, where_in_line =
1040 shorthand_list_items next_token where_in_line input
1041 in
1042 let kind =
1043 match token with `Minus -> `Unordered | `Plus -> `Ordered
1044 in
1045 let location =
1046 location :: List.map Loc.location (List.flatten items) |> Loc.span
1047 in
1048 let block = `List (kind, `Light, items) in
1049 let block = accepted_in_all_contexts context block in
1050 let block = Loc.at location block in
1051 let acc = block :: acc in
1052 consume_block_elements where_in_line acc)
1053 | { value = `Begin_section_heading (level, label) as token; location } as
1054 next_token -> (
1055 let recover_when_not_at_top_level context =
1056 warn_because_not_at_top_level next_token;
1057 junk input;
1058 let content, brace_location =
1059 delimited_inline_element_list ~parent_markup:token
1060 ~parent_markup_location:location ~requires_leading_whitespace:true
1061 input
1062 in
1063 let location = Loc.span [ location; brace_location ] in
1064 let paragraph =
1065 `Paragraph content
1066 |> accepted_in_all_contexts context
1067 |> Loc.at location
1068 in
1069 consume_block_elements `At_start_of_line (paragraph :: acc)
1070 in
1071
1072 match context with
1073 | In_implicitly_ended (`Tag | `Shorthand_list) ->
1074 if where_in_line = `At_start_of_line then
1075 (List.rev acc, next_token, where_in_line)
1076 else recover_when_not_at_top_level context
1077 | In_explicit_list -> recover_when_not_at_top_level context
1078 | In_table_cell -> recover_when_not_at_top_level context
1079 | In_code_results -> recover_when_not_at_top_level context
1080 | Top_level ->
1081 if where_in_line <> `At_start_of_line then
1082 Parse_error.should_begin_on_its_own_line
1083 ~what:(Token.describe token) location
1084 |> add_warning input;
1085
1086 let label =
1087 match label with
1088 | Some "" ->
1089 Parse_error.should_not_be_empty ~what:"heading label" location
1090 |> add_warning input;
1091 None
1092 | _ -> label
1093 in
1094
1095 junk input;
1096
1097 let content, brace_location =
1098 delimited_inline_element_list ~parent_markup:token
1099 ~parent_markup_location:location
1100 ~requires_leading_whitespace:true input
1101 in
1102 if content = [] then
1103 Parse_error.should_not_be_empty ~what:(Token.describe token)
1104 location
1105 |> add_warning input;
1106
1107 let location = Loc.span [ location; brace_location ] in
1108 let heading = `Heading (level, label, content) in
1109 let heading = Loc.at location heading in
1110 let acc = heading :: acc in
1111 consume_block_elements `After_text acc)
1112 | { value = `Begin_paragraph_style _ as token; location } ->
1113 junk input;
1114 let content, brace_location =
1115 delimited_inline_element_list ~parent_markup:token
1116 ~parent_markup_location:location ~requires_leading_whitespace:true
1117 input
1118 in
1119 let location = Loc.span [ location; brace_location ] in
1120
1121 Parse_error.markup_should_not_be_used ~what:(Token.describe token)
1122 location
1123 |> add_warning input;
1124
1125 let paragraph =
1126 `Paragraph content
1127 |> accepted_in_all_contexts context
1128 |> Loc.at location
1129 in
1130 consume_block_elements `At_start_of_line (paragraph :: acc)
1131 | {
1132 location;
1133 value = `Media_with_replacement_text (href, media, content) as token;
1134 } ->
1135 junk input;
1136
1137 let r_location =
1138 Loc.nudge_start
1139 (String.length @@ Token.s_of_media `Replaced media)
1140 location
1141 |> Loc.nudge_end (String.length content + 1)
1142 (* +1 for closing character *)
1143 in
1144 let c_location =
1145 Loc.nudge_start
1146 (String.length (Token.s_of_media `Replaced media)
1147 + String.length (match href with `Reference s | `Link s -> s))
1148 location
1149 |> Loc.nudge_end 1
1150 in
1151 let content = String.trim content in
1152 let href = href |> Loc.at r_location in
1153
1154 if content = "" then
1155 Parse_error.should_not_be_empty ~what:(Token.describe token)
1156 c_location
1157 |> add_warning input;
1158
1159 let block = `Media (`Simple, href, content, media) in
1160 let block = accepted_in_all_contexts context block in
1161 let block = Loc.at location block in
1162 let acc = block :: acc in
1163 consume_block_elements `After_text acc
1164 | { location; value = `Simple_media (href, media) } ->
1165 junk input;
1166
1167 let r_location =
1168 Loc.nudge_start
1169 (String.length @@ Token.s_of_media `Simple media)
1170 location
1171 |> Loc.nudge_end 1
1172 in
1173 let href = href |> Loc.at r_location in
1174 let block = `Media (`Simple, href, "", media) in
1175 let block = accepted_in_all_contexts context block in
1176 let block = Loc.at location block in
1177 let acc = block :: acc in
1178 consume_block_elements `After_text acc
1179 in
1180
1181 let where_in_line =
1182 match context with
1183 | Top_level -> `At_start_of_line
1184 | In_implicitly_ended `Shorthand_list -> `After_shorthand_bullet
1185 | In_explicit_list -> `After_explicit_list_bullet
1186 | In_table_cell -> `After_table_cell
1187 | In_code_results -> `After_tag
1188 | In_implicitly_ended `Tag -> `After_tag
1189 in
1190
1191 consume_block_elements where_in_line []
1192
1193(* {3 Lists} *)
1194
1195(* Consumes a sequence of implicit list items. Each one consists of a [`Minus]
1196 or [`Plus] token, followed by block elements until:
1197
1198 - a blank line, or
1199 - a list bullet of the opposite kind (e.g. [`Plus] for a [`Minus] list).
1200
1201 This function is called when the next token is known to be [`Minus] or
1202 [`Plus]. It consumes that token, and calls the block element parser (see
1203 above). That parser returns to [implicit_list_items] only on [`Blank_line],
1204 [`End], [`Minus] or [`Plus] at the start of a line, or [`Right_brace]. *)
1205and shorthand_list_items :
1206 [ `Minus | `Plus ] with_location ->
1207 where_in_line ->
1208 input ->
1209 Ast.nestable_block_element with_location list list * where_in_line =
1210 fun first_token where_in_line input ->
1211 let bullet_token = first_token.value in
1212
1213 let rec consume_list_items :
1214 [> ] with_location ->
1215 where_in_line ->
1216 Ast.nestable_block_element with_location list list ->
1217 Ast.nestable_block_element with_location list list * where_in_line =
1218 fun next_token where_in_line acc ->
1219 match next_token.value with
1220 | `End | `Right_brace | `Blank_line _ | `Tag _ | `Begin_section_heading _
1221 | `Simple_media _ | `Media_with_replacement_text _ ->
1222 (List.rev acc, where_in_line)
1223 | (`Minus | `Plus) as bullet ->
1224 if bullet = bullet_token then (
1225 junk input;
1226
1227 let content, stream_head, where_in_line =
1228 block_element_list (In_implicitly_ended `Shorthand_list)
1229 ~parent_markup:bullet input
1230 in
1231 if content = [] then
1232 Parse_error.should_not_be_empty ~what:(Token.describe bullet)
1233 next_token.location
1234 |> add_warning input;
1235
1236 let acc = content :: acc in
1237 consume_list_items stream_head where_in_line acc)
1238 else (List.rev acc, where_in_line)
1239 in
1240
1241 consume_list_items
1242 (first_token :> stopped_implicitly with_location)
1243 where_in_line []
1244
1245(* Consumes a sequence of explicit list items (starting with '{li ...}' and
1246 '{-...}', which are represented by [`Begin_list_item _] tokens).
1247
1248 This function is called immediately after '{ul' or '{ol' ([`Begin_list _]) is
1249 read. The only "valid" way to exit is by reading a [`Right_brace] token,
1250 which is consumed.
1251
1252 Whitespace inside the list, but outside list items, is not significant – this
1253 parsing function consumes all of it. Otherwise, only list item start tokens
1254 are accepted. Everything else is an error. *)
1255and explicit_list_items :
1256 parent_markup:[< Token.t ] ->
1257 input ->
1258 Ast.nestable_block_element with_location list list * Loc.span =
1259 fun ~parent_markup input ->
1260 let rec consume_list_items :
1261 Ast.nestable_block_element with_location list list ->
1262 Ast.nestable_block_element with_location list list * Loc.span =
1263 fun acc ->
1264 let next_token = peek input in
1265 match next_token.value with
1266 | `End ->
1267 Parse_error.end_not_allowed next_token.location
1268 ~in_what:(Token.describe parent_markup)
1269 |> add_warning input;
1270 (List.rev acc, next_token.location)
1271 | `Right_brace ->
1272 junk input;
1273 (List.rev acc, next_token.location)
1274 | `Space _ | `Single_newline _ | `Blank_line _ ->
1275 junk input;
1276 consume_list_items acc
1277 | `Begin_list_item kind as token ->
1278 junk input;
1279
1280 (* '{li', represented by [`Begin_list_item `Li], must be followed by
1281 whitespace. *)
1282 (if kind = `Li then
1283 match (peek input).value with
1284 | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace ->
1285 ()
1286 (* The presence of [`Right_brace] above requires some explanation:
1287
1288 - It is better to be silent about missing whitespace if the next
1289 token is [`Right_brace], because the error about an empty list
1290 item will be generated below, and that error is more important to
1291 the user.
1292 - The [`Right_brace] token also happens to include all whitespace
1293 before it, as a convenience for the rest of the parser. As a
1294 result, not ignoring it could be wrong: there could in fact be
1295 whitespace in the concrete syntax immediately after '{li', just
1296 it is not represented as [`Space], [`Single_newline], or
1297 [`Blank_line]. *)
1298 | _ ->
1299 Parse_error.should_be_followed_by_whitespace next_token.location
1300 ~what:(Token.print token)
1301 |> add_warning input);
1302
1303 let content, token_after_list_item, _where_in_line =
1304 block_element_list In_explicit_list ~parent_markup:token input
1305 in
1306
1307 if content = [] then
1308 Parse_error.should_not_be_empty next_token.location
1309 ~what:(Token.describe token)
1310 |> add_warning input;
1311
1312 (match token_after_list_item.value with
1313 | `Right_brace -> junk input
1314 | `End ->
1315 Parse_error.end_not_allowed token_after_list_item.location
1316 ~in_what:(Token.describe token)
1317 |> add_warning input);
1318
1319 let acc = content :: acc in
1320 consume_list_items acc
1321 | token ->
1322 let suggestion =
1323 match token with
1324 | `Begin_section_heading _ | `Tag _ ->
1325 Printf.sprintf "move %s outside the list." (Token.describe token)
1326 | _ ->
1327 Printf.sprintf "move %s into a list item, %s or %s."
1328 (Token.describe token)
1329 (Token.print (`Begin_list_item `Li))
1330 (Token.print (`Begin_list_item `Dash))
1331 in
1332 Parse_error.not_allowed next_token.location ~what:(Token.describe token)
1333 ~in_what:(Token.describe parent_markup)
1334 ~suggestion
1335 |> add_warning input;
1336
1337 junk input;
1338 consume_list_items acc
1339 in
1340
1341 consume_list_items []
1342
1343(* Consumes a sequence of table rows that might start with [`Bar].
1344
1345 This function is called immediately after '{t' ([`Begin_table `Light]) is
1346 read. The only "valid" way to exit is by reading a [`Right_brace] token,
1347 which is consumed. *)
1348and light_table ~parent_markup ~parent_markup_location input =
1349 let rec consume_rows acc ~last_loc =
1350 Reader.until_rbrace_or_eof input acc >>> fun next_token ->
1351 match next_token.Loc.value with
1352 | `Bar | #token_that_always_begins_an_inline_element -> (
1353 let next, row, last_loc =
1354 light_table_row ~parent_markup ~last_loc input
1355 in
1356 match next with
1357 | `Continue -> consume_rows (row :: acc) ~last_loc
1358 | `Stop -> (row :: acc, last_loc))
1359 | other_token ->
1360 Parse_error.not_allowed next_token.location
1361 ~what:(Token.describe other_token)
1362 ~in_what:(Token.describe parent_markup)
1363 |> add_warning input;
1364 junk input;
1365 consume_rows acc ~last_loc
1366 in
1367 let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in
1368 let grid = List.rev rows in
1369 (Table.Light_syntax.from_raw_data grid, brace_location)
1370
1371(* Consumes a table row that might start with [`Bar]. *)
1372and light_table_row ~parent_markup ~last_loc input =
1373 let rec consume_row acc_row acc_cell acc_space ~new_line ~last_loc =
1374 let push_cells row cell =
1375 match cell with [] -> row | _ -> List.rev cell :: row
1376 in
1377 let return row cell = List.rev (push_cells row cell) in
1378 let next_token = peek input in
1379 match next_token.value with
1380 | `End ->
1381 Parse_error.end_not_allowed next_token.location ~in_what:"table"
1382 |> add_warning input;
1383 junk input;
1384 (`Stop, return acc_row acc_cell, next_token.location)
1385 | `Right_brace ->
1386 junk input;
1387 (`Stop, return acc_row acc_cell, next_token.location)
1388 | `Space _ as token ->
1389 junk input;
1390 let i = Loc.at next_token.location token in
1391 consume_row acc_row acc_cell (i :: acc_space) ~new_line ~last_loc
1392 | `Single_newline _ | `Blank_line _ ->
1393 junk input;
1394 (`Continue, return acc_row acc_cell, last_loc)
1395 | `Bar ->
1396 junk input;
1397 let acc_row = if new_line then [] else List.rev acc_cell :: acc_row in
1398 consume_row acc_row [] [] ~new_line:false ~last_loc
1399 | #token_that_always_begins_an_inline_element as token ->
1400 let i = inline_element input next_token.location token in
1401 if Loc.spans_multiple_lines i then
1402 Parse_error.not_allowed
1403 ~what:(Token.describe (`Single_newline ""))
1404 ~in_what:(Token.describe `Begin_table_light)
1405 i.location
1406 |> add_warning input;
1407 let acc_cell =
1408 if acc_cell = [] then [ i ] else (i :: acc_space) @ acc_cell
1409 in
1410 consume_row acc_row acc_cell [] ~new_line:false
1411 ~last_loc:next_token.location
1412 | other_token ->
1413 Parse_error.not_allowed next_token.location
1414 ~what:(Token.describe other_token)
1415 ~in_what:(Token.describe parent_markup)
1416 |> add_warning input;
1417 junk input;
1418 consume_row acc_row acc_cell acc_space ~new_line ~last_loc
1419 in
1420 consume_row [] [] [] ~new_line:true ~last_loc
1421
1422(* Consumes a sequence of table rows (starting with '{tr ...}', which are
1423 represented by [`Begin_table_row] tokens).
1424
1425 This function is called immediately after '{table' ([`Begin_table `Heavy]) is
1426 read. The only "valid" way to exit is by reading a [`Right_brace] token,
1427 which is consumed. *)
1428and heavy_table ~parent_markup ~parent_markup_location input =
1429 let rec consume_rows acc ~last_loc =
1430 Reader.until_rbrace_or_eof input acc >>> fun next_token ->
1431 match next_token.Loc.value with
1432 | `Begin_table_row as token ->
1433 junk input;
1434 let items, last_loc = heavy_table_row ~parent_markup:token input in
1435 consume_rows (List.rev items :: acc) ~last_loc
1436 | token ->
1437 Parse_error.not_allowed next_token.location ~what:(Token.describe token)
1438 ~in_what:(Token.describe parent_markup)
1439 ~suggestion:"Move outside of {table ...}, or inside {tr ...}"
1440 |> add_warning input;
1441 junk input;
1442 consume_rows acc ~last_loc
1443 in
1444 let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in
1445 let grid = List.rev rows in
1446 (Table.Heavy_syntax.from_grid grid, brace_location)
1447
1448(* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }',
1449 which are represented by [`Begin_table_cell] tokens).
1450
1451 This function is called immediately after '{tr' ([`Begin_table_row]) is
1452 read. The only "valid" way to exit is by reading a [`Right_brace] token,
1453 which is consumed. *)
1454and heavy_table_row ~parent_markup input =
1455 let rec consume_cell_items acc =
1456 Reader.until_rbrace_or_eof input acc >>> fun next_token ->
1457 match next_token.Loc.value with
1458 | `Begin_table_cell kind as token ->
1459 junk input;
1460 let content, token_after_list_item, _where_in_line =
1461 block_element_list In_table_cell ~parent_markup:token input
1462 in
1463 (match token_after_list_item.value with
1464 | `Right_brace -> junk input
1465 | `End ->
1466 Parse_error.not_allowed token_after_list_item.location
1467 ~what:(Token.describe `End) ~in_what:(Token.describe token)
1468 |> add_warning input);
1469 consume_cell_items ((content, kind) :: acc)
1470 | token ->
1471 Parse_error.not_allowed next_token.location ~what:(Token.describe token)
1472 ~in_what:(Token.describe parent_markup)
1473 ~suggestion:
1474 "Move outside of {table ...}, or inside {td ...} or {th ...}"
1475 |> add_warning input;
1476 junk input;
1477 consume_cell_items acc
1478 in
1479 consume_cell_items []
1480
1481(* {2 Entry point} *)
1482
1483let parse warnings tokens =
1484 let input : input = { tokens; warnings } in
1485
1486 let rec parse_block_elements () =
1487 let elements, last_token, _where_in_line =
1488 block_element_list Top_level ~parent_markup:`Comment input
1489 in
1490
1491 match last_token.value with
1492 | `End -> elements
1493 | `Right_brace ->
1494 Parse_error.unpaired_right_brace last_token.location
1495 |> add_warning input;
1496
1497 let block =
1498 Loc.same last_token (`Paragraph [ Loc.same last_token (`Word "}") ])
1499 in
1500
1501 junk input;
1502 elements @ (block :: parse_block_elements ())
1503 in
1504 let ast = parse_block_elements () in
1505 (ast, List.rev !(input.warnings))