this repo has no description
1module Ast = Ast
2module Loc = Loc
3module Warning = Warning
4
5type t = {
6 ast : Ast.t;
7 warnings : Warning.t list;
8 reversed_newlines : (int * int) list;
9 original_pos : Lexing.position;
10}
11
12(* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard
13 [Lexing] module.
14
15 As the [Lexing] module reads the input, it keeps track of only the byte
16 offset into the input. It is normally the job of each particular lexer
17 implementation to decide which character sequences count as newlines, and
18 keep track of line/column locations. This is usually done by writing several
19 extra regular expressions, and calling [Lexing.new_line] at the right time.
20
21 Keeping track of newlines like this makes the odoc lexer somewhat too
22 diffiult to read, however. To factor the aspect of keeping track of newlines
23 fully out of the odoc lexer, instead of having it keep track of newlines as
24 it's scanning the input, the input is pre-scanned before feeding it into the
25 lexer. A table of all the newlines is assembled, and used to convert offsets
26 into line/column pairs after the lexer emits tokens.
27
28 [reversed_newlines ~input ~comment_location offset] returns a list of pairs
29 of (line number * offset), allowing the easy conversion from the byte
30 [offset], relative to the beginning of a comment, into a location, relative
31 to the beginning of the file containing the comment. This can then be used
32 to convert from byte offset to line number / column number - a Loc.point,
33 and additionally for converting back from a Loc.point to a Lexing.position.
34*)
35
36let reversed_newlines : input:string -> (int * int) list =
37 fun ~input ->
38 let rec find_newlines line_number input_index newlines_accumulator =
39 if input_index >= String.length input then newlines_accumulator
40 else if
41 (* This is good enough to detect CR-LF also. *)
42 input.[input_index] = '\n'
43 then
44 find_newlines (line_number + 1) (input_index + 1)
45 ((line_number + 1, input_index + 1) :: newlines_accumulator)
46 else find_newlines line_number (input_index + 1) newlines_accumulator
47 in
48 find_newlines 1 0 [ (1, 0) ]
49
50(* [offset_to_location] converts from an offset within the comment text, where
51 [reversed_newlines] is the result of the above function and [comment_location]
52 is the location of the comment within its file. The function is meant to be
53 partially applied to its first two arguments, at which point it is passed to
54 the lexer, so it can apply the table to its emitted tokens. *)
55
56let offset_to_location :
57 reversed_newlines:(int * int) list ->
58 comment_location:Lexing.position ->
59 int ->
60 Loc.point =
61 fun ~reversed_newlines ~comment_location byte_offset ->
62 let rec scan_to_last_newline reversed_newlines_prefix =
63 match reversed_newlines_prefix with
64 | [] -> assert false
65 | (line_in_comment, line_start_offset) :: prefix ->
66 if line_start_offset > byte_offset then scan_to_last_newline prefix
67 else
68 let column_in_comment = byte_offset - line_start_offset in
69 let line_in_file =
70 line_in_comment + comment_location.Lexing.pos_lnum - 1
71 in
72 let column_in_file =
73 if line_in_comment = 1 then
74 column_in_comment + comment_location.Lexing.pos_cnum
75 - comment_location.Lexing.pos_bol
76 else column_in_comment
77 in
78 { Loc.line = line_in_file; column = column_in_file }
79 in
80 scan_to_last_newline reversed_newlines
81
82(* Given a Loc.point and the result of [parse_comment], this function returns
83 a valid Lexing.position *)
84let position_of_point : t -> Loc.point -> Lexing.position =
85 fun v point ->
86 let { reversed_newlines; original_pos; _ } = v in
87 let line_in_comment = point.Loc.line - original_pos.pos_lnum + 1 in
88 let rec find_pos_bol reversed_newlines_prefix =
89 match reversed_newlines_prefix with
90 | [] -> assert false
91 | [ _ ] -> original_pos.pos_bol
92 | (line_number, line_start_offset) :: prefix ->
93 if line_number > line_in_comment then find_pos_bol prefix
94 else line_start_offset + original_pos.pos_cnum
95 in
96 let pos_bol = find_pos_bol reversed_newlines in
97 let pos_lnum = point.Loc.line in
98 let pos_cnum = point.column + pos_bol in
99 let pos_fname = original_pos.pos_fname in
100 { Lexing.pos_bol; pos_lnum; pos_cnum; pos_fname }
101
102(* The main entry point for this module *)
103let parse_comment ~location ~text =
104 let warnings = ref [] in
105 let reversed_newlines = reversed_newlines ~input:text in
106 let string_buffer = Buffer.create 256 in
107 let token_stream =
108 let lexbuf = Lexing.from_string text in
109 let offset_to_location =
110 offset_to_location ~reversed_newlines ~comment_location:location
111 in
112 let input : Lexer.input =
113 {
114 file = location.Lexing.pos_fname;
115 offset_to_location;
116 warnings;
117 lexbuf;
118 string_buffer;
119 }
120 in
121 Stream.from (fun _token_index -> Some (Lexer.token input lexbuf))
122 in
123 let ast, warnings = Syntax.parse warnings token_stream in
124 { ast; warnings; reversed_newlines; original_pos = location }
125
126(* Accessor functions, as [t] is opaque *)
127let warnings t = t.warnings
128let ast t = t.ast
129
130(** [deindent ~what input ~start_offset s] "deindents" [s] by an offset computed
131 from [start_offset] and [input], corresponding to the begining of a code
132 block or verbatim. If that is not possible (eg there is a non-whitespace
133 line starting with less than [offset] whitespaces), it unindents as much as
134 possible and raises a warning. *)
135let deindent : what:string -> loc:Loc.span -> string -> string * Warning.t list
136 =
137 fun ~what ~loc s ->
138 let offset = loc.start.column in
139 (* Whitespace-only lines do not count, so they return [None]. *)
140 let count_leading_whitespace line =
141 let rec count_leading_whitespace' index len =
142 if index = len then None
143 else
144 match line.[index] with
145 | ' ' | '\t' -> count_leading_whitespace' (index + 1) len
146 | _ -> Some index
147 in
148 let len = String.length line in
149 (* '\r' may remain because we only split on '\n' below. This is important
150 for the first line, which would be considered not empty without this check. *)
151 let len = if len > 0 && line.[len - 1] = '\r' then len - 1 else len in
152 count_leading_whitespace' 0 len
153 in
154
155 let lines = Astring.String.cuts ~sep:"\n" s in
156
157 let least_amount_of_whitespace =
158 List.fold_left
159 (fun least_so_far line ->
160 match (count_leading_whitespace line, least_so_far) with
161 | Some n, least when n < least -> n
162 | _ -> least_so_far)
163 offset lines
164 in
165 let warning =
166 if least_amount_of_whitespace < offset then
167 [ Parse_error.not_enough_indentation_in_code_block ~what loc ]
168 else []
169 in
170 let drop n line =
171 (* Since blank lines were ignored when calculating
172 [least_amount_of_whitespace], their length might be less than the
173 amount. *)
174 if String.length line < n then ""
175 else String.sub line n (String.length line - n)
176 in
177 let lines = List.map (drop least_amount_of_whitespace) lines in
178 (String.concat "\n" lines, warning)
179
180(** Implements the rules for code block as specified in [odoc_for_authors],
181 section on code blocks and indentation. *)
182let code_block_content ~what ~loc s =
183 let indent = loc.Loc.start.column in
184 (* Remove the first line (to first \n char, included) if it's whitespace only.
185 Otherwise, indent at [indent] level to account for offset. *)
186 let rec handle_first_newline index =
187 if index >= String.length s then String.make indent ' ' ^ s
188 else
189 match s.[index] with
190 | ' ' | '\t' | '\r' -> handle_first_newline (index + 1)
191 | '\n' -> String.sub s (index + 1) (String.length s - index - 1)
192 | _ -> String.make indent ' ' ^ s
193 in
194 let s = handle_first_newline 0 in
195 (* Remove the last line (from last \n char, included) if it's whitespace
196 only. *)
197 let rec handle_last_newline index =
198 if index < 0 then s
199 else
200 match s.[index] with
201 | ' ' | '\t' | '\r' -> handle_last_newline (index - 1)
202 | '\n' -> String.sub s 0 index
203 | _ -> s
204 in
205 let s = handle_last_newline (String.length s - 1) in
206 deindent ~what ~loc s
207
208let verbatim_content loc c =
209 let what = "verbatim" in
210 code_block_content ~what ~loc c
211let codeblock_content loc c =
212 let what = "code block" in
213 code_block_content ~what ~loc c