this repo has no description
at main 213 lines 8.5 kB view raw
1module Ast = Ast 2module Loc = Loc 3module Warning = Warning 4 5type t = { 6 ast : Ast.t; 7 warnings : Warning.t list; 8 reversed_newlines : (int * int) list; 9 original_pos : Lexing.position; 10} 11 12(* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard 13 [Lexing] module. 14 15 As the [Lexing] module reads the input, it keeps track of only the byte 16 offset into the input. It is normally the job of each particular lexer 17 implementation to decide which character sequences count as newlines, and 18 keep track of line/column locations. This is usually done by writing several 19 extra regular expressions, and calling [Lexing.new_line] at the right time. 20 21 Keeping track of newlines like this makes the odoc lexer somewhat too 22 diffiult to read, however. To factor the aspect of keeping track of newlines 23 fully out of the odoc lexer, instead of having it keep track of newlines as 24 it's scanning the input, the input is pre-scanned before feeding it into the 25 lexer. A table of all the newlines is assembled, and used to convert offsets 26 into line/column pairs after the lexer emits tokens. 27 28 [reversed_newlines ~input ~comment_location offset] returns a list of pairs 29 of (line number * offset), allowing the easy conversion from the byte 30 [offset], relative to the beginning of a comment, into a location, relative 31 to the beginning of the file containing the comment. This can then be used 32 to convert from byte offset to line number / column number - a Loc.point, 33 and additionally for converting back from a Loc.point to a Lexing.position. 34*) 35 36let reversed_newlines : input:string -> (int * int) list = 37 fun ~input -> 38 let rec find_newlines line_number input_index newlines_accumulator = 39 if input_index >= String.length input then newlines_accumulator 40 else if 41 (* This is good enough to detect CR-LF also. *) 42 input.[input_index] = '\n' 43 then 44 find_newlines (line_number + 1) (input_index + 1) 45 ((line_number + 1, input_index + 1) :: newlines_accumulator) 46 else find_newlines line_number (input_index + 1) newlines_accumulator 47 in 48 find_newlines 1 0 [ (1, 0) ] 49 50(* [offset_to_location] converts from an offset within the comment text, where 51 [reversed_newlines] is the result of the above function and [comment_location] 52 is the location of the comment within its file. The function is meant to be 53 partially applied to its first two arguments, at which point it is passed to 54 the lexer, so it can apply the table to its emitted tokens. *) 55 56let offset_to_location : 57 reversed_newlines:(int * int) list -> 58 comment_location:Lexing.position -> 59 int -> 60 Loc.point = 61 fun ~reversed_newlines ~comment_location byte_offset -> 62 let rec scan_to_last_newline reversed_newlines_prefix = 63 match reversed_newlines_prefix with 64 | [] -> assert false 65 | (line_in_comment, line_start_offset) :: prefix -> 66 if line_start_offset > byte_offset then scan_to_last_newline prefix 67 else 68 let column_in_comment = byte_offset - line_start_offset in 69 let line_in_file = 70 line_in_comment + comment_location.Lexing.pos_lnum - 1 71 in 72 let column_in_file = 73 if line_in_comment = 1 then 74 column_in_comment + comment_location.Lexing.pos_cnum 75 - comment_location.Lexing.pos_bol 76 else column_in_comment 77 in 78 { Loc.line = line_in_file; column = column_in_file } 79 in 80 scan_to_last_newline reversed_newlines 81 82(* Given a Loc.point and the result of [parse_comment], this function returns 83 a valid Lexing.position *) 84let position_of_point : t -> Loc.point -> Lexing.position = 85 fun v point -> 86 let { reversed_newlines; original_pos; _ } = v in 87 let line_in_comment = point.Loc.line - original_pos.pos_lnum + 1 in 88 let rec find_pos_bol reversed_newlines_prefix = 89 match reversed_newlines_prefix with 90 | [] -> assert false 91 | [ _ ] -> original_pos.pos_bol 92 | (line_number, line_start_offset) :: prefix -> 93 if line_number > line_in_comment then find_pos_bol prefix 94 else line_start_offset + original_pos.pos_cnum 95 in 96 let pos_bol = find_pos_bol reversed_newlines in 97 let pos_lnum = point.Loc.line in 98 let pos_cnum = point.column + pos_bol in 99 let pos_fname = original_pos.pos_fname in 100 { Lexing.pos_bol; pos_lnum; pos_cnum; pos_fname } 101 102(* The main entry point for this module *) 103let parse_comment ~location ~text = 104 let warnings = ref [] in 105 let reversed_newlines = reversed_newlines ~input:text in 106 let string_buffer = Buffer.create 256 in 107 let token_stream = 108 let lexbuf = Lexing.from_string text in 109 let offset_to_location = 110 offset_to_location ~reversed_newlines ~comment_location:location 111 in 112 let input : Lexer.input = 113 { 114 file = location.Lexing.pos_fname; 115 offset_to_location; 116 warnings; 117 lexbuf; 118 string_buffer; 119 } 120 in 121 Stream.from (fun _token_index -> Some (Lexer.token input lexbuf)) 122 in 123 let ast, warnings = Syntax.parse warnings token_stream in 124 { ast; warnings; reversed_newlines; original_pos = location } 125 126(* Accessor functions, as [t] is opaque *) 127let warnings t = t.warnings 128let ast t = t.ast 129 130(** [deindent ~what input ~start_offset s] "deindents" [s] by an offset computed 131 from [start_offset] and [input], corresponding to the begining of a code 132 block or verbatim. If that is not possible (eg there is a non-whitespace 133 line starting with less than [offset] whitespaces), it unindents as much as 134 possible and raises a warning. *) 135let deindent : what:string -> loc:Loc.span -> string -> string * Warning.t list 136 = 137 fun ~what ~loc s -> 138 let offset = loc.start.column in 139 (* Whitespace-only lines do not count, so they return [None]. *) 140 let count_leading_whitespace line = 141 let rec count_leading_whitespace' index len = 142 if index = len then None 143 else 144 match line.[index] with 145 | ' ' | '\t' -> count_leading_whitespace' (index + 1) len 146 | _ -> Some index 147 in 148 let len = String.length line in 149 (* '\r' may remain because we only split on '\n' below. This is important 150 for the first line, which would be considered not empty without this check. *) 151 let len = if len > 0 && line.[len - 1] = '\r' then len - 1 else len in 152 count_leading_whitespace' 0 len 153 in 154 155 let lines = Astring.String.cuts ~sep:"\n" s in 156 157 let least_amount_of_whitespace = 158 List.fold_left 159 (fun least_so_far line -> 160 match (count_leading_whitespace line, least_so_far) with 161 | Some n, least when n < least -> n 162 | _ -> least_so_far) 163 offset lines 164 in 165 let warning = 166 if least_amount_of_whitespace < offset then 167 [ Parse_error.not_enough_indentation_in_code_block ~what loc ] 168 else [] 169 in 170 let drop n line = 171 (* Since blank lines were ignored when calculating 172 [least_amount_of_whitespace], their length might be less than the 173 amount. *) 174 if String.length line < n then "" 175 else String.sub line n (String.length line - n) 176 in 177 let lines = List.map (drop least_amount_of_whitespace) lines in 178 (String.concat "\n" lines, warning) 179 180(** Implements the rules for code block as specified in [odoc_for_authors], 181 section on code blocks and indentation. *) 182let code_block_content ~what ~loc s = 183 let indent = loc.Loc.start.column in 184 (* Remove the first line (to first \n char, included) if it's whitespace only. 185 Otherwise, indent at [indent] level to account for offset. *) 186 let rec handle_first_newline index = 187 if index >= String.length s then String.make indent ' ' ^ s 188 else 189 match s.[index] with 190 | ' ' | '\t' | '\r' -> handle_first_newline (index + 1) 191 | '\n' -> String.sub s (index + 1) (String.length s - index - 1) 192 | _ -> String.make indent ' ' ^ s 193 in 194 let s = handle_first_newline 0 in 195 (* Remove the last line (from last \n char, included) if it's whitespace 196 only. *) 197 let rec handle_last_newline index = 198 if index < 0 then s 199 else 200 match s.[index] with 201 | ' ' | '\t' | '\r' -> handle_last_newline (index - 1) 202 | '\n' -> String.sub s 0 index 203 | _ -> s 204 in 205 let s = handle_last_newline (String.length s - 1) in 206 deindent ~what ~loc s 207 208let verbatim_content loc c = 209 let what = "verbatim" in 210 code_block_content ~what ~loc c 211let codeblock_content loc c = 212 let what = "code block" in 213 code_block_content ~what ~loc c