An OCaml webserver, but the allocating version (vs httpz which doesnt)
1(* parser.ml - Parser combinators for HTTP/1.1 parsing *)
2
3open Base
4
5(* Re-export exception from Err for backwards compatibility *)
6exception Parse_error = Err.Parse_error
7
8(* Parser state - position threaded explicitly *)
9type pstate = { buf : Base_bigstring.t; len : int }
10
11(* Create parser state *)
12let make buf ~len = { buf; len }
13
14(* Remaining bytes at position *)
15let remaining st ~pos = st.len - pos
16
17(* Check if at end *)
18let at_end st ~pos = pos >= st.len
19
20(* Peek current char without advancing *)
21let peek_char st ~pos =
22 Err.partial_when @@ at_end st ~pos;
23 Buf_read.peek st.buf pos
24
25(* Peek char at offset from current position *)
26let peek_at st ~pos off =
27 let p = pos + off in
28 Err.partial_when @@ (p >= st.len);
29 Buf_read.peek st.buf p
30
31(* Match single character, return new position *)
32let char c st ~pos =
33 Err.partial_when @@ at_end st ~pos;
34 Err.malformed_when @@ (not (Char.equal (Buf_read.peek st.buf pos) c));
35 pos + 1
36
37(* Match literal string, return new position *)
38let string s st ~pos =
39 let slen = String.length s in
40 Err.partial_when (remaining st ~pos < slen);
41 for i = 0 to slen - 1 do
42 let actual = Buf_read.peek st.buf (pos + i) in
43 let expected = String.unsafe_get s i in
44 Err.malformed_when @@ (not (Char.equal actual expected))
45 done;
46 pos + slen
47
48(* Take chars while predicate holds, return span and new position *)
49let take_while f st ~pos =
50 let start = pos in
51 let p = ref pos in
52 while not (at_end st ~pos:!p) && f (Buf_read.peek st.buf !p) do
53 Int.incr p
54 done;
55 (Span.make ~off:start ~len:(!p - start), !p)
56
57(* Skip chars while predicate holds, return new position *)
58let skip_while f st ~pos =
59 let p = ref pos in
60 while not (at_end st ~pos:!p) && f (Buf_read.peek st.buf !p) do
61 Int.incr p
62 done;
63 !p
64
65(* Take exactly n chars as span, return span and new position *)
66let take n st ~pos =
67 Err.partial_when @@ (remaining st ~pos < n);
68 (Span.make ~off:pos ~len:n, pos + n)
69
70(* Skip exactly n chars, return new position *)
71let skip n st ~pos =
72 Err.partial_when @@ (remaining st ~pos < n);
73 pos + n
74
75(* Match char satisfying predicate, return char and new position *)
76let satisfy f st ~pos =
77 Err.partial_when @@ at_end st ~pos;
78 let c = Buf_read.peek st.buf pos in
79 Err.malformed_unless @@ f c;
80 (c, pos + 1)
81
82(* Optional: try parser, return None and original pos on failure *)
83let optional p st ~pos =
84 match p st ~pos with
85 | (v, new_pos) -> (Some v, new_pos)
86 | exception Err.Parse_error _ -> (None, pos)
87
88(* ----- HTTP-Specific Combinators ----- *)
89
90(* Match CRLF, return new position *)
91let crlf st ~pos =
92 let pos = char '\r' st ~pos in
93 char '\n' st ~pos
94
95(* Match SP (space), return new position *)
96let sp st ~pos =
97 char ' ' st ~pos
98
99(* Take token chars (for method, header names) - must be non-empty *)
100let token st ~pos =
101 let (sp, pos) = take_while Buf_read.is_token_char st ~pos in
102 Err.malformed_when (Span.len sp = 0);
103 (sp, pos)
104
105(* Skip optional whitespace (OWS), return new position *)
106let ows st ~pos =
107 skip_while Buf_read.is_space st ~pos
108
109(* Parse HTTP version: HTTP/1.0 or HTTP/1.1 *)
110let http_version st ~pos =
111 let pos = string "HTTP/1." st ~pos in
112 let (minor, pos) = satisfy (fun c -> Char.equal c '0' || Char.equal c '1') st ~pos in
113 let v = if Char.equal minor '1' then Version.Http_1_1 else Version.Http_1_0 in
114 (v, pos)
115
116(* Parse method from token span *)
117let parse_method st ~pos =
118 let (sp, pos) = token st ~pos in
119 let len = Span.len sp in
120 let meth = match len with
121 | 3 ->
122 if Span.equal st.buf sp "GET" then Method.Get
123 else if Span.equal st.buf sp "PUT" then Method.Put
124 else Err.fail Err.Invalid_method
125 | 4 ->
126 if Span.equal st.buf sp "POST" then Method.Post
127 else if Span.equal st.buf sp "HEAD" then Method.Head
128 else Err.fail Err.Invalid_method
129 | 5 ->
130 if Span.equal st.buf sp "PATCH" then Method.Patch
131 else if Span.equal st.buf sp "TRACE" then Method.Trace
132 else Err.fail Err.Invalid_method
133 | 6 ->
134 if Span.equal st.buf sp "DELETE" then Method.Delete
135 else Err.fail Err.Invalid_method
136 | 7 ->
137 if Span.equal st.buf sp "OPTIONS" then Method.Options
138 else if Span.equal st.buf sp "CONNECT" then Method.Connect
139 else Err.fail Err.Invalid_method
140 | _ -> Err.fail Err.Invalid_method
141 in
142 (meth, pos)
143
144(* Parse request target - non-empty sequence of non-SP non-CR chars *)
145let parse_target st ~pos =
146 let (sp, pos) = take_while (fun c ->
147 not (Char.equal c ' ') && not (Char.equal c '\r')) st ~pos
148 in
149 Err.when_ (Span.len sp = 0) Err.Invalid_target;
150 (sp, pos)
151
152(* Parse request line: METHOD SP target SP version CRLF *)
153let request_line st ~pos =
154 let (meth, pos) = parse_method st ~pos in
155 let pos = sp st ~pos in
156 let (target, pos) = parse_target st ~pos in
157 let pos = sp st ~pos in
158 let (version, pos) = http_version st ~pos in
159 let pos = crlf st ~pos in
160 (meth, target, version, pos)
161
162(* Parse a single header: name: OWS value OWS CRLF
163 Returns: (name, name_span, value_span, new_pos) *)
164let parse_header st ~pos =
165 let (name_span, pos) = token st ~pos in
166 let pos = char ':' st ~pos in
167 let pos = ows st ~pos in
168 let value_start = pos in
169 (* Find CRLF - need to scan for it *)
170 let crlf_pos = Buf_read.find_crlf st.buf ~pos ~len:st.len in
171 Err.partial_when (crlf_pos < 0);
172 (* Trim trailing whitespace *)
173 let value_end = ref crlf_pos in
174 while !value_end > value_start &&
175 Buf_read.is_space (Buf_read.peek st.buf (!value_end - 1)) do
176 Int.decr value_end
177 done;
178 let value_span = Span.make
179 ~off:value_start
180 ~len:(!value_end - value_start)
181 in
182 let pos = crlf_pos + 2 in
183 let name = Header_name.of_span st.buf name_span in
184 (name, name_span, value_span, pos)
185
186(* Check for end of headers (empty line = CRLF) *)
187let is_headers_end st ~pos =
188 if remaining st ~pos < 2 then false
189 else
190 Char.equal (Buf_read.peek st.buf pos) '\r' &&
191 Char.equal (Buf_read.peek st.buf (pos + 1)) '\n'
192
193(* Skip the empty line at end of headers, return new position *)
194let end_headers st ~pos =
195 crlf st ~pos