(* parser.ml - Parser combinators for HTTP/1.1 parsing *) open Base (* Re-export exception from Err for backwards compatibility *) exception Parse_error = Err.Parse_error (* Parser state - position threaded explicitly *) type pstate = { buf : Base_bigstring.t; len : int } (* Create parser state *) let make buf ~len = { buf; len } (* Remaining bytes at position *) let remaining st ~pos = st.len - pos (* Check if at end *) let at_end st ~pos = pos >= st.len (* Peek current char without advancing *) let peek_char st ~pos = Err.partial_when @@ at_end st ~pos; Buf_read.peek st.buf pos (* Peek char at offset from current position *) let peek_at st ~pos off = let p = pos + off in Err.partial_when @@ (p >= st.len); Buf_read.peek st.buf p (* Match single character, return new position *) let char c st ~pos = Err.partial_when @@ at_end st ~pos; Err.malformed_when @@ (not (Char.equal (Buf_read.peek st.buf pos) c)); pos + 1 (* Match literal string, return new position *) let string s st ~pos = let slen = String.length s in Err.partial_when (remaining st ~pos < slen); for i = 0 to slen - 1 do let actual = Buf_read.peek st.buf (pos + i) in let expected = String.unsafe_get s i in Err.malformed_when @@ (not (Char.equal actual expected)) done; pos + slen (* Take chars while predicate holds, return span and new position *) let take_while f st ~pos = let start = pos in let p = ref pos in while not (at_end st ~pos:!p) && f (Buf_read.peek st.buf !p) do Int.incr p done; (Span.make ~off:start ~len:(!p - start), !p) (* Skip chars while predicate holds, return new position *) let skip_while f st ~pos = let p = ref pos in while not (at_end st ~pos:!p) && f (Buf_read.peek st.buf !p) do Int.incr p done; !p (* Take exactly n chars as span, return span and new position *) let take n st ~pos = Err.partial_when @@ (remaining st ~pos < n); (Span.make ~off:pos ~len:n, pos + n) (* Skip exactly n chars, return new position *) let skip n st ~pos = Err.partial_when @@ (remaining st ~pos < n); pos + n (* Match char satisfying predicate, return char and new position *) let satisfy f st ~pos = Err.partial_when @@ at_end st ~pos; let c = Buf_read.peek st.buf pos in Err.malformed_unless @@ f c; (c, pos + 1) (* Optional: try parser, return None and original pos on failure *) let optional p st ~pos = match p st ~pos with | (v, new_pos) -> (Some v, new_pos) | exception Err.Parse_error _ -> (None, pos) (* ----- HTTP-Specific Combinators ----- *) (* Match CRLF, return new position *) let crlf st ~pos = let pos = char '\r' st ~pos in char '\n' st ~pos (* Match SP (space), return new position *) let sp st ~pos = char ' ' st ~pos (* Take token chars (for method, header names) - must be non-empty *) let token st ~pos = let (sp, pos) = take_while Buf_read.is_token_char st ~pos in Err.malformed_when (Span.len sp = 0); (sp, pos) (* Skip optional whitespace (OWS), return new position *) let ows st ~pos = skip_while Buf_read.is_space st ~pos (* Parse HTTP version: HTTP/1.0 or HTTP/1.1 *) let http_version st ~pos = let pos = string "HTTP/1." st ~pos in let (minor, pos) = satisfy (fun c -> Char.equal c '0' || Char.equal c '1') st ~pos in let v = if Char.equal minor '1' then Version.Http_1_1 else Version.Http_1_0 in (v, pos) (* Parse method from token span *) let parse_method st ~pos = let (sp, pos) = token st ~pos in let len = Span.len sp in let meth = match len with | 3 -> if Span.equal st.buf sp "GET" then Method.Get else if Span.equal st.buf sp "PUT" then Method.Put else Err.fail Err.Invalid_method | 4 -> if Span.equal st.buf sp "POST" then Method.Post else if Span.equal st.buf sp "HEAD" then Method.Head else Err.fail Err.Invalid_method | 5 -> if Span.equal st.buf sp "PATCH" then Method.Patch else if Span.equal st.buf sp "TRACE" then Method.Trace else Err.fail Err.Invalid_method | 6 -> if Span.equal st.buf sp "DELETE" then Method.Delete else Err.fail Err.Invalid_method | 7 -> if Span.equal st.buf sp "OPTIONS" then Method.Options else if Span.equal st.buf sp "CONNECT" then Method.Connect else Err.fail Err.Invalid_method | _ -> Err.fail Err.Invalid_method in (meth, pos) (* Parse request target - non-empty sequence of non-SP non-CR chars *) let parse_target st ~pos = let (sp, pos) = take_while (fun c -> not (Char.equal c ' ') && not (Char.equal c '\r')) st ~pos in Err.when_ (Span.len sp = 0) Err.Invalid_target; (sp, pos) (* Parse request line: METHOD SP target SP version CRLF *) let request_line st ~pos = let (meth, pos) = parse_method st ~pos in let pos = sp st ~pos in let (target, pos) = parse_target st ~pos in let pos = sp st ~pos in let (version, pos) = http_version st ~pos in let pos = crlf st ~pos in (meth, target, version, pos) (* Parse a single header: name: OWS value OWS CRLF Returns: (name, name_span, value_span, new_pos) *) let parse_header st ~pos = let (name_span, pos) = token st ~pos in let pos = char ':' st ~pos in let pos = ows st ~pos in let value_start = pos in (* Find CRLF - need to scan for it *) let crlf_pos = Buf_read.find_crlf st.buf ~pos ~len:st.len in Err.partial_when (crlf_pos < 0); (* Trim trailing whitespace *) let value_end = ref crlf_pos in while !value_end > value_start && Buf_read.is_space (Buf_read.peek st.buf (!value_end - 1)) do Int.decr value_end done; let value_span = Span.make ~off:value_start ~len:(!value_end - value_start) in let pos = crlf_pos + 2 in let name = Header_name.of_span st.buf name_span in (name, name_span, value_span, pos) (* Check for end of headers (empty line = CRLF) *) let is_headers_end st ~pos = if remaining st ~pos < 2 then false else Char.equal (Buf_read.peek st.buf pos) '\r' && Char.equal (Buf_read.peek st.buf (pos + 1)) '\n' (* Skip the empty line at end of headers, return new position *) let end_headers st ~pos = crlf st ~pos