(*--------------------------------------------------------------------------- Copyright (c) 2025 Anil Madhavapeddy . All rights reserved. SPDX-License-Identifier: ISC ---------------------------------------------------------------------------*) open Bytesrw (** INI parser and encoder using bytesrw. Implements Python configparser semantics including: - Multiline values via indentation - Basic interpolation: %(name)s - Extended interpolation: ${section:name} - DEFAULT section inheritance - Case-insensitive option lookup *) module Result_syntax = struct let ( let* ) = Result.bind end (* ---- Configuration ---- *) type interpolation = [ `No_interpolation | `Basic_interpolation | `Extended_interpolation ] type config = { delimiters : string list; comment_prefixes : string list; inline_comment_prefixes : string list; default_section : string; interpolation : interpolation; allow_no_value : bool; strict : bool; empty_lines_in_values : bool; } let default_config = { delimiters = ["="; ":"]; comment_prefixes = ["#"; ";"]; inline_comment_prefixes = []; default_section = "DEFAULT"; interpolation = `Basic_interpolation; allow_no_value = false; strict = true; empty_lines_in_values = true; } let raw_config = { default_config with interpolation = `No_interpolation } (* ---- Reading from bytesrw ---- *) let read_all_to_string reader = let buf = Buffer.create 4096 in let rec loop () = let slice = Bytes.Reader.read reader in if Bytes.Slice.length slice = 0 then Buffer.contents buf else begin Buffer.add_subbytes buf (Bytes.Slice.bytes slice) (Bytes.Slice.first slice) (Bytes.Slice.length slice); loop () end in loop () (* ---- Parsing State ---- *) type parse_state = { mutable file : string; mutable line_num : int; mutable byte_pos : int; mutable line_start_byte : int; config : config; (* Accumulated data *) mutable defaults : (string Init.node * Init.Repr.ini_value) list; mutable sections : Init.Repr.ini_section list; (* Current parse state *) mutable cur_section : string option; mutable cur_option : (string * Init.Meta.t) option; mutable cur_value : string list; mutable cur_indent : int; mutable cur_value_meta : Init.Meta.t; mutable pending_ws : string; } let make_state config file = { file; line_num = 1; byte_pos = 0; line_start_byte = 0; config; defaults = []; sections = []; cur_section = None; cur_option = None; cur_value = []; cur_indent = 0; cur_value_meta = Init.Meta.none; pending_ws = ""; } let current_textloc state first_byte last_byte first_line = Init.Textloc.make ~file:state.file ~first_byte ~last_byte ~first_line ~last_line:(state.line_num, state.line_start_byte) let current_meta state first_byte first_line = let textloc = current_textloc state first_byte state.byte_pos first_line in Init.Meta.make textloc (* ---- String Utilities ---- *) let string_starts_with ~prefix s = let plen = String.length prefix in let slen = String.length s in slen >= plen && String.sub s 0 plen = prefix let lstrip s = let len = String.length s in let rec find_start i = if i >= len then len else match s.[i] with | ' ' | '\t' -> find_start (i + 1) | _ -> i in let start = find_start 0 in if start = 0 then s else String.sub s start (len - start) let rstrip s = let rec find_end i = if i < 0 then -1 else match s.[i] with | ' ' | '\t' | '\r' | '\n' -> find_end (i - 1) | _ -> i in let end_pos = find_end (String.length s - 1) in if end_pos = String.length s - 1 then s else String.sub s 0 (end_pos + 1) let strip s = lstrip (rstrip s) let count_indent s = let len = String.length s in let rec count i = if i >= len then i else match s.[i] with | ' ' | '\t' -> count (i + 1) | _ -> i in count 0 (* ---- Comment and Delimiter Handling ---- *) let is_comment_line config line = let trimmed = lstrip line in List.exists (fun prefix -> string_starts_with ~prefix trimmed) config.comment_prefixes let is_empty_line line = String.length (strip line) = 0 let strip_inline_comment config value = if config.inline_comment_prefixes = [] then value else (* Find inline comment with preceding whitespace *) let len = String.length value in let rec find_comment i = if i >= len then value else if value.[i] = ' ' || value.[i] = '\t' then begin let rest = String.sub value i (len - i) in let trimmed = lstrip rest in if List.exists (fun p -> string_starts_with ~prefix:p trimmed) config.inline_comment_prefixes then rstrip (String.sub value 0 i) else find_comment (i + 1) end else find_comment (i + 1) in find_comment 0 let find_delimiter config line = let trimmed = lstrip line in let len = String.length trimmed in let rec try_delimiters delims = match delims with | [] -> None | delim :: rest -> let dlen = String.length delim in let rec find_at i = if i + dlen > len then try_delimiters rest else if String.sub trimmed i dlen = delim then Some (delim, i) else find_at (i + 1) in find_at 0 in try_delimiters config.delimiters (* ---- Section Header Parsing ---- *) let parse_section_header line = let trimmed = strip line in let len = String.length trimmed in if len >= 2 && trimmed.[0] = '[' && trimmed.[len - 1] = ']' then Some (String.sub trimmed 1 (len - 2)) else None (* ---- Interpolation ---- *) let rec basic_interpolate ~section ~defaults ~sections value max_depth = if max_depth <= 0 then Error (Init.Error.make (Init.Error.Interpolation { option = ""; reason = "recursion depth exceeded" })) else let buf = Buffer.create (String.length value) in let len = String.length value in let rec scan i = if i >= len then Ok (Buffer.contents buf) else if i + 1 < len && value.[i] = '%' && value.[i+1] = '%' then begin Buffer.add_char buf '%'; scan (i + 2) end else if value.[i] = '%' && i + 1 < len && value.[i+1] = '(' then begin (* Find closing )s *) let rec find_close j = if j + 1 >= len then None else if value.[j] = ')' && value.[j+1] = 's' then Some j else find_close (j + 1) in match find_close (i + 2) with | None -> Buffer.add_char buf value.[i]; scan (i + 1) | Some close_pos -> let name = String.lowercase_ascii (String.sub value (i + 2) (close_pos - i - 2)) in (* Look up value: current section first, then defaults *) let lookup_result = let find_in_opts opts = List.find_opt (fun ((n, _), _) -> String.lowercase_ascii n = name) opts in match section with | None -> find_in_opts defaults | Some sec -> let sec_opts = List.find_opt (fun s -> String.lowercase_ascii (fst s.Init.Repr.name) = String.lowercase_ascii sec ) sections in match sec_opts with | Some s -> (match find_in_opts s.Init.Repr.options with | Some x -> Some x | None -> find_in_opts defaults) | None -> find_in_opts defaults in match lookup_result with | None -> Error (Init.Error.make (Init.Error.Interpolation { option = name; reason = "option not found" })) | Some (_, iv) -> (* Recursively interpolate the referenced value *) match basic_interpolate ~section ~defaults ~sections iv.Init.Repr.raw (max_depth - 1) with | Error e -> Error e | Ok interpolated -> Buffer.add_string buf interpolated; scan (close_pos + 2) end else begin Buffer.add_char buf value.[i]; scan (i + 1) end in scan 0 let rec extended_interpolate ~section ~defaults ~sections value max_depth = if max_depth <= 0 then Error (Init.Error.make (Init.Error.Interpolation { option = ""; reason = "recursion depth exceeded" })) else let buf = Buffer.create (String.length value) in let len = String.length value in let rec scan i = if i >= len then Ok (Buffer.contents buf) else if i + 1 < len && value.[i] = '$' && value.[i+1] = '$' then begin Buffer.add_char buf '$'; scan (i + 2) end else if value.[i] = '$' && i + 1 < len && value.[i+1] = '{' then begin (* Find closing } *) let rec find_close j = if j >= len then None else if value.[j] = '}' then Some j else find_close (j + 1) in match find_close (i + 2) with | None -> Buffer.add_char buf value.[i]; scan (i + 1) | Some close_pos -> let ref_str = String.sub value (i + 2) (close_pos - i - 2) in (* Parse section:name or just name *) let (ref_section, name) = match String.index_opt ref_str ':' with | None -> (section, String.lowercase_ascii ref_str) | Some colon_pos -> let sec = String.sub ref_str 0 colon_pos in let n = String.sub ref_str (colon_pos + 1) (String.length ref_str - colon_pos - 1) in (Some sec, String.lowercase_ascii n) in (* Look up value *) let lookup_result = let find_in_opts opts = List.find_opt (fun ((n, _), _) -> String.lowercase_ascii n = name) opts in match ref_section with | None -> find_in_opts defaults | Some sec -> let lc_sec = String.lowercase_ascii sec in if lc_sec = String.lowercase_ascii "default" then find_in_opts defaults else let sec_opts = List.find_opt (fun s -> String.lowercase_ascii (fst s.Init.Repr.name) = lc_sec ) sections in match sec_opts with | Some s -> (match find_in_opts s.Init.Repr.options with | Some x -> Some x | None -> find_in_opts defaults) | None -> find_in_opts defaults in match lookup_result with | None -> Error (Init.Error.make (Init.Error.Interpolation { option = name; reason = "option not found" })) | Some (_, iv) -> (* Recursively interpolate *) match extended_interpolate ~section:ref_section ~defaults ~sections iv.Init.Repr.raw (max_depth - 1) with | Error e -> Error e | Ok interpolated -> Buffer.add_string buf interpolated; scan (close_pos + 1) end else begin Buffer.add_char buf value.[i]; scan (i + 1) end in scan 0 let interpolate config ~section ~defaults ~sections value = match config.interpolation with | `No_interpolation -> Ok value | `Basic_interpolation -> basic_interpolate ~section ~defaults ~sections value 10 | `Extended_interpolation -> extended_interpolate ~section ~defaults ~sections value 10 (* ---- Option Finalization ---- *) let finalize_current_option state = match state.cur_option with | None -> () | Some (name, name_meta) -> let raw_value = String.concat "\n" (List.rev state.cur_value) in let value = strip raw_value in let iv = { Init.Repr.raw = value; interpolated = value; (* Will be interpolated later *) meta = state.cur_value_meta; } in let opt = ((name, name_meta), iv) in (match state.cur_section with | None -> (* DEFAULT section *) state.defaults <- opt :: state.defaults | Some sec -> (* Add to current section *) match state.sections with | [] -> let new_sec = { Init.Repr.name = (sec, Init.Meta.none); options = [opt]; meta = Init.Meta.none; } in state.sections <- [new_sec] | sec_data :: rest when fst sec_data.name = sec -> state.sections <- { sec_data with options = opt :: sec_data.options } :: rest | _ -> let new_sec = { Init.Repr.name = (sec, Init.Meta.none); options = [opt]; meta = Init.Meta.none; } in state.sections <- new_sec :: state.sections); state.cur_option <- None; state.cur_value <- []; state.cur_indent <- 0 (* ---- Line Processing ---- *) let process_line state line = let line_start = state.byte_pos in let line_start_line = (state.line_num, state.line_start_byte) in state.byte_pos <- state.byte_pos + String.length line + 1; (* +1 for newline *) state.line_num <- state.line_num + 1; state.line_start_byte <- state.byte_pos; (* Check for empty line *) if is_empty_line line then begin if state.cur_option <> None && state.config.empty_lines_in_values then state.cur_value <- "" :: state.cur_value else begin finalize_current_option state; state.pending_ws <- state.pending_ws ^ line ^ "\n" end; Ok () end (* Check for comment *) else if is_comment_line state.config line then begin if state.cur_option <> None then (* Comment within multiline - finalize. *) finalize_current_option state; state.pending_ws <- state.pending_ws ^ line ^ "\n"; Ok () end (* Check for section header *) else match parse_section_header line with | Some sec_name -> finalize_current_option state; let lc_sec = sec_name in (* Keep original case for section names *) if String.lowercase_ascii sec_name = String.lowercase_ascii state.config.default_section then begin state.cur_section <- None; Ok () end else if state.config.strict then begin (* Check for duplicate section *) let exists = List.exists (fun s -> String.lowercase_ascii (fst s.Init.Repr.name) = String.lowercase_ascii sec_name ) state.sections in if exists then Error (Init.Error.make ~meta:(current_meta state line_start line_start_line) (Init.Error.Duplicate_section sec_name)) else begin let sec_meta = current_meta state line_start line_start_line in let sec_meta = Init.Meta.with_ws_before sec_meta state.pending_ws in state.pending_ws <- ""; let new_sec = { Init.Repr.name = (lc_sec, sec_meta); options = []; meta = sec_meta; } in state.sections <- new_sec :: state.sections; state.cur_section <- Some lc_sec; Ok () end end else begin let sec_meta = current_meta state line_start line_start_line in let sec_meta = Init.Meta.with_ws_before sec_meta state.pending_ws in state.pending_ws <- ""; let new_sec = { Init.Repr.name = (lc_sec, sec_meta); options = []; meta = sec_meta; } in state.sections <- new_sec :: state.sections; state.cur_section <- Some lc_sec; Ok () end | None -> (* Check for continuation of multiline value *) let indent = count_indent line in if state.cur_option <> None && indent > state.cur_indent then begin (* Continuation line *) let value_part = strip line in state.cur_value <- value_part :: state.cur_value; Ok () end else begin (* New option or continuation *) finalize_current_option state; (* Try to parse as option = value *) match find_delimiter state.config line with | Some (delim, pos) -> let stripped = lstrip line in let key = String.sub stripped 0 pos in let key = String.lowercase_ascii (rstrip key) in (* Case-fold option names *) let value_start = pos + String.length delim in let rest = String.sub stripped value_start (String.length stripped - value_start) in let value = strip_inline_comment state.config (lstrip rest) in if state.cur_section = None && state.sections = [] && state.defaults = [] then (* No section header yet - this is DEFAULT section *) (); let opt_meta = current_meta state line_start line_start_line in let opt_meta = Init.Meta.with_ws_before opt_meta state.pending_ws in state.pending_ws <- ""; state.cur_option <- Some (key, opt_meta); state.cur_value <- [value]; state.cur_indent <- count_indent line; state.cur_value_meta <- opt_meta; Ok () | None -> if state.config.allow_no_value then begin (* Valueless option *) let key = String.lowercase_ascii (strip line) in let opt_meta = current_meta state line_start line_start_line in let opt_meta = Init.Meta.with_ws_before opt_meta state.pending_ws in state.pending_ws <- ""; state.cur_option <- Some (key, opt_meta); state.cur_value <- []; state.cur_indent <- count_indent line; state.cur_value_meta <- opt_meta; Ok () end else Error (Init.Error.make ~meta:(current_meta state line_start line_start_line) (Init.Error.Parse ("no delimiter found in line: " ^ line))) end (* ---- Interpolation Pass ---- *) let perform_interpolation state = let open Result_syntax in let interpolate_value ~section iv = interpolate state.config ~section ~defaults:state.defaults ~sections:state.sections iv.Init.Repr.raw |> Result.map (fun interpolated -> { iv with Init.Repr.interpolated = interpolated }) in let interpolate_opts ~section opts = let rec loop acc = function | [] -> Ok (List.rev acc) | ((name, meta), iv) :: rest -> let* iv' = interpolate_value ~section iv in loop (((name, meta), iv') :: acc) rest in loop [] opts in let rec loop_sections acc = function | [] -> Ok (List.rev acc) | sec :: rest -> let* opts' = interpolate_opts ~section:(Some (fst sec.Init.Repr.name)) sec.options in loop_sections ({ sec with options = opts' } :: acc) rest in let* defaults' = interpolate_opts ~section:None state.defaults in state.defaults <- defaults'; let* sections' = loop_sections [] state.sections in state.sections <- sections'; Ok () (* ---- Line splitting ---- *) let split_lines s = let len = String.length s in if len = 0 then [] else let rec split acc start i = if i >= len then let last = String.sub s start (len - start) in List.rev (if String.length last > 0 then last :: acc else acc) else match s.[i] with | '\n' -> let line = String.sub s start (i - start) in split (line :: acc) (i + 1) (i + 1) | '\r' -> let line = String.sub s start (i - start) in let next = if i + 1 < len && s.[i + 1] = '\n' then i + 2 else i + 1 in split (line :: acc) next next | _ -> split acc start (i + 1) in split [] 0 0 (* ---- Main Parse Functions ---- *) let parse_string_internal ?(config=default_config) ?(locs=false) ?(layout=false) ?(file=Init.Textloc.file_none) s = let open Result_syntax in let _ = locs in (* TODO: Use locs to control location tracking *) let _ = layout in (* TODO: Use layout to control whitespace preservation *) let state = make_state config file in let lines = split_lines s in let rec process = function | [] -> finalize_current_option state; Ok () | line :: rest -> let* () = process_line state line in process rest in let* () = process lines in let* () = perform_interpolation state in Ok { Init.Repr.defaults = List.rev state.defaults; sections = List.rev_map (fun (sec : Init.Repr.ini_section) -> { sec with options = List.rev sec.options } ) state.sections; meta = Init.Meta.none; } let parse_reader ?(config=default_config) ?(locs=false) ?(layout=false) ?(file=Init.Textloc.file_none) reader = let s = read_all_to_string reader in parse_string_internal ~config ~locs ~layout ~file s let parse_string ?(config=default_config) ?(locs=false) ?(layout=false) ?(file=Init.Textloc.file_none) s = parse_string_internal ~config ~locs ~layout ~file s (* ---- Decoding ---- *) let decode_doc codec doc = match Init.document_state codec with | Some doc_state -> doc_state.decode doc | None -> match Init.section_state codec with | Some sec_state -> (match doc.Init.Repr.sections with | [sec] -> sec_state.decode sec | [] -> Error (Init.Error.make (Init.Error.Codec "no sections in document")) | _ -> Error (Init.Error.make (Init.Error.Codec "multiple sections; expected single section codec"))) | None -> Error (Init.Error.make (Init.Error.Codec "codec is neither document nor section type")) let decode' ?(config=default_config) ?(locs=false) ?(layout=false) ?(file=Init.Textloc.file_none) codec reader = let open Result_syntax in let* doc = parse_reader ~config ~locs ~layout ~file reader in decode_doc codec doc let decode ?config ?locs ?layout ?file codec reader = decode' ?config ?locs ?layout ?file codec reader |> Result.map_error Init.Error.to_string let decode_string' ?(config=default_config) ?(locs=false) ?(layout=false) ?(file=Init.Textloc.file_none) codec s = let open Result_syntax in let* doc = parse_string ~config ~locs ~layout ~file s in decode_doc codec doc let decode_string ?config ?locs ?layout ?file codec s = decode_string' ?config ?locs ?layout ?file codec s |> Result.map_error Init.Error.to_string (* ---- Encoding ---- *) let encode_to_buffer buf codec value = match Init.document_state codec with | Some doc_state -> let doc = doc_state.encode value in (* Encode defaults *) if doc.defaults <> [] then begin Buffer.add_string buf "[DEFAULT]\n"; List.iter (fun ((name, _), iv) -> Buffer.add_string buf name; Buffer.add_string buf " = "; Buffer.add_string buf iv.Init.Repr.raw; Buffer.add_char buf '\n' ) doc.defaults; Buffer.add_char buf '\n' end; (* Encode sections *) List.iter (fun (sec : Init.Repr.ini_section) -> Buffer.add_char buf '['; Buffer.add_string buf (fst sec.name); Buffer.add_string buf "]\n"; List.iter (fun ((name, _), iv) -> Buffer.add_string buf name; Buffer.add_string buf " = "; Buffer.add_string buf iv.Init.Repr.raw; Buffer.add_char buf '\n' ) sec.options; Buffer.add_char buf '\n' ) doc.sections; Ok () | None -> match Init.section_state codec with | Some sec_state -> let sec = sec_state.encode value in Buffer.add_char buf '['; Buffer.add_string buf (fst sec.name); Buffer.add_string buf "]\n"; List.iter (fun ((name, _), iv) -> Buffer.add_string buf name; Buffer.add_string buf " = "; Buffer.add_string buf iv.Init.Repr.raw; Buffer.add_char buf '\n' ) sec.options; Ok () | None -> Error (Init.Error.make (Init.Error.Codec "codec is neither document nor section type")) let encode' ?buf:_ codec value ~eod writer = let open Result_syntax in let buffer = Buffer.create 1024 in let* () = encode_to_buffer buffer codec value in Bytes.Writer.write_string writer (Buffer.contents buffer); if eod then Bytes.Writer.write_eod writer; Ok () let encode ?buf codec value ~eod writer = encode' ?buf codec value ~eod writer |> Result.map_error Init.Error.to_string let encode_string' ?buf:_ codec value = let buffer = Buffer.create 1024 in encode_to_buffer buffer codec value |> Result.map (fun () -> Buffer.contents buffer) let encode_string ?buf codec value = encode_string' ?buf codec value |> Result.map_error Init.Error.to_string