Generate srcset images for a variety of resolutions from OCaml
at main 416 lines 16 kB view raw
1(* Copyright (c) 2024, Anil Madhavapeddy <anil@recoil.org> 2 3 Permission to use, copy, modify, and/or distribute this software for 4 any purpose with or without fee is hereby granted, provided that the 5 above copyright notice and this permission notice appear in all 6 copies. 7 8 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 9 WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 10 WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 11 AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 12 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA 13 OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 14 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 PERFORMANCE OF THIS SOFTWARE. *) 16 17(** Command-line image processing operations for srcsetter. 18 19 This module provides the core image processing pipeline including 20 file discovery, image conversion, and progress reporting. *) 21 22open Eio 23 24(** Configuration for the image processing pipeline. 25 26 @param dummy When true, skip actual image conversion (dry run) 27 @param preserve When true, skip conversion if destination exists 28 @param proc_mgr Eio process manager for running ImageMagick 29 @param src_dir Source directory containing original images 30 @param dst_dir Destination directory for generated images 31 @param img_widths List of target widths for responsive variants 32 @param img_exts File extensions to process (e.g., ["jpg"; "png"]) 33 @param idx_file Name of the JSON index file to generate 34 @param max_fibers Maximum concurrent conversion operations *) 35type ('a, 'b) config = { 36 dummy : bool; 37 preserve : bool; 38 proc_mgr : 'a Eio.Process.mgr; 39 src_dir : 'b Path.t; 40 dst_dir : 'b Path.t; 41 img_widths : int list; 42 img_exts : string list; 43 idx_file : string; 44 max_fibers : int; 45} 46 47(** [file_seq ~filter path] recursively enumerates files in [path]. 48 49 Returns a sequence of file paths where [filter filename] is true. 50 Directories are traversed depth-first. *) 51let rec file_seq ~filter path = 52 let dirs, files = 53 Path.with_open_dir path Path.read_dir 54 |> List.fold_left 55 (fun (dirs, files) f -> 56 let fp = Path.(path / f) in 57 match Path.kind ~follow:false fp with 58 | `Regular_file when filter f -> (dirs, fp :: files) 59 | `Directory -> (f :: dirs, files) 60 | _ -> (dirs, files)) 61 ([], []) 62 in 63 Seq.append (List.to_seq files) 64 (Seq.flat_map (fun f -> file_seq ~filter Path.(path / f)) (List.to_seq dirs)) 65 66(** [iter_seq_p ?max_fibers fn seq] iterates [fn] over [seq] in parallel. 67 68 @param max_fibers Optional limit on concurrent fibers. Must be positive. 69 @raise Invalid_argument if [max_fibers] is not positive. *) 70let iter_seq_p ?max_fibers fn seq = 71 Eio.Switch.run ~name:"iter_seq_p" @@ fun sw -> 72 match max_fibers with 73 | None -> Seq.iter (fun v -> Fiber.fork ~sw @@ fun () -> fn v) seq 74 | Some mf when mf <= 0 -> invalid_arg "iter_seq_p: max_fibers must be positive" 75 | Some mf -> 76 let sem = Semaphore.make mf in 77 Seq.iter 78 (fun v -> 79 Semaphore.acquire sem; 80 Fiber.fork ~sw @@ fun () -> 81 Fun.protect ~finally:(fun () -> Semaphore.release sem) @@ fun () -> 82 fn v) 83 seq 84 85(** [relativize_path dir path] returns [path] relative to [dir]. 86 87 @raise Failure if [path] is not under [dir]. *) 88let relativize_path dir path = 89 let dir = Path.native_exn dir in 90 let path = Path.native_exn path in 91 match Fpath.(rem_prefix (v dir) (v path)) with 92 | None -> failwith "relativize_path: path is not under directory" 93 | Some rel -> Fpath.to_string rel 94 95(** [dims cfg path] returns the [(width, height)] dimensions of an image. 96 97 Uses ImageMagick's [identify] command to read image metadata. *) 98let dims { proc_mgr; _ } path = 99 let path = Path.native_exn path in 100 let args = [ "identify"; "-ping"; "-format"; "%w %h"; path ] in 101 let output = Process.parse_out proc_mgr Buf_read.take_all args in 102 Scanf.sscanf output "%d %d" (fun w h -> (w, h)) 103 104(** [try_dims cfg path] returns [Some (w, h)] if identify succeeds, [None] otherwise. *) 105let try_dims cfg path = 106 try Some (dims cfg path) 107 with _ -> None 108 109(** [file_size path] returns the size of the file in bytes. *) 110let file_size path = 111 let stat = Path.stat ~follow:true path in 112 Optint.Int63.to_int stat.size 113 114(** [is_valid_image cfg path] returns true if the file exists, has non-zero size, 115 and identify can read its dimensions. *) 116let is_valid_image cfg path = 117 Path.is_file path && 118 file_size path > 0 && 119 Option.is_some (try_dims cfg path) 120 121(** [width_from_variant_name name] extracts the width from a variant filename. 122 123 Variant filenames have the form "path/name.WIDTH.webp". Returns [None] for 124 base images (no width suffix). *) 125let width_from_variant_name name = 126 let base = Filename.chop_extension name in (* remove .webp *) 127 let parts = String.split_on_char '.' base in 128 match List.rev parts with 129 | last :: _ -> ( 130 match int_of_string_opt last with 131 | Some w -> Some w 132 | None -> None) 133 | [] -> None 134 135(** [run cfg args] executes a shell command unless in dummy mode. *) 136let run { dummy; proc_mgr; _ } args = 137 if not dummy then Process.run proc_mgr args 138 139(** [convert cfg (src, dst, size)] converts an image to WebP format. 140 141 Creates the destination directory if needed, then uses ImageMagick 142 to resize and convert the image with auto-orientation. *) 143let convert ({ src_dir; dst_dir; dummy; _ } as cfg) (src, dst, size) = 144 if dummy then () 145 else begin 146 let dir = 147 if Filename.dirname dst = "." then dst_dir 148 else Path.(dst_dir / Filename.dirname dst) 149 in 150 Path.mkdirs ~exists_ok:true ~perm:0o755 dir; 151 let src_path = Path.(native_exn (src_dir / src)) in 152 let dst_path = Path.(native_exn (dst_dir / dst)) in 153 let sz = Printf.sprintf "%dx" size in 154 run cfg 155 [ 156 "magick"; src_path; 157 "-auto-orient"; "-thumbnail"; sz; 158 "-quality"; "100"; 159 "-gravity"; "center"; "-extent"; sz; 160 dst_path; 161 ] 162 end 163 164(** [convert_pdf cfg ~size ~dst ~src] converts a PDF's first page to an image. 165 166 Renders at 300 DPI, crops the top half, and resizes to the target width. *) 167let convert_pdf cfg ~size ~dst ~src = 168 let src_path = Path.native_exn src in 169 let dst_path = Path.native_exn dst in 170 let sz = Printf.sprintf "%sx" size in 171 run cfg 172 [ 173 "magick"; "-density"; "300"; "-quality"; "100"; 174 src_path ^ "[0]"; 175 "-gravity"; "North"; "-crop"; "100%x50%+0+0"; 176 "-resize"; sz; 177 dst_path; 178 ] 179 180(** [needed_sizes ~img_widths ~w] returns widths from [img_widths] that are <= [w]. *) 181let needed_sizes ~img_widths ~w = List.filter (fun tw -> tw <= w) img_widths 182 183(** [needs_conversion ~preserve dst] returns true if [dst] should be generated. 184 185 When [preserve] is true, existing files are skipped. *) 186let needs_conversion ~preserve dst = 187 not (preserve && Path.is_file dst) 188 189(** [translate cfg ?w src] computes source and destination paths for conversion. 190 191 Returns [(src_file, dst_file, width_opt, needs_work)] where [needs_work] 192 indicates whether the conversion should be performed. *) 193let translate { src_dir; dst_dir; preserve; _ } ?w src = 194 let src_file = relativize_path src_dir src in 195 let width_suffix = Option.fold ~none:"" ~some:(fun w -> "." ^ string_of_int w) w in 196 let dst_file = Printf.sprintf "%s%s.webp" (Filename.chop_extension src_file) width_suffix in 197 let dst = Path.(dst_dir / dst_file) in 198 (src_file, dst_file, w, needs_conversion ~preserve dst) 199 200(** {1 Progress Bar Rendering} *) 201 202(** [main_bar total] creates a progress bar for [total] items. *) 203let main_bar total = 204 let open Progress.Line in 205 let style = 206 let open Bar_style in 207 let open Progress.Color in 208 v ~delims:("|", "|") ~color:(hex "#FFBA08") [ ""; ""; ""; ""; " " ] 209 in 210 list [ bar ~style:(`Custom style) total; ticker_to total ] 211 212(** [main_bar_heading head total] creates a labeled progress display. *) 213let main_bar_heading head total = 214 let open Progress.Multi in 215 line (Progress.Line.const head) ++ line (main_bar total) ++ blank 216 217(** [one_bar total] creates a compact progress bar for individual file processing. *) 218let one_bar total = 219 let open Progress.Line in 220 let style = 221 let open Bar_style in 222 let open Progress.Color in 223 v ~delims:("{", "}") ~color:(ansi `blue) [ "="; ">"; " " ] 224 in 225 let left = list [ spinner (); bar ~style:(`Custom style) ~width:(`Fixed 12) total; const " " ] in 226 pair left string 227 228(** {1 Image Processing} *) 229 230(** [truncate_string str max_len] truncates [str] to [max_len] chars with ellipsis. *) 231let truncate_string str max_len = 232 if String.length str <= max_len then str 233 else if max_len <= 3 then String.sub "..." 0 max_len 234 else String.sub str 0 (max_len - 3) ^ "..." 235 236(** [process_file cfg (display, main_rep) src] processes a single source image. 237 238 Converts the image to WebP format at multiple responsive sizes. 239 Shows a nested progress bar for files requiring many conversions. 240 241 @return An {!Srcsetter.t} entry with metadata about the generated images. *) 242let process_file cfg (display, main_rep) src = 243 let w, h = dims cfg src in 244 let needed_w = needed_sizes ~img_widths:cfg.img_widths ~w in 245 let base_src, base_dst, _, _ as base = translate cfg src in 246 let needed = List.map (fun w -> translate cfg ~w src) needed_w in 247 let variants = 248 needed 249 |> List.map (fun (_, dst, _, _) -> (dst, (0, 0))) 250 |> Srcsetter.MS.of_list 251 in 252 let slug = Filename.basename base_dst |> Filename.chop_extension in 253 let ent = Srcsetter.v base_dst slug base_src variants (w, h) in 254 let todo = 255 List.filter_map 256 (fun (src, dst, sz, needs_work) -> 257 if needs_work then Some (src, dst, Option.value sz ~default:w) else None) 258 (base :: needed) 259 in 260 let num_todo = List.length todo in 261 if num_todo > 3 then begin 262 let line = one_bar num_todo in 263 let reporter = Progress.Display.add_line display line in 264 let completed = ref [] in 265 let report_progress sz = 266 if sz > 0 then completed := sz :: !completed; 267 let sizes_str = String.concat "," (List.map string_of_int !completed) in 268 let basename = Path.native_exn src |> Filename.basename |> Filename.chop_extension in 269 let label = Printf.sprintf "%25s -> %s" (truncate_string basename 25) sizes_str in 270 Progress.Reporter.report reporter (1, label) 271 in 272 report_progress 0; 273 List.iter (fun (_, _, sz as job) -> report_progress sz; convert cfg job) todo; 274 main_rep 1; 275 Progress.Display.remove_line display reporter 276 end 277 else begin 278 List.iter (convert cfg) todo; 279 main_rep 1 280 end; 281 ent 282 283(** {1 Pipeline Execution} *) 284 285let min_interval = Some (Mtime.Span.of_uint64_ns 1000L) 286 287(** [stage1 cfg] scans for images in the source directory. 288 289 Returns a sequence of file paths matching the configured extensions. *) 290let stage1 { img_exts; src_dir; _ } = 291 let filter f = List.exists (Filename.check_suffix ("." ^ f)) img_exts in 292 let fs = file_seq ~filter src_dir in 293 let total = Seq.length fs in 294 Format.printf "[1/3] Scanned %d images from %a.\n%!" total Path.pp src_dir; 295 fs 296 297(** [stage2 cfg fs] processes images, converting to WebP at multiple sizes. 298 299 @return List of {!Srcsetter.t} entries with placeholder dimensions. *) 300let stage2 ({ max_fibers; dst_dir; _ } as cfg) fs = 301 let display = 302 Progress.Display.start 303 ~config:(Progress.Config.v ~persistent:false ~min_interval ()) 304 (main_bar_heading "[2/3] Processing images..." (Seq.length fs)) 305 in 306 let [ _; main_rep ] = Progress.Display.reporters display in 307 let ents = ref [] in 308 iter_seq_p ~max_fibers 309 (fun src -> 310 let ent = process_file cfg (display, main_rep) src in 311 ents := ent :: !ents) 312 fs; 313 Progress.Display.finalise display; 314 Format.printf "[2/3] Processed %d images to %a.\n%!" (List.length !ents) 315 Path.pp dst_dir; 316 !ents 317 318(** [stage3 cfg ents] verifies generated images and records their dimensions. 319 320 Regenerates any images that have zero length or fail identify validation. 321 322 @return List of {!Srcsetter.t} entries with actual dimensions. *) 323let stage3 ({ src_dir; dst_dir; max_fibers; _ } as cfg) ents = 324 let ents_seq = List.to_seq ents in 325 let oents = ref [] in 326 let regenerated = ref 0 in 327 let display = 328 Progress.Display.start 329 ~config:(Progress.Config.v ~persistent:false ~min_interval ()) 330 (main_bar_heading "[3/3] Verifying images..." (List.length ents)) 331 in 332 let [ _; rep ] = Progress.Display.reporters display in 333 iter_seq_p ~max_fibers 334 (fun ent -> 335 let src_path = Path.(src_dir / Srcsetter.origin ent) in 336 let orig_w, _ = dims cfg src_path in 337 (* Verify and regenerate base image if needed *) 338 let base_path = Path.(dst_dir / Srcsetter.name ent) in 339 if not (is_valid_image cfg base_path) then begin 340 incr regenerated; 341 convert cfg (Srcsetter.origin ent, Srcsetter.name ent, orig_w) 342 end; 343 let w, h = dims cfg base_path in 344 (* Verify and regenerate variants if needed *) 345 let variants = 346 Srcsetter.MS.bindings ent.variants 347 |> List.map (fun (k, _) -> 348 let variant_path = Path.(dst_dir / k) in 349 if not (is_valid_image cfg variant_path) then begin 350 incr regenerated; 351 let target_w = Option.value (width_from_variant_name k) ~default:orig_w in 352 convert cfg (Srcsetter.origin ent, k, target_w) 353 end; 354 (k, dims cfg variant_path)) 355 |> Srcsetter.MS.of_list 356 in 357 rep 1; 358 oents := { ent with Srcsetter.dims = (w, h); variants } :: !oents) 359 ents_seq; 360 Progress.Display.finalise display; 361 if !regenerated > 0 then 362 Printf.printf "[3/3] Verified %d images, regenerated %d invalid outputs.\n%!" 363 (List.length ents) !regenerated 364 else 365 Printf.printf "[3/3] Verified %d generated image sizes.\n%!" 366 (List.length ents); 367 !oents 368 369(** [run ~proc_mgr ~src_dir ~dst_dir ()] runs the full srcsetter pipeline. 370 371 Scans [src_dir] for images, converts them to WebP format at multiple 372 responsive sizes, and writes an index file to [dst_dir]. 373 374 @param proc_mgr Eio process manager for running ImageMagick 375 @param src_dir Source directory containing original images 376 @param dst_dir Destination directory for generated images 377 @param idx_file Name of the index file (default ["index.json"]) 378 @param img_widths List of target widths (default common responsive breakpoints) 379 @param img_exts List of extensions to process (default common image formats) 380 @param max_fibers Maximum concurrent operations (default 8) 381 @param dummy When true, skip actual conversions (default false) 382 @param preserve When true, skip existing files (default true) 383 @return List of {!Srcsetter.t} entries describing generated images *) 384let run 385 ~proc_mgr 386 ~src_dir 387 ~dst_dir 388 ?(idx_file = "index.json") 389 ?(img_widths = [ 320; 480; 640; 768; 1024; 1280; 1440; 1600; 1920; 2560; 3840 ]) 390 ?(img_exts = [ "png"; "webp"; "jpeg"; "jpg"; "bmp"; "heic"; "gif" ]) 391 ?(max_fibers = 8) 392 ?(dummy = false) 393 ?(preserve = true) 394 () 395 = 396 let img_widths = List.sort (fun a b -> compare b a) img_widths in 397 let cfg = 398 { 399 dummy; 400 preserve; 401 proc_mgr; 402 src_dir; 403 dst_dir; 404 idx_file; 405 img_widths; 406 img_exts; 407 max_fibers; 408 } 409 in 410 let fs = stage1 cfg in 411 let ents = stage2 cfg fs in 412 let oents = stage3 cfg ents in 413 let j = Srcsetter.list_to_json oents |> Result.get_ok in 414 let idx = Path.(dst_dir / idx_file) in 415 Path.save ~append:false ~create:(`Or_truncate 0o644) idx j; 416 oents