Generate srcset images for a variety of resolutions from OCaml
1(* Copyright (c) 2024, Anil Madhavapeddy <anil@recoil.org>
2
3 Permission to use, copy, modify, and/or distribute this software for
4 any purpose with or without fee is hereby granted, provided that the
5 above copyright notice and this permission notice appear in all
6 copies.
7
8 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
9 WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
10 WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
11 AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
12 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
13 OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
14 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 PERFORMANCE OF THIS SOFTWARE. *)
16
17(** Command-line image processing operations for srcsetter.
18
19 This module provides the core image processing pipeline including
20 file discovery, image conversion, and progress reporting. *)
21
22open Eio
23
24(** Configuration for the image processing pipeline.
25
26 @param dummy When true, skip actual image conversion (dry run)
27 @param preserve When true, skip conversion if destination exists
28 @param proc_mgr Eio process manager for running ImageMagick
29 @param src_dir Source directory containing original images
30 @param dst_dir Destination directory for generated images
31 @param img_widths List of target widths for responsive variants
32 @param img_exts File extensions to process (e.g., ["jpg"; "png"])
33 @param idx_file Name of the JSON index file to generate
34 @param max_fibers Maximum concurrent conversion operations *)
35type ('a, 'b) config = {
36 dummy : bool;
37 preserve : bool;
38 proc_mgr : 'a Eio.Process.mgr;
39 src_dir : 'b Path.t;
40 dst_dir : 'b Path.t;
41 img_widths : int list;
42 img_exts : string list;
43 idx_file : string;
44 max_fibers : int;
45}
46
47(** [file_seq ~filter path] recursively enumerates files in [path].
48
49 Returns a sequence of file paths where [filter filename] is true.
50 Directories are traversed depth-first. *)
51let rec file_seq ~filter path =
52 let dirs, files =
53 Path.with_open_dir path Path.read_dir
54 |> List.fold_left
55 (fun (dirs, files) f ->
56 let fp = Path.(path / f) in
57 match Path.kind ~follow:false fp with
58 | `Regular_file when filter f -> (dirs, fp :: files)
59 | `Directory -> (f :: dirs, files)
60 | _ -> (dirs, files))
61 ([], [])
62 in
63 Seq.append (List.to_seq files)
64 (Seq.flat_map (fun f -> file_seq ~filter Path.(path / f)) (List.to_seq dirs))
65
66(** [iter_seq_p ?max_fibers fn seq] iterates [fn] over [seq] in parallel.
67
68 @param max_fibers Optional limit on concurrent fibers. Must be positive.
69 @raise Invalid_argument if [max_fibers] is not positive. *)
70let iter_seq_p ?max_fibers fn seq =
71 Eio.Switch.run ~name:"iter_seq_p" @@ fun sw ->
72 match max_fibers with
73 | None -> Seq.iter (fun v -> Fiber.fork ~sw @@ fun () -> fn v) seq
74 | Some mf when mf <= 0 -> invalid_arg "iter_seq_p: max_fibers must be positive"
75 | Some mf ->
76 let sem = Semaphore.make mf in
77 Seq.iter
78 (fun v ->
79 Semaphore.acquire sem;
80 Fiber.fork ~sw @@ fun () ->
81 Fun.protect ~finally:(fun () -> Semaphore.release sem) @@ fun () ->
82 fn v)
83 seq
84
85(** [relativize_path dir path] returns [path] relative to [dir].
86
87 @raise Failure if [path] is not under [dir]. *)
88let relativize_path dir path =
89 let dir = Path.native_exn dir in
90 let path = Path.native_exn path in
91 match Fpath.(rem_prefix (v dir) (v path)) with
92 | None -> failwith "relativize_path: path is not under directory"
93 | Some rel -> Fpath.to_string rel
94
95(** [dims cfg path] returns the [(width, height)] dimensions of an image.
96
97 Uses ImageMagick's [identify] command to read image metadata. *)
98let dims { proc_mgr; _ } path =
99 let path = Path.native_exn path in
100 let args = [ "identify"; "-ping"; "-format"; "%w %h"; path ] in
101 let output = Process.parse_out proc_mgr Buf_read.take_all args in
102 Scanf.sscanf output "%d %d" (fun w h -> (w, h))
103
104(** [try_dims cfg path] returns [Some (w, h)] if identify succeeds, [None] otherwise. *)
105let try_dims cfg path =
106 try Some (dims cfg path)
107 with _ -> None
108
109(** [file_size path] returns the size of the file in bytes. *)
110let file_size path =
111 let stat = Path.stat ~follow:true path in
112 Optint.Int63.to_int stat.size
113
114(** [is_valid_image cfg path] returns true if the file exists, has non-zero size,
115 and identify can read its dimensions. *)
116let is_valid_image cfg path =
117 Path.is_file path &&
118 file_size path > 0 &&
119 Option.is_some (try_dims cfg path)
120
121(** [width_from_variant_name name] extracts the width from a variant filename.
122
123 Variant filenames have the form "path/name.WIDTH.webp". Returns [None] for
124 base images (no width suffix). *)
125let width_from_variant_name name =
126 let base = Filename.chop_extension name in (* remove .webp *)
127 let parts = String.split_on_char '.' base in
128 match List.rev parts with
129 | last :: _ -> (
130 match int_of_string_opt last with
131 | Some w -> Some w
132 | None -> None)
133 | [] -> None
134
135(** [run cfg args] executes a shell command unless in dummy mode. *)
136let run { dummy; proc_mgr; _ } args =
137 if not dummy then Process.run proc_mgr args
138
139(** [convert cfg (src, dst, size)] converts an image to WebP format.
140
141 Creates the destination directory if needed, then uses ImageMagick
142 to resize and convert the image with auto-orientation. *)
143let convert ({ src_dir; dst_dir; dummy; _ } as cfg) (src, dst, size) =
144 if dummy then ()
145 else begin
146 let dir =
147 if Filename.dirname dst = "." then dst_dir
148 else Path.(dst_dir / Filename.dirname dst)
149 in
150 Path.mkdirs ~exists_ok:true ~perm:0o755 dir;
151 let src_path = Path.(native_exn (src_dir / src)) in
152 let dst_path = Path.(native_exn (dst_dir / dst)) in
153 let sz = Printf.sprintf "%dx" size in
154 run cfg
155 [
156 "magick"; src_path;
157 "-auto-orient"; "-thumbnail"; sz;
158 "-quality"; "100";
159 "-gravity"; "center"; "-extent"; sz;
160 dst_path;
161 ]
162 end
163
164(** [convert_pdf cfg ~size ~dst ~src] converts a PDF's first page to an image.
165
166 Renders at 300 DPI, crops the top half, and resizes to the target width. *)
167let convert_pdf cfg ~size ~dst ~src =
168 let src_path = Path.native_exn src in
169 let dst_path = Path.native_exn dst in
170 let sz = Printf.sprintf "%sx" size in
171 run cfg
172 [
173 "magick"; "-density"; "300"; "-quality"; "100";
174 src_path ^ "[0]";
175 "-gravity"; "North"; "-crop"; "100%x50%+0+0";
176 "-resize"; sz;
177 dst_path;
178 ]
179
180(** [needed_sizes ~img_widths ~w] returns widths from [img_widths] that are <= [w]. *)
181let needed_sizes ~img_widths ~w = List.filter (fun tw -> tw <= w) img_widths
182
183(** [needs_conversion ~preserve dst] returns true if [dst] should be generated.
184
185 When [preserve] is true, existing files are skipped. *)
186let needs_conversion ~preserve dst =
187 not (preserve && Path.is_file dst)
188
189(** [translate cfg ?w src] computes source and destination paths for conversion.
190
191 Returns [(src_file, dst_file, width_opt, needs_work)] where [needs_work]
192 indicates whether the conversion should be performed. *)
193let translate { src_dir; dst_dir; preserve; _ } ?w src =
194 let src_file = relativize_path src_dir src in
195 let width_suffix = Option.fold ~none:"" ~some:(fun w -> "." ^ string_of_int w) w in
196 let dst_file = Printf.sprintf "%s%s.webp" (Filename.chop_extension src_file) width_suffix in
197 let dst = Path.(dst_dir / dst_file) in
198 (src_file, dst_file, w, needs_conversion ~preserve dst)
199
200(** {1 Progress Bar Rendering} *)
201
202(** [main_bar total] creates a progress bar for [total] items. *)
203let main_bar total =
204 let open Progress.Line in
205 let style =
206 let open Bar_style in
207 let open Progress.Color in
208 v ~delims:("|", "|") ~color:(hex "#FFBA08") [ "█"; "▓"; "▒"; "░"; " " ]
209 in
210 list [ bar ~style:(`Custom style) total; ticker_to total ]
211
212(** [main_bar_heading head total] creates a labeled progress display. *)
213let main_bar_heading head total =
214 let open Progress.Multi in
215 line (Progress.Line.const head) ++ line (main_bar total) ++ blank
216
217(** [one_bar total] creates a compact progress bar for individual file processing. *)
218let one_bar total =
219 let open Progress.Line in
220 let style =
221 let open Bar_style in
222 let open Progress.Color in
223 v ~delims:("{", "}") ~color:(ansi `blue) [ "="; ">"; " " ]
224 in
225 let left = list [ spinner (); bar ~style:(`Custom style) ~width:(`Fixed 12) total; const " " ] in
226 pair left string
227
228(** {1 Image Processing} *)
229
230(** [truncate_string str max_len] truncates [str] to [max_len] chars with ellipsis. *)
231let truncate_string str max_len =
232 if String.length str <= max_len then str
233 else if max_len <= 3 then String.sub "..." 0 max_len
234 else String.sub str 0 (max_len - 3) ^ "..."
235
236(** [process_file cfg (display, main_rep) src] processes a single source image.
237
238 Converts the image to WebP format at multiple responsive sizes.
239 Shows a nested progress bar for files requiring many conversions.
240
241 @return An {!Srcsetter.t} entry with metadata about the generated images. *)
242let process_file cfg (display, main_rep) src =
243 let w, h = dims cfg src in
244 let needed_w = needed_sizes ~img_widths:cfg.img_widths ~w in
245 let base_src, base_dst, _, _ as base = translate cfg src in
246 let needed = List.map (fun w -> translate cfg ~w src) needed_w in
247 let variants =
248 needed
249 |> List.map (fun (_, dst, _, _) -> (dst, (0, 0)))
250 |> Srcsetter.MS.of_list
251 in
252 let slug = Filename.basename base_dst |> Filename.chop_extension in
253 let ent = Srcsetter.v base_dst slug base_src variants (w, h) in
254 let todo =
255 List.filter_map
256 (fun (src, dst, sz, needs_work) ->
257 if needs_work then Some (src, dst, Option.value sz ~default:w) else None)
258 (base :: needed)
259 in
260 let num_todo = List.length todo in
261 if num_todo > 3 then begin
262 let line = one_bar num_todo in
263 let reporter = Progress.Display.add_line display line in
264 let completed = ref [] in
265 let report_progress sz =
266 if sz > 0 then completed := sz :: !completed;
267 let sizes_str = String.concat "," (List.map string_of_int !completed) in
268 let basename = Path.native_exn src |> Filename.basename |> Filename.chop_extension in
269 let label = Printf.sprintf "%25s -> %s" (truncate_string basename 25) sizes_str in
270 Progress.Reporter.report reporter (1, label)
271 in
272 report_progress 0;
273 List.iter (fun (_, _, sz as job) -> report_progress sz; convert cfg job) todo;
274 main_rep 1;
275 Progress.Display.remove_line display reporter
276 end
277 else begin
278 List.iter (convert cfg) todo;
279 main_rep 1
280 end;
281 ent
282
283(** {1 Pipeline Execution} *)
284
285let min_interval = Some (Mtime.Span.of_uint64_ns 1000L)
286
287(** [stage1 cfg] scans for images in the source directory.
288
289 Returns a sequence of file paths matching the configured extensions. *)
290let stage1 { img_exts; src_dir; _ } =
291 let filter f = List.exists (Filename.check_suffix ("." ^ f)) img_exts in
292 let fs = file_seq ~filter src_dir in
293 let total = Seq.length fs in
294 Format.printf "[1/3] Scanned %d images from %a.\n%!" total Path.pp src_dir;
295 fs
296
297(** [stage2 cfg fs] processes images, converting to WebP at multiple sizes.
298
299 @return List of {!Srcsetter.t} entries with placeholder dimensions. *)
300let stage2 ({ max_fibers; dst_dir; _ } as cfg) fs =
301 let display =
302 Progress.Display.start
303 ~config:(Progress.Config.v ~persistent:false ~min_interval ())
304 (main_bar_heading "[2/3] Processing images..." (Seq.length fs))
305 in
306 let [ _; main_rep ] = Progress.Display.reporters display in
307 let ents = ref [] in
308 iter_seq_p ~max_fibers
309 (fun src ->
310 let ent = process_file cfg (display, main_rep) src in
311 ents := ent :: !ents)
312 fs;
313 Progress.Display.finalise display;
314 Format.printf "[2/3] Processed %d images to %a.\n%!" (List.length !ents)
315 Path.pp dst_dir;
316 !ents
317
318(** [stage3 cfg ents] verifies generated images and records their dimensions.
319
320 Regenerates any images that have zero length or fail identify validation.
321
322 @return List of {!Srcsetter.t} entries with actual dimensions. *)
323let stage3 ({ src_dir; dst_dir; max_fibers; _ } as cfg) ents =
324 let ents_seq = List.to_seq ents in
325 let oents = ref [] in
326 let regenerated = ref 0 in
327 let display =
328 Progress.Display.start
329 ~config:(Progress.Config.v ~persistent:false ~min_interval ())
330 (main_bar_heading "[3/3] Verifying images..." (List.length ents))
331 in
332 let [ _; rep ] = Progress.Display.reporters display in
333 iter_seq_p ~max_fibers
334 (fun ent ->
335 let src_path = Path.(src_dir / Srcsetter.origin ent) in
336 let orig_w, _ = dims cfg src_path in
337 (* Verify and regenerate base image if needed *)
338 let base_path = Path.(dst_dir / Srcsetter.name ent) in
339 if not (is_valid_image cfg base_path) then begin
340 incr regenerated;
341 convert cfg (Srcsetter.origin ent, Srcsetter.name ent, orig_w)
342 end;
343 let w, h = dims cfg base_path in
344 (* Verify and regenerate variants if needed *)
345 let variants =
346 Srcsetter.MS.bindings ent.variants
347 |> List.map (fun (k, _) ->
348 let variant_path = Path.(dst_dir / k) in
349 if not (is_valid_image cfg variant_path) then begin
350 incr regenerated;
351 let target_w = Option.value (width_from_variant_name k) ~default:orig_w in
352 convert cfg (Srcsetter.origin ent, k, target_w)
353 end;
354 (k, dims cfg variant_path))
355 |> Srcsetter.MS.of_list
356 in
357 rep 1;
358 oents := { ent with Srcsetter.dims = (w, h); variants } :: !oents)
359 ents_seq;
360 Progress.Display.finalise display;
361 if !regenerated > 0 then
362 Printf.printf "[3/3] Verified %d images, regenerated %d invalid outputs.\n%!"
363 (List.length ents) !regenerated
364 else
365 Printf.printf "[3/3] Verified %d generated image sizes.\n%!"
366 (List.length ents);
367 !oents
368
369(** [run ~proc_mgr ~src_dir ~dst_dir ()] runs the full srcsetter pipeline.
370
371 Scans [src_dir] for images, converts them to WebP format at multiple
372 responsive sizes, and writes an index file to [dst_dir].
373
374 @param proc_mgr Eio process manager for running ImageMagick
375 @param src_dir Source directory containing original images
376 @param dst_dir Destination directory for generated images
377 @param idx_file Name of the index file (default ["index.json"])
378 @param img_widths List of target widths (default common responsive breakpoints)
379 @param img_exts List of extensions to process (default common image formats)
380 @param max_fibers Maximum concurrent operations (default 8)
381 @param dummy When true, skip actual conversions (default false)
382 @param preserve When true, skip existing files (default true)
383 @return List of {!Srcsetter.t} entries describing generated images *)
384let run
385 ~proc_mgr
386 ~src_dir
387 ~dst_dir
388 ?(idx_file = "index.json")
389 ?(img_widths = [ 320; 480; 640; 768; 1024; 1280; 1440; 1600; 1920; 2560; 3840 ])
390 ?(img_exts = [ "png"; "webp"; "jpeg"; "jpg"; "bmp"; "heic"; "gif" ])
391 ?(max_fibers = 8)
392 ?(dummy = false)
393 ?(preserve = true)
394 ()
395 =
396 let img_widths = List.sort (fun a b -> compare b a) img_widths in
397 let cfg =
398 {
399 dummy;
400 preserve;
401 proc_mgr;
402 src_dir;
403 dst_dir;
404 idx_file;
405 img_widths;
406 img_exts;
407 max_fibers;
408 }
409 in
410 let fs = stage1 cfg in
411 let ents = stage2 cfg fs in
412 let oents = stage3 cfg ents in
413 let j = Srcsetter.list_to_json oents |> Result.get_ok in
414 let idx = Path.(dst_dir / idx_file) in
415 Path.save ~append:false ~create:(`Or_truncate 0o644) idx j;
416 oents