···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+open Cmdliner
let () =
  (* Force ANSI styling on both standard formatters before anything is
     printed. *)
  List.iter
    (fun ppf -> Fmt.set_style_renderer ppf `Ansi_tty)
    [ Fmt.stdout; Fmt.stderr ];

  (* Everything below runs inside the Eio event loop and one root switch;
     [env], [sw] and [fs] are captured by the subcommands. *)
  Eio_main.run @@ fun env ->
  Eio.Switch.run @@ fun sw ->
  let fs = env#fs in
1515+1616+ (* Helper to load data from either a file or directory *)
let load_path path =
  (* Result shape is [(loaded, failures)], where each failure is a
     [(path, message)] pair. *)
  let only_failure reason = ([], [ (path, reason) ]) in
  match Eio.Path.(kind ~follow:true (fs / path)) with
  | `Directory -> Repowatch.Loader.load_directory_partial fs path
  | `Regular_file ->
      Repowatch.Loader.load_file fs path
      |> Result.fold ~ok:(fun week -> ([ week ], [])) ~error:only_failure
  | _ -> only_failure "Not a file or directory"
in
(* Xdge-provided term for the "repowatch" application; every subcommand
   composes it into its own term. *)
let xdg_term =
  let dirs = [ `Config; `Cache ] in
  Xdge.Cmd.term "repowatch" fs ~dirs ()
in
(* Top-level command metadata: name, version, one-line doc and man page. *)
let info =
  let man =
    [
      `S Manpage.s_description;
      `P
        "Repowatch parses and analyzes GitHub repository activity data \
         from JSON files in the ruminant format. It provides commands for \
         viewing issues, PRs, discussions, and releases with statistics \
         and filtering capabilities.";
      `S Manpage.s_commands;
      `P "Use $(b,repowatch COMMAND --help) for detailed help on each command.";
    ]
  in
  Cmd.info "repowatch" ~version:"0.1.0"
    ~doc:"GitHub repository activity watcher and analyzer" ~man
in
4646+4747+ (* Path argument *)
(* Mandatory first positional argument. *)
let path_arg =
  let meta =
    Arg.info [] ~docv:"PATH"
      ~doc:"Path to a JSON file or directory containing JSON files."
  in
  Arg.(required (pos 0 (some string) None meta))
in
(* Same positional as [path_arg], but it may be omitted ([None]). *)
let path_arg_opt =
  let meta =
    Arg.info [] ~docv:"PATH"
      ~doc:"Path to a JSON file or directory containing JSON files."
  in
  Arg.(value (pos 0 (some string) None meta))
in
5757+5858+ (* Parse command *)
let parse_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ path = path_arg
    and+ short =
      Arg.(value & flag & info [ "s"; "short" ] ~doc:"Show short output")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* Output for a single file: the week, then a newline. *)
    let show_single week =
      if short then Fmt.pr "%a@." Repowatch.Printer.pp_week_summary week
      else Fmt.pr "%a@." Repowatch.Printer.pp_week_data week
    in
    (* Output for a directory entry: entries are separated by a blank line
       (short) or by a "---" rule (full). *)
    let show_listed week =
      if short then Fmt.pr "%a@.@." Repowatch.Printer.pp_week_summary week
      else Fmt.pr "%a@.---@.@." Repowatch.Printer.pp_week_data week
    in
    let report_failure (file, msg) = Fmt.epr "Error in %s: %s@." file msg in

    match Eio.Path.(kind ~follow:true (fs / path)) with
    | `Regular_file -> (
        match Repowatch.Loader.load_file fs path with
        | Ok week -> show_single week
        | Error msg -> Fmt.epr "Error: %s@." msg)
    | `Directory -> (
        match Repowatch.Loader.load_directory fs path with
        | Ok weeks -> List.iter show_listed weeks
        | Error failures -> List.iter report_failure failures)
    | _ -> Fmt.epr "Error: %s is not a file or directory@." path
  in
  Cmd.v (Cmd.info "parse" ~doc:"Parse and display GitHub activity data.") term
in
9797+9898+ (* Stats command *)
let stats_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ path = path_arg
    and+ compact =
      Arg.(value & flag & info [ "c"; "compact" ] ~doc:"Show compact output")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* Both code paths end by printing one stats value in the chosen
       layout. *)
    let print_stats stats =
      let pp =
        if compact then Repowatch.Printer.pp_stats_compact
        else Repowatch.Printer.pp_stats
      in
      Fmt.pr "%a@." pp stats
    in

    match Eio.Path.(kind ~follow:true (fs / path)) with
    | `Regular_file -> (
        match Repowatch.Loader.load_file fs path with
        | Ok week -> print_stats (Repowatch.Printer.compute_stats week)
        | Error msg -> Fmt.epr "Error: %s@." msg)
    | `Directory ->
        let weeks, failures =
          Repowatch.Loader.load_directory_partial fs path
        in
        (* Files that failed to load are logged as warnings; the remaining
           files are still aggregated. *)
        List.iter
          (fun (file, msg) ->
            Logs.warn (fun m -> m "Error in %s: %s" file msg))
          failures;
        let combined =
          weeks
          |> List.map Repowatch.Printer.compute_stats
          |> Repowatch.Printer.aggregate_stats
        in
        Fmt.pr "Aggregated statistics from %d files:@.@." (List.length weeks);
        print_stats combined
    | _ -> Fmt.epr "Error: %s is not a file or directory@." path
  in
  Cmd.v
    (Cmd.info "stats" ~doc:"Show aggregate statistics for activity data.")
    term
in
137137+138138+ (* Validate command *)
let validate_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ path = path_arg
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* Loads one file, prints its OK/FAIL line, and reports whether it
       loaded. *)
    let check file =
      match Repowatch.Loader.load_file fs file with
      | Ok _ ->
          Fmt.pr "OK: %s@." file;
          true
      | Error msg ->
          Fmt.pr "FAIL: %s - %s@." file msg;
          false
    in

    match Eio.Path.(kind ~follow:true (fs / path)) with
    | `Regular_file -> ignore (check path : bool)
    | `Directory ->
        let passed, failed =
          List.fold_left
            (fun (passed, failed) file ->
              if check file then (passed + 1, failed)
              else (passed, failed + 1))
            (0, 0)
            (Repowatch.Loader.find_json_files fs path)
        in
        Fmt.pr "@.Validation complete: %d OK, %d FAILED@." passed failed
    | _ -> Fmt.epr "Error: %s is not a file or directory@." path
  in
  Cmd.v
    (Cmd.info "validate"
       ~doc:"Validate JSON files against the expected schema.")
    term
in
175175+176176+ (* List command - list repositories in a data directory *)
let list_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ path = path_arg_opt
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* With no PATH argument the current directory is scanned. *)
    let data_dir =
      match path with
      | Some dir -> dir
      | None -> "."
    in
    match Repowatch.Loader.find_repos fs data_dir with
    | [] -> Fmt.pr "No repositories found in %s@." data_dir
    | found ->
        Fmt.pr "Repositories in %s:@." data_dir;
        List.iter (fun name -> Fmt.pr " %s@." name) found
  in
  Cmd.v
    (Cmd.info "list" ~doc:"List repositories in a ruminant data directory.")
    term
in
197197+198198+ (* Issues command - list issues *)
let issues_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ path = path_arg
    and+ state =
      Arg.(
        value
        & opt (some string) None
        & info [ "state" ] ~docv:"STATE" ~doc:"Filter by state (open/closed)")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    let weeks, failures = load_path path in
    (* Load failures are logged as warnings; whatever loaded is still
       listed. *)
    List.iter
      (fun (file, msg) -> Logs.warn (fun m -> m "Error in %s: %s" file msg))
      failures;

    (* Without --state every issue is kept; with it, the state string must
       match exactly. *)
    let keep =
      match state with
      | None -> Fun.const true
      | Some wanted ->
          fun issue -> String.equal (Repowatch.Types.Issue.state issue) wanted
    in
    let selected =
      weeks
      |> List.concat_map Repowatch.Types.Week_data.issues
      |> List.filter keep
    in
    Fmt.pr "Issues (%d):@." (List.length selected);
    List.iter
      (fun issue -> Fmt.pr " %a@." Repowatch.Printer.pp_issue_short issue)
      selected
  in
  Cmd.v (Cmd.info "issues" ~doc:"List issues from activity data.") term
in
237237+238238+ (* PRs command - list pull requests *)
let prs_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ path = path_arg
    and+ state =
      Arg.(
        value
        & opt (some string) None
        & info [ "state" ] ~docv:"STATE"
            ~doc:"Filter by state (open/closed/merged)")
    and+ draft =
      Arg.(value & flag & info [ "draft" ] ~doc:"Show only draft PRs")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    let weeks, failures = load_path path in
    (* Load failures are logged as warnings; whatever loaded is still
       listed. *)
    List.iter
      (fun (file, msg) -> Logs.warn (fun m -> m "Error in %s: %s" file msg))
      failures;

    (* "merged" is not compared against the state string: it selects PRs
       that carry a merge timestamp. Any other value must equal the PR's
       state exactly. *)
    let state_ok =
      match state with
      | None -> Fun.const true
      | Some "merged" ->
          fun pr -> Option.is_some (Repowatch.Types.Pr.merged_at pr)
      | Some wanted ->
          fun pr -> String.equal (Repowatch.Types.Pr.state pr) wanted
    in
    let draft_ok pr = (not draft) || Repowatch.Types.Pr.draft pr in
    let selected =
      weeks
      |> List.concat_map Repowatch.Types.Week_data.prs
      |> List.filter (fun pr -> state_ok pr && draft_ok pr)
    in
    Fmt.pr "Pull Requests (%d):@." (List.length selected);
    List.iter
      (fun pr -> Fmt.pr " %a@." Repowatch.Printer.pp_pr_short pr)
      selected
  in
  Cmd.v (Cmd.info "prs" ~doc:"List pull requests from activity data.") term
in
286286+287287+ (* Sync command - sync repository data from GitHub *)
(* [sync] subcommand: fetch weekly activity for one or more repositories from
   GitHub and store it under the output directory.

   Repositories come from the positional arguments ("owner/repo"), or from
   the configuration file when none are given. The target week defaults to
   the current one; [-w]/[-y] override it and [-n] extends the range
   backwards. A week already present on disk is skipped unless [--force]
   (re-fetch from scratch) or [--update] (merge into the stored data) is
   given. Failures are printed to stderr; the term itself returns unit. *)
let sync_cmd =
  let doc = "Sync repository data from GitHub." in
  let info =
    Cmd.info "sync" ~doc
      ~man:
        [
          `S Manpage.s_description;
          `P
            "Sync repository activity data from GitHub's GraphQL and REST \
             APIs. Data is filtered by ISO week and saved in the ruminant \
             JSON format.";
          `S Manpage.s_examples;
          `P "Sync current week for all configured repositories:";
          `Pre " repowatch sync";
          `P "Sync a specific repository:";
          `Pre " repowatch sync ocaml/ocaml";
          `P "Sync a specific week:";
          `Pre " repowatch sync -w 3 -y 2024 ocaml/ocaml";
          `P "Update existing data (for daily cron jobs):";
          `Pre " repowatch sync --update";
          `P
            "The --update flag merges new activity with existing cached \
             data, making it ideal for keeping the current week up-to-date \
             via daily cron jobs.";
        ]
  in
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ repos =
      Arg.(
        value & pos_all string []
        & info [] ~docv:"REPO" ~doc:"Repositories to sync (owner/repo format)")
    and+ week =
      Arg.(
        value
        & opt (some int) None
        & info [ "w"; "week" ] ~docv:"WEEK"
            ~doc:"ISO week number (default: current)")
    and+ year =
      Arg.(
        value
        & opt (some int) None
        & info [ "y"; "year" ] ~docv:"YEAR" ~doc:"Year (default: current)")
    and+ weeks_back =
      Arg.(
        value & opt int 1
        & info [ "n"; "weeks" ] ~docv:"N"
            ~doc:"Number of weeks to sync (default: 1)")
    and+ output_dir =
      Arg.(
        value
        & opt (some string) None
        & info [ "o"; "output" ] ~docv:"DIR"
            ~doc:"Output directory (default: from config or data/gh)")
    and+ force =
      Arg.(value & flag & info [ "f"; "force" ] ~doc:"Force re-sync even if cached")
    and+ update =
      Arg.(
        value & flag
        & info [ "u"; "update" ]
            ~doc:
              "Incremental update mode. Merge new data with existing cached \
               data. Useful for daily cron jobs to update the current week.")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* A missing or unreadable configuration falls back to the defaults. *)
    let config =
      match Repowatch.Config.load_xdg_opt ~fs with
      | Some c -> c
      | None -> Repowatch.Config.default
    in

    (* [-o] wins over the configured output directory. *)
    let out_dir =
      match output_dir with
      | Some d -> d
      | None -> Repowatch.Config.Sync.output_dir (Repowatch.Config.sync config)
    in

    let github_config = Repowatch.Config.github config in
    match Repowatch.Config.Github.get_token github_config with
    | None ->
        Fmt.epr
          "Error: GitHub token not found. Set %s environment variable.@."
          (Repowatch.Config.Github.token_env github_config)
    | Some token -> (
        let client = Repowatch.Github.create ~sw ~env ~token in

        (* Repositories to process, as [(owner, repo)] pairs. *)
        let repos_to_sync =
          if repos = [] then
            let configured = Repowatch.Config.repositories config in
            if configured = [] then (
              Fmt.epr
                "Error: No repositories specified and none configured.@.";
              Fmt.epr "Use: repowatch sync owner/repo@.";
              Fmt.epr "Or add repositories to your config file.@.";
              [])
            else
              List.map
                (fun r ->
                  ( Repowatch.Config.Repository.owner r,
                    Repowatch.Config.Repository.repo r ))
                configured
          else
            List.filter_map
              (fun s ->
                match String.split_on_char '/' s with
                (* Both halves must be non-empty: "owner/" and "/repo" split
                   into two components too, but would name no repository. *)
                | [ owner; repo ] when owner <> "" && repo <> "" ->
                    Some (owner, repo)
                | _ ->
                    Fmt.epr "Warning: Invalid repository format: %s@." s;
                    None)
              repos
        in

        match repos_to_sync with
        | [] -> ()
        | _ ->
            (* A missing [-w] or [-y] is filled in from the current week. *)
            let current_week = Repowatch.Week.current ~clock:env#clock in
            let target_week =
              match (week, year) with
              | Some w, Some y -> Repowatch.Week.of_year_week ~year:y ~week:w
              | Some w, None ->
                  Repowatch.Week.of_year_week
                    ~year:(Repowatch.Week.year current_week)
                    ~week:w
              | None, Some y ->
                  Repowatch.Week.of_year_week ~year:y
                    ~week:(Repowatch.Week.week current_week)
              | None, None -> current_week
            in

            (* [target_week] first, then the preceding weeks; any [-n] of 1
               or less means just the target week. *)
            let weeks_to_sync =
              if weeks_back <= 1 then [ target_week ]
              else
                let rec build_list acc n w =
                  if n <= 0 then List.rev acc
                  else build_list (w :: acc) (n - 1) (Repowatch.Week.prev w)
                in
                build_list [] weeks_back target_week
            in

            let on_progress p =
              let phase_str =
                match p.Repowatch.Sync.phase with
                | `Issues_prs -> "issues/PRs"
                | `Discussions -> "discussions"
                | `Releases -> "releases"
                | `Users -> "users"
              in
              Fmt.pr " %s page %d@." phase_str p.current
            in

            let sync_one ~owner ~repo week =
              let week_str = Repowatch.Week.to_string week in
              (* With [--force] the stored data is not even consulted. *)
              let cached =
                if force then None
                else
                  Repowatch.Storage.load_week ~fs ~base_dir:out_dir ~owner
                    ~repo ~week
              in
              let should_sync, existing_data =
                match (cached, update) with
                (* Forced, or nothing stored yet: full sync. *)
                | None, _ -> (true, None)
                (* Update mode: merge new activity into the stored data. *)
                | Some _, true -> (true, cached)
                (* Already stored and no flag given: skip. *)
                | Some _, false -> (false, None)
              in
              if not should_sync then
                Fmt.pr " %s: cached (use -f to re-sync)@." week_str
              else (
                let mode_str =
                  if Option.is_some existing_data then "updating"
                  else "syncing"
                in
                Fmt.pr " %s: %s...@." week_str mode_str;
                let result =
                  Repowatch.Sync.sync_week_incremental ~client ~owner ~repo
                    ~week ~existing:existing_data ~on_progress ()
                in
                match result with
                | Error e ->
                    Fmt.epr " Error: %a@." Repowatch.Github.pp_error e
                | Ok result ->
                    Repowatch.Storage.save_week ~fs ~base_dir:out_dir ~owner
                      ~repo ~week ~data:result.week_data;
                    Fmt.pr " Done: %d issues, %d PRs, %d pages fetched@."
                      result.issues_fetched result.prs_fetched
                      result.pages_fetched;
                    (match result.rate_limit_remaining with
                    | Some remaining ->
                        Fmt.pr " Rate limit remaining: %d@." remaining
                    | None -> ()))
            in

            List.iter
              (fun (owner, repo) ->
                Fmt.pr "Syncing %s/%s...@." owner repo;
                List.iter (sync_one ~owner ~repo) weeks_to_sync)
              repos_to_sync)
  in
  Cmd.v info term
in
512512+513513+ (* Repos command - list configured repositories *)
let repos_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* One line per repository; the group, when present, is appended in
       square brackets. *)
    let print_entry r =
      let suffix =
        Option.fold ~none:""
          ~some:(fun g -> Printf.sprintf " [%s]" g)
          (Repowatch.Config.Repository.group r)
      in
      Fmt.pr " %s%s@." (Repowatch.Config.Repository.full_name r) suffix
    in

    match Repowatch.Config.load_xdg_opt ~fs with
    | None -> Fmt.pr "No configuration file found.@."
    | Some config -> (
        match Repowatch.Config.repositories config with
        | [] -> Fmt.pr "No repositories configured.@."
        | configured ->
            Fmt.pr "Configured repositories:@.";
            List.iter print_entry configured)
  in
  Cmd.v (Cmd.info "repos" ~doc:"List configured repositories.") term
in
542542+543543+ (* Status command - show sync status for configured repositories *)
let status_cmd =
  let term =
    let open Term.Syntax in
    let+ (_xdg, _) = xdg_term
    and+ output_dir =
      Arg.(
        value
        & opt (some string) None
        & info [ "o"; "output" ] ~docv:"DIR" ~doc:"Data directory")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    (* Fall back to the built-in defaults when no configuration loads. *)
    let config =
      Option.value
        (Repowatch.Config.load_xdg_opt ~fs)
        ~default:Repowatch.Config.default
    in
    (* [-o] wins over the configured output directory. *)
    let out_dir =
      match output_dir with
      | None -> Repowatch.Config.Sync.output_dir (Repowatch.Config.sync config)
      | Some dir -> dir
    in

    match Repowatch.Config.repositories config with
    | [] -> Fmt.pr "No repositories configured.@."
    | configured ->
        let current_week = Repowatch.Week.current ~clock:env#clock in
        Fmt.pr "Sync status (current week: %s):@.@."
          (Repowatch.Week.to_string current_week);
        (* A repository is "up-to-date" when the current week is among its
           stored weeks. *)
        let report r =
          let owner = Repowatch.Config.Repository.owner r
          and repo = Repowatch.Config.Repository.repo r in
          let cached_weeks =
            Repowatch.Storage.list_cached_weeks ~fs ~base_dir:out_dir ~owner
              ~repo
          in
          let status =
            if List.exists (Repowatch.Week.equal current_week) cached_weeks
            then "up-to-date"
            else "needs sync"
          in
          Fmt.pr " %s/%s: %s (%d weeks cached)@." owner repo status
            (List.length cached_weeks)
        in
        List.iter report configured
  in
  Cmd.v (Cmd.info "status" ~doc:"Show sync status for repositories.") term
in
597597+598598+ (* Init command - create a default config file *)
(* [init] subcommand: write a commented default [config.toml] into the XDG
   configuration directory.

   An existing entry at that path is left alone unless [-f]/[--force] is
   given, in which case it is truncated and rewritten. *)
let init_cmd =
  let doc = "Initialize a new configuration file." in
  let info = Cmd.info "init" ~doc in
  let term =
    let open Term.Syntax in
    let+ (xdg, _) = xdg_term
    and+ force =
      Arg.(
        value & flag
        & info [ "f"; "force" ] ~doc:"Overwrite existing config")
    and+ log_level = Logs_cli.level () in
    Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ());
    Logs.set_level log_level;

    let config_dir = Xdge.config_dir xdg in
    Repowatch.Storage.ensure_dir config_dir;
    let config_path = Eio.Path.(config_dir / "config.toml") in

    (* Ask the filesystem what is at the path instead of reading the file
       and catching every exception: that treated an unreadable file as
       absent and also swallowed unrelated exceptions such as
       cancellation. Anything other than "not found" counts as existing. *)
    let exists =
      match Eio.Path.kind ~follow:true config_path with
      | `Not_found -> false
      | _ -> true
    in

    if exists && not force then
      Fmt.pr "Config file already exists: %a@.Use -f to overwrite.@."
        Eio.Path.pp config_path
    else (
      let default_content =
        {|# Repowatch configuration file

[github]
token_env = "GITHUB_TOKEN"

[sync]
output_dir = "data/gh"
max_pages = 20
early_exit_pages = 5

[cache]
ttl_hours = 24

# Add repositories to watch:
# [[repositories]]
# owner = "ocaml"
# repo = "ocaml"
# group = "core"
#
# [[repositories]]
# owner = "ocaml"
# repo = "dune"
# group = "tooling"
|}
      in
      Eio.Path.save ~create:(`Or_truncate 0o644) config_path default_content;
      Fmt.pr "Created config file: %a@." Eio.Path.pp config_path)
  in
  Cmd.v info term
in
(* Term run when no subcommand is given: it still accepts the XDG and
   logging options, ignores them, and asks Cmdliner to show the paged
   help. *)
let default_term =
  let show_help =
    let open Term.Syntax in
    let+ _ = xdg_term and+ _ = Logs_cli.level () in
    `Help (`Pager, None)
  in
  Term.ret show_help
in
(* Assemble the command group and exit with Cmdliner's status code. *)
let subcommands =
  [
    parse_cmd;
    stats_cmd;
    validate_cmd;
    list_cmd;
    issues_cmd;
    prs_cmd;
    sync_cmd;
    repos_cmd;
    status_cmd;
    init_cmd;
  ]
in
let cmd = Cmd.group info ~default:default_term subcommands in

exit (Cmd.eval cmd)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** JSON codecs for GitHub activity data types.
77+88+ This module provides jsont codecs for encoding and decoding the types
99+ defined in {!Types}. *)
1010+1111+(** {1 Codecs} *)
1212+1313+val metadata : Types.Metadata.t Jsont.t
1414+(** Codec for {!Types.Metadata.t}. *)
1515+1616+val issue : Types.Issue.t Jsont.t
1717+(** Codec for {!Types.Issue.t}. *)
1818+1919+val pr : Types.Pr.t Jsont.t
2020+(** Codec for {!Types.Pr.t}. *)
2121+2222+val discussion : Types.Discussion.t Jsont.t
2323+(** Codec for {!Types.Discussion.t}. *)
2424+2525+val asset : Types.Asset.t Jsont.t
2626+(** Codec for {!Types.Asset.t}. *)
2727+2828+val release : Types.Release.t Jsont.t
2929+(** Codec for {!Types.Release.t}. *)
3030+3131+val week_data : Types.Week_data.t Jsont.t
3232+(** Codec for {!Types.Week_data.t}. *)
3333+3434+(** {1 Decoding} *)
3535+3636+val decode_string : string -> (Types.Week_data.t, string) result
3737+(** Decode a JSON string to week data. *)
3838+3939+val decode_file :
4040+ Eio.Fs.dir_ty Eio.Path.t -> string -> (Types.Week_data.t, string) result
4141+(** Decode a JSON file to week data. *)
4242+4343+(** {1 Encoding} *)
4444+4545+val encode_string : Types.Week_data.t -> string
4646+(** Encode week data to a JSON string. *)
+165
repowatch/lib/config.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* One watched repository: owner, name and an optional group label. *)
module Repository = struct
  type t = { owner : string; repo : string; group : string option }

  let make ~owner ~repo ?group () = { owner; repo; group }
  let owner { owner; _ } = owner
  let repo { repo; _ } = repo
  let group { group; _ } = group

  (* "owner/repo" *)
  let full_name { owner; repo; _ } = Printf.sprintf "%s/%s" owner repo

  (* TOML table with mandatory "owner" and "repo" and optional "group". *)
  let codec =
    let open Tomlt in
    let open Table in
    obj (fun owner repo group -> { owner; repo; group })
    |> mem "owner" string ~enc:(fun r -> r.owner)
    |> mem "repo" string ~enc:(fun r -> r.repo)
    |> opt_mem "group" string ~enc:(fun r -> r.group)
    |> finish
end
(* GitHub settings: only the NAME of the environment variable holding the
   API token is stored, never the token itself. *)
module Github = struct
  type t = { token_env : string }

  let default_token_env = "GITHUB_TOKEN"
  let make ?(token_env = default_token_env) () = { token_env }
  let token_env t = t.token_env

  (* [get_token t] reads the variable named by [token_env t] from the
     process environment. Returns [None] when the variable is unset or
     holds only whitespace: a blank value is not a usable token, and
     reporting it as missing lets callers print their "token not found"
     message instead of sending an empty credential. Surrounding whitespace
     is stripped from the returned token. *)
  let get_token t =
    match Option.map String.trim (Sys.getenv_opt t.token_env) with
    | None | Some "" -> None
    | Some _ as token -> token

  (* TOML table whose "token_env" member falls back to the default when
     absent. *)
  let codec =
    Tomlt.(
      Table.(
        obj (fun token_env -> { token_env })
        |> mem "token_env" string ~dec_absent:default_token_env
             ~enc:(fun t -> t.token_env)
        |> finish))
end
(* Cache settings: an optional directory and a time-to-live in hours. *)
module Cache = struct
  type t = { directory : string option; ttl_hours : int }

  let default_ttl_hours = 24

  let make ?directory ?ttl_hours () =
    {
      directory;
      ttl_hours = Option.value ttl_hours ~default:default_ttl_hours;
    }

  let directory { directory; _ } = directory
  let ttl_hours { ttl_hours; _ } = ttl_hours

  (* TOML table: "directory" is optional, "ttl_hours" defaults when
     absent. *)
  let codec =
    let open Tomlt in
    let open Table in
    obj (fun directory ttl_hours -> { directory; ttl_hours })
    |> opt_mem "directory" string ~enc:(fun c -> c.directory)
    |> mem "ttl_hours" int ~dec_absent:default_ttl_hours
         ~enc:(fun c -> c.ttl_hours)
    |> finish
end
(* Sync settings: where data is written and the two page limits. *)
module Sync = struct
  type t = { output_dir : string; max_pages : int; early_exit_pages : int }

  let default_output_dir = "data/gh"
  let default_max_pages = 20
  let default_early_exit_pages = 5

  let make ?output_dir ?max_pages ?early_exit_pages () =
    {
      output_dir = Option.value output_dir ~default:default_output_dir;
      max_pages = Option.value max_pages ~default:default_max_pages;
      early_exit_pages =
        Option.value early_exit_pages ~default:default_early_exit_pages;
    }

  let output_dir { output_dir; _ } = output_dir
  let max_pages { max_pages; _ } = max_pages
  let early_exit_pages { early_exit_pages; _ } = early_exit_pages

  (* TOML table in which every member falls back to its default when
     absent. *)
  let codec =
    let open Tomlt in
    let open Table in
    obj (fun output_dir max_pages early_exit_pages ->
        { output_dir; max_pages; early_exit_pages })
    |> mem "output_dir" string ~dec_absent:default_output_dir
         ~enc:(fun s -> s.output_dir)
    |> mem "max_pages" int ~dec_absent:default_max_pages
         ~enc:(fun s -> s.max_pages)
    |> mem "early_exit_pages" int ~dec_absent:default_early_exit_pages
         ~enc:(fun s -> s.early_exit_pages)
    |> finish
end
(* Complete configuration: one record per section plus the repository
   list. *)
type t = {
  github : Github.t;
  repositories : Repository.t list;
  cache : Cache.t;
  sync : Sync.t;
}

(* Section defaults, shared by [make] and by the codec's absent-member
   handling. *)
let default_github = Github.make ()
let default_cache = Cache.make ()
let default_sync = Sync.make ()

let make ?github ?repositories ?cache ?sync () =
  {
    github = Option.value github ~default:default_github;
    repositories = Option.value repositories ~default:[];
    cache = Option.value cache ~default:default_cache;
    sync = Option.value sync ~default:default_sync;
  }

let default = make ()
let github { github; _ } = github
let repositories { repositories; _ } = repositories
let cache { cache; _ } = cache
let sync { sync; _ } = sync
(* Top-level TOML codec. Every section may be left out of the file, in which
   case the corresponding default (or the empty repository list) is used. *)
let codec =
  let open Tomlt in
  let open Table in
  obj (fun github repositories cache sync ->
      { github; repositories; cache; sync })
  |> mem "github" Github.codec ~dec_absent:default_github
       ~enc:(fun c -> c.github)
  |> mem "repositories" (list Repository.codec) ~dec_absent:[]
       ~enc:(fun c -> c.repositories)
  |> mem "cache" Cache.codec ~dec_absent:default_cache
       ~enc:(fun c -> c.cache)
  |> mem "sync" Sync.codec ~dec_absent:default_sync ~enc:(fun c -> c.sync)
  |> finish
(* Read and decode the file at [path]. A decode error is raised as [Failure]
   carrying the rendered error; exceptions raised while reading the file are
   not caught here. *)
let load_from_path path =
  let decoded = Tomlt_bytesrw.decode_string codec (Eio.Path.load path) in
  match decoded with
  | Error err -> failwith (Tomlt.Error.to_string err)
  | Ok config -> config
(* [load_from_path_opt path] is the best-effort variant of [load_from_path]:
   it returns [None] when the file cannot be read or does not decode.

   Only [Eio.Io] errors from reading are mapped to [None]. The previous
   catch-all handler also swallowed exceptions unrelated to the file, such
   as fiber cancellation, which must be allowed to propagate. *)
let load_from_path_opt path =
  match Eio.Path.load path with
  | exception Eio.Io _ -> None
  | content -> (
      match Tomlt_bytesrw.decode_string codec content with
      | Ok config -> Some config
      | Error _ -> None)
(* XDG context for the "repowatch" application, rooted at [fs]. *)
let xdg ~fs =
  let app_name = "repowatch" in
  Xdge.create fs app_name
(* Load "config.toml" from the XDG config directories. Raises [Failure] when
   no such file is found, and whatever [load_from_path] raises otherwise. *)
let load_xdg ~fs =
  match Xdge.find_config_file (xdg ~fs) "config.toml" with
  | None ->
      failwith "No repowatch configuration found in XDG config directories"
  | Some path -> load_from_path path
(* Like [load_xdg], but [None] when no "config.toml" is found or when
   [load_from_path_opt] cannot produce a configuration from it. *)
let load_xdg_opt ~fs =
  Option.bind
    (Xdge.find_config_file (xdg ~fs) "config.toml")
    load_from_path_opt
(* XDG configuration directory for the application. *)
let config_dir ~fs = xdg ~fs |> Xdge.config_dir
+146
repowatch/lib/config.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Configuration file handling for repowatch.
77+88+ Configuration is stored in TOML format. The default location follows
99+ XDG conventions: [$XDG_CONFIG_HOME/repowatch/config.toml]. *)
1010+1111+(** {1 Repository Configuration} *)
module Repository : sig
  type t
  (** A repository to watch: an owner, a repository name and an optional
      group label. *)

  val make : owner:string -> repo:string -> ?group:string -> unit -> t
  (** [make ~owner ~repo ?group ()] creates a repository configuration.
      When [group] is omitted the repository has no group. *)

  val owner : t -> string
  (** Repository owner. *)

  val repo : t -> string
  (** Repository name. *)

  val group : t -> string option
  (** Optional group name for categorization; [None] when none was given. *)

  val full_name : t -> string
  (** Full name in "owner/repo" format, i.e. {!owner} and {!repo} joined by
      a single ['/']. *)
end
3232+3333+(** {1 GitHub Configuration} *)
module Github : sig
  type t
  (** GitHub API configuration. It records the name of the environment
      variable that holds the token, not the token itself. *)

  val make : ?token_env:string -> unit -> t
  (** Create GitHub configuration.

      @param token_env Name of environment variable containing the token
        (default: "GITHUB_TOKEN") *)

  val token_env : t -> string
  (** Environment variable name for the GitHub token. *)

  val get_token : t -> string option
  (** Attempt to read the token from the environment, by looking up the
      variable named by {!token_env}. [None] when that variable is not
      set. *)
end
5151+5252+(** {1 Cache Configuration} *)
module Cache : sig
  type t
  (** Cache configuration: an optional directory and a time-to-live. *)

  val make : ?directory:string -> ?ttl_hours:int -> unit -> t
  (** Create cache configuration.

      @param directory Cache directory path (default: uses XDG cache dir)
      @param ttl_hours Hours before cached data is considered stale (default: 24) *)

  val directory : t -> string option
  (** Cache directory path. None means use XDG default. *)

  val ttl_hours : t -> int
  (** Time-to-live in hours for cached data. The value is stored as given;
      no range check is applied. *)
end
7070+7171+(** {1 Sync Configuration} *)
module Sync : sig
  type t
  (** Settings for sync operations. *)

  val make :
    ?output_dir:string ->
    ?max_pages:int ->
    ?early_exit_pages:int ->
    unit ->
    t
  (** [make ?output_dir ?max_pages ?early_exit_pages ()] builds the sync
      settings.

      @param output_dir Directory receiving synced data (default: ["data/gh"])
      @param max_pages Upper bound on pages fetched per query (default: 20)
      @param early_exit_pages
        Number of pages without activity after which fetching stops
        (default: 5) *)

  val output_dir : t -> string
  (** Directory receiving synced data. *)

  val max_pages : t -> int
  (** Upper bound on the number of GraphQL pages fetched. *)

  val early_exit_pages : t -> int
  (** Number of pages without relevant activity that triggers an early exit. *)
end
9898+9999+(** {1 Configuration} *)
type t
(** The whole repowatch configuration. *)

val make :
  ?github:Github.t ->
  ?repositories:Repository.t list ->
  ?cache:Cache.t ->
  ?sync:Sync.t ->
  unit ->
  t
(** [make ?github ?repositories ?cache ?sync ()] assembles a configuration;
    every component is optional. *)

val default : t
(** The default configuration, which watches no repositories. *)

val github : t -> Github.t
(** The GitHub settings. *)

val repositories : t -> Repository.t list
(** The repositories to watch. *)

val cache : t -> Cache.t
(** The cache settings. *)

val sync : t -> Sync.t
(** The sync settings. *)

(** {1 Loading and Saving} *)

val codec : t Tomlt.t
(** TOML codec for the configuration. *)

val load_from_path : Eio.Fs.dir_ty Eio.Path.t -> t
(** Load the configuration stored at a path. Raises on error. *)

val load_from_path_opt : Eio.Fs.dir_ty Eio.Path.t -> t option
(** Like {!load_from_path}, but returns [None] on error. *)

val load_xdg : fs:Eio.Fs.dir_ty Eio.Path.t -> t
(** Load the configuration from the XDG config directory. Raises if it is
    not found. *)

val load_xdg_opt : fs:Eio.Fs.dir_ty Eio.Path.t -> t option
(** Like {!load_xdg}, but returns [None] if the configuration is not found. *)

val config_dir : fs:Eio.Fs.dir_ty Eio.Path.t -> Eio.Fs.dir_ty Eio.Path.t
(** The XDG configuration directory used by repowatch. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Week-based filtering for GitHub data.
77+88+ This module provides functions to filter GitHub issues, PRs, and discussions
99+ based on whether they had activity during a specific ISO week. Activity
1010+ includes creation, updates, and timeline events. *)
1111+1212+(** {1 Activity Checking} *)
val issue_active_in_week : week:Week.t -> Graphql_types.Issue_node.t -> bool
(** [issue_active_in_week ~week issue] is [true] when [issue] shows any
    activity within [week].

    Any one of the following counts as activity:
    - the issue was created during the week;
    - the issue was updated during the week;
    - a timeline event occurred during the week. *)

val pr_active_in_week : week:Week.t -> Graphql_types.Pr_node.t -> bool
(** [pr_active_in_week ~week pr] is [true] when [pr] shows any activity
    within [week].

    Any one of the following counts as activity:
    - the PR was created during the week;
    - the PR was updated during the week;
    - the PR was merged during the week;
    - a timeline event occurred during the week. *)

val discussion_active_in_week :
  week:Week.t -> Graphql_types.Discussion_node.t -> bool
(** [discussion_active_in_week ~week discussion] is [true] when [discussion]
    was updated within [week]. *)
3737+3838+(** {1 Label Helpers} *)
val is_good_first_issue : Graphql_types.Issue_node.t -> bool
(** [is_good_first_issue issue] is [true] when [issue] carries one of the
    labels commonly used to flag newcomer-friendly issues.

    The recognized labels include:
    - "good first issue"
    - "good-first-issue"
    - "beginner"
    - "beginner-friendly"
    - "easy"
    - "help wanted" *)

val has_label : string -> Graphql_types.Issue_node.t -> bool
(** [has_label name issue] is [true] when one of the issue's labels matches
    [name], ignoring case. *)

val has_label_pr : string -> Graphql_types.Pr_node.t -> bool
(** [has_label_pr name pr] is [true] when one of the PR's labels matches
    [name], ignoring case. *)
+134
repowatch/lib/github.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Failures a GitHub request can end with. *)
type error =
  | Rate_limited of { reset_at : float }
  | Unauthorized
  | Forbidden
  | Not_found
  | Server_error of int
  | Network_error of string
  | Parse_error of string

(* [pp_error fmt err] writes a one-line, human-readable description of [err]
   to [fmt]. *)
let pp_error fmt err =
  match err with
  | Unauthorized -> Format.pp_print_string fmt "Unauthorized (401)"
  | Forbidden -> Format.pp_print_string fmt "Forbidden (403)"
  | Not_found -> Format.pp_print_string fmt "Not found (404)"
  | Rate_limited { reset_at } ->
      Format.fprintf fmt "Rate limited (resets at %f)" reset_at
  | Server_error status -> Format.fprintf fmt "Server error (%d)" status
  | Network_error reason -> Format.fprintf fmt "Network error: %s" reason
  | Parse_error reason -> Format.fprintf fmt "Parse error: %s" reason
(* Most recent rate-limit values reported by GitHub. Both fields start as
   [None] and are overwritten when a response carries the matching header. *)
type rate_limit = {
  mutable remaining : int option;
  mutable reset_at : float option;
}

(* Client state: the HTTP session, the clock used for retry delays, and the
   mutable rate-limit snapshot. *)
type t = {
  session : Requests.t;
  clock : float Eio.Time.clock_ty Eio.Resource.t;
  rate_limit : rate_limit;
}

(* Base URLs of the GitHub GraphQL and REST APIs. *)
let graphql_endpoint = "https://api.github.com/graphql"
let rest_base = "https://api.github.com"
(* [create ~sw ~env ~token] builds a client whose session sends [token] as a
   bearer credential together with the default [Accept] and [User-Agent]
   headers. Rate-limit tracking starts out empty. *)
let create ~sw ~env ~token =
  let session = Requests.create ~sw env in
  let session = Requests.set_auth session (Requests.Auth.bearer ~token) in
  let session =
    Requests.set_default_header session "Accept" "application/json"
  in
  let session =
    Requests.set_default_header session "User-Agent" "repowatch/1.0"
  in
  {
    session;
    clock = env#clock;
    rate_limit = { remaining = None; reset_at = None };
  }
(* [create_from_env ~sw ~env] builds a client from the [GITHUB_TOKEN]
   environment variable, or returns an error message when it is unset. *)
let create_from_env ~sw ~env =
  Sys.getenv_opt "GITHUB_TOKEN"
  |> Option.to_result ~none:"GITHUB_TOKEN environment variable not set"
  |> Result.map (fun token -> create ~sw ~env ~token)
(* [update_rate_limits t headers] copies the [X-RateLimit-Remaining] and
   [X-RateLimit-Reset] header values into [t.rate_limit].

   A header that is absent or does not parse as a number leaves the
   previously recorded value untouched. Parsing uses the [_opt] converters
   rather than a catch-all exception handler, so no unrelated exception can
   be swallowed here. *)
let update_rate_limits t headers =
  let header name parse =
    Option.bind
      (Requests.Headers.get_string name headers)
      (fun raw -> parse (String.trim raw))
  in
  Option.iter
    (fun remaining -> t.rate_limit.remaining <- Some remaining)
    (header "X-RateLimit-Remaining" int_of_string_opt);
  Option.iter
    (fun reset_at -> t.rate_limit.reset_at <- Some reset_at)
    (header "X-RateLimit-Reset" float_of_string_opt)
(* Remaining request quota last reported by GitHub, if any. *)
let rate_limit_remaining { rate_limit = { remaining; _ }; _ } = remaining
(* Rate-limit reset time last reported by GitHub, if any. *)
let rate_limit_reset { rate_limit = { reset_at; _ }; _ } = reset_at
(* [handle_response t response] records the rate-limit headers of [response]
   on [t] and then maps its HTTP status to a result:

   - any 2xx status yields [Ok body], the response text;
   - 401 yields [Unauthorized] and 404 yields [Not_found];
   - 429 yields [Rate_limited];
   - 403 yields [Rate_limited] when the recorded remaining quota is zero,
     and [Forbidden] otherwise;
   - every other status yields [Server_error] carrying the status code.

   [reset_at] falls back to [0.0] when no reset time has been recorded. *)
let handle_response t response =
  update_rate_limits t (Requests.Response.headers response);
  let rate_limited () =
    let reset_at = Option.value t.rate_limit.reset_at ~default:0.0 in
    Error (Rate_limited { reset_at })
  in
  match Requests.Response.status_code response with
  | 401 -> Error Unauthorized
  | 403 ->
      (* GitHub signals an exhausted quota with 403 as well as 429; the
         remaining-quota header tells the two meanings of 403 apart. *)
      if t.rate_limit.remaining = Some 0 then rate_limited ()
      else Error Forbidden
  | 404 -> Error Not_found
  | 429 -> rate_limited ()
  | code when code >= 200 && code < 300 ->
      Ok (Requests.Response.text response)
  | code -> Error (Server_error code)
(* [with_retry ~clock ~max_attempts f] calls [f] until it succeeds, fails
   with a non-retryable error, or has been called [max_attempts] times in
   total; the last result is returned.

   Only [Server_error] results with a 5xx code are retried: [Server_error]
   can also carry other unexpected statuses, which would fail the same way on
   every attempt. Between attempts the fiber sleeps for 1s, 2s, 4s, ...
   (2 ^ number of attempts already retried). *)
let with_retry ~clock ~max_attempts f =
  let rec loop attempt =
    (* [attempt] counts the calls already made before this one. *)
    match f () with
    | Error (Server_error code)
      when code >= 500 && attempt + 1 < max_attempts ->
        Eio.Time.sleep clock (Float.pow 2.0 (Float.of_int attempt));
        loop (attempt + 1)
    | (Ok _ | Error _) as result -> result
  in
  loop 0
(* [graphql t ~query ~variables] POSTs the GraphQL request body built from
   [query] and [variables] to the GitHub GraphQL endpoint, going through
   [with_retry], and returns the raw response body on success.

   Exceptions raised while performing the request or reading the response
   are reported as [Network_error], except [Eio.Cancel.Cancelled], which is
   re-raised so that fiber cancellation keeps propagating. *)
let graphql t ~query ~variables =
  let body = Graphql.build_request_body ~query ~variables in
  with_retry ~clock:t.clock ~max_attempts:3 (fun () ->
      try
        let response =
          Requests.post t.session graphql_endpoint
            ~headers:
              (Requests.Headers.empty
              |> Requests.Headers.content_type Requests.Mime.json)
            ~body:(Requests.Body.of_string Requests.Mime.json body)
        in
        handle_response t response
      with
      | Eio.Cancel.Cancelled _ as exn -> raise exn
      | exn -> Error (Network_error (Printexc.to_string exn)))
(* [get_releases t ~owner ~repo ~page] GETs page [page] (100 entries per
   page) of the releases of [owner]/[repo] from the REST API, going through
   [with_retry], and returns the raw response body on success.

   Exceptions are reported as [Network_error], except
   [Eio.Cancel.Cancelled], which is re-raised so that fiber cancellation
   keeps propagating. *)
let get_releases t ~owner ~repo ~page =
  let url =
    Printf.sprintf "%s/repos/%s/%s/releases?page=%d&per_page=100" rest_base
      owner repo page
  in
  with_retry ~clock:t.clock ~max_attempts:3 (fun () ->
      try handle_response t (Requests.get t.session url) with
      | Eio.Cancel.Cancelled _ as exn -> raise exn
      | exn -> Error (Network_error (Printexc.to_string exn)))
(* [get_user t ~username] GETs the profile of [username] from the REST API,
   going through [with_retry], and returns the raw response body on success.

   Exceptions are reported as [Network_error], except
   [Eio.Cancel.Cancelled], which is re-raised so that fiber cancellation
   keeps propagating. *)
let get_user t ~username =
  let url = Printf.sprintf "%s/users/%s" rest_base username in
  with_retry ~clock:t.clock ~max_attempts:3 (fun () ->
      try handle_response t (Requests.get t.session url) with
      | Eio.Cancel.Cancelled _ as exn -> raise exn
      | exn -> Error (Network_error (Printexc.to_string exn)))
+103
repowatch/lib/github.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** GitHub API client with authentication and rate limiting.
77+88+ This module provides a client for GitHub's GraphQL and REST APIs with
99+ automatic rate limit handling and retry logic. *)
1010+1111+(** {1 Types} *)
type t
(** A GitHub API client: an authenticated session together with the most
    recently observed rate-limit state. *)

type error =
  | Rate_limited of { reset_at : float }
      (** The request was rate limited. [reset_at] is the Unix timestamp at
          which the rate limit resets, or [0.0] when no reset time is
          known. *)
  | Unauthorized  (** Authentication failed (HTTP 401). *)
  | Forbidden  (** Access forbidden (HTTP 403) for a reason other than rate
                   limiting. *)
  | Not_found  (** Resource not found (HTTP 404). *)
  | Server_error of int
      (** GitHub answered with a status that has no dedicated constructor.
          The payload is the HTTP status code: usually a 5xx server error,
          but any other unexpected status is reported this way too. *)
  | Network_error of string  (** Network-level error. *)
  | Parse_error of string  (** JSON parsing error. *)

val pp_error : Format.formatter -> error -> unit
(** [pp_error fmt err] prints a one-line description of [err] on [fmt]. *)
3535+3636+(** {1 Client Creation} *)
val create :
  sw:Eio.Switch.t ->
  env:
    < clock : float Eio.Time.clock_ty Eio.Resource.t
    ; net : _ Eio.Net.t
    ; fs : Eio.Fs.dir_ty Eio.Path.t
    ; .. > ->
  token:string ->
  t
(** [create ~sw ~env ~token] builds a GitHub client that authenticates with
    [token].

    @param sw Switch used for resource management
    @param env Eio environment providing the clock, net and fs capabilities
    @param token A GitHub personal access token or OAuth token *)

val create_from_env :
  sw:Eio.Switch.t ->
  env:
    < clock : float Eio.Time.clock_ty Eio.Resource.t
    ; net : _ Eio.Net.t
    ; fs : Eio.Fs.dir_ty Eio.Path.t
    ; .. > ->
  (t, string) result
(** [create_from_env ~sw ~env] builds a client whose token is read from the
    [GITHUB_TOKEN] environment variable. The result is [Error msg] when that
    variable is not set. *)
6464+6565+(** {1 GraphQL API} *)
val graphql :
  t ->
  query:string ->
  variables:Graphql.query_variables ->
  (string, error) result
(** [graphql t ~query ~variables] runs a GraphQL query against GitHub's API
    and returns the raw JSON response body.

    This function:
    - retries on server errors with exponential backoff;
    - updates the client's rate-limit tracking from the response headers;
    - reports rate limiting as [Error (Rate_limited _)]. It does not wait
      for the limit to reset; callers decide whether and when to retry. *)
7979+8080+(** {1 REST API} *)
val get_releases :
  t ->
  owner:string ->
  repo:string ->
  page:int ->
  (string, error) result
(** [get_releases t ~owner ~repo ~page] retrieves one page of releases
    through the REST API and returns the raw JSON response body. Pages are
    numbered from 1. *)

val get_user : t -> username:string -> (string, error) result
(** [get_user t ~username] retrieves a user profile through the REST API and
    returns the raw JSON response body. *)
9494+9595+(** {1 Rate Limit Information} *)
val rate_limit_remaining : t -> int option
(** [rate_limit_remaining t] is the remaining request quota, when earlier
    responses have reported it. *)

val rate_limit_reset : t -> float option
(** [rate_limit_reset t] is the Unix timestamp at which the rate limit
    resets, when earlier responses have reported it. *)
+218
repowatch/lib/graphql.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Variables of the issues/PRs query: the repository coordinates plus the
   optional pagination cursors of the two connections. *)
type query_variables = {
  owner : string;
  name : string;
  issues_after : string option;
  prs_after : string option;
}
(* GraphQL document that fetches, for one repository, a page of 25 issues and
   a page of 25 pull requests, both ordered by UPDATED_AT descending.
   [$issuesAfter] and [$prsAfter] are the optional pagination cursors of the
   two connections. Each node carries up to 20 labels, its 10 most recently
   updated comments and up to 100 timeline items of the listed types. *)
1212+1313+let issues_prs_query =
1414+ {|query($owner: String!, $name: String!, $issuesAfter: String, $prsAfter: String) {
1515+ repository(owner: $owner, name: $name) {
1616+ issues(first: 25, after: $issuesAfter, orderBy: {field: UPDATED_AT, direction: DESC}) {
1717+ pageInfo {
1818+ hasNextPage
1919+ endCursor
2020+ }
2121+ nodes {
2222+ number
2323+ title
2424+ url
2525+ createdAt
2626+ updatedAt
2727+ closedAt
2828+ bodyText
2929+ state
3030+ author {
3131+ login
3232+ }
3333+ labels(first: 20) {
3434+ nodes {
3535+ name
3636+ }
3737+ }
3838+ comments(first: 10, orderBy: {field: UPDATED_AT, direction: DESC}) {
3939+ totalCount
4040+ nodes {
4141+ author {
4242+ login
4343+ }
4444+ bodyText
4545+ createdAt
4646+ updatedAt
4747+ }
4848+ }
4949+ timelineItems(first: 100, itemTypes: [ISSUE_COMMENT, LABELED_EVENT, UNLABELED_EVENT, CLOSED_EVENT, REOPENED_EVENT]) {
5050+ nodes {
5151+ __typename
5252+ ... on IssueComment {
5353+ createdAt
5454+ }
5555+ ... on LabeledEvent {
5656+ createdAt
5757+ }
5858+ ... on UnlabeledEvent {
5959+ createdAt
6060+ }
6161+ ... on ClosedEvent {
6262+ createdAt
6363+ }
6464+ ... on ReopenedEvent {
6565+ createdAt
6666+ }
6767+ }
6868+ }
6969+ }
7070+ }
7171+ pullRequests(first: 25, after: $prsAfter, orderBy: {field: UPDATED_AT, direction: DESC}) {
7272+ pageInfo {
7373+ hasNextPage
7474+ endCursor
7575+ }
7676+ nodes {
7777+ number
7878+ title
7979+ url
8080+ createdAt
8181+ updatedAt
8282+ closedAt
8383+ mergedAt
8484+ bodyText
8585+ state
8686+ additions
8787+ deletions
8888+ changedFiles
8989+ mergeable
9090+ isDraft
9191+ author {
9292+ login
9393+ }
9494+ labels(first: 20) {
9595+ nodes {
9696+ name
9797+ }
9898+ }
9999+ comments(first: 10, orderBy: {field: UPDATED_AT, direction: DESC}) {
100100+ totalCount
101101+ nodes {
102102+ author {
103103+ login
104104+ }
105105+ bodyText
106106+ createdAt
107107+ updatedAt
108108+ }
109109+ }
110110+ timelineItems(first: 100, itemTypes: [PULL_REQUEST_COMMIT, PULL_REQUEST_REVIEW, ISSUE_COMMENT, CLOSED_EVENT, REOPENED_EVENT, MERGED_EVENT]) {
111111+ nodes {
112112+ __typename
113113+ ... on PullRequestCommit {
114114+ commit {
115115+ committedDate
116116+ }
117117+ }
118118+ ... on PullRequestReview {
119119+ createdAt
120120+ }
121121+ ... on IssueComment {
122122+ createdAt
123123+ }
124124+ ... on ClosedEvent {
125125+ createdAt
126126+ }
127127+ ... on ReopenedEvent {
128128+ createdAt
129129+ }
130130+ ... on MergedEvent {
131131+ createdAt
132132+ }
133133+ }
134134+ }
135135+ }
136136+ }
137137+ }
138138+}|}
(* GraphQL document that fetches the first 100 discussions of one repository,
   ordered by UPDATED_AT descending. The query declares no cursor variable,
   so it only ever returns that first page. *)
139139+140140+let discussions_query =
141141+ {|query($owner: String!, $name: String!) {
142142+ repository(owner: $owner, name: $name) {
143143+ discussions(first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
144144+ nodes {
145145+ number
146146+ title
147147+ url
148148+ updatedAt
149149+ bodyText
150150+ author {
151151+ login
152152+ }
153153+ category {
154154+ name
155155+ }
156156+ comments {
157157+ totalCount
158158+ }
159159+ answerChosenAt
160160+ }
161161+ }
162162+ }
163163+}|}
(* [escape_json_string s] returns [s] with every character that cannot
   appear verbatim inside a JSON string literal replaced by its escape:
   double quote, backslash, newline, carriage return and tab get their short
   escapes, any other character below code 32 becomes a [\uXXXX] escape, and
   everything else is copied unchanged. The surrounding quotes are not
   added. *)
let escape_json_string s =
  let out = Buffer.create (2 * String.length s) in
  let emit c =
    match c with
    | '"' -> Buffer.add_string out {|\"|}
    | '\\' -> Buffer.add_string out {|\\|}
    | '\n' -> Buffer.add_string out {|\n|}
    | '\r' -> Buffer.add_string out {|\r|}
    | '\t' -> Buffer.add_string out {|\t|}
    | '\000' .. '\031' -> Printf.bprintf out "\\u%04x" (Char.code c)
    | _ -> Buffer.add_char out c
  in
  String.iter emit s;
  Buffer.contents out
(* [variables_to_json vars] turns [vars] into an association list of JSON
   strings. ["owner"] and ["name"] are always present; ["issuesAfter"] and
   ["prsAfter"] are only present when the matching cursor is set, and are
   placed in front of the mandatory entries. *)
let variables_to_json { owner; name; issues_after; prs_after } =
  let optional key cursor rest =
    match cursor with
    | None -> rest
    | Some value -> (key, Jsont.Json.string value) :: rest
  in
  let mandatory =
    [ ("owner", Jsont.Json.string owner); ("name", Jsont.Json.string name) ]
  in
  optional "prsAfter" prs_after (optional "issuesAfter" issues_after mandatory)
(* [build_request_body ~query ~variables] renders the JSON request body
   [{"query": "...", "variables": {...}}] by hand. The variables object
   always holds "owner" and "name", followed by "issuesAfter" and "prsAfter"
   when those cursors are set; every value goes through
   [escape_json_string]. *)
let build_request_body ~query ~variables =
  let field key value =
    Printf.sprintf "\"%s\": \"%s\"" key (escape_json_string value)
  in
  let optional_field key = function
    | Some cursor -> [ field key cursor ]
    | None -> []
  in
  let fields =
    List.concat
      [
        [ field "owner" variables.owner; field "name" variables.name ];
        optional_field "issuesAfter" variables.issues_after;
        optional_field "prsAfter" variables.prs_after;
      ]
  in
  Printf.sprintf "{\"query\": \"%s\", \"variables\": {%s}}"
    (escape_json_string query)
    (String.concat ", " fields)
+51
repowatch/lib/graphql.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** GraphQL query construction for GitHub API.
77+88+ This module provides the GraphQL query strings and request body building
99+ functions for fetching repository data from GitHub's GraphQL API. *)
1010+1111+(** {1 Query Variables} *)
type query_variables = {
  owner : string;  (** Owner of the repository (a user or an organization). *)
  name : string;  (** Name of the repository. *)
  issues_after : string option;  (** Pagination cursor for issues. *)
  prs_after : string option;  (** Pagination cursor for PRs. *)
}
(** Variables supplied to the issues/PRs GraphQL query. *)
2020+2121+(** {1 Query Strings} *)
val issues_prs_query : string
(** GraphQL query string that fetches issues and pull requests.

    A single request returns both issues and PRs, with:
    - cursor-based pagination (25 items per page);
    - ordering by UPDATED_AT, descending;
    - author information (deleted users are handled);
    - labels (first 20);
    - recent comments (first 10);
    - timeline events, used for activity tracking. *)

val discussions_query : string
(** GraphQL query string that fetches discussions.

    A simpler query: it returns the first 100 discussions ordered by
    UPDATED_AT descending and does not paginate. *)
3939+4040+(** {1 Request Building} *)
val build_request_body : query:string -> variables:query_variables -> string
(** [build_request_body ~query ~variables] renders the JSON request body
    expected by the GitHub GraphQL endpoint.

    The result is a JSON string shaped like:
    [\{"query": "...", "variables": \{...\}\}] *)

val variables_to_json : query_variables -> (string * Jsont.Json.t) list
(** [variables_to_json vars] turns the query variables into a JSON-compatible
    association list, for use with other GraphQL clients. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Jsont codecs for GitHub GraphQL API responses.
77+88+ This module provides type-safe JSON codecs for decoding the responses
99+ from GitHub's GraphQL API for issues, pull requests, and discussions. *)
1010+1111+(** {1 Component Codecs} *)
val page_info : Graphql_types.Page_info.t Jsont.t
(** Codec for GraphQL pagination info. *)

val author : Graphql_types.Author.t Jsont.t
(** Codec for a GitHub user/author. A null author is handled as "ghost". *)

val label : Graphql_types.Label.t Jsont.t
(** Codec for an issue/PR label. *)

val comment : Graphql_types.Comment.t Jsont.t
(** Codec for a comment. *)

val timeline_item : Graphql_types.Timeline_item.t Jsont.t
(** Codec for a timeline event. The event type is determined from
    [__typename]. *)
2727+2828+(** {1 Issue/PR Codecs} *)
val issue_node : Graphql_types.Issue_node.t Jsont.t
(** Codec for a GitHub issue node from GraphQL. *)

val pr_node : Graphql_types.Pr_node.t Jsont.t
(** Codec for a GitHub PR node from GraphQL. *)

val issues_connection : Graphql_types.Issues_connection.t Jsont.t
(** Codec for a paginated issues connection. *)

val prs_connection : Graphql_types.Prs_connection.t Jsont.t
(** Codec for a paginated PRs connection. *)
4141+4242+(** {1 Discussion Codecs} *)
val discussion_node : Graphql_types.Discussion_node.t Jsont.t
(** Codec for a GitHub discussion node from GraphQL. *)
4646+4747+(** {1 Response Codecs} *)
val repository_response : Graphql_types.Repository_response.t Jsont.t
(** Codec for the complete issues/PRs query response. The response is
    expected to be wrapped in [\{"data": \{"repository": ...\}\}]. *)

val discussions_response : Graphql_types.Discussions_response.t Jsont.t
(** Codec for the discussions query response. The response is expected to be
    wrapped in [\{"data": \{"repository": ...\}\}]. *)
5656+5757+(** {1 Decoding Functions} *)
val decode_repository_response :
  string -> (Graphql_types.Repository_response.t, string) result
(** Decode a JSON string as an issues/PRs response. *)

val decode_discussions_response :
  string -> (Graphql_types.Discussions_response.t, string) result
(** Decode a JSON string as a discussions response. *)
+260
repowatch/lib/graphql_types.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Pagination state of a connection: whether another page exists and the
   cursor to resume from. *)
module Page_info = struct
  type t = { has_next_page : bool; end_cursor : string option }

  let make ~has_next_page ~end_cursor = { end_cursor; has_next_page }
  let has_next_page { has_next_page; _ } = has_next_page
  let end_cursor { end_cursor; _ } = end_cursor
end
(* An author, identified by login. [ghost] is the author whose login is
   "ghost". *)
module Author = struct
  type t = { login : string }

  let make ~login = { login }
  let ghost = make ~login:"ghost"
  let login { login } = login
end
(* A label, identified by its name. *)
module Label = struct
  type t = { name : string }

  let make ~name = { name }
  let name { name } = name
end
(* A comment: its author, plain-text body and creation/update timestamps
   (kept as the strings received). *)
module Comment = struct
  type t = {
    author : Author.t;
    body_text : string;
    created_at : string;
    updated_at : string;
  }

  let make ~author ~body_text ~created_at ~updated_at =
    { updated_at; created_at; body_text; author }

  let author { author; _ } = author
  let body_text { body_text; _ } = body_text
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
end
(* A timeline event. Every known event carries one timestamp string;
   [Unknown] stands for an event with none. *)
module Timeline_item = struct
  type t =
    | Issue_comment of { created_at : string }
    | Labeled_event of { created_at : string }
    | Unlabeled_event of { created_at : string }
    | Closed_event of { created_at : string }
    | Reopened_event of { created_at : string }
    | Pr_commit of { committed_date : string }
    | Pr_review of { created_at : string }
    | Merged_event of { created_at : string }
    | Unknown

  (* The event's timestamp; for a commit this is its commit date. *)
  let created_at item =
    match item with
    | Issue_comment { created_at }
    | Labeled_event { created_at }
    | Unlabeled_event { created_at }
    | Closed_event { created_at }
    | Reopened_event { created_at }
    | Pr_review { created_at }
    | Merged_event { created_at } ->
        Some created_at
    | Pr_commit { committed_date } -> Some committed_date
    | Unknown -> None
end
(* One issue: identity, timestamps (as strings), body, state, author,
   labels, comments with their total count, and timeline items. *)
module Issue_node = struct
  type t = {
    number : int;
    title : string;
    url : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    body_text : string;
    state : string;
    author : Author.t;
    labels : Label.t list;
    comments : Comment.t list;
    comments_total_count : int;
    timeline_items : Timeline_item.t list;
  }

  let make ~number ~title ~url ~created_at ~updated_at ~closed_at ~body_text
      ~state ~author ~labels ~comments ~comments_total_count ~timeline_items =
    {
      number; title; url;
      created_at; updated_at; closed_at;
      body_text; state; author;
      labels; comments; comments_total_count; timeline_items;
    }

  let number { number; _ } = number
  let title { title; _ } = title
  let url { url; _ } = url
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
  let closed_at { closed_at; _ } = closed_at
  let body_text { body_text; _ } = body_text
  let state { state; _ } = state
  let author { author; _ } = author
  let labels { labels; _ } = labels
  let comments { comments; _ } = comments
  let comments_total_count { comments_total_count; _ } = comments_total_count
  let timeline_items { timeline_items; _ } = timeline_items
end
(* One pull request: the issue-like fields plus merge time, diff statistics,
   mergeability and draft status. *)
module Pr_node = struct
  type t = {
    number : int;
    title : string;
    url : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    merged_at : string option;
    body_text : string;
    state : string;
    additions : int;
    deletions : int;
    changed_files : int;
    mergeable : string;
    is_draft : bool;
    author : Author.t;
    labels : Label.t list;
    comments : Comment.t list;
    comments_total_count : int;
    timeline_items : Timeline_item.t list;
  }

  let make ~number ~title ~url ~created_at ~updated_at ~closed_at ~merged_at
      ~body_text ~state ~additions ~deletions ~changed_files ~mergeable
      ~is_draft ~author ~labels ~comments ~comments_total_count ~timeline_items
      =
    {
      number; title; url;
      created_at; updated_at; closed_at; merged_at;
      body_text; state;
      additions; deletions; changed_files;
      mergeable; is_draft; author;
      labels; comments; comments_total_count; timeline_items;
    }

  let number { number; _ } = number
  let title { title; _ } = title
  let url { url; _ } = url
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
  let closed_at { closed_at; _ } = closed_at
  let merged_at { merged_at; _ } = merged_at
  let body_text { body_text; _ } = body_text
  let state { state; _ } = state
  let additions { additions; _ } = additions
  let deletions { deletions; _ } = deletions
  let changed_files { changed_files; _ } = changed_files
  let mergeable { mergeable; _ } = mergeable
  let is_draft { is_draft; _ } = is_draft
  let author { author; _ } = author
  let labels { labels; _ } = labels
  let comments { comments; _ } = comments
  let comments_total_count { comments_total_count; _ } = comments_total_count
  let timeline_items { timeline_items; _ } = timeline_items
end
(* One page of issues together with its pagination info. *)
module Issues_connection = struct
  type t = { page_info : Page_info.t; nodes : Issue_node.t list }

  let make ~page_info ~nodes = { nodes; page_info }
  let page_info { page_info; _ } = page_info
  let nodes { nodes; _ } = nodes
end
(* One page of pull requests together with its pagination info. *)
module Prs_connection = struct
  type t = { page_info : Page_info.t; nodes : Pr_node.t list }

  let make ~page_info ~nodes = { nodes; page_info }
  let page_info { page_info; _ } = page_info
  let nodes { nodes; _ } = nodes
end
(* One discussion: identity, update timestamp (as a string), body, author,
   category name, comment count and whether it is answered. *)
module Discussion_node = struct
  type t = {
    number : int;
    title : string;
    url : string;
    updated_at : string;
    body_text : string;
    author : Author.t;
    category : string;
    comments_count : int;
    answered : bool;
  }

  let make ~number ~title ~url ~updated_at ~body_text ~author ~category
      ~comments_count ~answered =
    {
      number; title; url;
      updated_at; body_text; author;
      category; comments_count; answered;
    }

  let number { number; _ } = number
  let title { title; _ } = title
  let url { url; _ } = url
  let updated_at { updated_at; _ } = updated_at
  let body_text { body_text; _ } = body_text
  let author { author; _ } = author
  let category { category; _ } = category
  let comments_count { comments_count; _ } = comments_count
  let answered { answered; _ } = answered
end
(* Result of the issues/PRs query: one issues connection and one pull
   requests connection. *)
module Repository_response = struct
  type t = {
    issues : Issues_connection.t;
    pull_requests : Prs_connection.t;
  }

  let make ~issues ~pull_requests = { pull_requests; issues }
  let issues { issues; _ } = issues
  let pull_requests { pull_requests; _ } = pull_requests
end
(* Result of the discussions query: the list of discussion nodes. *)
module Discussions_response = struct
  type t = { discussions : Discussion_node.t list }

  let make ~discussions = { discussions }
  let discussions { discussions } = discussions
end
+357
repowatch/lib/graphql_types.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** GraphQL response types for GitHub API.
77+88+ This module defines types for decoding GitHub GraphQL API responses for
99+ issues, pull requests, and discussions queries. *)
1010+1111+(** {1 Pagination} *)
module Page_info : sig
  type t
  (** Where a page sits within a paginated GraphQL connection. *)

  val make : has_next_page:bool -> end_cursor:string option -> t
  (** [make ~has_next_page ~end_cursor] builds pagination info from its two
      components. *)

  val has_next_page : t -> bool
  (** [true] when further pages are available after this one. *)

  val end_cursor : t -> string option
  (** The cursor to pass when requesting the next page, if there is one. *)
end
2626+2727+(** {1 User Information} *)
module Author : sig
  type t
  (** A GitHub account (user or organization) that authored something. *)

  val make : login:string -> t
  (** [make ~login] is the author with login name [login]. *)

  val ghost : t
  (** Placeholder author used when the original account has been deleted. *)

  val login : t -> string
  (** The account's login name; ["ghost"] for deleted accounts. *)
end
4242+4343+(** {1 Labels} *)
module Label : sig
  type t
  (** A label attached to an issue or pull request. *)

  val make : name:string -> t
  (** [make ~name] is the label called [name]. *)

  val name : t -> string
  (** The label's name. *)
end
5555+5656+(** {1 Comments} *)
module Comment : sig
  type t
  (** One comment posted on an issue, pull request, or discussion. *)

  val make :
    author:Author.t ->
    body_text:string ->
    created_at:string ->
    updated_at:string ->
    t
  (** [make ~author ~body_text ~created_at ~updated_at] builds a comment from
      its four fields. *)

  val author : t -> Author.t
  (** Who wrote the comment. *)

  val body_text : t -> string
  (** The comment body as plain text. *)

  val created_at : t -> string
  (** Creation timestamp (ISO 8601). *)

  val updated_at : t -> string
  (** Timestamp of the most recent edit (ISO 8601). *)
end
8282+8383+(** {1 Timeline Events} *)
module Timeline_item : sig
  type t =
    | Issue_comment of { created_at : string }
    | Labeled_event of { created_at : string }
    | Unlabeled_event of { created_at : string }
    | Closed_event of { created_at : string }
    | Reopened_event of { created_at : string }
    | Pr_commit of { committed_date : string }
    | Pr_review of { created_at : string }
    | Merged_event of { created_at : string }
    | Unknown
  (** An entry in the timeline of an issue or pull request.

      Each recognised event carries the time at which it happened, so that
      items can be filtered by whether they saw activity in a given week.
      [Unknown] carries no data. *)

  val created_at : t -> string option
  (** [created_at item] is the timestamp attached to [item], when it has
      one. *)
end
104104+105105+(** {1 Issues} *)
module Issue_node : sig
  type t
  (** An issue as returned by the GitHub GraphQL API. *)

  val make :
    number:int ->
    title:string ->
    url:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    body_text:string ->
    state:string ->
    author:Author.t ->
    labels:Label.t list ->
    comments:Comment.t list ->
    comments_total_count:int ->
    timeline_items:Timeline_item.t list ->
    t
  (** Build an issue node; every field is supplied explicitly. *)

  val number : t -> int
  (** Issue number within its repository. *)

  val title : t -> string
  (** Title of the issue. *)

  val url : t -> string
  (** Link to the issue on GitHub. *)

  val created_at : t -> string
  (** Creation timestamp (ISO 8601). *)

  val updated_at : t -> string
  (** Timestamp of the latest update (ISO 8601). *)

  val closed_at : t -> string option
  (** Closing timestamp; [None] while the issue is not closed. *)

  val body_text : t -> string
  (** Issue body as plain text. *)

  val state : t -> string
  (** State of the issue, either ["OPEN"] or ["CLOSED"]. *)

  val author : t -> Author.t
  (** Who opened the issue. *)

  val labels : t -> Label.t list
  (** Labels currently attached to the issue. *)

  val comments : t -> Comment.t list
  (** The recent comments included with the node. *)

  val comments_total_count : t -> int
  (** Total comment count. *)

  val timeline_items : t -> Timeline_item.t list
  (** The recent timeline events included with the node. *)
end
167167+168168+(** {1 Pull Requests} *)
module Pr_node : sig
  type t
  (** A pull request as returned by the GitHub GraphQL API. *)

  val make :
    number:int ->
    title:string ->
    url:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    merged_at:string option ->
    body_text:string ->
    state:string ->
    additions:int ->
    deletions:int ->
    changed_files:int ->
    mergeable:string ->
    is_draft:bool ->
    author:Author.t ->
    labels:Label.t list ->
    comments:Comment.t list ->
    comments_total_count:int ->
    timeline_items:Timeline_item.t list ->
    t
  (** Build a pull-request node; every field is supplied explicitly. *)

  val number : t -> int
  (** Pull-request number within its repository. *)

  val title : t -> string
  (** Title of the pull request. *)

  val url : t -> string
  (** Link to the pull request on GitHub. *)

  val created_at : t -> string
  (** Creation timestamp (ISO 8601). *)

  val updated_at : t -> string
  (** Timestamp of the latest update (ISO 8601). *)

  val closed_at : t -> string option
  (** Closing timestamp; [None] while the pull request is not closed. *)

  val merged_at : t -> string option
  (** Merge timestamp; [None] when the pull request has not been merged. *)

  val body_text : t -> string
  (** Pull-request body as plain text. *)

  val state : t -> string
  (** State of the pull request: ["OPEN"], ["CLOSED"], or ["MERGED"]. *)

  val additions : t -> int
  (** Number of lines added. *)

  val deletions : t -> int
  (** Number of lines deleted. *)

  val changed_files : t -> int
  (** Number of files touched. *)

  val mergeable : t -> string
  (** Mergeability: ["MERGEABLE"], ["CONFLICTING"], or ["UNKNOWN"]. *)

  val is_draft : t -> bool
  (** [true] for draft pull requests. *)

  val author : t -> Author.t
  (** Who opened the pull request. *)

  val labels : t -> Label.t list
  (** Labels currently attached to the pull request. *)

  val comments : t -> Comment.t list
  (** The recent comments included with the node. *)

  val comments_total_count : t -> int
  (** Total comment count. *)

  val timeline_items : t -> Timeline_item.t list
  (** The recent timeline events included with the node. *)
end
254254+255255+(** {1 Connections (Paginated Lists)} *)
module Issues_connection : sig
  type t
  (** One page of a paginated issue listing. *)

  val make : page_info:Page_info.t -> nodes:Issue_node.t list -> t
  (** [make ~page_info ~nodes] pairs a page of issues with its pagination
      info. *)

  val page_info : t -> Page_info.t
  (** Pagination info for this page. *)

  val nodes : t -> Issue_node.t list
  (** The issues on this page. *)
end
module Prs_connection : sig
  type t
  (** One page of a paginated pull-request listing. *)

  val make : page_info:Page_info.t -> nodes:Pr_node.t list -> t
  (** [make ~page_info ~nodes] pairs a page of pull requests with its
      pagination info. *)

  val page_info : t -> Page_info.t
  (** Pagination info for this page. *)

  val nodes : t -> Pr_node.t list
  (** The pull requests on this page. *)
end
284284+285285+(** {1 Discussions} *)
module Discussion_node : sig
  type t
  (** A discussion as returned by the GitHub GraphQL API. *)

  val make :
    number:int ->
    title:string ->
    url:string ->
    updated_at:string ->
    body_text:string ->
    author:Author.t ->
    category:string ->
    comments_count:int ->
    answered:bool ->
    t
  (** Build a discussion node; every field is supplied explicitly. *)

  val number : t -> int
  (** Discussion number within its repository. *)

  val title : t -> string
  (** Title of the discussion. *)

  val url : t -> string
  (** Link to the discussion on GitHub. *)

  val updated_at : t -> string
  (** Timestamp of the latest update (ISO 8601). *)

  val body_text : t -> string
  (** Discussion body as plain text. *)

  val author : t -> Author.t
  (** Who started the discussion. *)

  val category : t -> string
  (** Name of the category the discussion belongs to. *)

  val comments_count : t -> int
  (** How many comments the discussion has. *)

  val answered : t -> bool
  (** [true] when an answer has been accepted. *)
end
331331+332332+(** {1 API Responses} *)
module Repository_response : sig
  type t
  (** Result of the GraphQL query that fetches issues and pull requests. *)

  val make : issues:Issues_connection.t -> pull_requests:Prs_connection.t -> t
  (** [make ~issues ~pull_requests] bundles the two connections. *)

  val issues : t -> Issues_connection.t
  (** The page of issues. *)

  val pull_requests : t -> Prs_connection.t
  (** The page of pull requests. *)
end
module Discussions_response : sig
  type t
  (** Result of the GraphQL query that fetches discussions. *)

  val make : discussions:Discussion_node.t list -> t
  (** [make ~discussions] wraps a list of discussion nodes. *)

  val discussions : t -> Discussion_node.t list
  (** The wrapped discussion nodes. *)
end
+94
repowatch/lib/loader.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Decode the JSON file at [path]; a thin wrapper over [Codec.decode_file]. *)
let load_file fs path = path |> Codec.decode_file fs
(* Like [load_file], but raises [Failure] carrying the error message instead
   of returning [Error]. *)
let load_file_exn fs path =
  load_file fs path |> function
  | Error msg -> failwith msg
  | Ok week -> week
(* A name counts as a JSON file when it ends in ".json" and has at least one
   character before that suffix (so a bare ".json" is rejected). *)
let is_json_file name =
  let suffix = ".json" in
  String.length name > String.length suffix && String.ends_with ~suffix name
(* [find_json_files_rec fs dir acc] prepends to [acc] the path of every JSON
   file (per [is_json_file]) found under [dir], descending into
   subdirectories. Symlinks are followed when classifying entries. Each path
   is [dir] joined with the entry names beneath it via [Filename.concat].
   The result is in no particular order; [find_json_files] sorts it. *)
let rec find_json_files_rec fs dir acc =
  let entries =
    (* Best effort: a directory that cannot be listed contributes nothing.
       Only I/O errors are absorbed. A catch-all handler here would also
       swallow fiber cancellation and genuine programming errors. *)
    try Eio.Path.read_dir Eio.Path.(fs / dir) with Eio.Io _ -> []
  in
  List.fold_left
    (fun acc entry ->
      let entry_path = Filename.concat dir entry in
      match Eio.Path.kind ~follow:true Eio.Path.(fs / entry_path) with
      | `Directory -> find_json_files_rec fs entry_path acc
      | `Regular_file when is_json_file entry -> entry_path :: acc
      | _ -> acc)
    acc entries
(* All JSON files under [dir], sorted with [String.compare] on their paths. *)
let find_json_files fs dir =
  let unsorted = find_json_files_rec fs dir [] in
  List.sort String.compare unsorted
(* Load every JSON file under [dir], all-or-nothing: [Ok] with the decoded
   data when every file loads, otherwise [Error] with one (file, message)
   pair per failing file. Files are processed in [find_json_files] order. *)
let load_directory fs dir =
  let loaded, failed =
    find_json_files fs dir
    |> List.partition_map (fun file ->
           match load_file fs file with
           | Ok week -> Either.Left week
           | Error msg -> Either.Right (file, msg))
  in
  match failed with [] -> Ok loaded | _ :: _ -> Error failed
(* Load every JSON file under [dir], keeping successes and failures side by
   side. Both lists preserve [find_json_files] order. *)
let load_directory_partial fs dir =
  let rec go loaded failed = function
    | [] -> (List.rev loaded, List.rev failed)
    | file :: rest -> (
        match load_file fs file with
        | Ok week -> go (week :: loaded) failed rest
        | Error msg -> go loaded ((file, msg) :: failed) rest)
  in
  go [] [] (find_json_files fs dir)
(* [find_repos fs data_dir] scans two directory levels below [data_dir]
   (owner, then repo) and returns every "owner/repo" pair for which both
   levels are directories, sorted with [String.compare]. Symlinks are
   followed when classifying entries. *)
let find_repos fs data_dir =
  (* Best effort: a directory that cannot be listed is treated as empty.
     Only I/O errors are absorbed; a catch-all handler would also swallow
     fiber cancellation and genuine programming errors. *)
  let list_dir path = try Eio.Path.read_dir path with Eio.Io _ -> [] in
  let is_dir path =
    match Eio.Path.kind ~follow:true path with
    | `Directory -> true
    | _ -> false
  in
  list_dir Eio.Path.(fs / data_dir)
  |> List.fold_left
       (fun acc owner ->
         let owner_dir = Filename.concat data_dir owner in
         let owner_path = Eio.Path.(fs / owner_dir) in
         if not (is_dir owner_path) then acc
         else
           List.fold_left
             (fun acc repo ->
               if is_dir Eio.Path.(fs / Filename.concat owner_dir repo) then
                 Printf.sprintf "%s/%s" owner repo :: acc
               else acc)
             acc (list_dir owner_path))
       []
  |> List.sort String.compare
(* Load everything stored under [data_dir/owner/repo] via [load_directory]. *)
let load_repo fs ~data_dir ~owner ~repo =
  [ owner; repo ] |> List.fold_left Filename.concat data_dir |> load_directory fs
+76
repowatch/lib/loader.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** File loading utilities for GitHub activity data.
77+88+ This module provides functions for loading JSON files from the filesystem
99+ and discovering files in directory structures. *)
1010+1111+(** {1 File Loading} *)
val load_file :
  Eio.Fs.dir_ty Eio.Path.t -> string -> (Types.Week_data.t, string) result
(** [load_file fs path] reads the JSON file at [path] and decodes it, giving
    [Error msg] when that fails.

    @param fs The filesystem capability
    @param path Location of the JSON file *)
val load_file_exn : Eio.Fs.dir_ty Eio.Path.t -> string -> Types.Week_data.t
(** [load_file_exn fs path] is {!load_file} for callers that prefer an
    exception to a [result].

    @raise Failure with the error message when loading fails. *)
2323+2424+(** {1 Directory Loading} *)
val find_json_files : Eio.Fs.dir_ty Eio.Path.t -> string -> string list
(** [find_json_files fs dir] lists every file under [dir] whose name ends in
    [.json], searching subdirectories recursively.

    Each result is [dir] joined with the file's location beneath it. The list
    is sorted with [String.compare] on those paths. Note that for
    [week-NN-YYYY.json] names this orders by week number before year, so the
    order is chronological only among files of the same year in the same
    directory.

    Directories that cannot be read are skipped silently, so an unreadable or
    missing [dir] gives [[]].

    @param fs The filesystem capability
    @param dir Root directory to search *)
val load_directory :
  Eio.Fs.dir_ty Eio.Path.t ->
  string ->
  (Types.Week_data.t list, (string * string) list) result
(** [load_directory fs dir] loads every JSON file found by
    {!find_json_files} under [dir].

    Returns [Ok data] when every file loads, in {!find_json_files} order.
    Otherwise returns [Error errors], one (filename, error message) pair per
    file that failed; the data from files that did load is discarded (use
    {!load_directory_partial} to keep it).

    Because unreadable directories are skipped during discovery, a missing or
    unreadable [dir] yields [Ok []] rather than an error. *)
val load_directory_partial :
  Eio.Fs.dir_ty Eio.Path.t ->
  string ->
  Types.Week_data.t list * (string * string) list
(** [load_directory_partial fs dir] attempts every JSON file under [dir] and
    returns the decoded data alongside the (filename, error message) pairs
    for the files that failed.

    Use this when whatever data is available should still be processed even
    though some files do not load. *)
5353+5454+(** {1 Repository Discovery} *)
val find_repos : Eio.Fs.dir_ty Eio.Path.t -> string -> string list
(** [find_repos fs data_dir] discovers the repositories stored in a ruminant
    data directory.

    Ruminant keeps data under [data/gh/{owner}/{repo}/]; the result is the
    sorted list of ["owner/repo"] strings for which both directory levels
    exist under [data_dir].

    @param fs The filesystem capability
    @param data_dir Root of the ruminant data directory (e.g., "data/gh") *)
val load_repo :
  Eio.Fs.dir_ty Eio.Path.t ->
  data_dir:string ->
  owner:string ->
  repo:string ->
  (Types.Week_data.t list, (string * string) list) result
(** [load_repo fs ~data_dir ~owner ~repo] is {!load_directory} applied to
    [data_dir/owner/repo].

    @param fs The filesystem capability
    @param data_dir Root of the ruminant data directory
    @param owner Repository owner
    @param repo Repository name *)
+260
repowatch/lib/printer.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* [truncate_string max_len s] shortens [s] to at most [max_len] bytes.

   Strings that already fit are returned unchanged. Longer strings are cut
   and given a trailing "...", with the ellipsis counted in the budget. When
   [max_len] is below 3 there is no room for the ellipsis, so the result is
   just the first [max_len] bytes (empty for [max_len <= 0]) rather than an
   [Invalid_argument] from [String.sub]. The cut is moved back to the start
   of a UTF-8 character so a multi-byte sequence is never split. *)
let truncate_string max_len s =
  let len = String.length s in
  if len <= max_len then s
  else if max_len < 3 then String.sub s 0 (max 0 max_len)
  else
    (* UTF-8 continuation bytes have the bit pattern 10xxxxxx. *)
    let is_continuation i = Char.code s.[i] land 0xC0 = 0x80 in
    (* [i] is the index of the first dropped byte; it must start a character. *)
    let rec boundary i =
      if i > 0 && is_continuation i then boundary (i - 1) else i
    in
    String.sub s 0 (boundary (max_len - 3)) ^ "..."
(* Four-line summary of a week's metadata: repository, year/week, period,
   and cache time. *)
let pp_metadata ppf m =
  let module M = Types.Metadata in
  Fmt.pf ppf
    "@[<v>Repository: %s@,Year: %d, Week: %d@,Period: %s to %s@,Cached: %s@]"
    (M.repo m) (M.year m) (M.week m) (M.week_start m) (M.week_end m)
    (M.cached_at m)
(* Print labels as " [a, b, c]"; print nothing at all for an empty list. *)
let pp_labels ppf = function
  | [] -> ()
  | names -> Fmt.pf ppf " [%a]" Fmt.(list ~sep:(any ", ") string) names
(* Multi-line rendering of an issue. The "Closed" line appears only when the
   issue has a closing timestamp. *)
let pp_issue ppf i =
  let module I = Types.Issue in
  let pp_closed ppf = function
    | None -> ()
    | Some closed -> Fmt.pf ppf "@, Closed: %s" closed
  in
  Fmt.pf ppf "@[<v>#%d %s@, Author: @%s@, State: %s%a@, Created: %s@, Updated: %s%a@]"
    (I.id i) (I.title i) (I.user i) (I.state i) pp_labels (I.labels i)
    (I.created_at i) (I.updated_at i) pp_closed (I.closed_at i)
(* One-line rendering of an issue: state mark ("o" for open, "x" otherwise),
   number, title truncated to 60, and author. *)
let pp_issue_short ppf i =
  let module I = Types.Issue in
  let state_mark = match I.state i with "open" -> "o" | _ -> "x" in
  let short_title = truncate_string 60 (I.title i) in
  Fmt.pf ppf "[%s] #%-5d %-60s @%s" state_mark (I.id i) short_title (I.user i)
(* Multi-line rendering of a pull request. The "Closed" and "Merged" lines
   appear only when the corresponding timestamp is present. *)
let pp_pr ppf p =
  let module P = Types.Pr in
  let pp_closed ppf = function
    | None -> ()
    | Some closed -> Fmt.pf ppf "@, Closed: %s" closed
  in
  let pp_merged ppf = function
    | None -> ()
    | Some merged -> Fmt.pf ppf "@, Merged: %s" merged
  in
  let draft_tag = if P.draft p then " [DRAFT]" else "" in
  Fmt.pf ppf
    "@[<v>#%d %s%s@, Author: @%s@, State: %s%a@, +%d -%d (%d files)@, \
     Mergeable: %s@, Created: %s@, Updated: %s%a%a@]"
    (P.id p) (P.title p) draft_tag (P.user p) (P.state p) pp_labels
    (P.labels p) (P.additions p) (P.deletions p) (P.changed_files p)
    (P.mergeable p) (P.created_at p) (P.updated_at p) pp_closed
    (P.closed_at p) pp_merged (P.merged_at p)
(* One-line rendering of a pull request. The state mark is "M" when a merge
   timestamp exists, otherwise "o" for state "open" and "x" for anything
   else; a "D" follows for drafts. *)
let pp_pr_short ppf p =
  let module P = Types.Pr in
  let state_mark =
    match (P.merged_at p, P.state p) with
    | Some _, _ -> "M"
    | None, "open" -> "o"
    | None, _ -> "x"
  in
  let draft_mark = match P.draft p with true -> "D" | false -> " " in
  let short_title = truncate_string 50 (P.title p) in
  Fmt.pf ppf "[%s%s] #%-5d +%-5d -%-5d %-50s @%s"
    state_mark draft_mark (P.id p) (P.additions p) (P.deletions p)
    short_title (P.user p)
(* Multi-line rendering of a discussion; " [ANSWERED]" is appended to the
   comment count when the discussion is answered. *)
let pp_discussion ppf d =
  let module D = Types.Discussion in
  let answered_tag = if D.answered d then " [ANSWERED]" else "" in
  Fmt.pf ppf "@[<v>#%d %s@, Author: @%s@, Category: %s@, Comments: %d%s@, Updated: %s@]"
    (D.id d) (D.title d) (D.user d) (D.category d) (D.comments d)
    answered_tag (D.updated_at d)
(* One-line rendering of a discussion: "A" mark when answered, number, title
   truncated to 50, category, and author. *)
let pp_discussion_short ppf d =
  let module D = Types.Discussion in
  let answered_mark = match D.answered d with true -> "A" | false -> " " in
  let short_title = truncate_string 50 (D.title d) in
  Fmt.pf ppf "[%s] #%-5d %-50s (%s) @%s"
    answered_mark (D.id d) short_title (D.category d) (D.user d)
(* One bullet line per release asset; the size is shown in whole kilobytes
   (integer division by 1024). *)
let pp_asset ppf a =
  let module A = Types.Asset in
  Fmt.pf ppf " - %s (%d KB, %d downloads)" (A.name a) (A.size a / 1024)
    (A.download_count a)
(* Multi-line rendering of a release, with optional pre-release/draft tags
   and an "Assets" section that is omitted when there are none. *)
let pp_release ppf r =
  let module R = Types.Release in
  let tag_if cond text = if cond then text else "" in
  let pp_assets ppf = function
    | [] -> ()
    | assets ->
        Fmt.pf ppf "@, Assets:@, %a"
          Fmt.(list ~sep:(any "@, ") pp_asset) assets
  in
  Fmt.pf ppf "@[<v>%s (%s)%s%s@, Author: @%s@, Published: %s@, URL: %s%a@]"
    (R.tag_name r) (R.name r)
    (tag_if (R.prerelease r) " [PRE-RELEASE]")
    (tag_if (R.draft r) " [DRAFT]")
    (R.author r) (R.published_at r) (R.html_url r) pp_assets (R.assets r)
(* One-line rendering of a release: a two-character mark ("P" for
   pre-release, "D" for draft, blank otherwise), tag, name truncated to 40,
   and author. *)
let pp_release_short ppf r =
  let module R = Types.Release in
  let mark cond letter = if cond then letter else " " in
  let marks = mark (R.prerelease r) "P" ^ mark (R.draft r) "D" in
  let short_name = truncate_string 40 (R.name r) in
  Fmt.pf ppf "[%s] %-20s %-40s @%s" marks (R.tag_name r) short_name (R.author r)
(* Metadata followed by per-category counts for one week of data. *)
let pp_week_summary ppf w =
  let module W = Types.Week_data in
  let issue_count = List.length (W.issues w) in
  let pr_count = List.length (W.prs w) in
  let discussion_count = List.length (W.discussions w) in
  let release_count = List.length (W.releases w) in
  let gfi_count = List.length (W.good_first_issues w) in
  let user_count = List.length (W.users w) in
  Fmt.pf ppf "@[<v>%a@,@,Activity: %d issues, %d PRs, %d discussions, %d releases@,\
              Good first issues: %d@,Active users: %d@]"
    pp_metadata (W.metadata w) issue_count pr_count discussion_count
    release_count gfi_count user_count
(* Full rendering of one week: the summary, then one section per non-empty
   category. The vertical box opened by the first call is closed by the
   last one. *)
let pp_week_data ppf w =
  let module W = Types.Week_data in
  (* Print one section with the given heading format, or nothing when the
     category is empty. *)
  let section heading pp_item = function
    | [] -> ()
    | items -> Fmt.pf ppf heading Fmt.(list ~sep:(any "@,@, ") pp_item) items
  in
  Fmt.pf ppf "@[<v>%a@,@," pp_week_summary w;
  section "Issues:@, @[<v>%a@]@,@," pp_issue (W.issues w);
  section "Pull Requests:@, @[<v>%a@]@,@," pp_pr (W.prs w);
  section "Discussions:@, @[<v>%a@]@,@," pp_discussion (W.discussions w);
  section "Releases:@, @[<v>%a@]@," pp_release (W.releases w);
  Fmt.pf ppf "@]"
(* Activity counters. Field order must stay in step with the declaration in
   the interface file. *)
type stats = {
  total_issues : int;  (* all issues *)
  open_issues : int;  (* issues whose state is "open" *)
  closed_issues : int;  (* issues in any other state *)
  total_prs : int;  (* all pull requests *)
  open_prs : int;  (* unmerged PRs whose state is "open" *)
  merged_prs : int;  (* PRs with a merge timestamp *)
  closed_prs : int;  (* unmerged PRs in any other state *)
  draft_prs : int;  (* draft PRs, counted independently of state *)
  total_discussions : int;
  answered_discussions : int;
  total_releases : int;
  total_users : int;
  total_additions : int;  (* lines added, summed over PRs *)
  total_deletions : int;  (* lines deleted, summed over PRs *)
  total_files_changed : int;  (* changed-file counts, summed over PRs *)
}

(* The all-zero record: the identity element for [aggregate_stats]. *)
let empty_stats =
  {
    total_issues = 0;
    open_issues = 0;
    closed_issues = 0;
    total_prs = 0;
    open_prs = 0;
    merged_prs = 0;
    closed_prs = 0;
    draft_prs = 0;
    total_discussions = 0;
    answered_discussions = 0;
    total_releases = 0;
    total_users = 0;
    total_additions = 0;
    total_deletions = 0;
    total_files_changed = 0;
  }
(* Derive the counters for a single week of data.

   Issues are "open" when their state is exactly "open", closed otherwise.
   A pull request with a merge timestamp counts as merged regardless of its
   state; the remaining ones are split into open and closed the same way as
   issues. Drafts are counted independently of that classification. *)
let compute_stats w =
  let module W = Types.Week_data in
  let module P = Types.Pr in
  let count pred items =
    List.fold_left (fun n item -> if pred item then n + 1 else n) 0 items
  in
  let sum field items =
    List.fold_left (fun n item -> n + field item) 0 items
  in
  let issues = W.issues w in
  let prs = W.prs w in
  let discussions = W.discussions w in
  let is_merged pr = Option.is_some (P.merged_at pr) in
  let is_open pr = (not (is_merged pr)) && P.state pr = "open" in
  let total_issues = List.length issues in
  let open_issues = count (fun i -> Types.Issue.state i = "open") issues in
  let total_prs = List.length prs in
  let merged_prs = count is_merged prs in
  let open_prs = count is_open prs in
  {
    total_issues;
    open_issues;
    closed_issues = total_issues - open_issues;
    total_prs;
    open_prs;
    merged_prs;
    (* Everything that is neither merged nor open. *)
    closed_prs = total_prs - merged_prs - open_prs;
    draft_prs = count P.draft prs;
    total_discussions = List.length discussions;
    answered_discussions = count Types.Discussion.answered discussions;
    total_releases = List.length (W.releases w);
    total_users = List.length (W.users w);
    total_additions = sum P.additions prs;
    total_deletions = sum P.deletions prs;
    total_files_changed = sum P.changed_files prs;
  }
(* Field-wise sum of a list of stats, starting from [empty_stats]. Every
   field is added, [total_users] included, so the result is a sum of the
   per-input counts. *)
let aggregate_stats stats_list =
  let add a b =
    {
      total_issues = a.total_issues + b.total_issues;
      open_issues = a.open_issues + b.open_issues;
      closed_issues = a.closed_issues + b.closed_issues;
      total_prs = a.total_prs + b.total_prs;
      open_prs = a.open_prs + b.open_prs;
      merged_prs = a.merged_prs + b.merged_prs;
      closed_prs = a.closed_prs + b.closed_prs;
      draft_prs = a.draft_prs + b.draft_prs;
      total_discussions = a.total_discussions + b.total_discussions;
      answered_discussions = a.answered_discussions + b.answered_discussions;
      total_releases = a.total_releases + b.total_releases;
      total_users = a.total_users + b.total_users;
      total_additions = a.total_additions + b.total_additions;
      total_deletions = a.total_deletions + b.total_deletions;
      total_files_changed = a.total_files_changed + b.total_files_changed;
    }
  in
  List.fold_left add empty_stats stats_list
(* Six-line rendering of the counters, one category per line. *)
let pp_stats ppf
    {
      total_issues;
      open_issues;
      closed_issues;
      total_prs;
      open_prs;
      merged_prs;
      closed_prs;
      draft_prs;
      total_discussions;
      answered_discussions;
      total_releases;
      total_users;
      total_additions;
      total_deletions;
      total_files_changed;
    } =
  Fmt.pf ppf "@[<v>\
              Issues: %d total (%d open, %d closed)@,\
              Pull Reqs: %d total (%d open, %d merged, %d closed, %d draft)@,\
              Discussions: %d total (%d answered)@,\
              Releases: %d@,\
              Users: %d@,\
              Code: +%d -%d (%d files)@]"
    total_issues open_issues closed_issues
    total_prs open_prs merged_prs closed_prs draft_prs
    total_discussions answered_discussions
    total_releases total_users
    total_additions total_deletions total_files_changed
(* Single-line rendering: the four category totals and the line deltas. *)
let pp_stats_compact ppf
    {
      total_issues;
      total_prs;
      total_discussions;
      total_releases;
      total_additions;
      total_deletions;
      _;
    } =
  Fmt.pf ppf "I:%d P:%d D:%d R:%d (+%d -%d)"
    total_issues total_prs total_discussions total_releases
    total_additions total_deletions
+82
repowatch/lib/printer.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Pretty-printing for GitHub activity data.
77+88+ This module provides formatters for displaying activity data in
99+ human-readable formats. *)
1010+1111+(** {1 Basic Formatters} *)
val pp_metadata : Types.Metadata.t Fmt.t
(** Multi-line rendering of a week's metadata. *)

val pp_issue : Types.Issue.t Fmt.t
(** Multi-line rendering of an issue. *)

val pp_issue_short : Types.Issue.t Fmt.t
(** Single-line rendering of an issue. *)

val pp_pr : Types.Pr.t Fmt.t
(** Multi-line rendering of a pull request. *)

val pp_pr_short : Types.Pr.t Fmt.t
(** Single-line rendering of a pull request. *)

val pp_discussion : Types.Discussion.t Fmt.t
(** Multi-line rendering of a discussion. *)

val pp_discussion_short : Types.Discussion.t Fmt.t
(** Single-line rendering of a discussion. *)

val pp_asset : Types.Asset.t Fmt.t
(** Single-line rendering of a release asset. *)

val pp_release : Types.Release.t Fmt.t
(** Multi-line rendering of a release, including its assets. *)

val pp_release_short : Types.Release.t Fmt.t
(** Single-line rendering of a release. *)
4242+4343+(** {1 Summary Formatters} *)
val pp_week_summary : Types.Week_data.t Fmt.t
(** Metadata plus per-category counts for one week. *)

val pp_week_data : Types.Week_data.t Fmt.t
(** The week summary followed by every issue, pull request, discussion, and
    release in full. *)
5050+5151+(** {1 Statistics} *)
type stats = {
  total_issues : int;
  open_issues : int;
  closed_issues : int;
  total_prs : int;
  open_prs : int;
  merged_prs : int;
  closed_prs : int;
  draft_prs : int;
  total_discussions : int;
  answered_discussions : int;
  total_releases : int;
  total_users : int;
  total_additions : int;
  total_deletions : int;
  total_files_changed : int;
}
(** Activity counters derived from weekly data. *)

val compute_stats : Types.Week_data.t -> stats
(** [compute_stats w] is the set of counters for the single week [w]. *)

val aggregate_stats : stats list -> stats
(** [aggregate_stats l] adds the elements of [l] together field by field;
    the empty list gives all zeros. *)

val pp_stats : stats Fmt.t
(** Multi-line rendering of the counters. *)

val pp_stats_compact : stats Fmt.t
(** Single-line rendering of the headline counters. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* [base_dir/gh/owner/repo], built one path segment at a time. *)
let data_dir ~base_dir ~owner ~repo =
  List.fold_left Eio.Path.( / ) base_dir [ "gh"; owner; repo ]
(* "week-WW-YYYY.json", with the week zero-padded to two digits and the year
   to four. *)
let week_filename ~week =
  let week_number = Week.week week in
  let year = Week.year week in
  Printf.sprintf "week-%02d-%04d.json" week_number year
(* Create [path] and any missing parents (mode 0o755); an existing directory
   is fine. NOTE(review): I/O errors are deliberately discarded here, so a
   failure to create the directory only surfaces later, when something tries
   to use it. *)
let ensure_dir path =
  match Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 path with
  | () -> ()
  | exception Eio.Io _ -> ()
(* Encode [data] and write it to [base_dir/gh/owner/repo/week-WW-YYYY.json],
   creating the directory first and truncating any existing file (mode
   0o644 for new files). *)
let save_week ~fs ~base_dir ~owner ~repo ~week ~data =
  let repo_dir = data_dir ~base_dir:Eio.Path.(fs / base_dir) ~owner ~repo in
  ensure_dir repo_dir;
  let target = Eio.Path.(repo_dir / week_filename ~week) in
  let encoded = Codec.encode_string data in
  Eio.Path.save ~create:(`Or_truncate 0o644) target encoded
(* Read and decode the stored file for [week]. Any I/O error and any decode
   error both give [None]. *)
let load_week ~fs ~base_dir ~owner ~repo ~week =
  let repo_dir = data_dir ~base_dir:Eio.Path.(fs / base_dir) ~owner ~repo in
  let source = Eio.Path.(repo_dir / week_filename ~week) in
  try Eio.Path.load source |> Codec.decode_string |> Result.to_option
  with Eio.Io _ -> None
(* [parse_week_filename filename] recognises names of exactly the form
   "week-WW-YYYY.json" (the shape produced by the "week-%02d-%04d.json"
   format) and returns the corresponding week.

   Anything else gives [None]: a different length (so names such as
   "week-01-2024.bak.json" are no longer mistaken for week files), a missing
   separator, or non-digit characters in either number (which also rules out
   the signs and underscores that [int_of_string] would accept). *)
let parse_week_filename filename =
  let is_digit c = c >= '0' && c <= '9' in
  (* The decimal number at [pos, pos + len), if that span is all digits. *)
  let number pos len =
    let field = String.sub filename pos len in
    if String.for_all is_digit field then Some (int_of_string field) else None
  in
  (* "week-" (5) + WW (2) + "-" (1) + YYYY (4) + ".json" (5) = 17 bytes. *)
  if
    String.length filename <> 17
    || (not (String.starts_with ~prefix:"week-" filename))
    || filename.[7] <> '-'
    || not (String.ends_with ~suffix:".json" filename)
  then None
  else
    match (number 5 2, number 8 4) with
    | Some week, Some year -> (
        (* How [Week.of_year_week] reports an out-of-range pair is not
           visible from here, so any failure is mapped to [None], as the
           original did. TODO: narrow this once its exception is known. *)
        try Some (Week.of_year_week ~year ~week) with _ -> None)
    | _ -> None
(* Weeks for which a file exists in the repository's data directory, sorted
   with [Week.compare]. Entries that do not parse as week filenames are
   skipped; an I/O error gives the empty list. *)
let list_cached_weeks ~fs ~base_dir ~owner ~repo =
  let repo_dir = data_dir ~base_dir:Eio.Path.(fs / base_dir) ~owner ~repo in
  try
    Eio.Path.read_dir repo_dir
    |> List.filter_map parse_week_filename
    |> List.sort Week.compare
  with Eio.Io _ -> []
+66
repowatch/lib/storage.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Persistent storage for sync results.
77+88+ This module provides functions to save and load weekly repository data
99+ in the ruminant JSON format, maintaining the directory structure
1010+ [data/gh/{owner}/{repo}/]. *)
1111+1212+(** {1 Path Construction} *)
val data_dir :
  base_dir:Eio.Fs.dir_ty Eio.Path.t ->
  owner:string ->
  repo:string ->
  Eio.Fs.dir_ty Eio.Path.t
(** [data_dir ~base_dir ~owner ~repo] is the path under which a repository's
    data lives: [base_dir/gh/{owner}/{repo}/]. Nothing is created on disk. *)
val week_filename : week:Week.t -> string
(** [week_filename ~week] is the name of the file holding [week]'s data,
    [week-{WW}-{YYYY}.json], with the week number zero-padded to two
    digits. *)
2525+2626+(** {1 Saving Data} *)
val save_week :
  fs:Eio.Fs.dir_ty Eio.Path.t ->
  base_dir:string ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  data:Types.Week_data.t ->
  unit
(** [save_week ~fs ~base_dir ~owner ~repo ~week ~data] writes [data] as JSON,
    creating the directory tree first when necessary and replacing any
    existing file for that week.

    The destination is [{base_dir}/gh/{owner}/{repo}/week-{WW}-{YYYY}.json]. *)
4040+4141+(** {1 Loading Data} *)
val load_week :
  fs:Eio.Fs.dir_ty Eio.Path.t ->
  base_dir:string ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  Types.Week_data.t option
(** [load_week ~fs ~base_dir ~owner ~repo ~week] reads back the data stored
    for [week], if any. The result is [None] when there is no file for that
    week. *)
val list_cached_weeks :
  fs:Eio.Fs.dir_ty Eio.Path.t ->
  base_dir:string ->
  owner:string ->
  repo:string ->
  Week.t list
(** [list_cached_weeks ~fs ~base_dir ~owner ~repo] is the list of weeks for
    which the repository has a cache file, in ascending [Week.compare] order.

    Entries of the repository directory whose name is not a week file name
    are skipped. The list is empty when the directory cannot be read. *)
6161+6262+(** {1 Directory Management} *)
val ensure_dir : Eio.Fs.dir_ty Eio.Path.t -> unit
(** [ensure_dir path] makes sure [path] exists as a directory, creating it
    and any missing parent directories. *)
+346
repowatch/lib/sync.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+module G = Graphql_types
(* Stage of a sync run, as reported through progress callbacks.  The sync
   functions in this file report [`Issues_prs], [`Discussions] and
   [`Releases]; [`Users] is declared but not reported here. *)
type phase = [ `Issues_prs | `Discussions | `Releases | `Users ]
(* Progress snapshot handed to [on_progress] callbacks. *)
type progress = {
  phase : phase; (* stage currently running *)
  current : int; (* page being requested; the sync functions count from 1 *)
  total : int option; (* number of pages, when known in advance *)
}
(* Outcome of syncing one week. *)
type sync_result = {
  week_data : Types.Week_data.t; (* the assembled snapshot *)
  issues_fetched : int; (* issue nodes kept for the week *)
  prs_fetched : int; (* PR nodes kept for the week *)
  pages_fetched : int; (* issues/PRs GraphQL requests made *)
  rate_limit_remaining : int option; (* as reported by the client afterwards *)
}
(* Upper bound on issues/PRs GraphQL requests per week. *)
let max_pages : int = 20

(* Consecutive pages without any active item after which paging stops. *)
let early_exit_threshold : int = 5
(* Mutable bookkeeping for the combined issues/PRs pagination loop: one
   cursor and one "exhausted" flag per connection, plus the current streak of
   pages that contributed no active item. *)
type pagination_state = {
  mutable issues_cursor : string option; (* sent as [issues_after] *)
  mutable prs_cursor : string option; (* sent as [prs_after] *)
  mutable issues_done : bool;
  mutable prs_done : bool;
  mutable pages_without_activity : int;
}
(* Page through the combined issues/PRs GraphQL query and collect the nodes
   that are active in [week].

   Returns [Ok (issues, prs, pages)], where [pages] is the number of requests
   made and both lists are in fetch order, or the first request/decoding
   error.  Paging stops when both connections are exhausted, after
   [max_pages] requests, or after [early_exit_threshold] consecutive pages
   that contributed no active item.  [on_progress], when present, is called
   with the 1-based page number before each request.

   Both connections are fetched by one query, so a connection that has
   already finished is still returned (from its stale cursor) on every later
   request.  Its nodes are ignored from then on: collecting them again would
   duplicate items and keep resetting the early-exit counter. *)
let sync_issues_prs ~client ~owner ~repo ~week ~on_progress =
  let state =
    {
      issues_cursor = None;
      prs_cursor = None;
      issues_done = false;
      prs_done = false;
      pages_without_activity = 0;
    }
  in
  (* Collected in reverse so that adding a page costs time proportional to
     that page only; reversed once when paging ends. *)
  let rev_issues = ref [] in
  let rev_prs = ref [] in
  (* Cursor to continue from, or [None] when the connection is finished.  A
     page that announces a successor but carries no cursor also counts as
     finished: following it would restart from the first page. *)
  let next_cursor page_info =
    if G.Page_info.has_next_page page_info then
      G.Page_info.end_cursor page_info
    else None
  in
  let rec loop page_count =
    if
      page_count >= max_pages
      || (state.issues_done && state.prs_done)
      || state.pages_without_activity >= early_exit_threshold
    then Ok page_count
    else begin
      let variables =
        Graphql.
          {
            owner;
            name = repo;
            issues_after = state.issues_cursor;
            prs_after = state.prs_cursor;
          }
      in
      Option.iter
        (fun f ->
          f { phase = `Issues_prs; current = page_count + 1; total = None })
        on_progress;
      match
        Github.graphql client ~query:Graphql.issues_prs_query ~variables
      with
      | Error e -> Error e
      | Ok response_body -> (
          match Graphql_codec.decode_repository_response response_body with
          | Error msg -> Error (Github.Parse_error msg)
          | Ok response ->
              let new_issues =
                if state.issues_done then []
                else begin
                  let conn = G.Repository_response.issues response in
                  (match
                     next_cursor (G.Issues_connection.page_info conn)
                   with
                  | Some _ as cursor -> state.issues_cursor <- cursor
                  | None -> state.issues_done <- true);
                  List.filter
                    (Filter.issue_active_in_week ~week)
                    (G.Issues_connection.nodes conn)
                end
              in
              let new_prs =
                if state.prs_done then []
                else begin
                  let conn = G.Repository_response.pull_requests response in
                  (match next_cursor (G.Prs_connection.page_info conn) with
                  | Some _ as cursor -> state.prs_cursor <- cursor
                  | None -> state.prs_done <- true);
                  List.filter
                    (Filter.pr_active_in_week ~week)
                    (G.Prs_connection.nodes conn)
                end
              in
              rev_issues := List.rev_append new_issues !rev_issues;
              rev_prs := List.rev_append new_prs !rev_prs;
              (* Track the streak of pages that added nothing. *)
              (match (new_issues, new_prs) with
              | [], [] ->
                  state.pages_without_activity <-
                    state.pages_without_activity + 1
              | _ -> state.pages_without_activity <- 0);
              loop (page_count + 1))
    end
  in
  match loop 0 with
  | Ok pages -> Ok (List.rev !rev_issues, List.rev !rev_prs, pages)
  | Error e -> Error e
(* Fetch the repository's discussions with a single GraphQL request and keep
   those that [Filter.discussion_active_in_week] accepts for [week].

   [on_progress], when present, is called once (page 1 of 1) before the
   request.  A response that fails to decode is reported as
   [Github.Parse_error]. *)
let sync_discussions ~client ~owner ~repo ~week ~on_progress =
  (* Same variable record as the issues/PRs query, with no cursors set. *)
  let variables =
    Graphql.{ owner; name = repo; issues_after = None; prs_after = None }
  in
  (match on_progress with
  | Some notify -> notify { phase = `Discussions; current = 1; total = Some 1 }
  | None -> ());
  let decode body =
    Graphql_codec.decode_discussions_response body
    |> Result.map_error (fun msg -> Github.Parse_error msg)
  in
  let keep_active response =
    G.Discussions_response.discussions response
    |> List.filter (Filter.discussion_active_in_week ~week)
  in
  let fetched =
    Github.graphql client ~query:Graphql.discussions_query ~variables
  in
  Result.map keep_active (Result.bind fetched decode)
(* Collect the releases published within [week] from the REST releases
   listing.

   Pages are requested starting at 1.  Paging stops on an empty page, once
   every decoded release on a page was published before the week started, or
   after page 5.  Entries that [Transform.release_of_json] cannot decode are
   dropped.  A release whose [published_at] is not valid RFC 3339 is never
   selected and counts as "before the week" for the stop test.  Both week
   bounds are compared inclusively. *)
let sync_releases ~client ~owner ~repo ~week ~on_progress =
  let first_instant = Week.start_ptime week in
  let last_instant = Week.end_ptime week in
  let published release =
    match Ptime.of_rfc3339 (Types.Release.published_at release) with
    | Ok (instant, _, _) -> Some instant
    | Error _ -> None
  in
  let in_week release =
    match published release with
    | Some t ->
        Ptime.compare t first_instant >= 0 && Ptime.compare t last_instant <= 0
    | None -> false
  in
  let predates_week release =
    match published release with
    | Some t -> Ptime.compare t first_instant < 0
    | None -> true
  in
  let last_page = 5 in
  let rec fetch page collected =
    if page > last_page then Ok collected
    else begin
      (match on_progress with
      | Some notify -> notify { phase = `Releases; current = page; total = None }
      | None -> ());
      match Github.get_releases client ~owner ~repo ~page with
      | Error e -> Error e
      | Ok body -> (
          match Jsont_bytesrw.decode_string (Jsont.list Jsont.json) body with
          | Error msg -> Error (Github.Parse_error msg)
          | Ok [] -> Ok collected
          | Ok entries ->
              let releases =
                List.filter_map Transform.release_of_json entries
              in
              let collected = collected @ List.filter in_week releases in
              if List.for_all predates_week releases then Ok collected
              else fetch (page + 1) collected)
    end
  in
  fetch 1 []
(* Sync one week: issues and PRs first, then discussions, then releases.
   The first stage that fails ends the sync with its error.

   On success the fetched nodes are converted to [Types] values, the
   good-first-issue subset is derived from the issue nodes, and the user list
   is extracted from a provisional snapshot built without users.
   [issues_fetched] and [prs_fetched] count the nodes returned by the
   issues/PRs stage, i.e. after week filtering. *)
let sync_week ~client ~owner ~repo ~week ?on_progress () =
  let ( let* ) = Result.bind in
  let* (issue_nodes, pr_nodes, pages) =
    sync_issues_prs ~client ~owner ~repo ~week ~on_progress
  in
  let* discussion_nodes =
    sync_discussions ~client ~owner ~repo ~week ~on_progress
  in
  let* releases = sync_releases ~client ~owner ~repo ~week ~on_progress in
  let issues = List.map Transform.issue_of_node issue_nodes in
  let prs = List.map Transform.pr_of_node pr_nodes in
  let discussions = List.map Transform.discussion_of_node discussion_nodes in
  let good_first_issues =
    List.filter_map
      (fun node ->
        if Filter.is_good_first_issue node then
          Some (Transform.issue_of_node node)
        else None)
      issue_nodes
  in
  (* Wall-clock time for the metadata; falls back to the epoch if the float
     cannot be represented as a [Ptime.t]. *)
  let cached_at =
    match Ptime.of_float_s (Unix.gettimeofday ()) with
    | Some now -> now
    | None -> Ptime.epoch
  in
  let metadata = Transform.make_metadata ~owner ~repo ~week ~cached_at in
  let snapshot users =
    Types.Week_data.make ~metadata ~issues ~prs ~good_first_issues
      ~discussions ~releases ~users
  in
  (* Users are derived from the snapshot itself, so build it twice. *)
  let users = Users.extract_from_week_data (snapshot []) in
  Ok
    {
      week_data = snapshot users;
      issues_fetched = List.length issue_nodes;
      prs_fetched = List.length pr_nodes;
      pages_fetched = pages;
      rate_limit_remaining = Github.rate_limit_remaining client;
    }
(* Sync every week of [Week.range ~from:from_week ~to_:to_week], in that
   order.  Returns the per-week results in the same order, or the first
   error; no further week is synced after an error. *)
let sync_range ~client ~owner ~repo ~from_week ~to_week ?on_progress () =
  let sync_next outcome week =
    (* [Result.bind] skips the call entirely once [outcome] is an error. *)
    Result.bind outcome (fun finished ->
        sync_week ~client ~owner ~repo ~week ?on_progress ()
        |> Result.map (fun result -> result :: finished))
  in
  Week.range ~from:from_week ~to_:to_week
  |> List.fold_left sync_next (Ok [])
  |> Result.map List.rev
243243+244244+(* Merge utilities for incremental sync *)
(* [merge_by_key ~key ~existing ~new_items] is [existing] without the items
   whose [key] also occurs in [new_items], followed by [new_items] unchanged.
   Relative order inside each list is preserved.  Keys are collected in a
   hash table so that each existing item is checked in constant time. *)
let merge_by_key ~key ~existing ~new_items =
  let replaced = Hashtbl.create (List.length new_items) in
  List.iter (fun item -> Hashtbl.replace replaced (key item) ()) new_items;
  let kept =
    List.filter (fun item -> not (Hashtbl.mem replaced (key item))) existing
  in
  kept @ new_items

(* Issues are identified by [Types.Issue.id]. *)
let merge_issues ~existing ~new_items =
  merge_by_key ~key:Types.Issue.id ~existing ~new_items

(* PRs are identified by [Types.Pr.id]. *)
let merge_prs ~existing ~new_items =
  merge_by_key ~key:Types.Pr.id ~existing ~new_items

(* Discussions are identified by [Types.Discussion.id]. *)
let merge_discussions ~existing ~new_items =
  merge_by_key ~key:Types.Discussion.id ~existing ~new_items

(* Releases are identified by [Types.Release.tag_name]. *)
let merge_releases ~existing ~new_items =
  merge_by_key ~key:Types.Release.tag_name ~existing ~new_items
(* Union of two user name lists, sorted with [String.compare] and without
   duplicates. *)
let merge_users ~existing ~new_items =
  List.concat [ existing; new_items ] |> List.sort_uniq String.compare
(* Merge a freshly synced snapshot into a stored one.  Each collection is
   merged with its dedicated [merge_*] function; the metadata is taken from
   [new_data], which carries the more recent [cached_at]. *)
let merge_week_data ~existing new_data =
  let module W = Types.Week_data in
  (* Apply [merge] to the same field of the stored and the fresh snapshot. *)
  let combine merge field =
    merge ~existing:(field existing) ~new_items:(field new_data)
  in
  let issues = combine merge_issues W.issues in
  let prs = combine merge_prs W.prs in
  let good_first_issues = combine merge_issues W.good_first_issues in
  let discussions = combine merge_discussions W.discussions in
  let releases = combine merge_releases W.releases in
  let users = combine merge_users W.users in
  W.make ~metadata:(W.metadata new_data) ~issues ~prs ~good_first_issues
    ~discussions ~releases ~users
(* Sync [week] and, when a stored snapshot is supplied in [existing], merge
   the fresh data into it with [merge_week_data].  The counters of the
   result always describe the fresh sync.  Errors from [sync_week] are
   returned unchanged. *)
let sync_week_incremental ~client ~owner ~repo ~week ~existing ?on_progress () =
  let fold_into_cache (fresh : sync_result) =
    match existing with
    | None -> fresh
    | Some cached ->
        { fresh with week_data = merge_week_data ~existing:cached fresh.week_data }
  in
  sync_week ~client ~owner ~repo ~week ?on_progress ()
  |> Result.map fold_into_cache
+112
repowatch/lib/sync.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Repository sync engine.
77+88+ This module provides the sync engine for fetching repository data from
99+ GitHub and transforming it into the ruminant format. It handles pagination,
1010+ rate limiting, and week-based filtering. *)
1111+1212+(** {1 Progress Reporting} *)
type phase = [ `Issues_prs | `Discussions | `Releases | `Users ]
(** Stage of a sync run, as reported in {!progress}. *)
type progress = {
  phase : phase;  (** Stage that is currently running. *)
  current : int;  (** Page or item number within that stage. *)
  total : int option;  (** Number of pages/items, when known. *)
}
(** Value passed to [on_progress] callbacks. *)
2828+2929+(** {1 Sync Results} *)
type sync_result = {
  week_data : Types.Week_data.t;  (** Snapshot assembled for the week. *)
  issues_fetched : int;  (** Issues retained for the week. *)
  prs_fetched : int;  (** PRs retained for the week. *)
  pages_fetched : int;  (** Issues/PRs GraphQL pages requested. *)
  rate_limit_remaining : int option;
      (** Remaining rate limit reported by the client after the sync. *)
}
(** What a successful sync returns. *)
3939+4040+(** {1 Sync Operations} *)
val sync_week :
  client:Github.t ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  ?on_progress:(progress -> unit) ->
  unit ->
  (sync_result, Github.error) result
(** [sync_week ~client ~owner ~repo ~week ()] collects the repository's
    activity for [week].

    The stages run in this order, and the first failure is returned:
    - issues and PRs, through the paginated GraphQL query (bounded by
      {!max_pages} and {!early_exit_threshold});
    - discussions, through a single GraphQL request;
    - releases, through the REST API.

    Only items active during the week are kept, and the user names found in
    the result are recorded alongside them.

    @param client GitHub API client
    @param owner Repository owner (user or organization)
    @param repo Repository name
    @param week ISO week to sync
    @param on_progress Called with progress updates, if given *)
val sync_range :
  client:Github.t ->
  owner:string ->
  repo:string ->
  from_week:Week.t ->
  to_week:Week.t ->
  ?on_progress:(progress -> unit) ->
  unit ->
  (sync_result list, Github.error) result
(** [sync_range ~client ~owner ~repo ~from_week ~to_week ()] runs
    {!sync_week} for each week of the range and returns the results in range
    order. The first error ends the run and is returned; later weeks are not
    synced. *)
7777+7878+(** {1 Incremental Sync} *)
val merge_week_data :
  existing:Types.Week_data.t -> Types.Week_data.t -> Types.Week_data.t
(** [merge_week_data ~existing new_data] folds [new_data] into [existing].

    - Issues, PRs, good first issues and discussions: an existing entry is
      dropped when [new_data] has an entry with the same ID; all new entries
      are then appended.
    - Releases: same rule, keyed by [tag_name].
    - Users: sorted union of both lists, without duplicates.
    - Metadata: taken from [new_data] (more recent [cached_at]). *)
val sync_week_incremental :
  client:Github.t ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  existing:Types.Week_data.t option ->
  ?on_progress:(progress -> unit) ->
  unit ->
  (sync_result, Github.error) result
(** [sync_week_incremental ~client ~owner ~repo ~week ~existing ()] syncs
    [week] like {!sync_week} and, when [existing] is [Some data], merges the
    fresh snapshot into [data] with {!merge_week_data}.

    Intended for recurring jobs (for example a daily cron job) that keep the
    current week up to date: new items are added and items already stored
    are replaced by their fresh version. *)
104104+105105+(** {1 Configuration} *)
val max_pages : int
(** Upper bound on the number of issues/PRs GraphQL pages requested per
    week (20). *)
val early_exit_threshold : int
(** Number of consecutive pages without relevant activity after which
    paging stops early (5). *)
+159
repowatch/lib/transform.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+module G = Graphql_types
(* Render a comment as "@author: body". *)
let format_comment ~author ~body = String.concat "" [ "@"; author; ": "; body ]
(* Convert a GraphQL issue node into a [Types.Issue.t].  The issue number
   becomes the id, labels are reduced to their names, the state is
   lower-cased, and each comment is rendered with [format_comment]. *)
let issue_of_node node =
  let module N = G.Issue_node in
  let render_comment c =
    format_comment
      ~author:(G.Author.login (G.Comment.author c))
      ~body:(G.Comment.body_text c)
  in
  Types.Issue.make ~id:(N.number node) ~title:(N.title node)
    ~url:(N.url node)
    ~user:(G.Author.login (N.author node))
    ~created_at:(N.created_at node) ~updated_at:(N.updated_at node)
    ~closed_at:(N.closed_at node) ~body:(N.body_text node)
    ~labels:(List.map G.Label.name (N.labels node))
    ~state:(String.lowercase_ascii (N.state node))
    ~comments:(List.map render_comment (N.comments node))
(* Convert a GraphQL PR node into a [Types.Pr.t].  The PR number becomes the
   id, labels are reduced to their names and each comment is rendered with
   [format_comment].

   The state is normalised to "open", "closed" or "merged": a node reported
   as closed that carries a merge timestamp counts as merged, and any state
   other than merged/closed maps to "open". *)
let pr_of_node node =
  let module N = G.Pr_node in
  let render_comment c =
    format_comment
      ~author:(G.Author.login (G.Comment.author c))
      ~body:(G.Comment.body_text c)
  in
  let state =
    match (String.lowercase_ascii (N.state node), N.merged_at node) with
    | "merged", _ | "closed", Some _ -> "merged"
    | "closed", None -> "closed"
    | _ -> "open"
  in
  Types.Pr.make ~id:(N.number node) ~title:(N.title node) ~url:(N.url node)
    ~user:(G.Author.login (N.author node))
    ~created_at:(N.created_at node) ~updated_at:(N.updated_at node)
    ~closed_at:(N.closed_at node) ~merged_at:(N.merged_at node)
    ~body:(N.body_text node)
    ~labels:(List.map G.Label.name (N.labels node))
    ~state
    ~comments:(List.map render_comment (N.comments node))
    ~additions:(N.additions node) ~deletions:(N.deletions node)
    ~changed_files:(N.changed_files node) ~mergeable:(N.mergeable node)
    ~draft:(N.is_draft node)
(* Convert a GraphQL discussion node into a [Types.Discussion.t].  The
   discussion number becomes the id and [comments] holds the comment count,
   not the comment texts. *)
let discussion_of_node node =
  let module N = G.Discussion_node in
  let author_login = G.Author.login (N.author node) in
  Types.Discussion.make ~id:(N.number node) ~title:(N.title node)
    ~url:(N.url node) ~user:author_login ~updated_at:(N.updated_at node)
    ~body:(N.body_text node) ~category:(N.category node)
    ~comments:(N.comments_count node) ~answered:(N.answered node)
(* Codecs for the REST API release payload. *)

(* String member that is decoded into an [option]. *)
let nullable_string = Jsont.(option string)
(* Codec for one release asset: reads the "name", "download_count" and
   "size" members into a [Types.Asset.t]. *)
let rest_asset =
  let module O = Jsont.Object in
  let build name download_count size =
    Types.Asset.make ~name ~download_count ~size
  in
  O.map ~kind:"RestAsset" build
  |> O.mem "name" Jsont.string ~enc:Types.Asset.name
  |> O.mem "download_count" Jsont.int ~enc:Types.Asset.download_count
  |> O.mem "size" Jsont.int ~enc:Types.Asset.size
  |> O.finish
(* Codec for the release author object, reduced to its "login" string. *)
let rest_author_login =
  let module O = Jsont.Object in
  O.map ~kind:"RestAuthor" Fun.id
  |> O.mem "login" Jsont.string ~enc:Fun.id
  |> O.finish
(* Codec for one release of the REST releases listing.  "name" and "body"
   may be absent (or decode to [None]): the name then falls back to the tag
   name and the body to the empty string.  All other members are required. *)
let rest_release =
  let module O = Jsont.Object in
  let module R = Types.Release in
  let build tag_name name published_at author html_url body prerelease draft
      assets =
    let name = match name with Some n -> n | None -> tag_name in
    let body = match body with Some b -> b | None -> "" in
    R.make ~tag_name ~name ~published_at ~author ~html_url ~body ~prerelease
      ~draft ~assets
  in
  O.map ~kind:"RestRelease" build
  |> O.mem "tag_name" Jsont.string ~enc:R.tag_name
  |> O.mem "name" nullable_string ~dec_absent:None
       ~enc:(fun r -> Some (R.name r))
  |> O.mem "published_at" Jsont.string ~enc:R.published_at
  |> O.mem "author" rest_author_login ~enc:R.author
  |> O.mem "html_url" Jsont.string ~enc:R.html_url
  |> O.mem "body" nullable_string ~dec_absent:None
       ~enc:(fun r -> Some (R.body r))
  |> O.mem "prerelease" Jsont.bool ~enc:R.prerelease
  |> O.mem "draft" Jsont.bool ~enc:R.draft
  |> O.mem "assets" (Jsont.list rest_asset) ~enc:R.assets
  |> O.finish
(* Decode one release with [rest_release]; a decoding failure gives [None]
   and the error message is discarded. *)
let release_of_json json =
  Result.to_option (Jsont.Json.decode rest_release json)
(* Format a timestamp as "YYYY-MM-DDTHH:MM:SSZ".  The date and time come
   from [Ptime.to_date_time] with its default time zone offset, the "Z"
   suffix is fixed, and sub-second precision is not printed. *)
let ptime_to_iso8601 t =
  let date, (time, _tz_offset) = Ptime.to_date_time t in
  let year, month, day = date and hour, minute, second = time in
  Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" year month day hour minute
    second
(* Build the snapshot metadata for [week]: the repository is recorded as
   "owner/repo", the year, week number and date bounds come from [Week], and
   [cached_at] is formatted with [ptime_to_iso8601]. *)
let make_metadata ~owner ~repo ~week ~cached_at =
  let repo = Printf.sprintf "%s/%s" owner repo in
  let cached_at = ptime_to_iso8601 cached_at in
  Types.Metadata.make ~repo ~year:(Week.year week) ~week:(Week.week week)
    ~week_start:(Week.start_date week) ~week_end:(Week.end_date week)
    ~cached_at
(* Keep the issue nodes that are active in [week] and convert them, in
   their original order. *)
let issues_of_nodes ~week nodes =
  let active = Filter.issue_active_in_week ~week in
  List.filter_map
    (fun node -> if active node then Some (issue_of_node node) else None)
    nodes
(* Keep the PR nodes that are active in [week] and convert them, in their
   original order. *)
let prs_of_nodes ~week nodes =
  let active = Filter.pr_active_in_week ~week in
  List.filter_map
    (fun node -> if active node then Some (pr_of_node node) else None)
    nodes
(* Keep the discussion nodes that are active in [week] and convert them, in
   their original order. *)
let discussions_of_nodes ~week nodes =
  let active = Filter.discussion_active_in_week ~week in
  List.filter_map
    (fun node -> if active node then Some (discussion_of_node node) else None)
    nodes
+70
repowatch/lib/transform.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Transform GraphQL responses to repowatch types.
77+88+ This module provides functions to convert the raw GraphQL response types
99+ into the normalized [Types] representations used for storage and analysis. *)
1010+1111+(** {1 Issue Transformation} *)
val issue_of_node : Graphql_types.Issue_node.t -> Types.Issue.t
(** [issue_of_node node] is the [Types.Issue.t] built from a GraphQL issue
    node.

    Each comment is rendered as an "@author: body" string and the state is
    lower-cased. *)
1717+1818+(** {1 Pull Request Transformation} *)
val pr_of_node : Graphql_types.Pr_node.t -> Types.Pr.t
(** [pr_of_node node] is the [Types.Pr.t] built from a GraphQL PR node.

    The resulting state is one of "open", "closed" or "merged"; a closed PR
    that has a merge timestamp is reported as "merged". *)
2424+2525+(** {1 Discussion Transformation} *)
val discussion_of_node : Graphql_types.Discussion_node.t -> Types.Discussion.t
(** [discussion_of_node node] is the [Types.Discussion.t] built from a
    GraphQL discussion node. *)
3030+3131+(** {1 Release Transformation} *)
val release_of_json : Jsont.Json.t -> Types.Release.t option
(** [release_of_json json] decodes a release in the GitHub REST API JSON
    format. The result is [None] when [json] cannot be decoded. *)
3636+3737+(** {1 Comment Formatting} *)
val format_comment : author:string -> body:string -> string
(** [format_comment ~author ~body] is the string "@author: body". *)
4141+4242+(** {1 Metadata Construction} *)
val make_metadata :
  owner:string ->
  repo:string ->
  week:Week.t ->
  cached_at:Ptime.t ->
  Types.Metadata.t
(** [make_metadata ~owner ~repo ~week ~cached_at] is the metadata of the
    weekly snapshot of [owner/repo] for [week], cached at [cached_at]. *)
5252+5353+(** {1 Batch Transformation} *)
val issues_of_nodes :
  week:Week.t -> Graphql_types.Issue_node.t list -> Types.Issue.t list
(** [issues_of_nodes ~week nodes] converts the issues of [nodes] that were
    active during [week]; the others are dropped. *)
val prs_of_nodes :
  week:Week.t -> Graphql_types.Pr_node.t list -> Types.Pr.t list
(** [prs_of_nodes ~week nodes] converts the PRs of [nodes] that were active
    during [week]; the others are dropped. *)
val discussions_of_nodes :
  week:Week.t ->
  Graphql_types.Discussion_node.t list ->
  Types.Discussion.t list
(** [discussions_of_nodes ~week nodes] converts the discussions of [nodes]
    that were updated during [week]; the others are dropped. *)
+198
repowatch/lib/types.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Identification of one weekly snapshot: the repository, the week it
   covers and when it was cached.  Dates and timestamps are stored as the
   strings supplied to [make]; nothing is validated. *)
module Metadata = struct
  type t = {
    repo : string;
    year : int;
    week : int;
    week_start : string;
    week_end : string;
    cached_at : string;
  }

  let make ~repo ~year ~week ~week_start ~week_end ~cached_at : t =
    { repo; year; week; week_start; week_end; cached_at }

  (* Field accessors. *)
  let repo { repo; _ } = repo
  let year { year; _ } = year
  let week { week; _ } = week
  let week_start { week_start; _ } = week_start
  let week_end { week_end; _ } = week_end
  let cached_at { cached_at; _ } = cached_at
end
(* One issue of a weekly snapshot.  Timestamps are kept as strings and
   [comments] holds already formatted comment strings. *)
module Issue = struct
  type t = {
    id : int;
    title : string;
    url : string;
    user : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    body : string;
    labels : string list;
    state : string;
    comments : string list;
  }

  let make ~id ~title ~url ~user ~created_at ~updated_at ~closed_at ~body
      ~labels ~state ~comments : t =
    {
      id;
      title;
      url;
      user;
      created_at;
      updated_at;
      closed_at;
      body;
      labels;
      state;
      comments;
    }

  (* Field accessors. *)
  let id { id; _ } = id
  let title { title; _ } = title
  let url { url; _ } = url
  let user { user; _ } = user
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
  let closed_at { closed_at; _ } = closed_at
  let body { body; _ } = body
  let labels { labels; _ } = labels
  let state { state; _ } = state
  let comments { comments; _ } = comments
end
(* One pull request of a weekly snapshot.  Timestamps are kept as strings,
   [comments] holds already formatted comment strings, and the size fields
   ([additions], [deletions], [changed_files]) are plain counts. *)
module Pr = struct
  type t = {
    id : int;
    title : string;
    url : string;
    user : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    merged_at : string option;
    body : string;
    labels : string list;
    state : string;
    comments : string list;
    additions : int;
    deletions : int;
    changed_files : int;
    mergeable : string;
    draft : bool;
  }

  let make ~id ~title ~url ~user ~created_at ~updated_at ~closed_at ~merged_at
      ~body ~labels ~state ~comments ~additions ~deletions ~changed_files
      ~mergeable ~draft : t =
    {
      id;
      title;
      url;
      user;
      created_at;
      updated_at;
      closed_at;
      merged_at;
      body;
      labels;
      state;
      comments;
      additions;
      deletions;
      changed_files;
      mergeable;
      draft;
    }

  (* Field accessors. *)
  let id { id; _ } = id
  let title { title; _ } = title
  let url { url; _ } = url
  let user { user; _ } = user
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
  let closed_at { closed_at; _ } = closed_at
  let merged_at { merged_at; _ } = merged_at
  let body { body; _ } = body
  let labels { labels; _ } = labels
  let state { state; _ } = state
  let comments { comments; _ } = comments
  let additions { additions; _ } = additions
  let deletions { deletions; _ } = deletions
  let changed_files { changed_files; _ } = changed_files
  let mergeable { mergeable; _ } = mergeable
  let draft { draft; _ } = draft
end
(* One discussion of a weekly snapshot.  Unlike issues and PRs, [comments]
   is a count rather than a list of comment texts. *)
module Discussion = struct
  type t = {
    id : int;
    title : string;
    url : string;
    user : string;
    updated_at : string;
    body : string;
    category : string;
    comments : int;
    answered : bool;
  }

  let make ~id ~title ~url ~user ~updated_at ~body ~category ~comments
      ~answered : t =
    { id; title; url; user; updated_at; body; category; comments; answered }

  (* Field accessors. *)
  let id { id; _ } = id
  let title { title; _ } = title
  let url { url; _ } = url
  let user { user; _ } = user
  let updated_at { updated_at; _ } = updated_at
  let body { body; _ } = body
  let category { category; _ } = category
  let comments { comments; _ } = comments
  let answered { answered; _ } = answered
end
(* A file attached to a release: its name, download count and size. *)
module Asset = struct
  type t = { name : string; download_count : int; size : int }

  let make ~name ~download_count ~size : t = { name; download_count; size }

  (* Field accessors. *)
  let name { name; _ } = name
  let download_count { download_count; _ } = download_count
  let size { size; _ } = size
end
(* One release of a weekly snapshot.  [author] is a login name and
   [published_at] is kept as the string supplied to [make]. *)
module Release = struct
  type t = {
    tag_name : string;
    name : string;
    published_at : string;
    author : string;
    html_url : string;
    body : string;
    prerelease : bool;
    draft : bool;
    assets : Asset.t list;
  }

  let make ~tag_name ~name ~published_at ~author ~html_url ~body ~prerelease
      ~draft ~assets : t =
    {
      tag_name;
      name;
      published_at;
      author;
      html_url;
      body;
      prerelease;
      draft;
      assets;
    }

  (* Field accessors. *)
  let tag_name { tag_name; _ } = tag_name
  let name { name; _ } = name
  let published_at { published_at; _ } = published_at
  let author { author; _ } = author
  let html_url { html_url; _ } = html_url
  let body { body; _ } = body
  let prerelease { prerelease; _ } = prerelease
  let draft { draft; _ } = draft
  let assets { assets; _ } = assets
end
(* A complete weekly snapshot: the metadata plus every collection gathered
   for the week.  [good_first_issues] is stored as its own list of issues
   and [users] is a list of user names. *)
module Week_data = struct
  type t = {
    metadata : Metadata.t;
    issues : Issue.t list;
    prs : Pr.t list;
    good_first_issues : Issue.t list;
    discussions : Discussion.t list;
    releases : Release.t list;
    users : string list;
  }

  let make ~metadata ~issues ~prs ~good_first_issues ~discussions ~releases
      ~users : t =
    { metadata; issues; prs; good_first_issues; discussions; releases; users }

  (* Field accessors. *)
  let metadata { metadata; _ } = metadata
  let issues { issues; _ } = issues
  let prs { prs; _ } = prs
  let good_first_issues { good_first_issues; _ } = good_first_issues
  let discussions { discussions; _ } = discussions
  let releases { releases; _ } = releases
  let users { users; _ } = users
end
+337
repowatch/lib/types.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Type definitions for GitHub repository activity data.
77+88+ These types correspond to the JSON schema used by the ruminant data format
99+ for weekly repository activity snapshots. *)
1010+1111+(** {1 Metadata} *)
module Metadata : sig
  type t
  (** Identifying information for one weekly activity snapshot. *)

  val make :
    repo:string ->
    year:int ->
    week:int ->
    week_start:string ->
    week_end:string ->
    cached_at:string ->
    t
  (** [make ~repo ~year ~week ~week_start ~week_end ~cached_at] builds the
      metadata of a weekly snapshot.

      @param repo repository name, written ["owner/repo"]
      @param year year the snapshot belongs to
      @param week ISO week number, 1 to 53
      @param week_start first day of the week, in ISO 8601 format
      @param week_end last day of the week, in ISO 8601 format
      @param cached_at time at which the data was cached *)

  val repo : t -> string
  (** [repo t] is the repository identifier, written ["owner/repo"]. *)

  val year : t -> int
  (** [year t] is the year covered by the snapshot. *)

  val week : t -> int
  (** [week t] is the ISO week number, 1 to 53. *)

  val week_start : t -> string
  (** [week_start t] is the first day of the week, in ISO 8601 format. *)

  val week_end : t -> string
  (** [week_end t] is the last day of the week, in ISO 8601 format. *)

  val cached_at : t -> string
  (** [cached_at t] is the time at which the data was cached. *)
end
5252+5353+(** {1 Issues} *)
module Issue : sig
  type t
  (** One GitHub issue. *)

  val make :
    id:int ->
    title:string ->
    url:string ->
    user:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    body:string ->
    labels:string list ->
    state:string ->
    comments:string list ->
    t
  (** [make ...] builds an issue record from its labelled fields; each field
      is described on the accessor of the same name. *)

  val id : t -> int
  (** [id t] is the issue number. *)

  val title : t -> string
  (** [title t] is the issue title. *)

  val url : t -> string
  (** [url t] is the address of the issue on GitHub. *)

  val user : t -> string
  (** [user t] is the username of the issue's creator. *)

  val created_at : t -> string
  (** [created_at t] is the creation timestamp, in ISO 8601 format. *)

  val updated_at : t -> string
  (** [updated_at t] is the timestamp of the last update. *)

  val closed_at : t -> string option
  (** [closed_at t] is the closure timestamp, or [None] if the issue has not
      been closed. *)

  val body : t -> string
  (** [body t] is the text of the issue body. *)

  val labels : t -> string list
  (** [labels t] lists the labels applied to the issue. *)

  val state : t -> string
  (** [state t] is the issue state (["open"] or ["closed"]). *)

  val comments : t -> string list
  (** [comments t] lists the comment texts. *)
end
107107+108108+(** {1 Pull Requests} *)
module Pr : sig
  type t
  (** One GitHub pull request. *)

  val make :
    id:int ->
    title:string ->
    url:string ->
    user:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    merged_at:string option ->
    body:string ->
    labels:string list ->
    state:string ->
    comments:string list ->
    additions:int ->
    deletions:int ->
    changed_files:int ->
    mergeable:string ->
    draft:bool ->
    t
  (** [make ...] builds a pull request record from its labelled fields; each
      field is described on the accessor of the same name. *)

  val id : t -> int
  (** [id t] is the pull request number. *)

  val title : t -> string
  (** [title t] is the pull request title. *)

  val url : t -> string
  (** [url t] is the address of the pull request on GitHub. *)

  val user : t -> string
  (** [user t] is the username of the pull request's creator. *)

  val created_at : t -> string
  (** [created_at t] is the creation timestamp. *)

  val updated_at : t -> string
  (** [updated_at t] is the timestamp of the last update. *)

  val closed_at : t -> string option
  (** [closed_at t] is the closure timestamp, or [None] if the pull request
      has not been closed. *)

  val merged_at : t -> string option
  (** [merged_at t] is the merge timestamp, or [None] if the pull request has
      not been merged. *)

  val body : t -> string
  (** [body t] is the text of the pull request body. *)

  val labels : t -> string list
  (** [labels t] lists the labels applied to the pull request. *)

  val state : t -> string
  (** [state t] is the pull request state (["open"], ["closed"] or
      ["merged"]). *)

  val comments : t -> string list
  (** [comments t] lists the comment texts. *)

  val additions : t -> int
  (** [additions t] is the number of lines added. *)

  val deletions : t -> int
  (** [deletions t] is the number of lines deleted. *)

  val changed_files : t -> int
  (** [changed_files t] is the number of files changed. *)

  val mergeable : t -> string
  (** [mergeable t] is the merge status (["MERGEABLE"], ["CONFLICTING"],
      etc.). *)

  val draft : t -> bool
  (** [draft t] is [true] for a draft pull request. *)
end
186186+187187+(** {1 Discussions} *)
module Discussion : sig
  type t
  (** One GitHub discussion. *)

  val make :
    id:int ->
    title:string ->
    url:string ->
    user:string ->
    updated_at:string ->
    body:string ->
    category:string ->
    comments:int ->
    answered:bool ->
    t
  (** [make ...] builds a discussion record from its labelled fields; each
      field is described on the accessor of the same name. *)

  val id : t -> int
  (** [id t] is the discussion ID. *)

  val title : t -> string
  (** [title t] is the discussion title. *)

  val url : t -> string
  (** [url t] is the address of the discussion on GitHub. *)

  val user : t -> string
  (** [user t] is the username of the discussion's creator. *)

  val updated_at : t -> string
  (** [updated_at t] is the timestamp of the last update. *)

  val body : t -> string
  (** [body t] is the text of the discussion body. *)

  val category : t -> string
  (** [category t] is the discussion category. *)

  val comments : t -> int
  (** [comments t] is the number of comments (a count, not the texts). *)

  val answered : t -> bool
  (** [answered t] is [true] once the discussion has been marked answered. *)
end
233233+234234+(** {1 Release Assets} *)
module Asset : sig
  type t
  (** A downloadable file attached to a release. *)

  val make : name:string -> download_count:int -> size:int -> t
  (** [make ~name ~download_count ~size] builds an asset record. *)

  val name : t -> string
  (** [name t] is the asset's filename. *)

  val download_count : t -> int
  (** [download_count t] is how many times the asset has been downloaded. *)

  val size : t -> int
  (** [size t] is the asset size in bytes. *)
end
252252+253253+(** {1 Releases} *)
module Release : sig
  type t
  (** One GitHub release. *)

  val make :
    tag_name:string ->
    name:string ->
    published_at:string ->
    author:string ->
    html_url:string ->
    body:string ->
    prerelease:bool ->
    draft:bool ->
    assets:Asset.t list ->
    t
  (** [make ...] builds a release record from its labelled fields; each field
      is described on the accessor of the same name. *)

  val tag_name : t -> string
  (** [tag_name t] is the Git tag the release is attached to. *)

  val name : t -> string
  (** [name t] is the release title. *)

  val published_at : t -> string
  (** [published_at t] is the publication timestamp. *)

  val author : t -> string
  (** [author t] is the username of the release's creator. *)

  val html_url : t -> string
  (** [html_url t] is the address of the release on GitHub. *)

  val body : t -> string
  (** [body t] is the text of the release notes. *)

  val prerelease : t -> bool
  (** [prerelease t] is [true] for a prerelease. *)

  val draft : t -> bool
  (** [draft t] is [true] for a draft release. *)

  val assets : t -> Asset.t list
  (** [assets t] lists the downloadable assets attached to the release. *)
end
299299+300300+(** {1 Weekly Data} *)
module Week_data : sig
  type t
  (** The full activity snapshot of one repository for one week. *)

  val make :
    metadata:Metadata.t ->
    issues:Issue.t list ->
    prs:Pr.t list ->
    good_first_issues:Issue.t list ->
    discussions:Discussion.t list ->
    releases:Release.t list ->
    users:string list ->
    t
  (** [make ...] builds a weekly snapshot from its labelled fields; each field
      is described on the accessor of the same name. *)

  val metadata : t -> Metadata.t
  (** [metadata t] is the snapshot's metadata. *)

  val issues : t -> Issue.t list
  (** [issues t] lists the issues that saw activity during the week. *)

  val prs : t -> Pr.t list
  (** [prs t] lists the pull requests that saw activity during the week. *)

  val good_first_issues : t -> Issue.t list
  (** [good_first_issues t] lists the issues labeled "good first issue". *)

  val discussions : t -> Discussion.t list
  (** [discussions t] lists the discussions that saw activity during the
      week. *)

  val releases : t -> Release.t list
  (** [releases t] lists the releases published during the week. *)

  val users : t -> string list
  (** [users t] lists the usernames that were active during the week. *)
end
+188
repowatch/lib/users.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Words that commonly follow an '@' without naming a GitHub account.
   [is_valid_username] lowercases its argument before looking it up here, so
   every entry is lowercase. *)
let excluded_words =
  [
    (* Common English words that might appear with @ *)
    "all"; "everyone"; "here"; "channel"; "team";
    "author"; "authors"; "maintainer"; "maintainers";
    "reviewer"; "reviewers"; "user"; "users";
    "admin"; "admins"; "bot"; "bots"; "ghost";
    (* Programming terms *)
    "param"; "params"; "return"; "returns";
    "type"; "types"; "value"; "values";
    "deprecated"; "since"; "see"; "link"; "code";
    "example"; "note"; "warning"; "todo"; "fixme"; "hack";
    (* OCaml-specific terms *)
    "raise"; "raises"; "inline"; "ocaml"; "opam"; "dune";
    "module"; "functor"; "mli"; "cma"; "cmo"; "cmx"; "cmi";
    (* Git/GitHub terms *)
    "dependabot"; "github"; "actions"; "codecov"; "renovate";
  ]
(* Heuristic for commit SHA fragments: true when [s] has at least six
   characters and every one of them is a hexadecimal digit, in either case. *)
let looks_like_hex s =
  let is_hex_digit = function
    | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
    | _ -> false
  in
  String.length s >= 6 && String.for_all is_hex_digit s
(* Decide whether [s] is plausible as a GitHub username worth reporting.

   [s] is accepted when it is 2 to 39 characters long, is not an excluded
   word (compared case-insensitively), does not look like a hex fragment,
   starts with an ASCII letter, does not end with a hyphen, and consists only
   of ASCII letters, digits and non-adjacent hyphens. *)
let is_valid_username s =
  let len = String.length s in
  let is_letter = function 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false in
  (* Walk the string once, remembering whether the previous character was a
     hyphen so that "--" can be rejected. *)
  let rec well_formed i prev_hyphen =
    if i >= len then true
    else
      match s.[i] with
      | '-' -> (not prev_hyphen) && well_formed (i + 1) true
      | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> well_formed (i + 1) false
      | _ -> false
  in
  (* The length test comes first so the indexing below is always in bounds.
     A leading hyphen needs no separate test: the first character must be a
     letter. *)
  len >= 2
  && len <= 39
  && (not (List.mem (String.lowercase_ascii s) excluded_words))
  && (not (looks_like_hex s))
  && is_letter s.[0]
  && s.[len - 1] <> '-'
  && well_formed 0 false
(* Extract @mentions from text.

   Scans [text] for '@' followed by a run of ASCII letters, digits and
   hyphens, keeps the runs accepted by [is_valid_username], and returns them
   sorted and without duplicates.

   An '@' that directly follows a letter or digit is not treated as a
   mention: that is the shape of an e-mail address ("bob@example.com"), which
   would otherwise contribute its domain ("example") as a username. *)
let extract_mentions text =
  let len = String.length text in
  let is_alnum = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true
    | _ -> false
  in
  let is_name_char c = is_alnum c || c = '-' in
  (* Index just past the run of username characters starting at [j]. *)
  let rec name_end j =
    if j < len && is_name_char text.[j] then name_end (j + 1) else j
  in
  let rec scan i acc =
    if i >= len then acc
    else if text.[i] <> '@' then scan (i + 1) acc
    else if i > 0 && is_alnum text.[i - 1] then
      (* '@' glued to the end of a word: an address, not a mention. *)
      scan (i + 1) acc
    else
      let stop = name_end (i + 1) in
      if stop = i + 1 then
        (* A bare '@' with no name after it. *)
        scan stop acc
      else
        let candidate = String.sub text (i + 1) (stop - i - 1) in
        let acc = if is_valid_username candidate then candidate :: acc else acc in
        (* Resume after the candidate whether or not it was accepted. *)
        scan stop acc
  in
  scan 0 [] |> List.sort_uniq String.compare
(* Recover the author of a comment stored as "@username: body".

   Returns [Some username] when [comment] starts with '@', contains a ':' and
   the text between the two passes [is_valid_username]; [None] otherwise. *)
let extract_comment_author comment =
  if String.length comment <= 1 || comment.[0] <> '@' then None
  else
    match String.index_opt comment ':' with
    | None -> None
    | Some colon ->
        (* [colon] is at least 1 because position 0 holds '@'. *)
        let author = String.sub comment 1 (colon - 1) in
        if is_valid_username author then Some author else None
(* Collect every username that appears in a weekly snapshot.

   Sources: authors of issues, pull requests, good-first-issues, discussions
   and releases; mentions in the bodies of issues, pull requests,
   good-first-issues and discussions; and, for issue and pull request
   comments, both the "@author:" prefix and any mentions in the text.
   Candidates rejected by [is_valid_username] are dropped and the result is
   sorted without duplicates. *)
let extract_from_week_data data =
  (* Names contributed by one comment string. *)
  let of_comment comment =
    Option.to_list (extract_comment_author comment) @ extract_mentions comment
  in
  let of_issue issue =
    (Types.Issue.user issue :: extract_mentions (Types.Issue.body issue))
    @ List.concat_map of_comment (Types.Issue.comments issue)
  in
  let of_pr pr =
    (Types.Pr.user pr :: extract_mentions (Types.Pr.body pr))
    @ List.concat_map of_comment (Types.Pr.comments pr)
  in
  (* Good-first-issues contribute the author and body only, not comments. *)
  let of_good_first_issue issue =
    Types.Issue.user issue :: extract_mentions (Types.Issue.body issue)
  in
  let of_discussion disc =
    Types.Discussion.user disc
    :: extract_mentions (Types.Discussion.body disc)
  in
  let candidates =
    List.concat
      [
        List.concat_map of_issue (Types.Week_data.issues data);
        List.concat_map of_pr (Types.Week_data.prs data);
        List.concat_map of_good_first_issue
          (Types.Week_data.good_first_issues data);
        List.concat_map of_discussion (Types.Week_data.discussions data);
        List.map Types.Release.author (Types.Week_data.releases data);
      ]
  in
  candidates |> List.filter is_valid_username |> List.sort_uniq String.compare
+43
repowatch/lib/users.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Extract and validate GitHub usernames.
77+88+ This module provides functions to extract GitHub usernames from
99+ weekly activity data and validate them against common patterns
1010+ that shouldn't be treated as usernames. *)
(** {1 Username Extraction} *)

val extract_from_week_data : Types.Week_data.t -> string list
(** [extract_from_week_data data] extracts all usernames from the week data,
    sorted and without duplicates. This includes:
    - Issue, PR and good-first-issue authors
    - Comment authors of issues and PRs (extracted from the
      "@user: comment" format)
    - Discussion authors
    - Release authors
    - Usernames mentioned in issue, PR, good-first-issue and discussion
      bodies, and in issue and PR comments

    Only names accepted by {!is_valid_username} are returned. *)

val extract_mentions : string -> string list
(** [extract_mentions text] extracts mentions (usernames prefixed with [\@])
    from text. Returns the usernames accepted by {!is_valid_username}, sorted
    and without duplicates. *)
(** {1 Validation} *)

val is_valid_username : string -> bool
(** [is_valid_username s] returns [true] if the string looks like a
    valid GitHub username and is not in the exclusion list.

    Accepted strings:
    - Are 2-39 characters long (single-character names are rejected)
    - Start with an ASCII letter
    - Contain only ASCII alphanumeric characters and hyphens
    - Don't end with a hyphen
    - Don't contain consecutive hyphens
    - Are not, compared case-insensitively, one of {!excluded_words}
    - Are not made up entirely of six or more hexadecimal digits, which are
      treated as commit SHA fragments *)

val excluded_words : string list
(** Lowercase words that look like mentions but should be excluded.
    Includes common English words, programming terms, OCaml-specific terms
    and Git/GitHub terms. *)
+212
repowatch/lib/week.ml
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+type t = { year : int; week : int }
(* Number of ISO 8601 weeks (52 or 53) in [year].

   A year has 53 weeks exactly when January 1 is a Thursday, or when it is a
   Wednesday and the year is a leap year. *)
let weeks_in_year year =
  let jan1_weekday =
    (* Zeller's congruence for January 1. January is treated as month 13 of
       the previous year, hence [year - 1]. *)
    let y = year - 1 in
    let q = 1 and m = 13 in
    let k = y mod 100 and j = y / 100 in
    (* [5 * j] stands in for the textbook [- 2 * j]: the two are congruent
       modulo 7, and keeping every term non-negative stops OCaml's [mod] from
       returning a negative remainder. *)
    let h = (q + (13 * (m + 1) / 5) + k + (k / 4) + (j / 4) + (5 * j)) mod 7 in
    (* Zeller numbers days from 0 = Saturday; convert to ISO weekdays
       (1 = Monday, 7 = Sunday). *)
    ((h + 5) mod 7) + 1
  in
  let is_leap = year mod 4 = 0 && (year mod 100 <> 0 || year mod 400 = 0) in
  if jan1_weekday = 4 || (jan1_weekday = 3 && is_leap) then 53 else 52
(* Build a week from an explicit ISO year and week number.

   Raises [Invalid_argument] when [week] is below 1 or above the number of
   ISO weeks in [year]. *)
let of_year_week ~year ~week =
  let max_weeks = weeks_in_year year in
  if 1 <= week && week <= max_weeks then { year; week }
  else
    Printf.ksprintf invalid_arg
      "Week %d is out of range for year %d (max: %d)" week year max_weeks
(* ISO week-year of a week. *)
let year { year; _ } = year

(* ISO week number of a week. *)
let week { week; _ } = week
(* Convert a Gregorian date to the ISO week that contains it.

   [month] must be in 1..12, otherwise [Invalid_argument] is raised. [day] is
   not range-checked: it is simply added to the day-of-year count.

   The returned week-year differs from [year] for dates in the first days of
   January that belong to the last week of the previous year, and for dates
   at the end of December that belong to week 1 of the next year. *)
let of_date ~year ~month ~day =
  if month < 1 || month > 12 then
    invalid_arg
      (Printf.sprintf "Week.of_date: month %d is out of range (1-12)" month);
  (* Days elapsed before the first of each month in a non-leap year. *)
  let days_before_month =
    [| 0; 31; 59; 90; 120; 151; 181; 212; 243; 273; 304; 334 |]
  in
  let is_leap = year mod 4 = 0 && (year mod 100 <> 0 || year mod 400 = 0) in
  (* 1-based day of the year. *)
  let doy =
    days_before_month.(month - 1)
    + day
    + (if month > 2 && is_leap then 1 else 0)
  in
  (* Days since January 1, 2000 (negative for earlier dates). The leap-day
     count is the same expression whichever side of 2000 [year] falls on. *)
  let days_since_2000 =
    let leap_days_through y = (y / 4) - (y / 100) + (y / 400) in
    let leap_years = leap_days_through (year - 1) - leap_days_through 1999 in
    ((year - 2000) * 365) + leap_years + doy - 1
  in
  (* January 1, 2000 was a Saturday (ISO weekday 6). Adding 7 before the
     outer [mod] keeps the value non-negative for dates before 2000. *)
  let dow = ((days_since_2000 mod 7) + 6 - 1 + 7) mod 7 + 1 in
  (* ISO week number calculation *)
  let week_num = (doy - dow + 10) / 7 in
  if week_num < 1 then
    (* Belongs to last week of previous year *)
    { year = year - 1; week = weeks_in_year (year - 1) }
  else if week_num > weeks_in_year year then
    (* Belongs to first week of next year *)
    { year = year + 1; week = 1 }
  else { year; week = week_num }
(* ISO week containing the calendar date of [ptime]; the time of day is
   ignored. *)
let of_ptime ptime =
  let (year, month, day), _time = Ptime.to_date_time ptime in
  of_date ~year ~month ~day
(* ISO week containing the current time as reported by [clock].

   If the clock value cannot be converted to a [Ptime.t], the week containing
   1970-01-01 is returned instead. *)
let current ~clock =
  match Eio.Time.now clock |> Ptime.of_float_s with
  | None -> of_date ~year:1970 ~month:1 ~day:1
  | Some instant -> of_ptime instant
(* Monday that starts ISO week 1 of [year], as a day count relative to
   January 1, 2000 (day 0).

   Week 1 is the week containing January 4, so the result is January 4 moved
   back to the Monday of its week. *)
let monday_of_week1 year =
  let leap_days_through y = (y / 4) - (y / 100) + (y / 400) in
  let jan1 =
    ((year - 2000) * 365) + (leap_days_through (year - 1) - leap_days_through 1999)
  in
  let jan4 = jan1 + 3 in
  (* Day 0 was a Saturday (ISO weekday 6); the +12 both applies that offset
     and keeps the operand of the outer [mod] non-negative. *)
  let jan4_weekday = (((jan4 mod 7) + 12) mod 7) + 1 in
  jan4 - (jan4_weekday - 1)
(* Convert a day count relative to January 1, 2000 (day 0) back to a
   Gregorian [(year, month, day)] triple. *)
let days_since_2000_to_date days =
  let leap_days_through y = (y / 4) - (y / 100) + (y / 400) in
  (* Day count of January 1 of year [y]. *)
  let year_start y =
    ((y - 2000) * 365) + (leap_days_through (y - 1) - leap_days_through 1999)
  in
  (* Start from an estimate and step back until the year begins on or before
     [days]. *)
  let rec settle y = if year_start y > days then settle (y - 1) else y in
  let year = settle (2000 + (days / 365)) in
  (* 1-based day of the year. *)
  let ordinal = days - year_start year + 1 in
  let is_leap = year mod 4 = 0 && (year mod 100 <> 0 || year mod 400 = 0) in
  let month_lengths =
    [| 31; (if is_leap then 29 else 28); 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |]
  in
  (* Peel off whole months until the remainder fits in the current one. *)
  let rec locate month remaining =
    let in_month = month_lengths.(month - 1) in
    if remaining <= in_month then (month, remaining)
    else locate (month + 1) (remaining - in_month)
  in
  let month, day = locate 1 ordinal in
  (year, month, day)
(* Instant at which the week begins: its Monday, as produced by
   [Ptime.of_date]. Falls back to [Ptime.epoch] if that date cannot be
   represented. *)
let start_ptime t =
  let first_day = monday_of_week1 t.year + ((t.week - 1) * 7) in
  Ptime.of_date (days_since_2000_to_date first_day)
  |> Option.value ~default:Ptime.epoch
(* Last whole second of the week: its Sunday at 23:59:59 with a zero
   time-zone offset. Falls back to [Ptime.epoch] if that date-time cannot be
   represented. *)
let end_ptime t =
  let last_day = monday_of_week1 t.year + ((t.week - 1) * 7) + 6 in
  let date = days_since_2000_to_date last_day in
  Ptime.of_date_time (date, ((23, 59, 59), 0))
  |> Option.value ~default:Ptime.epoch
(* Monday of the week, formatted as YYYY-MM-DD. *)
let start_date t =
  let (year, month, day), _time = Ptime.to_date_time (start_ptime t) in
  Printf.sprintf "%04d-%02d-%02d" year month day
(* Sunday of the week, formatted as YYYY-MM-DD. *)
let end_date t =
  let (year, month, day), _time = Ptime.to_date_time (end_ptime t) in
  Printf.sprintf "%04d-%02d-%02d" year month day
(* Whether [time] falls inside week [t].

   The week is treated as the half-open interval from [start_ptime t] up to,
   but excluding, one second after [end_ptime t]. [end_ptime] is Sunday
   23:59:59 with no fractional part, so an inclusive comparison against it
   would reject instants in the final second of the week that carry
   sub-second precision. *)
let contains_ptime t time =
  let start = start_ptime t in
  let end_ = end_ptime t in
  Ptime.compare time start >= 0
  &&
  match Ptime.add_span end_ (Ptime.Span.of_int_s 1) with
  | Some next_week_start -> Ptime.compare time next_week_start < 0
  | None ->
      (* Adding a second left Ptime's representable range: keep the
         inclusive comparison against [end_]. *)
      Ptime.compare time end_ <= 0
(* Parse an ISO 8601 timestamp like "2024-01-15T10:30:00Z".

   Fields are read from fixed positions: year at 0-3, month at 5-6, day at
   8-9 and, when the string has at least 19 characters, hour at 11-12, minute
   at 14-15 and second at 17-18. Separator characters are not checked, and
   anything after position 18 (fractional seconds, zone designator) is
   ignored: the result is always built with a zero time-zone offset.

   Returns [None] when the string is shorter than 10 characters, a field is
   not an integer, or the fields do not form a valid date-time. *)
let parse_iso8601 s =
  (* Only the failures the body can produce are caught: [int_of_string]
     raises [Failure] and [String.sub] raises [Invalid_argument]. Anything
     else is left to propagate rather than being turned into [None]. *)
  try
    let len = String.length s in
    if len < 10 then None
    else
      let field pos width = int_of_string (String.sub s pos width) in
      let year = field 0 4 in
      let month = field 5 2 in
      let day = field 8 2 in
      if len >= 19 then
        let hour = field 11 2 in
        let min = field 14 2 in
        let sec = field 17 2 in
        Ptime.of_date_time ((year, month, day), ((hour, min, sec), 0))
      else Ptime.of_date (year, month, day)
  with Failure _ | Invalid_argument _ -> None
(* Whether the textual [timestamp] falls inside week [t]; [false] when the
   text cannot be parsed by [parse_iso8601]. *)
let contains_timestamp t timestamp =
  parse_iso8601 timestamp
  |> Option.fold ~none:false ~some:(fun instant -> contains_ptime t instant)
(* The week immediately before [t]; week 1 rolls back to the last week of the
   previous year. *)
let prev { year; week } =
  match week with
  | 1 -> { year = year - 1; week = weeks_in_year (year - 1) }
  | w when w > 1 -> { year; week = w - 1 }
  | _ -> { year = year - 1; week = weeks_in_year (year - 1) }
(* The week immediately after [t]; the last week of a year rolls over to
   week 1 of the next year. *)
let next { year; week } =
  if week >= weeks_in_year year then { year = year + 1; week = 1 }
  else { year; week = week + 1 }
(* Chronological ordering: by year first, then by week number. *)
let compare a b =
  let by_year = Int.compare a.year b.year in
  if by_year <> 0 then by_year else Int.compare a.week b.week
(* Two weeks are equal when both their year and week number match. *)
let equal a b = Int.equal a.year b.year && Int.equal a.week b.week
(* All weeks from [from] to [to_] inclusive, in ascending order; empty when
   [from] is after [to_].

   The list is built by walking backwards from [to_] with [prev] and consing
   each week onto the front, so no reversal is needed. *)
let range ~from ~to_ =
  let rec collect gathered cursor =
    match compare cursor from < 0 with
    | true -> gathered
    | false -> collect (cursor :: gathered) (prev cursor)
  in
  match compare from to_ > 0 with
  | true -> []
  | false -> collect [] to_
(* ISO week notation: four-digit year, "-W", two-digit week. *)
let to_string { year; week } = Printf.sprintf "%04d-W%02d" year week
(* Pretty-printer: writes the [to_string] rendering to the formatter. *)
let pp fmt t = Format.pp_print_string fmt (to_string t)
+99
repowatch/lib/week.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** ISO week calculations and date filtering.
77+88+ This module provides utilities for working with ISO 8601 week dates,
99+ which are used to partition GitHub activity data into weekly buckets.
1010+1111+ An ISO week always starts on Monday and ends on Sunday. Week 1 is the
1212+ week containing the first Thursday of the year (equivalently, the week
1313+ containing January 4th). *)
type t
(** An ISO week, identified by its week-year and its week number (1-53). *)
(** {1 Constructors} *)

val of_date : year:int -> month:int -> day:int -> t
(** [of_date ~year ~month ~day] returns the ISO week containing the given
    Gregorian date. Month is 1-12, day is 1-31. The day is not
    range-checked.

    @raise Invalid_argument if [month] is outside 1-12. *)

val of_ptime : Ptime.t -> t
(** [of_ptime t] returns the ISO week containing the given timestamp. *)

val current : clock:_ Eio.Time.clock -> t
(** [current ~clock] returns the current ISO week based on the system clock.
    If the clock value cannot be converted to a [Ptime.t], the week
    containing 1970-01-01 is returned. *)

val of_year_week : year:int -> week:int -> t
(** [of_year_week ~year ~week] creates an ISO week from year and week number.
    Week number should be 1-53.

    @raise Invalid_argument if [week] is out of range for the given year. *)
(** {1 Accessors} *)

val year : t -> int
(** [year t] is the ISO week-year of [t]. Near a year boundary this can
    differ from the Gregorian year of the dates in the week. *)

val week : t -> int
(** [week t] is the ISO week number of [t], between 1 and 53. *)
(** {1 Date Boundaries} *)

val start_date : t -> string
(** [start_date t] returns the Monday of the week in ISO 8601 format
    (YYYY-MM-DD). *)

val end_date : t -> string
(** [end_date t] returns the Sunday of the week in ISO 8601 format
    (YYYY-MM-DD). *)

val start_ptime : t -> Ptime.t
(** [start_ptime t] returns the Monday of the week at 00:00:00 UTC, or
    [Ptime.epoch] if that date cannot be represented as a [Ptime.t]. *)

val end_ptime : t -> Ptime.t
(** [end_ptime t] returns the Sunday of the week at 23:59:59 UTC, or
    [Ptime.epoch] if that date cannot be represented as a [Ptime.t]. *)
(** {1 Timestamp Filtering} *)

val contains_timestamp : t -> string -> bool
(** [contains_timestamp t timestamp] returns [true] if the ISO 8601 timestamp
    falls within this week. Accepts timestamps in the format used by GitHub
    API (e.g., "2024-01-15T10:30:00Z"), as well as date-only strings
    ("2024-01-15"). Only the date and time fields are read: fractional
    seconds and any zone designator are ignored, so the timestamp is always
    interpreted as UTC. Returns [false] if the timestamp cannot be parsed. *)

val contains_ptime : t -> Ptime.t -> bool
(** [contains_ptime t time] returns [true] if [time] falls within this week. *)
(** {1 Navigation} *)

val prev : t -> t
(** [prev t] is the week immediately before [t]. *)

val next : t -> t
(** [next t] is the week immediately after [t]. *)

val range : from:t -> to_:t -> t list
(** [range ~from ~to_] lists the weeks from [from] up to and including [to_],
    in ascending order. The list is empty when [from] is after [to_]. *)
(** {1 Comparison} *)

val compare : t -> t -> int
(** [compare a b] orders weeks chronologically: the result is negative when
    [a] is before [b], zero when they are the same week, and positive when
    [a] is after [b]. *)

val equal : t -> t -> bool
(** [equal a b] is [true] when [a] and [b] denote the same week. *)
(** {1 Formatting} *)

val to_string : t -> string
(** [to_string t] renders [t] in ISO week notation, e.g. ["2024-W03"]. *)

val pp : Format.formatter -> t -> unit
(** [pp fmt t] prints the {!to_string} rendering of [t] on [fmt]. *)
+40
repowatch/repowatch.opam
···11+# This file is generated by dune, edit dune-project instead
22+opam-version: "2.0"
33+synopsis: "GitHub repository activity watcher and analyzer"
44+description:
55+ "Repowatch parses and analyzes GitHub repository activity data from JSON files. It provides a CLI for viewing issues, PRs, discussions, and releases with statistics and filtering capabilities."
66+maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
77+authors: ["Anil Madhavapeddy <anil@recoil.org>"]
88+license: "ISC"
99+homepage: "https://github.com/avsm/repowatch"
1010+doc: "https://avsm.github.io/repowatch"
1111+bug-reports: "https://github.com/avsm/repowatch/issues"
1212+depends: [
1313+ "ocaml" {>= "5.2.0"}
1414+ "dune" {>= "3.20" & >= "3.20"}
1515+ "eio_main" {>= "1.2"}
1616+ "jsont" {>= "0.1.0"}
1717+ "tomlt" {>= "0.1.0"}
1818+ "xdge" {>= "0.1.0"}
1919+ "cmdliner" {>= "1.3.0"}
2020+ "logs" {>= "0.7.0"}
2121+ "fmt" {>= "0.9.0"}
2222+ "ptime" {>= "1.0.0"}
2323+ "odoc" {with-doc}
2424+]
2525+build: [
2626+ ["dune" "subst"] {dev}
2727+ [
2828+ "dune"
2929+ "build"
3030+ "-p"
3131+ name
3232+ "-j"
3333+ jobs
3434+ "@install"
3535+ "@runtest" {with-test}
3636+ "@doc" {with-doc}
3737+ ]
3838+]
3939+dev-repo: "git+https://github.com/avsm/repowatch.git"
4040+x-maintenance-intent: ["(latest)"]