My aggregated monorepo of OCaml code, auto-maintained
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add repowatch: GitHub repository activity watcher and analyzer

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+6618
+856
repowatch/PLAN.md
··· 1 + # Repowatch GitHub Sync Implementation Plan 2 + 3 + This document outlines the implementation plan for adding GitHub GraphQL query support to repowatch, enabling it to fetch repository activity data and store it in the same format as ruminant's `data/gh/` directory structure. 4 + 5 + ## Overview 6 + 7 + The goal is to implement a `repowatch sync` command that: 8 + 1. Queries GitHub's GraphQL API for repository activity (issues, PRs, discussions) 9 + 2. Queries GitHub's REST API for releases 10 + 3. Filters data by ISO week 11 + 4. Saves results in the ruminant JSON format 12 + 13 + ## Reference Implementation Analysis 14 + 15 + Based on analysis of the Python ruminant codebase, the key implementation patterns are: 16 + 17 + ### GraphQL Queries 18 + 19 + **Issues and PRs Query** - Fetches both in a single query with cursor-based pagination: 20 + ```graphql 21 + query($owner: String!, $name: String!, $issuesAfter: String, $prsAfter: String) { 22 + repository(owner: $owner, name: $name) { 23 + issues(first: 25, after: $issuesAfter, orderBy: {field: UPDATED_AT, direction: DESC}) { 24 + pageInfo { hasNextPage, endCursor } 25 + nodes { 26 + number, title, url, createdAt, updatedAt, closedAt, bodyText, state 27 + author { login } 28 + labels(first: 20) { nodes { name } } 29 + comments(first: 10, orderBy: {field: UPDATED_AT, direction: DESC}) { 30 + totalCount 31 + nodes { author { login }, bodyText, createdAt, updatedAt } 32 + } 33 + timelineItems(first: 100, itemTypes: [ISSUE_COMMENT, LABELED_EVENT, UNLABELED_EVENT, CLOSED_EVENT, REOPENED_EVENT]) { 34 + nodes { 35 + __typename 36 + ... on IssueComment { createdAt } 37 + ... on LabeledEvent { createdAt } 38 + ... on UnlabeledEvent { createdAt } 39 + ... on ClosedEvent { createdAt } 40 + ... 
on ReopenedEvent { createdAt } 41 + } 42 + } 43 + } 44 + } 45 + pullRequests(first: 25, after: $prsAfter, orderBy: {field: UPDATED_AT, direction: DESC}) { 46 + pageInfo { hasNextPage, endCursor } 47 + nodes { 48 + number, title, url, createdAt, updatedAt, closedAt, mergedAt, bodyText, state 49 + additions, deletions, changedFiles, mergeable, isDraft 50 + author { login } 51 + labels(first: 20) { nodes { name } } 52 + comments(first: 10, orderBy: {field: UPDATED_AT, direction: DESC}) { 53 + totalCount 54 + nodes { author { login }, bodyText, createdAt, updatedAt } 55 + } 56 + timelineItems(first: 100, itemTypes: [PULL_REQUEST_COMMIT, PULL_REQUEST_REVIEW, ISSUE_COMMENT, CLOSED_EVENT, REOPENED_EVENT, MERGED_EVENT]) { 57 + nodes { 58 + __typename 59 + ... on PullRequestCommit { commit { committedDate } } 60 + ... on PullRequestReview { createdAt } 61 + ... on IssueComment { createdAt } 62 + ... on ClosedEvent { createdAt } 63 + ... on ReopenedEvent { createdAt } 64 + ... on MergedEvent { createdAt } 65 + } 66 + } 67 + } 68 + } 69 + } 70 + } 71 + ``` 72 + 73 + **Discussions Query** - Single-page fetch (no pagination): 74 + ```graphql 75 + query($owner: String!, $name: String!) { 76 + repository(owner: $owner, name: $name) { 77 + discussions(first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) { 78 + nodes { 79 + number, title, url, updatedAt, bodyText 80 + author { login } 81 + category { name } 82 + comments { totalCount } 83 + answerChosenAt 84 + } 85 + } 86 + } 87 + } 88 + ``` 89 + 90 + ### Pagination Strategy 91 + 92 + - Cursor-based pagination with `first: 25` items per page 93 + - Independent cursors for issues (`issuesAfter`) and PRs (`prsAfter`) 94 + - Safety limits: max 20 pages, early termination if no activity in 5 pages 95 + - Cursor change detection to prevent infinite loops 96 + 97 + ### Authentication 98 + 99 + - Bearer token in `Authorization` header 100 + - Token sources (priority order): 101 + 1. 
Config file (`.ruminant-keys.toml` or our `config.toml`) 102 + 2. `GITHUB_TOKEN` environment variable 103 + 104 + ### Rate Limiting 105 + 106 + - Monitor `X-RateLimit-Remaining` and `X-RateLimit-Reset` headers 107 + - Exponential backoff: 1s, 2s, 4s delays on retryable errors 108 + - Max 3 retry attempts 109 + - HTTP 502/503/504 are retryable; HTTP 403 is not 110 + 111 + ### Week Filtering 112 + 113 + Items included if ANY of: 114 + - Created during target week 115 + - Updated during target week 116 + - Timeline events during target week 117 + 118 + --- 119 + 120 + ## Implementation Plan 121 + 122 + ### Phase 1: Core Types and GraphQL Module 123 + 124 + #### 1.1 GraphQL Types (`lib/graphql_types.mli`) 125 + 126 + ```ocaml 127 + (** GraphQL response types for GitHub API *) 128 + 129 + module PageInfo : sig 130 + type t 131 + val has_next_page : t -> bool 132 + val end_cursor : t -> string option 133 + end 134 + 135 + module Author : sig 136 + type t 137 + val login : t -> string (* "ghost" if null *) 138 + end 139 + 140 + module Label : sig 141 + type t 142 + val name : t -> string 143 + end 144 + 145 + module Comment : sig 146 + type t 147 + val author : t -> Author.t 148 + val body_text : t -> string 149 + val created_at : t -> string 150 + val updated_at : t -> string 151 + end 152 + 153 + module TimelineItem : sig 154 + type t = 155 + | Issue_comment of { created_at : string } 156 + | Labeled_event of { created_at : string } 157 + | Unlabeled_event of { created_at : string } 158 + | Closed_event of { created_at : string } 159 + | Reopened_event of { created_at : string } 160 + | Pr_commit of { committed_date : string } 161 + | Pr_review of { created_at : string } 162 + | Merged_event of { created_at : string } 163 + | Unknown 164 + end 165 + 166 + module Issue_node : sig 167 + type t 168 + val number : t -> int 169 + val title : t -> string 170 + val url : t -> string 171 + val created_at : t -> string 172 + val updated_at : t -> string 173 + val closed_at : t 
-> string option 174 + val body_text : t -> string 175 + val state : t -> string 176 + val author : t -> Author.t 177 + val labels : t -> Label.t list 178 + val comments : t -> Comment.t list 179 + val timeline_items : t -> TimelineItem.t list 180 + end 181 + 182 + module Pr_node : sig 183 + type t 184 + (* Issue fields plus: *) 185 + val merged_at : t -> string option 186 + val additions : t -> int 187 + val deletions : t -> int 188 + val changed_files : t -> int 189 + val mergeable : t -> string 190 + val is_draft : t -> bool 191 + (* ... other accessors ... *) 192 + end 193 + 194 + module Issues_connection : sig 195 + type t 196 + val page_info : t -> PageInfo.t 197 + val nodes : t -> Issue_node.t list 198 + end 199 + 200 + module Prs_connection : sig 201 + type t 202 + val page_info : t -> PageInfo.t 203 + val nodes : t -> Pr_node.t list 204 + end 205 + 206 + module Discussion_node : sig 207 + type t 208 + val number : t -> int 209 + val title : t -> string 210 + val url : t -> string 211 + val updated_at : t -> string 212 + val body_text : t -> string 213 + val author : t -> Author.t 214 + val category : t -> string 215 + val comments_count : t -> int 216 + val answered : t -> bool 217 + end 218 + 219 + module Repository_response : sig 220 + type t 221 + val issues : t -> Issues_connection.t 222 + val pull_requests : t -> Prs_connection.t 223 + end 224 + 225 + module Discussions_response : sig 226 + type t 227 + val discussions : t -> Discussion_node.t list 228 + end 229 + ``` 230 + 231 + #### 1.2 GraphQL Codecs (`lib/graphql_codec.ml`) 232 + 233 + Jsont codecs for decoding GraphQL responses: 234 + 235 + ```ocaml 236 + (* Example structure *) 237 + let page_info_jsont = ... 238 + let author_jsont = ... 239 + let label_jsont = ... 240 + let comment_jsont = ... 241 + let timeline_item_jsont = ... 242 + let issue_node_jsont = ... 243 + let pr_node_jsont = ... 244 + let issues_connection_jsont = ... 245 + let prs_connection_jsont = ... 
246 + let repository_response_jsont = ... 247 + let discussion_node_jsont = ... 248 + let discussions_response_jsont = ... 249 + ``` 250 + 251 + #### 1.3 GraphQL Query Builder (`lib/graphql.mli`) 252 + 253 + ```ocaml 254 + (** GraphQL query construction and execution *) 255 + 256 + type query_variables = { 257 + owner : string; 258 + name : string; 259 + issues_after : string option; 260 + prs_after : string option; 261 + } 262 + 263 + val issues_prs_query : string 264 + (** The GraphQL query string for fetching issues and PRs *) 265 + 266 + val discussions_query : string 267 + (** The GraphQL query string for fetching discussions *) 268 + 269 + val build_request_body : query:string -> variables:query_variables -> string 270 + (** Build JSON request body for GraphQL endpoint *) 271 + ``` 272 + 273 + ### Phase 2: GitHub Client Module 274 + 275 + #### 2.1 Client Types (`lib/github.mli`) 276 + 277 + ```ocaml 278 + (** GitHub API client *) 279 + 280 + type t 281 + (** GitHub API client with authentication and rate limiting *) 282 + 283 + type error = 284 + | Rate_limited of { reset_at : float } 285 + | Unauthorized 286 + | Forbidden 287 + | Not_found 288 + | Server_error of int 289 + | Network_error of string 290 + | Parse_error of string 291 + 292 + val create : 293 + sw:Eio.Switch.t -> 294 + env:< clock: _ Eio.Time.clock; net: _ Eio.Net.t; .. > -> 295 + token:string -> 296 + t 297 + (** Create a GitHub client with authentication *) 298 + 299 + val create_from_env : 300 + sw:Eio.Switch.t -> 301 + env:< clock: _ Eio.Time.clock; net: _ Eio.Net.t; .. 
> -> 302 + (t, string) result 303 + (** Create client using GITHUB_TOKEN environment variable *) 304 + 305 + (** {1 GraphQL API} *) 306 + 307 + val graphql : 308 + t -> 309 + query:string -> 310 + variables:(string * Jsont.Json.t) list -> 311 + (Jsont.Json.t, error) result 312 + (** Execute a GraphQL query *) 313 + 314 + (** {1 REST API} *) 315 + 316 + val get_releases : 317 + t -> 318 + owner:string -> 319 + repo:string -> 320 + page:int -> 321 + (Jsont.Json.t, error) result 322 + (** Fetch releases using REST API *) 323 + 324 + val get_user : 325 + t -> 326 + username:string -> 327 + (Jsont.Json.t, error) result 328 + (** Fetch user profile *) 329 + 330 + (** {1 Rate Limit Info} *) 331 + 332 + val rate_limit_remaining : t -> int option 333 + (** Current rate limit remaining (if known) *) 334 + 335 + val rate_limit_reset : t -> float option 336 + (** Rate limit reset time as Unix timestamp (if known) *) 337 + ``` 338 + 339 + #### 2.2 Client Implementation (`lib/github.ml`) 340 + 341 + Key implementation details: 342 + 343 + ```ocaml 344 + (* Rate limit tracking *) 345 + type rate_limit = { 346 + mutable remaining : int option; 347 + mutable reset_at : float option; 348 + } 349 + 350 + (* Retry logic with exponential backoff *) 351 + let with_retry ~max_attempts ~clock f = 352 + let rec loop attempt = 353 + match f () with 354 + | Ok _ as result -> result 355 + | Error (Server_error code) when code >= 500 && attempt < max_attempts -> 356 + let delay = Float.pow 2.0 (Float.of_int attempt) in 357 + Eio.Time.sleep clock delay; 358 + loop (attempt + 1) 359 + | Error _ as err -> err 360 + in 361 + loop 0 362 + 363 + (* GraphQL endpoint *) 364 + let graphql_endpoint = "https://api.github.com/graphql" 365 + 366 + (* REST API base *) 367 + let rest_base = "https://api.github.com" 368 + 369 + (* Execute GraphQL query *) 370 + let graphql t ~query ~variables = 371 + let body = Graphql.build_request_body ~query ~variables in 372 + with_retry ~max_attempts:3 ~clock:t.clock 
(fun () -> 373 + let resp = Requests.post t.session graphql_endpoint 374 + ~headers:(Requests.Headers.empty 375 + |> Requests.Headers.add "Content-Type" "application/json") 376 + ~body:(Requests.Body.string body) in 377 + update_rate_limits t resp; 378 + match Requests.Response.status_code resp with 379 + | 200 -> parse_graphql_response resp 380 + | 401 -> Error Unauthorized 381 + | 403 -> check_rate_limit_or_forbidden t resp 382 + | 404 -> Error Not_found 383 + | code when code >= 500 -> Error (Server_error code) 384 + | code -> Error (Server_error code)) 385 + ``` 386 + 387 + ### Phase 3: Week-Based Filtering 388 + 389 + #### 3.1 Week Utilities (`lib/week.mli`) 390 + 391 + ```ocaml 392 + (** ISO week calculations and date filtering *) 393 + 394 + type t 395 + (** An ISO week (year, week number) *) 396 + 397 + val of_date : year:int -> month:int -> day:int -> t 398 + (** Get the ISO week containing the given date *) 399 + 400 + val current : clock:_ Eio.Time.clock -> t 401 + (** Get the current ISO week *) 402 + 403 + val of_year_week : year:int -> week:int -> t 404 + (** Create from year and week number (1-53) *) 405 + 406 + val year : t -> int 407 + val week : t -> int 408 + 409 + val start_date : t -> string 410 + (** Monday of the week in ISO 8601 format (YYYY-MM-DD) *) 411 + 412 + val end_date : t -> string 413 + (** Sunday of the week in ISO 8601 format (YYYY-MM-DD) *) 414 + 415 + val contains_timestamp : t -> string -> bool 416 + (** Check if an ISO 8601 timestamp falls within this week *) 417 + 418 + val prev : t -> t 419 + (** Previous week *) 420 + 421 + val next : t -> t 422 + (** Next week *) 423 + 424 + val range : from:t -> to_:t -> t list 425 + (** Generate list of weeks in range (inclusive) *) 426 + ``` 427 + 428 + #### 3.2 Activity Filtering (`lib/filter.mli`) 429 + 430 + ```ocaml 431 + (** Filter GitHub data by week *) 432 + 433 + val issue_active_in_week : 434 + week:Week.t -> 435 + Graphql_types.Issue_node.t -> 436 + bool 437 + (** Check if 
issue has activity in the given week *) 438 + 439 + val pr_active_in_week : 440 + week:Week.t -> 441 + Graphql_types.Pr_node.t -> 442 + bool 443 + (** Check if PR has activity in the given week *) 444 + 445 + val discussion_active_in_week : 446 + week:Week.t -> 447 + Graphql_types.Discussion_node.t -> 448 + bool 449 + (** Check if discussion has activity in the given week *) 450 + 451 + val is_good_first_issue : 452 + Graphql_types.Issue_node.t -> 453 + bool 454 + (** Check if issue has good-first-issue labels *) 455 + ``` 456 + 457 + ### Phase 4: Data Transformation 458 + 459 + #### 4.1 Transform Module (`lib/transform.mli`) 460 + 461 + ```ocaml 462 + (** Transform GraphQL responses to repowatch types *) 463 + 464 + val issue_of_node : Graphql_types.Issue_node.t -> Types.Issue.t 465 + (** Convert GraphQL issue node to Issue.t *) 466 + 467 + val pr_of_node : Graphql_types.Pr_node.t -> Types.Pr.t 468 + (** Convert GraphQL PR node to Pr.t *) 469 + 470 + val discussion_of_node : Graphql_types.Discussion_node.t -> Types.Discussion.t 471 + (** Convert GraphQL discussion node to Discussion.t *) 472 + 473 + val release_of_json : Jsont.Json.t -> Types.Release.t option 474 + (** Convert REST API release JSON to Release.t *) 475 + 476 + val format_comment : author:string -> body:string -> string 477 + (** Format comment as "@author: body" *) 478 + ``` 479 + 480 + ### Phase 5: Sync Engine 481 + 482 + #### 5.1 Sync Types (`lib/sync.mli`) 483 + 484 + ```ocaml 485 + (** Repository sync engine *) 486 + 487 + type progress = { 488 + phase : [ `Issues | `Prs | `Discussions | `Releases | `Users ]; 489 + current : int; 490 + total : int option; 491 + } 492 + 493 + type sync_result = { 494 + week_data : Types.Week_data.t; 495 + issues_fetched : int; 496 + prs_fetched : int; 497 + pages_fetched : int; 498 + rate_limit_remaining : int option; 499 + } 500 + 501 + val sync_week : 502 + client:Github.t -> 503 + owner:string -> 504 + repo:string -> 505 + week:Week.t -> 506 + 
?on_progress:(progress -> unit) -> 507 + unit -> 508 + (sync_result, Github.error) result 509 + (** Sync a single week of repository data *) 510 + 511 + val sync_range : 512 + client:Github.t -> 513 + owner:string -> 514 + repo:string -> 515 + from_week:Week.t -> 516 + to_week:Week.t -> 517 + ?on_progress:(progress -> unit) -> 518 + unit -> 519 + (sync_result list, Github.error) result 520 + (** Sync a range of weeks *) 521 + ``` 522 + 523 + #### 5.2 Sync Implementation (`lib/sync.ml`) 524 + 525 + Key implementation patterns: 526 + 527 + ```ocaml 528 + (* Pagination state *) 529 + type pagination_state = { 530 + mutable issues_cursor : string option; 531 + mutable prs_cursor : string option; 532 + mutable issues_done : bool; 533 + mutable prs_done : bool; 534 + mutable pages_without_activity : int; 535 + } 536 + 537 + (* Sync issues and PRs with pagination *) 538 + let sync_issues_prs ~client ~owner ~repo ~week = 539 + let state = { 540 + issues_cursor = None; 541 + prs_cursor = None; 542 + issues_done = false; 543 + prs_done = false; 544 + pages_without_activity = 0; 545 + } in 546 + let issues = ref [] in 547 + let prs = ref [] in 548 + let max_pages = 20 in 549 + let early_exit_threshold = 5 in 550 + 551 + let rec loop page_count = 552 + if page_count >= max_pages then Ok () 553 + else if state.issues_done && state.prs_done then Ok () 554 + else if state.pages_without_activity >= early_exit_threshold then Ok () 555 + else begin 556 + let variables = Graphql.{ 557 + owner; 558 + name = repo; 559 + issues_after = state.issues_cursor; 560 + prs_after = state.prs_cursor; 561 + } in 562 + match Github.graphql client 563 + ~query:Graphql.issues_prs_query 564 + ~variables:(Graphql.variables_to_json variables) with 565 + | Error e -> Error e 566 + | Ok response -> 567 + let repo_data = Graphql_codec.decode_repository_response response in 568 + (* Process issues *) 569 + let new_issues = process_issues ~week repo_data.issues in 570 + (* Process PRs *) 571 + let new_prs = 
process_prs ~week repo_data.pull_requests in 572 + (* Update pagination state *) 573 + update_pagination_state state repo_data; 574 + (* Track activity *) 575 + if List.is_empty new_issues && List.is_empty new_prs then 576 + state.pages_without_activity <- state.pages_without_activity + 1 577 + else 578 + state.pages_without_activity <- 0; 579 + (* Accumulate results *) 580 + issues := new_issues @ !issues; 581 + prs := new_prs @ !prs; 582 + loop (page_count + 1) 583 + end 584 + in 585 + match loop 0 with 586 + | Ok () -> Ok (!issues, !prs) 587 + | Error e -> Error e 588 + ``` 589 + 590 + ### Phase 6: Storage Module 591 + 592 + #### 6.1 Storage Interface (`lib/storage.mli`) 593 + 594 + ```ocaml 595 + (** Persistent storage for sync results *) 596 + 597 + val data_dir : fs:Eio.Fs.dir_ty Eio.Path.t -> owner:string -> repo:string -> 598 + Eio.Fs.dir_ty Eio.Path.t 599 + (** Get directory for repository data: {base}/gh/{owner}/{repo}/ *) 600 + 601 + val week_file : week:Week.t -> string 602 + (** Get filename for week: week-{WW}-{YYYY}.json *) 603 + 604 + val save_week : 605 + fs:Eio.Fs.dir_ty Eio.Path.t -> 606 + base_dir:string -> 607 + owner:string -> 608 + repo:string -> 609 + week:Week.t -> 610 + data:Types.Week_data.t -> 611 + unit 612 + (** Save week data to file, creating directories as needed *) 613 + 614 + val load_week : 615 + fs:Eio.Fs.dir_ty Eio.Path.t -> 616 + base_dir:string -> 617 + owner:string -> 618 + repo:string -> 619 + week:Week.t -> 620 + Types.Week_data.t option 621 + (** Load existing week data if present *) 622 + 623 + val list_cached_weeks : 624 + fs:Eio.Fs.dir_ty Eio.Path.t -> 625 + base_dir:string -> 626 + owner:string -> 627 + repo:string -> 628 + Week.t list 629 + (** List all cached weeks for a repository *) 630 + ``` 631 + 632 + ### Phase 7: User Extraction 633 + 634 + #### 7.1 User Module (`lib/users.mli`) 635 + 636 + ```ocaml 637 + (** Extract and validate GitHub usernames *) 638 + 639 + val extract_from_week_data : Types.Week_data.t -> 
string list 640 + (** Extract all unique usernames from week data *) 641 + 642 + val extract_mentions : string -> string list 643 + (** Extract @mentions from text *) 644 + 645 + val is_valid_username : string -> bool 646 + (** Check if string is a valid GitHub username (not a common word, etc.) *) 647 + 648 + val excluded_words : string list 649 + (** List of words to exclude from username detection *) 650 + ``` 651 + 652 + Implementation notes: 653 + - Username regex: `@([a-zA-Z][a-zA-Z0-9-]{0,38})` 654 + - Exclude common words, programming terms, OCaml-specific terms 655 + - Exclude hex strings (commit SHA fragments) 656 + - Minimum 2 characters 657 + 658 + ### Phase 8: CLI Commands 659 + 660 + #### 8.1 Sync Command (`bin/main.ml` additions) 661 + 662 + ```ocaml 663 + (* sync command *) 664 + let sync_cmd = 665 + let doc = "Sync repository data from GitHub" in 666 + let info = Cmd.info "sync" ~doc in 667 + let term = 668 + let open Term.Syntax in 669 + let+ xdg = xdg_term 670 + and+ repos = Arg.(value & pos_all string [] & info [] ~docv:"REPO" 671 + ~doc:"Repositories to sync (owner/repo format)") 672 + and+ week = Arg.(value & opt (some int) None & 673 + info ["w"; "week"] ~docv:"WEEK" ~doc:"ISO week number (default: current)") 674 + and+ year = Arg.(value & opt (some int) None & 675 + info ["y"; "year"] ~docv:"YEAR" ~doc:"Year (default: current)") 676 + and+ weeks_back = Arg.(value & opt int 1 & 677 + info ["n"; "weeks"] ~docv:"N" ~doc:"Number of weeks to sync (default: 1)") 678 + and+ output_dir = Arg.(value & opt string "data/gh" & 679 + info ["o"; "output"] ~docv:"DIR" ~doc:"Output directory (default: data/gh)") 680 + and+ force = Arg.(value & flag & 681 + info ["f"; "force"] ~doc:"Force re-sync even if cached") 682 + and+ log_level = Logs_cli.level () in 683 + (* Implementation *) 684 + ... 
685 + in 686 + Cmd.v info term 687 + ``` 688 + 689 + #### 8.2 Additional Commands 690 + 691 + ```ocaml 692 + (* repos command - list configured repositories *) 693 + let repos_cmd = ... 694 + 695 + (* status command - show sync status *) 696 + let status_cmd = ... 697 + 698 + (* users command - fetch user profiles *) 699 + let users_cmd = ... 700 + ``` 701 + 702 + --- 703 + 704 + ## File Structure 705 + 706 + ``` 707 + repowatch/ 708 + ├── lib/ 709 + │ ├── dune # Updated dependencies 710 + │ ├── repowatch.ml # Updated exports 711 + │ ├── types.mli/ml # (existing) 712 + │ ├── codec.mli/ml # (existing) 713 + │ ├── loader.mli/ml # (existing) 714 + │ ├── config.mli/ml # (existing, extended for GitHub token) 715 + │ ├── printer.mli/ml # (existing) 716 + │ ├── week.mli/ml # NEW: ISO week utilities 717 + │ ├── graphql_types.mli/ml # NEW: GraphQL response types 718 + │ ├── graphql_codec.mli/ml # NEW: Jsont codecs for GraphQL 719 + │ ├── graphql.mli/ml # NEW: Query building 720 + │ ├── github.mli/ml # NEW: GitHub API client 721 + │ ├── filter.mli/ml # NEW: Week-based filtering 722 + │ ├── transform.mli/ml # NEW: Data transformation 723 + │ ├── sync.mli/ml # NEW: Sync engine 724 + │ ├── storage.mli/ml # NEW: File storage 725 + │ └── users.mli/ml # NEW: Username extraction 726 + ├── bin/ 727 + │ ├── dune # (existing) 728 + │ └── main.ml # Updated with sync commands 729 + └── test/ 730 + ├── dune # Updated 731 + ├── test_codec.ml # (existing) 732 + ├── test_week.ml # NEW 733 + ├── test_graphql.ml # NEW 734 + ├── test_filter.ml # NEW 735 + └── test_users.ml # NEW 736 + ``` 737 + 738 + --- 739 + 740 + ## Dependencies 741 + 742 + Update `lib/dune`: 743 + ```dune 744 + (library 745 + (name repowatch) 746 + (public_name repowatch) 747 + (libraries 748 + jsont jsont.bytesrw 749 + tomlt tomlt.bytesrw 750 + eio 751 + xdge 752 + requests 753 + logs 754 + fmt 755 + ptime ptime.clock.os)) 756 + ``` 757 + 758 + --- 759 + 760 + ## Implementation Order 761 + 762 + ### Week 1: Core 
Infrastructure 763 + 1. `week.mli/ml` - ISO week calculations 764 + 2. `graphql_types.mli/ml` - Response types 765 + 3. `graphql_codec.mli/ml` - JSON parsing 766 + 4. `graphql.mli/ml` - Query building 767 + 5. Tests for above 768 + 769 + ### Week 2: GitHub Client 770 + 1. `github.mli/ml` - HTTP client with auth and rate limiting 771 + 2. `filter.mli/ml` - Week-based filtering 772 + 3. `transform.mli/ml` - Data transformation 773 + 4. Tests for above 774 + 775 + ### Week 3: Sync Engine 776 + 1. `sync.mli/ml` - Pagination and sync logic 777 + 2. `storage.mli/ml` - File persistence 778 + 3. `users.mli/ml` - Username extraction 779 + 4. Tests for above 780 + 781 + ### Week 4: CLI Integration 782 + 1. Update `main.ml` with sync command 783 + 2. Update `config.mli/ml` for GitHub token 784 + 3. End-to-end testing 785 + 4. Documentation 786 + 787 + --- 788 + 789 + ## Testing Strategy 790 + 791 + ### Unit Tests 792 + - Week calculations (edge cases: year boundaries, week 53) 793 + - GraphQL response parsing 794 + - Week filtering logic 795 + - Username extraction and validation 796 + - Data transformation 797 + 798 + ### Integration Tests 799 + - Mock GitHub API responses 800 + - Full sync workflow with fixtures 801 + - Error handling (rate limits, auth failures) 802 + 803 + ### End-to-End Tests 804 + - Sync real repository (with live API, opt-in) 805 + - Validate output matches ruminant format 806 + 807 + --- 808 + 809 + ## Error Handling 810 + 811 + All sync operations should: 812 + 1. Return `Result` types for recoverable errors 813 + 2. Log warnings for partial failures (e.g., some pages fail) 814 + 3. Preserve partial results when possible 815 + 4. 
Provide clear error messages with context 816 + 817 + Rate limit handling: 818 + - Detect via response headers 819 + - Calculate wait time from reset timestamp 820 + - Optionally wait and retry (with user confirmation for long waits) 821 + 822 + --- 823 + 824 + ## Configuration Extension 825 + 826 + Add to `config.toml`: 827 + ```toml 828 + [github] 829 + token_env = "GITHUB_TOKEN" # Or explicit token 830 + 831 + [sync] 832 + output_dir = "data/gh" 833 + max_pages = 20 834 + early_exit_pages = 5 835 + concurrent_repos = 2 836 + 837 + [[repositories]] 838 + owner = "ocaml" 839 + repo = "ocaml" 840 + group = "core" 841 + 842 + [[repositories]] 843 + owner = "ocaml" 844 + repo = "dune" 845 + group = "tooling" 846 + ``` 847 + 848 + --- 849 + 850 + ## Future Enhancements 851 + 852 + 1. **Incremental Sync**: Only fetch new data since last sync 853 + 2. **Parallel Repo Sync**: Sync multiple repos concurrently (respecting rate limits) 854 + 3. **User Profile Caching**: Fetch and cache user profiles 855 + 4. **Webhook Support**: React to GitHub webhooks for real-time updates 856 + 5. **Export Formats**: Generate reports, RSS feeds, etc.
+5
repowatch/bin/dune
; Executable stanza for the repowatch CLI.
; Builds bin/main.ml into the public `repowatch` binary, linking the
; repowatch library plus cmdliner (CLI parsing), logs/fmt reporters for
; terminal output, and eio_main for the effects-based runtime.
(executable
 (name main)
 (public_name repowatch)
 (package repowatch)
 (libraries
  repowatch
  cmdliner
  logs.fmt
  logs.cli
  fmt.tty
  fmt.cli
  eio_main))
+683
repowatch/bin/main.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + open Cmdliner 7 + 8 + let () = 9 + Fmt.set_style_renderer Fmt.stdout `Ansi_tty; 10 + Fmt.set_style_renderer Fmt.stderr `Ansi_tty; 11 + 12 + Eio_main.run @@ fun env -> 13 + Eio.Switch.run @@ fun sw -> 14 + let fs = env#fs in 15 + 16 + (* Helper to load data from either a file or directory *) 17 + let load_path path = 18 + let path_obj = Eio.Path.(fs / path) in 19 + match Eio.Path.kind ~follow:true path_obj with 20 + | `Regular_file -> ( 21 + match Repowatch.Loader.load_file fs path with 22 + | Ok data -> ([data], []) 23 + | Error e -> ([], [(path, e)])) 24 + | `Directory -> 25 + Repowatch.Loader.load_directory_partial fs path 26 + | _ -> ([], [(path, "Not a file or directory")]) 27 + in 28 + 29 + let xdg_term = Xdge.Cmd.term "repowatch" fs ~dirs:[ `Config; `Cache ] () in 30 + 31 + let info = 32 + Cmd.info "repowatch" ~version:"0.1.0" 33 + ~doc:"GitHub repository activity watcher and analyzer" 34 + ~man: 35 + [ 36 + `S Manpage.s_description; 37 + `P 38 + "Repowatch parses and analyzes GitHub repository activity data \ 39 + from JSON files in the ruminant format. It provides commands for \ 40 + viewing issues, PRs, discussions, and releases with statistics \ 41 + and filtering capabilities."; 42 + `S Manpage.s_commands; 43 + `P "Use $(b,repowatch COMMAND --help) for detailed help on each command."; 44 + ] 45 + in 46 + 47 + (* Path argument *) 48 + let path_arg = 49 + let doc = "Path to a JSON file or directory containing JSON files." in 50 + Arg.(required & pos 0 (some string) None & info [] ~docv:"PATH" ~doc) 51 + in 52 + 53 + let path_arg_opt = 54 + let doc = "Path to a JSON file or directory containing JSON files." 
in 55 + Arg.(value & pos 0 (some string) None & info [] ~docv:"PATH" ~doc) 56 + in 57 + 58 + (* Parse command *) 59 + let parse_cmd = 60 + let doc = "Parse and display GitHub activity data." in 61 + let info = Cmd.info "parse" ~doc in 62 + let term = 63 + let open Term.Syntax in 64 + let+ (_xdg, _) = xdg_term 65 + and+ path = path_arg 66 + and+ short = 67 + Arg.(value & flag & info [ "s"; "short" ] ~doc:"Show short output") 68 + and+ log_level = Logs_cli.level () in 69 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 70 + Logs.set_level log_level; 71 + 72 + let path_obj = Eio.Path.(fs / path) in 73 + match Eio.Path.kind ~follow:true path_obj with 74 + | `Regular_file -> ( 75 + match Repowatch.Loader.load_file fs path with 76 + | Ok data -> 77 + if short then Fmt.pr "%a@." Repowatch.Printer.pp_week_summary data 78 + else Fmt.pr "%a@." Repowatch.Printer.pp_week_data data 79 + | Error e -> Fmt.epr "Error: %s@." e) 80 + | `Directory -> ( 81 + match Repowatch.Loader.load_directory fs path with 82 + | Ok data_list -> 83 + List.iter 84 + (fun data -> 85 + if short then 86 + Fmt.pr "%a@.@." Repowatch.Printer.pp_week_summary data 87 + else Fmt.pr "%a@.---@.@." Repowatch.Printer.pp_week_data data) 88 + data_list 89 + | Error errors -> 90 + List.iter 91 + (fun (file, e) -> Fmt.epr "Error in %s: %s@." file e) 92 + errors) 93 + | _ -> Fmt.epr "Error: %s is not a file or directory@." path 94 + in 95 + Cmd.v info term 96 + in 97 + 98 + (* Stats command *) 99 + let stats_cmd = 100 + let doc = "Show aggregate statistics for activity data." 
in 101 + let info = Cmd.info "stats" ~doc in 102 + let term = 103 + let open Term.Syntax in 104 + let+ (_xdg, _) = xdg_term 105 + and+ path = path_arg 106 + and+ compact = 107 + Arg.(value & flag & info [ "c"; "compact" ] ~doc:"Show compact output") 108 + and+ log_level = Logs_cli.level () in 109 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 110 + Logs.set_level log_level; 111 + 112 + let path_obj = Eio.Path.(fs / path) in 113 + match Eio.Path.kind ~follow:true path_obj with 114 + | `Regular_file -> ( 115 + match Repowatch.Loader.load_file fs path with 116 + | Ok data -> 117 + let stats = Repowatch.Printer.compute_stats data in 118 + if compact then 119 + Fmt.pr "%a@." Repowatch.Printer.pp_stats_compact stats 120 + else Fmt.pr "%a@." Repowatch.Printer.pp_stats stats 121 + | Error e -> Fmt.epr "Error: %s@." e) 122 + | `Directory -> 123 + let data_list, errors = Repowatch.Loader.load_directory_partial fs path in 124 + if errors <> [] then 125 + List.iter 126 + (fun (file, e) -> Logs.warn (fun m -> m "Error in %s: %s" file e)) 127 + errors; 128 + let stats_list = List.map Repowatch.Printer.compute_stats data_list in 129 + let stats = Repowatch.Printer.aggregate_stats stats_list in 130 + Fmt.pr "Aggregated statistics from %d files:@.@." (List.length data_list); 131 + if compact then Fmt.pr "%a@." Repowatch.Printer.pp_stats_compact stats 132 + else Fmt.pr "%a@." Repowatch.Printer.pp_stats stats 133 + | _ -> Fmt.epr "Error: %s is not a file or directory@." path 134 + in 135 + Cmd.v info term 136 + in 137 + 138 + (* Validate command *) 139 + let validate_cmd = 140 + let doc = "Validate JSON files against the expected schema." 
in 141 + let info = Cmd.info "validate" ~doc in 142 + let term = 143 + let open Term.Syntax in 144 + let+ (_xdg, _) = xdg_term 145 + and+ path = path_arg 146 + and+ log_level = Logs_cli.level () in 147 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 148 + Logs.set_level log_level; 149 + 150 + let path_obj = Eio.Path.(fs / path) in 151 + match Eio.Path.kind ~follow:true path_obj with 152 + | `Regular_file -> ( 153 + match Repowatch.Loader.load_file fs path with 154 + | Ok _ -> Fmt.pr "OK: %s@." path 155 + | Error e -> Fmt.pr "FAIL: %s - %s@." path e) 156 + | `Directory -> 157 + let files = Repowatch.Loader.find_json_files fs path in 158 + let ok_count = ref 0 in 159 + let fail_count = ref 0 in 160 + List.iter 161 + (fun file -> 162 + match Repowatch.Loader.load_file fs file with 163 + | Ok _ -> 164 + incr ok_count; 165 + Fmt.pr "OK: %s@." file 166 + | Error e -> 167 + incr fail_count; 168 + Fmt.pr "FAIL: %s - %s@." file e) 169 + files; 170 + Fmt.pr "@.Validation complete: %d OK, %d FAILED@." !ok_count !fail_count 171 + | _ -> Fmt.epr "Error: %s is not a file or directory@." path 172 + in 173 + Cmd.v info term 174 + in 175 + 176 + (* List command - list repositories in a data directory *) 177 + let list_cmd = 178 + let doc = "List repositories in a ruminant data directory." in 179 + let info = Cmd.info "list" ~doc in 180 + let term = 181 + let open Term.Syntax in 182 + let+ (_xdg, _) = xdg_term 183 + and+ path = path_arg_opt 184 + and+ log_level = Logs_cli.level () in 185 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 186 + Logs.set_level log_level; 187 + 188 + let data_dir = Option.value path ~default:"." in 189 + let repos = Repowatch.Loader.find_repos fs data_dir in 190 + if repos = [] then Fmt.pr "No repositories found in %s@." data_dir 191 + else ( 192 + Fmt.pr "Repositories in %s:@." data_dir; 193 + List.iter (fun r -> Fmt.pr " %s@." 
r) repos) 194 + in 195 + Cmd.v info term 196 + in 197 + 198 + (* Issues command - list issues *) 199 + let issues_cmd = 200 + let doc = "List issues from activity data." in 201 + let info = Cmd.info "issues" ~doc in 202 + let term = 203 + let open Term.Syntax in 204 + let+ (_xdg, _) = xdg_term 205 + and+ path = path_arg 206 + and+ state = 207 + Arg.( 208 + value 209 + & opt (some string) None 210 + & info [ "state" ] ~docv:"STATE" ~doc:"Filter by state (open/closed)") 211 + and+ log_level = Logs_cli.level () in 212 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 213 + Logs.set_level log_level; 214 + 215 + let data_list, errors = load_path path in 216 + if errors <> [] then 217 + List.iter 218 + (fun (file, e) -> Logs.warn (fun m -> m "Error in %s: %s" file e)) 219 + errors; 220 + 221 + let all_issues = 222 + List.concat_map Repowatch.Types.Week_data.issues data_list 223 + in 224 + let filtered = 225 + match state with 226 + | Some s -> 227 + List.filter (fun i -> Repowatch.Types.Issue.state i = s) all_issues 228 + | None -> all_issues 229 + in 230 + Fmt.pr "Issues (%d):@." (List.length filtered); 231 + List.iter 232 + (fun i -> Fmt.pr " %a@." Repowatch.Printer.pp_issue_short i) 233 + filtered 234 + in 235 + Cmd.v info term 236 + in 237 + 238 + (* PRs command - list pull requests *) 239 + let prs_cmd = 240 + let doc = "List pull requests from activity data." 
in 241 + let info = Cmd.info "prs" ~doc in 242 + let term = 243 + let open Term.Syntax in 244 + let+ (_xdg, _) = xdg_term 245 + and+ path = path_arg 246 + and+ state = 247 + Arg.( 248 + value 249 + & opt (some string) None 250 + & info [ "state" ] ~docv:"STATE" 251 + ~doc:"Filter by state (open/closed/merged)") 252 + and+ draft = 253 + Arg.(value & flag & info [ "draft" ] ~doc:"Show only draft PRs") 254 + and+ log_level = Logs_cli.level () in 255 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 256 + Logs.set_level log_level; 257 + 258 + let data_list, errors = load_path path in 259 + if errors <> [] then 260 + List.iter 261 + (fun (file, e) -> Logs.warn (fun m -> m "Error in %s: %s" file e)) 262 + errors; 263 + 264 + let all_prs = List.concat_map Repowatch.Types.Week_data.prs data_list in 265 + let filtered = 266 + all_prs 267 + |> (fun prs -> 268 + match state with 269 + | Some "merged" -> 270 + List.filter 271 + (fun p -> Option.is_some (Repowatch.Types.Pr.merged_at p)) 272 + prs 273 + | Some s -> 274 + List.filter (fun p -> Repowatch.Types.Pr.state p = s) prs 275 + | None -> prs) 276 + |> fun prs -> 277 + if draft then List.filter Repowatch.Types.Pr.draft prs else prs 278 + in 279 + Fmt.pr "Pull Requests (%d):@." (List.length filtered); 280 + List.iter 281 + (fun p -> Fmt.pr " %a@." Repowatch.Printer.pp_pr_short p) 282 + filtered 283 + in 284 + Cmd.v info term 285 + in 286 + 287 + (* Sync command - sync repository data from GitHub *) 288 + let sync_cmd = 289 + let doc = "Sync repository data from GitHub." in 290 + let info = 291 + Cmd.info "sync" ~doc 292 + ~man: 293 + [ 294 + `S Manpage.s_description; 295 + `P 296 + "Sync repository activity data from GitHub's GraphQL and REST \ 297 + APIs. 
Data is filtered by ISO week and saved in the ruminant \ 298 + JSON format."; 299 + `S Manpage.s_examples; 300 + `P "Sync current week for all configured repositories:"; 301 + `Pre " repowatch sync"; 302 + `P "Sync a specific repository:"; 303 + `Pre " repowatch sync ocaml/ocaml"; 304 + `P "Sync a specific week:"; 305 + `Pre " repowatch sync -w 3 -y 2024 ocaml/ocaml"; 306 + `P "Update existing data (for daily cron jobs):"; 307 + `Pre " repowatch sync --update"; 308 + `P 309 + "The --update flag merges new activity with existing cached \ 310 + data, making it ideal for keeping the current week up-to-date \ 311 + via daily cron jobs."; 312 + ] 313 + in 314 + let term = 315 + let open Term.Syntax in 316 + let+ (_xdg, _) = xdg_term 317 + and+ repos = 318 + Arg.( 319 + value & pos_all string [] 320 + & info [] ~docv:"REPO" ~doc:"Repositories to sync (owner/repo format)") 321 + and+ week = 322 + Arg.( 323 + value 324 + & opt (some int) None 325 + & info [ "w"; "week" ] ~docv:"WEEK" 326 + ~doc:"ISO week number (default: current)") 327 + and+ year = 328 + Arg.( 329 + value 330 + & opt (some int) None 331 + & info [ "y"; "year" ] ~docv:"YEAR" ~doc:"Year (default: current)") 332 + and+ weeks_back = 333 + Arg.( 334 + value & opt int 1 335 + & info [ "n"; "weeks" ] ~docv:"N" 336 + ~doc:"Number of weeks to sync (default: 1)") 337 + and+ output_dir = 338 + Arg.( 339 + value 340 + & opt (some string) None 341 + & info [ "o"; "output" ] ~docv:"DIR" 342 + ~doc:"Output directory (default: from config or data/gh)") 343 + and+ force = 344 + Arg.(value & flag & info [ "f"; "force" ] ~doc:"Force re-sync even if cached") 345 + and+ update = 346 + Arg.( 347 + value & flag 348 + & info [ "u"; "update" ] 349 + ~doc: 350 + "Incremental update mode. Merge new data with existing cached \ 351 + data. 
Useful for daily cron jobs to update the current week.") 352 + and+ log_level = Logs_cli.level () in 353 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 354 + Logs.set_level log_level; 355 + 356 + (* Load config *) 357 + let config = 358 + match Repowatch.Config.load_xdg_opt ~fs with 359 + | Some c -> c 360 + | None -> Repowatch.Config.default 361 + in 362 + 363 + (* Determine output directory *) 364 + let out_dir = 365 + match output_dir with 366 + | Some d -> d 367 + | None -> Repowatch.Config.Sync.output_dir (Repowatch.Config.sync config) 368 + in 369 + 370 + (* Get GitHub token *) 371 + let github_config = Repowatch.Config.github config in 372 + match Repowatch.Config.Github.get_token github_config with 373 + | None -> 374 + Fmt.epr 375 + "Error: GitHub token not found. Set %s environment variable.@." 376 + (Repowatch.Config.Github.token_env github_config) 377 + | Some token -> ( 378 + (* Create GitHub client *) 379 + let client = Repowatch.Github.create ~sw ~env ~token in 380 + 381 + (* Determine which repositories to sync *) 382 + let repos_to_sync = 383 + if repos = [] then 384 + (* Use configured repositories *) 385 + let configured = Repowatch.Config.repositories config in 386 + if configured = [] then ( 387 + Fmt.epr 388 + "Error: No repositories specified and none configured.@."; 389 + Fmt.epr "Use: repowatch sync owner/repo@."; 390 + Fmt.epr "Or add repositories to your config file.@."; 391 + []) 392 + else 393 + List.map 394 + (fun r -> 395 + ( Repowatch.Config.Repository.owner r, 396 + Repowatch.Config.Repository.repo r )) 397 + configured 398 + else 399 + (* Parse command-line repos *) 400 + List.filter_map 401 + (fun s -> 402 + match String.split_on_char '/' s with 403 + | [ owner; repo ] -> Some (owner, repo) 404 + | _ -> 405 + Fmt.epr "Warning: Invalid repository format: %s@." 
s; 406 + None) 407 + repos 408 + in 409 + 410 + if repos_to_sync = [] then () 411 + else 412 + (* Determine week(s) to sync *) 413 + let current_week = Repowatch.Week.current ~clock:env#clock in 414 + let target_week = 415 + match (week, year) with 416 + | Some w, Some y -> Repowatch.Week.of_year_week ~year:y ~week:w 417 + | Some w, None -> 418 + Repowatch.Week.of_year_week 419 + ~year:(Repowatch.Week.year current_week) 420 + ~week:w 421 + | None, Some y -> 422 + Repowatch.Week.of_year_week ~year:y 423 + ~week:(Repowatch.Week.week current_week) 424 + | None, None -> current_week 425 + in 426 + 427 + let weeks_to_sync = 428 + if weeks_back <= 1 then [ target_week ] 429 + else 430 + let rec build_list acc n w = 431 + if n <= 0 then List.rev acc 432 + else build_list (w :: acc) (n - 1) (Repowatch.Week.prev w) 433 + in 434 + build_list [] weeks_back target_week 435 + in 436 + 437 + (* Sync each repository *) 438 + List.iter 439 + (fun (owner, repo) -> 440 + Fmt.pr "Syncing %s/%s...@." owner repo; 441 + List.iter 442 + (fun week -> 443 + let week_str = Repowatch.Week.to_string week in 444 + (* Check if already cached *) 445 + let cached = 446 + if force then None 447 + else 448 + Repowatch.Storage.load_week ~fs ~base_dir:out_dir ~owner 449 + ~repo ~week 450 + in 451 + (* Determine sync mode based on flags *) 452 + let should_sync, existing_data = 453 + match (cached, force, update) with 454 + | Some data, false, true -> 455 + (* Update mode: merge with existing *) 456 + (true, Some data) 457 + | Some _, false, false -> 458 + (* Cached and not forcing: skip *) 459 + (false, None) 460 + | _, true, _ -> 461 + (* Force: full re-sync *) 462 + (true, None) 463 + | None, _, true -> 464 + (* Update mode but no existing: full sync *) 465 + (true, None) 466 + | None, _, false -> 467 + (* No cache: full sync *) 468 + (true, None) 469 + in 470 + if not should_sync then 471 + Fmt.pr " %s: cached (use -f to re-sync)@." 
week_str 472 + else ( 473 + let mode_str = 474 + if Option.is_some existing_data then "updating" 475 + else "syncing" 476 + in 477 + Fmt.pr " %s: %s...@." week_str mode_str; 478 + let on_progress p = 479 + let phase_str = 480 + match p.Repowatch.Sync.phase with 481 + | `Issues_prs -> "issues/PRs" 482 + | `Discussions -> "discussions" 483 + | `Releases -> "releases" 484 + | `Users -> "users" 485 + in 486 + Fmt.pr " %s page %d@." phase_str p.current 487 + in 488 + let result = 489 + Repowatch.Sync.sync_week_incremental ~client ~owner 490 + ~repo ~week ~existing:existing_data ~on_progress () 491 + in 492 + match result with 493 + | Error e -> 494 + Fmt.epr " Error: %a@." Repowatch.Github.pp_error e 495 + | Ok result -> 496 + (* Save the data *) 497 + Repowatch.Storage.save_week ~fs ~base_dir:out_dir 498 + ~owner ~repo ~week ~data:result.week_data; 499 + Fmt.pr 500 + " Done: %d issues, %d PRs, %d pages fetched@." 501 + result.issues_fetched result.prs_fetched 502 + result.pages_fetched; 503 + (match result.rate_limit_remaining with 504 + | Some remaining -> 505 + Fmt.pr " Rate limit remaining: %d@." remaining 506 + | None -> ()))) 507 + weeks_to_sync) 508 + repos_to_sync) 509 + in 510 + Cmd.v info term 511 + in 512 + 513 + (* Repos command - list configured repositories *) 514 + let repos_cmd = 515 + let doc = "List configured repositories." in 516 + let info = Cmd.info "repos" ~doc in 517 + let term = 518 + let open Term.Syntax in 519 + let+ (_xdg, _) = xdg_term and+ log_level = Logs_cli.level () in 520 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 521 + Logs.set_level log_level; 522 + 523 + match Repowatch.Config.load_xdg_opt ~fs with 524 + | None -> Fmt.pr "No configuration file found.@." 525 + | Some config -> 526 + let repos = Repowatch.Config.repositories config in 527 + if repos = [] then Fmt.pr "No repositories configured.@." 
528 + else ( 529 + Fmt.pr "Configured repositories:@."; 530 + List.iter 531 + (fun r -> 532 + let group = 533 + match Repowatch.Config.Repository.group r with 534 + | Some g -> Printf.sprintf " [%s]" g 535 + | None -> "" 536 + in 537 + Fmt.pr " %s%s@." (Repowatch.Config.Repository.full_name r) group) 538 + repos) 539 + in 540 + Cmd.v info term 541 + in 542 + 543 + (* Status command - show sync status for configured repositories *) 544 + let status_cmd = 545 + let doc = "Show sync status for repositories." in 546 + let info = Cmd.info "status" ~doc in 547 + let term = 548 + let open Term.Syntax in 549 + let+ (_xdg, _) = xdg_term 550 + and+ output_dir = 551 + Arg.( 552 + value 553 + & opt (some string) None 554 + & info [ "o"; "output" ] ~docv:"DIR" ~doc:"Data directory") 555 + and+ log_level = Logs_cli.level () in 556 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 557 + Logs.set_level log_level; 558 + 559 + let config = 560 + match Repowatch.Config.load_xdg_opt ~fs with 561 + | Some c -> c 562 + | None -> Repowatch.Config.default 563 + in 564 + 565 + let out_dir = 566 + match output_dir with 567 + | Some d -> d 568 + | None -> Repowatch.Config.Sync.output_dir (Repowatch.Config.sync config) 569 + in 570 + 571 + let repos = Repowatch.Config.repositories config in 572 + if repos = [] then Fmt.pr "No repositories configured.@." 573 + else ( 574 + let current_week = Repowatch.Week.current ~clock:env#clock in 575 + Fmt.pr "Sync status (current week: %s):@.@." 
576 + (Repowatch.Week.to_string current_week); 577 + List.iter 578 + (fun r -> 579 + let owner = Repowatch.Config.Repository.owner r in 580 + let repo = Repowatch.Config.Repository.repo r in 581 + let cached_weeks = 582 + Repowatch.Storage.list_cached_weeks ~fs ~base_dir:out_dir ~owner 583 + ~repo 584 + in 585 + let has_current = 586 + List.exists (Repowatch.Week.equal current_week) cached_weeks 587 + in 588 + let status = 589 + if has_current then "up-to-date" else "needs sync" 590 + in 591 + Fmt.pr " %s/%s: %s (%d weeks cached)@." owner repo status 592 + (List.length cached_weeks)) 593 + repos) 594 + in 595 + Cmd.v info term 596 + in 597 + 598 + (* Init command - create a default config file *) 599 + let init_cmd = 600 + let doc = "Initialize a new configuration file." in 601 + let info = Cmd.info "init" ~doc in 602 + let term = 603 + let open Term.Syntax in 604 + let+ (xdg, _) = xdg_term 605 + and+ force = 606 + Arg.( 607 + value & flag 608 + & info [ "f"; "force" ] ~doc:"Overwrite existing config") 609 + and+ log_level = Logs_cli.level () in 610 + Logs.set_reporter (Logs_fmt.reporter ~app:Fmt.stdout ~dst:Fmt.stderr ()); 611 + Logs.set_level log_level; 612 + 613 + let config_dir = Xdge.config_dir xdg in 614 + Repowatch.Storage.ensure_dir config_dir; 615 + let config_path = Eio.Path.(config_dir / "config.toml") in 616 + 617 + let exists = 618 + try 619 + ignore (Eio.Path.load config_path); 620 + true 621 + with _ -> false 622 + in 623 + 624 + if exists && not force then 625 + Fmt.pr "Config file already exists: %a@.Use -f to overwrite.@." 
626 + Eio.Path.pp config_path 627 + else ( 628 + let default_content = 629 + {|# Repowatch configuration file 630 + 631 + [github] 632 + token_env = "GITHUB_TOKEN" 633 + 634 + [sync] 635 + output_dir = "data/gh" 636 + max_pages = 20 637 + early_exit_pages = 5 638 + 639 + [cache] 640 + ttl_hours = 24 641 + 642 + # Add repositories to watch: 643 + # [[repositories]] 644 + # owner = "ocaml" 645 + # repo = "ocaml" 646 + # group = "core" 647 + # 648 + # [[repositories]] 649 + # owner = "ocaml" 650 + # repo = "dune" 651 + # group = "tooling" 652 + |} 653 + in 654 + Eio.Path.save ~create:(`Or_truncate 0o644) config_path default_content; 655 + Fmt.pr "Created config file: %a@." Eio.Path.pp config_path) 656 + in 657 + Cmd.v info term 658 + in 659 + 660 + let default_term = 661 + let open Term.Syntax in 662 + let+ _ = xdg_term and+ _ = Logs_cli.level () in 663 + `Help (`Pager, None) 664 + in 665 + let default_term = Term.ret default_term in 666 + 667 + let cmd = 668 + Cmd.group info ~default:default_term 669 + [ 670 + parse_cmd; 671 + stats_cmd; 672 + validate_cmd; 673 + list_cmd; 674 + issues_cmd; 675 + prs_cmd; 676 + sync_cmd; 677 + repos_cmd; 678 + status_cmd; 679 + init_cmd; 680 + ] 681 + in 682 + 683 + exit (Cmd.eval cmd)
+34
repowatch/dune-project
(lang dune 3.20)

(name repowatch)

; Opam metadata below is generated into repowatch.opam at build time.
(generate_opam_files true)

(source
 (github avsm/repowatch))

(authors "Anil Madhavapeddy <anil@recoil.org>")

(maintainers "Anil Madhavapeddy <anil@recoil.org>")

(license ISC)

(documentation "https://avsm.github.io/repowatch")

(package
 (name repowatch)
 (synopsis "GitHub repository activity watcher and analyzer")
 (description
  "Repowatch parses and analyzes GitHub repository activity data from JSON files. It provides a CLI for viewing issues, PRs, discussions, and releases with statistics and filtering capabilities.")
 (depends
  ; Language and build tooling.
  (ocaml (>= 5.2.0))
  (dune (>= 3.20))
  ; Effects-based I/O runtime used by the CLI and library.
  (eio_main (>= 1.2))
  ; JSON and TOML codec combinators.
  (jsont (>= 0.1.0))
  (tomlt (>= 0.1.0))
  ; XDG base-directory lookup for config files.
  (xdge (>= 0.1.0))
  ; CLI parsing, logging and formatting.
  (cmdliner (>= 1.3.0))
  (logs (>= 0.7.0))
  (fmt (>= 0.9.0))
  (ptime (>= 1.0.0))
  (odoc :with-doc)))
+147
repowatch/lib/codec.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* JSON codecs for the weekly GitHub activity cache format.  Each codec pairs
   a smart constructor from {!Types} with its JSON member layout.  NOTE: the
   [Jsont.Object.mem] pipeline order must match the constructor's argument
   order exactly; do not reorder members. *)

(* A string member that may be JSON [null]; [null] decodes to [None]. *)
let nullable_string = Jsont.option Jsont.string

(* Per-week [metadata] header: repository, ISO week bounds, cache time. *)
let metadata =
  Jsont.Object.map ~kind:"Metadata"
    (fun repo year week week_start week_end cached_at ->
      Types.Metadata.make ~repo ~year ~week ~week_start ~week_end ~cached_at)
  |> Jsont.Object.mem "repo" Jsont.string ~enc:Types.Metadata.repo
  |> Jsont.Object.mem "year" Jsont.int ~enc:Types.Metadata.year
  |> Jsont.Object.mem "week" Jsont.int ~enc:Types.Metadata.week
  |> Jsont.Object.mem "week_start" Jsont.string ~enc:Types.Metadata.week_start
  |> Jsont.Object.mem "week_end" Jsont.string ~enc:Types.Metadata.week_end
  |> Jsont.Object.mem "cached_at" Jsont.string ~enc:Types.Metadata.cached_at
  |> Jsont.Object.finish

(* An issue.  [closed_at] may be [null] or absent; both decode to [None]. *)
let issue =
  Jsont.Object.map ~kind:"Issue"
    (fun id title url user created_at updated_at closed_at body labels state
         comments ->
      Types.Issue.make ~id ~title ~url ~user ~created_at ~updated_at ~closed_at
        ~body ~labels ~state ~comments)
  |> Jsont.Object.mem "id" Jsont.int ~enc:Types.Issue.id
  |> Jsont.Object.mem "title" Jsont.string ~enc:Types.Issue.title
  |> Jsont.Object.mem "url" Jsont.string ~enc:Types.Issue.url
  |> Jsont.Object.mem "user" Jsont.string ~enc:Types.Issue.user
  |> Jsont.Object.mem "created_at" Jsont.string ~enc:Types.Issue.created_at
  |> Jsont.Object.mem "updated_at" Jsont.string ~enc:Types.Issue.updated_at
  |> Jsont.Object.mem "closed_at" nullable_string ~dec_absent:None
       ~enc:Types.Issue.closed_at
  |> Jsont.Object.mem "body" Jsont.string ~enc:Types.Issue.body
  |> Jsont.Object.mem "labels" (Jsont.list Jsont.string) ~enc:Types.Issue.labels
  |> Jsont.Object.mem "state" Jsont.string ~enc:Types.Issue.state
  |> Jsont.Object.mem "comments" (Jsont.list Jsont.string)
       ~enc:Types.Issue.comments
  |> Jsont.Object.finish

(* A pull request.  [closed_at]/[merged_at] are nullable-or-absent. *)
let pr =
  Jsont.Object.map ~kind:"PR"
    (fun id title url user created_at updated_at closed_at merged_at body
         labels state comments additions deletions changed_files mergeable
         draft ->
      Types.Pr.make ~id ~title ~url ~user ~created_at ~updated_at ~closed_at
        ~merged_at ~body ~labels ~state ~comments ~additions ~deletions
        ~changed_files ~mergeable ~draft)
  |> Jsont.Object.mem "id" Jsont.int ~enc:Types.Pr.id
  |> Jsont.Object.mem "title" Jsont.string ~enc:Types.Pr.title
  |> Jsont.Object.mem "url" Jsont.string ~enc:Types.Pr.url
  |> Jsont.Object.mem "user" Jsont.string ~enc:Types.Pr.user
  |> Jsont.Object.mem "created_at" Jsont.string ~enc:Types.Pr.created_at
  |> Jsont.Object.mem "updated_at" Jsont.string ~enc:Types.Pr.updated_at
  |> Jsont.Object.mem "closed_at" nullable_string ~dec_absent:None
       ~enc:Types.Pr.closed_at
  |> Jsont.Object.mem "merged_at" nullable_string ~dec_absent:None
       ~enc:Types.Pr.merged_at
  |> Jsont.Object.mem "body" Jsont.string ~enc:Types.Pr.body
  |> Jsont.Object.mem "labels" (Jsont.list Jsont.string) ~enc:Types.Pr.labels
  |> Jsont.Object.mem "state" Jsont.string ~enc:Types.Pr.state
  |> Jsont.Object.mem "comments" (Jsont.list Jsont.string)
       ~enc:Types.Pr.comments
  |> Jsont.Object.mem "additions" Jsont.int ~enc:Types.Pr.additions
  |> Jsont.Object.mem "deletions" Jsont.int ~enc:Types.Pr.deletions
  |> Jsont.Object.mem "changed_files" Jsont.int ~enc:Types.Pr.changed_files
  |> Jsont.Object.mem "mergeable" Jsont.string ~enc:Types.Pr.mergeable
  |> Jsont.Object.mem "draft" Jsont.bool ~enc:Types.Pr.draft
  |> Jsont.Object.finish

(* A discussion thread; [comments] is a count here, not a list. *)
let discussion =
  Jsont.Object.map ~kind:"Discussion"
    (fun id title url user updated_at body category comments answered ->
      Types.Discussion.make ~id ~title ~url ~user ~updated_at ~body ~category
        ~comments ~answered)
  |> Jsont.Object.mem "id" Jsont.int ~enc:Types.Discussion.id
  |> Jsont.Object.mem "title" Jsont.string ~enc:Types.Discussion.title
  |> Jsont.Object.mem "url" Jsont.string ~enc:Types.Discussion.url
  |> Jsont.Object.mem "user" Jsont.string ~enc:Types.Discussion.user
  |> Jsont.Object.mem "updated_at" Jsont.string
       ~enc:Types.Discussion.updated_at
  |> Jsont.Object.mem "body" Jsont.string ~enc:Types.Discussion.body
  |> Jsont.Object.mem "category" Jsont.string ~enc:Types.Discussion.category
  |> Jsont.Object.mem "comments" Jsont.int ~enc:Types.Discussion.comments
  |> Jsont.Object.mem "answered" Jsont.bool ~enc:Types.Discussion.answered
  |> Jsont.Object.finish

(* A downloadable release asset. *)
let asset =
  Jsont.Object.map ~kind:"Asset"
    (fun name download_count size ->
      Types.Asset.make ~name ~download_count ~size)
  |> Jsont.Object.mem "name" Jsont.string ~enc:Types.Asset.name
  |> Jsont.Object.mem "download_count" Jsont.int
       ~enc:Types.Asset.download_count
  |> Jsont.Object.mem "size" Jsont.int ~enc:Types.Asset.size
  |> Jsont.Object.finish

(* A published release together with its assets. *)
let release =
  Jsont.Object.map ~kind:"Release"
    (fun tag_name name published_at author html_url body prerelease draft
         assets ->
      Types.Release.make ~tag_name ~name ~published_at ~author ~html_url ~body
        ~prerelease ~draft ~assets)
  |> Jsont.Object.mem "tag_name" Jsont.string ~enc:Types.Release.tag_name
  |> Jsont.Object.mem "name" Jsont.string ~enc:Types.Release.name
  |> Jsont.Object.mem "published_at" Jsont.string
       ~enc:Types.Release.published_at
  |> Jsont.Object.mem "author" Jsont.string ~enc:Types.Release.author
  |> Jsont.Object.mem "html_url" Jsont.string ~enc:Types.Release.html_url
  |> Jsont.Object.mem "body" Jsont.string ~enc:Types.Release.body
  |> Jsont.Object.mem "prerelease" Jsont.bool ~enc:Types.Release.prerelease
  |> Jsont.Object.mem "draft" Jsont.bool ~enc:Types.Release.draft
  |> Jsont.Object.mem "assets" (Jsont.list asset) ~enc:Types.Release.assets
  |> Jsont.Object.finish

(* Top-level weekly document.  [users] defaults to [] when absent, for
   compatibility with older cache files that predate the field. *)
let week_data =
  Jsont.Object.map ~kind:"Week_data"
    (fun metadata issues prs good_first_issues discussions releases users ->
      Types.Week_data.make ~metadata ~issues ~prs ~good_first_issues
        ~discussions ~releases ~users)
  |> Jsont.Object.mem "metadata" metadata ~enc:Types.Week_data.metadata
  |> Jsont.Object.mem "issues" (Jsont.list issue) ~enc:Types.Week_data.issues
  |> Jsont.Object.mem "prs" (Jsont.list pr) ~enc:Types.Week_data.prs
  |> Jsont.Object.mem "good_first_issues" (Jsont.list issue)
       ~enc:Types.Week_data.good_first_issues
  |> Jsont.Object.mem "discussions" (Jsont.list discussion)
       ~enc:Types.Week_data.discussions
  |> Jsont.Object.mem "releases" (Jsont.list release)
       ~enc:Types.Week_data.releases
  |> Jsont.Object.mem "users" (Jsont.list Jsont.string) ~dec_absent:[]
       ~enc:Types.Week_data.users
  |> Jsont.Object.finish

(* Decode a whole week-data document from a JSON string. *)
let decode_string s = Jsont_bytesrw.decode_string week_data s

(* Load [path] (relative to [fs]) and decode it as week data.  I/O errors
   from the underlying load propagate as exceptions. *)
let decode_file fs path = decode_string (Eio.Path.load Eio.Path.(fs / path))

(* Encode week data to a JSON string.  Raises [Failure _] on encoder error,
   which is not expected for well-formed values. *)
let encode_string data =
  match Jsont_bytesrw.encode_string week_data data with
  | Ok json -> json
  | Error err -> failwith ("encode failed: " ^ err)
+46
repowatch/lib/codec.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** JSON codecs for GitHub activity data types.

    This module provides jsont codecs for encoding and decoding the types
    defined in {!Types}.  The JSON member layout follows the on-disk weekly
    cache format read and written by the rest of the library. *)

(** {1 Codecs} *)

val metadata : Types.Metadata.t Jsont.t
(** Codec for {!Types.Metadata.t}, the per-week header object. *)

val issue : Types.Issue.t Jsont.t
(** Codec for {!Types.Issue.t}.  A [null] or absent [closed_at] member
    decodes to [None]. *)

val pr : Types.Pr.t Jsont.t
(** Codec for {!Types.Pr.t}.  [closed_at] and [merged_at] may each be
    [null] or absent, decoding to [None]. *)

val discussion : Types.Discussion.t Jsont.t
(** Codec for {!Types.Discussion.t}. *)

val asset : Types.Asset.t Jsont.t
(** Codec for {!Types.Asset.t}, a downloadable release asset. *)

val release : Types.Release.t Jsont.t
(** Codec for {!Types.Release.t}, including its list of assets. *)

val week_data : Types.Week_data.t Jsont.t
(** Codec for {!Types.Week_data.t}, the top-level weekly document.  An
    absent [users] member decodes to the empty list. *)

(** {1 Decoding} *)

val decode_string : string -> (Types.Week_data.t, string) result
(** Decode a JSON string to week data.  Decoding failures are reported as
    [Error msg]. *)

val decode_file :
  Eio.Fs.dir_ty Eio.Path.t -> string -> (Types.Week_data.t, string) result
(** [decode_file fs path] loads [path] (resolved against [fs]) and decodes
    it to week data.  Decoding failures are returned as [Error _]; note that
    I/O failures from the underlying load raise instead. *)

(** {1 Encoding} *)

val encode_string : Types.Week_data.t -> string
(** Encode week data to a JSON string.
    @raise Failure if the value cannot be encoded. *)
+165
repowatch/lib/config.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Repowatch configuration.  The TOML file has optional [github], [cache] and
   [sync] tables plus an array of [[repositories]] tables.  Every member has
   a default, so a missing file is equivalent to {!default}.  NOTE: in each
   Tomlt codec, the [mem] pipeline order must match the [obj] constructor's
   argument order. *)

module Repository = struct
  (* One watched repository, optionally tagged with a display group. *)
  type t = {
    owner : string;
    repo : string;
    group : string option;
  }

  let make ~owner ~repo ?group () = { owner; repo; group }
  let owner t = t.owner
  let repo t = t.repo
  let group t = t.group

  (* "owner/repo", as used by GitHub and on the repowatch command line. *)
  let full_name t = Printf.sprintf "%s/%s" t.owner t.repo

  let codec =
    Tomlt.(
      Table.(
        obj (fun owner repo group -> { owner; repo; group })
        |> mem "owner" string ~enc:(fun t -> t.owner)
        |> mem "repo" string ~enc:(fun t -> t.repo)
        |> opt_mem "group" string ~enc:(fun t -> t.group)
        |> finish))
end

module Github = struct
  (* GitHub API access: only the name of the environment variable holding
     the token is stored; the token itself is read at use time. *)
  type t = { token_env : string }

  let default_token_env = "GITHUB_TOKEN"
  let make ?(token_env = default_token_env) () = { token_env }
  let token_env t = t.token_env

  (* Read the token from the configured environment variable, if set. *)
  let get_token t = Sys.getenv_opt t.token_env

  let codec =
    Tomlt.(
      Table.(
        obj (fun token_env -> { token_env })
        |> mem "token_env" string ~dec_absent:default_token_env
             ~enc:(fun t -> t.token_env)
        |> finish))
end

module Cache = struct
  (* Local cache settings: optional override directory and entry TTL. *)
  type t = {
    directory : string option;
    ttl_hours : int;
  }

  let default_ttl_hours = 24

  let make ?directory ?(ttl_hours = default_ttl_hours) () =
    { directory; ttl_hours }

  let directory t = t.directory
  let ttl_hours t = t.ttl_hours

  let codec =
    Tomlt.(
      Table.(
        obj (fun directory ttl_hours -> { directory; ttl_hours })
        |> opt_mem "directory" string ~enc:(fun t -> t.directory)
        |> mem "ttl_hours" int ~dec_absent:default_ttl_hours
             ~enc:(fun t -> t.ttl_hours)
        |> finish))
end

module Sync = struct
  (* Sync behaviour: where to write week files and pagination limits. *)
  type t = {
    output_dir : string;
    max_pages : int;
    early_exit_pages : int;
  }

  let default_output_dir = "data/gh"
  let default_max_pages = 20
  let default_early_exit_pages = 5

  let make ?(output_dir = default_output_dir) ?(max_pages = default_max_pages)
      ?(early_exit_pages = default_early_exit_pages) () =
    { output_dir; max_pages; early_exit_pages }

  let output_dir t = t.output_dir
  let max_pages t = t.max_pages
  let early_exit_pages t = t.early_exit_pages

  let codec =
    Tomlt.(
      Table.(
        obj (fun output_dir max_pages early_exit_pages ->
            { output_dir; max_pages; early_exit_pages })
        |> mem "output_dir" string ~dec_absent:default_output_dir
             ~enc:(fun t -> t.output_dir)
        |> mem "max_pages" int ~dec_absent:default_max_pages
             ~enc:(fun t -> t.max_pages)
        |> mem "early_exit_pages" int ~dec_absent:default_early_exit_pages
             ~enc:(fun t -> t.early_exit_pages)
        |> finish))
end

(* The full configuration record. *)
type t = {
  github : Github.t;
  repositories : Repository.t list;
  cache : Cache.t;
  sync : Sync.t;
}

let default_github = Github.make ()
let default_cache = Cache.make ()
let default_sync = Sync.make ()

let make ?(github = default_github) ?(repositories = [])
    ?(cache = default_cache) ?(sync = default_sync) () =
  { github; repositories; cache; sync }

(* Configuration with every field at its default and no repositories. *)
let default = make ()

let github t = t.github
let repositories t = t.repositories
let cache t = t.cache
let sync t = t.sync

let codec =
  Tomlt.(
    Table.(
      obj (fun github repositories cache sync ->
          { github; repositories; cache; sync })
      |> mem "github" Github.codec ~dec_absent:default_github
           ~enc:(fun t -> t.github)
      |> mem "repositories" (list Repository.codec) ~dec_absent:[]
           ~enc:(fun t -> t.repositories)
      |> mem "cache" Cache.codec ~dec_absent:default_cache
           ~enc:(fun t -> t.cache)
      |> mem "sync" Sync.codec ~dec_absent:default_sync ~enc:(fun t -> t.sync)
      |> finish))

(* Load and decode [path].  Raises [Failure _] with the decoder's message if
   the file does not parse as a valid configuration. *)
let load_from_path path =
  let content = Eio.Path.load path in
  match Tomlt_bytesrw.decode_string codec content with
  | Ok config -> config
  | Error err -> failwith (Tomlt.Error.to_string err)

(* Best-effort variant: any I/O or decode failure yields [None] so callers
   can fall back to {!default} without surfacing an error. *)
let load_from_path_opt path =
  match Tomlt_bytesrw.decode_string codec (Eio.Path.load path) with
  | Ok config -> Some config
  | Error _ -> None
  | exception _ -> None

(* XDG base-directory handle for the "repowatch" application. *)
let xdg ~fs = Xdge.create fs "repowatch"

(* Locate [config.toml] in the XDG config directories and load it strictly;
   fails if no configuration file exists. *)
let load_xdg ~fs =
  match Xdge.find_config_file (xdg ~fs) "config.toml" with
  | Some path -> load_from_path path
  | None -> failwith "No repowatch configuration found in XDG config directories"

(* As {!load_xdg} but best-effort: [None] when absent or unparseable. *)
let load_xdg_opt ~fs =
  Option.bind (Xdge.find_config_file (xdg ~fs) "config.toml") load_from_path_opt

(* The XDG configuration directory for repowatch (used by [repowatch init]). *)
let config_dir ~fs = Xdge.config_dir (xdg ~fs)
+146
repowatch/lib/config.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Configuration file handling for repowatch.

    Configuration is stored in TOML format. The default location follows
    XDG conventions: [$XDG_CONFIG_HOME/repowatch/config.toml]. *)

(** {1 Repository Configuration} *)

module Repository : sig
  type t
  (** A repository to watch. *)

  val make : owner:string -> repo:string -> ?group:string -> unit -> t
  (** Create a repository configuration. [group] is an optional free-form
      label used to categorize repositories. *)

  val owner : t -> string
  (** Repository owner (user or organization). *)

  val repo : t -> string
  (** Repository name. *)

  val group : t -> string option
  (** Optional group name for categorization. *)

  val full_name : t -> string
  (** Full name in "owner/repo" format. *)
end

(** {1 GitHub Configuration} *)

module Github : sig
  type t
  (** GitHub API configuration. The token itself is never stored in the
      configuration file; only the environment variable name is. *)

  val make : ?token_env:string -> unit -> t
  (** Create GitHub configuration.

      @param token_env Name of environment variable containing the token
        (default: "GITHUB_TOKEN") *)

  val token_env : t -> string
  (** Environment variable name for the GitHub token. *)

  val get_token : t -> string option
  (** Attempt to read the token from the environment. Returns [None] if the
      configured variable is not set. *)
end

(** {1 Cache Configuration} *)

module Cache : sig
  type t
  (** Cache configuration. *)

  val make : ?directory:string -> ?ttl_hours:int -> unit -> t
  (** Create cache configuration.

      @param directory Cache directory path (default: uses XDG cache dir)
      @param ttl_hours Hours before cached data is considered stale
        (default: 24) *)

  val directory : t -> string option
  (** Cache directory path. [None] means use the XDG default. *)

  val ttl_hours : t -> int
  (** Time-to-live in hours for cached data. *)
end

(** {1 Sync Configuration} *)

module Sync : sig
  type t
  (** Sync operation configuration. *)

  val make :
    ?output_dir:string ->
    ?max_pages:int ->
    ?early_exit_pages:int ->
    unit ->
    t
  (** Create sync configuration.

      @param output_dir Output directory for synced data (default: "data/gh")
      @param max_pages Maximum pages to fetch per query (default: 20)
      @param early_exit_pages Pages without activity before stopping
        (default: 5) *)

  val output_dir : t -> string
  (** Output directory for synced data. *)

  val max_pages : t -> int
  (** Maximum GraphQL pages to fetch. *)

  val early_exit_pages : t -> int
  (** Pages without relevant activity before early exit. *)
end

(** {1 Configuration} *)

type t
(** Complete repowatch configuration. *)

val make :
  ?github:Github.t ->
  ?repositories:Repository.t list ->
  ?cache:Cache.t ->
  ?sync:Sync.t ->
  unit ->
  t
(** Create a configuration with optional components. Omitted components use
    their module's defaults. *)

val default : t
(** Default configuration with no repositories. *)

val github : t -> Github.t
(** GitHub configuration. *)

val repositories : t -> Repository.t list
(** List of repositories to watch. *)

val cache : t -> Cache.t
(** Cache configuration. *)

val sync : t -> Sync.t
(** Sync configuration. *)

(** {1 Loading and Saving} *)

val codec : t Tomlt.t
(** TOML codec for configuration. *)

val load_from_path : Eio.Fs.dir_ty Eio.Path.t -> t
(** Load configuration from a path. Raises [Failure] on a decode error and
    propagates I/O exceptions. *)

val load_from_path_opt : Eio.Fs.dir_ty Eio.Path.t -> t option
(** Load configuration from a path, returning [None] on any error
    (I/O failure or TOML decode error). *)

val load_xdg : fs:Eio.Fs.dir_ty Eio.Path.t -> t
(** Load configuration from the XDG config directory. Raises if not found. *)

val load_xdg_opt : fs:Eio.Fs.dir_ty Eio.Path.t -> t option
(** Load configuration from the XDG config directory, returning [None] if
    not found or unreadable. *)

val config_dir : fs:Eio.Fs.dir_ty Eio.Path.t -> Eio.Fs.dir_ty Eio.Path.t
(** Get the XDG configuration directory for repowatch. *)
+4
repowatch/lib/dune
; Core repowatch library: configuration, GitHub API client, GraphQL
; queries/codecs and week-based filtering.
(library
 (name repowatch)
 (public_name repowatch)
 (libraries jsont jsont.bytesrw eio xdge tomlt tomlt.bytesrw logs fmt ptime ptime.clock.os requests unix))
+57
repowatch/lib/filter.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

module G = Graphql_types

(* True when [timestamp] falls within [week]. *)
let timestamp_in_week week timestamp = Week.contains_timestamp week timestamp

(* True when at least one timeline item carries a creation timestamp that
   falls inside [week].  Items without a timestamp never match. *)
let any_timeline_event_in_week week items =
  let item_in_week item =
    match G.Timeline_item.created_at item with
    | None -> false
    | Some ts -> timestamp_in_week week ts
  in
  List.exists item_in_week items

(* An issue is active if it was created or updated in the week, or if any
   of its timeline events happened then. *)
let issue_active_in_week ~week issue =
  timestamp_in_week week (G.Issue_node.created_at issue)
  || timestamp_in_week week (G.Issue_node.updated_at issue)
  || any_timeline_event_in_week week (G.Issue_node.timeline_items issue)

(* A PR is active if created, updated or merged in the week, or if any of
   its timeline events happened then. *)
let pr_active_in_week ~week pr =
  let merged_in_week =
    match G.Pr_node.merged_at pr with
    | None -> false
    | Some ts -> timestamp_in_week week ts
  in
  timestamp_in_week week (G.Pr_node.created_at pr)
  || timestamp_in_week week (G.Pr_node.updated_at pr)
  || merged_in_week
  || any_timeline_event_in_week week (G.Pr_node.timeline_items pr)

(* Discussions only expose an update timestamp, so that is all we check. *)
let discussion_active_in_week ~week discussion =
  timestamp_in_week week (G.Discussion_node.updated_at discussion)

(* ASCII case-insensitive string comparison. *)
let string_equal_ci a b =
  String.equal (String.lowercase_ascii a) (String.lowercase_ascii b)

(* Shared label search over a list of labels. *)
let labels_include name labels =
  List.exists (fun label -> string_equal_ci name (G.Label.name label)) labels

let has_label name issue = labels_include name (G.Issue_node.labels issue)
let has_label_pr name pr = labels_include name (G.Pr_node.labels pr)

(* Label names (case-insensitive) conventionally used to mark issues that
   are suitable for newcomers. *)
let good_first_issue_labels =
  [
    "good first issue";
    "good-first-issue";
    "beginner";
    "beginner-friendly";
    "easy";
    "help wanted";
  ]

let is_good_first_issue issue =
  List.exists (fun name -> has_label name issue) good_first_issue_labels
+58
repowatch/lib/filter.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Week-based filtering for GitHub data.

    This module provides functions to filter GitHub issues, PRs, and
    discussions based on whether they had activity during a specific ISO
    week. Activity includes creation, updates, and timeline events. *)

(** {1 Activity Checking} *)

val issue_active_in_week : week:Week.t -> Graphql_types.Issue_node.t -> bool
(** [issue_active_in_week ~week issue] returns [true] if the issue has any
    activity during the given week.

    Activity is detected if any of:
    - The issue was created during the week
    - The issue was updated during the week
    - Any timeline event occurred during the week *)

val pr_active_in_week : week:Week.t -> Graphql_types.Pr_node.t -> bool
(** [pr_active_in_week ~week pr] returns [true] if the PR has any
    activity during the given week.

    Activity is detected if any of:
    - The PR was created during the week
    - The PR was updated during the week
    - The PR was merged during the week
    - Any timeline event occurred during the week *)

val discussion_active_in_week :
  week:Week.t -> Graphql_types.Discussion_node.t -> bool
(** [discussion_active_in_week ~week discussion] returns [true] if the
    discussion was updated during the given week. Discussions only expose
    an update timestamp, so creation is not checked separately. *)

(** {1 Label Helpers} *)

val is_good_first_issue : Graphql_types.Issue_node.t -> bool
(** [is_good_first_issue issue] returns [true] if the issue has a label
    commonly used to mark issues suitable for newcomers.

    Recognized labels (matched case-insensitively) include:
    - "good first issue"
    - "good-first-issue"
    - "beginner"
    - "beginner-friendly"
    - "easy"
    - "help wanted" *)

val has_label : string -> Graphql_types.Issue_node.t -> bool
(** [has_label name issue] returns [true] if the issue has a label matching
    the given name (ASCII case-insensitive). *)

val has_label_pr : string -> Graphql_types.Pr_node.t -> bool
(** [has_label_pr name pr] returns [true] if the PR has a label matching
    the given name (ASCII case-insensitive). *)
+134
repowatch/lib/github.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Errors surfaced by the GitHub client.  An HTTP 403 with an exhausted
   rate limit is distinguished from a plain [Forbidden]. *)
type error =
  | Rate_limited of { reset_at : float }
  | Unauthorized
  | Forbidden
  | Not_found
  | Server_error of int
  | Network_error of string
  | Parse_error of string

let pp_error fmt = function
  | Rate_limited { reset_at } ->
      Format.fprintf fmt "Rate limited (resets at %f)" reset_at
  | Unauthorized -> Format.fprintf fmt "Unauthorized (401)"
  | Forbidden -> Format.fprintf fmt "Forbidden (403)"
  | Not_found -> Format.fprintf fmt "Not found (404)"
  | Server_error code -> Format.fprintf fmt "Server error (%d)" code
  | Network_error msg -> Format.fprintf fmt "Network error: %s" msg
  | Parse_error msg -> Format.fprintf fmt "Parse error: %s" msg

(* Last-seen values of the X-RateLimit-* response headers; mutated on
   every response we handle. *)
type rate_limit = {
  mutable remaining : int option;
  mutable reset_at : float option;
}

type t = {
  session : Requests.t;  (* HTTP session with auth + default headers *)
  clock : float Eio.Time.clock_ty Eio.Resource.t;  (* for retry sleeps *)
  rate_limit : rate_limit;
}

let graphql_endpoint = "https://api.github.com/graphql"
let rest_base = "https://api.github.com"

(* Build an authenticated session with JSON accept and a UA header. *)
let create ~sw ~env ~token =
  let session =
    Requests.create ~sw env
    |> fun s -> Requests.set_auth s (Requests.Auth.bearer ~token)
    |> fun s -> Requests.set_default_header s "Accept" "application/json"
    |> fun s -> Requests.set_default_header s "User-Agent" "repowatch/1.0"
  in
  let clock = env#clock in
  let rate_limit = { remaining = None; reset_at = None } in
  { session; clock; rate_limit }

let create_from_env ~sw ~env =
  match Sys.getenv_opt "GITHUB_TOKEN" with
  | Some token -> Ok (create ~sw ~env ~token)
  | None -> Error "GITHUB_TOKEN environment variable not set"

(* Record the X-RateLimit-* headers, silently ignoring malformed values. *)
let update_rate_limits t headers =
  (match Requests.Headers.get_string "X-RateLimit-Remaining" headers with
  | Some s -> (
      try t.rate_limit.remaining <- Some (int_of_string s) with _ -> ())
  | None -> ());
  match Requests.Headers.get_string "X-RateLimit-Reset" headers with
  | Some s -> (
      try t.rate_limit.reset_at <- Some (float_of_string s) with _ -> ())
  | None -> ()

let rate_limit_remaining t = t.rate_limit.remaining
let rate_limit_reset t = t.rate_limit.reset_at

(* Map an HTTP response to either its body text or a typed error.  A 403
   with zero remaining requests is classified as [Rate_limited]. *)
let handle_response t response =
  let headers = Requests.Response.headers response in
  update_rate_limits t headers;
  match Requests.Response.status_code response with
  | 200 | 201 -> Ok (Requests.Response.text response)
  | 401 -> Error Unauthorized
  | 403 ->
      if t.rate_limit.remaining = Some 0 then
        Error
          (Rate_limited
             { reset_at = Option.value t.rate_limit.reset_at ~default:0.0 })
      else Error Forbidden
  | 404 -> Error Not_found
  (* Any other status (client or server) is reported with its code.  The
     original had two identical branches for [>= 500] and the rest. *)
  | code -> Error (Server_error code)

(* Retry [f] with exponential backoff (1s, 2s, 4s, ...) on transient
   failures.  Both server errors and network errors are retried; other
   errors (auth, 404, rate limit) are returned immediately. *)
let with_retry ~clock ~max_attempts f =
  let rec loop attempt =
    match f () with
    | Ok _ as result -> result
    | Error (Server_error _ | Network_error _) when attempt < max_attempts ->
        let delay = Float.pow 2.0 (Float.of_int attempt) in
        Eio.Time.sleep clock delay;
        loop (attempt + 1)
    | Error _ as err -> err
  in
  loop 0

(* POST a GraphQL request and return the raw JSON response body.  All
   exceptions from the HTTP layer are folded into [Network_error]. *)
let graphql t ~query ~variables =
  let body = Graphql.build_request_body ~query ~variables in
  with_retry ~clock:t.clock ~max_attempts:3 (fun () ->
      try
        let response =
          Requests.post t.session graphql_endpoint
            ~headers:
              (Requests.Headers.empty
              |> Requests.Headers.content_type Requests.Mime.json)
            ~body:(Requests.Body.of_string Requests.Mime.json body)
        in
        handle_response t response
      with exn -> Error (Network_error (Printexc.to_string exn)))

(* REST: list releases, 100 per page; [page] starts at 1. *)
let get_releases t ~owner ~repo ~page =
  let url =
    Printf.sprintf "%s/repos/%s/%s/releases?page=%d&per_page=100" rest_base
      owner repo page
  in
  with_retry ~clock:t.clock ~max_attempts:3 (fun () ->
      try
        let response = Requests.get t.session url in
        handle_response t response
      with exn -> Error (Network_error (Printexc.to_string exn)))

(* REST: fetch a user profile. *)
let get_user t ~username =
  let url = Printf.sprintf "%s/users/%s" rest_base username in
  with_retry ~clock:t.clock ~max_attempts:3 (fun () ->
      try
        let response = Requests.get t.session url in
        handle_response t response
      with exn -> Error (Network_error (Printexc.to_string exn)))
+103
repowatch/lib/github.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** GitHub API client with authentication and rate limiting.

    This module provides a client for GitHub's GraphQL and REST APIs with
    rate limit tracking and retry logic for transient failures. *)

(** {1 Types} *)

type t
(** GitHub API client with authentication and rate limiting. *)

type error =
  | Rate_limited of { reset_at : float }
      (** Request was rate limited. [reset_at] is the Unix timestamp when
          the rate limit resets (0.0 if the reset header was absent). *)
  | Unauthorized  (** Authentication failed (HTTP 401). *)
  | Forbidden  (** Access forbidden (HTTP 403), not rate limiting. *)
  | Not_found  (** Resource not found (HTTP 404). *)
  | Server_error of int  (** GitHub server error (HTTP 5xx). *)
  | Network_error of string  (** Network-level error. *)
  | Parse_error of string  (** JSON parsing error. *)

val pp_error : Format.formatter -> error -> unit
(** Pretty-print an error. *)

(** {1 Client Creation} *)

val create :
  sw:Eio.Switch.t ->
  env:
    < clock : float Eio.Time.clock_ty Eio.Resource.t
    ; net : _ Eio.Net.t
    ; fs : Eio.Fs.dir_ty Eio.Path.t
    ; .. > ->
  token:string ->
  t
(** [create ~sw ~env ~token] creates a GitHub client with the given
    authentication token.

    @param sw Switch for resource management
    @param env Eio environment with clock, net, and fs capabilities
    @param token GitHub personal access token or OAuth token *)

val create_from_env :
  sw:Eio.Switch.t ->
  env:
    < clock : float Eio.Time.clock_ty Eio.Resource.t
    ; net : _ Eio.Net.t
    ; fs : Eio.Fs.dir_ty Eio.Path.t
    ; .. > ->
  (t, string) result
(** [create_from_env ~sw ~env] creates a client using the [GITHUB_TOKEN]
    environment variable. Returns [Error msg] if the variable is not set. *)

(** {1 GraphQL API} *)

val graphql :
  t ->
  query:string ->
  variables:Graphql.query_variables ->
  (string, error) result
(** [graphql t ~query ~variables] executes a GraphQL query against GitHub's
    API and returns the raw JSON response body.

    This function automatically:
    - Retries transient failures with exponential backoff
    - Updates internal rate limit tracking from response headers

    Exhausted rate limits are reported as [Rate_limited]; the client does
    not sleep until the reset time — callers decide how to wait. *)

(** {1 REST API} *)

val get_releases :
  t ->
  owner:string ->
  repo:string ->
  page:int ->
  (string, error) result
(** [get_releases t ~owner ~repo ~page] fetches releases using the REST API
    (100 per page). Returns the raw JSON response body. Page numbering
    starts at 1. *)

val get_user : t -> username:string -> (string, error) result
(** [get_user t ~username] fetches a user profile using the REST API.
    Returns the raw JSON response body. *)

(** {1 Rate Limit Information} *)

val rate_limit_remaining : t -> int option
(** [rate_limit_remaining t] returns the current rate limit remaining,
    if known from previous responses. *)

val rate_limit_reset : t -> float option
(** [rate_limit_reset t] returns the Unix timestamp when the rate limit
    resets, if known from previous responses. *)
+218
repowatch/lib/graphql.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Variables for the combined issues/PRs query.  A [None] cursor means
   "first page" (the variable is omitted, which GraphQL treats as null). *)
type query_variables = {
  owner : string;
  name : string;
  issues_after : string option;
  prs_after : string option;
}

(* Single query fetching both issues and PRs with cursor pagination,
   ordered by UPDATED_AT descending. *)
let issues_prs_query =
  {|query($owner: String!, $name: String!, $issuesAfter: String, $prsAfter: String) {
  repository(owner: $owner, name: $name) {
    issues(first: 25, after: $issuesAfter, orderBy: {field: UPDATED_AT, direction: DESC}) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        number
        title
        url
        createdAt
        updatedAt
        closedAt
        bodyText
        state
        author {
          login
        }
        labels(first: 20) {
          nodes {
            name
          }
        }
        comments(first: 10, orderBy: {field: UPDATED_AT, direction: DESC}) {
          totalCount
          nodes {
            author {
              login
            }
            bodyText
            createdAt
            updatedAt
          }
        }
        timelineItems(first: 100, itemTypes: [ISSUE_COMMENT, LABELED_EVENT, UNLABELED_EVENT, CLOSED_EVENT, REOPENED_EVENT]) {
          nodes {
            __typename
            ... on IssueComment {
              createdAt
            }
            ... on LabeledEvent {
              createdAt
            }
            ... on UnlabeledEvent {
              createdAt
            }
            ... on ClosedEvent {
              createdAt
            }
            ... on ReopenedEvent {
              createdAt
            }
          }
        }
      }
    }
    pullRequests(first: 25, after: $prsAfter, orderBy: {field: UPDATED_AT, direction: DESC}) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        number
        title
        url
        createdAt
        updatedAt
        closedAt
        mergedAt
        bodyText
        state
        additions
        deletions
        changedFiles
        mergeable
        isDraft
        author {
          login
        }
        labels(first: 20) {
          nodes {
            name
          }
        }
        comments(first: 10, orderBy: {field: UPDATED_AT, direction: DESC}) {
          totalCount
          nodes {
            author {
              login
            }
            bodyText
            createdAt
            updatedAt
          }
        }
        timelineItems(first: 100, itemTypes: [PULL_REQUEST_COMMIT, PULL_REQUEST_REVIEW, ISSUE_COMMENT, CLOSED_EVENT, REOPENED_EVENT, MERGED_EVENT]) {
          nodes {
            __typename
            ... on PullRequestCommit {
              commit {
                committedDate
              }
            }
            ... on PullRequestReview {
              createdAt
            }
            ... on IssueComment {
              createdAt
            }
            ... on ClosedEvent {
              createdAt
            }
            ... on ReopenedEvent {
              createdAt
            }
            ... on MergedEvent {
              createdAt
            }
          }
        }
      }
    }
  }
}|}

(* Simpler query: first 100 discussions by UPDATED_AT, no pagination. *)
let discussions_query =
  {|query($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {
    discussions(first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
      nodes {
        number
        title
        url
        updatedAt
        bodyText
        author {
          login
        }
        category {
          name
        }
        comments {
          totalCount
        }
        answerChosenAt
      }
    }
  }
}|}

(* Escape a string for embedding inside a JSON string literal: quote,
   backslash, the common whitespace escapes, and \u00XX for the remaining
   control characters (per RFC 8259). *)
let escape_json_string s =
  let b = Buffer.create (String.length s * 2) in
  String.iter
    (function
      | '"' -> Buffer.add_string b "\\\""
      | '\\' -> Buffer.add_string b "\\\\"
      | '\n' -> Buffer.add_string b "\\n"
      | '\r' -> Buffer.add_string b "\\r"
      | '\t' -> Buffer.add_string b "\\t"
      | c when Char.code c < 32 ->
          Buffer.add_string b (Printf.sprintf "\\u%04x" (Char.code c))
      | c -> Buffer.add_char b c)
    s;
  Buffer.contents b

(* Render one "key": "value" JSON member with an escaped value.  Keys are
   fixed identifiers here and need no escaping. *)
let json_string_field key value =
  Printf.sprintf "\"%s\": \"%s\"" key (escape_json_string value)

(* Variables as a Jsont association list, for callers that build the
   request with a JSON library instead of [build_request_body]. *)
let variables_to_json vars =
  let add_if_some key opt acc =
    match opt with
    | Some v -> (key, Jsont.Json.string v) :: acc
    | None -> acc
  in
  [ ("owner", Jsont.Json.string vars.owner); ("name", Jsont.Json.string vars.name) ]
  |> add_if_some "issuesAfter" vars.issues_after
  |> add_if_some "prsAfter" vars.prs_after

(* Build the {"query": ..., "variables": {...}} request body by hand.
   Absent cursors are omitted entirely rather than emitted as null. *)
let build_request_body ~query ~variables =
  let optional key = function
    | Some v -> [ json_string_field key v ]
    | None -> []
  in
  let parts =
    [
      json_string_field "owner" variables.owner;
      json_string_field "name" variables.name;
    ]
    @ optional "issuesAfter" variables.issues_after
    @ optional "prsAfter" variables.prs_after
  in
  let vars_json = "{" ^ String.concat ", " parts ^ "}" in
  Printf.sprintf "{\"query\": \"%s\", \"variables\": %s}"
    (escape_json_string query) vars_json
+51
repowatch/lib/graphql.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** GraphQL query construction for GitHub API.

    This module provides the GraphQL query strings and request body building
    functions for fetching repository data from GitHub's GraphQL API. *)

(** {1 Query Variables} *)

type query_variables = {
  owner : string;  (** Repository owner (user or organization). *)
  name : string;  (** Repository name. *)
  issues_after : string option;
      (** Cursor for issues pagination; [None] fetches the first page. *)
  prs_after : string option;
      (** Cursor for PRs pagination; [None] fetches the first page. *)
}
(** Variables for the issues/PRs GraphQL query. *)

(** {1 Query Strings} *)

val issues_prs_query : string
(** The GraphQL query string for fetching issues and pull requests.

    This query fetches both issues and PRs in a single request with:
    - Cursor-based pagination (first 25 items per page)
    - Ordering by UPDATED_AT descending
    - Author information (deleted users appear as a null author)
    - Labels (first 20)
    - Recent comments (first 10)
    - Timeline events for activity tracking *)

val discussions_query : string
(** The GraphQL query string for fetching discussions.

    This is a simpler query that fetches the first 100 discussions
    ordered by UPDATED_AT descending, without pagination. *)

(** {1 Request Building} *)

val build_request_body : query:string -> variables:query_variables -> string
(** [build_request_body ~query ~variables] builds the JSON request body
    for the GitHub GraphQL endpoint. Absent pagination cursors are omitted
    from the variables object.

    Returns a JSON string of the form:
    [\{"query": "...", "variables": \{...\}\}] *)

val variables_to_json : query_variables -> (string * Jsont.Json.t) list
(** [variables_to_json vars] converts query variables to a JSON-compatible
    association list for use with other GraphQL clients. Absent cursors are
    omitted. *)
+294
repowatch/lib/graphql_codec.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Jsont codecs for GitHub GraphQL responses (issues, PRs, discussions).

   NOTE: in every [Jsont.Object.map ... |> Jsont.Object.mem ...] pipeline
   below, the order of the [mem] declarations must match the argument order
   of the constructor function passed to [Jsont.Object.map] — reordering a
   [mem] silently permutes the decoded fields. *)

module G = Graphql_types

(* Helper for nullable strings *)
let nullable_string = Jsont.option Jsont.string

(* Helper for nullable integers that default to 0.
   Decoding maps JSON [null] to [0]; encoding always emits the number
   (never [null]), so a decoded [null] re-encodes as [0]. *)
let nullable_int_zero =
  Jsont.map
    ~dec:(fun v -> Option.value v ~default:0)
    ~enc:(fun v -> Some v)
    (Jsont.option Jsont.int)

(* GraphQL connection pagination: [hasNextPage] / [endCursor].
   [endCursor] may be absent or null (last page). *)
let page_info =
  Jsont.Object.map ~kind:"PageInfo"
    (fun has_next_page end_cursor -> G.Page_info.make ~has_next_page ~end_cursor)
  |> Jsont.Object.mem "hasNextPage" Jsont.bool ~enc:G.Page_info.has_next_page
  |> Jsont.Object.mem "endCursor" nullable_string ~dec_absent:None
       ~enc:G.Page_info.end_cursor
  |> Jsont.Object.finish

(* Author can be null in GitHub responses (deleted users become "ghost") *)
let author_inner =
  Jsont.Object.map ~kind:"Author" (fun login -> G.Author.make ~login)
  |> Jsont.Object.mem "login" Jsont.string ~enc:G.Author.login
  |> Jsont.Object.finish

(* Null authors decode as the ghost user; symmetrically, the ghost user
   encodes back to JSON [null]. *)
let author =
  Jsont.map
    ~dec:(fun opt -> Option.value opt ~default:G.Author.ghost)
    ~enc:(fun a -> if G.Author.login a = "ghost" then None else Some a)
    (Jsont.option author_inner)

let label =
  Jsont.Object.map ~kind:"Label" (fun name -> G.Label.make ~name)
  |> Jsont.Object.mem "name" Jsont.string ~enc:G.Label.name
  |> Jsont.Object.finish

(* A single issue/PR comment with author and timestamps. *)
let comment =
  Jsont.Object.map ~kind:"Comment" (fun author body_text created_at updated_at ->
      G.Comment.make ~author ~body_text ~created_at ~updated_at)
  |> Jsont.Object.mem "author" author ~enc:G.Comment.author
  |> Jsont.Object.mem "bodyText" Jsont.string ~enc:G.Comment.body_text
  |> Jsont.Object.mem "createdAt" Jsont.string ~enc:G.Comment.created_at
  |> Jsont.Object.mem "updatedAt" Jsont.string ~enc:G.Comment.updated_at
  |> Jsont.Object.finish

(* Codec for the nested commit object in PullRequestCommit timeline events *)
let commit_committed_date =
  Jsont.Object.map ~kind:"Commit" (fun committed_date -> committed_date)
  |> Jsont.Object.mem "committedDate" nullable_string ~dec_absent:None
       ~enc:(fun s -> s)
  |> Jsont.Object.finish

(* Timeline items use __typename to discriminate between types.
   Missing timestamps decode as the empty string; any unrecognised
   [__typename] (or absent one) decodes as [Unknown], so decoding is lossy
   for event types the variant does not model. *)
let timeline_item =
  let open G.Timeline_item in
  (* Helper to decode based on __typename *)
  Jsont.Object.map ~kind:"TimelineItem"
    (fun typename created_at committed_date ->
      match typename with
      | "IssueComment" ->
          Issue_comment { created_at = Option.value created_at ~default:"" }
      | "LabeledEvent" ->
          Labeled_event { created_at = Option.value created_at ~default:"" }
      | "UnlabeledEvent" ->
          Unlabeled_event { created_at = Option.value created_at ~default:"" }
      | "ClosedEvent" ->
          Closed_event { created_at = Option.value created_at ~default:"" }
      | "ReopenedEvent" ->
          Reopened_event { created_at = Option.value created_at ~default:"" }
      | "PullRequestCommit" ->
          (* committed_date is doubly optional: the "commit" member may be
             absent/null, and "committedDate" inside it may be null too. *)
          let date =
            Option.bind committed_date Fun.id |> Option.value ~default:""
          in
          Pr_commit { committed_date = date }
      | "PullRequestReview" ->
          Pr_review { created_at = Option.value created_at ~default:"" }
      | "MergedEvent" ->
          Merged_event { created_at = Option.value created_at ~default:"" }
      | _ -> Unknown)
  |> Jsont.Object.mem "__typename" Jsont.string ~enc:(fun item ->
         match item with
         | Issue_comment _ -> "IssueComment"
         | Labeled_event _ -> "LabeledEvent"
         | Unlabeled_event _ -> "UnlabeledEvent"
         | Closed_event _ -> "ClosedEvent"
         | Reopened_event _ -> "ReopenedEvent"
         | Pr_commit _ -> "PullRequestCommit"
         | Pr_review _ -> "PullRequestReview"
         | Merged_event _ -> "MergedEvent"
         | Unknown -> "Unknown")
  |> Jsont.Object.mem "createdAt" nullable_string ~dec_absent:None
       ~enc:(fun item ->
         match item with
         | Issue_comment { created_at }
         | Labeled_event { created_at }
         | Unlabeled_event { created_at }
         | Closed_event { created_at }
         | Reopened_event { created_at }
         | Pr_review { created_at }
         | Merged_event { created_at } ->
             Some created_at
         | Pr_commit _ | Unknown -> None)
  |> Jsont.Object.mem "commit" (Jsont.option commit_committed_date)
       ~dec_absent:None
       ~enc:(fun item ->
         match item with
         | Pr_commit { committed_date } -> Some (Some committed_date)
         | _ -> None)
  |> Jsont.Object.finish

(* Labels connection - extract nodes *)
let labels_nodes =
  Jsont.Object.map ~kind:"LabelsConnection" (fun labels -> labels)
  |> Jsont.Object.mem "nodes" (Jsont.list label) ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Comments connection - extract nodes and totalCount *)
let comments_connection =
  Jsont.Object.map ~kind:"CommentsConnection"
    (fun total_count nodes -> (total_count, nodes))
  |> Jsont.Object.mem "totalCount" Jsont.int ~enc:fst
  |> Jsont.Object.mem "nodes" (Jsont.list comment) ~enc:snd
  |> Jsont.Object.finish

(* Timeline items connection - extract nodes *)
let timeline_nodes =
  Jsont.Object.map ~kind:"TimelineConnection" (fun items -> items)
  |> Jsont.Object.mem "nodes" (Jsont.list timeline_item) ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* A full issue node; [comments] arrives as a (totalCount, nodes) pair from
   [comments_connection] and is split across two constructor arguments. *)
let issue_node =
  Jsont.Object.map ~kind:"IssueNode"
    (fun number title url created_at updated_at closed_at body_text state author
         labels (comments_total_count, comments) timeline_items ->
      G.Issue_node.make ~number ~title ~url ~created_at ~updated_at ~closed_at
        ~body_text ~state ~author ~labels ~comments ~comments_total_count
        ~timeline_items)
  |> Jsont.Object.mem "number" Jsont.int ~enc:G.Issue_node.number
  |> Jsont.Object.mem "title" Jsont.string ~enc:G.Issue_node.title
  |> Jsont.Object.mem "url" Jsont.string ~enc:G.Issue_node.url
  |> Jsont.Object.mem "createdAt" Jsont.string ~enc:G.Issue_node.created_at
  |> Jsont.Object.mem "updatedAt" Jsont.string ~enc:G.Issue_node.updated_at
  |> Jsont.Object.mem "closedAt" nullable_string ~dec_absent:None
       ~enc:G.Issue_node.closed_at
  |> Jsont.Object.mem "bodyText" Jsont.string ~enc:G.Issue_node.body_text
  |> Jsont.Object.mem "state" Jsont.string ~enc:G.Issue_node.state
  |> Jsont.Object.mem "author" author ~enc:G.Issue_node.author
  |> Jsont.Object.mem "labels" labels_nodes ~enc:G.Issue_node.labels
  |> Jsont.Object.mem "comments" comments_connection
       ~enc:(fun n ->
         (G.Issue_node.comments_total_count n, G.Issue_node.comments n))
  |> Jsont.Object.mem "timelineItems" timeline_nodes
       ~enc:G.Issue_node.timeline_items
  |> Jsont.Object.finish

(* A full pull-request node; numeric stats ([additions] etc.) tolerate null
   via [nullable_int_zero]. *)
let pr_node =
  Jsont.Object.map ~kind:"PrNode"
    (fun number title url created_at updated_at closed_at merged_at body_text
         state additions deletions changed_files mergeable is_draft author labels
         (comments_total_count, comments) timeline_items ->
      G.Pr_node.make ~number ~title ~url ~created_at ~updated_at ~closed_at
        ~merged_at ~body_text ~state ~additions ~deletions ~changed_files
        ~mergeable ~is_draft ~author ~labels ~comments ~comments_total_count
        ~timeline_items)
  |> Jsont.Object.mem "number" Jsont.int ~enc:G.Pr_node.number
  |> Jsont.Object.mem "title" Jsont.string ~enc:G.Pr_node.title
  |> Jsont.Object.mem "url" Jsont.string ~enc:G.Pr_node.url
  |> Jsont.Object.mem "createdAt" Jsont.string ~enc:G.Pr_node.created_at
  |> Jsont.Object.mem "updatedAt" Jsont.string ~enc:G.Pr_node.updated_at
  |> Jsont.Object.mem "closedAt" nullable_string ~dec_absent:None
       ~enc:G.Pr_node.closed_at
  |> Jsont.Object.mem "mergedAt" nullable_string ~dec_absent:None
       ~enc:G.Pr_node.merged_at
  |> Jsont.Object.mem "bodyText" Jsont.string ~enc:G.Pr_node.body_text
  |> Jsont.Object.mem "state" Jsont.string ~enc:G.Pr_node.state
  |> Jsont.Object.mem "additions" nullable_int_zero ~enc:G.Pr_node.additions
  |> Jsont.Object.mem "deletions" nullable_int_zero ~enc:G.Pr_node.deletions
  |> Jsont.Object.mem "changedFiles" nullable_int_zero
       ~enc:G.Pr_node.changed_files
  |> Jsont.Object.mem "mergeable" Jsont.string ~enc:G.Pr_node.mergeable
  |> Jsont.Object.mem "isDraft" Jsont.bool ~enc:G.Pr_node.is_draft
  |> Jsont.Object.mem "author" author ~enc:G.Pr_node.author
  |> Jsont.Object.mem "labels" labels_nodes ~enc:G.Pr_node.labels
  |> Jsont.Object.mem "comments" comments_connection
       ~enc:(fun n -> (G.Pr_node.comments_total_count n, G.Pr_node.comments n))
  |> Jsont.Object.mem "timelineItems" timeline_nodes
       ~enc:G.Pr_node.timeline_items
  |> Jsont.Object.finish

let issues_connection =
  Jsont.Object.map ~kind:"IssuesConnection"
    (fun page_info nodes -> G.Issues_connection.make ~page_info ~nodes)
  |> Jsont.Object.mem "pageInfo" page_info ~enc:G.Issues_connection.page_info
  |> Jsont.Object.mem "nodes" (Jsont.list issue_node)
       ~enc:G.Issues_connection.nodes
  |> Jsont.Object.finish

let prs_connection =
  Jsont.Object.map ~kind:"PrsConnection"
    (fun page_info nodes -> G.Prs_connection.make ~page_info ~nodes)
  |> Jsont.Object.mem "pageInfo" page_info ~enc:G.Prs_connection.page_info
  |> Jsont.Object.mem "nodes" (Jsont.list pr_node) ~enc:G.Prs_connection.nodes
  |> Jsont.Object.finish

(* Category is a nested object with just a name *)
let category_name =
  Jsont.Object.map ~kind:"Category" (fun name -> name)
  |> Jsont.Object.mem "name" Jsont.string ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Comments for discussion is just totalCount *)
let discussion_comments_count =
  Jsont.Object.map ~kind:"DiscussionComments" (fun count -> count)
  |> Jsont.Object.mem "totalCount" Jsont.int ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Discussion node.  [answerChosenAt] is collapsed to the boolean
   [answered] on decode; encoding is therefore lossy — an answered
   discussion re-encodes with an empty-string timestamp. *)
let discussion_node =
  Jsont.Object.map ~kind:"DiscussionNode"
    (fun number title url updated_at body_text author category comments_count
         answer_chosen_at ->
      let answered = Option.is_some answer_chosen_at in
      G.Discussion_node.make ~number ~title ~url ~updated_at ~body_text ~author
        ~category ~comments_count ~answered)
  |> Jsont.Object.mem "number" Jsont.int ~enc:G.Discussion_node.number
  |> Jsont.Object.mem "title" Jsont.string ~enc:G.Discussion_node.title
  |> Jsont.Object.mem "url" Jsont.string ~enc:G.Discussion_node.url
  |> Jsont.Object.mem "updatedAt" Jsont.string ~enc:G.Discussion_node.updated_at
  |> Jsont.Object.mem "bodyText" Jsont.string ~enc:G.Discussion_node.body_text
  |> Jsont.Object.mem "author" author ~enc:G.Discussion_node.author
  |> Jsont.Object.mem "category" category_name ~enc:G.Discussion_node.category
  |> Jsont.Object.mem "comments" discussion_comments_count
       ~enc:G.Discussion_node.comments_count
  |> Jsont.Object.mem "answerChosenAt" nullable_string ~dec_absent:None
       ~enc:(fun d -> if G.Discussion_node.answered d then Some "" else None)
  |> Jsont.Object.finish

(* Repository wrapper for issues/PRs query *)
let repository_inner =
  Jsont.Object.map ~kind:"Repository"
    (fun issues pull_requests ->
      G.Repository_response.make ~issues ~pull_requests)
  |> Jsont.Object.mem "issues" issues_connection
       ~enc:G.Repository_response.issues
  |> Jsont.Object.mem "pullRequests" prs_connection
       ~enc:G.Repository_response.pull_requests
  |> Jsont.Object.finish

(* Data wrapper for issues/PRs response *)
let data_repository =
  Jsont.Object.map ~kind:"DataRepository" (fun repo -> repo)
  |> Jsont.Object.mem "repository" repository_inner ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Top-level {"data": {"repository": ...}} envelope for issues/PRs. *)
let repository_response =
  Jsont.Object.map ~kind:"RepositoryResponse" (fun data -> data)
  |> Jsont.Object.mem "data" data_repository ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Discussions connection wrapper *)
let discussions_nodes =
  Jsont.Object.map ~kind:"DiscussionsConnection" (fun nodes -> nodes)
  |> Jsont.Object.mem "nodes" (Jsont.list discussion_node) ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Repository wrapper for discussions query *)
let repository_discussions_inner =
  Jsont.Object.map ~kind:"RepositoryDiscussions"
    (fun discussions -> G.Discussions_response.make ~discussions)
  |> Jsont.Object.mem "discussions" discussions_nodes
       ~enc:G.Discussions_response.discussions
  |> Jsont.Object.finish

(* Data wrapper for discussions response *)
let data_discussions =
  Jsont.Object.map ~kind:"DataDiscussions" (fun repo -> repo)
  |> Jsont.Object.mem "repository" repository_discussions_inner ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Top-level {"data": {"repository": ...}} envelope for discussions. *)
let discussions_response =
  Jsont.Object.map ~kind:"DiscussionsResponse" (fun data -> data)
  |> Jsont.Object.mem "data" data_discussions ~enc:(fun x -> x)
  |> Jsont.Object.finish

(* Decode a raw JSON string as an issues/PRs response. *)
let decode_repository_response s =
  Jsont_bytesrw.decode_string repository_response s

(* Decode a raw JSON string as a discussions response. *)
let decode_discussions_response s =
  Jsont_bytesrw.decode_string discussions_response s
+65
repowatch/lib/graphql_codec.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Jsont codecs for GitHub GraphQL API responses.

    This module provides type-safe JSON codecs for decoding the responses
    from GitHub's GraphQL API for issues, pull requests, and discussions. *)

(** {1 Component Codecs} *)

val page_info : Graphql_types.Page_info.t Jsont.t
(** Codec for GraphQL pagination info ([hasNextPage] / [endCursor]). *)

val author : Graphql_types.Author.t Jsont.t
(** Codec for GitHub user/author. A [null] author (deleted account) decodes
    as the "ghost" user. *)

val label : Graphql_types.Label.t Jsont.t
(** Codec for issue/PR labels. *)

val comment : Graphql_types.Comment.t Jsont.t
(** Codec for comments. *)

val timeline_item : Graphql_types.Timeline_item.t Jsont.t
(** Codec for timeline events. Uses the [__typename] field to determine the
    event type; unrecognised type names decode as [Unknown]. *)

(** {1 Issue/PR Codecs} *)

val issue_node : Graphql_types.Issue_node.t Jsont.t
(** Codec for GitHub issue nodes from GraphQL. *)

val pr_node : Graphql_types.Pr_node.t Jsont.t
(** Codec for GitHub PR nodes from GraphQL. *)

val issues_connection : Graphql_types.Issues_connection.t Jsont.t
(** Codec for a paginated issues connection. *)

val prs_connection : Graphql_types.Prs_connection.t Jsont.t
(** Codec for a paginated PRs connection. *)

(** {1 Discussion Codecs} *)

val discussion_node : Graphql_types.Discussion_node.t Jsont.t
(** Codec for GitHub discussion nodes from GraphQL. *)

(** {1 Response Codecs} *)

val repository_response : Graphql_types.Repository_response.t Jsont.t
(** Codec for the full issues/PRs query response. Expects the payload to be
    wrapped in a top-level [data] object containing a [repository] object,
    as GraphQL endpoints return it. *)

val discussions_response : Graphql_types.Discussions_response.t Jsont.t
(** Codec for the discussions query response. Expects the payload to be
    wrapped in a top-level [data] object containing a [repository] object. *)

(** {1 Decoding Functions} *)

val decode_repository_response :
  string -> (Graphql_types.Repository_response.t, string) result
(** Decode a JSON string as an issues/PRs response. Returns [Error] with a
    decode error message on malformed input. *)

val decode_discussions_response :
  string -> (Graphql_types.Discussions_response.t, string) result
(** Decode a JSON string as a discussions response. Returns [Error] with a
    decode error message on malformed input. *)
+260
repowatch/lib/graphql_types.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Plain data carriers for decoded GitHub GraphQL responses.  Every module
   pairs an immutable record with a labelled [make] constructor and
   per-field projections; projections destructure the record directly. *)

module Page_info = struct
  (* Cursor state of a paginated GraphQL connection. *)
  type t = { has_next_page : bool; end_cursor : string option }

  let make ~has_next_page ~end_cursor = { has_next_page; end_cursor }
  let has_next_page { has_next_page; _ } = has_next_page
  let end_cursor { end_cursor; _ } = end_cursor
end

module Author = struct
  (* A GitHub login; deleted accounts are represented by [ghost]. *)
  type t = { login : string }

  let ghost = { login = "ghost" }
  let make ~login = { login }
  let login { login } = login
end

module Label = struct
  type t = { name : string }

  let make ~name = { name }
  let name { name } = name
end

module Comment = struct
  (* A single comment with author, plain-text body and timestamps. *)
  type t = {
    author : Author.t;
    body_text : string;
    created_at : string;
    updated_at : string;
  }

  let make ~author ~body_text ~created_at ~updated_at =
    { author; body_text; created_at; updated_at }

  let author { author; _ } = author
  let body_text { body_text; _ } = body_text
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
end

module Timeline_item = struct
  (* One timeline event on an issue or pull request. *)
  type t =
    | Issue_comment of { created_at : string }
    | Labeled_event of { created_at : string }
    | Unlabeled_event of { created_at : string }
    | Closed_event of { created_at : string }
    | Reopened_event of { created_at : string }
    | Pr_commit of { committed_date : string }
    | Pr_review of { created_at : string }
    | Merged_event of { created_at : string }
    | Unknown

  (* The event's timestamp, if it carries one.  For [Pr_commit] the commit
     date plays that role; [Unknown] has none. *)
  let created_at item =
    match item with
    | Issue_comment { created_at }
    | Labeled_event { created_at }
    | Unlabeled_event { created_at }
    | Closed_event { created_at }
    | Reopened_event { created_at }
    | Pr_review { created_at }
    | Merged_event { created_at } ->
        Some created_at
    | Pr_commit { committed_date } -> Some committed_date
    | Unknown -> None
end

module Issue_node = struct
  (* A decoded GitHub issue. *)
  type t = {
    number : int;
    title : string;
    url : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    body_text : string;
    state : string;
    author : Author.t;
    labels : Label.t list;
    comments : Comment.t list;
    comments_total_count : int;
    timeline_items : Timeline_item.t list;
  }

  let make ~number ~title ~url ~created_at ~updated_at ~closed_at ~body_text
      ~state ~author ~labels ~comments ~comments_total_count ~timeline_items =
    {
      number;
      title;
      url;
      created_at;
      updated_at;
      closed_at;
      body_text;
      state;
      author;
      labels;
      comments;
      comments_total_count;
      timeline_items;
    }

  let number { number; _ } = number
  let title { title; _ } = title
  let url { url; _ } = url
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
  let closed_at { closed_at; _ } = closed_at
  let body_text { body_text; _ } = body_text
  let state { state; _ } = state
  let author { author; _ } = author
  let labels { labels; _ } = labels
  let comments { comments; _ } = comments
  let comments_total_count { comments_total_count; _ } = comments_total_count
  let timeline_items { timeline_items; _ } = timeline_items
end

module Pr_node = struct
  (* A decoded GitHub pull request, including diff statistics. *)
  type t = {
    number : int;
    title : string;
    url : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    merged_at : string option;
    body_text : string;
    state : string;
    additions : int;
    deletions : int;
    changed_files : int;
    mergeable : string;
    is_draft : bool;
    author : Author.t;
    labels : Label.t list;
    comments : Comment.t list;
    comments_total_count : int;
    timeline_items : Timeline_item.t list;
  }

  let make ~number ~title ~url ~created_at ~updated_at ~closed_at ~merged_at
      ~body_text ~state ~additions ~deletions ~changed_files ~mergeable
      ~is_draft ~author ~labels ~comments ~comments_total_count ~timeline_items
      =
    {
      number;
      title;
      url;
      created_at;
      updated_at;
      closed_at;
      merged_at;
      body_text;
      state;
      additions;
      deletions;
      changed_files;
      mergeable;
      is_draft;
      author;
      labels;
      comments;
      comments_total_count;
      timeline_items;
    }

  let number { number; _ } = number
  let title { title; _ } = title
  let url { url; _ } = url
  let created_at { created_at; _ } = created_at
  let updated_at { updated_at; _ } = updated_at
  let closed_at { closed_at; _ } = closed_at
  let merged_at { merged_at; _ } = merged_at
  let body_text { body_text; _ } = body_text
  let state { state; _ } = state
  let additions { additions; _ } = additions
  let deletions { deletions; _ } = deletions
  let changed_files { changed_files; _ } = changed_files
  let mergeable { mergeable; _ } = mergeable
  let is_draft { is_draft; _ } = is_draft
  let author { author; _ } = author
  let labels { labels; _ } = labels
  let comments { comments; _ } = comments
  let comments_total_count { comments_total_count; _ } = comments_total_count
  let timeline_items { timeline_items; _ } = timeline_items
end

module Issues_connection = struct
  (* One page of issues plus its pagination cursor. *)
  type t = { page_info : Page_info.t; nodes : Issue_node.t list }

  let make ~page_info ~nodes = { page_info; nodes }
  let page_info { page_info; _ } = page_info
  let nodes { nodes; _ } = nodes
end

module Prs_connection = struct
  (* One page of pull requests plus its pagination cursor. *)
  type t = { page_info : Page_info.t; nodes : Pr_node.t list }

  let make ~page_info ~nodes = { page_info; nodes }
  let page_info { page_info; _ } = page_info
  let nodes { nodes; _ } = nodes
end

module Discussion_node = struct
  (* A decoded GitHub discussion. *)
  type t = {
    number : int;
    title : string;
    url : string;
    updated_at : string;
    body_text : string;
    author : Author.t;
    category : string;
    comments_count : int;
    answered : bool;
  }

  let make ~number ~title ~url ~updated_at ~body_text ~author ~category
      ~comments_count ~answered =
    {
      number;
      title;
      url;
      updated_at;
      body_text;
      author;
      category;
      comments_count;
      answered;
    }

  let number { number; _ } = number
  let title { title; _ } = title
  let url { url; _ } = url
  let updated_at { updated_at; _ } = updated_at
  let body_text { body_text; _ } = body_text
  let author { author; _ } = author
  let category { category; _ } = category
  let comments_count { comments_count; _ } = comments_count
  let answered { answered; _ } = answered
end

module Repository_response = struct
  (* Payload of the combined issues/PRs query. *)
  type t = {
    issues : Issues_connection.t;
    pull_requests : Prs_connection.t;
  }

  let make ~issues ~pull_requests = { issues; pull_requests }
  let issues { issues; _ } = issues
  let pull_requests { pull_requests; _ } = pull_requests
end

module Discussions_response = struct
  (* Payload of the discussions query. *)
  type t = { discussions : Discussion_node.t list }

  let make ~discussions = { discussions }
  let discussions { discussions } = discussions
end
+357
repowatch/lib/graphql_types.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** GraphQL response types for GitHub API.

    This module defines types for decoding GitHub GraphQL API responses for
    issues, pull requests, and discussions queries. All timestamps are kept
    as the ISO 8601 strings returned by GitHub; they are not parsed here. *)

(** {1 Pagination} *)

module Page_info : sig
  type t
  (** Pagination information from a GraphQL connection. *)

  val make : has_next_page:bool -> end_cursor:string option -> t
  (** Create pagination info. *)

  val has_next_page : t -> bool
  (** Whether there are more pages available. *)

  val end_cursor : t -> string option
  (** Cursor for fetching the next page, if any. [None] on the last page. *)
end

(** {1 User Information} *)

module Author : sig
  type t
  (** A GitHub user or organization. *)

  val make : login:string -> t
  (** Create an author. *)

  val ghost : t
  (** The "ghost" author, used when the original author has been deleted. *)

  val login : t -> string
  (** The user's login name. Returns "ghost" for deleted users. *)
end

(** {1 Labels} *)

module Label : sig
  type t
  (** A GitHub issue or PR label. *)

  val make : name:string -> t
  (** Create a label. *)

  val name : t -> string
  (** The label name. *)
end

(** {1 Comments} *)

module Comment : sig
  type t
  (** A comment on an issue, PR, or discussion. *)

  val make :
    author:Author.t ->
    body_text:string ->
    created_at:string ->
    updated_at:string ->
    t
  (** Create a comment. *)

  val author : t -> Author.t
  (** The comment author. *)

  val body_text : t -> string
  (** The plain text body of the comment. *)

  val created_at : t -> string
  (** When the comment was created (ISO 8601). *)

  val updated_at : t -> string
  (** When the comment was last updated (ISO 8601). *)
end

(** {1 Timeline Events} *)

module Timeline_item : sig
  type t =
    | Issue_comment of { created_at : string }
    | Labeled_event of { created_at : string }
    | Unlabeled_event of { created_at : string }
    | Closed_event of { created_at : string }
    | Reopened_event of { created_at : string }
    | Pr_commit of { committed_date : string }
    | Pr_review of { created_at : string }
    | Merged_event of { created_at : string }
    | Unknown
  (** Timeline events from an issue or PR.

      These track activity that may have occurred during a given week,
      enabling filtering of items by their recent activity. Event types not
      modelled here decode as [Unknown]. *)

  val created_at : t -> string option
  (** Extract the timestamp from any timeline item, if present. For
      [Pr_commit] this is the commit date; [Unknown] yields [None]. *)
end

(** {1 Issues} *)

module Issue_node : sig
  type t
  (** A GitHub issue from the GraphQL API. *)

  val make :
    number:int ->
    title:string ->
    url:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    body_text:string ->
    state:string ->
    author:Author.t ->
    labels:Label.t list ->
    comments:Comment.t list ->
    comments_total_count:int ->
    timeline_items:Timeline_item.t list ->
    t
  (** Create an issue node. *)

  val number : t -> int
  (** The issue number. *)

  val title : t -> string
  (** The issue title. *)

  val url : t -> string
  (** URL to the issue on GitHub. *)

  val created_at : t -> string
  (** When the issue was created (ISO 8601). *)

  val updated_at : t -> string
  (** When the issue was last updated (ISO 8601). *)

  val closed_at : t -> string option
  (** When the issue was closed, if closed. *)

  val body_text : t -> string
  (** The plain text body of the issue. *)

  val state : t -> string
  (** The issue state: "OPEN" or "CLOSED". *)

  val author : t -> Author.t
  (** The issue author. *)

  val labels : t -> Label.t list
  (** Labels applied to the issue. *)

  val comments : t -> Comment.t list
  (** Recent comments on the issue (a bounded page, not necessarily all —
      see {!comments_total_count}). *)

  val comments_total_count : t -> int
  (** Total number of comments, which may exceed [List.length (comments t)]. *)

  val timeline_items : t -> Timeline_item.t list
  (** Recent timeline events. *)
end

(** {1 Pull Requests} *)

module Pr_node : sig
  type t
  (** A GitHub pull request from the GraphQL API. *)

  val make :
    number:int ->
    title:string ->
    url:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    merged_at:string option ->
    body_text:string ->
    state:string ->
    additions:int ->
    deletions:int ->
    changed_files:int ->
    mergeable:string ->
    is_draft:bool ->
    author:Author.t ->
    labels:Label.t list ->
    comments:Comment.t list ->
    comments_total_count:int ->
    timeline_items:Timeline_item.t list ->
    t
  (** Create a PR node. *)

  val number : t -> int
  (** The PR number. *)

  val title : t -> string
  (** The PR title. *)

  val url : t -> string
  (** URL to the PR on GitHub. *)

  val created_at : t -> string
  (** When the PR was created (ISO 8601). *)

  val updated_at : t -> string
  (** When the PR was last updated (ISO 8601). *)

  val closed_at : t -> string option
  (** When the PR was closed, if closed. *)

  val merged_at : t -> string option
  (** When the PR was merged, if merged. *)

  val body_text : t -> string
  (** The plain text body of the PR. *)

  val state : t -> string
  (** The PR state: "OPEN", "CLOSED", or "MERGED". *)

  val additions : t -> int
  (** Lines added. 0 when GitHub reports no value. *)

  val deletions : t -> int
  (** Lines deleted. 0 when GitHub reports no value. *)

  val changed_files : t -> int
  (** Number of files changed. 0 when GitHub reports no value. *)

  val mergeable : t -> string
  (** Mergeable state: "MERGEABLE", "CONFLICTING", or "UNKNOWN". *)

  val is_draft : t -> bool
  (** Whether this is a draft PR. *)

  val author : t -> Author.t
  (** The PR author. *)

  val labels : t -> Label.t list
  (** Labels applied to the PR. *)

  val comments : t -> Comment.t list
  (** Recent comments on the PR (a bounded page, not necessarily all —
      see {!comments_total_count}). *)

  val comments_total_count : t -> int
  (** Total number of comments, which may exceed [List.length (comments t)]. *)

  val timeline_items : t -> Timeline_item.t list
  (** Recent timeline events. *)
end

(** {1 Connections (Paginated Lists)} *)

module Issues_connection : sig
  type t
  (** A paginated list of issues. *)

  val make : page_info:Page_info.t -> nodes:Issue_node.t list -> t
  (** Create an issues connection. *)

  val page_info : t -> Page_info.t
  (** Pagination information. *)

  val nodes : t -> Issue_node.t list
  (** The issues in this page. *)
end

module Prs_connection : sig
  type t
  (** A paginated list of pull requests. *)

  val make : page_info:Page_info.t -> nodes:Pr_node.t list -> t
  (** Create a PRs connection. *)

  val page_info : t -> Page_info.t
  (** Pagination information. *)

  val nodes : t -> Pr_node.t list
  (** The PRs in this page. *)
end

(** {1 Discussions} *)

module Discussion_node : sig
  type t
  (** A GitHub discussion from the GraphQL API. *)

  val make :
    number:int ->
    title:string ->
    url:string ->
    updated_at:string ->
    body_text:string ->
    author:Author.t ->
    category:string ->
    comments_count:int ->
    answered:bool ->
    t
  (** Create a discussion node. *)

  val number : t -> int
  (** The discussion number. *)

  val title : t -> string
  (** The discussion title. *)

  val url : t -> string
  (** URL to the discussion on GitHub. *)

  val updated_at : t -> string
  (** When the discussion was last updated (ISO 8601). *)

  val body_text : t -> string
  (** The plain text body of the discussion. *)

  val author : t -> Author.t
  (** The discussion author. *)

  val category : t -> string
  (** The discussion category name. *)

  val comments_count : t -> int
  (** Number of comments. *)

  val answered : t -> bool
  (** Whether the discussion has an accepted answer. *)
end

(** {1 API Responses} *)

module Repository_response : sig
  type t
  (** Response from the issues/PRs GraphQL query. *)

  val make : issues:Issues_connection.t -> pull_requests:Prs_connection.t -> t
  (** Create a repository response. *)

  val issues : t -> Issues_connection.t
  (** The issues connection. *)

  val pull_requests : t -> Prs_connection.t
  (** The pull requests connection. *)
end

module Discussions_response : sig
  type t
  (** Response from the discussions GraphQL query. *)

  val make : discussions:Discussion_node.t list -> t
  (** Create a discussions response. *)

  val discussions : t -> Discussion_node.t list
  (** The discussions list. *)
end
+94
repowatch/lib/loader.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Load and decode a single week-data JSON file. *)
let load_file fs path = Codec.decode_file fs path

(* Same as [load_file], but raises [Failure] with the decode error. *)
let load_file_exn fs path =
  match load_file fs path with
  | Ok data -> data
  | Error e -> failwith e

(* A data file is anything ending in ".json" with a non-empty basename
   (a file literally named ".json" is ignored, matching the original
   length check). *)
let is_json_file name =
  String.length name > 5 && Filename.check_suffix name ".json"

(* List a directory, treating missing or unreadable directories as empty.
   Only [Eio.Io] is caught: a bare [with _ -> []] would also swallow fiber
   cancellation ([Eio.Cancel.Cancelled]), which must be allowed to
   propagate under Eio's structured concurrency. *)
let read_dir_or_empty path = try Eio.Path.read_dir path with Eio.Io _ -> []

(* Depth-first accumulation of JSON file paths under [dir]. *)
let rec find_json_files_rec fs dir acc =
  let entries = read_dir_or_empty Eio.Path.(fs / dir) in
  List.fold_left
    (fun acc entry ->
      let entry_path = Filename.concat dir entry in
      match Eio.Path.kind ~follow:true Eio.Path.(fs / entry_path) with
      | `Directory -> find_json_files_rec fs entry_path acc
      | `Regular_file when is_json_file entry -> entry_path :: acc
      | _ -> acc)
    acc entries

(* All JSON files under [dir], in lexicographic order. *)
let find_json_files fs dir =
  find_json_files_rec fs dir [] |> List.sort String.compare

(* Load every JSON file under [dir]; all-or-nothing: [Ok data] only when
   every file decodes, otherwise [Error] with per-file messages. *)
let load_directory fs dir =
  let successes, errors =
    find_json_files fs dir
    |> List.partition_map (fun file ->
           match load_file fs file with
           | Ok data -> Either.Left data
           | Error e -> Either.Right (file, e))
  in
  if errors = [] then Ok successes else Error errors

(* Load every JSON file under [dir], returning both the parsed data and
   the per-file errors, so callers can proceed best-effort. *)
let load_directory_partial fs dir =
  find_json_files fs dir
  |> List.partition_map (fun file ->
         match load_file fs file with
         | Ok d -> Either.Left d
         | Error e -> Either.Right (file, e))

(* Enumerate data_dir/{owner}/{repo} directories as sorted "owner/repo"
   strings. *)
let find_repos fs data_dir =
  (* Immediate subdirectories of [parent]. *)
  let subdirs parent =
    read_dir_or_empty Eio.Path.(fs / parent)
    |> List.filter (fun entry ->
           Eio.Path.kind ~follow:true
             Eio.Path.(fs / Filename.concat parent entry)
           = `Directory)
  in
  subdirs data_dir
  |> List.concat_map (fun owner ->
         subdirs (Filename.concat data_dir owner)
         |> List.map (fun repo -> Printf.sprintf "%s/%s" owner repo))
  |> List.sort String.compare

(* Load all week files for a single repository. *)
let load_repo fs ~data_dir ~owner ~repo =
  load_directory fs (Filename.concat (Filename.concat data_dir owner) repo)
+76
repowatch/lib/loader.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** File loading utilities for GitHub activity data.

    This module provides functions for loading JSON files from the filesystem
    and discovering files in directory structures. *)

(** {1 File Loading} *)

val load_file :
  Eio.Fs.dir_ty Eio.Path.t -> string -> (Types.Week_data.t, string) result
(** [load_file fs path] loads and parses a single JSON file.

    @param fs The filesystem capability
    @param path Path to the JSON file *)

val load_file_exn : Eio.Fs.dir_ty Eio.Path.t -> string -> Types.Week_data.t
(** [load_file_exn fs path] loads and parses a single JSON file.
    Raises [Failure] with the decode error message on failure. *)

(** {1 Directory Loading} *)

val find_json_files : Eio.Fs.dir_ty Eio.Path.t -> string -> string list
(** [find_json_files fs dir] finds all JSON files in a directory tree.

    Files are returned in sorted order, which for the week-NN-YYYY.json format
    results in chronological ordering within a year.  Unreadable directories
    are silently skipped.

    @param fs The filesystem capability
    @param dir Root directory to search *)

val load_directory :
  Eio.Fs.dir_ty Eio.Path.t ->
  string ->
  (Types.Week_data.t list, (string * string) list) result
(** [load_directory fs dir] loads all JSON files from a directory.

    Returns [Ok data] if all files parse successfully, or [Error errors] with
    a list of (filename, error message) pairs for files that failed to parse. *)

val load_directory_partial :
  Eio.Fs.dir_ty Eio.Path.t ->
  string ->
  Types.Week_data.t list * (string * string) list
(** [load_directory_partial fs dir] loads all JSON files, returning both
    successfully parsed data and (filename, error message) pairs.

    This is useful when you want to process whatever data is available
    even if some files fail to parse. *)

(** {1 Repository Discovery} *)

val find_repos : Eio.Fs.dir_ty Eio.Path.t -> string -> string list
(** [find_repos fs data_dir] finds repository paths in a ruminant data directory.

    The ruminant format stores data in [data/gh/{owner}/{repo}/] structure.
    This function returns a sorted list of "owner/repo" strings.

    @param fs The filesystem capability
    @param data_dir Root of the ruminant data directory (e.g., "data/gh") *)

val load_repo :
  Eio.Fs.dir_ty Eio.Path.t ->
  data_dir:string ->
  owner:string ->
  repo:string ->
  (Types.Week_data.t list, (string * string) list) result
(** [load_repo fs ~data_dir ~owner ~repo] loads all data for a repository.

    @param fs The filesystem capability
    @param data_dir Root of the ruminant data directory
    @param owner Repository owner
    @param repo Repository name *)
+260
repowatch/lib/printer.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Truncate [s] to at most [max_len] characters, appending "..." when text
   is dropped.  Widths of 3 or less fall back to a plain cut so that
   [String.sub] is never called with a negative length (previously
   [truncate_string 2 "hello"] raised [Invalid_argument]). *)
let truncate_string max_len s =
  if String.length s <= max_len then s
  else if max_len <= 3 then String.sub s 0 (max 0 max_len)
  else String.sub s 0 (max_len - 3) ^ "..."

(* Multi-line header describing which repo/week a data file covers. *)
let pp_metadata ppf m =
  let open Types.Metadata in
  Fmt.pf ppf "@[<v>Repository: %s@,Year: %d, Week: %d@,Period: %s to %s@,Cached: %s@]"
    (repo m) (year m) (week m) (week_start m) (week_end m) (cached_at m)

(* Render a label list as " [a, b, c]"; prints nothing when empty. *)
let pp_labels ppf labels =
  if labels = [] then ()
  else Fmt.pf ppf " [%a]" Fmt.(list ~sep:(any ", ") string) labels

(* Multi-line issue rendering; the Closed timestamp only appears when set. *)
let pp_issue ppf i =
  let open Types.Issue in
  Fmt.pf ppf "@[<v>#%d %s@, Author: @%s@, State: %s%a@, Created: %s@, Updated: %s%a@]"
    (id i) (title i) (user i) (state i) pp_labels (labels i)
    (created_at i) (updated_at i)
    (fun ppf -> function
      | Some ca -> Fmt.pf ppf "@, Closed: %s" ca
      | None -> ())
    (closed_at i)

(* One-line issue summary: [o] open, [x] closed. *)
let pp_issue_short ppf i =
  let open Types.Issue in
  let state_mark = if state i = "open" then "o" else "x" in
  Fmt.pf ppf "[%s] #%-5d %-60s @%s"
    state_mark (id i) (truncate_string 60 (title i)) (user i)

(* Multi-line PR rendering; Closed/Merged timestamps only appear when set. *)
let pp_pr ppf p =
  let open Types.Pr in
  Fmt.pf ppf
    "@[<v>#%d %s%s@, Author: @%s@, State: %s%a@, +%d -%d (%d files)@, \
     Mergeable: %s@, Created: %s@, Updated: %s%a%a@]"
    (id p) (title p) (if draft p then " [DRAFT]" else "")
    (user p) (state p) pp_labels (labels p)
    (additions p) (deletions p) (changed_files p)
    (mergeable p) (created_at p) (updated_at p)
    (fun ppf -> function
      | Some ca -> Fmt.pf ppf "@, Closed: %s" ca
      | None -> ())
    (closed_at p)
    (fun ppf -> function
      | Some ma -> Fmt.pf ppf "@, Merged: %s" ma
      | None -> ())
    (merged_at p)
(* One-line PR summary: [M]/[o]/[x] merged/open/closed, [D] marks drafts. *)
let pp_pr_short ppf p =
  let open Types.Pr in
  let state_mark =
    match merged_at p with
    | Some _ -> "M"
    | None -> if state p = "open" then "o" else "x"
  in
  let draft_mark = if draft p then "D" else " " in
  Fmt.pf ppf "[%s%s] #%-5d +%-5d -%-5d %-50s @%s"
    state_mark draft_mark (id p) (additions p) (deletions p)
    (truncate_string 50 (title p)) (user p)

(* Multi-line discussion rendering with category and comment count. *)
let pp_discussion ppf d =
  let open Types.Discussion in
  Fmt.pf ppf "@[<v>#%d %s@, Author: @%s@, Category: %s@, Comments: %d%s@, Updated: %s@]"
    (id d) (title d) (user d) (category d) (comments d)
    (if answered d then " [ANSWERED]" else "")
    (updated_at d)

(* One-line discussion summary: [A] marks answered discussions. *)
let pp_discussion_short ppf d =
  let open Types.Discussion in
  let answered_mark = if answered d then "A" else " " in
  Fmt.pf ppf "[%s] #%-5d %-50s (%s) @%s"
    answered_mark (id d) (truncate_string 50 (title d)) (category d) (user d)

(* Release asset line with size in whole KB (integer division). *)
let pp_asset ppf a =
  let open Types.Asset in
  let size_kb = size a / 1024 in
  Fmt.pf ppf "  - %s (%d KB, %d downloads)" (name a) size_kb (download_count a)

(* Multi-line release rendering; the Assets section only appears when
   non-empty. *)
let pp_release ppf r =
  let open Types.Release in
  Fmt.pf ppf "@[<v>%s (%s)%s%s@, Author: @%s@, Published: %s@, URL: %s%a@]"
    (tag_name r) (name r)
    (if prerelease r then " [PRE-RELEASE]" else "")
    (if draft r then " [DRAFT]" else "")
    (author r) (published_at r) (html_url r)
    (fun ppf assets ->
      if assets <> [] then
        Fmt.pf ppf "@, Assets:@, %a"
          Fmt.(list ~sep:(any "@, ") pp_asset) assets)
    (assets r)

(* One-line release summary: [P] pre-release, [D] draft. *)
let pp_release_short ppf r =
  let open Types.Release in
  let marks =
    (if prerelease r then "P" else " ") ^
    (if draft r then "D" else " ")
  in
  Fmt.pf ppf "[%s] %-20s %-40s @%s"
    marks (tag_name r) (truncate_string 40 (name r)) (author r)

(* Metadata header followed by per-category counts for one week. *)
let pp_week_summary ppf w =
  let open Types.Week_data in
  let m = metadata w in
  let num_issues = List.length (issues w) in
  let num_prs = List.length (prs w) in
  let num_gfi = List.length (good_first_issues w) in
  let num_discussions = List.length (discussions w) in
  let num_releases = List.length (releases w) in
  let num_users = List.length (users w) in
  Fmt.pf ppf "@[<v>%a@,@,Activity: %d issues, %d PRs, %d discussions, %d releases@,\
              Good first issues: %d@,Active users: %d@]"
    pp_metadata m num_issues num_prs num_discussions num_releases
    num_gfi num_users

(* Full report: summary followed by each non-empty section in detail.
   Note the vertical box opened in the first [pf] is closed by the final
   [pf "@]"], so the intermediate calls print inside it. *)
let pp_week_data ppf w =
  let open Types.Week_data in
  Fmt.pf ppf "@[<v>%a@,@,"
    pp_week_summary w;
  if issues w <> [] then
    Fmt.pf ppf "Issues:@, @[<v>%a@]@,@,"
      Fmt.(list ~sep:(any "@,@, ") pp_issue) (issues w);
  if prs w <> [] then
    Fmt.pf ppf "Pull Requests:@, @[<v>%a@]@,@,"
      Fmt.(list ~sep:(any "@,@, ") pp_pr) (prs w);
  if discussions w <> [] then
    Fmt.pf ppf "Discussions:@, @[<v>%a@]@,@,"
      Fmt.(list ~sep:(any "@,@, ") pp_discussion) (discussions w);
  if releases w <> [] then
    Fmt.pf ppf "Releases:@, @[<v>%a@]@,"
      Fmt.(list ~sep:(any "@,@, ") pp_release) (releases w);
  Fmt.pf ppf "@]"

(* Aggregate counters computed over one or more weeks of data. *)
type stats = {
  total_issues : int;
  open_issues : int;
  closed_issues : int;
  total_prs : int;
  open_prs : int;
  merged_prs : int;
  closed_prs : int;
  draft_prs : int;
  total_discussions : int;
  answered_discussions : int;
  total_releases : int;
  total_users : int;
  total_additions : int;
  total_deletions : int;
  total_files_changed : int;
}

(* Identity element for [aggregate_stats]. *)
let empty_stats = {
  total_issues = 0;
  open_issues = 0;
  closed_issues = 0;
  total_prs = 0;
  open_prs = 0;
  merged_prs = 0;
  closed_prs = 0;
  draft_prs = 0;
  total_discussions = 0;
  answered_discussions = 0;
  total_releases = 0;
  total_users = 0;
  total_additions = 0;
  total_deletions = 0;
  total_files_changed = 0;
}

(* Single pass over each category of a week's data.  A PR with a merge
   timestamp counts as merged regardless of its state string; draft status
   is counted independently of open/merged/closed. *)
let compute_stats w =
  let open Types in
  let issues = Week_data.issues w in
  let prs = Week_data.prs w in
  let discussions = Week_data.discussions w in
  let releases = Week_data.releases w in
  let users = Week_data.users w in
  let open_issues, closed_issues =
    List.fold_left
      (fun (o, c) i ->
        if Issue.state i = "open" then (o + 1, c) else (o, c + 1))
      (0, 0) issues
  in
  let open_prs, merged_prs, closed_prs, draft_prs, additions, deletions, files =
    List.fold_left
      (fun (op, mp, cp, dp, a, d, f) p ->
        let op, mp, cp =
          match Pr.merged_at p with
          | Some _ -> (op, mp + 1, cp)
          | None -> if Pr.state p = "open" then (op + 1, mp, cp) else (op, mp, cp + 1)
        in
        let dp = if Pr.draft p then dp + 1 else dp in
        (op, mp, cp, dp, a + Pr.additions p, d + Pr.deletions p, f + Pr.changed_files p))
      (0, 0, 0, 0, 0, 0, 0) prs
  in
  let answered_discussions =
    List.fold_left
      (fun acc d -> if Discussion.answered d then acc + 1 else acc)
      0 discussions
  in
  {
    total_issues = List.length issues;
    open_issues;
    closed_issues;
    total_prs = List.length prs;
    open_prs;
    merged_prs;
    closed_prs;
    draft_prs;
    total_discussions = List.length discussions;
    answered_discussions;
    total_releases = List.length releases;
    total_users = List.length users;
    total_additions = additions;
    total_deletions = deletions;
    total_files_changed = files;
  }

(* Field-wise sum of a list of stats, starting from [empty_stats].
   Note [total_users] is a plain sum, so the same user active in several
   weeks is counted once per week. *)
let aggregate_stats stats_list =
  List.fold_left
    (fun acc s ->
      {
        total_issues = acc.total_issues + s.total_issues;
        open_issues = acc.open_issues + s.open_issues;
        closed_issues = acc.closed_issues + s.closed_issues;
        total_prs = acc.total_prs + s.total_prs;
        open_prs = acc.open_prs + s.open_prs;
        merged_prs = acc.merged_prs + s.merged_prs;
        closed_prs = acc.closed_prs + s.closed_prs;
        draft_prs = acc.draft_prs + s.draft_prs;
        total_discussions = acc.total_discussions + s.total_discussions;
        answered_discussions = acc.answered_discussions + s.answered_discussions;
        total_releases = acc.total_releases + s.total_releases;
        total_users = acc.total_users + s.total_users;
        total_additions = acc.total_additions + s.total_additions;
        total_deletions = acc.total_deletions + s.total_deletions;
        total_files_changed = acc.total_files_changed + s.total_files_changed;
      })
    empty_stats stats_list

(* Multi-line statistics table. *)
let pp_stats ppf s =
  Fmt.pf ppf "@[<v>\
              Issues: %d total (%d open, %d closed)@,\
              Pull Reqs: %d total (%d open, %d merged, %d closed, %d draft)@,\
              Discussions: %d total (%d answered)@,\
              Releases: %d@,\
              Users: %d@,\
              Code: +%d -%d (%d files)@]"
    s.total_issues s.open_issues s.closed_issues
    s.total_prs s.open_prs s.merged_prs s.closed_prs s.draft_prs
    s.total_discussions s.answered_discussions
    s.total_releases s.total_users
    s.total_additions s.total_deletions s.total_files_changed

(* Single-line statistics, e.g. for table rows: I/P/D/R counts plus diff. *)
let pp_stats_compact ppf s =
  Fmt.pf ppf "I:%d P:%d D:%d R:%d (+%d -%d)"
    s.total_issues s.total_prs s.total_discussions s.total_releases
    s.total_additions s.total_deletions
+82
repowatch/lib/printer.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Pretty-printing for GitHub activity data.

    This module provides formatters for displaying activity data in
    human-readable formats.  Each item type has a detailed multi-line
    formatter and a one-line "short" variant. *)

(** {1 Basic Formatters} *)

val pp_metadata : Types.Metadata.t Fmt.t
(** Format metadata information. *)

val pp_issue : Types.Issue.t Fmt.t
(** Format an issue. *)

val pp_issue_short : Types.Issue.t Fmt.t
(** Format an issue in short form (one line). *)

val pp_pr : Types.Pr.t Fmt.t
(** Format a pull request. *)

val pp_pr_short : Types.Pr.t Fmt.t
(** Format a pull request in short form (one line). *)

val pp_discussion : Types.Discussion.t Fmt.t
(** Format a discussion. *)

val pp_discussion_short : Types.Discussion.t Fmt.t
(** Format a discussion in short form (one line). *)

val pp_asset : Types.Asset.t Fmt.t
(** Format a release asset. *)

val pp_release : Types.Release.t Fmt.t
(** Format a release. *)

val pp_release_short : Types.Release.t Fmt.t
(** Format a release in short form (one line). *)

(** {1 Summary Formatters} *)

val pp_week_summary : Types.Week_data.t Fmt.t
(** Format a summary of weekly activity (metadata plus per-category counts). *)

val pp_week_data : Types.Week_data.t Fmt.t
(** Format complete weekly data: summary followed by each non-empty section. *)

(** {1 Statistics} *)

type stats = {
  total_issues : int;
  open_issues : int;
  closed_issues : int;
  total_prs : int;
  open_prs : int;
  merged_prs : int;
  closed_prs : int;
  draft_prs : int;
  total_discussions : int;
  answered_discussions : int;
  total_releases : int;
  total_users : int;
  total_additions : int;
  total_deletions : int;
  total_files_changed : int;
}
(** Statistics computed from weekly data. *)

val compute_stats : Types.Week_data.t -> stats
(** Compute statistics from one week of data. *)

val aggregate_stats : stats list -> stats
(** Aggregate multiple stats into one by field-wise summation. *)

val pp_stats : stats Fmt.t
(** Format statistics as a multi-line table. *)

val pp_stats_compact : stats Fmt.t
(** Format statistics in compact single-line form. *)
+25
repowatch/lib/repowatch.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Repowatch - GitHub repository activity watcher and analyzer.

    This library provides types and utilities for parsing and analyzing
    GitHub repository activity data in the ruminant JSON format.

    This module only re-exports the library's submodules under one
    namespace; it defines no values of its own. *)

module Types = Types (* Core data types (metadata, issues, PRs, weeks, ...). *)
module Codec = Codec (* JSON encoding/decoding of week data. *)
module Loader = Loader (* Filesystem loading and repository discovery. *)
module Config = Config
module Printer = Printer (* Human-readable formatters and statistics. *)
module Week = Week (* ISO week arithmetic. *)
module Graphql_types = Graphql_types (* GraphQL response node types. *)
module Graphql_codec = Graphql_codec (* GraphQL response decoding. *)
module Graphql = Graphql (* Query text and variables. *)
module Filter = Filter (* Week-activity filtering predicates. *)
module Transform = Transform (* GraphQL/REST nodes -> Types conversion. *)
module Github = Github (* HTTP client for the GitHub APIs. *)
module Storage = Storage (* On-disk cache in the ruminant layout. *)
module Users = Users (* Username extraction from week data. *)
module Sync = Sync (* The week-by-week sync engine. *)
+58
repowatch/lib/storage.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Directory holding the weekly JSON files for one repository:
   [base_dir/gh/{owner}/{repo}]. *)
let data_dir ~base_dir ~owner ~repo =
  Eio.Path.(base_dir / "gh" / owner / repo)

(* Canonical cache filename, e.g. "week-07-2026.json" (always zero-padded
   to two week digits and four year digits). *)
let week_filename ~week =
  Printf.sprintf "week-%02d-%04d.json" (Week.week week) (Week.year week)

(* Create the directory tree if it doesn't exist.  Any other I/O failure is
   swallowed here and will surface on the subsequent write instead. *)
let ensure_dir path =
  try Eio.Path.mkdirs ~exists_ok:true ~perm:0o755 path
  with Eio.Io _ -> ()

(* Serialize [data] to [{base_dir}/gh/{owner}/{repo}/week-WW-YYYY.json],
   creating directories as needed and truncating any existing file. *)
let save_week ~fs ~base_dir ~owner ~repo ~week ~data =
  let dir = data_dir ~base_dir:(Eio.Path.(fs / base_dir)) ~owner ~repo in
  ensure_dir dir;
  let filename = week_filename ~week in
  let filepath = Eio.Path.(dir / filename) in
  let json = Codec.encode_string data in
  Eio.Path.save ~create:(`Or_truncate 0o644) filepath json

(* Load a cached week if present; [None] when the file is missing or does
   not decode. *)
let load_week ~fs ~base_dir ~owner ~repo ~week =
  let dir = data_dir ~base_dir:(Eio.Path.(fs / base_dir)) ~owner ~repo in
  let filename = week_filename ~week in
  let filepath = Eio.Path.(dir / filename) in
  try
    let content = Eio.Path.load filepath in
    match Codec.decode_string content with
    | Ok data -> Some data
    | Error _ -> None
  with Eio.Io _ -> None

(* Parse "week-WW-YYYY.json" back into a week value.  The name must be
   exactly 17 characters with both '-' separators in place, matching what
   [week_filename] emits.  This rejects stray names such as
   "week-01x2026.json" or "week-01-2026-bak.json", which the previous
   prefix/suffix-only checks accepted as valid weeks. *)
let parse_week_filename filename =
  try
    if String.length filename <> 17 then None
    else if String.sub filename 0 5 <> "week-" then None
    else if filename.[7] <> '-' then None
    else if String.sub filename 12 5 <> ".json" then None
    else
      let week = int_of_string (String.sub filename 5 2) in
      let year = int_of_string (String.sub filename 8 4) in
      Some (Week.of_year_week ~year ~week)
  with _ -> None

(* All cached weeks for a repository, sorted by date; empty when the
   directory is missing. *)
let list_cached_weeks ~fs ~base_dir ~owner ~repo =
  let dir = data_dir ~base_dir:(Eio.Path.(fs / base_dir)) ~owner ~repo in
  try
    let entries = Eio.Path.read_dir dir in
    entries
    |> List.filter_map parse_week_filename
    |> List.sort Week.compare
  with Eio.Io _ -> []
+66
repowatch/lib/storage.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Persistent storage for sync results.

    This module provides functions to save and load weekly repository data
    in the ruminant JSON format, maintaining the directory structure
    [data/gh/{owner}/{repo}/]. *)

(** {1 Path Construction} *)

val data_dir :
  base_dir:Eio.Fs.dir_ty Eio.Path.t ->
  owner:string ->
  repo:string ->
  Eio.Fs.dir_ty Eio.Path.t
(** [data_dir ~base_dir ~owner ~repo] returns the directory path for
    repository data: [base_dir/gh/{owner}/{repo}/]. *)

val week_filename : week:Week.t -> string
(** [week_filename ~week] returns the filename for a week's data:
    [week-{WW}-{YYYY}.json], zero-padded. *)

(** {1 Saving Data} *)

val save_week :
  fs:Eio.Fs.dir_ty Eio.Path.t ->
  base_dir:string ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  data:Types.Week_data.t ->
  unit
(** [save_week ~fs ~base_dir ~owner ~repo ~week ~data] saves the weekly
    data to a JSON file, creating directories as needed and overwriting
    any existing file for that week.

    The file is written to [{base_dir}/gh/{owner}/{repo}/week-{WW}-{YYYY}.json]. *)

(** {1 Loading Data} *)

val load_week :
  fs:Eio.Fs.dir_ty Eio.Path.t ->
  base_dir:string ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  Types.Week_data.t option
(** [load_week ~fs ~base_dir ~owner ~repo ~week] loads existing week data
    if present.  Returns [None] if the file does not exist or fails to
    decode. *)

val list_cached_weeks :
  fs:Eio.Fs.dir_ty Eio.Path.t ->
  base_dir:string ->
  owner:string ->
  repo:string ->
  Week.t list
(** [list_cached_weeks ~fs ~base_dir ~owner ~repo] lists all cached weeks
    for a repository, sorted by date.  Returns the empty list when the
    repository directory does not exist. *)

(** {1 Directory Management} *)

val ensure_dir : Eio.Fs.dir_ty Eio.Path.t -> unit
(** [ensure_dir path] creates the directory and all parent directories
    if they don't exist. *)
+346
repowatch/lib/sync.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

module G = Graphql_types

(* Which stage of a sync is currently running; reported via [on_progress]. *)
type phase =
  [ `Issues_prs
  | `Discussions
  | `Releases
  | `Users
  ]

(* Progress snapshot passed to the optional [on_progress] callback.
   [total] is [None] when the page count is not known in advance. *)
type progress = { phase : phase; current : int; total : int option }

(* Result of one week's sync: the assembled data plus fetch statistics. *)
type sync_result = {
  week_data : Types.Week_data.t;
  issues_fetched : int;
  prs_fetched : int;
  pages_fetched : int;
  rate_limit_remaining : int option;
}

(* Hard cap on GraphQL pages per week, and how many consecutive pages with
   no in-week activity are tolerated before stopping early. *)
let max_pages = 20
let early_exit_threshold = 5

(* Pagination state for issues/PRs sync: independent cursors for the two
   connections fetched by the combined query, plus the inactivity counter. *)
type pagination_state = {
  mutable issues_cursor : string option;
  mutable prs_cursor : string option;
  mutable issues_done : bool;
  mutable prs_done : bool;
  mutable pages_without_activity : int;
}

(* Page through the combined issues+PRs GraphQL query, keeping only nodes
   that [Filter] considers active in [week].  Stops when both connections
   are exhausted, at [max_pages], or after [early_exit_threshold]
   consecutive pages yield nothing in-week.  Returns the filtered nodes
   and the number of pages fetched. *)
let sync_issues_prs ~client ~owner ~repo ~week ~on_progress =
  let state =
    {
      issues_cursor = None;
      prs_cursor = None;
      issues_done = false;
      prs_done = false;
      pages_without_activity = 0;
    }
  in
  let all_issues = ref [] in
  let all_prs = ref [] in
  let rec loop page_count =
    if page_count >= max_pages then Ok page_count
    else if state.issues_done && state.prs_done then Ok page_count
    else if state.pages_without_activity >= early_exit_threshold then
      Ok page_count
    else
      let variables =
        Graphql.
          {
            owner;
            name = repo;
            issues_after = state.issues_cursor;
            prs_after = state.prs_cursor;
          }
      in
      Option.iter
        (fun f ->
          f { phase = `Issues_prs; current = page_count + 1; total = None })
        on_progress;
      match Github.graphql client ~query:Graphql.issues_prs_query ~variables with
      | Error e -> Error e
      | Ok response_body -> (
          match Graphql_codec.decode_repository_response response_body with
          | Error msg -> Error (Github.Parse_error msg)
          | Ok response ->
              (* Process issues *)
              let issues_conn = G.Repository_response.issues response in
              let issues_page_info = G.Issues_connection.page_info issues_conn in
              let issue_nodes = G.Issues_connection.nodes issues_conn in
              let new_issues =
                List.filter (Filter.issue_active_in_week ~week) issue_nodes
              in
              all_issues := !all_issues @ new_issues;
              (* Update issues pagination *)
              if G.Page_info.has_next_page issues_page_info then
                state.issues_cursor <- G.Page_info.end_cursor issues_page_info
              else state.issues_done <- true;
              (* Process PRs *)
              let prs_conn = G.Repository_response.pull_requests response in
              let prs_page_info = G.Prs_connection.page_info prs_conn in
              let pr_nodes = G.Prs_connection.nodes prs_conn in
              let new_prs =
                List.filter (Filter.pr_active_in_week ~week) pr_nodes
              in
              all_prs := !all_prs @ new_prs;
              (* Update PRs pagination *)
              if G.Page_info.has_next_page prs_page_info then
                state.prs_cursor <- G.Page_info.end_cursor prs_page_info
              else state.prs_done <- true;
              (* Track activity for early exit: reset on any in-week hit. *)
              if List.length new_issues = 0 && List.length new_prs = 0 then
                state.pages_without_activity <-
                  state.pages_without_activity + 1
              else state.pages_without_activity <- 0;
              loop (page_count + 1))
  in
  match loop 0 with
  | Ok pages -> Ok (!all_issues, !all_prs, pages)
  | Error e -> Error e

(* Fetch discussions (a single page) and keep those active in [week]. *)
let sync_discussions ~client ~owner ~repo ~week ~on_progress =
  let variables =
    Graphql.{ owner; name = repo; issues_after = None; prs_after = None }
  in
  Option.iter
    (fun f -> f { phase = `Discussions; current = 1; total = Some 1 })
    on_progress;
  match Github.graphql client ~query:Graphql.discussions_query ~variables with
  | Error e -> Error e
  | Ok response_body -> (
      match Graphql_codec.decode_discussions_response response_body with
      | Error msg -> Error (Github.Parse_error msg)
      | Ok response ->
          let discussions = G.Discussions_response.discussions response in
          let active =
            List.filter (Filter.discussion_active_in_week ~week) discussions
          in
          Ok active)

(* Page through the REST releases endpoint (at most 5 pages), keeping
   releases published inside [week].  Pagination stops early once every
   parseable release on a page predates the week. *)
let sync_releases ~client ~owner ~repo ~week ~on_progress =
  let week_start = Week.start_ptime week in
  let week_end = Week.end_ptime week in
  let rec loop page acc =
    if page > 5 then
      (* Max 5 pages of releases *)
      Ok acc
    else (
      Option.iter
        (fun f -> f { phase = `Releases; current = page; total = None })
        on_progress;
      match Github.get_releases client ~owner ~repo ~page with
      | Error e -> Error e
      | Ok response_body -> (
          match Jsont_bytesrw.decode_string (Jsont.list Jsont.json) response_body
          with
          | Error msg -> Error (Github.Parse_error msg)
          | Ok releases_json ->
              if List.length releases_json = 0 then Ok acc
              else
                let releases =
                  List.filter_map Transform.release_of_json releases_json
                in
                (* Filter by week *)
                let active =
                  List.filter
                    (fun r ->
                      match
                        Ptime.of_rfc3339 (Types.Release.published_at r)
                      with
                      | Ok (t, _, _) ->
                          Ptime.compare t week_start >= 0
                          && Ptime.compare t week_end <= 0
                      | Error _ -> false)
                    releases
                in
                (* Check if all releases are before the week; an unparseable
                   timestamp counts as "before" so it cannot keep paging. *)
                let all_before =
                  List.for_all
                    (fun r ->
                      match
                        Ptime.of_rfc3339 (Types.Release.published_at r)
                      with
                      | Ok (t, _, _) -> Ptime.compare t week_start < 0
                      | Error _ -> true)
                    releases
                in
                if all_before then Ok (acc @ active)
                else loop (page + 1) (acc @ active)))
  in
  loop 1 []

(* Run all three fetch phases for one week, transform the nodes into the
   ruminant [Types], extract the active usernames, and assemble the final
   [sync_result].  Stops at the first phase that errors. *)
let sync_week ~client ~owner ~repo ~week ?on_progress () =
  (* Sync issues and PRs *)
  match sync_issues_prs ~client ~owner ~repo ~week ~on_progress with
  | Error e -> Error e
  | Ok (issue_nodes, pr_nodes, pages) -> (
      (* Sync discussions *)
      match sync_discussions ~client ~owner ~repo ~week ~on_progress with
      | Error e -> Error e
      | Ok discussion_nodes -> (
          (* Sync releases *)
          match sync_releases ~client ~owner ~repo ~week ~on_progress with
          | Error e -> Error e
          | Ok releases ->
              (* Transform to Types *)
              let issues = List.map Transform.issue_of_node issue_nodes in
              let prs = List.map Transform.pr_of_node pr_nodes in
              let discussions =
                List.map Transform.discussion_of_node discussion_nodes
              in
              let good_first_issues =
                issue_nodes
                |> List.filter Filter.is_good_first_issue
                |> List.map Transform.issue_of_node
              in
              (* Get current time for metadata *)
              let cached_at =
                Option.value
                  (Ptime.of_float_s (Unix.gettimeofday ()))
                  ~default:Ptime.epoch
              in
              let metadata =
                Transform.make_metadata ~owner ~repo ~week ~cached_at
              in
              (* Create week data first with no users so [Users] can scan
                 it, then rebuild with the extracted user list. *)
              let week_data =
                Types.Week_data.make ~metadata ~issues ~prs ~good_first_issues
                  ~discussions ~releases ~users:[]
              in
              (* Extract users *)
              let users = Users.extract_from_week_data week_data in
              let week_data =
                Types.Week_data.make ~metadata ~issues ~prs ~good_first_issues
                  ~discussions ~releases ~users
              in
              Ok
                {
                  week_data;
                  issues_fetched = List.length issue_nodes;
                  prs_fetched = List.length pr_nodes;
                  pages_fetched = pages;
                  rate_limit_remaining = Github.rate_limit_remaining client;
                }))

(* Sync every week from [from_week] to [to_week] in order, stopping on the
   first error; results are returned in week order. *)
let sync_range ~client ~owner ~repo ~from_week ~to_week ?on_progress () =
  let weeks = Week.range ~from:from_week ~to_:to_week in
  let rec loop results = function
    | [] -> Ok (List.rev results)
    | week :: rest -> (
        match sync_week ~client ~owner ~repo ~week ?on_progress () with
        | Error e -> Error e
        | Ok result -> loop (result :: results) rest)
  in
  loop [] weeks

(* Merge utilities for incremental sync.  In each case a new item replaces
   any existing item with the same key; existing items not re-fetched are
   preserved, and new items end up after the surviving existing ones. *)

(* Deduplicate issues by issue number, preferring the new copy. *)
let merge_issues ~existing ~new_items =
  (* Build a map of new items by ID *)
  let new_map =
    List.fold_left
      (fun acc item -> (Types.Issue.id item, item) :: acc)
      [] new_items
  in
  (* Keep existing items that aren't in new, then add all new *)
  let existing_filtered =
    List.filter
      (fun item -> not (List.mem_assoc (Types.Issue.id item) new_map))
      existing
  in
  existing_filtered @ new_items

(* Deduplicate PRs by PR number, preferring the new copy. *)
let merge_prs ~existing ~new_items =
  let new_map =
    List.fold_left (fun acc item -> (Types.Pr.id item, item) :: acc) [] new_items
  in
  let existing_filtered =
    List.filter
      (fun item -> not (List.mem_assoc (Types.Pr.id item) new_map))
      existing
  in
  existing_filtered @ new_items

(* Deduplicate discussions by number, preferring the new copy. *)
let merge_discussions ~existing ~new_items =
  let new_map =
    List.fold_left
      (fun acc item -> (Types.Discussion.id item, item) :: acc)
      [] new_items
  in
  let existing_filtered =
    List.filter
      (fun item -> not (List.mem_assoc (Types.Discussion.id item) new_map))
      existing
  in
  existing_filtered @ new_items

(* Deduplicate releases by tag name, preferring the new copy. *)
let merge_releases ~existing ~new_items =
  let new_map =
    List.fold_left
      (fun acc item -> (Types.Release.tag_name item, item) :: acc)
      [] new_items
  in
  let existing_filtered =
    List.filter
      (fun item -> not (List.mem_assoc (Types.Release.tag_name item) new_map))
      existing
  in
  existing_filtered @ new_items

(* Sorted union of the two username lists. *)
let merge_users ~existing ~new_items =
  List.sort_uniq String.compare (existing @ new_items)

(* Merge freshly synced data into previously cached data, category by
   category, keeping the new metadata (its cached_at is current). *)
let merge_week_data ~existing new_data =
  let issues =
    merge_issues
      ~existing:(Types.Week_data.issues existing)
      ~new_items:(Types.Week_data.issues new_data)
  in
  let prs =
    merge_prs
      ~existing:(Types.Week_data.prs existing)
      ~new_items:(Types.Week_data.prs new_data)
  in
  let good_first_issues =
    merge_issues
      ~existing:(Types.Week_data.good_first_issues existing)
      ~new_items:(Types.Week_data.good_first_issues new_data)
  in
  let discussions =
    merge_discussions
      ~existing:(Types.Week_data.discussions existing)
      ~new_items:(Types.Week_data.discussions new_data)
  in
  let releases =
    merge_releases
      ~existing:(Types.Week_data.releases existing)
      ~new_items:(Types.Week_data.releases new_data)
  in
  let users =
    merge_users
      ~existing:(Types.Week_data.users existing)
      ~new_items:(Types.Week_data.users new_data)
  in
  (* Use the new metadata (has updated cached_at timestamp) *)
  let metadata = Types.Week_data.metadata new_data in
  Types.Week_data.make ~metadata ~issues ~prs ~good_first_issues ~discussions
    ~releases ~users

(* Sync one week and, when [existing] cached data is supplied, merge the
   fresh result into it.  Fetch statistics describe the fresh sync only. *)
let sync_week_incremental ~client ~owner ~repo ~week ~existing ?on_progress () =
  match sync_week ~client ~owner ~repo ~week ?on_progress () with
  | Error e -> Error e
  | Ok result ->
      let merged_data =
        match existing with
        | None -> result.week_data
        | Some existing_data -> merge_week_data ~existing:existing_data result.week_data
      in
      Ok { result with week_data = merged_data }
+112
repowatch/lib/sync.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Repository sync engine.

    This module provides the sync engine for fetching repository data from
    GitHub and transforming it into the ruminant format. It handles
    pagination, rate limiting, and week-based filtering. *)

(** {1 Progress Reporting} *)

type phase = [ `Issues_prs | `Discussions | `Releases | `Users ]
(** Sync phases, in the order they are executed by {!sync_week}. *)

type progress = {
  phase : phase;  (** Current sync phase. *)
  current : int;  (** Current page or item number. *)
  total : int option;  (** Total pages/items if known. *)
}
(** Progress information passed to the [on_progress] callbacks. *)

(** {1 Sync Results} *)

type sync_result = {
  week_data : Types.Week_data.t;  (** The synced week data. *)
  issues_fetched : int;  (** Number of issues fetched. *)
  prs_fetched : int;  (** Number of PRs fetched. *)
  pages_fetched : int;  (** Total GraphQL pages fetched. *)
  rate_limit_remaining : int option;
      (** Rate limit remaining after the sync, if reported by GitHub. *)
}
(** Result of a successful sync operation. *)

(** {1 Sync Operations} *)

val sync_week :
  client:Github.t ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  ?on_progress:(progress -> unit) ->
  unit ->
  (sync_result, Github.error) result
(** [sync_week ~client ~owner ~repo ~week ()] fetches all repository
    activity for the given week.

    This function:
    - Queries the GitHub GraphQL API for issues and PRs with pagination
    - Queries for discussions (single page)
    - Fetches releases via the REST API
    - Filters all data to include only items active during the week
    - Extracts usernames from the data

    @param client GitHub API client
    @param owner Repository owner (user or organization)
    @param repo Repository name
    @param week ISO week to sync
    @param on_progress Optional callback for progress updates *)

val sync_range :
  client:Github.t ->
  owner:string ->
  repo:string ->
  from_week:Week.t ->
  to_week:Week.t ->
  ?on_progress:(progress -> unit) ->
  unit ->
  (sync_result list, Github.error) result
(** [sync_range ~client ~owner ~repo ~from_week ~to_week ()] syncs a range
    of weeks in chronological order. Stops on the first error; results for
    weeks already synced before the failure are discarded. *)

(** {1 Incremental Sync} *)

val merge_week_data :
  existing:Types.Week_data.t -> Types.Week_data.t -> Types.Week_data.t
(** [merge_week_data ~existing new_data] merges new data into existing data.

    Merging rules:
    - Issues/PRs/Discussions: newer entries (matched by ID) replace older ones
    - Releases: deduplicated by tag name, newer replaces older
    - Users: union of both lists, sorted and deduplicated
    - Metadata: the new data's metadata is kept (it carries the refreshed
      [cached_at] timestamp) *)

val sync_week_incremental :
  client:Github.t ->
  owner:string ->
  repo:string ->
  week:Week.t ->
  existing:Types.Week_data.t option ->
  ?on_progress:(progress -> unit) ->
  unit ->
  (sync_result, Github.error) result
(** [sync_week_incremental ~client ~owner ~repo ~week ~existing ()] fetches
    repository activity and merges it with existing data if present.

    This is designed for daily cron jobs that incrementally update the
    current week's data. New items are added and existing items are updated
    with fresh data from GitHub. *)

(** {1 Configuration} *)

val max_pages : int
(** Maximum number of GraphQL pages to fetch (default: 20). *)

val early_exit_threshold : int
(** Number of pages without relevant activity before stopping early
    (default: 5). *)
+159
repowatch/lib/transform.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Transform raw GraphQL/REST responses into the normalized [Types]
   representations used for storage.  See transform.mli for the contract. *)

module G = Graphql_types

(* Comments are flattened into "@author: body" strings in the stored
   format; Users.extract_comment_author relies on this exact shape. *)
let format_comment ~author ~body =
  Printf.sprintf "@%s: %s" author body

(* Convert a GraphQL issue node to [Types.Issue.t].  The GraphQL state
   enum ("OPEN"/"CLOSED") is lowercased for storage. *)
let issue_of_node node =
  let labels =
    List.map G.Label.name (G.Issue_node.labels node)
  in
  let comments =
    List.map
      (fun c ->
        format_comment
          ~author:(G.Author.login (G.Comment.author c))
          ~body:(G.Comment.body_text c))
      (G.Issue_node.comments node)
  in
  let state = String.lowercase_ascii (G.Issue_node.state node) in
  Types.Issue.make
    ~id:(G.Issue_node.number node)
    ~title:(G.Issue_node.title node)
    ~url:(G.Issue_node.url node)
    ~user:(G.Author.login (G.Issue_node.author node))
    ~created_at:(G.Issue_node.created_at node)
    ~updated_at:(G.Issue_node.updated_at node)
    ~closed_at:(G.Issue_node.closed_at node)
    ~body:(G.Issue_node.body_text node)
    ~labels
    ~state
    ~comments

(* Convert a GraphQL PR node to [Types.Pr.t]. *)
let pr_of_node node =
  let labels =
    List.map G.Label.name (G.Pr_node.labels node)
  in
  let comments =
    List.map
      (fun c ->
        format_comment
          ~author:(G.Author.login (G.Comment.author c))
          ~body:(G.Comment.body_text c))
      (G.Pr_node.comments node)
  in
  (* Normalize state: GraphQL returns "OPEN", "CLOSED", "MERGED".  A PR
     whose state is CLOSED but that carries a merge timestamp is recorded
     as "merged"; anything unrecognized falls through to "open". *)
  let state =
    let raw_state = G.Pr_node.state node in
    match String.lowercase_ascii raw_state with
    | "merged" -> "merged"
    | "closed" ->
        (* Check if it was merged even though state is CLOSED *)
        if Option.is_some (G.Pr_node.merged_at node) then "merged"
        else "closed"
    | _ -> "open"
  in
  Types.Pr.make
    ~id:(G.Pr_node.number node)
    ~title:(G.Pr_node.title node)
    ~url:(G.Pr_node.url node)
    ~user:(G.Author.login (G.Pr_node.author node))
    ~created_at:(G.Pr_node.created_at node)
    ~updated_at:(G.Pr_node.updated_at node)
    ~closed_at:(G.Pr_node.closed_at node)
    ~merged_at:(G.Pr_node.merged_at node)
    ~body:(G.Pr_node.body_text node)
    ~labels
    ~state
    ~comments
    ~additions:(G.Pr_node.additions node)
    ~deletions:(G.Pr_node.deletions node)
    ~changed_files:(G.Pr_node.changed_files node)
    ~mergeable:(G.Pr_node.mergeable node)
    ~draft:(G.Pr_node.is_draft node)

(* Convert a GraphQL discussion node to [Types.Discussion.t].  Note that
   the discussion number (not the GraphQL node id) is stored as [id]. *)
let discussion_of_node node =
  Types.Discussion.make
    ~id:(G.Discussion_node.number node)
    ~title:(G.Discussion_node.title node)
    ~url:(G.Discussion_node.url node)
    ~user:(G.Author.login (G.Discussion_node.author node))
    ~updated_at:(G.Discussion_node.updated_at node)
    ~body:(G.Discussion_node.body_text node)
    ~category:(G.Discussion_node.category node)
    ~comments:(G.Discussion_node.comments_count node)
    ~answered:(G.Discussion_node.answered node)

(* Codecs for the GitHub REST API release payload.  With jsont, member
   declaration order must match the parameter order of the function given
   to [Jsont.Object.map] — keep them in sync when editing. *)
let nullable_string = Jsont.option Jsont.string

let rest_asset =
  Jsont.Object.map ~kind:"RestAsset"
    (fun name download_count size ->
      Types.Asset.make ~name ~download_count ~size)
  |> Jsont.Object.mem "name" Jsont.string ~enc:Types.Asset.name
  |> Jsont.Object.mem "download_count" Jsont.int ~enc:Types.Asset.download_count
  |> Jsont.Object.mem "size" Jsont.int ~enc:Types.Asset.size
  |> Jsont.Object.finish

(* Decodes an author object down to just its "login" string. *)
let rest_author_login =
  Jsont.Object.map ~kind:"RestAuthor" (fun login -> login)
  |> Jsont.Object.mem "login" Jsont.string ~enc:(fun x -> x)
  |> Jsont.Object.finish

let rest_release =
  Jsont.Object.map ~kind:"RestRelease"
    (fun tag_name name published_at author html_url body prerelease draft assets ->
      (* REST releases may have a null/absent "name" and "body"; default
         the name to the tag and the body to the empty string. *)
      let name = Option.value name ~default:tag_name in
      let body = Option.value body ~default:"" in
      Types.Release.make ~tag_name ~name ~published_at ~author ~html_url ~body
        ~prerelease ~draft ~assets)
  |> Jsont.Object.mem "tag_name" Jsont.string ~enc:Types.Release.tag_name
  |> Jsont.Object.mem "name" nullable_string ~dec_absent:None
       ~enc:(fun r -> Some (Types.Release.name r))
  |> Jsont.Object.mem "published_at" Jsont.string ~enc:Types.Release.published_at
  |> Jsont.Object.mem "author" rest_author_login ~enc:Types.Release.author
  |> Jsont.Object.mem "html_url" Jsont.string ~enc:Types.Release.html_url
  |> Jsont.Object.mem "body" nullable_string ~dec_absent:None
       ~enc:(fun r -> Some (Types.Release.body r))
  |> Jsont.Object.mem "prerelease" Jsont.bool ~enc:Types.Release.prerelease
  |> Jsont.Object.mem "draft" Jsont.bool ~enc:Types.Release.draft
  |> Jsont.Object.mem "assets" (Jsont.list rest_asset) ~enc:Types.Release.assets
  |> Jsont.Object.finish

(* Best-effort decode: malformed releases are dropped, not errors. *)
let release_of_json json =
  match Jsont.Json.decode rest_release json with
  | Ok release -> Some release
  | Error _ -> None

(* Format a Ptime.t as an ISO 8601 UTC timestamp.  Ptime.to_date_time
   defaults to a +00:00 offset, so the trailing "Z" is accurate.  Note:
   sub-second precision is discarded. *)
let ptime_to_iso8601 t =
  let (y, m, d), ((hh, mm, ss), _tz) = Ptime.to_date_time t in
  Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" y m d hh mm ss

(* Build snapshot metadata; [repo] is stored as "owner/repo". *)
let make_metadata ~owner ~repo ~week ~cached_at =
  Types.Metadata.make
    ~repo:(Printf.sprintf "%s/%s" owner repo)
    ~year:(Week.year week)
    ~week:(Week.week week)
    ~week_start:(Week.start_date week)
    ~week_end:(Week.end_date week)
    ~cached_at:(ptime_to_iso8601 cached_at)

(* Batch helpers: filter by week activity, then convert. *)
let issues_of_nodes ~week nodes =
  nodes
  |> List.filter (Filter.issue_active_in_week ~week)
  |> List.map issue_of_node

let prs_of_nodes ~week nodes =
  nodes
  |> List.filter (Filter.pr_active_in_week ~week)
  |> List.map pr_of_node

let discussions_of_nodes ~week nodes =
  nodes
  |> List.filter (Filter.discussion_active_in_week ~week)
  |> List.map discussion_of_node
+70
repowatch/lib/transform.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Transform GraphQL responses to repowatch types.

    This module provides functions to convert the raw GraphQL response types
    into the normalized [Types] representations used for storage and
    analysis. *)

(** {1 Issue Transformation} *)

val issue_of_node : Graphql_types.Issue_node.t -> Types.Issue.t
(** [issue_of_node node] converts a GraphQL issue node to [Types.Issue.t].

    Comments are formatted as "@author: body" strings (see
    {!format_comment}) and the state is normalized to lowercase. *)

(** {1 Pull Request Transformation} *)

val pr_of_node : Graphql_types.Pr_node.t -> Types.Pr.t
(** [pr_of_node node] converts a GraphQL PR node to [Types.Pr.t].

    The state is normalized to lowercase ("open", "closed", "merged"); a
    closed PR with a merge timestamp is reported as "merged". *)

(** {1 Discussion Transformation} *)

val discussion_of_node : Graphql_types.Discussion_node.t -> Types.Discussion.t
(** [discussion_of_node node] converts a GraphQL discussion node to
    [Types.Discussion.t]. *)

(** {1 Release Transformation} *)

val release_of_json : Jsont.Json.t -> Types.Release.t option
(** [release_of_json json] attempts to decode a release from the GitHub REST
    API JSON format. Returns [None] if decoding fails, so malformed
    releases are silently dropped. *)

(** {1 Comment Formatting} *)

val format_comment : author:string -> body:string -> string
(** [format_comment ~author ~body] formats a comment as "@author: body". *)

(** {1 Metadata Construction} *)

val make_metadata :
  owner:string ->
  repo:string ->
  week:Week.t ->
  cached_at:Ptime.t ->
  Types.Metadata.t
(** [make_metadata ~owner ~repo ~week ~cached_at] creates metadata for a
    weekly snapshot. The repository is recorded as ["owner/repo"] and
    [cached_at] is rendered as an ISO 8601 UTC timestamp. *)

(** {1 Batch Transformation} *)

val issues_of_nodes :
  week:Week.t -> Graphql_types.Issue_node.t list -> Types.Issue.t list
(** [issues_of_nodes ~week nodes] filters and transforms issues that were
    active during the given week. *)

val prs_of_nodes :
  week:Week.t -> Graphql_types.Pr_node.t list -> Types.Pr.t list
(** [prs_of_nodes ~week nodes] filters and transforms PRs that were
    active during the given week. *)

val discussions_of_nodes :
  week:Week.t ->
  Graphql_types.Discussion_node.t list ->
  Types.Discussion.t list
(** [discussions_of_nodes ~week nodes] filters and transforms discussions
    that were updated during the given week. *)
+198
repowatch/lib/types.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Plain record types mirroring the ruminant weekly-snapshot JSON schema.
   Each module is a record with a [make] constructor and one accessor per
   field; all timestamps are ISO 8601 strings.  See types.mli for the
   documented interface. *)

(* Metadata about one weekly activity snapshot. *)
module Metadata = struct
  type t = {
    repo : string;
    year : int;
    week : int;
    week_start : string;
    week_end : string;
    cached_at : string;
  }

  let make ~repo ~year ~week ~week_start ~week_end ~cached_at =
    { repo; year; week; week_start; week_end; cached_at }

  let repo t = t.repo
  let year t = t.year
  let week t = t.week
  let week_start t = t.week_start
  let week_end t = t.week_end
  let cached_at t = t.cached_at
end

(* A GitHub issue.  [comments] holds pre-formatted "@author: body"
   strings, not structured comment records. *)
module Issue = struct
  type t = {
    id : int;
    title : string;
    url : string;
    user : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    body : string;
    labels : string list;
    state : string;
    comments : string list;
  }

  let make ~id ~title ~url ~user ~created_at ~updated_at ~closed_at ~body
      ~labels ~state ~comments =
    { id; title; url; user; created_at; updated_at; closed_at; body; labels;
      state; comments }

  let id t = t.id
  let title t = t.title
  let url t = t.url
  let user t = t.user
  let created_at t = t.created_at
  let updated_at t = t.updated_at
  let closed_at t = t.closed_at
  let body t = t.body
  let labels t = t.labels
  let state t = t.state
  let comments t = t.comments
end

(* A GitHub pull request.  [state] is a lowercase string
   ("open"/"closed"/"merged"); [comments] is the same flattened format as
   Issue.comments. *)
module Pr = struct
  type t = {
    id : int;
    title : string;
    url : string;
    user : string;
    created_at : string;
    updated_at : string;
    closed_at : string option;
    merged_at : string option;
    body : string;
    labels : string list;
    state : string;
    comments : string list;
    additions : int;
    deletions : int;
    changed_files : int;
    mergeable : string;
    draft : bool;
  }

  let make ~id ~title ~url ~user ~created_at ~updated_at ~closed_at ~merged_at
      ~body ~labels ~state ~comments ~additions ~deletions ~changed_files
      ~mergeable ~draft =
    { id; title; url; user; created_at; updated_at; closed_at; merged_at;
      body; labels; state; comments; additions; deletions; changed_files;
      mergeable; draft }

  let id t = t.id
  let title t = t.title
  let url t = t.url
  let user t = t.user
  let created_at t = t.created_at
  let updated_at t = t.updated_at
  let closed_at t = t.closed_at
  let merged_at t = t.merged_at
  let body t = t.body
  let labels t = t.labels
  let state t = t.state
  let comments t = t.comments
  let additions t = t.additions
  let deletions t = t.deletions
  let changed_files t = t.changed_files
  let mergeable t = t.mergeable
  let draft t = t.draft
end

(* A GitHub discussion.  Unlike issues/PRs, only the comment count is
   stored, not the comment bodies. *)
module Discussion = struct
  type t = {
    id : int;
    title : string;
    url : string;
    user : string;
    updated_at : string;
    body : string;
    category : string;
    comments : int;
    answered : bool;
  }

  let make ~id ~title ~url ~user ~updated_at ~body ~category ~comments ~answered =
    { id; title; url; user; updated_at; body; category; comments; answered }

  let id t = t.id
  let title t = t.title
  let url t = t.url
  let user t = t.user
  let updated_at t = t.updated_at
  let body t = t.body
  let category t = t.category
  let comments t = t.comments
  let answered t = t.answered
end

(* A downloadable file attached to a release. *)
module Asset = struct
  type t = {
    name : string;
    download_count : int;
    size : int;
  }

  let make ~name ~download_count ~size = { name; download_count; size }
  let name t = t.name
  let download_count t = t.download_count
  let size t = t.size
end

(* A GitHub release, keyed by [tag_name] when merging snapshots. *)
module Release = struct
  type t = {
    tag_name : string;
    name : string;
    published_at : string;
    author : string;
    html_url : string;
    body : string;
    prerelease : bool;
    draft : bool;
    assets : Asset.t list;
  }

  let make ~tag_name ~name ~published_at ~author ~html_url ~body ~prerelease
      ~draft ~assets =
    { tag_name; name; published_at; author; html_url; body; prerelease; draft;
      assets }

  let tag_name t = t.tag_name
  let name t = t.name
  let published_at t = t.published_at
  let author t = t.author
  let html_url t = t.html_url
  let body t = t.body
  let prerelease t = t.prerelease
  let draft t = t.draft
  let assets t = t.assets
end

(* The complete snapshot for one repository and one ISO week: everything
   that is serialized to a data/gh/ JSON file. *)
module Week_data = struct
  type t = {
    metadata : Metadata.t;
    issues : Issue.t list;
    prs : Pr.t list;
    good_first_issues : Issue.t list;
    discussions : Discussion.t list;
    releases : Release.t list;
    users : string list;
  }

  let make ~metadata ~issues ~prs ~good_first_issues ~discussions ~releases
      ~users =
    { metadata; issues; prs; good_first_issues; discussions; releases; users }

  let metadata t = t.metadata
  let issues t = t.issues
  let prs t = t.prs
  let good_first_issues t = t.good_first_issues
  let discussions t = t.discussions
  let releases t = t.releases
  let users t = t.users
end
+337
repowatch/lib/types.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Type definitions for GitHub repository activity data.

    These types correspond to the JSON schema used by the ruminant data
    format for weekly repository activity snapshots. All timestamps are
    ISO 8601 strings. *)

(** {1 Metadata} *)

module Metadata : sig
  type t
  (** Metadata about a weekly activity snapshot. *)

  val make :
    repo:string ->
    year:int ->
    week:int ->
    week_start:string ->
    week_end:string ->
    cached_at:string ->
    t
  (** Create metadata for a weekly snapshot.

      @param repo Repository in "owner/repo" format
      @param year The year
      @param week ISO week number (1-53)
      @param week_start Start date in ISO 8601 format
      @param week_end End date in ISO 8601 format
      @param cached_at Timestamp when data was cached *)

  val repo : t -> string
  (** Repository identifier in "owner/repo" format. *)

  val year : t -> int
  (** The year this snapshot covers. *)

  val week : t -> int
  (** ISO week number (1-53). *)

  val week_start : t -> string
  (** Start date of the week in ISO 8601 format. *)

  val week_end : t -> string
  (** End date of the week in ISO 8601 format. *)

  val cached_at : t -> string
  (** Timestamp when the data was cached. *)
end

(** {1 Issues} *)

module Issue : sig
  type t
  (** A GitHub issue. *)

  val make :
    id:int ->
    title:string ->
    url:string ->
    user:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    body:string ->
    labels:string list ->
    state:string ->
    comments:string list ->
    t
  (** Create an issue record. *)

  val id : t -> int
  (** Issue number. *)

  val title : t -> string
  (** Issue title. *)

  val url : t -> string
  (** URL to the issue on GitHub. *)

  val user : t -> string
  (** Username who created the issue. *)

  val created_at : t -> string
  (** Creation timestamp in ISO 8601 format. *)

  val updated_at : t -> string
  (** Last update timestamp. *)

  val closed_at : t -> string option
  (** Closure timestamp if closed, [None] while open. *)

  val body : t -> string
  (** Issue body text. *)

  val labels : t -> string list
  (** Labels applied to the issue. *)

  val state : t -> string
  (** Issue state ("open" or "closed"). *)

  val comments : t -> string list
  (** List of comment texts, each formatted as "@author: body". *)
end

(** {1 Pull Requests} *)

module Pr : sig
  type t
  (** A GitHub pull request. *)

  val make :
    id:int ->
    title:string ->
    url:string ->
    user:string ->
    created_at:string ->
    updated_at:string ->
    closed_at:string option ->
    merged_at:string option ->
    body:string ->
    labels:string list ->
    state:string ->
    comments:string list ->
    additions:int ->
    deletions:int ->
    changed_files:int ->
    mergeable:string ->
    draft:bool ->
    t
  (** Create a pull request record. *)

  val id : t -> int
  (** PR number. *)

  val title : t -> string
  (** PR title. *)

  val url : t -> string
  (** URL to the PR on GitHub. *)

  val user : t -> string
  (** Username who created the PR. *)

  val created_at : t -> string
  (** Creation timestamp. *)

  val updated_at : t -> string
  (** Last update timestamp. *)

  val closed_at : t -> string option
  (** Closure timestamp if closed. *)

  val merged_at : t -> string option
  (** Merge timestamp if merged. *)

  val body : t -> string
  (** PR body text. *)

  val labels : t -> string list
  (** Labels applied to the PR. *)

  val state : t -> string
  (** PR state ("open", "closed", or "merged"). *)

  val comments : t -> string list
  (** List of comment texts, each formatted as "@author: body". *)

  val additions : t -> int
  (** Number of lines added. *)

  val deletions : t -> int
  (** Number of lines deleted. *)

  val changed_files : t -> int
  (** Number of files changed. *)

  val mergeable : t -> string
  (** Merge status ("MERGEABLE", "CONFLICTING", etc.). *)

  val draft : t -> bool
  (** Whether this is a draft PR. *)
end

(** {1 Discussions} *)

module Discussion : sig
  type t
  (** A GitHub discussion. *)

  val make :
    id:int ->
    title:string ->
    url:string ->
    user:string ->
    updated_at:string ->
    body:string ->
    category:string ->
    comments:int ->
    answered:bool ->
    t
  (** Create a discussion record. *)

  val id : t -> int
  (** Discussion number (as shown in the discussion URL). *)

  val title : t -> string
  (** Discussion title. *)

  val url : t -> string
  (** URL to the discussion on GitHub. *)

  val user : t -> string
  (** Username who created the discussion. *)

  val updated_at : t -> string
  (** Last update timestamp. *)

  val body : t -> string
  (** Discussion body text. *)

  val category : t -> string
  (** Discussion category. *)

  val comments : t -> int
  (** Number of comments (bodies are not stored). *)

  val answered : t -> bool
  (** Whether the discussion has been marked answered. *)
end

(** {1 Release Assets} *)

module Asset : sig
  type t
  (** A release asset (downloadable file). *)

  val make : name:string -> download_count:int -> size:int -> t
  (** Create an asset record. *)

  val name : t -> string
  (** Asset filename. *)

  val download_count : t -> int
  (** Number of downloads. *)

  val size : t -> int
  (** Asset size in bytes. *)
end

(** {1 Releases} *)

module Release : sig
  type t
  (** A GitHub release. *)

  val make :
    tag_name:string ->
    name:string ->
    published_at:string ->
    author:string ->
    html_url:string ->
    body:string ->
    prerelease:bool ->
    draft:bool ->
    assets:Asset.t list ->
    t
  (** Create a release record. *)

  val tag_name : t -> string
  (** Git tag name for the release. Releases are deduplicated by this
      value when merging weekly snapshots. *)

  val name : t -> string
  (** Release title. *)

  val published_at : t -> string
  (** Publication timestamp. *)

  val author : t -> string
  (** Username who created the release. *)

  val html_url : t -> string
  (** URL to the release on GitHub. *)

  val body : t -> string
  (** Release notes body text. *)

  val prerelease : t -> bool
  (** Whether this is a prerelease. *)

  val draft : t -> bool
  (** Whether this is a draft release. *)

  val assets : t -> Asset.t list
  (** Downloadable assets attached to the release. *)
end

(** {1 Weekly Data} *)

module Week_data : sig
  type t
  (** Complete weekly activity snapshot for a repository. *)

  val make :
    metadata:Metadata.t ->
    issues:Issue.t list ->
    prs:Pr.t list ->
    good_first_issues:Issue.t list ->
    discussions:Discussion.t list ->
    releases:Release.t list ->
    users:string list ->
    t
  (** Create a weekly data snapshot. *)

  val metadata : t -> Metadata.t
  (** Snapshot metadata. *)

  val issues : t -> Issue.t list
  (** Issues with activity during the week. *)

  val prs : t -> Pr.t list
  (** Pull requests with activity during the week. *)

  val good_first_issues : t -> Issue.t list
  (** Issues labeled "good first issue". *)

  val discussions : t -> Discussion.t list
  (** Discussions with activity during the week. *)

  val releases : t -> Release.t list
  (** Releases published during the week. *)

  val users : t -> string list
  (** Usernames who were active during the week, sorted and unique. *)
end
+188
repowatch/lib/users.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Extraction and validation of GitHub usernames from weekly activity
   data.  See users.mli for the public contract. *)

(* Words that commonly follow an '@' in prose, doc comments or CI output
   but are not GitHub usernames.  Matched case-insensitively. *)
let excluded_words =
  [
    (* Common English words that might appear with @ *)
    "all";
    "everyone";
    "here";
    "channel";
    "team";
    "author";
    "authors";
    "maintainer";
    "maintainers";
    "reviewer";
    "reviewers";
    "user";
    "users";
    "admin";
    "admins";
    "bot";
    "bots";
    "ghost";
    (* Programming terms *)
    "param";
    "params";
    "return";
    "returns";
    "type";
    "types";
    "value";
    "values";
    "deprecated";
    "since";
    "see";
    "link";
    "code";
    "example";
    "note";
    "warning";
    "todo";
    "fixme";
    "hack";
    (* OCaml-specific terms *)
    "raise";
    "raises";
    "inline";
    "ocaml";
    "opam";
    "dune";
    "module";
    "functor";
    "mli";
    "cma";
    "cmo";
    "cmx";
    "cmi";
    (* Git/GitHub terms *)
    "dependabot";
    "github";
    "actions";
    "codecov";
    "renovate";
  ]

(* Character-class helpers shared by the validators below. *)
let is_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
let is_alnum c = is_letter c || (c >= '0' && c <= '9')

(* [looks_like_hex s] is true when [s] reads as a hex string of at least
   six characters, i.e. it is probably a commit SHA fragment rather than
   a username. *)
let looks_like_hex s =
  String.length s >= 6
  && String.for_all
       (fun c -> (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))
       (String.lowercase_ascii s)

(* [is_valid_username s] checks that [s] plausibly names a GitHub user:
   starts with a letter, contains only alphanumerics and non-consecutive
   hyphens, does not end with a hyphen, is not a hex fragment, and is not
   an excluded word.  Length is restricted to 2-39: GitHub itself allows
   single-character names, but requiring two characters cuts down false
   positives from stray "@x" fragments in prose. *)
let is_valid_username s =
  let len = String.length s in
  if len < 2 || len > 39 then false
  else if List.mem (String.lowercase_ascii s) excluded_words then false
  else if looks_like_hex s then false
  else if not (is_letter s.[0]) then false
  else if s.[len - 1] = '-' then false
  else
    (* Remaining characters: alphanumeric, or a hyphen not preceded by
       another hyphen.  (Index 0 is already known to be a letter.) *)
    let rec check i prev_hyphen =
      if i >= len then true
      else if s.[i] = '-' then (not prev_hyphen) && check (i + 1) true
      else is_alnum s.[i] && check (i + 1) false
    in
    check 1 false

(* [extract_mentions text] extracts "@username" mentions from [text].

   A '@' only starts a mention when it sits on a word boundary (start of
   text or preceded by a non-alphanumeric character); this avoids reading
   the domain part of e-mail-like strings such as "user@example" as a
   mention, matching GitHub's own mention rules.  Returns a sorted,
   deduplicated list of valid usernames. *)
let extract_mentions text =
  let len = String.length text in
  let rec find_at i acc =
    if i >= len then acc
    else if text.[i] = '@' && (i = 0 || not (is_alnum text.[i - 1])) then
      (* Consume the longest run of username characters after the '@'. *)
      let rec read_username j =
        if j >= len then j
        else if is_alnum text.[j] || text.[j] = '-' then read_username (j + 1)
        else j
      in
      let end_pos = read_username (i + 1) in
      if end_pos > i + 1 then
        let username = String.sub text (i + 1) (end_pos - i - 1) in
        if is_valid_username username then find_at end_pos (username :: acc)
        else find_at end_pos acc
      else find_at (i + 1) acc
    else find_at (i + 1) acc
  in
  find_at 0 [] |> List.sort_uniq String.compare

(* [extract_comment_author comment] recovers the author from a comment
   stored in the "@username: body" format produced by
   Transform.format_comment.  Returns [None] when the comment does not
   match that shape or the name is invalid. *)
let extract_comment_author comment =
  if String.length comment > 1 && comment.[0] = '@' then
    match String.index_opt comment ':' with
    | None -> None
    | Some colon_pos ->
        let username = String.sub comment 1 (colon_pos - 1) in
        if is_valid_username username then Some username else None
  else None

(* [extract_from_week_data data] collects every valid username seen in
   the week's activity: issue/PR/discussion authors, comment authors,
   release authors, and @mentions in bodies and comments.  Good-first
   issues contribute their author and body mentions only (their comments
   are not scanned).  Result is sorted and deduplicated. *)
let extract_from_week_data data =
  let users = ref [] in
  let add u = if is_valid_username u then users := u :: !users in
  let add_mentions text = List.iter add (extract_mentions text) in
  let add_comment comment =
    Option.iter add (extract_comment_author comment);
    add_mentions comment
  in
  (* Issue authors, body mentions, comment authors and comment mentions. *)
  List.iter
    (fun issue ->
      add (Types.Issue.user issue);
      add_mentions (Types.Issue.body issue);
      List.iter add_comment (Types.Issue.comments issue))
    (Types.Week_data.issues data);
  (* PR authors, body mentions, comment authors and comment mentions. *)
  List.iter
    (fun pr ->
      add (Types.Pr.user pr);
      add_mentions (Types.Pr.body pr);
      List.iter add_comment (Types.Pr.comments pr))
    (Types.Week_data.prs data);
  (* Good first issues: authors and body mentions. *)
  List.iter
    (fun issue ->
      add (Types.Issue.user issue);
      add_mentions (Types.Issue.body issue))
    (Types.Week_data.good_first_issues data);
  (* Discussion authors and body mentions. *)
  List.iter
    (fun disc ->
      add (Types.Discussion.user disc);
      add_mentions (Types.Discussion.body disc))
    (Types.Week_data.discussions data);
  (* Release authors. *)
  List.iter
    (fun rel -> add (Types.Release.author rel))
    (Types.Week_data.releases data);
  !users |> List.sort_uniq String.compare
+43
repowatch/lib/users.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** Extract and validate GitHub usernames.

    This module provides functions to extract GitHub usernames from
    weekly activity data and validate them against common patterns
    that shouldn't be treated as usernames. *)

(** {1 Username Extraction} *)

val extract_from_week_data : Types.Week_data.t -> string list
(** [extract_from_week_data data] extracts all unique usernames from
    the week data, sorted and deduplicated. This includes:
    - Issue and PR authors
    - Comment authors (extracted from "@user: comment" format)
    - Discussion authors
    - Release authors
    - Usernames mentioned in issue/PR bodies and comments *)

val extract_mentions : string -> string list
(** [extract_mentions text] extracts mentions (usernames prefixed with [\@])
    from text. Returns a sorted list of unique usernames found. Only
    mentions that pass {!is_valid_username} are returned. *)

(** {1 Validation} *)

val is_valid_username : string -> bool
(** [is_valid_username s] returns [true] if the string looks like a
    valid GitHub username and is not in the exclusion list.

    GitHub usernames:
    - Start and end with an alphanumeric character (a leading digit is
      allowed; a leading or trailing hyphen is not)
    - Contain only alphanumeric characters and hyphens
    - Are 1-39 characters long
    - Don't contain consecutive hyphens *)

val excluded_words : string list
(** List of words that look like mentions but should be excluded.
    Includes common programming terms, OCaml-specific terms, and
    common English words. *)
+212
repowatch/lib/week.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* An ISO 8601 week: [week] is 1-53 and [year] is the ISO week-year,
   which can differ from the Gregorian year near year boundaries. *)
type t = { year : int; week : int }

(* Gregorian leap-year rule. *)
let is_leap_year year =
  year mod 4 = 0 && (year mod 100 <> 0 || year mod 400 = 0)

(* Number of leap days in years strictly before [year]. *)
let leap_days_before year =
  let p = year - 1 in
  (p / 4) - (p / 100) + (p / 400)

(* Day count of January 1 of [year], relative to 2000-01-01 (day 0).
   Negative for years before 2000. *)
let days_to_jan1 year =
  ((year - 2000) * 365) + leap_days_before year - leap_days_before 2000

(* ISO weekday (1=Monday .. 7=Sunday) of a day count relative to
   2000-01-01, which was a Saturday (ISO day 6).  Adding 12 before the
   outer [mod] keeps the operand non-negative: OCaml's [mod] takes the
   sign of the dividend, so [days mod 7] can be as low as [-6]. *)
let iso_weekday days = (((days mod 7) + 12) mod 7) + 1

(* Number of ISO weeks in a year. A year has 53 weeks if:
   - January 1 is a Thursday, or
   - January 1 is a Wednesday and it's a leap year.
   Computed from the day count rather than Zeller's congruence: the
   previous Zeller form used a [- 2 * j] term whose sum can go negative
   for some centuries, and OCaml's negative [mod] then produced an
   out-of-range weekday. *)
let weeks_in_year year =
  let jan1_weekday = iso_weekday (days_to_jan1 year) in
  if jan1_weekday = 4 || (jan1_weekday = 3 && is_leap_year year) then 53
  else 52

(* Build a week from its year and number, validating the number against
   the 52/53-week length of that ISO year.
   Raises [Invalid_argument] when out of range. *)
let of_year_week ~year ~week =
  let max_weeks = weeks_in_year year in
  if week < 1 || week > max_weeks then
    invalid_arg
      (Printf.sprintf "Week %d is out of range for year %d (max: %d)" week year
         max_weeks)
  else { year; week }

let year t = t.year
let week t = t.week

(* Cumulative days before the first of each month, non-leap year. *)
let days_before_month =
  [| 0; 31; 59; 90; 120; 151; 181; 212; 243; 273; 304; 334 |]

(* 1-based ordinal day of the year of a Gregorian date. *)
let day_of_year ~year ~month ~day =
  days_before_month.(month - 1)
  + day
  + (if month > 2 && is_leap_year year then 1 else 0)

(* Convert a Gregorian date to its ISO week, per the standard ISO 8601
   formula: week = (ordinal - weekday + 10) / 7, adjusted into the
   adjacent week-years at the boundaries. *)
let of_date ~year ~month ~day =
  let doy = day_of_year ~year ~month ~day in
  let dow = iso_weekday (days_to_jan1 year + doy - 1) in
  let week_num = (doy - dow + 10) / 7 in
  if week_num < 1 then
    (* Belongs to the last week of the previous ISO year *)
    { year = year - 1; week = weeks_in_year (year - 1) }
  else if week_num > weeks_in_year year then
    (* Belongs to the first week of the next ISO year *)
    { year = year + 1; week = 1 }
  else { year; week = week_num }

let of_ptime ptime =
  let (y, m, d), _ = Ptime.to_date_time ptime in
  of_date ~year:y ~month:m ~day:d

(* Current ISO week from the Eio clock. Falls back to the epoch week
   if the float timestamp cannot be represented as a [Ptime.t]. *)
let current ~clock =
  let now = Eio.Time.now clock in
  match Ptime.of_float_s now with
  | Some ptime -> of_ptime ptime
  | None -> of_date ~year:1970 ~month:1 ~day:1

(* Day count (relative to 2000-01-01) of the Monday of week 1 of the
   given ISO week-year. Week 1 always contains January 4. *)
let monday_of_week1 year =
  let jan4 = days_to_jan1 year + 3 in
  jan4 - (iso_weekday jan4 - 1)

(* Inverse of the day count: back to a Gregorian (year, month, day). *)
let days_since_2000_to_date days =
  (* [2000 + days / 365] never under-estimates the year (years have at
     least 365 days), so scanning downward terminates at the first year
     whose January 1 is on or before [days]. *)
  let rec find_year y =
    let start = days_to_jan1 y in
    if start > days then find_year (y - 1) else (y, days - start)
  in
  let year, doy0 = find_year (2000 + (days / 365)) in
  let doy = doy0 + 1 in
  (* Convert to 1-based day of year *)
  let days_in_month =
    [|
      31; (if is_leap_year year then 29 else 28); 31; 30; 31; 30;
      31; 31; 30; 31; 30; 31;
    |]
  in
  let rec find_month m remaining =
    if remaining <= days_in_month.(m - 1) then (m, remaining)
    else find_month (m + 1) (remaining - days_in_month.(m - 1))
  in
  let month, day = find_month 1 doy in
  (year, month, day)

(* Monday of the week at 00:00:00 UTC. *)
let start_ptime t =
  let monday = monday_of_week1 t.year + ((t.week - 1) * 7) in
  let year, month, day = days_since_2000_to_date monday in
  match Ptime.of_date (year, month, day) with
  | Some pt -> pt
  | None -> Ptime.epoch

(* Sunday of the week at 23:59:59 UTC (inclusive upper bound). *)
let end_ptime t =
  let sunday = monday_of_week1 t.year + ((t.week - 1) * 7) + 6 in
  let year, month, day = days_since_2000_to_date sunday in
  match Ptime.of_date_time ((year, month, day), ((23, 59, 59), 0)) with
  | Some pt -> pt
  | None -> Ptime.epoch

let start_date t =
  let (y, m, d), _ = Ptime.to_date_time (start_ptime t) in
  Printf.sprintf "%04d-%02d-%02d" y m d

let end_date t =
  let (y, m, d), _ = Ptime.to_date_time (end_ptime t) in
  Printf.sprintf "%04d-%02d-%02d" y m d

(* Inclusive containment: Monday 00:00:00 through Sunday 23:59:59. *)
let contains_ptime t time =
  Ptime.compare time (start_ptime t) >= 0
  && Ptime.compare time (end_ptime t) <= 0

(* Parse an ISO 8601 timestamp like "2024-01-15T10:30:00Z". Only the
   date (and, when present, time) fields are read; any suffix after the
   seconds is ignored and the time is taken as UTC, matching the
   Z-suffixed timestamps produced by the GitHub API. *)
let parse_iso8601 s =
  try
    let len = String.length s in
    if len < 10 then None
    else
      let year = int_of_string (String.sub s 0 4) in
      let month = int_of_string (String.sub s 5 2) in
      let day = int_of_string (String.sub s 8 2) in
      if len >= 19 then
        let hour = int_of_string (String.sub s 11 2) in
        let min = int_of_string (String.sub s 14 2) in
        let sec = int_of_string (String.sub s 17 2) in
        Ptime.of_date_time ((year, month, day), ((hour, min, sec), 0))
      else Ptime.of_date (year, month, day)
  with _ -> None

let contains_timestamp t timestamp =
  match parse_iso8601 timestamp with
  | Some time -> contains_ptime t time
  | None -> false

let prev t =
  if t.week > 1 then { year = t.year; week = t.week - 1 }
  else { year = t.year - 1; week = weeks_in_year (t.year - 1) }

let next t =
  if t.week < weeks_in_year t.year then { year = t.year; week = t.week + 1 }
  else { year = t.year + 1; week = 1 }

let compare a b =
  match Int.compare a.year b.year with 0 -> Int.compare a.week b.week | c -> c

let equal a b = a.year = b.year && a.week = b.week

(* Inclusive list of weeks from [from] to [to_]; empty when reversed. *)
let range ~from ~to_ =
  let rec loop acc current =
    if compare current from < 0 then acc
    else loop (current :: acc) (prev current)
  in
  if compare from to_ > 0 then [] else loop [] to_

let to_string t = Printf.sprintf "%04d-W%02d" t.year t.week
let pp fmt t = Format.fprintf fmt "%s" (to_string t)
+99
repowatch/lib/week.mli
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(** ISO week calculations and date filtering.

    This module provides utilities for working with ISO 8601 week dates,
    which are used to partition GitHub activity data into weekly buckets.

    An ISO week always starts on Monday and ends on Sunday. Week 1 is the
    week containing the first Thursday of the year (equivalently, the week
    containing January 4th). *)

type t
(** An ISO week represented as a year and week number (1-53). *)

(** {1 Constructors} *)

val of_date : year:int -> month:int -> day:int -> t
(** [of_date ~year ~month ~day] returns the ISO week containing the given
    Gregorian date. Month is 1-12, day is 1-31. Near year boundaries the
    resulting week may belong to the previous or next ISO week-year. *)

val of_ptime : Ptime.t -> t
(** [of_ptime t] returns the ISO week containing the given timestamp. *)

val current : clock:_ Eio.Time.clock -> t
(** [current ~clock] returns the current ISO week based on the system clock. *)

val of_year_week : year:int -> week:int -> t
(** [of_year_week ~year ~week] creates an ISO week from year and week number.
    Week number must be between 1 and the number of ISO weeks in [year]
    (52 or 53). Raises [Invalid_argument] if week is out of range for the
    given year. *)

(** {1 Accessors} *)

val year : t -> int
(** [year t] returns the ISO week-year. Note that this may differ from the
    Gregorian year for dates near year boundaries. *)

val week : t -> int
(** [week t] returns the ISO week number (1-53). *)

(** {1 Date Boundaries} *)

val start_date : t -> string
(** [start_date t] returns the Monday of the week in ISO 8601 format
    (YYYY-MM-DD). *)

val end_date : t -> string
(** [end_date t] returns the Sunday of the week in ISO 8601 format
    (YYYY-MM-DD). *)

val start_ptime : t -> Ptime.t
(** [start_ptime t] returns the Monday of the week at 00:00:00 UTC. *)

val end_ptime : t -> Ptime.t
(** [end_ptime t] returns the Sunday of the week at 23:59:59 UTC. *)

(** {1 Timestamp Filtering} *)

val contains_timestamp : t -> string -> bool
(** [contains_timestamp t timestamp] returns [true] if the ISO 8601 timestamp
    falls within this week; both boundaries ({!start_ptime} and
    {!end_ptime}) are inclusive. Accepts timestamps in the format used by
    the GitHub API (e.g., "2024-01-15T10:30:00Z"). Returns [false] if the
    timestamp cannot be parsed. *)

val contains_ptime : t -> Ptime.t -> bool
(** [contains_ptime t time] returns [true] if [time] falls within this week
    (boundaries inclusive). *)

(** {1 Navigation} *)

val prev : t -> t
(** [prev t] returns the previous week, crossing ISO year boundaries as
    needed. *)

val next : t -> t
(** [next t] returns the next week, crossing ISO year boundaries as
    needed. *)

val range : from:t -> to_:t -> t list
(** [range ~from ~to_] generates a list of weeks from [from] to [to_]
    (inclusive). Returns an empty list if [from] is after [to_]. *)

(** {1 Comparison} *)

val compare : t -> t -> int
(** [compare a b] compares two weeks chronologically. Returns a negative
    integer if [a] is before [b], zero if equal, and a positive integer if
    [a] is after [b]. *)

val equal : t -> t -> bool
(** [equal a b] returns [true] if [a] and [b] represent the same week. *)

(** {1 Formatting} *)

val to_string : t -> string
(** [to_string t] returns a string representation in ISO week format
    (e.g., "2024-W03"). *)

val pp : Format.formatter -> t -> unit
(** [pp fmt t] pretty-prints the week in the same format as
    {!to_string}. *)
+40
repowatch/repowatch.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "GitHub repository activity watcher and analyzer" 4 + description: 5 + "Repowatch parses and analyzes GitHub repository activity data from JSON files. It provides a CLI for viewing issues, PRs, discussions, and releases with statistics and filtering capabilities." 6 + maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 7 + authors: ["Anil Madhavapeddy <anil@recoil.org>"] 8 + license: "ISC" 9 + homepage: "https://github.com/avsm/repowatch" 10 + doc: "https://avsm.github.io/repowatch" 11 + bug-reports: "https://github.com/avsm/repowatch/issues" 12 + depends: [ 13 + "ocaml" {>= "5.2.0"} 14 + "dune" {>= "3.20"} 15 + "eio_main" {>= "1.2"} 16 + "jsont" {>= "0.1.0"} 17 + "tomlt" {>= "0.1.0"} 18 + "xdge" {>= "0.1.0"} 19 + "cmdliner" {>= "1.3.0"} 20 + "logs" {>= "0.7.0"} 21 + "fmt" {>= "0.9.0"} 22 + "ptime" {>= "1.0.0"} 23 + "odoc" {with-doc} 24 + ] 25 + build: [ 26 + ["dune" "subst"] {dev} 27 + [ 28 + "dune" 29 + "build" 30 + "-p" 31 + name 32 + "-j" 33 + jobs 34 + "@install" 35 + "@runtest" {with-test} 36 + "@doc" {with-doc} 37 + ] 38 + ] 39 + dev-repo: "git+https://github.com/avsm/repowatch.git" 40 + x-maintenance-intent: ["(latest)"]
+7
repowatch/test/dune
··· 1 + (test 2 + (name test_codec) 3 + (libraries repowatch eio_main)) 4 + 5 + (test 6 + (name test_week) 7 + (libraries repowatch))
+263
repowatch/test/test_codec.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Decode [json] with the repowatch codec; on success run the checks in
   [k] and report "<name>: PASS", otherwise report the decoder error as
   "<name>: FAIL - <err>" and exit with a non-zero status. *)
let run_decode name json k =
  match Repowatch.Codec.decode_string json with
  | Ok data ->
      k data;
      Printf.printf "%s: PASS\n" name
  | Error e ->
      Printf.printf "%s: FAIL - %s\n" name e;
      exit 1

(* Minimal week-data document with empty activity lists, shared by the
   metadata and round-trip tests. *)
let empty_week_json =
  {|{
  "metadata": {
    "repo": "test/repo",
    "year": 2025,
    "week": 1,
    "week_start": "2025-01-01",
    "week_end": "2025-01-07",
    "cached_at": "2025-01-08T00:00:00"
  },
  "issues": [],
  "prs": [],
  "good_first_issues": [],
  "discussions": [],
  "releases": [],
  "users": []
}|}

(* Metadata fields survive decoding. *)
let test_decode_metadata () =
  run_decode "test_decode_metadata" empty_week_json (fun data ->
      let m = Repowatch.Types.Week_data.metadata data in
      assert (Repowatch.Types.Metadata.repo m = "test/repo");
      assert (Repowatch.Types.Metadata.year m = 2025);
      assert (Repowatch.Types.Metadata.week m = 1))

(* A single issue decodes with all scalar and list fields intact. *)
let test_decode_issue () =
  let json =
    {|{
  "metadata": {
    "repo": "test/repo",
    "year": 2025,
    "week": 1,
    "week_start": "2025-01-01",
    "week_end": "2025-01-07",
    "cached_at": "2025-01-08T00:00:00"
  },
  "issues": [
    {
      "id": 123,
      "title": "Test issue",
      "url": "https://github.com/test/repo/issues/123",
      "user": "testuser",
      "created_at": "2025-01-01T00:00:00Z",
      "updated_at": "2025-01-02T00:00:00Z",
      "closed_at": null,
      "body": "This is a test issue",
      "labels": ["bug", "help wanted"],
      "state": "open",
      "comments": []
    }
  ],
  "prs": [],
  "good_first_issues": [],
  "discussions": [],
  "releases": [],
  "users": ["testuser"]
}|}
  in
  run_decode "test_decode_issue" json (fun data ->
      let issues = Repowatch.Types.Week_data.issues data in
      assert (List.length issues = 1);
      let i = List.hd issues in
      assert (Repowatch.Types.Issue.id i = 123);
      assert (Repowatch.Types.Issue.title i = "Test issue");
      assert (Repowatch.Types.Issue.state i = "open");
      assert (Repowatch.Types.Issue.closed_at i = None);
      assert (List.length (Repowatch.Types.Issue.labels i) = 2))

(* A single PR decodes, including the PR-only numeric/boolean fields. *)
let test_decode_pr () =
  let json =
    {|{
  "metadata": {
    "repo": "test/repo",
    "year": 2025,
    "week": 1,
    "week_start": "2025-01-01",
    "week_end": "2025-01-07",
    "cached_at": "2025-01-08T00:00:00"
  },
  "issues": [],
  "prs": [
    {
      "id": 456,
      "title": "Test PR",
      "url": "https://github.com/test/repo/pull/456",
      "user": "pruser",
      "created_at": "2025-01-01T00:00:00Z",
      "updated_at": "2025-01-02T00:00:00Z",
      "closed_at": null,
      "merged_at": null,
      "body": "This is a test PR",
      "labels": ["enhancement"],
      "state": "open",
      "comments": [],
      "additions": 100,
      "deletions": 50,
      "changed_files": 5,
      "mergeable": "MERGEABLE",
      "draft": false
    }
  ],
  "good_first_issues": [],
  "discussions": [],
  "releases": [],
  "users": ["pruser"]
}|}
  in
  run_decode "test_decode_pr" json (fun data ->
      let prs = Repowatch.Types.Week_data.prs data in
      assert (List.length prs = 1);
      let p = List.hd prs in
      assert (Repowatch.Types.Pr.id p = 456);
      assert (Repowatch.Types.Pr.additions p = 100);
      assert (Repowatch.Types.Pr.deletions p = 50);
      assert (Repowatch.Types.Pr.changed_files p = 5);
      assert (not (Repowatch.Types.Pr.draft p)))

(* decode -> encode -> decode preserves the metadata. Kept manual
   (rather than via [run_decode]) to preserve the distinct failure
   messages for the two decode stages. *)
let test_roundtrip () =
  match Repowatch.Codec.decode_string empty_week_json with
  | Ok data -> (
      let encoded = Repowatch.Codec.encode_string data in
      match Repowatch.Codec.decode_string encoded with
      | Ok data2 ->
          let m1 = Repowatch.Types.Week_data.metadata data in
          let m2 = Repowatch.Types.Week_data.metadata data2 in
          assert (
            Repowatch.Types.Metadata.repo m1 = Repowatch.Types.Metadata.repo m2);
          assert (
            Repowatch.Types.Metadata.year m1 = Repowatch.Types.Metadata.year m2);
          Printf.printf "test_roundtrip: PASS\n"
      | Error e ->
          Printf.printf "test_roundtrip: FAIL (decode after encode) - %s\n" e;
          exit 1)
  | Error e ->
      Printf.printf "test_roundtrip: FAIL (initial decode) - %s\n" e;
      exit 1

(* Aggregate statistics over a document with open/closed issues and a
   merged PR. *)
let test_stats () =
  let json =
    {|{
  "metadata": {
    "repo": "test/repo",
    "year": 2025,
    "week": 1,
    "week_start": "2025-01-01",
    "week_end": "2025-01-07",
    "cached_at": "2025-01-08T00:00:00"
  },
  "issues": [
    {
      "id": 1,
      "title": "Open issue",
      "url": "https://github.com/test/repo/issues/1",
      "user": "user1",
      "created_at": "2025-01-01T00:00:00Z",
      "updated_at": "2025-01-02T00:00:00Z",
      "closed_at": null,
      "body": "Open",
      "labels": [],
      "state": "open",
      "comments": []
    },
    {
      "id": 2,
      "title": "Closed issue",
      "url": "https://github.com/test/repo/issues/2",
      "user": "user2",
      "created_at": "2025-01-01T00:00:00Z",
      "updated_at": "2025-01-02T00:00:00Z",
      "closed_at": "2025-01-03T00:00:00Z",
      "body": "Closed",
      "labels": [],
      "state": "closed",
      "comments": []
    }
  ],
  "prs": [
    {
      "id": 10,
      "title": "Merged PR",
      "url": "https://github.com/test/repo/pull/10",
      "user": "user1",
      "created_at": "2025-01-01T00:00:00Z",
      "updated_at": "2025-01-02T00:00:00Z",
      "closed_at": "2025-01-03T00:00:00Z",
      "merged_at": "2025-01-03T00:00:00Z",
      "body": "Merged",
      "labels": [],
      "state": "closed",
      "comments": [],
      "additions": 50,
      "deletions": 25,
      "changed_files": 3,
      "mergeable": "UNKNOWN",
      "draft": false
    }
  ],
  "good_first_issues": [],
  "discussions": [],
  "releases": [],
  "users": ["user1", "user2"]
}|}
  in
  run_decode "test_stats" json (fun data ->
      let stats = Repowatch.Printer.compute_stats data in
      assert (stats.total_issues = 2);
      assert (stats.open_issues = 1);
      assert (stats.closed_issues = 1);
      assert (stats.total_prs = 1);
      assert (stats.merged_prs = 1);
      assert (stats.total_additions = 50);
      assert (stats.total_deletions = 25);
      assert (stats.total_users = 2))

let () =
  Printf.printf "Running repowatch codec tests...\n";
  test_decode_metadata ();
  test_decode_issue ();
  test_decode_pr ();
  test_roundtrip ();
  test_stats ();
  Printf.printf "All tests passed!\n"
+200
repowatch/test/test_week.ml
(*---------------------------------------------------------------------------
   Copyright (c) 2026 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

(* Tests for Week module - ISO week calculations *)

(* Generic check: print PASS, or print FAIL with both values rendered
   via [to_str] and exit non-zero. The three typed wrappers below keep
   the original call sites and output format unchanged. *)
let check to_str name expected actual =
  if expected <> actual then begin
    Printf.printf "FAIL %s: expected %s, got %s\n" name (to_str expected)
      (to_str actual);
    exit 1
  end
  else Printf.printf "PASS %s\n" name

let check_equal name expected actual = check Fun.id name expected actual
let check_bool name expected actual = check string_of_bool name expected actual
let check_int name expected actual = check string_of_int name expected actual

let test_of_date () =
  (* Test well-known ISO week dates *)

  (* January 1, 2024 is a Monday - it's in week 1 of 2024 *)
  let w = Repowatch.Week.of_date ~year:2024 ~month:1 ~day:1 in
  check_int "2024-01-01 year" 2024 (Repowatch.Week.year w);
  check_int "2024-01-01 week" 1 (Repowatch.Week.week w);

  (* December 31, 2023 is a Sunday - last day of week 52 of 2023 *)
  let w = Repowatch.Week.of_date ~year:2023 ~month:12 ~day:31 in
  check_int "2023-12-31 year" 2023 (Repowatch.Week.year w);
  check_int "2023-12-31 week" 52 (Repowatch.Week.week w);

  (* January 1, 2023 is a Sunday - it's still in week 52 of 2022 *)
  let w = Repowatch.Week.of_date ~year:2023 ~month:1 ~day:1 in
  check_int "2023-01-01 year" 2022 (Repowatch.Week.year w);
  check_int "2023-01-01 week" 52 (Repowatch.Week.week w);

  (* January 2, 2023 is a Monday - it's week 1 of 2023 *)
  let w = Repowatch.Week.of_date ~year:2023 ~month:1 ~day:2 in
  check_int "2023-01-02 year" 2023 (Repowatch.Week.year w);
  check_int "2023-01-02 week" 1 (Repowatch.Week.week w);

  (* December 28, 2020 is a Monday in week 53 of 2020 (leap year) *)
  let w = Repowatch.Week.of_date ~year:2020 ~month:12 ~day:28 in
  check_int "2020-12-28 year" 2020 (Repowatch.Week.year w);
  check_int "2020-12-28 week" 53 (Repowatch.Week.week w);

  Printf.printf "All of_date tests passed\n"

let test_of_year_week () =
  let w = Repowatch.Week.of_year_week ~year:2024 ~week:3 in
  check_int "2024-W03 year" 2024 (Repowatch.Week.year w);
  check_int "2024-W03 week" 3 (Repowatch.Week.week w);
  check_equal "2024-W03 string" "2024-W03" (Repowatch.Week.to_string w);

  (* Test that invalid weeks are rejected *)
  (try
     let _ = Repowatch.Week.of_year_week ~year:2024 ~week:0 in
     Printf.printf "FAIL of_year_week invalid week 0: should have raised\n";
     exit 1
   with Invalid_argument _ ->
     Printf.printf "PASS of_year_week invalid week 0\n");

  (try
     let _ = Repowatch.Week.of_year_week ~year:2024 ~week:53 in
     Printf.printf
       "FAIL of_year_week invalid week 53 for 2024: should have raised\n";
     exit 1
   with Invalid_argument _ ->
     Printf.printf "PASS of_year_week invalid week 53 for 2024\n");

  (* 2020 has 53 weeks, so this should work *)
  let w = Repowatch.Week.of_year_week ~year:2020 ~week:53 in
  check_int "2020-W53 week" 53 (Repowatch.Week.week w);

  Printf.printf "All of_year_week tests passed\n"

let test_start_end_date () =
  (* Week 3 of 2024: Monday Jan 15 to Sunday Jan 21 *)
  let w = Repowatch.Week.of_year_week ~year:2024 ~week:3 in
  check_equal "2024-W03 start" "2024-01-15" (Repowatch.Week.start_date w);
  check_equal "2024-W03 end" "2024-01-21" (Repowatch.Week.end_date w);

  (* Week 1 of 2024: Monday Jan 1 to Sunday Jan 7 *)
  let w = Repowatch.Week.of_year_week ~year:2024 ~week:1 in
  check_equal "2024-W01 start" "2024-01-01" (Repowatch.Week.start_date w);
  check_equal "2024-W01 end" "2024-01-07" (Repowatch.Week.end_date w);

  (* Week 1 of 2023: Monday Jan 2 to Sunday Jan 8 *)
  let w = Repowatch.Week.of_year_week ~year:2023 ~week:1 in
  check_equal "2023-W01 start" "2023-01-02" (Repowatch.Week.start_date w);
  check_equal "2023-W01 end" "2023-01-08" (Repowatch.Week.end_date w);

  Printf.printf "All start_end_date tests passed\n"

let test_contains_timestamp () =
  let w = Repowatch.Week.of_year_week ~year:2024 ~week:3 in

  (* Monday at midnight should be included *)
  check_bool "contains monday start" true
    (Repowatch.Week.contains_timestamp w "2024-01-15T00:00:00Z");

  (* Sunday at 23:59:59 should be included *)
  check_bool "contains sunday end" true
    (Repowatch.Week.contains_timestamp w "2024-01-21T23:59:59Z");

  (* Middle of the week *)
  check_bool "contains wednesday" true
    (Repowatch.Week.contains_timestamp w "2024-01-17T14:30:00Z");

  (* Saturday before should not be included *)
  check_bool "excludes saturday before" false
    (Repowatch.Week.contains_timestamp w "2024-01-14T23:59:59Z");

  (* Monday after should not be included *)
  check_bool "excludes monday after" false
    (Repowatch.Week.contains_timestamp w "2024-01-22T00:00:00Z");

  (* Invalid timestamp should return false *)
  check_bool "invalid timestamp" false
    (Repowatch.Week.contains_timestamp w "not-a-timestamp");

  Printf.printf "All contains_timestamp tests passed\n"

let test_prev_next () =
  let w = Repowatch.Week.of_year_week ~year:2024 ~week:3 in

  let prev = Repowatch.Week.prev w in
  check_int "prev year" 2024 (Repowatch.Week.year prev);
  check_int "prev week" 2 (Repowatch.Week.week prev);

  let next = Repowatch.Week.next w in
  check_int "next year" 2024 (Repowatch.Week.year next);
  check_int "next week" 4 (Repowatch.Week.week next);

  (* Test year boundary *)
  let w1 = Repowatch.Week.of_year_week ~year:2024 ~week:1 in
  let prev_year = Repowatch.Week.prev w1 in
  check_int "prev crosses year - year" 2023 (Repowatch.Week.year prev_year);
  check_int "prev crosses year - week" 52 (Repowatch.Week.week prev_year);

  let w52 = Repowatch.Week.of_year_week ~year:2023 ~week:52 in
  let next_year = Repowatch.Week.next w52 in
  check_int "next crosses year - year" 2024 (Repowatch.Week.year next_year);
  check_int "next crosses year - week" 1 (Repowatch.Week.week next_year);

  Printf.printf "All prev_next tests passed\n"

let test_range () =
  let from = Repowatch.Week.of_year_week ~year:2024 ~week:2 in
  let to_ = Repowatch.Week.of_year_week ~year:2024 ~week:5 in
  let range = Repowatch.Week.range ~from ~to_ in
  check_int "range length" 4 (List.length range);
  check_equal "range[0]" "2024-W02"
    (Repowatch.Week.to_string (List.nth range 0));
  check_equal "range[3]" "2024-W05"
    (Repowatch.Week.to_string (List.nth range 3));

  (* Empty range when from > to *)
  let empty_range = Repowatch.Week.range ~from:to_ ~to_:from in
  check_int "empty range" 0 (List.length empty_range);

  (* Single week range *)
  let single = Repowatch.Week.range ~from ~to_:from in
  check_int "single week range" 1 (List.length single);

  Printf.printf "All range tests passed\n"

let test_compare () =
  let w1 = Repowatch.Week.of_year_week ~year:2024 ~week:3 in
  let w2 = Repowatch.Week.of_year_week ~year:2024 ~week:5 in
  let w3 = Repowatch.Week.of_year_week ~year:2023 ~week:52 in

  check_bool "w1 < w2" true (Repowatch.Week.compare w1 w2 < 0);
  check_bool "w2 > w1" true (Repowatch.Week.compare w2 w1 > 0);
  check_bool "w1 = w1" true (Repowatch.Week.compare w1 w1 = 0);
  check_bool "w3 < w1" true (Repowatch.Week.compare w3 w1 < 0);
  check_bool "w1 equal w1" true (Repowatch.Week.equal w1 w1);
  check_bool "w1 not equal w2" false (Repowatch.Week.equal w1 w2);

  Printf.printf "All compare tests passed\n"

let () =
  Printf.printf "Running Week module tests...\n\n";
  test_of_date ();
  test_of_year_week ();
  test_start_end_date ();
  test_contains_timestamp ();
  test_prev_next ();
  test_range ();
  test_compare ();
  Printf.printf "\nAll Week module tests passed!\n"