#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["httpx", "rich", "numpy", "scikit-learn", "python-dateutil", "pandas", "pydantic-settings"]
# ///
"""
Predict when a GitHub repository will reach a target number of stars.

Usage:
    ./predict-github-stars owner/repo 10000

Details:
- Uses GitHub REST API to fetch star history (with timestamps).
- Fits polynomial regression (degree 1–3) to full history.
- Falls back to recent-trend linear extrapolation if the polynomial cannot
  reach the target within ten years.
- Shows recent growth rate and a caution for long-range estimates.
- Requires `GITHUB_TOKEN` in the environment for higher rate limits (optional).
"""

from __future__ import annotations

import argparse
import os
import sys
from datetime import datetime, timezone

import httpx
import numpy as np
import pandas as pd
from dateutil import parser as date_parser
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

console = Console()

PER_PAGE = 100  # stargazers requested per API page
SAMPLE_THRESHOLD = 10_000  # above this many stars, sample instead of fetching all
SAMPLE_POINTS = 200  # approximate number of sampled star indices
MIN_FIT_POINTS = 10  # fewer history points than this -> no polynomial fit
HORIZON_DAYS = 3650  # polynomial look-ahead: up to 10 years
MAX_POINTS = 1000  # history is thinned to roughly this many points


class Settings(BaseSettings):
    """Load settings (e.g. GitHub token) from environment.

    The dotenv file path is taken from the ``ENV_FILE`` environment variable
    and defaults to ``.env``; unknown keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )
    github_token: str = Field(default="", description="GitHub API token")


# ──────────────────────────────── GitHub helpers ────────────────────────────────
def _headers(token: str | None = None) -> dict[str, str]:
    """Build the request headers, adding ``Authorization`` when a token is given."""
    h = {"Accept": "application/vnd.github.v3+json"}
    if token:
        h["Authorization"] = f"token {token}"
    return h


def get_repo_data(owner: str, repo: str, token: str | None = None) -> dict:
    """Fetch the repository object for ``owner/repo`` and return the decoded JSON.

    Raises:
        httpx.HTTPStatusError: if the API answers with an error status.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}"
    with httpx.Client() as c:
        r = c.get(url, headers=_headers(token))
        r.raise_for_status()
        return r.json()


def _stargazers_url(owner: str, repo: str, page: int) -> str:
    """Return the stargazers listing URL for one page."""
    return (
        f"https://api.github.com/repos/{owner}/{repo}/stargazers"
        f"?page={page}&per_page={PER_PAGE}"
    )


def get_star_history(
    owner: str, repo: str, token: str | None, total_stars: int
) -> list[tuple[datetime, int]]:
    """Return (timestamp, cumulative_star_count) pairs, sampled if repo is huge.

    The cumulative count of an entry is derived from its position in the
    listing (page number and index within the page), so it is only as good
    as the listing order.
    NOTE(review): this presumes the API lists stargazers oldest-first -- confirm.

    Args:
        owner: Repository owner.
        repo: Repository name.
        token: Optional API token.
        total_stars: Current star count, used to decide between sampling and
            a full fetch and to anchor the final data point.

    Returns:
        Pairs sorted by timestamp. If the last fetched count is below
        ``total_stars``, a final ``(now, total_stars)`` point is appended.

    Raises:
        httpx.HTTPStatusError: on an API error status (other than the 422
            handled while sampling, see below).
    """
    hdrs = _headers(token)
    hdrs["Accept"] = "application/vnd.github.v3.star+json"  # need starred_at
    history: list[tuple[datetime, int]] = []

    if total_stars > SAMPLE_THRESHOLD:
        # sample ~200 evenly-spaced star indices, grouped by the page they live on
        step = max(1, total_stars // SAMPLE_POINTS)
        pages_needed: dict[int, set[int]] = {}
        for s in range(1, total_stars, step):
            pg0, idx = divmod(s - 1, PER_PAGE)
            pages_needed.setdefault(pg0 + 1, set()).add(idx)
        # always include final star
        last_pg0, last_idx = divmod(total_stars - 1, PER_PAGE)
        pages_needed.setdefault(last_pg0 + 1, set()).add(last_idx)

        with httpx.Client() as c:
            # Pages were inserted in ascending order, so they are requested
            # oldest-first and we can stop at the first page that is refused.
            for pg, idxs in pages_needed.items():
                r = c.get(_stargazers_url(owner, repo, pg), headers=hdrs)
                if r.status_code == 422:
                    # GitHub limits how deep this listing can be paginated and
                    # answers 422 beyond that. Keep what we have; the "now"
                    # anchor appended below covers the unreachable tail.
                    console.print(
                        f"[dim]pagination limit reached at page {pg}; "
                        "later history unavailable[/dim]"
                    )
                    break
                r.raise_for_status()
                data = r.json()
                for i in sorted(idxs):
                    if i < len(data) and "starred_at" in data[i]:
                        ts = date_parser.parse(data[i]["starred_at"])
                        history.append((ts, (pg - 1) * PER_PAGE + i + 1))
        console.print(f"[dim]sampled {len(history)} points across star history[/dim]")
    else:
        # fetch all pages
        page = 1
        with httpx.Client() as c:
            while True:
                r = c.get(_stargazers_url(owner, repo, page), headers=hdrs)
                r.raise_for_status()
                data = r.json()
                if not data:
                    break
                for i, star in enumerate(data):
                    if "starred_at" in star:
                        ts = date_parser.parse(star["starred_at"])
                        history.append((ts, (page - 1) * PER_PAGE + i + 1))
                if len(data) < PER_PAGE:
                    # a short page is the last one; skip the extra empty request
                    break
                page += 1

    # ensure order and anchor today's count
    history.sort(key=lambda t: t[0])
    if history and history[-1][1] < total_stars:
        history.append((datetime.now(timezone.utc), total_stars))
    return history


# ──────────────────────────────── modelling ─────────────────────────────────────
def best_poly_fit(
    X: np.ndarray, y: np.ndarray
) -> tuple[LinearRegression, PolynomialFeatures, int, float]:
    """Fit polynomials of degree 1, 2 and 3 and return the one with the best r².

    Args:
        X: Column vector of inputs, shape ``(n, 1)`` as built by ``predict_date``.
        y: Target values, one per row of ``X``.

    Returns:
        ``(model, poly, degree, r2)`` where ``poly`` is the fitted feature
        transformer that must be applied before calling ``model.predict``.
        On ties the lower degree wins.

    Raises:
        ValueError: if no degree yields an r² above -1 (e.g. a NaN score).

    NOTE(review): r² is measured on the same points used for fitting, which
    tends to favour the highest degree -- consider a held-out score.
    """
    best: tuple[LinearRegression, PolynomialFeatures, int, float] | None = None
    for deg in (1, 2, 3):
        poly = PolynomialFeatures(degree=deg)
        Xpoly = poly.fit_transform(X)
        model = LinearRegression().fit(Xpoly, y)
        r2 = r2_score(y, model.predict(Xpoly))
        if r2 > (best[3] if best is not None else -1.0):
            best = (model, poly, deg, r2)
    if best is None:
        raise ValueError("polynomial fit failed: no usable r² score")
    return best


def predict_date(history: list[tuple[datetime, int]], target: int) -> datetime | None:
    """Predict the date the fitted polynomial first reaches ``target`` stars.

    Time is measured in days since the first history point. The model is
    evaluated once per day, starting at the last history point, for up to
    ten years.

    Returns:
        The first day on which the prediction is at or above ``target``, or
        ``None`` when there are fewer than 10 history points or the target is
        not reached within the horizon.
    """
    if len(history) < MIN_FIT_POINTS:
        return None

    origin = history[0][0]
    X = np.array([(t - origin).total_seconds() / 86400 for t, _ in history]).reshape(
        -1, 1
    )
    y = np.array([s for _, s in history])

    model, poly, deg, r2 = best_poly_fit(X, y)
    console.print(f"[dim]best fit: degree {deg} polynomial (r² = {r2:.3f})[/dim]")

    # Evaluate the whole horizon in one call instead of one predict per day.
    future_days = X[-1, 0] + np.arange(HORIZON_DAYS)
    predicted = model.predict(poly.transform(future_days.reshape(-1, 1)))
    reached = np.flatnonzero(predicted >= target)
    if reached.size == 0:
        return None
    return origin + pd.Timedelta(days=float(future_days[reached[0]]))


# ──────────────────────────────── utils ─────────────────────────────────────────
def timeframe_str(dt: datetime) -> str:
    """Describe how far ``dt`` lies in the future as a short phrase.

    ``dt`` is compared with the current UTC time, so it must be timezone-aware.
    Weeks, months and years are whole-number floor divisions of the day count
    by 7, 30 and 365.
    """
    now = datetime.now(timezone.utc)
    if dt <= now:
        return "already reached"
    days = (dt - now).days
    if days == 0:
        return "today"
    if days == 1:
        return "tomorrow"
    if days < 7:
        return f"in {days} days"
    if days < 30:
        return f"in {days // 7} week(s)"
    if days < 365:
        return f"in {days // 30} month(s)"
    return f"in {days // 365} year(s)"


def _downsample(
    history: list[tuple[datetime, int]], limit: int = MAX_POINTS
) -> list[tuple[datetime, int]]:
    """Thin ``history`` to roughly ``limit`` points, always keeping the last one."""
    if len(history) <= limit:
        return history
    step = len(history) // limit
    thinned = history[::step]
    # Only re-add the final point when the stride did not already land on it,
    # otherwise it would be counted twice in the regression.
    if (len(history) - 1) % step != 0:
        thinned.append(history[-1])
    return thinned


def _recent_rate(history: list[tuple[datetime, int]], window: int = 30) -> float:
    """Return stars/day over the last ``window`` days, or 0.0 with < 2 points.

    The gain between the first and last history points inside the window is
    divided by the window length.
    """
    cutoff = datetime.now(timezone.utc) - pd.Timedelta(days=window)
    pts = [s for t, s in history if t >= cutoff]
    return (pts[-1] - pts[0]) / window if len(pts) >= 2 else 0.0


def _extrapolate(stars_needed: int, rate: float) -> datetime | None:
    """Linearly extrapolate the target date; ``None`` if it cannot be represented."""
    try:
        return datetime.now(timezone.utc) + pd.Timedelta(days=stars_needed / rate)
    except (OverflowError, ValueError):
        # A tiny rate pushes the date beyond what Timedelta/datetime can hold
        # (pandas signals this with a ValueError subclass).
        return None


def _report(
    target: int, current_stars: int, out_date: datetime, tf: str, rate: float
) -> None:
    """Print the result table and, for multi-year estimates, a caution."""
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("metric")
    table.add_column("value", style="white")
    table.add_row("target stars", f"{target:,}")
    table.add_row("current stars", f"{current_stars:,}")
    table.add_row("stars needed", f"{target - current_stars:,}")
    table.add_row("predicted date", out_date.strftime("%Y-%m-%d"))
    table.add_row("timeframe", tf)
    if rate:
        table.add_row("recent growth", f"{rate:.1f} stars/day")
    console.print()
    console.print(table)

    # Compare the whole phrase: a substring test for "1 year" would also match
    # "in 11 year(s)", "in 21 year(s)", ... and hide the warning for them.
    if tf.endswith("year(s)") and tf != "in 1 year(s)":
        console.print("\n[dim]⚠ prediction far in future; uncertainty high[/dim]")


def _run(owner: str, repo: str, token: str | None, target: int) -> None:
    """Fetch data, predict the target date and print the report.

    Exits the process with status 0 when the target is already met or judged
    unreachable, and with status 1 when no star history could be fetched.
    """
    repo_data = get_repo_data(owner, repo, token)
    current_stars = repo_data["stargazers_count"]
    created_at = date_parser.parse(repo_data["created_at"])

    console.print(
        Panel.fit(
            f"[bold cyan]{owner}/{repo}[/bold cyan]\n"
            f"[dim]current stars: {current_stars:,}\ncreated: {created_at:%Y-%m-%d}[/dim]",
            border_style="blue",
        )
    )

    if current_stars >= target:
        console.print("\n[green]✓ already at or above target![/green]")
        sys.exit(0)

    console.print("\nfetching star history…")
    history = get_star_history(owner, repo, token, current_stars)
    if not history:
        console.print("[red]error: no star history[/red]")
        sys.exit(1)

    history = _downsample(history)  # down-sample for speed
    console.print(f"[dim]analysing {len(history)} data points…[/dim]")

    out_date = predict_date(history, target)
    # Prefer the 30-day rate; fall back to 90 days when it is zero.
    rate = _recent_rate(history) or _recent_rate(history, 90)

    if out_date is None and rate > 0:
        out_date = _extrapolate(target - current_stars, rate)
        if out_date is not None:
            console.print(
                "[dim]poly model pessimistic; using recent growth trend[/dim]"
            )

    if out_date is None:
        console.print(
            f"[red]✗ unlikely to reach {target:,} stars in the next 10 years[/red]"
        )
        sys.exit(0)

    _report(target, current_stars, out_date, timeframe_str(out_date), rate)


# ──────────────────────────────── main ──────────────────────────────────────────
def main() -> None:
    """Parse the command line, run the prediction and report errors.

    Exits with status 1 on invalid arguments, settings errors, API errors or
    any other failure.
    """
    p = argparse.ArgumentParser(
        description="Predict when a GitHub repo will reach a target number of stars"
    )
    p.add_argument("repo", help="owner/repo")
    p.add_argument("stars", type=int, help="target star count")
    args = p.parse_args()

    owner, _, repo = args.repo.partition("/")
    if not owner or not repo:  # also rejects "owner/" and "/repo"
        console.print("[red]error: repo must be owner/repo[/red]")
        sys.exit(1)

    try:
        settings = Settings()  # load token
    except Exception as e:  # pragma: no cover
        console.print(f"[red]error loading settings: {e}[/red]")
        sys.exit(1)

    token = settings.github_token.strip() or None

    try:
        _run(owner, repo, token, args.stars)
    except httpx.HTTPStatusError as e:
        status = e.response.status_code
        if status == 404:
            msg = "repository not found"
        elif status in (403, 429):
            msg = "rate limit exceeded (set GITHUB_TOKEN)"
        else:
            msg = f"GitHub API error {status}"
        console.print(f"[red]error: {msg}[/red]")
        sys.exit(1)
    except Exception as e:  # pragma: no cover
        console.print(f"[red]error: {e}[/red]")
        sys.exit(1)


if __name__ == "__main__":
    main()