#!/usr/bin/env -S uv run --script --quiet
2# /// script
3# requires-python = ">=3.12"
4# dependencies = ["httpx", "rich", "numpy", "scikit-learn", "python-dateutil", "pandas", "pydantic-settings"]
5# ///
6"""
7Predict when a GitHub repository will reach a target number of stars.
8
9Usage:
10 ./predict-github-stars owner/repo 10000
11
12Details:
13- Uses GitHub REST API to fetch star history (with timestamps).
14- Fits polynomial regression (degree 1–3) to full history.
15- Falls back to recent‑trend linear extrapolation if the polynomial
16 cannot reach the target within ten years.
17- Shows recent growth rate and a caution for long‑range estimates.
18- Requires `GITHUB_TOKEN` in the environment for higher rate limits (optional).
19"""
20
21from __future__ import annotations
22
23import argparse
24import os
25import sys
26from datetime import datetime, timezone
27
28import httpx
29import numpy as np
30import pandas as pd
31from dateutil import parser as date_parser
32from pydantic import Field
33from pydantic_settings import BaseSettings, SettingsConfigDict
34from rich.console import Console
35from rich.panel import Panel
36from rich.table import Table
37from sklearn.linear_model import LinearRegression
38from sklearn.metrics import r2_score
39from sklearn.preprocessing import PolynomialFeatures
40
# Shared Rich console; all script output (panels, tables, dim notes) goes here.
console = Console()
42
43
class Settings(BaseSettings):
    """Load settings (e.g. GitHub token) from environment."""

    # ENV_FILE env var can point at an alternative dotenv file; unknown
    # variables in the environment/.env are ignored rather than rejected.
    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )
    # Populated from the GITHUB_TOKEN env var (pydantic-settings matches the
    # field name case-insensitively); empty string means "no token".
    github_token: str = Field(default="", description="GitHub API token")
51
52
53# ──────────────────────────────── GitHub helpers ────────────────────────────────
54
55
56def _headers(token: str | None = None) -> dict[str, str]:
57 h = {"Accept": "application/vnd.github.v3+json"}
58 if token:
59 h["Authorization"] = f"token {token}"
60 return h
61
62
def get_repo_data(owner: str, repo: str, token: str | None = None) -> dict:
    """Fetch repository metadata (stars, created_at, ...) from the GitHub API.

    Raises httpx.HTTPStatusError on any non-2xx response.
    """
    endpoint = f"https://api.github.com/repos/{owner}/{repo}"
    with httpx.Client() as client:
        response = client.get(endpoint, headers=_headers(token))
        response.raise_for_status()
        return response.json()
69
70
def get_star_history(
    owner: str, repo: str, token: str | None, total_stars: int
) -> list[tuple[datetime, int]]:
    """Return (timestamp, cumulative_star_count) pairs, sampled if repo is huge.

    Repos with more than 10,000 stars are sampled at ~200 evenly-spaced star
    indices so only the pages containing those indices are fetched; smaller
    repos are paged through in full (100 stargazers per page).  Raises
    httpx.HTTPStatusError on a non-2xx API response.

    NOTE(review): GitHub's stargazers endpoint is commonly described as
    capped at 400 pages (40,000 stars) — pages past that may come back
    empty, which this sampler tolerates but cannot see past.  TODO confirm
    against current API docs.
    """
    hdrs = _headers(token)
    # the .star+json media type adds a "starred_at" timestamp to each entry
    hdrs["Accept"] = "application/vnd.github.v3.star+json"  # need starred_at

    history: list[tuple[datetime, int]] = []

    if total_stars > 10_000:
        # sample ~200 evenly-spaced star indices
        sample_points = 200
        step = max(1, total_stars // sample_points)
        # map 1-based page number -> 0-based offsets wanted within that page
        pages_needed: dict[int, list[int]] = {}
        for s in range(1, total_stars, step):
            pg = (s - 1) // 100 + 1   # page holding star s (100 per page)
            idx = (s - 1) % 100       # offset of star s within that page
            pages_needed.setdefault(pg, []).append(idx)

        # always include final star
        last_pg = (total_stars - 1) // 100 + 1
        last_idx = (total_stars - 1) % 100
        pages_needed.setdefault(last_pg, []).append(last_idx)

        with httpx.Client() as c:
            for pg, idxs in pages_needed.items():
                url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?page={pg}&per_page=100"
                r = c.get(url, headers=hdrs)
                r.raise_for_status()
                data = r.json()
                # set() drops the duplicate when the final star landed on an
                # already-sampled slot; entries without starred_at are skipped
                for i in sorted(set(idxs)):
                    if i < len(data) and "starred_at" in data[i]:
                        ts = date_parser.parse(data[i]["starred_at"])
                        # (pg - 1) * 100 + i + 1 is the star's 1-based global
                        # index, i.e. the cumulative star count at that moment
                        history.append((ts, (pg - 1) * 100 + i + 1))

        console.print(f"[dim]sampled {len(history)} points across star history[/dim]")

    else:
        # fetch all pages
        page = 1
        with httpx.Client() as c:
            while True:
                url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?page={page}&per_page=100"
                r = c.get(url, headers=hdrs)
                r.raise_for_status()
                data = r.json()
                if not data:
                    break  # empty page => past the last stargazer
                for i, star in enumerate(data):
                    if "starred_at" in star:
                        ts = date_parser.parse(star["starred_at"])
                        history.append((ts, (page - 1) * 100 + i + 1))
                page += 1

    # ensure chronological order, then anchor today's total so the curve fit
    # always ends at the current star count
    history.sort(key=lambda t: t[0])
    if history and history[-1][1] < total_stars:
        history.append((datetime.now(timezone.utc), total_stars))
    return history
130
131
132# ──────────────────────────────── modelling ─────────────────────────────────────
133
134
def best_poly_fit(
    X: np.ndarray, y: np.ndarray
) -> tuple[LinearRegression, PolynomialFeatures, int, float]:
    """Fit polynomial regressions of degree 1-3 and return the best.

    Args:
        X: shape (n, 1) feature matrix (days since first star).
        y: shape (n,) cumulative star counts.

    Returns:
        (fitted model, fitted PolynomialFeatures transformer, degree, R²).

    Training R² never decreases as the degree of a nested polynomial basis
    grows, so a plain ``r2 > best_r2`` comparison would always select
    degree 3 and overfit.  A higher degree is only accepted when it improves
    training R² by a small margin over the best simpler fit.
    """
    MIN_IMPROVEMENT = 1e-3  # R² gain required to justify a higher degree
    best_r2 = -1.0
    best_model: LinearRegression | None = None
    best_poly: PolynomialFeatures | None = None
    best_deg = 1
    for deg in (1, 2, 3):
        poly = PolynomialFeatures(degree=deg)
        Xpoly = poly.fit_transform(X)
        model = LinearRegression().fit(Xpoly, y)
        r2 = r2_score(y, model.predict(Xpoly))
        # first candidate is accepted unconditionally; later (higher-degree)
        # candidates must clear the improvement margin
        required = MIN_IMPROVEMENT if best_model is not None else 0.0
        if r2 > best_r2 + required:
            best_r2, best_model, best_poly, best_deg = r2, model, poly, deg
    return best_model, best_poly, best_deg, best_r2  # type: ignore[return-value]
150
151
def predict_date(history: list[tuple[datetime, int]], target: int) -> datetime | None:
    """Predict when the star count reaches *target* from the fitted curve.

    Args:
        history: chronological (timestamp, cumulative_star_count) pairs.
        target: desired star count.

    Returns:
        The predicted datetime of the first day (scanning up to ten years
        ahead) whose fitted value reaches *target*, or None when the history
        is too short (< 10 points) or the fit never gets there.
    """
    if len(history) < 10:
        return None
    origin = history[0][0]
    # single regression feature: days elapsed since the first recorded star
    X = np.array([(t - origin).total_seconds() / 86400 for t, _ in history]).reshape(
        -1, 1
    )
    y = np.array([s for _, s in history])

    model, poly, deg, r2 = best_poly_fit(X, y)
    console.print(f"[dim]best fit: degree {deg} polynomial (r² = {r2:.3f})[/dim]")

    # Evaluate all 3650 future days in one vectorized predict() instead of
    # one model call per day; the first crossing found is identical to the
    # original day-by-day scan.
    current_day = X[-1, 0]
    future_days = current_day + np.arange(3650, dtype=float)  # up to 10 years
    preds = model.predict(poly.transform(future_days.reshape(-1, 1)))
    hits = np.nonzero(preds >= target)[0]
    if hits.size == 0:
        return None
    return origin + pd.Timedelta(days=float(future_days[hits[0]]))
170
171
172# ──────────────────────────────── utils ─────────────────────────────────────────
173
174
def timeframe_str(dt: datetime) -> str:
    """Render *dt* as a rough human-readable distance from now (UTC)."""
    now = datetime.now(timezone.utc)
    if dt <= now:
        return "already reached"
    remaining = (dt - now).days
    if remaining == 0:
        return "today"
    if remaining == 1:
        return "tomorrow"
    if remaining < 7:
        return f"in {remaining} days"
    # coarser buckets: whole weeks under a month, whole months under a year
    for limit, unit_days, unit in ((30, 7, "week"), (365, 30, "month")):
        if remaining < limit:
            return f"in {remaining // unit_days} {unit}(s)"
    return f"in {remaining // 365} year(s)"
191
192
193# ──────────────────────────────── main ──────────────────────────────────────────
194
195
def main() -> None:
    """CLI entry point: parse args, fetch history, print the prediction table.

    Exits 0 on success (or when the target is already met / deemed
    unreachable), 1 on bad input or API failure.
    """
    p = argparse.ArgumentParser(
        description="Predict when a GitHub repo will reach a target number of stars"
    )
    p.add_argument("repo", help="owner/repo")
    p.add_argument("stars", type=int, help="target star count")
    args = p.parse_args()

    if "/" not in args.repo:
        console.print("[red]error: repo must be owner/repo[/red]")
        sys.exit(1)
    # maxsplit=1 keeps any extra slashes inside the repo part
    owner, repo = args.repo.split("/", 1)

    try:
        settings = Settings()  # load token
    except Exception as e:  # pragma: no cover
        console.print(f"[red]error loading settings: {e}[/red]")
        sys.exit(1)
    # blank/whitespace token collapses to None so _headers() omits auth
    token = settings.github_token.strip() or None

    try:
        repo_data = get_repo_data(owner, repo, token)
        current_stars = repo_data["stargazers_count"]
        created_at = date_parser.parse(repo_data["created_at"])

        console.print(
            Panel.fit(
                f"[bold cyan]{owner}/{repo}[/bold cyan]\n"
                f"[dim]current stars: {current_stars:,}\ncreated: {created_at:%Y-%m-%d}[/dim]",
                border_style="blue",
            )
        )

        # nothing to predict if the target is already met
        if current_stars >= args.stars:
            console.print("\n[green]✓ already at or above target![/green]")
            sys.exit(0)

        console.print("\nfetching star history…")
        history = get_star_history(owner, repo, token, current_stars)
        if not history:
            console.print("[red]error: no star history[/red]")
            sys.exit(1)
        if len(history) > 1000:  # down-sample for speed
            step = len(history) // 1000
            # re-append the final point so the series still ends at today
            history = history[::step] + [history[-1]]

        console.print(f"[dim]analysing {len(history)} data points…[/dim]")
        poly_date = predict_date(history, args.stars)

        def recent_rate(window: int = 30) -> float:
            # stars/day over the last `window` days; 0.0 when fewer than two
            # history points fall inside the window
            cutoff = datetime.now(timezone.utc) - pd.Timedelta(days=window)
            pts = [s for t, s in history if t >= cutoff]
            return (pts[-1] - pts[0]) / window if len(pts) >= 2 else 0.0

        # 30-day rate; fall back to a 90-day window when the last month is flat
        rate = recent_rate() or recent_rate(90)

        if poly_date:
            out_date, tf = poly_date, timeframe_str(poly_date)
        elif rate > 0:
            # polynomial never reached target within 10 years: linearly
            # extrapolate the recent growth rate instead
            days_needed = (args.stars - current_stars) / rate
            out_date = datetime.now(timezone.utc) + pd.Timedelta(days=days_needed)
            tf = timeframe_str(out_date)
            console.print(
                "[dim]poly model pessimistic; using recent growth trend[/dim]"
            )
        else:
            console.print(
                f"[red]✗ unlikely to reach {args.stars:,} stars in the next 10 years[/red]"
            )
            sys.exit(0)

        table = Table(show_header=True, header_style="bold magenta")
        table.add_column("metric")
        table.add_column("value", style="white")
        table.add_row("target stars", f"{args.stars:,}")
        table.add_row("current stars", f"{current_stars:,}")
        table.add_row("stars needed", f"{args.stars - current_stars:,}")
        table.add_row("predicted date", out_date.strftime("%Y-%m-%d"))
        table.add_row("timeframe", tf)
        if rate:
            table.add_row("recent growth", f"{rate:.1f} stars/day")

        console.print()
        console.print(table)
        # warn only for estimates 2+ years out ("in 1 year(s)" is excluded)
        if "year" in tf and "1 year" not in tf:
            console.print("\n[dim]⚠ prediction far in future; uncertainty high[/dim]")

    except httpx.HTTPStatusError as e:
        # map the common GitHub failure modes to friendlier messages
        if e.response.status_code == 404:
            msg = "repository not found"
        elif e.response.status_code == 403:
            msg = "rate limit exceeded (set GITHUB_TOKEN)"
        else:
            msg = f"GitHub API error {e.response.status_code}"
        console.print(f"[red]error: {msg}[/red]")
        sys.exit(1)
    except Exception as e:  # pragma: no cover
        console.print(f"[red]error: {e}[/red]")
        sys.exit(1)
295
296
# Script entry point (the uv shebang runs this file directly).
if __name__ == "__main__":
    main()