+19
README.md
+19
README.md
···
13
13
- [`check-files-for-bad-links`](#check-files-for-bad-links)
14
14
- [`find-longest-bsky-thread`](#find-longest-bsky-thread)
15
15
- [`kill-processes`](#kill-processes)
16
+
- [`predict-github-stars`](#predict-github-stars)
16
17
- [`update-lights`](#update-lights)
17
18
- [`update-readme`](#update-readme)
18
19
···
67
68
Details:
68
69
- uses [`textual`](https://textual.textualize.io/) for the TUI
69
70
- uses [`marvin`](https://github.com/prefecthq/marvin) (built on [`pydantic-ai`](https://github.com/pydantic/pydantic-ai)) to annotate processes
71
+
72
+
---
73
+
74
+
### `predict-github-stars`
75
+
76
+
Predict when a GitHub repository will reach a target number of stars.
77
+
78
+
Usage:
79
+
80
+
```bash
81
+
./predict-github-stars anthropics/claude-dev 10000
82
+
```
83
+
84
+
Details:
85
+
- uses github api to fetch star history
86
+
- uses polynomial regression to predict future star growth
87
+
- selects the best polynomial fit (degree 1-3) by r² score
88
+
- requires `GITHUB_TOKEN` in environment for higher rate limits (optional)
70
89
71
90
---
72
91
+333
predict-github-stars
+333
predict-github-stars
···
1
+
#!/usr/bin/env -S uv run --script --quiet
2
+
# /// script
3
+
# requires-python = ">=3.12"
4
+
# dependencies = ["httpx", "rich", "numpy", "scikit-learn", "python-dateutil", "pandas", "pydantic-settings"]
5
+
# ///
6
+
"""
7
+
Predict when a GitHub repository will reach a target number of stars.
8
+
9
+
Usage:
10
+
11
+
```bash
12
+
./predict-github-stars anthropics/claude-dev 10000
13
+
```
14
+
15
+
Details:
16
+
- uses github api to fetch star history
17
+
- uses polynomial regression to predict future star growth
18
+
- selects the best polynomial fit (degree 1-3) by r² score
19
+
- requires `GITHUB_TOKEN` in environment for higher rate limits (optional)
20
+
"""
21
+
22
+
import argparse
23
+
import os
24
+
import sys
25
+
from datetime import datetime, timezone
26
+
from typing import Optional
27
+
import numpy as np
28
+
from sklearn.preprocessing import PolynomialFeatures
29
+
from sklearn.linear_model import LinearRegression
30
+
from sklearn.metrics import r2_score
31
+
import httpx
32
+
from rich.console import Console
33
+
from rich.table import Table
34
+
from rich.panel import Panel
35
+
from dateutil import parser as date_parser
36
+
import pandas as pd
37
+
from pydantic_settings import BaseSettings, SettingsConfigDict
38
+
from pydantic import Field
39
+
40
+
console = Console()
41
+
42
+
43
+
class Settings(BaseSettings):
    """App settings loaded from environment variables"""

    # ENV_FILE lets the caller point at an alternate dotenv file
    # (defaults to ".env"); unknown keys in the environment/file are
    # ignored rather than raising a validation error.
    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )

    # GitHub personal access token; empty string means unauthenticated
    # requests (lower API rate limits).
    github_token: str = Field(default="")
51
+
52
+
53
+
# ANSI escape sequences for plain (non-rich) colored terminal output.
GREY = "\033[90m"  # bright black / grey
GREEN = "\033[92m"  # bright green
YELLOW = "\033[93m"  # bright yellow
RED = "\033[91m"  # bright red
_END = "\033[0m"  # reset all attributes
58
+
59
+
60
+
def get_repo_data(owner: str, repo: str, token: Optional[str] = None) -> dict:
    """fetch basic repository data from github api

    Returns the parsed JSON body of the repository endpoint; raises
    httpx.HTTPStatusError on any non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}"

    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"token {token}"

    with httpx.Client() as client:
        resp = client.get(url, headers=headers)
    resp.raise_for_status()
    return resp.json()
72
+
73
+
74
+
def get_star_history(
    owner: str, repo: str, token: Optional[str] = None, current_stars: int = 0
) -> list[tuple[datetime, int]]:
    """fetch star history using github api stargazers endpoint

    Returns a list of (starred_at, cumulative_star_count) tuples in
    ascending star order. Repos above 10k stars are sampled (~200 points)
    instead of fetched exhaustively; raises httpx.HTTPStatusError on any
    non-2xx response.
    """
    headers = {
        "Accept": "application/vnd.github.v3.star+json"  # includes starred_at timestamps
    }
    if token:
        headers["Authorization"] = f"token {token}"

    star_history = []

    # for repos with many stars, sample across the range
    # instead of just getting the first ones
    if current_stars > 10000:
        # sample ~200 points across the star range for performance
        sample_points = 200
        step = max(1, current_stars // sample_points)

        # batch requests with a single client
        with httpx.Client() as client:
            # get samples at regular intervals
            for target_star in range(1, current_stars, step):
                # zero-based index of the target star. the previous math
                # (page = target // 100 + 1, position = target % 100 - 1)
                # broke at exact multiples of 100: position became -1,
                # passed the bounds check, and negatively indexed the
                # wrong star on the wrong page.
                idx = target_star - 1
                page = (idx // 100) + 1
                position = idx % 100

                url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?page={page}&per_page=100"
                response = client.get(url, headers=headers)
                response.raise_for_status()

                data = response.json()
                if data and position < len(data) and "starred_at" in data[position]:
                    starred_at = date_parser.parse(data[position]["starred_at"])
                    star_history.append((starred_at, target_star))

        console.print(
            f"{GREY}sampled {len(star_history)} points across star history{_END}"
        )
    else:
        # for smaller repos, get all stars
        page = 1
        per_page = 100

        with httpx.Client() as client:
            while True:
                url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?page={page}&per_page={per_page}"
                response = client.get(url, headers=headers)
                response.raise_for_status()

                data = response.json()
                if not data:
                    # empty page means we've walked past the last star
                    break

                for i, star in enumerate(data):
                    if "starred_at" in star:
                        starred_at = date_parser.parse(star["starred_at"])
                        # cumulative count follows from page offset + position
                        cumulative_stars = (page - 1) * per_page + i + 1
                        star_history.append((starred_at, cumulative_stars))

                page += 1

    return star_history
136
+
137
+
138
+
def predict_star_growth(
    star_history: list[tuple[datetime, int]], target_stars: int, current_stars: int
) -> Optional[datetime]:
    """use polynomial regression to predict when repo will reach target stars

    Fits degree 1-3 polynomials to (days since first star -> cumulative
    stars), keeps the best fit by r², then scans up to 10 years ahead for
    the first day the fitted curve reaches ``target_stars``.

    Returns the predicted datetime, or None when there is too little
    history (<10 points) or the target is not reached within 10 years.
    ``current_stars`` is currently unused but kept for interface stability.
    """
    if len(star_history) < 10:
        return None

    # convert to days since first star
    first_date = star_history[0][0]
    X = np.array(
        [(date - first_date).total_seconds() / 86400 for date, _ in star_history]
    ).reshape(-1, 1)
    y = np.array([stars for _, stars in star_history])

    # try different polynomial degrees and pick best fit
    best_r2 = -float("inf")
    best_model = None
    best_poly = None
    best_degree = 1

    for degree in range(1, 4):  # try linear, quadratic, cubic
        poly = PolynomialFeatures(degree=degree)
        X_poly = poly.fit_transform(X)

        model = LinearRegression()
        model.fit(X_poly, y)

        r2 = r2_score(y, model.predict(X_poly))

        if r2 > best_r2:
            best_r2 = r2
            best_model = model
            best_poly = poly
            best_degree = degree

    console.print(
        f"{GREY}best fit: degree {best_degree} polynomial (r² = {best_r2:.3f}){_END}"
    )

    # predict up to 10 years ahead in one vectorized pass. the old loop
    # recomputed the loop-invariant current_days and made one
    # transform/predict call per day (3650 of them); this produces the
    # same first-hit day with a single transform/predict.
    current_days = X[-1][0]
    future_days = current_days + np.arange(0, 3650, 1)
    predictions = best_model.predict(best_poly.transform(future_days.reshape(-1, 1)))

    # first day whose predicted star count meets the target
    hits = np.nonzero(predictions >= target_stars)[0]
    if hits.size:
        return first_date + pd.Timedelta(days=float(future_days[hits[0]]))

    return None  # won't reach target in 10 years
193
+
194
+
195
+
def format_timeframe(date: datetime) -> str:
    """format a future date as a human-readable timeframe

    Buckets the distance from now (UTC) into days, weeks (~7d),
    months (~30d), or years (~365d), e.g. "in 3 weeks".
    """
    days = (date - datetime.now(timezone.utc)).days

    # guard clauses for the near-term special cases
    if days < 0:
        return "already reached"
    if days == 0:
        return "today"
    if days == 1:
        return "tomorrow"
    if days < 7:
        return f"in {days} days"

    # coarser buckets: (upper bound in days, bucket size, unit name)
    for limit, size, unit in ((30, 7, "week"), (365, 30, "month")):
        if days < limit:
            count = days // size
            return f"in {count} {unit}{'s' if count > 1 else ''}"

    years = days // 365
    return f"in {years} year{'s' if years > 1 else ''}"
217
+
218
+
219
+
def main():
    """CLI entry point: parse args, fetch star data, print a prediction table.

    Exits 0 when the repo already meets the target, 1 on any error
    (bad repo format, settings failure, API error, no history).
    """
    parser = argparse.ArgumentParser(
        description="predict when a github repository will reach a target number of stars"
    )
    parser.add_argument("repo", help="repository in format owner/repo")
    parser.add_argument("stars", type=int, help="target number of stars")

    args = parser.parse_args()

    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        console.print(f"{RED}error loading settings: {e}{_END}")
        sys.exit(1)

    token = settings.github_token

    try:
        owner, repo = args.repo.split("/")
    except ValueError:
        console.print(f"{RED}error: repository must be in format owner/repo{_END}")
        sys.exit(1)

    # fetch current repo data
    try:
        repo_data = get_repo_data(owner, repo, token)
        current_stars = repo_data["stargazers_count"]
        created_at = date_parser.parse(repo_data["created_at"])

        console.print(
            Panel.fit(
                f"[bold cyan]{args.repo}[/bold cyan]\n"
                f"[dim]current stars: {current_stars:,}\n"
                f"created: {created_at.strftime('%Y-%m-%d')}[/dim]",
                border_style="blue",
            )
        )

        if current_stars >= args.stars:
            console.print(f"\n{GREEN}✓ already has {current_stars:,} stars!{_END}")
            sys.exit(0)

        console.print("\nfetching star history...")
        star_history = get_star_history(owner, repo, token, current_stars)

        if not star_history:
            console.print(f"{RED}error: no star history available{_END}")
            sys.exit(1)

        # sample the history if too large
        if len(star_history) > 1000:
            # take every nth star to get ~1000 data points
            n = len(star_history) // 1000
            star_history = star_history[::n]

        console.print(f"{GREY}analyzing {len(star_history)} data points...{_END}")

        predicted_date = predict_star_growth(star_history, args.stars, current_stars)

        if predicted_date:
            timeframe = format_timeframe(predicted_date)

            # create results table
            table = Table(show_header=True, header_style="bold magenta")
            table.add_column("metric", style="cyan")
            table.add_column("value", style="white")

            table.add_row("target stars", f"{args.stars:,}")
            table.add_row("current stars", f"{current_stars:,}")
            table.add_row("stars needed", f"{args.stars - current_stars:,}")
            table.add_row("predicted date", predicted_date.strftime("%Y-%m-%d"))
            table.add_row("timeframe", timeframe)

            # calculate recent growth rate from samples in the last 30 days
            if len(star_history) > 1:
                recent_days = 30
                recent_date = datetime.now(timezone.utc) - pd.Timedelta(
                    days=recent_days
                )
                recent = [(d, s) for d, s in star_history if d >= recent_date]
                if len(recent) > 1:
                    # divide by the actual span the samples cover, not a
                    # fixed 30 days: for sampled histories the first
                    # in-window point may be much more recent than 30 days
                    # ago, and dividing by 30 understates the rate
                    span_days = (
                        recent[-1][0] - recent[0][0]
                    ).total_seconds() / 86400
                    if span_days > 0:
                        daily_rate = (recent[-1][1] - recent[0][1]) / span_days
                        table.add_row("recent growth", f"{daily_rate:.1f} stars/day")

            console.print("\n")
            console.print(table)

            # warn on any multi-year prediction. the old substring check
            # ('"1 year" not in timeframe') wrongly suppressed the warning
            # for 11, 21, ... years because "in 11 years" contains "1 year";
            # the plural suffix exactly identifies predictions >= 2 years out.
            if timeframe.endswith("years"):
                console.print(
                    f"\n{YELLOW}⚠ prediction is far in the future and may be unreliable{_END}"
                )
        else:
            console.print(
                f"\n{RED}✗ unlikely to reach {args.stars:,} stars in the next 10 years{_END}"
            )

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            console.print(f"{RED}error: repository {args.repo} not found{_END}")
        elif e.response.status_code == 403:
            console.print(
                f"{RED}error: rate limit exceeded. set GITHUB_TOKEN environment variable{_END}"
            )
        else:
            console.print(
                f"{RED}error: github api error {e.response.status_code}{_END}"
            )
        sys.exit(1)
    except Exception as e:
        console.print(f"{RED}error: {e}{_END}")
        sys.exit(1)
330
+
331
+
332
+
# run the CLI only when executed as a script (not on import)
if __name__ == "__main__":
    main()