+19
README.md
+19
README.md
···
13
13
- [`check-files-for-bad-links`](#check-files-for-bad-links)
14
14
- [`find-longest-bsky-thread`](#find-longest-bsky-thread)
15
15
- [`kill-processes`](#kill-processes)
16
+
- [`predict-github-stars`](#predict-github-stars)
16
17
- [`update-lights`](#update-lights)
17
18
- [`update-readme`](#update-readme)
18
19
···
67
68
Details:
68
69
- uses [`textual`](https://textual.textualize.io/) for the TUI
69
70
- uses [`marvin`](https://github.com/prefecthq/marvin) (built on [`pydantic-ai`](https://github.com/pydantic/pydantic-ai)) to annotate processes
71
+
72
+
---
73
+
74
+
### `predict-github-stars`
75
+
76
+
Predict when a GitHub repository will reach a target number of stars.
77
+
78
+
Usage:
79
+
80
+
```bash
81
+
./predict-github-stars anthropics/claude-dev 10000
82
+
```
83
+
84
+
Details:
85
+
- uses github api to fetch star history
86
+
- uses polynomial regression to predict future star growth
87
+
- selects the best polynomial fit (degree 1-3) by r² score
88
+
- requires `GITHUB_TOKEN` in environment for higher rate limits (optional)
70
89
71
90
---
72
91
+333
predict-github-stars
+333
predict-github-stars
···
1
+
#!/usr/bin/env -S uv run --script --quiet
2
+
# /// script
3
+
# requires-python = ">=3.12"
4
+
# dependencies = ["httpx", "rich", "numpy", "scikit-learn", "python-dateutil", "pandas", "pydantic-settings"]
5
+
# ///
6
+
"""
7
+
Predict when a GitHub repository will reach a target number of stars.
8
+
9
+
Usage:
10
+
11
+
```bash
12
+
./predict-github-stars anthropics/claude-dev 10000
13
+
```
14
+
15
+
Details:
16
+
- uses github api to fetch star history
17
+
- uses polynomial regression to predict future star growth
18
+
- selects the best polynomial fit (degree 1-3) by r² score
19
+
- requires `GITHUB_TOKEN` in environment for higher rate limits (optional)
20
+
"""
21
+
22
+
import argparse
23
+
import os
24
+
import sys
25
+
from datetime import datetime, timezone
26
+
from typing import Optional
27
+
import numpy as np
28
+
from sklearn.preprocessing import PolynomialFeatures
29
+
from sklearn.linear_model import LinearRegression
30
+
from sklearn.metrics import r2_score
31
+
import httpx
32
+
from rich.console import Console
33
+
from rich.table import Table
34
+
from rich.panel import Panel
35
+
from dateutil import parser as date_parser
36
+
import pandas as pd
37
+
from pydantic_settings import BaseSettings, SettingsConfigDict
38
+
from pydantic import Field
39
+
40
+
console = Console()
41
+
42
+
43
+
class Settings(BaseSettings):
    """App settings loaded from environment variables"""

    # ENV_FILE lets the caller point at an alternate dotenv file
    # (defaults to ".env"); unknown keys in the environment/file are
    # ignored rather than raising a validation error.
    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )

    # GitHub personal access token; empty string means unauthenticated
    # requests (lower API rate limits).
    github_token: str = Field(default="")
51
+
52
+
53
+
# ANSI escape sequences for plain (non-rich) colored terminal output.
GREY = "\033[90m"  # bright black / grey
GREEN = "\033[92m"  # bright green
YELLOW = "\033[93m"  # bright yellow
RED = "\033[91m"  # bright red
_END = "\033[0m"  # reset all attributes
58
+
59
+
60
+
def get_repo_data(owner: str, repo: str, token: Optional[str] = None) -> dict:
    """fetch basic repository data from github api

    Returns the parsed JSON body of the repository endpoint; raises
    httpx.HTTPStatusError on any non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}"

    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"token {token}"

    with httpx.Client() as client:
        resp = client.get(url, headers=headers)
    resp.raise_for_status()
    return resp.json()
72
+
73
+
74
+
def get_star_history(
    owner: str, repo: str, token: Optional[str] = None, current_stars: int = 0
) -> list[tuple[datetime, int]]:
    """fetch star history using github api stargazers endpoint

    Returns a list of (starred_at, cumulative_star_count) tuples in
    ascending star order. Repos above 10k stars are sampled (~200 points)
    instead of fetched exhaustively; raises httpx.HTTPStatusError on any
    non-2xx response.
    """
    headers = {
        "Accept": "application/vnd.github.v3.star+json"  # includes starred_at timestamps
    }
    if token:
        headers["Authorization"] = f"token {token}"

    star_history = []

    # for repos with many stars, sample across the range
    # instead of just getting the first ones
    if current_stars > 10000:
        # sample ~200 points across the star range for performance
        sample_points = 200
        step = max(1, current_stars // sample_points)

        # batch requests with a single client
        with httpx.Client() as client:
            # get samples at regular intervals
            for target_star in range(1, current_stars, step):
                # zero-based index of the target star. the previous math
                # (page = target // 100 + 1, position = target % 100 - 1)
                # broke at exact multiples of 100: position became -1,
                # passed the bounds check, and negatively indexed the
                # wrong star on the wrong page.
                idx = target_star - 1
                page = (idx // 100) + 1
                position = idx % 100

                url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?page={page}&per_page=100"
                response = client.get(url, headers=headers)
                response.raise_for_status()

                data = response.json()
                if data and position < len(data) and "starred_at" in data[position]:
                    starred_at = date_parser.parse(data[position]["starred_at"])
                    star_history.append((starred_at, target_star))

        console.print(
            f"{GREY}sampled {len(star_history)} points across star history{_END}"
        )
    else:
        # for smaller repos, get all stars
        page = 1
        per_page = 100

        with httpx.Client() as client:
            while True:
                url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?page={page}&per_page={per_page}"
                response = client.get(url, headers=headers)
                response.raise_for_status()

                data = response.json()
                if not data:
                    # empty page means we've walked past the last star
                    break

                for i, star in enumerate(data):
                    if "starred_at" in star:
                        starred_at = date_parser.parse(star["starred_at"])
                        # cumulative count follows from page offset + position
                        cumulative_stars = (page - 1) * per_page + i + 1
                        star_history.append((starred_at, cumulative_stars))

                page += 1

    return star_history
136
+
137
+
138
+
def predict_star_growth(
    star_history: list[tuple[datetime, int]], target_stars: int, current_stars: int
) -> Optional[datetime]:
    """use polynomial regression to predict when repo will reach target stars

    Fits degree 1-3 polynomials to (days since first star -> cumulative
    stars), keeps the best fit by r², then scans up to 10 years ahead for
    the first day the fitted curve reaches ``target_stars``.

    Returns the predicted datetime, or None when there is too little
    history (<10 points) or the target is not reached within 10 years.
    ``current_stars`` is currently unused but kept for interface stability.
    """
    if len(star_history) < 10:
        return None

    # convert to days since first star
    first_date = star_history[0][0]
    X = np.array(
        [(date - first_date).total_seconds() / 86400 for date, _ in star_history]
    ).reshape(-1, 1)
    y = np.array([stars for _, stars in star_history])

    # try different polynomial degrees and pick best fit
    best_r2 = -float("inf")
    best_model = None
    best_poly = None
    best_degree = 1

    for degree in range(1, 4):  # try linear, quadratic, cubic
        poly = PolynomialFeatures(degree=degree)
        X_poly = poly.fit_transform(X)

        model = LinearRegression()
        model.fit(X_poly, y)

        r2 = r2_score(y, model.predict(X_poly))

        if r2 > best_r2:
            best_r2 = r2
            best_model = model
            best_poly = poly
            best_degree = degree

    console.print(
        f"{GREY}best fit: degree {best_degree} polynomial (r² = {best_r2:.3f}){_END}"
    )

    # predict up to 10 years ahead in one vectorized pass. the old loop
    # recomputed the loop-invariant current_days and made one
    # transform/predict call per day (3650 of them); this produces the
    # same first-hit day with a single transform/predict.
    current_days = X[-1][0]
    future_days = current_days + np.arange(0, 3650, 1)
    predictions = best_model.predict(best_poly.transform(future_days.reshape(-1, 1)))

    # first day whose predicted star count meets the target
    hits = np.nonzero(predictions >= target_stars)[0]
    if hits.size:
        return first_date + pd.Timedelta(days=float(future_days[hits[0]]))

    return None  # won't reach target in 10 years
193
+
194
+
195
+
def format_timeframe(date: datetime) -> str:
    """format a future date as a human-readable timeframe

    Buckets the distance from now (UTC) into days, weeks (~7d),
    months (~30d), or years (~365d), e.g. "in 3 weeks".
    """
    days = (date - datetime.now(timezone.utc)).days

    # guard clauses for the near-term special cases
    if days < 0:
        return "already reached"
    if days == 0:
        return "today"
    if days == 1:
        return "tomorrow"
    if days < 7:
        return f"in {days} days"

    # coarser buckets: (upper bound in days, bucket size, unit name)
    for limit, size, unit in ((30, 7, "week"), (365, 30, "month")):
        if days < limit:
            count = days // size
            return f"in {count} {unit}{'s' if count > 1 else ''}"

    years = days // 365
    return f"in {years} year{'s' if years > 1 else ''}"
217
+
218
+
219
+
def main():
    """CLI entry point: parse args, fetch star data, print a prediction table.

    Exits 0 when the repo already meets the target, 1 on any error
    (bad repo format, settings failure, API error, no history).
    """
    parser = argparse.ArgumentParser(
        description="predict when a github repository will reach a target number of stars"
    )
    parser.add_argument("repo", help="repository in format owner/repo")
    parser.add_argument("stars", type=int, help="target number of stars")

    args = parser.parse_args()

    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        console.print(f"{RED}error loading settings: {e}{_END}")
        sys.exit(1)

    token = settings.github_token

    try:
        owner, repo = args.repo.split("/")
    except ValueError:
        console.print(f"{RED}error: repository must be in format owner/repo{_END}")
        sys.exit(1)

    # fetch current repo data
    try:
        repo_data = get_repo_data(owner, repo, token)
        current_stars = repo_data["stargazers_count"]
        created_at = date_parser.parse(repo_data["created_at"])

        console.print(
            Panel.fit(
                f"[bold cyan]{args.repo}[/bold cyan]\n"
                f"[dim]current stars: {current_stars:,}\n"
                f"created: {created_at.strftime('%Y-%m-%d')}[/dim]",
                border_style="blue",
            )
        )

        if current_stars >= args.stars:
            console.print(f"\n{GREEN}✓ already has {current_stars:,} stars!{_END}")
            sys.exit(0)

        console.print("\nfetching star history...")
        star_history = get_star_history(owner, repo, token, current_stars)

        if not star_history:
            console.print(f"{RED}error: no star history available{_END}")
            sys.exit(1)

        # sample the history if too large
        if len(star_history) > 1000:
            # take every nth star to get ~1000 data points
            n = len(star_history) // 1000
            star_history = star_history[::n]

        console.print(f"{GREY}analyzing {len(star_history)} data points...{_END}")

        predicted_date = predict_star_growth(star_history, args.stars, current_stars)

        if predicted_date:
            timeframe = format_timeframe(predicted_date)

            # create results table
            table = Table(show_header=True, header_style="bold magenta")
            table.add_column("metric", style="cyan")
            table.add_column("value", style="white")

            table.add_row("target stars", f"{args.stars:,}")
            table.add_row("current stars", f"{current_stars:,}")
            table.add_row("stars needed", f"{args.stars - current_stars:,}")
            table.add_row("predicted date", predicted_date.strftime("%Y-%m-%d"))
            table.add_row("timeframe", timeframe)

            # calculate recent growth rate from samples in the last 30 days
            if len(star_history) > 1:
                recent_days = 30
                recent_date = datetime.now(timezone.utc) - pd.Timedelta(
                    days=recent_days
                )
                recent = [(d, s) for d, s in star_history if d >= recent_date]
                if len(recent) > 1:
                    # divide by the actual span the samples cover, not a
                    # fixed 30 days: for sampled histories the first
                    # in-window point may be much more recent than 30 days
                    # ago, and dividing by 30 understates the rate
                    span_days = (
                        recent[-1][0] - recent[0][0]
                    ).total_seconds() / 86400
                    if span_days > 0:
                        daily_rate = (recent[-1][1] - recent[0][1]) / span_days
                        table.add_row("recent growth", f"{daily_rate:.1f} stars/day")

            console.print("\n")
            console.print(table)

            # warn on any multi-year prediction. the old substring check
            # ('"1 year" not in timeframe') wrongly suppressed the warning
            # for 11, 21, ... years because "in 11 years" contains "1 year";
            # the plural suffix exactly identifies predictions >= 2 years out.
            if timeframe.endswith("years"):
                console.print(
                    f"\n{YELLOW}⚠ prediction is far in the future and may be unreliable{_END}"
                )
        else:
            console.print(
                f"\n{RED}✗ unlikely to reach {args.stars:,} stars in the next 10 years{_END}"
            )

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            console.print(f"{RED}error: repository {args.repo} not found{_END}")
        elif e.response.status_code == 403:
            console.print(
                f"{RED}error: rate limit exceeded. set GITHUB_TOKEN environment variable{_END}"
            )
        else:
            console.print(
                f"{RED}error: github api error {e.response.status_code}{_END}"
            )
        sys.exit(1)
    except Exception as e:
        console.print(f"{RED}error: {e}{_END}")
        sys.exit(1)
330
+
331
+
332
+
# run the CLI only when executed as a script (not on import)
if __name__ == "__main__":
    main()