+17
README.md
+17
README.md
···
11
11
## scripts
12
12
13
13
- [`check-files-for-bad-links`](#check-files-for-bad-links)
14
+
- [`find-longest-bsky-thread`](#find-longest-bsky-thread)
14
15
- [`kill-processes`](#kill-processes)
15
16
- [`update-lights`](#update-lights)
16
17
- [`update-readme`](#update-readme)
···
34
35
- pass exclude globs to skip (e.g. `*.md`)
35
36
- pass ignore-url prefixes to ignore (e.g. `http://localhost` or `https://localhost`)
36
37
- pass concurrency to run the checks concurrently (default is 50)
38
+
39
+
---
40
+
41
+
### `find-longest-bsky-thread`
42
+
43
+
Find the longest reply thread from a Bluesky post.
44
+
45
+
Usage:
46
+
47
+
```bash
48
+
./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
49
+
```
50
+
51
+
Details:
52
+
- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
53
+
- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
37
54
38
55
---
39
56
+1
-1
check-files-for-bad-links
+1
-1
check-files-for-bad-links
···
60
60
async def _probe(client: httpx.AsyncClient, url: str) -> LinkResult:
61
61
try:
62
62
r = await client.head(url, follow_redirects=True)
63
-
if r.status_code in {405, 403}:
63
+
if r.status_code in {405, 403, 404}:
64
64
r = await client.get(url, follow_redirects=True)
65
65
return LinkResult(url, r.status_code, 200 <= r.status_code < 400, frozenset())
66
66
except Exception as exc:
+236
find-longest-bsky-thread
+236
find-longest-bsky-thread
···
1
+
#!/usr/bin/env -S uv run --script --quiet
2
+
# /// script
3
+
# requires-python = ">=3.12"
4
+
# dependencies = ["atproto", "jinja2", "pydantic-settings"]
5
+
# ///
6
+
"""
7
+
Find the longest reply thread from a Bluesky post.
8
+
9
+
Usage:
10
+
11
+
```bash
12
+
./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
13
+
```
14
+
15
+
Details:
16
+
- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
17
+
- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
18
+
"""
19
+
20
+
import argparse
21
+
import os
22
+
from datetime import datetime
23
+
from typing import Any
24
+
25
+
from atproto import Client
26
+
from atproto.exceptions import BadRequestError
27
+
from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
28
+
from jinja2 import Environment
29
+
from pydantic_settings import BaseSettings, SettingsConfigDict
30
+
31
+
32
+
class Settings(BaseSettings):
    """Bluesky credentials read from the environment or a dotenv file.

    The dotenv path is taken from the ``ENV_FILE`` environment variable and
    falls back to ``.env``. ``extra="ignore"`` lets that file carry keys
    that are not declared on this class.
    """

    model_config = SettingsConfigDict(
        env_file=os.getenv("ENV_FILE", ".env"),
        extra="ignore",
    )

    # Account identifier used to log in.
    bsky_handle: str
    # Password used to log in.
    bsky_password: str
41
+
42
+
43
+
def extract_post_uri(bluesky_url: str) -> str:
    """Convert a bsky.app post URL into the matching ``at://`` URI.

    Args:
        bluesky_url: Link shaped like
            ``https://bsky.app/profile/<handle-or-did>/post/<post-id>``.
            Surrounding whitespace, a trailing slash, further path segments,
            a query string and a fragment are tolerated.

    Returns:
        ``at://<handle-or-did>/app.bsky.feed.post/<post-id>``. The profile
        segment is copied verbatim: a handle stays a handle, no DID
        resolution is performed here.

    Raises:
        ValueError: If the URL does not have the expected shape.
    """
    import re

    # The post id must be followed by end-of-string or a URL delimiter, so a
    # malformed id such as "3k-abc" is rejected instead of being silently
    # truncated to "3k".
    pattern = (
        r"https?://bsky\.app/profile/([^/]+)/post/([a-zA-Z0-9]+)"
        r"(?:[/?#].*)?"
    )
    match = re.fullmatch(pattern, bluesky_url.strip())
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")
    profile_did_or_handle, post_id = match.groups()

    # NOTE(review): the URI may carry a handle rather than a DID; this relies
    # on the thread lookup accepting handles - confirm against the API.
    return f"at://{profile_did_or_handle}/app.bsky.feed.post/{post_id}"
60
+
61
+
62
+
def get_thread(client: Client, post_uri: str) -> ThreadViewPost | None:
    """Request the thread for ``post_uri`` and return its view.

    The request passes ``depth=50`` and ``parent_height=2``. Failures are
    reported with ``print`` rather than raised.

    Args:
        client: Client used to issue the ``get_post_thread`` call.
        post_uri: URI of the post whose thread is wanted.

    Returns:
        The ``ThreadViewPost`` from the response, or ``None`` when the
        response holds anything else or the request raised.
    """
    params = {"uri": post_uri, "depth": 50, "parent_height": 2}
    try:
        thread = client.app.bsky.feed.get_post_thread(params).thread
        if not isinstance(thread, ThreadViewPost):
            # Anything other than a ThreadViewPost is treated as unusable.
            print(
                f"Could not fetch thread or it's not a standard post thread: {post_uri}"
            )
            return None
        return thread
    except BadRequestError as e:
        print(f"Error fetching thread {post_uri}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred fetching thread {post_uri}: {e}")
    return None
87
+
88
+
89
+
def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Return the longest chain of replies that starts at ``thread``.

    Args:
        client: Client used to re-fetch a reply whose children were not
            included in the view it came from.
        thread: Thread view to start from; may be ``None``.

    Returns:
        ``thread`` followed by the longest reply chain found beneath it, or
        an empty list when ``thread`` is missing, is not a
        ``ThreadViewPost``, or has no post. Ties keep the first reply
        encountered.
    """
    if not thread or not isinstance(thread, ThreadViewPost) or not thread.post:
        # Missing, blocked or deleted post: nothing to extend.
        return []

    longest_reply_extension: list[ThreadViewPost] = []

    for reply_view in thread.replies or []:
        # Only real posts can continue a chain.
        if not isinstance(reply_view, ThreadViewPost) or not reply_view.post:
            continue

        if not reply_view.replies:
            # The view may have been cut off by the fetch depth. Only pay for
            # another request when the post does not report zero replies.
            # NOTE(review): assumes the post view exposes `reply_count`; when
            # the attribute is absent we fall back to always fetching.
            reply_count = getattr(reply_view.post, "reply_count", None)
            if reply_count is None or reply_count > 0:
                fetched_reply_view = get_thread(client, reply_view.post.uri)
                if fetched_reply_view and fetched_reply_view.replies:
                    reply_view = fetched_reply_view  # use the richer view

        candidate_path = find_longest_thread_path(client, reply_view)
        if len(candidate_path) > len(longest_reply_extension):
            longest_reply_extension = candidate_path

    # Current post first, then the best chain found among its replies.
    return [thread, *longest_reply_extension]
124
+
125
+
126
+
def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Flatten a thread view into the dict handed to the Jinja template.

    Args:
        post_view: Thread view whose ``post`` is to be rendered.

    Returns:
        ``None`` when ``post_view`` or its ``post`` is missing, otherwise a
        dict with the keys ``author``, ``text``, ``timestamp``, ``uri`` and
        ``cid``. ``timestamp`` is the record's ``created_at`` converted to
        UTC and formatted as ``YYYY-MM-DD HH:MM:SS UTC``, or
        ``"[Unknown time]"`` when it is absent or cannot be parsed.
    """
    from datetime import timezone

    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record

    timestamp_str = getattr(record, "created_at", None)
    timestamp_dt = None
    if timestamp_str:
        try:
            iso_text = timestamp_str
            # Spell a trailing "Z" as an explicit offset so parsing does not
            # depend on the interpreter accepting the "Z" suffix.
            if iso_text.endswith(("Z", "z")):
                iso_text = iso_text[:-1] + "+00:00"
            parsed = datetime.fromisoformat(iso_text)
        except (AttributeError, TypeError, ValueError):
            print(f"Warning: Could not parse timestamp {timestamp_str}")
        else:
            # The output is labelled UTC, so convert instead of printing
            # whatever offset the record happened to carry. A timestamp
            # without an offset is taken to be UTC already.
            if parsed.tzinfo is None:
                timestamp_dt = parsed.replace(tzinfo=timezone.utc)
            else:
                timestamp_dt = parsed.astimezone(timezone.utc)

    if timestamp_dt is not None:
        timestamp_text = timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
    else:
        timestamp_text = "[Unknown time]"

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_text,
        "uri": post.uri,
        "cid": post.cid,
    }
162
+
163
+
164
+
def main(post_url: str, template_str: str) -> None:
    """Find the longest reply chain under a post and print it.

    Every failure is reported with ``print`` and ends the function early;
    nothing is raised to the caller.

    Args:
        post_url: bsky.app URL of the post to start from.
        template_str: Jinja template source; it is rendered with ``posts``
            bound to the list of per-post dicts.
    """
    # Validate the URL before touching credentials or the network so that a
    # typo fails immediately.
    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return

    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return

    client = Client()
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)

    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    # Drop any view that could not be turned into template data.
    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]

    if not thread_data:
        print("No valid posts found in the path to render.")
        return

    template = Environment().from_string(template_str)
    output = template.render(posts=thread_data)

    # A real newline here; the escaped form printed a literal backslash-n.
    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")
218
+
219
+
220
+
if __name__ == "__main__":
    # Built-in Jinja template: one numbered entry per post in the path.
    default_template = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
URI: {{ post.uri }}
Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""

    # The post URL is the only command-line argument.
    cli = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    cli.add_argument("post_url", help="The URL of the starting Bluesky post.")

    main(cli.parse_args().post_url, default_template)