for assorted things
1#!/usr/bin/env -S uv run --script --quiet
2# /// script
3# requires-python = ">=3.12"
4# dependencies = ["atproto", "jinja2", "pydantic-settings"]
5# ///
6"""
7Find the longest reply thread from a Bluesky post.
8
9Usage:
10
11```bash
12./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
13```
14
15Details:
16- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
17- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
18"""
19
20import argparse
21import os
22from datetime import datetime
23from typing import Any
24
25from atproto import Client
26from atproto.exceptions import BadRequestError
27from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
28from jinja2 import Environment
29from pydantic_settings import BaseSettings, SettingsConfigDict
30
31
class Settings(BaseSettings):
    """Bluesky credentials read from environment variables or a dotenv file.

    The dotenv path is taken from the ``ENV_FILE`` environment variable and
    falls back to ``.env``. Entries that do not correspond to a field declared
    here are ignored instead of raising a validation error.
    """

    # Login identifier of the Bluesky account.
    bsky_handle: str
    # Password used together with ``bsky_handle`` when logging in.
    bsky_password: str

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file=os.environ.get("ENV_FILE", ".env"),
    )
41
42
def extract_post_uri(bluesky_url: str) -> str:
    """Convert a bsky.app post URL into the matching AT URI.

    Args:
        bluesky_url: Web URL of the form
            ``https://bsky.app/profile/<handle-or-did>/post/<post-id>``.
            Surrounding whitespace, a trailing path segment, a query string
            and a fragment are tolerated.

    Returns:
        ``at://<handle-or-did>/app.bsky.feed.post/<post-id>``. The profile
        segment is copied verbatim, so a handle is NOT resolved to a DID here.

    Raises:
        ValueError: If the URL does not have the expected shape, including
            when the post id contains characters outside ``[a-zA-Z0-9]``.
    """
    import re

    # The lookahead forces the post id to run up to a URL delimiter (or the
    # end of the string). Without it an id such as "abc-def" would silently be
    # truncated to "abc" and a different post would be looked up.
    pattern = (
        r"https?://bsky\.app/profile/([^/?#\s]+)/post/([a-zA-Z0-9]+)(?=[/?#]|$)"
    )
    match = re.match(pattern, bluesky_url.strip())
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")
    profile_did_or_handle, post_id = match.groups()

    # NOTE(review): the URI may carry a handle rather than a DID; this relies
    # on the thread lookup accepting handles - confirm, or resolve the handle
    # to a DID before building the URI.
    return f"at://{profile_did_or_handle}/app.bsky.feed.post/{post_id}"
60
61
def get_thread(client: Client, post_uri: str) -> ThreadViewPost | None:
    """Request the thread rooted at ``post_uri`` and return its view.

    Asks for up to 50 levels of replies and 2 levels of parents. Returns
    ``None`` (after printing a message) when the response is not a
    ``ThreadViewPost`` or when the request raises.
    """
    # Reply depth is kept moderate because sub-threads may be fetched again
    # explicitly; parents matter little when looking for the longest chain of
    # replies below a post.
    request_params = {"uri": post_uri, "depth": 50, "parent_height": 2}
    try:
        thread = client.app.bsky.feed.get_post_thread(request_params).thread
        if not isinstance(thread, ThreadViewPost):
            # E.g. the post was not found, is blocked, or has been deleted.
            print(
                f"Could not fetch thread or it's not a standard post thread: {post_uri}"
            )
            return None
        return thread
    except BadRequestError as e:
        print(f"Error fetching thread {post_uri}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred fetching thread {post_uri}: {e}")
    return None
87
88
def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Return the longest chain of replies starting at ``thread``.

    Args:
        client: Client used to re-fetch a reply whose own replies were not
            included in the view already in hand.
        thread: Root of the (sub)thread to search.

    Returns:
        ``thread`` followed by the views along its deepest reply branch, or an
        empty list when ``thread`` is not a ``ThreadViewPost`` carrying a post
        (e.g. a deleted or blocked post in the middle of a thread). When
        several branches tie, the first one in ``thread.replies`` wins.
    """
    if not isinstance(thread, ThreadViewPost) or not thread.post:
        return []

    longest_reply_extension: list[ThreadViewPost] = []

    for reply_view in thread.replies or []:
        # Only standard post views can be part of the path.
        if not isinstance(reply_view, ThreadViewPost) or not reply_view.post:
            continue

        current_reply_view = reply_view

        # An empty reply list is either a genuine leaf or a branch cut off by
        # the depth limit of the previous request. Re-fetch only when the post
        # does not explicitly report zero replies; otherwise every leaf of a
        # wide thread would cost one extra network round-trip.
        # NOTE(review): this trusts the server-reported ``reply_count``; a
        # missing count (None) still triggers the fetch, as before.
        reported_replies = getattr(current_reply_view.post, "reply_count", None)
        if not current_reply_view.replies and reported_replies != 0:
            fetched_reply_view = get_thread(client, current_reply_view.post.uri)
            if fetched_reply_view and fetched_reply_view.replies:
                current_reply_view = fetched_reply_view  # Use the richer view

        candidate_path = find_longest_thread_path(client, current_reply_view)
        # Strict comparison keeps the earliest branch on ties.
        if len(candidate_path) > len(longest_reply_extension):
            longest_reply_extension = candidate_path

    # Current post first, then the longest path found among its replies.
    return [thread] + longest_reply_extension
124
125
def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Extract the fields the Jinja template needs from a thread view.

    Args:
        post_view: Thread node whose ``post`` supplies author, record, URI
            and CID.

    Returns:
        A dict with the keys ``author``, ``text``, ``timestamp``, ``uri`` and
        ``cid``, or ``None`` when ``post_view`` (or its ``post``) is missing.
        ``timestamp`` is formatted as ``YYYY-MM-DD HH:MM:SS UTC``, or is
        ``"[Unknown time]"`` when the record has no parseable ``created_at``.
    """
    from datetime import timezone

    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record

    timestamp_label = "[Unknown time]"
    timestamp_str = getattr(record, "created_at", None)
    if timestamp_str:
        # Rewrite a trailing "Z" as an explicit offset so that any ISO 8601
        # timestamp parses - with or without fractional seconds, with "Z" or
        # a numeric offset.
        iso_text = timestamp_str
        if iso_text.endswith(("Z", "z")):
            iso_text = f"{iso_text[:-1]}+00:00"
        try:
            timestamp_dt = datetime.fromisoformat(iso_text)
        except ValueError:
            print(f"Warning: Could not parse timestamp {timestamp_str}")
        else:
            # The label says "UTC", so convert instead of printing whatever
            # offset the author's client happened to use. Offset-less values
            # are taken to be UTC already.
            if timestamp_dt.tzinfo is None:
                timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
            else:
                timestamp_dt = timestamp_dt.astimezone(timezone.utc)
            timestamp_label = timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_label,
        "uri": post.uri,
        "cid": post.cid,
    }
162
163
def main(post_url: str, template_str: str) -> None:
    """Find the longest reply chain under a post and print it.

    Steps: validate the URL, load credentials, log in, fetch the root thread,
    search it for the longest reply path, then render that path with Jinja.
    Every failure is reported with ``print`` and ends the function early; no
    exception is propagated to the caller.

    Args:
        post_url: bsky.app URL of the post to start from.
        template_str: Jinja template source; it is rendered with ``posts``
            bound to the list of dicts from ``format_post_for_template``.
    """
    # Validate the input before touching credentials or the network, so a
    # mistyped URL fails immediately instead of after a login round-trip.
    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return

    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return

    client = Client()
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)
    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    # Views that cannot be formatted (no post attached) are dropped.
    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]
    if not thread_data:
        print("No valid posts found in the path to render.")
        return

    template = Environment().from_string(template_str)
    output = template.render(posts=thread_data)

    # A real newline: the previous "\\n" printed a literal backslash-n.
    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")
218
219
if __name__ == "__main__":
    # Built-in Jinja template: one numbered entry per post showing author,
    # timestamp, URI and text.
    default_template = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
 URI: {{ post.uri }}
 Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""

    # The only command-line input is the URL of the post to start from.
    cli = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    cli.add_argument("post_url", help="The URL of the starting Bluesky post.")

    main(cli.parse_args().post_url, default_template)
236 main(args.post_url, default_template)