for assorted things
1#!/usr/bin/env -S uv run --script --quiet
2# /// script
3# requires-python = ">=3.12"
4# dependencies = ["atproto", "jinja2", "pydantic-settings"]
5# ///
6"""
7Find the longest reply thread from a Bluesky post.
8
9Usage:
10
11```bash
12./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
13```
14
15Details:
16- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
17- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
18"""
19
20import argparse
21import os
22from datetime import datetime
23from typing import Any
24
25from atproto import Client
26from atproto.exceptions import BadRequestError
27from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
28from jinja2 import Environment
29from pydantic_settings import BaseSettings, SettingsConfigDict
30
31
class Settings(BaseSettings):
    """Application settings populated from environment variables.

    The env file consulted is the one named by ``ENV_FILE`` (``.env`` when that
    variable is unset); keys that do not map to a field are ignored.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file=os.getenv("ENV_FILE", ".env"),
    )

    # Login identifier and password passed to ``Client.login``.
    bsky_handle: str
    bsky_password: str
    # Base URL handed to the atproto ``Client``.
    bsky_pds_url: str = "https://bsky.social"
42
43
def extract_post_uri(bluesky_url: str) -> str:
    """Convert a bsky.app post URL into the matching AT URI.

    Args:
        bluesky_url: A URL shaped like
            ``https://bsky.app/profile/<handle-or-did>/post/<record-key>``.
            Leading/trailing whitespace is ignored; anything after the record
            key (query string, fragment) is ignored as well.

    Returns:
        ``at://<handle-or-did>/app.bsky.feed.post/<record-key>``. The profile
        segment is copied through as-is; a handle is not resolved to a DID here.

    Raises:
        ValueError: If the URL does not have the expected shape.
    """
    import re

    # Record keys are not limited to alphanumerics: ".", "-", "_", ":" and "~"
    # are also legal, so match the whole key rather than truncating it at the
    # first such character and building a URI for a different record.
    pattern = r"https?://bsky\.app/profile/([^/]+)/post/([A-Za-z0-9._:~-]+)"
    match = re.match(pattern, bluesky_url.strip())
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")

    profile_did_or_handle, post_id = match.groups()
    return f"at://{profile_did_or_handle}/app.bsky.feed.post/{post_id}"
61
62
def get_thread(client: Client, post_uri: str) -> ThreadViewPost | None:
    """Request the thread view rooted at ``post_uri``.

    Returns the ``ThreadViewPost`` from the response, or ``None`` after
    printing a message when the response holds some other thread type or the
    request raises.
    """
    # 50 levels of replies below the post, 2 levels of parents above it.
    params = {"uri": post_uri, "depth": 50, "parent_height": 2}
    try:
        thread = client.app.bsky.feed.get_post_thread(params).thread
        if not isinstance(thread, ThreadViewPost):
            # Anything else (e.g. not found / blocked) is treated as "no thread".
            print(
                f"Could not fetch thread or it's not a standard post thread: {post_uri}"
            )
            return None
        return thread
    except BadRequestError as exc:
        print(f"Error fetching thread {post_uri}: {exc}")
        return None
    except Exception as exc:
        print(f"An unexpected error occurred fetching thread {post_uri}: {exc}")
        return None
88
89
def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Return the longest chain of replies starting at ``thread``.

    Args:
        client: Client used to re-fetch a reply whose children were not
            included in the current view.
        thread: Thread view to start from; ``None``, a non-``ThreadViewPost``
            value, or a view without a post yields an empty path.

    Returns:
        ``[thread, reply, reply-of-reply, ...]`` following the deepest branch.
        When several branches tie, the first one encountered wins.
    """
    if not thread or not isinstance(thread, ThreadViewPost) or not thread.post:
        # Base case: invalid or unavailable post in the middle of a thread.
        return []

    longest_reply_extension: list[ThreadViewPost] = []

    for reply_view in thread.replies or []:
        # Only real posts can extend the path.
        if not (isinstance(reply_view, ThreadViewPost) and reply_view.post):
            continue

        current_reply_view = reply_view

        # A reply with no loaded children may simply sit past the fetch depth,
        # so re-fetch it -- unless the post itself reports zero replies, in
        # which case the round-trip cannot add anything. A missing/None count
        # is treated as "unknown" and still fetched.
        if (
            not current_reply_view.replies
            and getattr(current_reply_view.post, "reply_count", None) != 0
        ):
            fetched_reply_view = get_thread(client, current_reply_view.post.uri)
            if fetched_reply_view and fetched_reply_view.replies:
                current_reply_view = fetched_reply_view  # use the richer view

        recursive_path = find_longest_thread_path(client, current_reply_view)
        if len(recursive_path) > len(longest_reply_extension):
            longest_reply_extension = recursive_path

    # Current post followed by the longest branch found beneath it.
    return [thread, *longest_reply_extension]
125
126
def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Flatten a thread view into the dict handed to the Jinja template.

    Args:
        post_view: Thread view whose ``post`` supplies the author, record, URI
            and CID.

    Returns:
        A dict with keys ``author``, ``text``, ``timestamp``, ``uri`` and
        ``cid``, or ``None`` when ``post_view`` or its ``post`` is missing.
        ``timestamp`` is ``"YYYY-MM-DD HH:MM:SS UTC"``, or ``"[Unknown time]"``
        when the record has no parseable ``created_at``.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record

    timestamp_str = getattr(record, "created_at", None)
    timestamp_dt = None
    if timestamp_str:
        try:
            # Accept any ISO 8601 form (with or without fractional seconds,
            # "Z" or a numeric offset). "Z" is rewritten so parsing does not
            # depend on the interpreter understanding the suffix.
            normalized = timestamp_str
            if normalized.endswith(("Z", "z")):
                normalized = normalized[:-1] + "+00:00"
            timestamp_dt = datetime.fromisoformat(normalized)
            # The output is labelled UTC, so convert offset-bearing values.
            # Naive values are left untouched.
            if timestamp_dt.tzinfo is not None:
                timestamp_dt = timestamp_dt.astimezone(timezone.utc)
        except ValueError:
            print(f"Warning: Could not parse timestamp {timestamp_str}")
            timestamp_dt = None

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
        if timestamp_dt
        else "[Unknown time]",
        "uri": post.uri,
        "cid": post.cid,
    }
163
164
def main(post_url: str, template_str: str):
    """Fetch a post's thread, find its longest reply chain, and print it.

    Args:
        post_url: bsky.app URL of the starting post.
        template_str: Jinja2 template source, rendered with ``posts`` bound to
            the list of dicts built by ``format_post_for_template``.

    Failures while loading settings, logging in, parsing the URL, or fetching
    the root thread are reported with ``print`` and end the function early.
    """
    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return

    client = Client(base_url=settings.bsky_pds_url)
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return

    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)

    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    # Views that cannot be formatted (missing post) are dropped.
    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]

    if not thread_data:
        print("No valid posts found in the path to render.")
        return

    # Render using Jinja
    environment = Environment()
    template = environment.from_string(template_str)
    output = template.render(posts=thread_data)

    # A real newline separates the banner from the progress output above; the
    # previous "\\n" printed a literal backslash-n.
    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")
219
220
if __name__ == "__main__":
    # Built-in template: one numbered entry per post in the longest path.
    default_template = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
 URI: {{ post.uri }}
 Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""

    # The only CLI input is the URL of the post to start from.
    cli = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    cli.add_argument("post_url", help="The URL of the starting Bluesky post.")

    main(cli.parse_args().post_url, default_template)
237 main(args.post_url, default_template)