for assorted things
#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["atproto", "jinja2", "pydantic-settings"]
# ///
"""
Find the longest reply thread from a Bluesky post.

Usage:

```bash
./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
```

Details:
- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
"""

import argparse
import os
import re
from datetime import UTC, datetime
from typing import Any

from atproto import Client
from atproto.exceptions import BadRequestError
from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
from jinja2 import Environment
from pydantic_settings import BaseSettings, SettingsConfigDict

# Matches the start of a bsky.app post URL; group 1 is the profile (handle or
# DID), group 2 is the post record key. Anything after the key is ignored.
_POST_URL_PATTERN = re.compile(
    r"https?://bsky\.app/profile/([^/]+)/post/([a-zA-Z0-9]+)"
)

# Template used when the script is run from the command line.
DEFAULT_TEMPLATE = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
   URI: {{ post.uri }}
   Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""


class Settings(BaseSettings):
    """App settings loaded from environment variables.

    Values are read from the process environment and from the dotenv file
    named by ``ENV_FILE`` (default ``.env``); unknown keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )

    # Credentials passed to ``Client.login``.
    bsky_handle: str
    bsky_password: str


def extract_post_uri(bluesky_url: str) -> str:
    """Build an AT URI from a Bluesky post URL.

    Args:
        bluesky_url: A URL of the form
            ``https://bsky.app/profile/<handle-or-did>/post/<post-id>``.

    Returns:
        ``at://<handle-or-did>/app.bsky.feed.post/<post-id>``.

    Raises:
        ValueError: If the URL does not start with the expected pattern.
    """
    match = _POST_URL_PATTERN.match(bluesky_url)
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")
    profile_did_or_handle, post_id = match.groups()

    # The profile segment is used as-is: a handle is NOT resolved to a DID
    # here. This relies on get_post_thread accepting a handle as the URI
    # authority; resolve the handle first if that ever stops working.
    return f"at://{profile_did_or_handle}/app.bsky.feed.post/{post_id}"


def get_thread(
    client: Client,
    post_uri: str,
    *,
    depth: int = 50,
    parent_height: int = 2,
) -> ThreadViewPost | None:
    """Fetch the thread view for a given post URI.

    Args:
        client: A logged-in atproto client.
        post_uri: AT URI of the post whose thread should be fetched.
        depth: How many levels of replies to request.
        parent_height: How many levels of parents to request; parents matter
            little when searching for the longest *child* path.

    Returns:
        The thread view, or ``None`` if the request failed or the response
        was not a ``ThreadViewPost`` (e.g. not found, blocked or deleted).
        Failures are reported on stdout rather than raised.
    """
    try:
        response = client.app.bsky.feed.get_post_thread(
            {"uri": post_uri, "depth": depth, "parent_height": parent_height}
        )
    except BadRequestError as e:
        print(f"Error fetching thread {post_uri}: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: one failed fetch must not abort the whole walk.
        print(f"An unexpected error occurred fetching thread {post_uri}: {e}")
        return None

    if isinstance(response.thread, ThreadViewPost):
        return response.thread

    print(f"Could not fetch thread or it's not a standard post thread: {post_uri}")
    return None


def _needs_reply_fetch(view: ThreadViewPost) -> bool:
    """Return True if *view* has no loaded replies but might have some."""
    if view.replies:
        return False
    # A reply count of exactly 0 marks a definite leaf, so no request is
    # needed. A missing/None count is unknown, so fall back to fetching.
    return getattr(view.post, "reply_count", None) != 0


def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Find the longest path of replies starting from the given thread view.

    Args:
        client: A logged-in atproto client, used to re-fetch replies whose
            own children were not included in the current view.
        thread: The thread view to start from.

    Returns:
        The views along the longest reply chain, starting with *thread*
        itself. Empty if *thread* is missing, not a ``ThreadViewPost``, or
        has no post. Among equally long chains the first one found wins.
    """
    if not thread or not isinstance(thread, ThreadViewPost) or not thread.post:
        # Invalid or deleted/blocked post in the middle of a thread.
        return []

    longest_tail: list[ThreadViewPost] = []

    for reply in thread.replies or []:
        # Only valid ThreadViewPost replies can extend the path.
        if not isinstance(reply, ThreadViewPost) or not reply.post:
            continue

        # The view may have been cut off at the requested depth; fetch this
        # reply's own thread to see any deeper children.
        if _needs_reply_fetch(reply):
            refreshed = get_thread(client, reply.post.uri)
            if refreshed and refreshed.replies:
                reply = refreshed  # Use the richer view.

        candidate = find_longest_thread_path(client, reply)
        if len(candidate) > len(longest_tail):
            longest_tail = candidate

    # The current post plus the longest chain found among its replies.
    return [thread, *longest_tail]


def _parse_timestamp(timestamp_str: str) -> datetime | None:
    """Parse an ISO 8601 timestamp into an aware UTC datetime, or None."""
    try:
        # Python 3.11+ accepts a trailing "Z", explicit offsets, and
        # timestamps with or without fractional seconds.
        parsed = datetime.fromisoformat(timestamp_str)
    except ValueError:
        print(f"Warning: Could not parse timestamp {timestamp_str}")
        return None

    if parsed.tzinfo is None:
        # Without an offset we cannot honestly label the value as UTC.
        print(f"Warning: Unrecognized timestamp format {timestamp_str}")
        return None

    return parsed.astimezone(UTC)


def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Extract relevant data from a ThreadViewPost for template rendering.

    Args:
        post_view: The thread view to convert.

    Returns:
        A dict with ``author``, ``text``, ``timestamp``, ``uri`` and ``cid``
        keys, or ``None`` if the view or its post is missing. ``timestamp``
        is formatted in UTC, or ``"[Unknown time]"`` if it cannot be parsed.
    """
    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record

    timestamp_str = getattr(record, "created_at", None)
    timestamp_dt = _parse_timestamp(timestamp_str) if timestamp_str else None

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
        if timestamp_dt
        else "[Unknown time]",
        "uri": post.uri,
        "cid": post.cid,
    }


def main(post_url: str, template_str: str) -> None:
    """Find and render the longest reply thread under a Bluesky post.

    Args:
        post_url: URL of the starting Bluesky post.
        template_str: Jinja template source; it is rendered with ``posts``
            set to the list of dicts from ``format_post_for_template``.

    Every failure is reported on stdout and ends the run early; nothing is
    raised to the caller.
    """
    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return

    client = Client()
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return

    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)

    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]

    if not thread_data:
        print("No valid posts found in the path to render.")
        return

    # Render using Jinja.
    environment = Environment()
    template = environment.from_string(template_str)
    output = template.render(posts=thread_data)

    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    parser.add_argument("post_url", help="The URL of the starting Bluesky post.")
    args = parser.parse_args()

    main(args.post_url, DEFAULT_TEMPLATE)