for assorted things
at main 8.4 kB view raw
#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["atproto", "jinja2", "pydantic-settings"]
# ///
"""
Find the longest reply thread from a Bluesky post.

Usage:

```bash
./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
```

Details:
- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
"""

import argparse
import os
import re
import sys
from datetime import datetime, timezone
from typing import Any

from atproto import Client
from atproto.exceptions import BadRequestError
from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
from jinja2 import Environment
from pydantic_settings import BaseSettings, SettingsConfigDict

# Matches https://bsky.app/profile/<handle-or-did>/post/<record-key>.
_POST_URL_PATTERN = re.compile(
    r"https?://bsky\.app/profile/([^/]+)/post/([a-zA-Z0-9]+)"
)


class Settings(BaseSettings):
    """App settings loaded from environment variables"""

    # The env file path can be overridden with ENV_FILE; unknown keys are ignored.
    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )

    # Login identifier passed to Client.login (required).
    bsky_handle: str
    # Password passed to Client.login (required).
    bsky_password: str
    # Base URL handed to the atproto Client.
    bsky_pds_url: str = "https://bsky.social"


def extract_post_uri(bluesky_url: str) -> str:
    """Extract the AT URI from a Bluesky post URL.

    Args:
        bluesky_url: A URL of the form
            ``https://bsky.app/profile/<handle-or-did>/post/<id>``.
            Surrounding whitespace is ignored.

    Returns:
        ``at://<handle-or-did>/app.bsky.feed.post/<id>``.

    Raises:
        ValueError: If the URL does not match the expected format.
    """
    match = _POST_URL_PATTERN.match(bluesky_url.strip())
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")
    profile_did_or_handle, post_id = match.groups()

    # The authority may be a handle rather than a DID. No resolution is done
    # here; the URI is passed to get_post_thread as-is.
    # NOTE(review): presumably the API resolves handles itself -- confirm.
    return f"at://{profile_did_or_handle}/app.bsky.feed.post/{post_id}"


def get_thread(client: Client, post_uri: str) -> ThreadViewPost | None:
    """Fetch the full thread view for a given post URI.

    Args:
        client: A logged-in atproto client.
        post_uri: AT URI of the post whose thread should be fetched.

    Returns:
        The thread view, or ``None`` when the request fails or the response
        is not a ``ThreadViewPost`` (a message is printed in both cases).
    """
    # Slightly reduced depth, as sub-threads are fetched explicitly when needed.
    depth = 50
    # Parent height is less crucial for finding the longest *child* path.
    parent_height = 2
    try:
        response = client.app.bsky.feed.get_post_thread(
            {"uri": post_uri, "depth": depth, "parent_height": parent_height}
        )
    except BadRequestError as e:
        print(f"Error fetching thread {post_uri}: {e}")
        return None
    except Exception as e:
        # Deliberate best-effort: one failed fetch must not abort the search.
        print(f"An unexpected error occurred fetching thread {post_uri}: {e}")
        return None

    if isinstance(response.thread, ThreadViewPost):
        return response.thread

    # The response carried something other than a regular thread view.
    print(f"Could not fetch thread or it's not a standard post thread: {post_uri}")
    return None


def _may_have_unloaded_replies(view: ThreadViewPost) -> bool:
    """Return False only when the post explicitly reports zero replies."""
    # An unknown or missing count is treated as "maybe", so the caller still
    # refetches exactly as it did before this check existed.
    return getattr(view.post, "reply_count", None) != 0


def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Find the longest path of replies starting from the given thread view.

    Args:
        client: A logged-in atproto client, used to refetch replies whose
            children were not included in the current view.
        thread: The view to start from.

    Returns:
        The views along the longest reply chain, starting with ``thread``
        itself; an empty list when ``thread`` is missing or has no post.
        On ties the first longest chain encountered wins.
    """
    if not thread or not isinstance(thread, ThreadViewPost) or not thread.post:
        # Base case: invalid view in the middle of a thread.
        return []

    longest_reply_extension: list[ThreadViewPost] = []

    for reply_view in thread.replies or []:
        # Recurse only on valid ThreadViewPost replies.
        if not isinstance(reply_view, ThreadViewPost) or not reply_view.post:
            continue

        current_reply_view = reply_view

        # No children loaded: they may have been cut off by the depth limit,
        # so fetch this reply's own thread -- unless the post itself reports
        # zero replies, which saves one request per leaf.
        if not current_reply_view.replies and _may_have_unloaded_replies(
            current_reply_view
        ):
            fetched_reply_view = get_thread(client, current_reply_view.post.uri)
            if fetched_reply_view and fetched_reply_view.replies:
                current_reply_view = fetched_reply_view  # Use the richer view

        recursive_path = find_longest_thread_path(client, current_reply_view)
        if len(recursive_path) > len(longest_reply_extension):
            longest_reply_extension = recursive_path

    # The full path is the current post plus the longest chain below it.
    return [thread, *longest_reply_extension]


def _parse_created_at(timestamp_str: str) -> datetime | None:
    """Parse an ISO 8601 timestamp into an aware UTC datetime, or None."""
    try:
        # Python 3.11+ accepts a trailing "Z" and timestamps without
        # fractional seconds, so no manual rewriting is needed.
        parsed = datetime.fromisoformat(timestamp_str)
    except (TypeError, ValueError):
        print(f"Warning: Could not parse timestamp {timestamp_str}")
        return None

    if parsed.tzinfo is None:
        # NOTE(review): assumes offset-less timestamps are UTC -- confirm.
        parsed = parsed.replace(tzinfo=timezone.utc)
    # Normalise so the "UTC" label in the rendered output is always true.
    return parsed.astimezone(timezone.utc)


def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Extract relevant data from a ThreadViewPost for template rendering.

    Args:
        post_view: The thread view to convert.

    Returns:
        A dict with ``author``, ``text``, ``timestamp``, ``uri`` and ``cid``
        keys, or ``None`` when the view or its post is missing.
    """
    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record

    timestamp_str = getattr(record, "created_at", None)
    timestamp_dt = _parse_created_at(timestamp_str) if timestamp_str else None

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
        if timestamp_dt
        else "[Unknown time]",
        "uri": post.uri,
        "cid": post.cid,
    }


def main(post_url: str, template_str: str) -> int:
    """Main function to find and render the longest thread.

    Args:
        post_url: URL of the Bluesky post to start from.
        template_str: Jinja2 template source; it is rendered with a ``posts``
            list of dicts produced by ``format_post_for_template``.

    Returns:
        A process exit code: 0 on success, 1 when any step fails.
    """
    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return 1

    client = Client(base_url=settings.bsky_pds_url)
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return 1

    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return 1

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)

    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return 1

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]

    if not thread_data:
        print("No valid posts found in the path to render.")
        return 1

    # Render using Jinja
    environment = Environment()
    template = environment.from_string(template_str)
    output = template.render(posts=thread_data)

    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")
    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    parser.add_argument("post_url", help="The URL of the starting Bluesky post.")
    args = parser.parse_args()

    # Default Jinja Template
    default_template = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
   URI: {{ post.uri }}
   Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""

    # Propagate failures to the shell instead of always exiting with 0.
    sys.exit(main(args.post_url, default_template))