for assorted things

find deep threads

+17
README.md
··· 11 ## scripts 12 13 - [`check-files-for-bad-links`](#check-files-for-bad-links) 14 - [`kill-processes`](#kill-processes) 15 - [`update-lights`](#update-lights) 16 - [`update-readme`](#update-readme) ··· 34 - pass exclude globs to skip (e.g. `*.md`) 35 - pass ignore-url prefixes to ignore (e.g. `http://localhost` or `https://localhost`) 36 - pass concurrency to run the checks concurrently (default is 50) 37 38 --- 39
··· 11 ## scripts 12 13 - [`check-files-for-bad-links`](#check-files-for-bad-links) 14 + - [`find-longest-bsky-thread`](#find-longest-bsky-thread) 15 - [`kill-processes`](#kill-processes) 16 - [`update-lights`](#update-lights) 17 - [`update-readme`](#update-readme) ··· 35 - pass exclude globs to skip (e.g. `*.md`) 36 - pass ignore-url prefixes to ignore (e.g. `http://localhost` or `https://localhost`) 37 - pass concurrency to run the checks concurrently (default is 50) 38 + 39 + --- 40 + 41 + ### `find-longest-bsky-thread` 42 + 43 + Find the longest reply thread from a Bluesky post. 44 + 45 + Usage: 46 + 47 + ```bash 48 + ./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23 49 + ``` 50 + 51 + Details: 52 + - uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread 53 + - uses [`jinja2`](https://github.com/pallets/jinja) to render the thread 54 55 --- 56
+236
find-longest-bsky-thread
···
#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["atproto", "jinja2", "pydantic-settings"]
# ///
"""
Find the longest reply thread from a Bluesky post.

Usage:

```bash
./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
```

Details:
- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
"""

import argparse
import os
import re
from datetime import UTC, datetime
from typing import Any

from atproto import Client
from atproto.exceptions import BadRequestError
from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
from jinja2 import Environment
from pydantic_settings import BaseSettings, SettingsConfigDict

# Matches https://bsky.app/profile/<handle-or-did>/post/<record-key>.
# Compiled once at import time instead of on every call.
_POST_URL_PATTERN = re.compile(
    r"https?://bsky\.app/profile/([^/]+)/post/([a-zA-Z0-9]+)"
)

# Template used when the script is run from the command line.
DEFAULT_TEMPLATE = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
    URI: {{ post.uri }}
    Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""


class Settings(BaseSettings):
    """App settings loaded from environment variables"""

    # The env file path can be overridden through the ENV_FILE variable.
    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )

    bsky_handle: str
    bsky_password: str


def extract_post_uri(bluesky_url: str) -> str:
    """Build the AT URI for a Bluesky post URL.

    The profile segment of the URL (a handle or a DID) is copied into the
    URI as-is; no handle-to-DID resolution is performed here.
    NOTE(review): this relies on `get_post_thread` accepting handle-based
    URIs -- confirm against the API if lookups by handle start failing.

    Args:
        bluesky_url: URL of the form
            ``https://bsky.app/profile/<handle-or-did>/post/<id>``.

    Returns:
        ``at://<handle-or-did>/app.bsky.feed.post/<id>``.

    Raises:
        ValueError: If the URL does not match the expected format.
    """
    match = _POST_URL_PATTERN.match(bluesky_url)
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")
    actor, post_id = match.groups()
    return f"at://{actor}/app.bsky.feed.post/{post_id}"


def get_thread(
    client: Client,
    post_uri: str,
    *,
    depth: int = 50,
    parent_height: int = 2,
) -> ThreadViewPost | None:
    """Fetch the thread view for a given post URI.

    Args:
        client: Logged-in atproto client.
        post_uri: AT URI of the post whose thread should be fetched.
        depth: How many levels of replies to request.
        parent_height: How many levels of parents to request; kept small
            because only the reply (child) direction is searched.

    Returns:
        The thread view, or ``None`` when the response is not a
        ``ThreadViewPost`` or the request failed. Failures are reported on
        stdout rather than raised so one bad post does not abort the search.
    """
    try:
        response = client.app.bsky.feed.get_post_thread(
            {"uri": post_uri, "depth": depth, "parent_height": parent_height}
        )
    except BadRequestError as e:
        print(f"Error fetching thread {post_uri}: {e}")
        return None
    except Exception as e:  # best-effort boundary: report and carry on
        print(f"An unexpected error occurred fetching thread {post_uri}: {e}")
        return None

    if isinstance(response.thread, ThreadViewPost):
        return response.thread

    # Any other thread type is treated as "no usable thread".
    print(f"Could not fetch thread or it's not a standard post thread: {post_uri}")
    return None


def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Find the longest path of replies starting from the given thread view.

    Replies whose children were not included in the fetched view are
    re-fetched individually, so paths deeper than a single request's depth
    are still followed.

    Args:
        client: Logged-in atproto client, used for the follow-up fetches.
        thread: Thread view to start from.

    Returns:
        The views along the longest reply chain, starting with ``thread``
        itself; an empty list when ``thread`` is missing or has no post.
        When several chains tie, the first one encountered wins.
    """
    if not thread or not isinstance(thread, ThreadViewPost) or not thread.post:
        return []

    longest_reply_extension: list[ThreadViewPost] = []

    for reply_view in thread.replies or []:
        # Only ThreadViewPost replies with a post can be followed.
        if not (isinstance(reply_view, ThreadViewPost) and reply_view.post):
            continue

        current_view = reply_view
        if not current_view.replies:
            # No children loaded: either a leaf or the request's depth limit
            # was hit. Skip the extra API call when the post itself reports
            # zero replies; fetch when the count is positive or unknown.
            reply_count = getattr(current_view.post, "reply_count", None)
            if reply_count != 0:
                fetched_view = get_thread(client, current_view.post.uri)
                if fetched_view and fetched_view.replies:
                    current_view = fetched_view  # use the richer view

        candidate_path = find_longest_thread_path(client, current_view)
        if len(candidate_path) > len(longest_reply_extension):
            longest_reply_extension = candidate_path

    # Full path = current post + the longest chain found among its replies.
    return [thread] + longest_reply_extension


def _parse_timestamp(timestamp_str: Any) -> datetime | None:
    """Parse an ISO 8601 timestamp into an aware UTC datetime, or None."""
    if not timestamp_str:
        return None
    try:
        # Python 3.11+ accepts a trailing "Z", explicit offsets, and
        # timestamps with or without fractional seconds.
        parsed = datetime.fromisoformat(timestamp_str)
    except (TypeError, ValueError):
        print(f"Warning: Could not parse timestamp {timestamp_str}")
        return None
    if parsed.tzinfo is None:
        # No offset given: treat the value as UTC so the label stays truthful.
        parsed = parsed.replace(tzinfo=UTC)
    return parsed.astimezone(UTC)


def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Extract relevant data from a ThreadViewPost for template rendering.

    Args:
        post_view: Thread view whose post should be summarised.

    Returns:
        A dict with ``author``, ``text``, ``timestamp``, ``uri`` and ``cid``
        keys, or ``None`` when the view has no post. The timestamp is
        rendered in UTC, or as ``[Unknown time]`` when missing/unparseable.
    """
    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record
    timestamp_dt = _parse_timestamp(getattr(record, "created_at", None))

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
        if timestamp_dt
        else "[Unknown time]",
        "uri": post.uri,
        "cid": post.cid,
    }


def main(post_url: str, template_str: str) -> int:
    """Find and render the longest reply thread below ``post_url``.

    Args:
        post_url: URL of the starting Bluesky post.
        template_str: Jinja2 template source; rendered with ``posts`` bound
            to the list of dicts from ``format_post_for_template``.

    Returns:
        ``0`` on success, ``1`` when any step failed (the reason is printed).
    """
    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return 1

    client = Client()
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return 1

    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return 1

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)
    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return 1

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]
    if not thread_data:
        print("No valid posts found in the path to render.")
        return 1

    # Plain-text output for the terminal, so no HTML autoescaping is set up.
    template = Environment().from_string(template_str)
    output = template.render(posts=thread_data)

    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")
    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    parser.add_argument("post_url", help="The URL of the starting Bluesky post.")
    args = parser.parse_args()

    raise SystemExit(main(args.post_url, DEFAULT_TEMPLATE))