#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["atproto", "jinja2", "pydantic-settings"]
# ///
"""
Find the longest reply thread from a Bluesky post.

Usage:

```bash
./find-longest-bsky-thread https://bsky.app/profile/nerditry.bsky.social/post/3lnofix5nlc23
```

Details:
- uses [`atproto`](https://github.com/MarshalX/atproto) to fetch the thread
- uses [`jinja2`](https://github.com/pallets/jinja) to render the thread
"""

import argparse
import os
import re
from datetime import datetime, timezone
from typing import Any

from atproto import Client
from atproto.exceptions import BadRequestError
from atproto_client.models.app.bsky.feed.defs import ThreadViewPost
from jinja2 import Environment
from pydantic_settings import BaseSettings, SettingsConfigDict

# Matches https://bsky.app/profile/<handle-or-did>/post/<record-key>.
_POST_URL_RE = re.compile(r"https?://bsky\.app/profile/([^/]+)/post/([a-zA-Z0-9]+)")

# How many reply levels to request per fetch; deeper sub-threads are
# re-fetched explicitly by find_longest_thread_path.
_THREAD_DEPTH = 50
# Ancestors are irrelevant when searching for the longest *child* path.
_PARENT_HEIGHT = 2

# Default Jinja template used when the script is run from the command line.
DEFAULT_TEMPLATE = """
{% for post in posts %}
{{ loop.index }}. {{ post.author }} at {{ post.timestamp }}
    URI: {{ post.uri }}
    Text: {{ post.text | indent(width=4, first=false) }}
{% endfor %}
"""


class Settings(BaseSettings):
    """App settings loaded from environment variables"""

    # The env file path can be overridden with the ENV_FILE variable;
    # unknown keys in the file are ignored.
    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )

    bsky_handle: str
    bsky_password: str
    bsky_pds_url: str = "https://bsky.social"


def extract_post_uri(bluesky_url: str) -> str:
    """Extract the AT URI from a Bluesky post URL.

    Args:
        bluesky_url: A URL of the form
            ``https://bsky.app/profile/<handle-or-did>/post/<id>``.

    Returns:
        An ``at://<handle-or-did>/app.bsky.feed.post/<id>`` URI.

    Raises:
        ValueError: If the URL does not match the expected format.
    """
    match = _POST_URL_RE.match(bluesky_url)
    if not match:
        raise ValueError(f"Invalid Bluesky URL format: {bluesky_url}")

    profile_did_or_handle, post_id = match.groups()

    # The profile segment may be a handle rather than a DID. It is passed
    # through unchanged; the handle is not resolved to a DID here.
    # NOTE(review): relies on get_post_thread accepting handle-based URIs.
    return f"at://{profile_did_or_handle}/app.bsky.feed.post/{post_id}"


def get_thread(client: Client, post_uri: str) -> ThreadViewPost | None:
    """Fetch the full thread view for a given post URI.

    Args:
        client: A logged-in atproto client.
        post_uri: The ``at://`` URI of the post to fetch.

    Returns:
        The thread view, or ``None`` if the request failed or the response
        is not a ``ThreadViewPost`` (a message is printed in either case).
    """
    try:
        response = client.app.bsky.feed.get_post_thread(
            {
                "uri": post_uri,
                "depth": _THREAD_DEPTH,
                "parent_height": _PARENT_HEIGHT,
            }
        )
    except BadRequestError as e:
        print(f"Error fetching thread {post_uri}: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: one failed fetch must not abort the search.
        print(f"An unexpected error occurred fetching thread {post_uri}: {e}")
        return None

    if isinstance(response.thread, ThreadViewPost):
        return response.thread

    # Anything else is not a standard post thread view.
    print(f"Could not fetch thread or it's not a standard post thread: {post_uri}")
    return None


def find_longest_thread_path(
    client: Client, thread: ThreadViewPost | None
) -> list[ThreadViewPost]:
    """Find the longest path of replies starting from the given thread view.

    Args:
        client: A logged-in atproto client, used to re-fetch sub-threads
            whose replies were not included in the current view.
        thread: The thread view to start from.

    Returns:
        The views along the longest reply chain, starting with ``thread``
        itself; an empty list if ``thread`` is missing or has no post.
    """
    if not thread or not isinstance(thread, ThreadViewPost) or not thread.post:
        # Base case: invalid or deleted/blocked post in the middle of a thread
        return []

    longest_reply_extension: list[ThreadViewPost] = []

    for reply_view in thread.replies or []:
        # Recurse only on valid ThreadViewPost replies
        if not isinstance(reply_view, ThreadViewPost) or not reply_view.post:
            continue

        current_reply_view = reply_view

        # A reply with no children loaded may simply sit at the fetch depth
        # limit, so try fetching its thread directly. Skip the network call
        # when the post itself reports zero replies; a missing/unknown count
        # still triggers the fetch.
        reply_count = getattr(current_reply_view.post, "reply_count", None)
        if not current_reply_view.replies and reply_count != 0:
            fetched_reply_view = get_thread(client, current_reply_view.post.uri)
            if fetched_reply_view and fetched_reply_view.replies:
                current_reply_view = fetched_reply_view  # Use the richer view

        recursive_path = find_longest_thread_path(client, current_reply_view)
        # Strict comparison keeps the first path found among equal lengths.
        if len(recursive_path) > len(longest_reply_extension):
            longest_reply_extension = recursive_path

    # The full path is the current post plus the longest path among its replies
    return [thread, *longest_reply_extension]


def _parse_timestamp(timestamp_str: str) -> datetime | None:
    """Parse an ISO 8601 timestamp and normalise it to UTC, or return None."""
    try:
        # Python >= 3.11 accepts a trailing "Z" and optional fractional seconds.
        parsed = datetime.fromisoformat(timestamp_str)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # No offset given: treat the value as already being UTC.
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def format_post_for_template(post_view: ThreadViewPost) -> dict[str, Any] | None:
    """Extract relevant data from a ThreadViewPost for template rendering.

    Args:
        post_view: The thread view whose post should be formatted.

    Returns:
        A dict with ``author``, ``text``, ``timestamp``, ``uri`` and ``cid``
        keys, or ``None`` if the view has no post.
    """
    if not post_view or not post_view.post:
        return None

    post = post_view.post
    record = post.record

    timestamp_str = getattr(record, "created_at", None)
    timestamp_dt = None
    if isinstance(timestamp_str, str) and timestamp_str:
        timestamp_dt = _parse_timestamp(timestamp_str)
        if timestamp_dt is None:
            print(f"Warning: Could not parse timestamp {timestamp_str}")

    return {
        "author": post.author.handle,
        "text": getattr(record, "text", "[No text content]"),
        "timestamp": timestamp_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
        if timestamp_dt
        else "[Unknown time]",
        "uri": post.uri,
        "cid": post.cid,
    }


def main(post_url: str, template_str: str):
    """Main function to find and render the longest thread.

    Args:
        post_url: The ``bsky.app`` URL of the starting post.
        template_str: Jinja template source; rendered with a ``posts`` list
            of dicts produced by ``format_post_for_template``.

    Failures (settings, login, bad URL, fetch) are reported with a printed
    message and an early return rather than an exception.
    """
    try:
        settings = Settings()  # type: ignore
    except Exception as e:
        print(
            f"Error loading settings (ensure .env file exists with BSKY_HANDLE and BSKY_PASSWORD): {e}"
        )
        return

    client = Client(base_url=settings.bsky_pds_url)
    try:
        client.login(settings.bsky_handle, settings.bsky_password)
    except Exception as e:
        print(f"Error logging into Bluesky: {e}")
        return

    try:
        post_uri = extract_post_uri(post_url)
    except ValueError as e:
        print(e)
        return

    print(f"Fetching thread for: {post_uri}")
    root_thread_view = get_thread(client, post_uri)

    if not root_thread_view:
        print("Failed to fetch the root post thread.")
        return

    print("Finding the longest thread path...")
    longest_path_views = find_longest_thread_path(client, root_thread_view)
    print(f"Found {len(longest_path_views)} post(s) in the longest path.")

    thread_data = [
        data
        for view in longest_path_views
        if (data := format_post_for_template(view)) is not None
    ]

    if not thread_data:
        print("No valid posts found in the path to render.")
        return

    # Render using Jinja
    environment = Environment()
    template = environment.from_string(template_str)
    output = template.render(posts=thread_data)

    # A real newline separates the progress messages from the rendered output.
    print("\n--- Rendered Thread ---")
    print(output)
    print("--- End Rendered Thread ---")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Find and render the longest reply thread from a Bluesky post."
    )
    parser.add_argument("post_url", help="The URL of the starting Bluesky post.")
    args = parser.parse_args()

    main(args.post_url, DEFAULT_TEMPLATE)