a digital person for bluesky

Add hashtag support to Bluesky post facets

Added hashtag parsing to both reply_to_post() in bsky_utils.py and
create_new_bluesky_post() in tools/post.py. Hashtags are now properly
parsed and converted to app.bsky.richtext.facet#tag facets with correct
byte positions.

- Parse hashtags using a regex that matches #word patterns
- Create facets with proper byte slice positioning
- Include hashtag tracking in debug logging
- Tested regex with various hashtag patterns

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Changed files
+48 -3
tools
+27 -1
bsky_utils.py
··· 364 364 365 365 # Parse URLs - fixed to handle URLs at start of text 366 366 url_regex = rb"(?:^|[$|\W])(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?)" 367 - 367 + 368 368 for m in re.finditer(url_regex, text_bytes): 369 369 url = m.group(1).decode("UTF-8") 370 370 urls_found.append(url) ··· 381 381 ) 382 382 ) 383 383 logger.debug(f"[{correlation_id}] Found URL: {url}") 384 + 385 + # Parse hashtags 386 + hashtag_regex = rb"(?:^|[$|\s])#([a-zA-Z0-9_]+)" 387 + hashtags_found = [] 388 + 389 + for m in re.finditer(hashtag_regex, text_bytes): 390 + tag = m.group(1).decode("UTF-8") # Get tag without # prefix 391 + hashtags_found.append(tag) 392 + # Get byte positions for the entire hashtag including # 393 + tag_start = m.start(0) 394 + # Adjust start if there's a space/prefix 395 + if text_bytes[tag_start:tag_start+1] in (b' ', b'$'): 396 + tag_start += 1 397 + tag_end = m.end(0) 398 + facets.append( 399 + models.AppBskyRichtextFacet.Main( 400 + index=models.AppBskyRichtextFacet.ByteSlice( 401 + byteStart=tag_start, 402 + byteEnd=tag_end 403 + ), 404 + features=[models.AppBskyRichtextFacet.Tag(tag=tag)] 405 + ) 406 + ) 407 + logger.debug(f"[{correlation_id}] Found hashtag: #{tag}") 384 408 385 409 logger.debug(f"[{correlation_id}] Facet parsing complete", extra={ 386 410 'correlation_id': correlation_id, ··· 388 412 'mentions': mentions_found, 389 413 'urls_count': len(urls_found), 390 414 'urls': urls_found, 415 + 'hashtags_count': len(hashtags_found), 416 + 'hashtags': hashtags_found, 391 417 'total_facets': len(facets) 392 418 }) 393 419
+21 -2
tools/post.py
··· 134 134 135 135 # Parse URLs - fixed to handle URLs at start of text 136 136 url_regex = rb"(?:^|[$|\W])(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?)" 137 - 137 + 138 138 for m in re.finditer(url_regex, text_bytes): 139 139 url = m.group(1).decode("UTF-8") 140 140 # Adjust byte positions to account for the optional prefix ··· 147 147 }, 148 148 "features": [{"$type": "app.bsky.richtext.facet#link", "uri": url}], 149 149 }) 150 - 150 + 151 + # Parse hashtags 152 + hashtag_regex = rb"(?:^|[$|\s])#([a-zA-Z0-9_]+)" 153 + 154 + for m in re.finditer(hashtag_regex, text_bytes): 155 + tag = m.group(1).decode("UTF-8") # Get tag without # prefix 156 + # Get byte positions for the entire hashtag including # 157 + tag_start = m.start(0) 158 + # Adjust start if there's a space/prefix 159 + if text_bytes[tag_start:tag_start+1] in (b' ', b'$'): 160 + tag_start += 1 161 + tag_end = m.end(0) 162 + facets.append({ 163 + "index": { 164 + "byteStart": tag_start, 165 + "byteEnd": tag_end, 166 + }, 167 + "features": [{"$type": "app.bsky.richtext.facet#tag", "tag": tag}], 168 + }) 169 + 151 170 if facets: 152 171 post_record["facets"] = facets 153 172