A fullstack app for indexing standard.site documents
7
fork

Configure Feed

Select the types of activity you want to include in your feed.

chore: improved data indexing and added more data to the client

+586 -110
+2 -1
package.json
··· 11 11 "dev:client": "cd packages/client && npm run dev", 12 12 "deploy": "cd packages/server && npm run deploy", 13 13 "deploy:client": "cd packages/client && npm run pages:deploy", 14 - "db:create": "cd packages/server && npm run db:create", 14 + "db:init": "cd packages/server && npm run db:init", 15 + "db:init:prod": "cd packages/server && npm run db:init:prod", 15 16 "db:migrate": "cd packages/server && npm run db:migrate", 16 17 "db:migrate:prod": "cd packages/server && npm run db:migrate:prod", 17 18 "secret:set": "cd packages/server && npm run secret:set"
+1 -1
packages/client/package.json
··· 6 6 "dev": "vite", 7 7 "build": "tsc && vite build", 8 8 "preview": "vite preview", 9 - "pages:deploy": "vite build && wrangler pages deploy dist --project-name=atfeeds" 9 + "deploy": "vite build && wrangler pages deploy dist" 10 10 }, 11 11 "dependencies": { 12 12 "react": "^18.2.0",
+130 -15
packages/client/src/App.tsx
··· 3 3 // API base URL - empty for same-origin (local dev), or set via env var for production 4 4 const API_URL = "https://atfeeds-api.stevedsimkins.workers.dev"; 5 5 6 + interface BskyPostRef { 7 + uri: string; 8 + cid: string; 9 + } 10 + 11 + interface Publication { 12 + url: string; 13 + name: string; 14 + description?: string; 15 + iconCid?: string; 16 + iconUrl?: string; 17 + } 18 + 6 19 interface Document { 7 20 uri: string; 8 21 did: string; 9 22 rkey: string; 10 23 title: string; 11 - path: string | null; 12 - site: string | null; 13 - content: { 24 + description?: string; 25 + path?: string; 26 + site?: string; 27 + content?: { 14 28 $type: string; 15 29 markdown?: string; 16 - } | null; 17 - textContent: string | null; 18 - publishedAt: string | null; 19 - viewUrl: string | null; 30 + }; 31 + textContent?: string; 32 + coverImageCid?: string; 33 + coverImageUrl?: string; 34 + bskyPostRef?: BskyPostRef; 35 + tags?: string[]; 36 + publishedAt?: string; 37 + updatedAt?: string; 38 + publication?: Publication; 39 + viewUrl?: string; 40 + pdsEndpoint?: string; 20 41 } 21 42 22 43 interface FeedResponse { ··· 52 73 fetchFeed(); 53 74 }, []); 54 75 55 - const formatDate = (dateString: string | null) => { 76 + const formatDate = (dateString?: string) => { 56 77 if (!dateString) return "Unknown date"; 57 78 return new Date(dateString).toLocaleDateString("en-US", { 58 79 year: "numeric", ··· 61 82 }); 62 83 }; 63 84 64 - const truncateText = (text: string | null, maxLength: number = 200) => { 85 + const truncateText = (text?: string, maxLength: number = 200) => { 65 86 if (!text) return ""; 66 87 if (text.length <= maxLength) return text; 67 88 return text.slice(0, maxLength) + "..."; 89 + }; 90 + 91 + const getDescription = (doc: Document) => { 92 + return doc.description || doc.textContent || ""; 68 93 }; 69 94 70 95 ··· 130 155 )} 131 156 </legend> 132 157 <div style={{ padding: "8px" }}> 158 + {/* Publication info */} 159 + {doc.publication && ( 160 + <div 161 
+ style={{ 162 + display: "flex", 163 + alignItems: "center", 164 + gap: "8px", 165 + marginBottom: "8px", 166 + fontSize: "0.85em", 167 + }} 168 + > 169 + {doc.publication.iconUrl && ( 170 + <img 171 + src={doc.publication.iconUrl} 172 + alt={doc.publication.name} 173 + style={{ 174 + width: "16px", 175 + height: "16px", 176 + objectFit: "cover", 177 + }} 178 + /> 179 + )} 180 + <span style={{ fontWeight: "bold" }}> 181 + {doc.publication.name} 182 + </span> 183 + </div> 184 + )} 185 + 186 + {/* Cover image */} 187 + {doc.coverImageUrl && ( 188 + <div style={{ marginBottom: "8px" }}> 189 + <img 190 + src={doc.coverImageUrl} 191 + alt={doc.title} 192 + style={{ 193 + maxWidth: "100%", 194 + maxHeight: "200px", 195 + objectFit: "cover", 196 + border: "1px solid #888", 197 + }} 198 + /> 199 + </div> 200 + )} 201 + 202 + {/* Date */} 133 203 <div 134 204 style={{ 135 205 marginBottom: "8px", ··· 138 208 }} 139 209 > 140 210 Published: {formatDate(doc.publishedAt)} 211 + {doc.updatedAt && doc.updatedAt !== doc.publishedAt && ( 212 + <> | Updated: {formatDate(doc.updatedAt)}</> 213 + )} 141 214 </div> 142 - {doc.textContent && ( 215 + 216 + {/* Description */} 217 + {getDescription(doc) && ( 143 218 <p style={{ marginBottom: "12px" }}> 144 - {truncateText(doc.textContent)} 219 + {truncateText(getDescription(doc))} 145 220 </p> 146 221 )} 147 - {doc.viewUrl && ( 148 - <div style={{ textAlign: "right" }}> 222 + 223 + {/* Tags */} 224 + {doc.tags && doc.tags.length > 0 && ( 225 + <div 226 + style={{ 227 + display: "flex", 228 + flexWrap: "wrap", 229 + gap: "4px", 230 + marginBottom: "12px", 231 + }} 232 + > 233 + {doc.tags.map((tag) => ( 234 + <span 235 + key={tag} 236 + style={{ 237 + background: "#c0c0c0", 238 + padding: "2px 6px", 239 + fontSize: "0.75em", 240 + border: "1px solid #808080", 241 + }} 242 + > 243 + {tag} 244 + </span> 245 + ))} 246 + </div> 247 + )} 248 + 249 + {/* Actions */} 250 + <div style={{ display: "flex", gap: "8px", justifyContent: "flex-end" }}> 
251 + {doc.bskyPostRef && ( 252 + <button 253 + onClick={() => 254 + window.open( 255 + `https://bsky.app/profile/${doc.did}/post/${doc.bskyPostRef!.uri.split("/").pop()}`, 256 + "_blank" 257 + ) 258 + } 259 + > 260 + View on Bluesky 261 + </button> 262 + )} 263 + {doc.viewUrl && ( 149 264 <button 150 265 onClick={() => 151 266 window.open(doc.viewUrl || "", "_blank") ··· 153 268 > 154 269 Read More 155 270 </button> 156 - </div> 157 - )} 271 + )} 272 + </div> 158 273 </div> 159 274 </fieldset> 160 275 ))}
+23
packages/server/migrations/001_add_document_fields.sql
··· 1 + -- Migration: Add full Document and Publication fields to resolved_documents 2 + -- Run with: wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --remote 3 + 4 + -- Document fields 5 + ALTER TABLE resolved_documents ADD COLUMN description TEXT; 6 + ALTER TABLE resolved_documents ADD COLUMN cover_image_cid TEXT; 7 + ALTER TABLE resolved_documents ADD COLUMN cover_image_url TEXT; 8 + ALTER TABLE resolved_documents ADD COLUMN bsky_post_ref TEXT; 9 + ALTER TABLE resolved_documents ADD COLUMN tags TEXT; 10 + ALTER TABLE resolved_documents ADD COLUMN updated_at TEXT; 11 + 12 + -- Publication fields 13 + ALTER TABLE resolved_documents ADD COLUMN pub_url TEXT; 14 + ALTER TABLE resolved_documents ADD COLUMN pub_name TEXT; 15 + ALTER TABLE resolved_documents ADD COLUMN pub_description TEXT; 16 + ALTER TABLE resolved_documents ADD COLUMN pub_icon_cid TEXT; 17 + ALTER TABLE resolved_documents ADD COLUMN pub_icon_url TEXT; 18 + 19 + -- Metadata 20 + ALTER TABLE resolved_documents ADD COLUMN pds_endpoint TEXT; 21 + 22 + -- Index for publication queries 23 + CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url);
+5 -1
packages/server/package.json
··· 4 4 "private": true, 5 5 "scripts": { 6 6 "dev": "wrangler dev", 7 - "deploy": "wrangler deploy" 7 + "deploy": "wrangler deploy", 8 + "db:init": "wrangler d1 execute atfeeds-db --file=schema.sql --local", 9 + "db:init:prod": "wrangler d1 execute atfeeds-db --file=schema.sql --remote", 10 + "db:migrate": "wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --local", 11 + "db:migrate:prod": "wrangler d1 execute atfeeds-db --file=migrations/001_add_document_fields.sql --remote" 8 12 }, 9 13 "dependencies": { 10 14 "hono": "^4.0.0"
+18 -2
packages/server/schema.sql
··· 32 32 uri TEXT PRIMARY KEY, 33 33 did TEXT NOT NULL, 34 34 rkey TEXT NOT NULL, 35 + -- Document fields 35 36 title TEXT, 37 + description TEXT, 36 38 path TEXT, 37 39 site TEXT, 38 - content TEXT, -- JSON blob 40 + content TEXT, -- JSON blob for content union 39 41 text_content TEXT, 42 + cover_image_cid TEXT, -- CID for cover image blob 43 + cover_image_url TEXT, -- Full URL: {pds}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid} 44 + bsky_post_ref TEXT, -- JSON blob for strong reference {uri, cid} 45 + tags TEXT, -- JSON array of strings 40 46 published_at TEXT, 41 - view_url TEXT, 47 + updated_at TEXT, 48 + -- Publication fields (resolved from site at:// URI) 49 + pub_url TEXT, -- Publication base URL 50 + pub_name TEXT, 51 + pub_description TEXT, 52 + pub_icon_cid TEXT, -- CID for publication icon blob 53 + pub_icon_url TEXT, -- Full URL to publication icon 54 + -- Metadata 55 + view_url TEXT, -- Constructed canonical URL (pub_url + path) 56 + pds_endpoint TEXT, -- Cached PDS endpoint for this DID 42 57 resolved_at TEXT DEFAULT (datetime('now')), 43 58 stale_at TEXT -- When this record should be re-resolved 44 59 ); 45 60 46 61 CREATE INDEX IF NOT EXISTS idx_resolved_documents_rkey ON resolved_documents(rkey DESC); 47 62 CREATE INDEX IF NOT EXISTS idx_resolved_documents_stale ON resolved_documents(stale_at); 63 + CREATE INDEX IF NOT EXISTS idx_resolved_documents_pub_url ON resolved_documents(pub_url);
+2 -1
packages/server/src/index.ts
··· 1 1 import { Hono } from "hono"; 2 2 import { cors } from "hono/cors"; 3 3 import type { Bindings } from "./types"; 4 - import { health, webhook, feed, stats, records } from "./routes"; 4 + import { health, webhook, feed, stats, records, admin } from "./routes"; 5 5 import { processDocument } from "./utils"; 6 6 7 7 const app = new Hono<{ Bindings: Bindings }>(); ··· 15 15 app.route("/feed", feed); 16 16 app.route("/stats", stats); 17 17 app.route("/records", records); 18 + app.route("/admin", admin); 18 19 19 20 // Legacy alias: /feed-raw -> /feed/raw 20 21 app.get("/feed-raw", async (c) => {
+77
packages/server/src/routes/admin.ts
··· 1 + import { Hono } from "hono"; 2 + import type { Bindings } from "../types"; 3 + 4 + const admin = new Hono<{ Bindings: Bindings }>(); 5 + 6 + // Queue all documents for re-processing 7 + admin.post("/resolve-all", async (c) => { 8 + try { 9 + const db = c.env.DB; 10 + const queue = c.env.RESOLUTION_QUEUE; 11 + 12 + // Get all records from repo_records 13 + const { results } = await db 14 + .prepare( 15 + `SELECT did, rkey FROM repo_records 16 + WHERE collection = 'site.standard.document'` 17 + ) 18 + .all<{ did: string; rkey: string }>(); 19 + 20 + if (!results || results.length === 0) { 21 + return c.json({ message: "No documents to process", queued: 0 }); 22 + } 23 + 24 + // Queue in batches of 100 (Cloudflare Queue limit) 25 + const batchSize = 100; 26 + let queued = 0; 27 + 28 + for (let i = 0; i < results.length; i += batchSize) { 29 + const batch = results.slice(i, i + batchSize); 30 + const messages = batch.map((row) => ({ 31 + body: { 32 + did: row.did, 33 + collection: "site.standard.document", 34 + rkey: row.rkey, 35 + }, 36 + })); 37 + 38 + await queue.sendBatch(messages); 39 + queued += messages.length; 40 + } 41 + 42 + return c.json({ 43 + message: "Documents queued for re-processing", 44 + queued, 45 + }); 46 + } catch (error) { 47 + return c.json( 48 + { error: "Failed to queue documents", details: String(error) }, 49 + 500 50 + ); 51 + } 52 + }); 53 + 54 + // Mark all documents as stale (alternative - lets cron handle it) 55 + admin.post("/mark-stale", async (c) => { 56 + try { 57 + const db = c.env.DB; 58 + 59 + const result = await db 60 + .prepare( 61 + `UPDATE resolved_documents SET stale_at = datetime('now', '-1 hour')` 62 + ) 63 + .run(); 64 + 65 + return c.json({ 66 + message: "All documents marked as stale", 67 + affected: result.meta.changes, 68 + }); 69 + } catch (error) { 70 + return c.json( 71 + { error: "Failed to mark documents as stale", details: String(error) }, 72 + 500 73 + ); 74 + } 75 + }); 76 + 77 + export default admin;
+76 -26
packages/server/src/routes/feed.ts
··· 1 1 import { Hono } from "hono"; 2 - import type { Bindings } from "../types"; 2 + import type { Bindings, ResolvedDocumentRow, Document, Publication, BskyPostRef } from "../types"; 3 3 4 4 const feed = new Hono<{ Bindings: Bindings }>(); 5 5 6 + /** 7 + * Transforms a database row into a Document object for the API response. 8 + */ 9 + function rowToDocument(row: ResolvedDocumentRow): Document { 10 + // Build publication object if we have publication data 11 + let publication: Publication | undefined; 12 + if (row.pub_url && row.pub_name) { 13 + publication = { 14 + url: row.pub_url, 15 + name: row.pub_name, 16 + description: row.pub_description || undefined, 17 + iconCid: row.pub_icon_cid || undefined, 18 + iconUrl: row.pub_icon_url || undefined, 19 + }; 20 + } 21 + 22 + // Parse bskyPostRef if present 23 + let bskyPostRef: BskyPostRef | undefined; 24 + if (row.bsky_post_ref) { 25 + try { 26 + bskyPostRef = JSON.parse(row.bsky_post_ref); 27 + } catch { 28 + // Ignore parse errors 29 + } 30 + } 31 + 32 + // Parse tags if present 33 + let tags: string[] | undefined; 34 + if (row.tags) { 35 + try { 36 + tags = JSON.parse(row.tags); 37 + } catch { 38 + // Ignore parse errors 39 + } 40 + } 41 + 42 + // Parse content if present 43 + let content: unknown | undefined; 44 + if (row.content) { 45 + try { 46 + content = JSON.parse(row.content); 47 + } catch { 48 + // Ignore parse errors 49 + } 50 + } 51 + 52 + return { 53 + uri: row.uri, 54 + did: row.did, 55 + rkey: row.rkey, 56 + title: row.title || "Untitled", 57 + description: row.description || undefined, 58 + path: row.path || undefined, 59 + site: row.site || undefined, 60 + content, 61 + textContent: row.text_content || undefined, 62 + coverImageCid: row.cover_image_cid || undefined, 63 + coverImageUrl: row.cover_image_url || undefined, 64 + bskyPostRef, 65 + tags, 66 + publishedAt: row.published_at || undefined, 67 + updatedAt: row.updated_at || undefined, 68 + publication, 69 + viewUrl: row.view_url || 
undefined, 70 + pdsEndpoint: row.pds_endpoint || undefined, 71 + }; 72 + } 73 + 6 74 // Get raw feed data (for client-side fetching) 7 75 // Accessible at both /feed/raw and /feed-raw (via alias in index.ts) 8 76 feed.get("/raw", async (c) => { ··· 44 112 45 113 const { results } = await db 46 114 .prepare( 47 - `SELECT uri, did, rkey, title, path, site, content, text_content, published_at, view_url 115 + `SELECT uri, did, rkey, title, description, path, site, content, text_content, 116 + cover_image_cid, cover_image_url, bsky_post_ref, tags, 117 + published_at, updated_at, pub_url, pub_name, pub_description, 118 + pub_icon_cid, pub_icon_url, view_url, pds_endpoint, 119 + resolved_at, stale_at 48 120 FROM resolved_documents 49 121 ORDER BY rkey DESC 50 122 LIMIT ? OFFSET ?` 51 123 ) 52 124 .bind(limit, offset) 53 - .all<{ 54 - uri: string; 55 - did: string; 56 - rkey: string; 57 - title: string | null; 58 - path: string | null; 59 - site: string | null; 60 - content: string | null; 61 - text_content: string | null; 62 - published_at: string | null; 63 - view_url: string | null; 64 - }>(); 125 + .all<ResolvedDocumentRow>(); 65 126 66 - const documents = (results || []).map((doc) => ({ 67 - uri: doc.uri, 68 - did: doc.did, 69 - rkey: doc.rkey, 70 - title: doc.title || "Untitled", 71 - path: doc.path, 72 - site: doc.site, 73 - content: doc.content ? JSON.parse(doc.content) : null, 74 - textContent: doc.text_content, 75 - publishedAt: doc.published_at, 76 - viewUrl: doc.view_url, 77 - })); 127 + const documents = (results || []).map(rowToDocument); 78 128 79 129 return c.json({ 80 130 count: documents.length,
+1
packages/server/src/routes/index.ts
··· 3 3 export { default as feed } from "./feed"; 4 4 export { default as stats } from "./stats"; 5 5 export { default as records } from "./records"; 6 + export { default as admin } from "./admin";
+59 -4
packages/server/src/types/index.ts
··· 32 32 33 33 export type TapEvent = TapRecordEvent | TapIdentityEvent; 34 34 35 + // Strong reference to a Bluesky post 36 + export interface BskyPostRef { 37 + uri: string; 38 + cid: string; 39 + } 40 + 41 + // Publication record from site.standard.publication 42 + export interface Publication { 43 + url: string; // Base publication URL 44 + name: string; 45 + description?: string; 46 + iconCid?: string; // CID for icon blob 47 + iconUrl?: string; // Resolved full URL to icon 48 + } 49 + 50 + // Document record from site.standard.document 35 51 export interface Document { 36 52 uri: string; 37 53 did: string; 38 54 rkey: string; 55 + // Document fields 39 56 title: string; 57 + description?: string; 58 + path?: string; 59 + site?: string; // at:// URI to publication or https:// URL 60 + content?: unknown; 61 + textContent?: string; 62 + coverImageCid?: string; // CID for cover image blob 63 + coverImageUrl?: string; // Resolved full URL to cover image 64 + bskyPostRef?: BskyPostRef; 65 + tags?: string[]; 66 + publishedAt?: string; 67 + updatedAt?: string; 68 + // Resolved publication data 69 + publication?: Publication; 70 + // Metadata 71 + viewUrl?: string; // Canonical URL (publication.url + path) 72 + pdsEndpoint?: string; // PDS endpoint used for blob URLs 73 + } 74 + 75 + // Database row for resolved_documents table 76 + export interface ResolvedDocumentRow { 77 + uri: string; 78 + did: string; 79 + rkey: string; 80 + title: string | null; 81 + description: string | null; 40 82 path: string | null; 41 83 site: string | null; 42 - content: unknown; 43 - textContent: string | null; 44 - publishedAt: string | null; 45 - viewUrl: string | null; 84 + content: string | null; 85 + text_content: string | null; 86 + cover_image_cid: string | null; 87 + cover_image_url: string | null; 88 + bsky_post_ref: string | null; 89 + tags: string | null; 90 + published_at: string | null; 91 + updated_at: string | null; 92 + pub_url: string | null; 93 + pub_name: string | 
null; 94 + pub_description: string | null; 95 + pub_icon_cid: string | null; 96 + pub_icon_url: string | null; 97 + view_url: string | null; 98 + pds_endpoint: string | null; 99 + resolved_at: string | null; 100 + stale_at: string | null; 46 101 }
+35
packages/server/src/utils/blob.ts
··· 1 + /** 2 + * Constructs a full URL to fetch a blob from a PDS. 3 + * Format: {pds}/xrpc/com.atproto.sync.getBlob?did={did}&cid={cid} 4 + */ 5 + export function buildBlobUrl(pds: string, did: string, cid: string): string { 6 + const baseUrl = pds.endsWith("/") ? pds.slice(0, -1) : pds; 7 + return `${baseUrl}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`; 8 + } 9 + 10 + /** 11 + * Extracts the CID from a blob reference object. 12 + * Blob refs can be in different formats: 13 + * - { $link: "cid" } (legacy) 14 + * - { ref: { $link: "cid" } } (current) 15 + * - { cid: "cid" } (simple) 16 + */ 17 + export function extractBlobCid(blob: unknown): string | null { 18 + if (!blob || typeof blob !== "object") return null; 19 + 20 + const b = blob as Record<string, unknown>; 21 + 22 + // Current format: { ref: { $link: "cid" } } 23 + if (b.ref && typeof b.ref === "object") { 24 + const ref = b.ref as Record<string, unknown>; 25 + if (typeof ref.$link === "string") return ref.$link; 26 + } 27 + 28 + // Legacy format: { $link: "cid" } 29 + if (typeof b.$link === "string") return b.$link; 30 + 31 + // Simple format: { cid: "cid" } 32 + if (typeof b.cid === "string") return b.cid; 33 + 34 + return null; 35 + }
+156 -59
packages/server/src/utils/document.ts
··· 1 1 import { resolvePds } from "./resolver"; 2 2 import { parseAtUri } from "./at-uri"; 3 + import { buildBlobUrl, extractBlobCid } from "./blob"; 3 4 4 - export async function resolveViewUrl( 5 + // Raw document record from PDS 6 + interface DocumentRecord { 7 + site?: string; 8 + path?: string; 9 + title?: string; 10 + description?: string; 11 + coverImage?: unknown; 12 + content?: unknown; 13 + textContent?: string; 14 + bskyPostRef?: { uri: string; cid: string }; 15 + tags?: string[]; 16 + publishedAt?: string; 17 + updatedAt?: string; 18 + } 19 + 20 + // Raw publication record from PDS 21 + interface PublicationRecord { 22 + url?: string; 23 + name?: string; 24 + description?: string; 25 + icon?: unknown; 26 + } 27 + 28 + // Resolved publication data 29 + interface ResolvedPublication { 30 + url: string; 31 + name: string; 32 + description: string | null; 33 + iconCid: string | null; 34 + iconUrl: string | null; 35 + } 36 + 37 + /** 38 + * Fetches a publication record from an at:// URI 39 + */ 40 + async function fetchPublication( 5 41 db: D1Database, 6 - siteUri: string, 7 - path: string 8 - ): Promise<string | null> { 42 + siteUri: string 43 + ): Promise<ResolvedPublication | null> { 9 44 const parsed = parseAtUri(siteUri); 10 45 if (!parsed) return null; 11 46 ··· 18 53 )}&collection=${encodeURIComponent(parsed.collection)}&rkey=${encodeURIComponent( 19 54 parsed.rkey 20 55 )}`; 56 + 21 57 const response = await fetch(url); 22 58 if (!response.ok) return null; 23 59 24 - const data = (await response.json()) as { 25 - value?: { url?: string; domain?: string }; 26 - }; 27 - const siteUrl = data.value?.url || data.value?.domain; 28 - if (!siteUrl) return null; 60 + const data = (await response.json()) as { value?: PublicationRecord }; 61 + const pub = data.value; 62 + if (!pub?.url || !pub?.name) return null; 29 63 30 - const baseUrl = siteUrl.startsWith("http") ? 
siteUrl : `https://${siteUrl}`; 31 - return `${baseUrl}${path}`; 64 + const iconCid = extractBlobCid(pub.icon); 65 + const iconUrl = iconCid ? buildBlobUrl(pds, parsed.did, iconCid) : null; 66 + 67 + return { 68 + url: pub.url, 69 + name: pub.name, 70 + description: pub.description || null, 71 + iconCid, 72 + iconUrl, 73 + }; 32 74 } catch { 33 75 return null; 34 76 } 35 77 } 36 78 79 + /** 80 + * Resolves the view URL for a document. 81 + * If site is an at:// URI, fetches the publication to get the base URL. 82 + * If site is an https:// URL, uses it directly. 83 + */ 84 + export async function resolveViewUrl( 85 + db: D1Database, 86 + siteUri: string, 87 + path: string 88 + ): Promise<string | null> { 89 + // Check if site is an at:// URI or direct URL 90 + if (siteUri.startsWith("at://")) { 91 + const pub = await fetchPublication(db, siteUri); 92 + if (!pub?.url) return null; 93 + const baseUrl = pub.url.startsWith("http") ? pub.url : `https://${pub.url}`; 94 + return `${baseUrl.replace(/\/$/, "")}${path}`; 95 + } 96 + 97 + // Direct URL 98 + const baseUrl = siteUri.startsWith("http") ? siteUri : `https://${siteUri}`; 99 + return `${baseUrl.replace(/\/$/, "")}${path}`; 100 + } 101 + 102 + /** 103 + * Processes a document record: fetches from PDS, resolves publication, 104 + * and stores all fields in resolved_documents table. 105 + */ 37 106 export async function processDocument( 38 107 db: D1Database, 39 108 did: string, ··· 48 117 return; 49 118 } 50 119 51 - // 2. Fetch Record 120 + // 2. Fetch Document Record 52 121 const url = `${pds}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent( 53 122 did 54 123 )}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`; 55 - 124 + 56 125 const response = await fetch(url); 57 126 if (!response.ok) { 58 127 if (response.status === 404) { 59 - // Record deleted? 
60 - console.warn(`Record not found: ${did}/${collection}/${rkey}`); 128 + console.warn(`Record not found: ${did}/${collection}/${rkey}`); 61 129 } 62 130 return; 63 131 } ··· 65 133 const data = (await response.json()) as { 66 134 uri: string; 67 135 cid?: string; 68 - value: { 69 - title?: string; 70 - path?: string; 71 - site?: string; 72 - content?: unknown; 73 - textContent?: string; 74 - publishedAt?: string; 75 - [key: string]: unknown; 76 - }; 136 + value: DocumentRecord; 77 137 }; 78 138 79 139 const { value, cid } = data; ··· 90 150 .bind(did, rkey, collection, cid || null, cid || null) 91 151 .run(); 92 152 93 - // 4. Resolve View URL and Update resolved_documents 94 - const uri = `at://${did}/${collection}/${rkey}`; 153 + // 4. Extract document fields 154 + const title = value.title || null; 155 + const description = value.description || null; 156 + const path = value.path || null; 157 + const site = value.site || null; 158 + const content = value.content ? JSON.stringify(value.content) : null; 159 + const textContent = value.textContent || null; 160 + const coverImageCid = extractBlobCid(value.coverImage); 161 + const coverImageUrl = coverImageCid ? buildBlobUrl(pds, did, coverImageCid) : null; 162 + const bskyPostRef = value.bskyPostRef ? JSON.stringify(value.bskyPostRef) : null; 163 + const tags = value.tags ? JSON.stringify(value.tags) : null; 164 + const publishedAt = value.publishedAt || null; 165 + const updatedAt = value.updatedAt || null; 166 + 167 + // 5. 
Resolve publication if site is at:// URI 168 + let pubUrl: string | null = null; 169 + let pubName: string | null = null; 170 + let pubDescription: string | null = null; 171 + let pubIconCid: string | null = null; 172 + let pubIconUrl: string | null = null; 95 173 let viewUrl: string | null = null; 96 - if (value.site && value.path) { 97 - viewUrl = await resolveViewUrl(db, value.site, value.path); 174 + 175 + if (site) { 176 + if (site.startsWith("at://")) { 177 + // Fetch publication record 178 + const pub = await fetchPublication(db, site); 179 + if (pub) { 180 + pubUrl = pub.url; 181 + pubName = pub.name; 182 + pubDescription = pub.description; 183 + pubIconCid = pub.iconCid; 184 + pubIconUrl = pub.iconUrl; 185 + // Construct view URL 186 + if (pubUrl && path) { 187 + const baseUrl = pubUrl.startsWith("http") ? pubUrl : `https://${pubUrl}`; 188 + viewUrl = `${baseUrl.replace(/\/$/, "")}${path}`; 189 + } 190 + } 191 + } else { 192 + // Site is a direct URL (loose document) 193 + pubUrl = site; 194 + if (path) { 195 + const baseUrl = site.startsWith("http") ? site : `https://${site}`; 196 + viewUrl = `${baseUrl.replace(/\/$/, "")}${path}`; 197 + } 198 + } 98 199 } 99 200 100 - // Set stale_at to 12 hours from now 201 + // 6. 
Insert/update resolved_documents 202 + const uri = `at://${did}/${collection}/${rkey}`; 101 203 const STALE_OFFSET_HOURS = 12; 102 204 103 205 await db 104 206 .prepare( 105 - `INSERT INTO resolved_documents (uri, did, rkey, title, path, site, content, text_content, published_at, view_url, resolved_at, stale_at) 106 - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now', '+${STALE_OFFSET_HOURS} hours')) 107 - ON CONFLICT(uri) DO UPDATE SET 108 - title = ?, 109 - path = ?, 110 - site = ?, 111 - content = ?, 112 - text_content = ?, 113 - published_at = ?, 114 - view_url = ?, 115 - resolved_at = datetime('now'), 116 - stale_at = datetime('now', '+${STALE_OFFSET_HOURS} hours')` 207 + `INSERT INTO resolved_documents ( 208 + uri, did, rkey, title, description, path, site, content, text_content, 209 + cover_image_cid, cover_image_url, bsky_post_ref, tags, 210 + published_at, updated_at, pub_url, pub_name, pub_description, 211 + pub_icon_cid, pub_icon_url, view_url, pds_endpoint, 212 + resolved_at, stale_at 213 + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now', '+${STALE_OFFSET_HOURS} hours')) 214 + ON CONFLICT(uri) DO UPDATE SET 215 + title = ?, description = ?, path = ?, site = ?, content = ?, text_content = ?, 216 + cover_image_cid = ?, cover_image_url = ?, bsky_post_ref = ?, tags = ?, 217 + published_at = ?, updated_at = ?, pub_url = ?, pub_name = ?, pub_description = ?, 218 + pub_icon_cid = ?, pub_icon_url = ?, view_url = ?, pds_endpoint = ?, 219 + resolved_at = datetime('now'), stale_at = datetime('now', '+${STALE_OFFSET_HOURS} hours')` 117 220 ) 118 221 .bind( 119 - uri, 120 - did, 121 - rkey, 122 - value.title || null, 123 - value.path || null, 124 - value.site || null, 125 - value.content ? 
JSON.stringify(value.content) : null, 126 - value.textContent || null, 127 - value.publishedAt || null, 128 - viewUrl, 129 - // Update bindings 130 - value.title || null, 131 - value.path || null, 132 - value.site || null, 133 - value.content ? JSON.stringify(value.content) : null, 134 - value.textContent || null, 135 - value.publishedAt || null, 136 - viewUrl 222 + // INSERT values 223 + uri, did, rkey, title, description, path, site, content, textContent, 224 + coverImageCid, coverImageUrl, bskyPostRef, tags, 225 + publishedAt, updatedAt, pubUrl, pubName, pubDescription, 226 + pubIconCid, pubIconUrl, viewUrl, pds, 227 + // UPDATE values 228 + title, description, path, site, content, textContent, 229 + coverImageCid, coverImageUrl, bskyPostRef, tags, 230 + publishedAt, updatedAt, pubUrl, pubName, pubDescription, 231 + pubIconCid, pubIconUrl, viewUrl, pds 137 232 ) 138 233 .run(); 234 + 235 + console.log(`Processed document: ${uri}`); 139 236 } catch (error) { 140 237 console.error(`Error processing document ${did}/${collection}/${rkey}:`, error); 141 238 }
+1
packages/server/src/utils/index.ts
··· 1 1 export { parseAtUri, buildAtUri, type AtUriComponents } from "./at-uri"; 2 2 export { resolvePds } from "./resolver"; 3 3 export { resolveViewUrl, processDocument } from "./document"; 4 + export { buildBlobUrl, extractBlobCid } from "./blob";