···23232424use jacquard::IntoStatic;
2525use jacquard::from_json_value;
2626+use jacquard::smol_str::SmolStr;
2627use jacquard::types::string::AtUri;
2728use weaver_api::com_atproto::repo::strong_ref::StrongRef;
2829use weaver_api::sh_weaver::embed::images::Image;
···9091 /// None for new entries that haven't been published yet.
9192 /// Signal so cloned docs share the same state after publish.
9293 pub entry_ref: Signal<Option<StrongRef<'static>>>,
9494+9595+ /// AT-URI of the notebook this draft belongs to (for re-publishing)
9696+ pub notebook_uri: Signal<Option<SmolStr>>,
93979498 // --- Edit sync state (for PDS sync) ---
9599 /// StrongRef to the sh.weaver.edit.root record for this edit session.
···235239 /// Pre-resolved embed content fetched during load.
236240 /// Avoids embed pop-in on initial render.
237241 pub resolved_content: weaver_common::ResolvedContent,
242242+ /// Notebook URI for re-publishing to the same notebook.
243243+ pub notebook_uri: Option<SmolStr>,
238244}
239245240246impl PartialEq for LoadedDocState {
···316322 tags,
317323 embeds,
318324 entry_ref: Signal::new(None),
325325+ notebook_uri: Signal::new(None),
319326 edit_root: Signal::new(None),
320327 last_diff: Signal::new(None),
321328 last_synced_version: Signal::new(None),
    /// Set the StrongRef when editing an existing entry.
    ///
    /// Pass `None` to clear the ref (e.g. for an entry that has not been
    /// published yet).
    pub fn set_entry_ref(&mut self, entry: Option<StrongRef<'static>>) {
        self.entry_ref.set(entry);
    }

    /// Get the notebook URI if this draft belongs to a notebook.
    ///
    /// Clones the current value out of the signal.
    pub fn notebook_uri(&self) -> Option<SmolStr> {
        self.notebook_uri.read().clone()
    }

    /// Set the notebook URI for re-publishing to the same notebook.
    pub fn set_notebook_uri(&mut self, uri: Option<SmolStr>) {
        self.notebook_uri.set(uri);
    }
494511495512 // --- Tags accessors ---
···10901107 tags,
10911108 embeds,
10921109 entry_ref: Signal::new(None),
11101110+ notebook_uri: Signal::new(None),
10931111 edit_root: Signal::new(None),
10941112 last_diff: Signal::new(None),
10951113 last_synced_version: Signal::new(None),
···11481166 tags,
11491167 embeds,
11501168 entry_ref: Signal::new(state.entry_ref),
11691169+ notebook_uri: Signal::new(state.notebook_uri),
11511170 edit_root: Signal::new(state.edit_root),
11521171 last_diff: Signal::new(state.last_diff),
11531172 // Use the synced version from state (tracks the PDS version vector)
···388388 // Use WeaverExt to upsert entry (handles notebook + entry creation/updates)
389389 use jacquard::http_client::HttpClient;
390390 use weaver_common::WeaverExt;
391391- let (entry_ref, was_created) = agent
391391+ let (entry_ref, _, was_created) = agent
392392 .upsert_entry(&title, entry_title.as_ref(), entry, None)
393393 .await?;
394394
+97-27
crates/weaver-common/src/agent.rs
···123123 }
124124 }
    /// Fetch a notebook by URI and return its entry list
    ///
    /// Returns Ok(Some((uri, entry_list))) if the notebook exists and can be parsed,
    /// Ok(None) if the record cannot be fetched or parsed,
    /// Err only if the URI itself is invalid.
    fn get_notebook_by_uri(
        &self,
        uri: &str,
    ) -> impl Future<Output = Result<Option<(AtUri<'static>, Vec<StrongRef<'static>>)>, WeaverError>>
    where
        Self: Sized,
    {
        async move {
            use weaver_api::sh_weaver::notebook::book::Book;

            let at_uri = AtUri::new(uri)
                .map_err(|e| WeaverError::InvalidNotebook(format!("Invalid notebook URI: {}", e)))?;

            // NOTE(review): any fetch error is treated as "notebook doesn't
            // exist" — this also swallows transient network errors; confirm
            // callers are happy to retry on None.
            let response = match self.get_record::<Book>(&at_uri).await {
                Ok(r) => r,
                Err(_) => return Ok(None), // Notebook doesn't exist
            };

            let output = match response.into_output() {
                Ok(o) => o,
                Err(_) => return Ok(None), // Failed to parse
            };

            // Deep-copy each entry ref so the returned list owns its data
            // ('static lifetime, detached from the response buffer).
            let entries = output
                .value
                .entry_list
                .iter()
                .cloned()
                .map(IntoStatic::into_static)
                .collect();

            Ok(Some((at_uri.into_static(), entries)))
        }
    }
165165+126166 /// Find or create a notebook by title, returning its URI and entry list
127167 ///
128168 /// If the notebook doesn't exist, creates it with the given DID as author.
···208248 }
209249 }
210250211211- /// Find or create an entry within a notebook
251251+ /// Find or create an entry within a notebook (with pre-fetched notebook data)
212252 ///
213213- /// Multi-step workflow:
214214- /// 1. Find the notebook by title
215215- /// 2. If existing_rkey is provided, match by rkey; otherwise match by title
216216- /// 3. If found: update the entry with new content
217217- /// 4. If not found: create new entry and append to notebook's entry_list
218218- ///
219219- /// The `existing_rkey` parameter allows updating an entry even if its title changed,
220220- /// and enables pre-generating rkeys for path rewriting before publish.
253253+ /// This variant accepts notebook URI and entry_refs directly to avoid redundant
254254+ /// notebook lookups when the caller has already fetched this data.
221255 ///
222222- /// Returns (entry_ref, was_created)
223223- fn upsert_entry(
256256+ /// Returns (entry_ref, notebook_uri, was_created)
257257+ fn upsert_entry_with_notebook(
224258 &self,
225225- notebook_title: &str,
259259+ notebook_uri: AtUri<'static>,
260260+ entry_refs: Vec<StrongRef<'static>>,
226261 entry_title: &str,
227262 entry: entry::Entry<'_>,
228263 existing_rkey: Option<&str>,
229229- ) -> impl Future<Output = Result<(StrongRef<'static>, bool), WeaverError>>
264264+ ) -> impl Future<Output = Result<(StrongRef<'static>, AtUri<'static>, bool), WeaverError>>
230265 where
231266 Self: Sized,
232267 {
233268 async move {
234234- // Get our own DID
235235- let (did, _) = self.session_info().await.ok_or_else(|| {
236236- AgentError::from(ClientError::invalid_request("No session info available"))
237237- })?;
238238-239239- // Find or create notebook
240240- let (notebook_uri, entry_refs) = self.upsert_notebook(notebook_title, &did).await?;
241241-242269 // If we have an existing rkey, try to find and update that specific entry
243270 if let Some(rkey) = existing_rkey {
244271 // Check if this entry exists in the notebook by comparing rkeys
···259286 .uri(output.uri.into_static())
260287 .cid(output.cid.into_static())
261288 .build();
262262- return Ok((updated_ref, false));
289289+ return Ok((updated_ref, notebook_uri, false));
263290 }
264291 }
265292···283310 })
284311 .await?;
285312286286- return Ok((new_ref, true));
313313+ return Ok((new_ref, notebook_uri, true));
287314 }
288315289289- // No existing rkey - use title-based matching (original behavior)
316316+ // No existing rkey - use title-based matching
290317291318 // Fast path: if notebook is empty, skip search and create directly
292319 if entry_refs.is_empty() {
···307334 })
308335 .await?;
309336310310- return Ok((new_ref, true));
337337+ return Ok((new_ref, notebook_uri, true));
311338 }
312339313340 // Check if entry with this title exists in the notebook
···331358 .uri(output.uri.into_static())
332359 .cid(output.cid.into_static())
333360 .build();
334334- return Ok((updated_ref, false));
361361+ return Ok((updated_ref, notebook_uri, false));
335362 }
336363 }
337364 }
···355382 })
356383 .await?;
357384358358- Ok((new_ref, true))
385385+ Ok((new_ref, notebook_uri, true))
386386+ }
387387+ }
388388+389389+ /// Find or create an entry within a notebook
390390+ ///
391391+ /// Multi-step workflow:
392392+ /// 1. Find the notebook by title
393393+ /// 2. If existing_rkey is provided, match by rkey; otherwise match by title
394394+ /// 3. If found: update the entry with new content
395395+ /// 4. If not found: create new entry and append to notebook's entry_list
396396+ ///
397397+ /// The `existing_rkey` parameter allows updating an entry even if its title changed,
398398+ /// and enables pre-generating rkeys for path rewriting before publish.
399399+ ///
400400+ /// Returns (entry_ref, notebook_uri, was_created)
401401+ fn upsert_entry(
402402+ &self,
403403+ notebook_title: &str,
404404+ entry_title: &str,
405405+ entry: entry::Entry<'_>,
406406+ existing_rkey: Option<&str>,
407407+ ) -> impl Future<Output = Result<(StrongRef<'static>, AtUri<'static>, bool), WeaverError>>
408408+ where
409409+ Self: Sized,
410410+ {
411411+ async move {
412412+ // Get our own DID
413413+ let (did, _) = self.session_info().await.ok_or_else(|| {
414414+ AgentError::from(ClientError::invalid_request("No session info available"))
415415+ })?;
416416+417417+ // Find or create notebook
418418+ let (notebook_uri, entry_refs) = self.upsert_notebook(notebook_title, &did).await?;
419419+420420+ // Delegate to the variant with pre-fetched notebook data
421421+ self.upsert_entry_with_notebook(
422422+ notebook_uri,
423423+ entry_refs,
424424+ entry_title,
425425+ entry,
426426+ existing_rkey,
427427+ )
428428+ .await
359429 }
360430 }
361431
-- Draft titles extracted from Loro snapshots
-- Updated by background task when edit_heads changes

CREATE TABLE IF NOT EXISTS draft_titles (
    -- Draft identity (matches drafts table)
    did String,
    rkey String,

    -- Extracted title from Loro doc (empty until first extraction)
    title String DEFAULT '',

    -- Head used for extraction (stale if doesn't match edit_heads)
    head_did String DEFAULT '',
    head_rkey String DEFAULT '',
    head_cid String DEFAULT '',

    -- Timestamps
    updated_at DateTime64(3) DEFAULT now64(3),
    indexed_at DateTime64(3) DEFAULT now64(3)
)
-- ReplacingMergeTree collapses rows sharing the ORDER BY key at merge time,
-- keeping the row with the greatest indexed_at, so re-upserting a title is
-- an append; readers should use FINAL or max(indexed_at) to deduplicate.
ENGINE = ReplacingMergeTree(indexed_at)
ORDER BY (did, rkey)
+15-2
crates/weaver-index/src/bin/weaver_indexer.rs
···2233use clap::{Parser, Subcommand};
44use tracing::{error, info, warn};
55+use jacquard::client::UnauthenticatedSession;
56use weaver_index::clickhouse::InserterConfig;
67use weaver_index::clickhouse::{Client, Migrator};
78use weaver_index::config::{
···910};
1011use weaver_index::firehose::FirehoseConsumer;
1112use weaver_index::server::{AppState, ServerConfig, TelemetryConfig, telemetry};
1212-use weaver_index::{FirehoseIndexer, ServiceIdentity, TapIndexer, load_cursor};
1313+use weaver_index::{
1414+ DraftTitleTaskConfig, FirehoseIndexer, ServiceIdentity, TapIndexer, load_cursor,
1515+ run_draft_title_task,
1616+};
13171418#[derive(Parser)]
1519#[command(name = "indexer")]
···165169 });
166170 let did_doc = identity.did_document_with_service(&server_config.service_did, &service_endpoint);
167171168168- // Create separate clients for indexer and server
172172+ // Create separate clients for indexer, server, and background tasks
169173 let indexer_client = Client::new(&ch_config)?;
170174 let server_client = Client::new(&ch_config)?;
175175+ let task_client = std::sync::Arc::new(Client::new(&ch_config)?);
171176172177 // Build AppState for server
173178 let state = AppState::new(
···208213 tokio::spawn(async move { indexer.run().await })
209214 }
210215 };
216216+217217+ // Spawn background tasks
218218+ let resolver = UnauthenticatedSession::new_public();
219219+ tokio::spawn(run_draft_title_task(
220220+ task_client,
221221+ resolver,
222222+ DraftTitleTaskConfig::default(),
223223+ ));
211224212225 // Run server, monitoring indexer health
213226 tokio::select! {
+2-2
crates/weaver-index/src/clickhouse.rs
···77pub use client::{Client, TableSize};
88pub use migrations::{DbObject, MigrationResult, Migrator, ObjectType};
99pub use queries::{
1010- CollaboratorRow, EditHeadRow, EditNodeRow, EntryRow, HandleMappingRow, NotebookRow,
1111- ProfileCountsRow, ProfileRow, ProfileWithCounts,
1010+ CollaboratorRow, EditChainNode, EditHeadRow, EditNodeRow, EntryRow, HandleMappingRow,
1111+ NotebookRow, ProfileCountsRow, ProfileRow, ProfileWithCounts, StaleDraftRow,
1212};
1313pub use resilient_inserter::{InserterConfig, ResilientRecordInserter};
1414pub use schema::{
+1-1
crates/weaver-index/src/clickhouse/queries.rs
···12121313pub use collab::PermissionRow;
1414pub use collab_state::{CollaboratorRow, EditHeadRow};
1515-pub use edit::EditNodeRow;
1515+pub use edit::{EditChainNode, EditNodeRow, StaleDraftRow};
1616pub use identity::HandleMappingRow;
1717pub use notebooks::{EntryRow, NotebookRow};
1818pub use profiles::{ProfileCountsRow, ProfileRow, ProfileWithCounts};
···163163 e.indexed_at AS indexed_at,
164164 e.record AS record
165165 FROM notebook_entries ne FINAL
166166- INNER JOIN entries FINAL AS e ON
166166+ INNER JOIN entries e FINAL ON
167167 e.did = ne.entry_did
168168 AND e.rkey = ne.entry_rkey
169169 AND e.deleted_at = toDateTime64(0, 3)
···731731 e.indexed_at AS indexed_at,
732732 e.record AS record
733733 FROM notebook_entries ne FINAL
734734- INNER JOIN entries FINAL AS e ON
734734+ INNER JOIN entries e FINAL ON
735735 e.did = ne.entry_did
736736 AND e.rkey = ne.entry_rkey
737737 AND e.deleted_at = toDateTime64(0, 3)
+8
crates/weaver-index/src/endpoints/edit.rs
···369369370370 let last_edit_at = row.last_edit_at.map(|dt| Datetime::new(dt.fixed_offset()));
371371372372+ // Include title if available
373373+ let title = if row.title.is_empty() {
374374+ None
375375+ } else {
376376+ Some(row.title.to_cowstr().into_static())
377377+ };
378378+372379 drafts.push(
373380 DraftView::new()
374381 .uri(uri)
···376383 .created_at(created_at)
377384 .maybe_edit_root(edit_root)
378385 .maybe_last_edit_at(last_edit_at)
386386+ .maybe_title(title)
379387 .build(),
380388 );
381389 }
+18-1
crates/weaver-index/src/endpoints/notebook.rs
···152152 XrpcErrorResponse::internal_error("Invalid CID stored")
153153 })?;
154154155155+ let entry_contributors = state
156156+ .clickhouse
157157+ .get_entry_contributors(did_str, &entry_row.rkey)
158158+ .await
159159+ .map_err(|e| {
160160+ tracing::error!("Failed to get entry contributors: {}", e);
161161+ XrpcErrorResponse::internal_error("Database query failed")
162162+ })?;
163163+164164+ let mut all_author_dids: HashSet<SmolStr> = entry_contributors.iter().cloned().collect();
165165+ // Also include author_dids from the record (explicit declarations)
166166+ for did in &entry_row.author_dids {
167167+ all_author_dids.insert(did.clone());
168168+ }
169169+170170+ let author_dids_vec: Vec<SmolStr> = all_author_dids.into_iter().collect();
171171+155172 // Hydrate entry authors
156156- let entry_authors = hydrate_authors(&entry_row.author_dids, &profile_map)?;
173173+ let entry_authors = hydrate_authors(&author_dids_vec, &profile_map)?;
157174158175 // Parse record JSON
159176 let entry_record = parse_record_json(&entry_row.record)?;
···99pub mod service_identity;
1010pub mod sqlite;
1111pub mod tap;
1212+pub mod tasks;
12131314pub use config::Config;
1415pub use error::{IndexError, Result};
···1718pub use server::{AppState, ServerConfig};
1819pub use service_identity::ServiceIdentity;
1920pub use sqlite::{ShardKey, ShardRouter, SqliteShard};
2121+pub use tasks::{run_draft_title_task, DraftTitleTaskConfig};
+380
crates/weaver-index/src/tasks/draft_titles.rs
···11+//! Background task for extracting draft titles from Loro snapshots.
22+//!
33+//! Periodically scans for drafts where the edit head has changed since
44+//! the last title extraction, fetches the edit chain from PDS, reconstructs
55+//! the Loro document, and extracts the title.
66+77+use std::sync::Arc;
88+use std::time::Duration;
99+1010+use jacquard::client::UnauthenticatedSession;
1111+use jacquard::identity::JacquardResolver;
1212+use jacquard::prelude::{IdentityResolver, XrpcExt};
1313+use jacquard::types::ident::AtIdentifier;
1414+use jacquard::types::string::{Cid, Did};
1515+use loro::LoroDoc;
1616+use mini_moka::sync::Cache;
1717+use tracing::{debug, error, info, warn};
1818+1919+use crate::clickhouse::{Client, StaleDraftRow};
2020+use crate::error::IndexError;
2121+2222+use weaver_api::com_atproto::repo::get_record::GetRecord;
2323+use weaver_api::com_atproto::sync::get_blob::GetBlob;
2424+use weaver_api::sh_weaver::edit::diff::Diff;
2525+use weaver_api::sh_weaver::edit::root::Root;
/// Cache for PDS blob fetches.
///
/// Blobs are content-addressed so safe to cache indefinitely.
/// Key is (did, cid) as a string.
#[derive(Clone)]
pub struct BlobCache {
    // Bounded in-memory cache; values are Arc'd so a hit shares the bytes
    // instead of copying them.
    cache: Cache<String, Arc<Vec<u8>>>,
}

impl BlobCache {
    /// Create a cache holding at most `max_capacity` blobs.
    pub fn new(max_capacity: u64) -> Self {
        Self {
            cache: Cache::new(max_capacity),
        }
    }

    /// Build the composite "did:cid" lookup key.
    fn key(did: &str, cid: &str) -> String {
        format!("{}:{}", did, cid)
    }

    /// Look up a blob, returning a shared handle on a hit.
    pub fn get(&self, did: &str, cid: &str) -> Option<Arc<Vec<u8>>> {
        self.cache.get(&Self::key(did, cid))
    }

    /// Store a freshly fetched blob under (did, cid).
    pub fn insert(&self, did: &str, cid: &str, data: Vec<u8>) {
        self.cache.insert(Self::key(did, cid), Arc::new(data));
    }
}
/// Configuration for the draft title extraction task
#[derive(Debug, Clone)]
pub struct DraftTitleTaskConfig {
    /// How often to check for stale titles
    pub interval: Duration,
    /// Maximum drafts to process per run
    pub batch_size: i64,
}

impl Default for DraftTitleTaskConfig {
    /// Default cadence: poll every two minutes, at most 50 drafts per pass.
    fn default() -> Self {
        DraftTitleTaskConfig {
            batch_size: 50,
            interval: Duration::from_secs(120),
        }
    }
}
7373+7474+/// Run the draft title extraction task in a loop
7575+pub async fn run_draft_title_task(
7676+ client: Arc<Client>,
7777+ resolver: UnauthenticatedSession<JacquardResolver>,
7878+ config: DraftTitleTaskConfig,
7979+) {
8080+ info!(
8181+ interval_secs = config.interval.as_secs(),
8282+ batch_size = config.batch_size,
8383+ "starting draft title extraction task"
8484+ );
8585+8686+ // Cache for blob fetches - blobs are content-addressed, safe to cache indefinitely
8787+ // 1000 entries is plenty for typical edit chains
8888+ let blob_cache = BlobCache::new(1000);
8989+9090+ loop {
9191+ match process_stale_drafts(&client, &resolver, &blob_cache, config.batch_size).await {
9292+ Ok(count) => {
9393+ if count > 0 {
9494+ info!(processed = count, "draft title extraction complete");
9595+ } else {
9696+ debug!("no stale draft titles to process");
9797+ }
9898+ }
9999+ Err(e) => {
100100+ error!(error = ?e, "draft title extraction failed");
101101+ }
102102+ }
103103+104104+ tokio::time::sleep(config.interval).await;
105105+ }
106106+}
107107+108108+/// Process a batch of stale drafts
109109+async fn process_stale_drafts(
110110+ client: &Client,
111111+ resolver: &UnauthenticatedSession<JacquardResolver>,
112112+ blob_cache: &BlobCache,
113113+ batch_size: i64,
114114+) -> Result<usize, IndexError> {
115115+ let stale = client.get_stale_draft_titles(batch_size).await?;
116116+117117+ if stale.is_empty() {
118118+ return Ok(0);
119119+ }
120120+121121+ debug!(count = stale.len(), "found stale draft titles");
122122+123123+ let mut processed = 0;
124124+ for draft in stale {
125125+ match extract_and_save_title(client, resolver, blob_cache, &draft).await {
126126+ Ok(title) => {
127127+ debug!(
128128+ did = %draft.did,
129129+ rkey = %draft.rkey,
130130+ title = %title,
131131+ "extracted draft title"
132132+ );
133133+ processed += 1;
134134+ }
135135+ Err(e) => {
136136+ warn!(
137137+ did = %draft.did,
138138+ rkey = %draft.rkey,
139139+ error = ?e,
140140+ "failed to extract draft title"
141141+ );
142142+ }
143143+ }
144144+ }
145145+146146+ Ok(processed)
147147+}
/// Extract title from a single draft and save it
///
/// Walks the draft's edit chain (root snapshot followed by diffs), replays
/// it into a fresh Loro document, reads the "title" text container, and
/// upserts the result into ClickHouse keyed by the head that produced it.
///
/// NOTE(review): all fetch/parse failures are mapped to IndexError::NotFound;
/// a dedicated error variant would make the logs clearer — confirm intent.
async fn extract_and_save_title(
    client: &Client,
    resolver: &UnauthenticatedSession<JacquardResolver>,
    blob_cache: &BlobCache,
    draft: &StaleDraftRow,
) -> Result<String, IndexError> {
    // Get the edit chain from ClickHouse
    let chain = client
        .get_edit_chain(
            &draft.root_did,
            &draft.root_rkey,
            &draft.head_did,
            &draft.head_rkey,
        )
        .await?;

    if chain.is_empty() {
        return Err(IndexError::NotFound {
            resource: format!("edit chain for {}:{}", draft.did, draft.rkey),
        });
    }

    // Resolve PDS for the root DID
    let root_did = Did::new(&draft.root_did).map_err(|e| IndexError::NotFound {
        resource: format!("invalid root DID: {}", e),
    })?;

    let pds_url = resolver
        .pds_for_did(&root_did)
        .await
        .map_err(|e| IndexError::NotFound {
            resource: format!("PDS for {}: {}", root_did, e),
        })?;

    // Initialize Loro doc
    let doc = LoroDoc::new();

    // Process chain: first node should be root, rest are diffs.
    // Order matters: the root snapshot must be imported before any diff.
    for (i, node) in chain.iter().enumerate() {
        let node_did = Did::new(&node.did).map_err(|e| IndexError::NotFound {
            resource: format!("invalid node DID: {}", e),
        })?;

        if node.node_type == "root" {
            // Fetch root record
            let root_record =
                fetch_root_record(resolver, pds_url.clone(), &node_did, &node.rkey).await?;

            // Fetch snapshot blob
            let snapshot_cid = root_record.snapshot.blob().cid();
            let snapshot_bytes =
                fetch_blob(resolver, blob_cache, pds_url.clone(), &node_did, snapshot_cid).await?;

            // Import snapshot
            doc.import(&snapshot_bytes)
                .map_err(|e| IndexError::NotFound {
                    resource: format!("failed to import root snapshot: {}", e),
                })?;

            debug!(
                did = %node.did,
                rkey = %node.rkey,
                bytes = snapshot_bytes.len(),
                "imported root snapshot"
            );
        } else {
            // Fetch diff record
            let diff_record =
                fetch_diff_record(resolver, pds_url.clone(), &node_did, &node.rkey).await?;

            // Diffs can have inline diff bytes or a snapshot blob reference
            let diff_bytes = if let Some(ref inline) = diff_record.inline_diff {
                // Use inline diff (base64 decoded by serde)
                inline.to_vec()
            } else if let Some(ref snapshot_blob) = diff_record.snapshot {
                // Fetch snapshot blob
                let snapshot_cid = snapshot_blob.blob().cid();
                fetch_blob(resolver, blob_cache, pds_url.clone(), &node_did, snapshot_cid).await?
            } else {
                // Malformed diff: skip rather than abort the whole chain.
                warn!(
                    did = %node.did,
                    rkey = %node.rkey,
                    "diff has neither inline nor snapshot data, skipping"
                );
                continue;
            };

            // Import diff
            doc.import(&diff_bytes).map_err(|e| IndexError::NotFound {
                resource: format!("failed to import diff {}: {}", i, e),
            })?;

            debug!(
                did = %node.did,
                rkey = %node.rkey,
                bytes = diff_bytes.len(),
                "imported diff"
            );
        }
    }

    // Extract title from Loro doc
    let title = doc.get_text("title").to_string();

    // Save to ClickHouse, recording the head so staleness can be detected
    // when edit_heads moves past it.
    client
        .upsert_draft_title(
            &draft.did,
            &draft.rkey,
            &title,
            &draft.head_did,
            &draft.head_rkey,
            &draft.head_cid,
        )
        .await?;

    Ok(title)
}
/// Fetch an edit.root record from PDS
///
/// Resolves the record via com.atproto.repo.getRecord against the given PDS
/// and deserializes it into an owned Root. Errors are folded into
/// IndexError::NotFound with a descriptive resource string.
async fn fetch_root_record(
    resolver: &UnauthenticatedSession<JacquardResolver>,
    pds_url: jacquard::url::Url,
    did: &Did<'_>,
    rkey: &str,
) -> Result<Root<'static>, IndexError> {
    use jacquard::IntoStatic;
    use jacquard::types::string::Nsid;

    let request = GetRecord::new()
        .repo(AtIdentifier::Did(did.clone()))
        // unwrap is safe: the NSID literal is statically valid
        .collection(Nsid::new_static("sh.weaver.edit.root").unwrap())
        .rkey(
            jacquard::types::recordkey::RecordKey::any(rkey).map_err(|e| IndexError::NotFound {
                resource: format!("invalid rkey: {}", e),
            })?,
        )
        .build();

    let response =
        resolver
            .xrpc(pds_url)
            .send(&request)
            .await
            .map_err(|e| IndexError::NotFound {
                resource: format!("root record {}/{}: {}", did, rkey, e),
            })?;

    let output = response.into_output().map_err(|e| IndexError::NotFound {
        resource: format!("parse root record: {}", e),
    })?;

    let root: Root = jacquard::from_data(&output.value).map_err(|e| IndexError::NotFound {
        resource: format!("deserialize root: {}", e),
    })?;

    // into_static detaches the record from the response buffer
    Ok(root.into_static())
}
/// Fetch an edit.diff record from PDS
///
/// Mirrors fetch_root_record but targets the sh.weaver.edit.diff collection.
/// Errors are folded into IndexError::NotFound with a descriptive resource
/// string.
async fn fetch_diff_record(
    resolver: &UnauthenticatedSession<JacquardResolver>,
    pds_url: jacquard::url::Url,
    did: &Did<'_>,
    rkey: &str,
) -> Result<Diff<'static>, IndexError> {
    use jacquard::IntoStatic;
    use jacquard::types::string::Nsid;

    let request = GetRecord::new()
        .repo(AtIdentifier::Did(did.clone()))
        // unwrap is safe: the NSID literal is statically valid
        .collection(Nsid::new_static("sh.weaver.edit.diff").unwrap())
        .rkey(
            jacquard::types::recordkey::RecordKey::any(rkey).map_err(|e| IndexError::NotFound {
                resource: format!("invalid rkey: {}", e),
            })?,
        )
        .build();

    let response =
        resolver
            .xrpc(pds_url)
            .send(&request)
            .await
            .map_err(|e| IndexError::NotFound {
                resource: format!("diff record {}/{}: {}", did, rkey, e),
            })?;

    let output = response.into_output().map_err(|e| IndexError::NotFound {
        resource: format!("parse diff record: {}", e),
    })?;

    let diff: Diff = jacquard::from_data(&output.value).map_err(|e| IndexError::NotFound {
        resource: format!("deserialize diff: {}", e),
    })?;

    // into_static detaches the record from the response buffer
    Ok(diff.into_static())
}
348348+349349+/// Fetch a blob from PDS, using cache when available
350350+async fn fetch_blob(
351351+ resolver: &UnauthenticatedSession<JacquardResolver>,
352352+ cache: &BlobCache,
353353+ pds_url: jacquard::url::Url,
354354+ did: &Did<'_>,
355355+ cid: &Cid<'_>,
356356+) -> Result<Vec<u8>, IndexError> {
357357+ // Check cache first - blobs are content-addressed
358358+ if let Some(cached) = cache.get(did.as_str(), cid.as_str()) {
359359+ debug!(cid = %cid, "blob cache hit");
360360+ return Ok(cached.as_ref().clone());
361361+ }
362362+363363+ let request = GetBlob::new().did(did.clone()).cid(cid.clone()).build();
364364+365365+ let response =
366366+ resolver
367367+ .xrpc(pds_url)
368368+ .send(&request)
369369+ .await
370370+ .map_err(|e| IndexError::NotFound {
371371+ resource: format!("blob {}: {}", cid, e),
372372+ })?;
373373+374374+ let bytes = response.buffer().to_vec();
375375+376376+ // Cache for future use
377377+ cache.insert(did.as_str(), cid.as_str(), bytes.clone());
378378+379379+ Ok(bytes)
380380+}
+5
crates/weaver-index/src/tasks/mod.rs
···11+//! Background tasks for the indexer
22+33+mod draft_titles;
44+55+pub use draft_titles::{run_draft_title_task, DraftTitleTaskConfig};
I recently used Jacquard to write an ~AppView~ Index for Weaver. In my devlog posts about that experience, I alluded to how easy I had made the actual web server side of it. Lexicon as a specification language provides a lot of ways to specify data types and a few to specify API endpoints. XRPC is the canonical way to do that, and it's an opinionated subset of HTTP, which narrows down to a specific endpoint format and set of "verbs". Your path is `/xrpc/your.lexicon.nsidEndpoint?argument=value`, your bodies are mostly JSON.
I'm going to lead off by tooting someone else's horn. Chad Miller's https://quickslice.slices.network/ provides an excellent example of the kind of thing you can do with atproto lexicons, and it doesn't use XRPC at all, but instead generates GraphQL's equivalents. This is more freeform, requires less of you upfront, and is in a lot of ways more granular than XRPC could possibly allow. Jacquard is for the moment built around the expectations of XRPC. If someone wants Jacquard support for GraphQL on atproto lexicons, I'm all ears, though.
Here's what is, to me, one of the benefits of XRPC, and one of the challenges: XRPC only specifies your inputs and your output. Everything else in between, you need to figure out yourself. This means more work, but it also means you have internal flexibility. And Jacquard's server-side XRPC helpers follow that. Jacquard XRPC code generation itself provides the output type and the errors. For the server side it generates one additional marker type, generally labeled `YourXrpcQueryRequest`, and a trait implementation for `XrpcEndpoint`. You can also get these with `derive(XrpcRequest)` on existing Rust structs without writing out lexicon JSON.
66+77+```rust
88+pub trait XrpcEndpoint {
99+ /// Fully-qualified path ('/xrpc/\[nsid\]') where this endpoint should live on the server
1010+ const PATH: &'static str;
1111+ /// XRPC method (query/GET or procedure/POST)
1212+ const METHOD: XrpcMethod;
1313+ /// XRPC Request data type
1414+ type Request<'de>: XrpcRequest + Deserialize<'de> + IntoStatic;
1515+ /// XRPC Response data type
1616+ type Response: XrpcResp;
1717+}
1818+1919+/// Endpoint type for
2020+///sh.weaver.actor.getActorNotebooks
2121+pub struct GetActorNotebooksRequest;
2222+impl XrpcEndpoint for GetActorNotebooksRequest {
2323+ const PATH: &'static str = "/xrpc/sh.weaver.actor.getActorNotebooks";
2424+ const METHOD: XrpcMethod = XrpcMethod::Query;
2525+ type Request<'de> = GetActorNotebooks<'de>;
2626+ type Response = GetActorNotebooksResponse;
2727+}
2828+```
2929+3030+As with many Jacquard traits you see the associated types carrying the lifetime. You may ask, why a second struct and trait? This is very similar to the `XrpcRequest` trait, which is implemented on the request struct itself, after all.
3131+3232+```rust
3333+impl<'a> XrpcRequest for GetActorNotebooks<'a> {
3434+ const NSID: &'static str = "sh.weaver.actor.getActorNotebooks";
3535+ const METHOD: XrpcMethod = XrpcMethod::Query;
3636+ type Response = GetActorNotebooksResponse;
3737+}
3838+```
3939+4040+## Time for magic
The reason is that lifetime, combined with the constraints Axum puts on extractors. Because the request type includes a lifetime, if we were to attempt to implement `FromRequest` directly for `XrpcRequest`, the trait would require that `XrpcRequest` be implemented for all lifetimes, and also apply an effective `DeserializeOwned` bound, even if we were to specify the `'static` lifetime as we do. And of course `XrpcRequest` is implemented for one specific lifetime, `'a`, the lifetime of whatever it's borrowed from. Meanwhile `XrpcEndpoint` has no lifetime itself, but instead carries the lifetime on the `Request` associated type. This allows us to do the following implementation, where `ExtractXrpc<E>` has no lifetime itself and contains an owned version of the deserialized request. We can then implement `FromRequest` for `ExtractXrpc<R>`, and put the `for<'any>` bound on the `IntoStatic` trait requirement in a where clause, where it works perfectly. In combination with the code generation in `jacquard-lexicon`, this is the full implementation of Jacquard's Axum XRPC request extractor. Not so bad.
4242+4343+```rust
4444+pub struct ExtractXrpc<E: XrpcEndpoint>(pub E::Request<'static>);
4545+4646+impl<S, R> FromRequest<S> for ExtractXrpc<R>
4747+where
4848+ S: Send + Sync,
4949+ R: XrpcEndpoint,
5050+ for<'a> R::Request<'a>: IntoStatic<Output = R::Request<'static>>,
5151+{
5252+ type Rejection = Response;
5353+5454+ fn from_request(
5555+ req: Request,
5656+ state: &S,
5757+ ) -> impl Future<Output = Result<Self, Self::Rejection>> + Send {
5858+ async {
5959+ match R::METHOD {
6060+ XrpcMethod::Procedure(_) => {
6161+ let body = Bytes::from_request(req, state)
6262+ .await
6363+ .map_err(IntoResponse::into_response)?;
6464+ let decoded = R::Request::decode_body(&body);
6565+ match decoded {
6666+ Ok(value) => Ok(ExtractXrpc(*value.into_static())),
6767+ Err(err) => Err((
6868+ StatusCode::BAD_REQUEST,
6969+ Json(json!({
7070+ "error": "InvalidRequest",
7171+ "message": format!("failed to decode request: {}", err)
7272+ })),
7373+ ).into_response()),
7474+ }
7575+ }
7676+ XrpcMethod::Query => {
7777+ if let Some(path_query) = req.uri().path_and_query() {
7878+ let query = path_query.query().unwrap_or("");
7979+ let value: R::Request<'_> =
8080+ serde_html_form::from_str::<R::Request<'_>>(query).map_err(|e| {
8181+ (
8282+ StatusCode::BAD_REQUEST,
8383+ Json(json!({
8484+ "error": "InvalidRequest",
8585+ "message": format!("failed to decode request: {}", e)
8686+ })),
8787+ ).into_response()
8888+ })?;
8989+ Ok(ExtractXrpc(value.into_static()))
9090+ } else {
9191+ Err((
9292+ StatusCode::BAD_REQUEST,
9393+ Json(json!({
9494+ "error": "InvalidRequest",
9595+ "message": "wrong path"
9696+ })),
9797+ ).into_response())
9898+ }
9999+ }
100100+ }
101101+ }
102102+ }
103103+```
104104+105105+Jacquard then also provides an additional utility to round things out, using the associated `PATH` constant to put the handler for your XRPC request at the right spot in your router.
106106+```rust
107107+/// Conversion trait to turn an XrpcEndpoint and a handler into an axum Router
108108+pub trait IntoRouter {
109109+ fn into_router<T, S, U>(handler: U) -> Router<S>
110110+ where
111111+ T: 'static,
112112+ S: Clone + Send + Sync + 'static,
113113+ U: axum::handler::Handler<T, S>;
114114+}
115115+116116+impl<X> IntoRouter for X
117117+where
118118+ X: XrpcEndpoint,
119119+{
120120+ /// Creates an axum router that will invoke `handler` in response to xrpc
121121+ /// request `X`.
122122+ fn into_router<T, S, U>(handler: U) -> Router<S>
123123+ where
124124+ T: 'static,
125125+ S: Clone + Send + Sync + 'static,
126126+ U: axum::handler::Handler<T, S>,
127127+ {
128128+ Router::new().route(
129129+ X::PATH,
130130+ (match X::METHOD {
131131+ XrpcMethod::Query => axum::routing::get,
132132+ XrpcMethod::Procedure(_) => axum::routing::post,
133133+ })(handler),
134134+ )
135135+ }
136136+}
137137+```
138138+139139+Which then lets the Axum router for Weaver's Index look like this (truncated for length):
140140+141141+```rust
142142+pub fn router(state: AppState, did_doc: DidDocument<'static>) -> Router {
143143+ Router::new()
144144+ .route("/", get(landing))
145145+ .route(
146146+ "/assets/IoskeleyMono-Regular.woff2",
147147+ get(font_ioskeley_regular),
148148+ )
149149+ .route("/assets/IoskeleyMono-Bold.woff2", get(font_ioskeley_bold))
150150+ .route(
151151+ "/assets/IoskeleyMono-Italic.woff2",
152152+ get(font_ioskeley_italic),
153153+ )
154154+ .route("/xrpc/_health", get(health))
155155+ .route("/metrics", get(metrics))
156156+ // com.atproto.identity.* endpoints
157157+ .merge(ResolveHandleRequest::into_router(identity::resolve_handle))
158158+ // com.atproto.repo.* endpoints (record cache)
159159+ .merge(GetRecordRequest::into_router(repo::get_record))
160160+ .merge(ListRecordsRequest::into_router(repo::list_records))
161161+ // app.bsky.* passthrough endpoints
162162+ .merge(BskyGetProfileRequest::into_router(bsky::get_profile))
163163+ .merge(BskyGetPostsRequest::into_router(bsky::get_posts))
164164+ // sh.weaver.actor.* endpoints
165165+ .merge(GetProfileRequest::into_router(actor::get_profile))
166166+ .merge(GetActorNotebooksRequest::into_router(
167167+ actor::get_actor_notebooks,
168168+ ))
169169+ .merge(GetActorEntriesRequest::into_router(
170170+ actor::get_actor_entries,
171171+ ))
172172+ // sh.weaver.notebook.* endpoints
173173+ ...
174174+ // sh.weaver.collab.* endpoints
175175+ ...
176176+ // sh.weaver.edit.* endpoints
177177+ ...
178178+ .layer(TraceLayer::new_for_http())
179179+ .layer(CorsLayer::permissive()
180180+ .max_age(std::time::Duration::from_secs(86400))
181181+ ).with_state(state)
182182+ .merge(did_web_router(did_doc))
183183+}
184184+```
186186+Each of the handlers is a fairly straightforward async function that takes `AppState`, the XrpcExtractor, and an extractor and validator for service auth, which allows it to be accessed via your PDS through the `atproto-proxy` header, and returns user-specific data, or gates specific endpoints as requiring authentication.
187187+188188+> And so yeah, the actual HTTP server part of the index was dead-easy to write. The handlers themselves are some of them fairly *long* functions, as they need to pull together the required data from the database over a couple of queries and then do some conversion, but they're straightforward. At some point I may end up either adding additional specialized view tables to the database or rewriting my queries to do more in SQL or both, but for now it made sense to keep the final decision-making and assembly in Rust, where it's easier to iterate on.
189189+### Service Auth
190190+Service Auth is, for those not familiar, the non-OAuth way to talk to an XRPC server other than your PDS with an authenticated identity. It's the method the Bluesky AppView uses. There are some downsides to proxying through the PDS, like delay in being able to read your own writes without some PDS-side or app-level handling, but it is conceptually very simple. The PDS, when it pipes through an XRPC request to another service, validates authentication, then generates a short-lived JWT, signs it with the user's private key, and puts it in a header. The service then extracts that, decodes it, and validates it using the public key in the user's DID document. Jacquard provides a middleware that can be used to gate routes based on service auth validation and it also provides an extractor. Initially I provided just one where authentication is required, but as part of building the index I added an additional one for optional authentication, where the endpoint is public, but returns user-specific information when there is an authenticated user. It returns this structure.
191191+192192+```rust
193193+#[derive(Debug, Clone, jacquard_derive::IntoStatic)]
194194+pub struct VerifiedServiceAuth<'a> {
195195+ /// The authenticated user's DID (from `iss` claim)
196196+ did: Did<'a>,
197197+ /// The audience (should match your service DID)
198198+ aud: Did<'a>,
199199+ /// The lexicon method NSID, if present
200200+ lxm: Option<Nsid<'a>>,
201201+ /// JWT ID (nonce), if present
202202+ jti: Option<CowStr<'a>>,
203203+}
204204+```
205205+206206+Ultimately I want to provide a similar set of OAuth extractors as well, but those need to be built, still. If I move away from service proxying for the Weaver index, they will definitely get written at that point.
207207+208208+> I mentioned some bug-fixing in Jacquard was required to make this work. There were a couple of oversights in the `DidDocument` struct and a spot I had incorrectly held a tracing span across an await point. Also, while using the `slingshot_resolver` set of options for `JacquardResolver` is great under normal circumstances (and normally I default to it), the mini-doc does NOT in fact include the signing keys, and cannot be used to validate service auth.
209209+>
210210+> I am not always a smart woman.
211211+212212+## Why not go full magic?
213213+One thing the Jacquard service auth validation extractor does **not** provide is validation of that jti nonce. That is left as an exercise for the server developer, to maintain a cache of recent nonces and compare against them. I leave a number of things this way, and this is deliberate. I think this is the correct approach. As powerful as "magic" all-in-one frameworks like Dioxus (or the various full-stack JS frameworks) are, the magic usually ends up constraining you in a number of ways. There are a number of awkward things in the front-end app implementation which are downstream of constraints Dioxus applies to your types and functions in order to work its magic.
215215+There are a lot of possible things you might want to do as an XRPC server. You might be a PDS, you might be an AppView or index, you might be some other sort of service that doesn't really fit into the boxes (like a Tangled knot server or Streamplace node). You might authenticate via service auth or OAuth, and communicate via the PDS or directly with the client app. And as such, while my approach to everything in Jacquard is to provide a comprehensive box of tools rather than a complete end-to-end solution, this is especially true on the server side of things, because of that diversity in requirements, and my desire to not constrain developers using the library to work a certain way, so that they can build anything they want on atproto.
216216+217217+> If you haven't read the Not An AppView entry, here it is. I might recommend reading it, and some other previous entries in that notebook, as it will help put the following in context.
218218+219219+![[at://did:plc:yfvwmnlztr4dwkb7hwz55r2g/sh.weaver.notebook.entry/3m7ysqf2z5s22]]
220220+## Dogfooding again
221221+That being said, my experience writing the Weaver front-end and now the index server does leave me wanting a few things. One is a "BFF" session type, which forwards requests through a server to the PDS (or index), acting somewhat like [oatproxy](https://github.com/streamplace/oatproxy) (prototype jacquard version of that [here](https://github.com/espeon/istat/tree/main/jacquard-oatproxy) courtesy of Nat and Claude). This allows easier reading of your own writes via server-side caching, some caching and deduplication of common requests to reduce load on the PDS and roundtrip time. If the session lives server-side, it allows longer-lived confidential sessions for OAuth, and avoids putting OAuth tokens on the client device.
222222+223223+Once implemented, I will likely refactor the Weaver app to use this session type in fullstack-server mode, which will then help dramatically simplify a bunch of client-side code. The refactored app will likely include an internal XRPC "server" of sorts that will elide differences between the index's XRPC APIs and the index-less flow. With the "fullstack-server" and "use-index" features, the client app running in the browser will forward authenticated requests through the app server to the index or PDS. With "fullstack-server" only, the app server itself acts like a discount version of the index, implemented via generic services like Constellation. Performance will be significantly improved over the original index-less implementation due to better caching, and unifying the cache. In client-only mode there are a couple of options, and I am not sure which is ultimately correct. The straightforward way as far as separation of concerns goes would be to essentially use a web worker as intermediary and local cache. That worker would be compiled to either use the index or to make Constellation and direct PDS requests, depending on the "use-index" feature. However that brings with it the obvious overhead of copying data from the worker to the app in the default mode, and I haven't yet investigated how feasible the available options which might allow zero-copy transfer via SharedArrayBuffer are. That being said, the real-time collaboration feature already works this way (sans SharedArrayBuffer) and lag is comparable to when the iroh connection was handled in the UI thread.
224224+225225+A fair bit of this is somewhat new territory for me, when it comes to the browser, and I would be ***very*** interested in hearing from people with more domain experience on the likely correct approach.
227227+On that note, one of my main frustrations with Jacquard as a library is how heavy it is in terms of compiled binary size due to monomorphization. I made that choice, to do everything via static dispatch, but when you want to ship as small a binary as possible over the network, it works against you. On WASM I haven't gotten a precise measure of exactly the granular damage, but on x86_64 (albeit with less aggressive optimisation for size) we're talking kilobytes of pure duplicated functions for every jacquard type used in the application, plus whatever else.
228228+```rust
229229+0.0% 0.0% 9.3KiB weaver_app weaver_app::components::editor::sync::create_diff::{closure#0}
230230+0.0% 0.0% 9.2KiB loro_internal <loro_internal::txn::Transaction>::_commit
231231+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Fetcher as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::collab::invite::Invite>::{closure#0}
232232+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Fetcher as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::actor::profile::ProfileRecord>::{closure#0}
233233+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Fetcher as jacquard::client::AgentSessionExt>::get_record::<weaver_api::app_bsky::actor::profile::ProfileRecord>::{closure#0}
234234+0.0% 0.0% 9.2KiB weaver_renderer <jacquard_identity::JacquardResolver as jacquard_identity::resolver::IdentityResolver>::resolve_did_doc::{closure#0}::{closure#0}
235235+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::notebook::theme::Theme>::{closure#0}
236236+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::notebook::entry::Entry>::{closure#0}
237237+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::notebook::book::Book>::{closure#0}
238238+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::notebook::colour_scheme::ColourScheme>::{closure#0}
239239+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::actor::profile::ProfileRecord>::{closure#0}
240240+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::edit::draft::Draft>::{closure#0}
241241+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::edit::root::Root>::{closure#0}
242242+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::sh_weaver::edit::diff::Diff>::{closure#0}
243243+0.0% 0.0% 9.2KiB weaver_app <weaver_app::fetch::Client as jacquard::client::AgentSessionExt>::get_record::<weaver_api::app_bsky::actor::profile::ProfileRecord>::{closure#0}
244244+0.0% 0.0% 9.2KiB resvg <image_webp::vp8::Vp8Decoder<std::io::Take<&mut std::io::cursor::Cursor<&[u8]>>>>::loop_filter
245245+0.0% 0.0% 9.2KiB miette <miette::handlers::graphical::GraphicalReportHandler>::render_context::<alloc::string::String>
246246+0.0% 0.0% 9.1KiB miette <miette::handlers::graphical::GraphicalReportHandler>::render_context::<core::fmt::Formatter>
247247+0.0% 0.0% 9.1KiB weaver_app weaver_app::components::record_editor::EditableRecordContent::{closure#7}::{closure#0}
248248+```
249249+250250+I've taken a couple stabs at refactors to help with this, but haven't found a solution that satisfies me, in part because one of the problems in practice is of course overhead from `serde_json` monomorphization. Unfortunately, the alternatives trade off in frustrating ways. [`facet`](https://github.com/facet-rs/facet) has its own binary size impacts and `facet-json` is missing a couple of critical features to work with atproto JSON data (internally-tagged enums, most notably). Something like [`simd-json`](https://github.com/simd-lite/simd-json) or [`serde_json_borrow`](https://github.com/PSeitz/serde_json_borrow) is fast and can borrow from the buffer in a way that is very useful to us (and honestly I intend to swap to them for some uses at some point), but `serde_json_borrow` only provides a value type, and I would then be uncertain at the monomorphization overhead of transforming that type into jacquard types. The `serde` implementation for `simd-json` is heavily based on `serde_json` and thus likely has much the same overhead problem. And [`miniserde`](https://github.com/dtolnay/miniserde) similarly lacks support for parts of JSON that atproto data requires (enums again). And writing my own custom JSON parser that deserializes into Jacquard's `Data` or `RawData` types (from where it can then be deserialized more simply into concrete types, ideally with much less code duplication) is not a project I have time for, and is on the tedious side of the kind of thing I enjoy, particularly the process of ensuring it is sufficiently robust for real-world use, and doesn't perform terribly.
252252+`dyn` compatibility for some of the Jacquard traits is possible but comes with its own challenges, as currently `Serialize` is a supertrait of `XrpcRequest`, and rewriting around removing that bound is both a nontrivial refactor (and a breaking API change, and it's not the only barrier to dyn compatibility) and may not actually reduce the number of copies of `get_record()` in the binary as much as one would hope. Now, if most of the code could be taken out of that and put into a function that could be totally shared between all implementations or at least most, that would be ideal, but the solution I found prevented the compiler from inferring the output type from the request type; it decoupled those two things too much. Obviously if I were to do a bunch of cursed internal unsafe rust I could probably make this work, but while I'm comfortable writing unsafe Rust I'm also conscious that I'm writing Jacquard not just for myself. My code will run in situations I cannot anticipate, and it needs to be as reliable as possible and as usable as possible. Additional use of unsafe could help with the latter (laundering lifetimes would make a number of things in Jacquard's main code paths much easier, both for me and for users of the library) but at potential cost to the former if I'm not smart enough or comprehensive enough in my testing.
253253+254254+So I leave you, dear reader, with some questions this time.
255255+256256+What choices make sense here? For Jacquard as a library, for writing web applications in Rust, and so on. I'm pretty damn good at this (if I do say so myself, and enough other people agree that I must accept it), but I'm also one person, with a necessarily incomplete understanding of the totality of the field.