···
                .build(),
        )
        .await
-        .map_err(|e| CapturedError::from_display(format_smolstr!("Failed to fetch entry: {}", e).as_str().to_string()))?;
+        .map_err(|e| {
+            CapturedError::from_display(
+                format_smolstr!("Failed to fetch entry: {}", e)
+                    .as_str()
+                    .to_string(),
+            )
+        })?;

-        let record = resp
-            .into_output()
-            .map_err(|e| CapturedError::from_display(format_smolstr!("Failed to parse entry: {}", e).as_str().to_string()))?;
+        let record = resp.into_output().map_err(|e| {
+            CapturedError::from_display(
+                format_smolstr!("Failed to parse entry: {}", e)
+                    .as_str()
+                    .to_string(),
+            )
+        })?;

        // Parse the entry
        let entry: Entry = jacquard::from_data(&record.value).map_err(|e| {
-            CapturedError::from_display(format_smolstr!("Failed to deserialize entry: {}", e).as_str().to_string())
+            CapturedError::from_display(
+                format_smolstr!("Failed to deserialize entry: {}", e)
+                    .as_str()
+                    .to_string(),
+            )
        })?;

        // Find the image by name
···
            })
            .map(|img| img.image.blob().cid().clone().into_static())
            .ok_or_else(|| {
-                CapturedError::from_display(format_smolstr!("Image '{}' not found in entry", name).as_str().to_string())
+                CapturedError::from_display(
+                    format_smolstr!("Image '{}' not found in entry", name)
+                        .as_str()
+                        .to_string(),
+                )
            })?;

        // Check cache first
···
        )
        .await
        .map_err(|e| {
-            CapturedError::from_display(format_smolstr!("Failed to fetch PublishedBlob: {}", e).as_str().to_string())
+            CapturedError::from_display(
+                format_smolstr!("Failed to fetch PublishedBlob: {}", e)
+                    .as_str()
+                    .to_string(),
+            )
        })?;

        let record = resp.into_output().map_err(|e| {
-            CapturedError::from_display(format_smolstr!("Failed to parse PublishedBlob: {}", e).as_str().to_string())
+            CapturedError::from_display(
+                format_smolstr!("Failed to parse PublishedBlob: {}", e)
+                    .as_str()
+                    .to_string(),
+            )
        })?;

        // Parse the PublishedBlob
        let published: PublishedBlob = jacquard::from_data(&record.value).map_err(|e| {
-            CapturedError::from_display(format_smolstr!("Failed to deserialize PublishedBlob: {}", e).as_str().to_string())
+            CapturedError::from_display(
+                format_smolstr!("Failed to deserialize PublishedBlob: {}", e)
+                    .as_str()
+                    .to_string(),
+            )
        })?;

        // Get CID from the upload blob ref
···
            .get_notebook_by_key(notebook_key)
            .await?
            .ok_or_else(|| {
-                CapturedError::from_display(format_smolstr!("Notebook '{}' not found", notebook_key).as_str().to_string())
+                CapturedError::from_display(
+                    format_smolstr!("Notebook '{}' not found", notebook_key)
+                        .as_str()
+                        .to_string(),
+                )
            })?;

        let (view, entry_refs) = notebook.as_ref();

        // Get the DID from the notebook URI for blob fetching
        let notebook_did = jacquard::types::aturi::AtUri::new(view.uri.as_ref())
-            .map_err(|e| CapturedError::from_display(format_smolstr!("Invalid notebook URI: {}", e).as_str().to_string()))?
+            .map_err(|e| {
+                CapturedError::from_display(
+                    format_smolstr!("Invalid notebook URI: {}", e)
+                        .as_str()
+                        .to_string(),
+                )
+            })?
            .authority()
            .clone()
            .into_static();
···
        let client = self.fetcher.get_client();
        for entry_ref in entry_refs {
            // Parse the entry URI to get rkey
-            let entry_uri = jacquard::types::aturi::AtUri::new(entry_ref.uri.as_ref())
-                .map_err(|e| CapturedError::from_display(format_smolstr!("Invalid entry URI: {}", e).as_str().to_string()))?;
+            let entry_uri = jacquard::types::aturi::AtUri::new(entry_ref.entry.uri.as_ref())
+                .map_err(|e| {
+                    CapturedError::from_display(
+                        format_smolstr!("Invalid entry URI: {}", e)
+                            .as_str()
+                            .to_string(),
+                    )
+                })?;
            let rkey = entry_uri
                .rkey()
                .ok_or_else(|| CapturedError::from_display("Entry URI missing rkey"))?;
···
            }
        }

-        Err(CapturedError::from_display(format_smolstr!(
-            "Image '{}' not found in notebook '{}'",
-            image_name, notebook_key
-        ).as_str().to_string()))
+        Err(CapturedError::from_display(
+            format_smolstr!(
+                "Image '{}' not found in notebook '{}'",
+                image_name,
+                notebook_key
+            )
+            .as_str()
+            .to_string(),
+        ))
    }

    /// Insert bytes directly into cache (for pre-warming after upload)
crates/weaver-app/src/components/editor/writer.rs (+67 -3)
···

        self.last_char_offset = closing_char_end;
    }
-    Html(html) | InlineHtml(html) => {
+    Html(html) => {
        // Track offset mapping for raw HTML
        let char_start = self.last_char_offset;
        let html_char_len = html.chars().count();
···

        self.write(&html)?;

+        // Record mapping for inline HTML
+        self.record_mapping(range.clone(), char_start..char_end);
+        self.last_char_offset = char_end;
+    }
+    InlineHtml(html) => {
+        // Track offset mapping for raw HTML
+        let char_start = self.last_char_offset;
+        let html_char_len = html.chars().count();
+        let char_end = char_start + html_char_len;
+        self.write(r#"<span class="html-embed html-embed-inline">"#)?;
+        self.write(&html)?;
+        self.write("</span>")?;
        // Record mapping for inline HTML
        self.record_mapping(range.clone(), char_start..char_end);
        self.last_char_offset = char_end;
···

        // Emit the opening tag
        match tag {
-            Tag::HtmlBlock => Ok(()),
+            // HTML blocks get their own paragraph to try and corral them better
+            Tag::HtmlBlock => {
+                // Record paragraph start for boundary tracking
+                // BUT skip if inside a list - list owns the paragraph boundary
+                if self.list_depth == 0 {
+                    self.current_paragraph_start =
+                        Some((self.last_byte_offset, self.last_char_offset));
+                }
+                let node_id = self.gen_node_id();
+
+                if self.end_newline {
+                    write!(
+                        &mut self.writer,
+                        r#"<p id="{}" class="html-embed html-embed-block">"#,
+                        node_id
+                    )?;
+                } else {
+                    write!(
+                        &mut self.writer,
+                        "\n<p id=\"{}\" class=\"html-embed html-embed-block\">",
+                        node_id
+                    )?;
+                }
+                self.begin_node(node_id.clone());
+
+                // Map the start position of the paragraph (before any content)
+                // This allows cursor to be placed at the very beginning
+                let para_start_char = self.last_char_offset;
+                let mapping = OffsetMapping {
+                    byte_range: range.start..range.start,
+                    char_range: para_start_char..para_start_char,
+                    node_id,
+                    char_offset_in_node: 0,
+                    child_index: Some(0), // position before first child
+                    utf16_len: 0,
+                };
+                self.offset_maps.push(mapping);
+
+                Ok(())
+            }
            Tag::Paragraph => {
                // Record paragraph start for boundary tracking
                // BUT skip if inside a list - list owns the paragraph boundary
···

        // Emit tag HTML first
        let result = match tag {
-            TagEnd::HtmlBlock => Ok(()),
+            TagEnd::HtmlBlock => {
+                // Record paragraph end for boundary tracking
+                // BUT skip if inside a list - list owns the paragraph boundary
+                if self.list_depth == 0 {
+                    if let Some((byte_start, char_start)) = self.current_paragraph_start.take() {
+                        let byte_range = byte_start..self.last_byte_offset;
+                        let char_range = char_start..self.last_char_offset;
+                        self.paragraph_ranges.push((byte_range, char_range));
+                    }
+                }
+
+                self.end_node();
+                self.write("</p>\n")
+            }
            TagEnd::Paragraph => {
                // Record paragraph end for boundary tracking
                // BUT skip if inside a list - list owns the paragraph boundary
crates/weaver-app/src/components/identity.rs (+111 -40)
···
use jacquard::{smol_str::SmolStr, types::ident::AtIdentifier};
use std::collections::HashSet;
use weaver_api::com_atproto::repo::strong_ref::StrongRef;
-use weaver_api::sh_weaver::notebook::{EntryView, NotebookView, entry::Entry};
+use weaver_api::sh_weaver::notebook::{
+    BookEntryRef, BookEntryView, EntryView, NotebookView, entry::Entry,
+};
+
+/// Constructs BookEntryViews from notebook entry refs and all available entries.
+///
+/// Matches StrongRefs by URI to find the corresponding EntryView,
+/// then builds BookEntryView with index and prev/next navigation refs.
+fn build_book_entry_views(
+    entry_refs: &[StrongRef<'static>],
+    all_entries: &[(EntryView<'static>, Entry<'static>)],
+) -> Vec<BookEntryView<'static>> {
+    use jacquard::IntoStatic;
+
+    // Build a lookup map for faster matching
+    let entry_map: std::collections::HashMap<&str, &EntryView<'static>> = all_entries
+        .iter()
+        .map(|(view, _)| (view.uri.as_ref(), view))
+        .collect();
+
+    let mut views = Vec::with_capacity(entry_refs.len());
+
+    for (idx, strong_ref) in entry_refs.iter().enumerate() {
+        let Some(entry_view) = entry_map.get(strong_ref.uri.as_ref()).copied() else {
+            continue;
+        };
+
+        // Build prev ref (if not first)
+        let prev = if idx > 0 {
+            entry_refs
+                .get(idx - 1)
+                .and_then(|prev_ref| entry_map.get(prev_ref.uri.as_ref()).copied())
+                .map(|prev_view| {
+                    BookEntryRef::new()
+                        .entry(prev_view.clone())
+                        .build()
+                        .into_static()
+                })
+        } else {
+            None
+        };
+
+        // Build next ref (if not last)
+        let next = if idx + 1 < entry_refs.len() {
+            entry_refs
+                .get(idx + 1)
+                .and_then(|next_ref| entry_map.get(next_ref.uri.as_ref()).copied())
+                .map(|next_view| {
+                    BookEntryRef::new()
+                        .entry(next_view.clone())
+                        .build()
+                        .into_static()
+                })
+        } else {
+            None
+        };
+
+        views.push(
+            BookEntryView::new()
+                .entry(entry_view.clone())
+                .index(idx as i64)
+                .maybe_prev(prev)
+                .maybe_next(next)
+                .build()
+                .into_static(),
+        );
+    }
+
+    views
+}

/// A single item in the profile timeline (either notebook or standalone entry)
#[derive(Clone, PartialEq)]
pub enum ProfileTimelineItem {
    Notebook {
        notebook: NotebookView<'static>,
-        entries: Vec<StrongRef<'static>>,
+        entries: Vec<BookEntryView<'static>>,
        /// Most recent entry's created_at for sorting
        sort_date: jacquard::types::string::Datetime,
    },
···
    let auth_state = use_context::<Signal<AuthState>>();

    // Use client-only versions to avoid SSR issues with concurrent server futures
-    let (_profile_res, profile) = data::use_profile_data_client(ident);
-    let (_notebooks_res, notebooks) = data::use_notebooks_for_did_client(ident);
-    let (_entries_res, all_entries) = data::use_entries_for_did_client(ident);
+    let (_profile_res, profile) = data::use_profile_data(ident);
+    let (_notebooks_res, notebooks) = data::use_notebooks_for_did(ident);
+    let (_entries_res, all_entries) = data::use_entries_for_did(ident);
+
+    #[cfg(feature = "fullstack-server")]
+    {
+        _profile_res?;
+        _notebooks_res?;
+        _entries_res?;
+    }

    // Check if viewing own profile
    let is_own_profile = use_memo(move || {
···
    if let Some(all_ents) = ents.as_ref() {
        for (notebook, entry_refs) in nbs {
            if is_pinned(notebook.uri.as_ref(), pinned_set) {
-                let sort_date = entry_refs
+                let book_entries = build_book_entry_views(entry_refs, all_ents);
+                let sort_date = book_entries
                    .iter()
-                    .filter_map(|r| {
+                    .filter_map(|bev| {
                        all_ents
                            .iter()
-                            .find(|(v, _)| v.uri.as_ref() == r.uri.as_ref())
+                            .find(|(v, _)| v.uri.as_ref() == bev.entry.uri.as_ref())
                    })
                    .map(|(_, entry)| entry.created_at.clone())
                    .max()
···

                items.push(ProfileTimelineItem::Notebook {
                    notebook: notebook.clone(),
-                    entries: entry_refs.clone(),
+                    entries: book_entries,
                    sort_date,
                });
            }
···
    if let Some(all_ents) = ents.as_ref() {
        for (notebook, entry_refs) in nbs {
            if !is_pinned(notebook.uri.as_ref(), pinned_set) {
-                let sort_date = entry_refs
+                let book_entries = build_book_entry_views(entry_refs, all_ents);
+                let sort_date = book_entries
                    .iter()
-                    .filter_map(|r| {
+                    .filter_map(|bev| {
                        all_ents
                            .iter()
-                            .find(|(v, _)| v.uri.as_ref() == r.uri.as_ref())
+                            .find(|(v, _)| v.uri.as_ref() == bev.entry.uri.as_ref())
                    })
                    .map(|(_, entry)| entry.created_at.clone())
                    .max()
···

                items.push(ProfileTimelineItem::Notebook {
                    notebook: notebook.clone(),
-                    entries: entry_refs.clone(),
+                    entries: book_entries,
                    sort_date,
                });
            }
···
                class: "pinned-item",
                NotebookCard {
                    notebook: notebook.clone(),
-                    entry_refs: entries.clone(),
+                    entries: entries.clone(),
                    is_pinned: true,
                    profile_ident: Some(ident()),
                }
···
                key: "notebook-{notebook.cid}",
                NotebookCard {
                    notebook: notebook.clone(),
-                    entry_refs: entries.clone(),
+                    entries: entries.clone(),
                    is_pinned: false,
                    profile_ident: Some(ident()),
                }
···

    let entry_view = &book_entry_view.entry;

-    let entry_title = entry_view.title.as_ref()
+    let entry_title = entry_view
+        .title
+        .as_ref()
        .map(|t| t.as_ref())
        .unwrap_or("Untitled");

-    let entry_path = entry_view.path
+    let entry_path = entry_view
+        .path
        .as_ref()
        .map(|p| p.as_ref().to_string())
        .unwrap_or_else(|| entry_title.to_string());
···
        html_buf
    });

-    let created_at = parsed_entry.as_ref()
+    let created_at = parsed_entry
+        .as_ref()
        .map(|entry| entry.created_at.as_ref().format("%B %d, %Y").to_string());

    let entry_uri = entry_view.uri.clone().into_static();
···
#[component]
pub fn NotebookCard(
    notebook: NotebookView<'static>,
-    entry_refs: Vec<StrongRef<'static>>,
+    entries: Vec<BookEntryView<'static>>,
    #[props(default = false)] is_pinned: bool,
    #[props(default)] show_author: Option<bool>,
    /// Profile identity for context-aware author visibility (hides single author on their own profile)
-    #[props(default)] profile_ident: Option<AtIdentifier<'static>>,
+    #[props(default)]
+    profile_ident: Option<AtIdentifier<'static>>,
    #[props(default)] on_pinned_changed: Option<EventHandler<bool>>,
    #[props(default)] on_deleted: Option<EventHandler<()>>,
) -> Element {
···
    let ident = notebook.uri.authority().clone().into_static();
    let book_title: SmolStr = notebook_path.clone().into();

-    // Fetch all entries to get first/last
-    let ident_for_fetch = ident.clone();
-    let book_title_for_fetch = book_title.clone();
-    let entries = use_resource(use_reactive!(|(ident_for_fetch, book_title_for_fetch)| {
-        let fetcher = fetcher.clone();
-        async move {
-            fetcher
-                .list_notebook_entries(ident_for_fetch, book_title_for_fetch)
-                .await
-                .ok()
-                .flatten()
-        }
-    }));
    rsx! {
        div { class: "notebook-card",
            div { class: "notebook-card-container",
···
            }

            // Entry previews section
-            if let Some(Some(entry_list)) = entries() {
            div { class: "notebook-card-previews",
                {
                    use jacquard::from_data;
                    use weaver_api::sh_weaver::notebook::entry::Entry;
+                    tracing::info!("rendering entries: {:?}", entries.iter().map(|e|
+                        e.entry.uri.as_ref()).collect::<Vec<_>>());

-                    if entry_list.len() <= 5 {
+                    if entries.len() <= 5 {
                        // Show all entries if 5 or fewer
                        rsx! {
-                            for entry_view in entry_list.iter() {
+                            for entry_view in entries.iter() {
                                NotebookEntryPreview {
                                    book_entry_view: entry_view.clone(),
                                    ident: ident.clone(),
···
                    } else {
                        // Show first, interstitial, and last
                        rsx! {
-                            if let Some(first_entry) = entry_list.first() {
+                            if let Some(first_entry) = entries.first() {
                                NotebookEntryPreview {
                                    book_entry_view: first_entry.clone(),
                                    ident: ident.clone(),
···

                            // Interstitial showing count
                            {
-                                let middle_count = entry_list.len().saturating_sub(2);
+                                let middle_count = entries.len().saturating_sub(2);
                                rsx! {
                                    div { class: "notebook-entry-interstitial",
                                        "... {middle_count} more "
···
                                }
                            }

-                            if let Some(last_entry) = entry_list.last() {
+                            if let Some(last_entry) = entries.last() {
                                NotebookEntryPreview {
                                    book_entry_view: last_entry.clone(),
                                    ident: ident.clone(),
···
                    }
                }
            }
-            }
+

            if let Some(ref tags) = notebook.tags {
                if !tags.is_empty() {
···
    #[serde(with = "clickhouse::serde::chrono::datetime64::millis")]
    pub created_at: chrono::DateTime<chrono::Utc>,
    #[serde(with = "clickhouse::serde::chrono::datetime64::millis")]
+    pub updated_at: chrono::DateTime<chrono::Utc>,
+    #[serde(with = "clickhouse::serde::chrono::datetime64::millis")]
    pub indexed_at: chrono::DateTime<chrono::Utc>,
    pub record: SmolStr,
}
···
    /// List entries for a specific notebook, ordered by position in the notebook.
    ///
    /// Uses notebook_entries table to get entries that belong to this notebook.
+    /// Deduplicates entries by rkey, keeping the most recently updated version.
    pub async fn list_notebook_entries(
        &self,
        notebook_did: &str,
···
        limit: u32,
        cursor: Option<u32>,
    ) -> Result<Vec<EntryRow>, IndexError> {
+        use std::collections::HashMap;
+
        let query = r#"
            SELECT
                e.did AS did,
···
                e.tags AS tags,
                e.author_dids AS author_dids,
                e.created_at AS created_at,
+                e.updated_at AS updated_at,
                e.indexed_at AS indexed_at,
                e.record AS record
            FROM notebook_entries ne FINAL
-            INNER JOIN entries e ON
+            INNER JOIN entries FINAL AS e ON
                e.did = ne.entry_did
                AND e.rkey = ne.entry_rkey
                AND e.deleted_at = toDateTime64(0, 3)
···
            .bind(notebook_did)
            .bind(notebook_rkey)
            .bind(cursor_val)
-            .bind(limit)
+            // Fetch extra to account for duplicates we'll filter out
+            .bind(limit * 2)
            .fetch_all::<EntryRow>()
            .await
            .map_err(|e| ClickHouseError::Query {
···
                source: e,
            })?;

-        Ok(rows)
+        // Dedupe by rkey, keeping the most recently updated version
+        let mut seen: HashMap<SmolStr, usize> = HashMap::new();
+        let mut deduped: Vec<EntryRow> = Vec::with_capacity(rows.len());
+
+        for row in rows {
+            if let Some(&existing_idx) = seen.get(&row.rkey) {
+                // Keep the one with the more recent updated_at
+                if row.updated_at > deduped[existing_idx].updated_at {
+                    deduped[existing_idx] = row;
+                }
+            } else {
+                seen.insert(row.rkey.clone(), deduped.len());
+                deduped.push(row);
+            }
+
+            // Stop once we have enough unique entries
+            if deduped.len() >= limit as usize {
+                break;
+            }
+        }
+
+        Ok(deduped)
    }

    /// Get an entry by rkey, picking the most recent version across collaborators.
···
                tags,
                author_dids,
                created_at,
+                updated_at,
                indexed_at,
                record
            FROM entries FINAL
···
                tags,
                author_dids,
                created_at,
+                updated_at,
                indexed_at,
                record
            FROM entries FINAL
···
                tags,
                author_dids,
                created_at,
+                updated_at,
                indexed_at,
                record
            FROM entries FINAL
···
    ) -> Result<Vec<EntryRow>, IndexError> {
        let query = if cursor.is_some() {
            r#"
-            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record
+            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record
            FROM (
                SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record,
                    ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn
···
            "#
        } else {
            r#"
-            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record
+            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record
            FROM (
                SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record,
                    ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn
···
    ) -> Result<Vec<EntryRow>, IndexError> {
        let base_query = if tags.is_some() && cursor.is_some() {
            r#"
-            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record
+            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record
            FROM (
                SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record,
                    ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn
···
            "#
        } else if tags.is_some() {
            r#"
-            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record
+            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record
            FROM (
                SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record,
                    ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn
···
            "#
        } else if cursor.is_some() {
            r#"
-            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record
+            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record
            FROM (
                SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record,
                    ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn
···
            "#
        } else {
            r#"
-            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record
+            SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record
            FROM (
                SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record,
                    ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn
···
                e.tags AS tags,
                e.author_dids AS author_dids,
                e.created_at AS created_at,
+                e.updated_at AS updated_at,
                e.indexed_at AS indexed_at,
                e.record AS record
            FROM notebook_entries ne FINAL
-            INNER JOIN entries e ON
+            INNER JOIN entries FINAL AS e ON
                e.did = ne.entry_did
                AND e.rkey = ne.entry_rkey
                AND e.deleted_at = toDateTime64(0, 3)
crates/weaver-index/src/endpoints/actor.rs (+36 -1)
···
use jacquard::IntoStatic;
use jacquard::cowstr::ToCowStr;
use jacquard::identity::resolver::IdentityResolver;
+use jacquard::prelude::*;
use jacquard::types::ident::AtIdentifier;
use jacquard::types::string::{AtUri, Cid, Did, Handle, Uri};
use jacquard_axum::ExtractXrpc;
···
    })?;

    let Some(data) = profile_data else {
-        return Err(XrpcErrorResponse::not_found("Profile not found"));
+        // get the bluesky profile
+        // TODO: either cache this or yell at tap to start tracking their account!
+        let profile_resp = state
+            .resolver
+            .send(
+                weaver_api::app_bsky::actor::get_profile::GetProfile::new()
+                    .actor(did)
+                    .build(),
+            )
+            .await
+            .map_err(|e| XrpcErrorResponse::not_found(e.to_string()))?;
+        let bsky_profile = profile_resp
+            .into_output()
+            .map_err(|e| XrpcErrorResponse::not_found(e.to_string()))?
+            .value;
+        let inner_profile = ProfileView::new()
+            .did(bsky_profile.did)
+            .handle(bsky_profile.handle)
+            .maybe_display_name(bsky_profile.display_name)
+            .maybe_description(bsky_profile.description)
+            .maybe_avatar(bsky_profile.avatar)
+            .maybe_banner(bsky_profile.banner)
+            .build();
+
+        let inner = ProfileDataViewInner::ProfileView(Box::new(inner_profile));
+
+        let output = ProfileDataView::new().inner(inner).build();
+
+        return Ok(Json(
+            GetProfileOutput {
+                value: output,
+                extra_data: None,
+            }
+            .into_static(),
+        ));
    };

    // Build the response
crates/weaver-index/src/endpoints/bsky.rs (+1)
···
    State(state): State<AppState>,
    ExtractXrpc(args): ExtractXrpc<GetProfileRequest>,
) -> Result<Json<GetProfileOutput<'static>>, XrpcErrorResponse> {
+    // TODO: either cache this or yell at tap to start tracking their account!
    let response = state.resolver.send(args).await.map_err(|e| {
        tracing::warn!("Appview getProfile failed: {}", e);
        XrpcErrorResponse::internal_error("Failed to fetch profile from appview")
crates/weaver-index/src/endpoints/edit.rs (+11 -2)
···

/// Handle sh.weaver.edit.listDrafts
///
-/// Returns draft records for an actor.
+/// Returns draft records for an actor. Requires authentication.
+/// Only returns drafts if viewer is the actor or has collab permission.
pub async fn list_drafts(
    State(state): State<AppState>,
    ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth,
    ExtractXrpc(args): ExtractXrpc<ListDraftsRequest>,
) -> Result<Json<ListDraftsOutput<'static>>, XrpcErrorResponse> {
-    let _viewer: Viewer = viewer;
+    // Require authentication
+    let viewer = viewer.ok_or_else(|| XrpcErrorResponse::auth_required("Authentication required"))?;
+    let viewer_did = viewer.did();

    let limit = args.limit.unwrap_or(50).min(100).max(1);

···

    // Resolve actor to DID
    let actor_did = resolve_actor(&state, &args.actor).await?;
+
+    // Permission check: viewer must be the actor (owner access)
+    // TODO: Add collab grant check for draft sharing
+    if viewer_did.as_str() != actor_did.as_str() {
+        return Err(XrpcErrorResponse::forbidden("Cannot view another user's drafts"));
+    }

    // Fetch drafts
    let draft_rows = state
crates/weaver-index/src/endpoints/notebook.rs (+1 -4)
···
    let name = args.name.as_ref();

    let limit = args.entry_limit.unwrap_or(50).clamp(1, 100) as u32;
-    let cursor: Option<u32> = args
-        .entry_cursor
-        .as_deref()
-        .and_then(|c| c.parse().ok());
+    let cursor: Option<u32> = args.entry_cursor.as_deref().and_then(|c| c.parse().ok());

    // Fetch notebook first to get its rkey
    let notebook_row = state
···
If you've been to this site before, you may have noticed it loaded a fair bit more quickly this time. That's not really because the web server creating this HTML got a whole lot better. It did require some refactoring, but that was mostly in the vein of taking some code and adding new code that did the same thing, gated behind a cargo feature. In the final binary, however, this had the side effect of replacing functions that are literally hundreds of lines long (and that in turn call other functions of similar size, making several cascading network requests) with functions that look like this, which make by and large a single network request and return exactly what is required.
```rust
···
```

Of course the reason is that I finally got round to building the Weaver AppView. I'm going to be calling mine the Index, because Weaver is about writing and I think "AppView" as a term kind of sucks and "index" is much more elegant, on top of being a good descriptor of what the big backend service now powering Weaver does. ![[at://did:plc:ragtjsm2j2vknwkz3zp4oxrd/app.bsky.feed.post/3lyucxfxq622w]]
For the uninitiated, because I expect at least some people reading this aren't big into AT Protocol development: an AppView is an instance of the kind of big backend service that Bluesky PBLLC runs, which powers essentially every Bluesky client, with a few notable exceptions such as [Red Dwarf](https://reddwarf.app/) and (partially, eventually more completely) [Blacksky](https://blacksky.community/). It listens to the [Firehose](https://bsky.network/) [event stream](https://atproto.com/specs/event-stream) from the main Bluesky Relay and analyzes the data which comes through that pertains to Bluesky, producing your timeline feeds, figuring out who follows you, who you block and who blocks you (and filtering them out of your view of the app), how many people liked your last post, and so on. It exists because the records in your PDS (and those of all the other people on Bluesky) need context and relationships to give them meaning, and that context can then be passed along to you without your app having to go collect it all. ![[at://did:plc:uu5axsmbm2or2dngy4gwchec/app.bsky.feed.post/3lsc2tzfsys2f]]

It's a very normal backend with some weird constraints because of the protocol, and in practice it's the thing that most separates the day-to-day Bluesky experience from the Mastodon experience. It's also by far the most centralising force in the network, because it also does moderation, and because it's quite expensive to run. A full index of all Bluesky activity takes a lot of storage (futur's Zeppelin experiment detailed above took about 16 terabytes using PostgreSQL for the database and cost $200/month to run), and it takes that much more computing power to calculate all the relationships between the data on the fly as new events come in and then serve personalized versions to everyone that uses it.

It's not the only AppView out there; most atproto apps have something like this. Tangled, Streamplace, Leaflet, and so on all have substantial backends. Some (like Tangled) actually combine the front end you interact with and the AppView into a single service. But in general these are big, complicated, persistent services you have to backfill from existing data to bootstrap, and they strongly shape your app, whether or not they're literally part of the same executable or hosted on the same server. And when I started building Weaver in earnest, not only did I still have a few big unanswered questions about how I wanted Weaver to work and how it needed to work, I also didn't want to fundamentally tie it to some big server and create that kind of centralising force. I wanted it to be possible for someone else to run it without being dependent on me personally, ideally possible even if all they had access to was a static site host like GitHub Pages or a browser runtime platform like Cloudflare Workers, so long as someone somewhere was running a couple of generic services. I wanted to be able to distribute the fullstack server version as basically just an executable in a directory of files with no other dependencies, which could easily be run in any container hosting environment with zero persistent storage required. Hell, you could technically serve it as a blob or series of blobs from your PDS with the right entry point if I did my job right.
···
In contrast, notebook entry records lack links to other parts of the notebook in and of themselves, because calculating them would be challenging: updating one entry would require not just updating the entry itself and the notebook it's in, but also neighbouring entries in said notebook. With the shape of collaborative publishing in Weaver, that would result in up to 4 writes to the PDS when you publish an entry, in addition to any blob uploads. And trying to link the other way in edit history (root to edit head) is similarly challenging.

I anticipated some of these, but others emerged only because I ran into them while building the web app. I've had to manually fix up records more than once because I made breaking changes to my lexicons after discovering I really wanted X piece of metadata or cross-linkage. If I'd built the index first or alongside—particularly if the index remained a separate service from the web app as I intended it to, to keep the web app simple—it would likely have constrained my choices and potentially cut off certain solutions, due to the time it takes to dump the database and re-run backfill even at a very small scale. Building a big chunk of the front end first told me exactly what the index needed to provide easy access to.
+You can access it here: [index.weaver.sh](https://index.weaver.sh)

# ClickHAUS
-So what does Weaver's index look like? Well it starts with either the firehose or the new Tap sync tool. The index ingests from either over a WebSocket connection, does a bit of processing (less is required when ingesting from Tap, and that's currently what I've deployed) and then dumps them in the Clickhouse database. I chose it as the primary index database on recommendation from a friend, and after doing a lot of reading. It fits atproto data well, as Graze found. Because it isolates concurrent inserts and selects so that you can just dump data in, while it cleans things up asynchronously after, it does wonderfully when you have a single major input point or a set of them to dump into that fans out, which you can then transform and then read from.
+So what does Weaver's index look like? Well, it starts with either the firehose or the new [Tap](https://docs.bsky.app/blog/introducing-tap) sync tool. The index ingests from either over a WebSocket connection, does a bit of processing (less is required when ingesting from Tap, and that's currently what I've deployed), and then dumps the records into the Clickhouse database. I chose Clickhouse as the primary index database on a friend's recommendation, and after doing a lot of reading. It fits atproto data well, as Graze found. Because it isolates concurrent inserts from selects, you can just dump data in while it cleans things up asynchronously afterwards; it works wonderfully when you have a single major input point, or a set of them, that fans out into tables you can then transform and read from.
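
If the shape of that pipeline is hard to picture, here's a minimal sketch of the ingest side. This is not the real ingester: it assumes a Jetstream/Tap-style JSON event coming over the WebSocket and a simplified `raw_records` row, so every name in it is illustrative.

```rust
use clickhouse::{Client, Row};
use futures_util::StreamExt;
use serde::Serialize;
use tokio_tungstenite::{connect_async, tungstenite::Message};

// Simplified row mirroring a few raw_records columns; not the real schema.
#[derive(Row, Serialize)]
struct RawRecord {
    did: String,
    collection: String,
    rkey: String,
    record: String, // record JSON kept verbatim
}

async fn ingest(ws_url: &str, ch_url: &str) -> anyhow::Result<()> {
    let client = Client::default().with_url(ch_url);
    let (mut stream, _) = connect_async(ws_url).await?;

    // Each event becomes a row. Clickhouse merges and sorts asynchronously,
    // so the hot path is just "append and move on".
    while let Some(msg) = stream.next().await {
        if let Message::Text(text) = msg? {
            let event: serde_json::Value = serde_json::from_str(&text)?;
            let row = RawRecord {
                did: event["did"].as_str().unwrap_or_default().to_owned(),
                collection: event["collection"].as_str().unwrap_or_default().to_owned(),
                rkey: event["rkey"].as_str().unwrap_or_default().to_owned(),
                record: event["record"].to_string(),
            };
            let mut insert = client.insert("raw_records")?;
            insert.write(&row).await?;
            insert.end().await?;
        }
    }
    Ok(())
}
```

A real ingester batches rows rather than opening an insert per event, and tracks a cursor so it can resume, but the basic motion is exactly this: append rows and let the merge engine clean up behind you.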
+I will not claim that the tables you can find in the weaver repository are especially **good** database design overall, but they work, they're very much a work in progress, and we'll see how they scale. Also, Tap makes re-backfilling the data a hell of a lot easier.

-I will not claim that the tables you can find in the weaver repository are especially **good** database design overall, but they work, and we'll see how they scale. This is one of three main input tables. One for record writes, one for identity events, and one for account events.
+This is one of the three main input tables: one for record writes, one for identity events, and one for account events.
```SQL
CREATE TABLE IF NOT EXISTS raw_records (
    did String,
···
ENGINE = MergeTree()
ORDER BY (collection, did, rkey, event_time, indexed_at);
```
-From here we fan out into a cascading series of materialized views and other specialised tables. These break out the different record types, calculate metadata, and pull critical fields out of the record JSON for easier querying. Clickhouse's wild-ass compression means we're not too badly off replicating data on disk this way. Seriously, their JSON type ends up being the same size as a CBOR BLOB on disk in my testing, though it *does* have some quirks, as I discovered when I read back Datetime fields and got...not the format I put in. Thankfully there's a config setting for that. We also build out the list of who contributed to a published entry and determine the canonical record for it, so that fetching a fully hydrated entry with all contributor profiles only takes a couple of `SELECT` queries that themselves avoid performing extensive table scans due to reasonable choices of `ORDER BY` fields in the denormalized tables they query. And then I can do quirky things like power a profile fetch endpoint that will provide either a Weaver or a Bluesky profile, while also unifying fields so that we can easily get at the critical stuff in common. This is a relatively expensive calculation, but people thankfully don't edit their profiles that often, and this is why we don't keep the stats in the same table.
+From here we fan out into a cascading series of materialized views and other specialised tables. These break out the different record types, calculate metadata, and pull critical fields out of the record JSON for easier querying. Clickhouse's wild-ass compression means we're not too badly off replicating data on disk this way. Seriously, their JSON type ends up being the same size as a CBOR BLOB on disk in my testing, though it *does* have some quirks, as I discovered when I read back Datetime fields and got...not the format I put in. Thankfully there's a config setting for that. We also build out the list of who contributed to a published entry and determine the canonical record for it, so that fetching a fully hydrated entry with all contributor profiles only takes a couple of `SELECT` queries, and those queries are very fast because reasonable choices of `ORDER BY` fields in the denormalized tables let them avoid extensive table scans. And then I can do quirky things like power a profile fetch endpoint that will provide either a Weaver or a Bluesky profile, while also unifying fields so that we can easily get at the critical stuff in common. That is a relatively expensive calculation, but people thankfully don't edit their profiles that often, which is why we don't keep the stats in the same table.
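
To make that fan-out concrete, the general pattern looks something like the sketch below. The column names are invented for illustration rather than copied from the real schema: a materialized view fires on every insert into `raw_records`, extracts typed fields from the record JSON, and writes them into a destination table whose `ORDER BY` matches how it will actually be queried.

```rust
use clickhouse::Client;

// Illustrative DDL, not Weaver's actual schema: a materialized view that
// extracts entry fields from the raw record JSON at insert time, so reads
// never have to parse JSON or scan unrelated collections.
const ENTRIES_MV_DDL: &str = r#"
    CREATE MATERIALIZED VIEW IF NOT EXISTS entries_mv TO entries AS
    SELECT
        did,
        rkey,
        cid,
        JSONExtractString(record, 'title') AS title,
        parseDateTime64BestEffort(JSONExtractString(record, 'createdAt'), 3) AS created_at,
        event_time AS updated_at,
        indexed_at,
        record
    FROM raw_records
    WHERE collection = 'sh.weaver.notebook.entry'
"#;

async fn create_views(client: &Client) -> clickhouse::error::Result<()> {
    // DDL goes through the client like any other statement.
    client.query(ENTRIES_MV_DDL).execute().await
}
```

Because the extraction happens once at insert time, the read path just hits the destination table's sort key.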

However, this is ***also*** why Clickhouse will not be the only database used in the index.
···
If people have ideas, I'm all ears.

-I hope you found this interesting. I enjoyed writing it out.
+## Future
+
+Having this available obviously improves the performance of the app, but it also enables a lot of new stuff. I have plans for social features which would have been much harder to implement without it, and which can later be backfilled into the non-indexed implementation. I have more substantial rewrites of the data fetching code planned as well, beyond the straightforward replacement I did in this first pass. And there's still a **lot** more to do on the editor before it's done.
+
+I've been joking about all sorts of ambitious things, but I legitimately think Weaver's design ends up making it almost uniquely flexible and powerful among the atproto-based long-form writing platforms, particularly in how it enables people to create things together, and that it can end up filling some big shoes, given enough time and development effort.
+
+I hope you found this interesting. I enjoyed writing it out. There's still a lot more to do, but this was a big milestone for me.
+
+If you'd like to support this project, there's a GitHub Sponsorship link at the bottom of the page, but honestly I'd love it if you used it to write something.