wip bugfixes

Orual 6f2e5ec3 d3c5919b

+634 -222
+14 -14
Cargo.lock
··· 1496 1496 1497 1497 [[package]] 1498 1498 name = "cookie_store" 1499 - version = "0.21.1" 1499 + version = "0.22.0" 1500 1500 source = "registry+https://github.com/rust-lang/crates.io-index" 1501 - checksum = "2eac901828f88a5241ee0600950ab981148a18f2f756900ffba1b125ca6a3ef9" 1501 + checksum = "3fc4bff745c9b4c7fb1e97b25d13153da2bc7796260141df62378998d070207f" 1502 1502 dependencies = [ 1503 1503 "cookie", 1504 1504 "document-features", ··· 5567 5567 [[package]] 5568 5568 name = "jacquard" 5569 5569 version = "0.9.4" 5570 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5570 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5571 5571 dependencies = [ 5572 5572 "bytes", 5573 5573 "getrandom 0.2.16", ··· 5599 5599 [[package]] 5600 5600 name = "jacquard-api" 5601 5601 version = "0.9.2" 5602 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5602 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5603 5603 dependencies = [ 5604 5604 "bon", 5605 5605 "bytes", ··· 5618 5618 [[package]] 5619 5619 name = "jacquard-axum" 5620 5620 version = "0.9.2" 5621 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5621 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5622 5622 dependencies = [ 5623 5623 "axum", 5624 5624 "bytes", ··· 5640 5640 [[package]] 5641 5641 name = "jacquard-common" 5642 5642 version = "0.9.2" 5643 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5643 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5644 5644 dependencies = [ 5645 5645 "base64 0.22.1", 5646 5646 "bon", ··· 5688 5688 [[package]] 5689 5689 name = 
"jacquard-derive" 5690 5690 version = "0.9.4" 5691 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5691 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5692 5692 dependencies = [ 5693 5693 "heck 0.5.0", 5694 5694 "jacquard-lexicon", ··· 5700 5700 [[package]] 5701 5701 name = "jacquard-identity" 5702 5702 version = "0.9.2" 5703 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5703 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5704 5704 dependencies = [ 5705 5705 "bon", 5706 5706 "bytes", ··· 5729 5729 [[package]] 5730 5730 name = "jacquard-lexicon" 5731 5731 version = "0.9.2" 5732 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5732 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5733 5733 dependencies = [ 5734 5734 "cid", 5735 5735 "dashmap 6.1.0", ··· 5755 5755 [[package]] 5756 5756 name = "jacquard-oauth" 5757 5757 version = "0.9.2" 5758 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5758 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5759 5759 dependencies = [ 5760 5760 "base64 0.22.1", 5761 5761 "bytes", ··· 5788 5788 [[package]] 5789 5789 name = "jacquard-repo" 5790 5790 version = "0.9.4" 5791 - source = "git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 5791 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 5792 5792 dependencies = [ 5793 5793 "bytes", 5794 5794 "cid", ··· 6746 6746 [[package]] 6747 6747 name = "mini-moka" 6748 6748 version = "0.10.99" 6749 - source = 
"git+https://tangled.org/@nonbinary.computer/jacquard#bf8f91add8747b64b1d1d74af4b358960f69d6e7" 6749 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#87e15baeadf555a107a56c25c7f2e0008f46a5e2" 6750 6750 dependencies = [ 6751 6751 "crossbeam-channel", 6752 6752 "crossbeam-utils", ··· 8653 8653 8654 8654 [[package]] 8655 8655 name = "reqwest" 8656 - version = "0.12.25" 8656 + version = "0.12.26" 8657 8657 source = "registry+https://github.com/rust-lang/crates.io-index" 8658 - checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a" 8658 + checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" 8659 8659 dependencies = [ 8660 8660 "base64 0.22.1", 8661 8661 "bytes",
+1 -1
crates/weaver-app/Dockerfile
··· 32 32 # Build wasm workers 33 33 RUN RUSTFLAGS='--cfg getrandom_backend="wasm_js"' cargo build -p weaver-app --bin editor_worker --bin embed_worker \ 34 34 --target wasm32-unknown-unknown --release \ 35 - --no-default-features --features "web" 35 + --no-default-features --features "web, use-index" 36 36 37 37 # Run wasm-bindgen on workers 38 38 RUN wasm-bindgen target/wasm32-unknown-unknown/release/editor_worker.wasm \
+68 -17
crates/weaver-app/src/blobcache.rs
··· 136 136 .build(), 137 137 ) 138 138 .await 139 - .map_err(|e| CapturedError::from_display(format_smolstr!("Failed to fetch entry: {}", e).as_str().to_string()))?; 139 + .map_err(|e| { 140 + CapturedError::from_display( 141 + format_smolstr!("Failed to fetch entry: {}", e) 142 + .as_str() 143 + .to_string(), 144 + ) 145 + })?; 140 146 141 - let record = resp 142 - .into_output() 143 - .map_err(|e| CapturedError::from_display(format_smolstr!("Failed to parse entry: {}", e).as_str().to_string()))?; 147 + let record = resp.into_output().map_err(|e| { 148 + CapturedError::from_display( 149 + format_smolstr!("Failed to parse entry: {}", e) 150 + .as_str() 151 + .to_string(), 152 + ) 153 + })?; 144 154 145 155 // Parse the entry 146 156 let entry: Entry = jacquard::from_data(&record.value).map_err(|e| { 147 - CapturedError::from_display(format_smolstr!("Failed to deserialize entry: {}", e).as_str().to_string()) 157 + CapturedError::from_display( 158 + format_smolstr!("Failed to deserialize entry: {}", e) 159 + .as_str() 160 + .to_string(), 161 + ) 148 162 })?; 149 163 150 164 // Find the image by name ··· 159 173 }) 160 174 .map(|img| img.image.blob().cid().clone().into_static()) 161 175 .ok_or_else(|| { 162 - CapturedError::from_display(format_smolstr!("Image '{}' not found in entry", name).as_str().to_string()) 176 + CapturedError::from_display( 177 + format_smolstr!("Image '{}' not found in entry", name) 178 + .as_str() 179 + .to_string(), 180 + ) 163 181 })?; 164 182 165 183 // Check cache first ··· 199 217 ) 200 218 .await 201 219 .map_err(|e| { 202 - CapturedError::from_display(format_smolstr!("Failed to fetch PublishedBlob: {}", e).as_str().to_string()) 220 + CapturedError::from_display( 221 + format_smolstr!("Failed to fetch PublishedBlob: {}", e) 222 + .as_str() 223 + .to_string(), 224 + ) 203 225 })?; 204 226 205 227 let record = resp.into_output().map_err(|e| { 206 - CapturedError::from_display(format_smolstr!("Failed to parse PublishedBlob: {}", 
e).as_str().to_string()) 228 + CapturedError::from_display( 229 + format_smolstr!("Failed to parse PublishedBlob: {}", e) 230 + .as_str() 231 + .to_string(), 232 + ) 207 233 })?; 208 234 209 235 // Parse the PublishedBlob 210 236 let published: PublishedBlob = jacquard::from_data(&record.value).map_err(|e| { 211 - CapturedError::from_display(format_smolstr!("Failed to deserialize PublishedBlob: {}", e).as_str().to_string()) 237 + CapturedError::from_display( 238 + format_smolstr!("Failed to deserialize PublishedBlob: {}", e) 239 + .as_str() 240 + .to_string(), 241 + ) 212 242 })?; 213 243 214 244 // Get CID from the upload blob ref ··· 248 278 .get_notebook_by_key(notebook_key) 249 279 .await? 250 280 .ok_or_else(|| { 251 - CapturedError::from_display(format_smolstr!("Notebook '{}' not found", notebook_key).as_str().to_string()) 281 + CapturedError::from_display( 282 + format_smolstr!("Notebook '{}' not found", notebook_key) 283 + .as_str() 284 + .to_string(), 285 + ) 252 286 })?; 253 287 254 288 let (view, entry_refs) = notebook.as_ref(); 255 289 256 290 // Get the DID from the notebook URI for blob fetching 257 291 let notebook_did = jacquard::types::aturi::AtUri::new(view.uri.as_ref()) 258 - .map_err(|e| CapturedError::from_display(format_smolstr!("Invalid notebook URI: {}", e).as_str().to_string()))? 292 + .map_err(|e| { 293 + CapturedError::from_display( 294 + format_smolstr!("Invalid notebook URI: {}", e) 295 + .as_str() 296 + .to_string(), 297 + ) 298 + })? 
259 299 .authority() 260 300 .clone() 261 301 .into_static(); ··· 277 317 let client = self.fetcher.get_client(); 278 318 for entry_ref in entry_refs { 279 319 // Parse the entry URI to get rkey 280 - let entry_uri = jacquard::types::aturi::AtUri::new(entry_ref.uri.as_ref()) 281 - .map_err(|e| CapturedError::from_display(format_smolstr!("Invalid entry URI: {}", e).as_str().to_string()))?; 320 + let entry_uri = jacquard::types::aturi::AtUri::new(entry_ref.entry.uri.as_ref()) 321 + .map_err(|e| { 322 + CapturedError::from_display( 323 + format_smolstr!("Invalid entry URI: {}", e) 324 + .as_str() 325 + .to_string(), 326 + ) 327 + })?; 282 328 let rkey = entry_uri 283 329 .rkey() 284 330 .ok_or_else(|| CapturedError::from_display("Entry URI missing rkey"))?; ··· 319 365 } 320 366 } 321 367 322 - Err(CapturedError::from_display(format_smolstr!( 323 - "Image '{}' not found in notebook '{}'", 324 - image_name, notebook_key 325 - ).as_str().to_string())) 368 + Err(CapturedError::from_display( 369 + format_smolstr!( 370 + "Image '{}' not found in notebook '{}'", 371 + image_name, 372 + notebook_key 373 + ) 374 + .as_str() 375 + .to_string(), 376 + )) 326 377 } 327 378 328 379 /// Insert bytes directly into cache (for pre-warming after upload)
+67 -3
crates/weaver-app/src/components/editor/writer.rs
··· 1437 1437 1438 1438 self.last_char_offset = closing_char_end; 1439 1439 } 1440 - Html(html) | InlineHtml(html) => { 1440 + Html(html) => { 1441 1441 // Track offset mapping for raw HTML 1442 1442 let char_start = self.last_char_offset; 1443 1443 let html_char_len = html.chars().count(); ··· 1445 1445 1446 1446 self.write(&html)?; 1447 1447 1448 + // Record mapping for inline HTML 1449 + self.record_mapping(range.clone(), char_start..char_end); 1450 + self.last_char_offset = char_end; 1451 + } 1452 + InlineHtml(html) => { 1453 + // Track offset mapping for raw HTML 1454 + let char_start = self.last_char_offset; 1455 + let html_char_len = html.chars().count(); 1456 + let char_end = char_start + html_char_len; 1457 + self.write(r#"<span class="html-embed html-embed-inline">"#)?; 1458 + self.write(&html)?; 1459 + self.write("</span>")?; 1448 1460 // Record mapping for inline HTML 1449 1461 self.record_mapping(range.clone(), char_start..char_end); 1450 1462 self.last_char_offset = char_end; ··· 1743 1755 1744 1756 // Emit the opening tag 1745 1757 match tag { 1746 - Tag::HtmlBlock => Ok(()), 1758 + // HTML blocks get their own paragraph to try and corral them better 1759 + Tag::HtmlBlock => { 1760 + // Record paragraph start for boundary tracking 1761 + // BUT skip if inside a list - list owns the paragraph boundary 1762 + if self.list_depth == 0 { 1763 + self.current_paragraph_start = 1764 + Some((self.last_byte_offset, self.last_char_offset)); 1765 + } 1766 + let node_id = self.gen_node_id(); 1767 + 1768 + if self.end_newline { 1769 + write!( 1770 + &mut self.writer, 1771 + r#"<p id="{}", class="html-embed html-embed-block">"#, 1772 + node_id 1773 + )?; 1774 + } else { 1775 + write!( 1776 + &mut self.writer, 1777 + r#"\n<p id="{}", class="html-embed html-embed-block">"#, 1778 + node_id 1779 + )?; 1780 + } 1781 + self.begin_node(node_id.clone()); 1782 + 1783 + // Map the start position of the paragraph (before any content) 1784 + // This allows cursor to be placed 
at the very beginning 1785 + let para_start_char = self.last_char_offset; 1786 + let mapping = OffsetMapping { 1787 + byte_range: range.start..range.start, 1788 + char_range: para_start_char..para_start_char, 1789 + node_id, 1790 + char_offset_in_node: 0, 1791 + child_index: Some(0), // position before first child 1792 + utf16_len: 0, 1793 + }; 1794 + self.offset_maps.push(mapping); 1795 + 1796 + Ok(()) 1797 + } 1747 1798 Tag::Paragraph => { 1748 1799 // Record paragraph start for boundary tracking 1749 1800 // BUT skip if inside a list - list owns the paragraph boundary ··· 2454 2505 2455 2506 // Emit tag HTML first 2456 2507 let result = match tag { 2457 - TagEnd::HtmlBlock => Ok(()), 2508 + TagEnd::HtmlBlock => { 2509 + // Record paragraph end for boundary tracking 2510 + // BUT skip if inside a list - list owns the paragraph boundary 2511 + if self.list_depth == 0 { 2512 + if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2513 + let byte_range = byte_start..self.last_byte_offset; 2514 + let char_range = char_start..self.last_char_offset; 2515 + self.paragraph_ranges.push((byte_range, char_range)); 2516 + } 2517 + } 2518 + 2519 + self.end_node(); 2520 + self.write("</p>\n") 2521 + } 2458 2522 TagEnd::Paragraph => { 2459 2523 // Record paragraph end for boundary tracking 2460 2524 // BUT skip if inside a list - list owns the paragraph boundary
+111 -40
crates/weaver-app/src/components/identity.rs
··· 6 6 use jacquard::{smol_str::SmolStr, types::ident::AtIdentifier}; 7 7 use std::collections::HashSet; 8 8 use weaver_api::com_atproto::repo::strong_ref::StrongRef; 9 - use weaver_api::sh_weaver::notebook::{EntryView, NotebookView, entry::Entry}; 9 + use weaver_api::sh_weaver::notebook::{ 10 + BookEntryRef, BookEntryView, EntryView, NotebookView, entry::Entry, 11 + }; 12 + 13 + /// Constructs BookEntryViews from notebook entry refs and all available entries. 14 + /// 15 + /// Matches StrongRefs by URI to find the corresponding EntryView, 16 + /// then builds BookEntryView with index and prev/next navigation refs. 17 + fn build_book_entry_views( 18 + entry_refs: &[StrongRef<'static>], 19 + all_entries: &[(EntryView<'static>, Entry<'static>)], 20 + ) -> Vec<BookEntryView<'static>> { 21 + use jacquard::IntoStatic; 22 + 23 + // Build a lookup map for faster matching 24 + let entry_map: std::collections::HashMap<&str, &EntryView<'static>> = all_entries 25 + .iter() 26 + .map(|(view, _)| (view.uri.as_ref(), view)) 27 + .collect(); 28 + 29 + let mut views = Vec::with_capacity(entry_refs.len()); 30 + 31 + for (idx, strong_ref) in entry_refs.iter().enumerate() { 32 + let Some(entry_view) = entry_map.get(strong_ref.uri.as_ref()).copied() else { 33 + continue; 34 + }; 35 + 36 + // Build prev ref (if not first) 37 + let prev = if idx > 0 { 38 + entry_refs 39 + .get(idx - 1) 40 + .and_then(|prev_ref| entry_map.get(prev_ref.uri.as_ref()).copied()) 41 + .map(|prev_view| { 42 + BookEntryRef::new() 43 + .entry(prev_view.clone()) 44 + .build() 45 + .into_static() 46 + }) 47 + } else { 48 + None 49 + }; 50 + 51 + // Build next ref (if not last) 52 + let next = if idx + 1 < entry_refs.len() { 53 + entry_refs 54 + .get(idx + 1) 55 + .and_then(|next_ref| entry_map.get(next_ref.uri.as_ref()).copied()) 56 + .map(|next_view| { 57 + BookEntryRef::new() 58 + .entry(next_view.clone()) 59 + .build() 60 + .into_static() 61 + }) 62 + } else { 63 + None 64 + }; 65 + 66 + views.push( 67 + 
BookEntryView::new() 68 + .entry(entry_view.clone()) 69 + .index(idx as i64) 70 + .maybe_prev(prev) 71 + .maybe_next(next) 72 + .build() 73 + .into_static(), 74 + ); 75 + } 76 + 77 + views 78 + } 10 79 11 80 /// A single item in the profile timeline (either notebook or standalone entry) 12 81 #[derive(Clone, PartialEq)] 13 82 pub enum ProfileTimelineItem { 14 83 Notebook { 15 84 notebook: NotebookView<'static>, 16 - entries: Vec<StrongRef<'static>>, 85 + entries: Vec<BookEntryView<'static>>, 17 86 /// Most recent entry's created_at for sorting 18 87 sort_date: jacquard::types::string::Datetime, 19 88 }, ··· 93 162 let auth_state = use_context::<Signal<AuthState>>(); 94 163 95 164 // Use client-only versions to avoid SSR issues with concurrent server futures 96 - let (_profile_res, profile) = data::use_profile_data_client(ident); 97 - let (_notebooks_res, notebooks) = data::use_notebooks_for_did_client(ident); 98 - let (_entries_res, all_entries) = data::use_entries_for_did_client(ident); 165 + let (_profile_res, profile) = data::use_profile_data(ident); 166 + let (_notebooks_res, notebooks) = data::use_notebooks_for_did(ident); 167 + let (_entries_res, all_entries) = data::use_entries_for_did(ident); 168 + 169 + #[cfg(feature = "fullstack-server")] 170 + { 171 + _profile_res?; 172 + _notebooks_res?; 173 + _entries_res?; 174 + } 99 175 100 176 // Check if viewing own profile 101 177 let is_own_profile = use_memo(move || { ··· 171 247 if let Some(all_ents) = ents.as_ref() { 172 248 for (notebook, entry_refs) in nbs { 173 249 if is_pinned(notebook.uri.as_ref(), pinned_set) { 174 - let sort_date = entry_refs 250 + let book_entries = build_book_entry_views(entry_refs, all_ents); 251 + let sort_date = book_entries 175 252 .iter() 176 - .filter_map(|r| { 253 + .filter_map(|bev| { 177 254 all_ents 178 255 .iter() 179 - .find(|(v, _)| v.uri.as_ref() == r.uri.as_ref()) 256 + .find(|(v, _)| v.uri.as_ref() == bev.entry.uri.as_ref()) 180 257 }) 181 258 .map(|(_, entry)| 
entry.created_at.clone()) 182 259 .max() ··· 184 261 185 262 items.push(ProfileTimelineItem::Notebook { 186 263 notebook: notebook.clone(), 187 - entries: entry_refs.clone(), 264 + entries: book_entries, 188 265 sort_date, 189 266 }); 190 267 } ··· 231 308 if let Some(all_ents) = ents.as_ref() { 232 309 for (notebook, entry_refs) in nbs { 233 310 if !is_pinned(notebook.uri.as_ref(), pinned_set) { 234 - let sort_date = entry_refs 311 + let book_entries = build_book_entry_views(entry_refs, all_ents); 312 + let sort_date = book_entries 235 313 .iter() 236 - .filter_map(|r| { 314 + .filter_map(|bev| { 237 315 all_ents 238 316 .iter() 239 - .find(|(v, _)| v.uri.as_ref() == r.uri.as_ref()) 317 + .find(|(v, _)| v.uri.as_ref() == bev.entry.uri.as_ref()) 240 318 }) 241 319 .map(|(_, entry)| entry.created_at.clone()) 242 320 .max() ··· 244 322 245 323 items.push(ProfileTimelineItem::Notebook { 246 324 notebook: notebook.clone(), 247 - entries: entry_refs.clone(), 325 + entries: book_entries, 248 326 sort_date, 249 327 }); 250 328 } ··· 361 439 class: "pinned-item", 362 440 NotebookCard { 363 441 notebook: notebook.clone(), 364 - entry_refs: entries.clone(), 442 + entries: entries.clone(), 365 443 is_pinned: true, 366 444 profile_ident: Some(ident()), 367 445 } ··· 409 487 key: "notebook-{notebook.cid}", 410 488 NotebookCard { 411 489 notebook: notebook.clone(), 412 - entry_refs: entries.clone(), 490 + entries: entries.clone(), 413 491 is_pinned: false, 414 492 profile_ident: Some(ident()), 415 493 } ··· 458 536 459 537 let entry_view = &book_entry_view.entry; 460 538 461 - let entry_title = entry_view.title.as_ref() 539 + let entry_title = entry_view 540 + .title 541 + .as_ref() 462 542 .map(|t| t.as_ref()) 463 543 .unwrap_or("Untitled"); 464 544 465 - let entry_path = entry_view.path 545 + let entry_path = entry_view 546 + .path 466 547 .as_ref() 467 548 .map(|p| p.as_ref().to_string()) 468 549 .unwrap_or_else(|| entry_title.to_string()); ··· 476 557 html_buf 477 558 }); 
478 559 479 - let created_at = parsed_entry.as_ref() 560 + let created_at = parsed_entry 561 + .as_ref() 480 562 .map(|entry| entry.created_at.as_ref().format("%B %d, %Y").to_string()); 481 563 482 564 let entry_uri = entry_view.uri.clone().into_static(); ··· 529 611 #[component] 530 612 pub fn NotebookCard( 531 613 notebook: NotebookView<'static>, 532 - entry_refs: Vec<StrongRef<'static>>, 614 + entries: Vec<BookEntryView<'static>>, 533 615 #[props(default = false)] is_pinned: bool, 534 616 #[props(default)] show_author: Option<bool>, 535 617 /// Profile identity for context-aware author visibility (hides single author on their own profile) 536 - #[props(default)] profile_ident: Option<AtIdentifier<'static>>, 618 + #[props(default)] 619 + profile_ident: Option<AtIdentifier<'static>>, 537 620 #[props(default)] on_pinned_changed: Option<EventHandler<bool>>, 538 621 #[props(default)] on_deleted: Option<EventHandler<()>>, 539 622 ) -> Element { ··· 575 658 let ident = notebook.uri.authority().clone().into_static(); 576 659 let book_title: SmolStr = notebook_path.clone().into(); 577 660 578 - // Fetch all entries to get first/last 579 - let ident_for_fetch = ident.clone(); 580 - let book_title_for_fetch = book_title.clone(); 581 - let entries = use_resource(use_reactive!(|(ident_for_fetch, book_title_for_fetch)| { 582 - let fetcher = fetcher.clone(); 583 - async move { 584 - fetcher 585 - .list_notebook_entries(ident_for_fetch, book_title_for_fetch) 586 - .await 587 - .ok() 588 - .flatten() 589 - } 590 - })); 591 661 rsx! 
{ 592 662 div { class: "notebook-card", 593 663 div { class: "notebook-card-container", ··· 642 712 } 643 713 644 714 // Entry previews section 645 - if let Some(Some(entry_list)) = entries() { 646 715 div { class: "notebook-card-previews", 647 716 { 648 717 use jacquard::from_data; 649 718 use weaver_api::sh_weaver::notebook::entry::Entry; 719 + tracing::info!("rendering entries: {:?}", entries.iter().map(|e| 720 + e.entry.uri.as_ref()).collect::<Vec<_>>()); 650 721 651 - if entry_list.len() <= 5 { 722 + if entries.len() <= 5 { 652 723 // Show all entries if 5 or fewer 653 724 rsx! { 654 - for entry_view in entry_list.iter() { 725 + for entry_view in entries.iter() { 655 726 NotebookEntryPreview { 656 727 book_entry_view: entry_view.clone(), 657 728 ident: ident.clone(), ··· 662 733 } else { 663 734 // Show first, interstitial, and last 664 735 rsx! { 665 - if let Some(first_entry) = entry_list.first() { 736 + if let Some(first_entry) = entries.first() { 666 737 NotebookEntryPreview { 667 738 book_entry_view: first_entry.clone(), 668 739 ident: ident.clone(), ··· 673 744 674 745 // Interstitial showing count 675 746 { 676 - let middle_count = entry_list.len().saturating_sub(2); 747 + let middle_count = entries.len().saturating_sub(2); 677 748 rsx! { 678 749 div { class: "notebook-entry-interstitial", 679 750 "... {middle_count} more " ··· 683 754 } 684 755 } 685 756 686 - if let Some(last_entry) = entry_list.last() { 757 + if let Some(last_entry) = entries.last() { 687 758 NotebookEntryPreview { 688 759 book_entry_view: last_entry.clone(), 689 760 ident: ident.clone(), ··· 695 766 } 696 767 } 697 768 } 698 - } 769 + 699 770 700 771 if let Some(ref tags) = notebook.tags { 701 772 if !tags.is_empty() {
+9 -8
crates/weaver-app/src/components/login.rs
··· 113 113 div { class: "error", "{err}" } 114 114 } 115 115 Button { 116 + r#type: "submit", 117 + disabled: is_loading(), 118 + onclick: move |_| { 119 + submit_closure2(); 120 + }, 121 + if is_loading() { "Authenticating..." } else { "Sign In" } 122 + } 123 + Button { 116 124 r#type: "button", 117 125 onclick: move |_| { 118 126 open.set(false) ··· 121 129 variant: ButtonVariant::Secondary, 122 130 "Cancel" 123 131 } 124 - Button { 125 - r#type: "submit", 126 - disabled: is_loading(), 127 - onclick: move |_| { 128 - submit_closure2(); 129 - }, 130 - if is_loading() { "Authenticating..." } else { "Sign In" } 131 - } 132 + 132 133 133 134 } 134 135 }
+4 -4
crates/weaver-app/src/data.rs
··· 977 977 book_title: ReadSignal<SmolStr>, 978 978 ) -> ( 979 979 Result<Resource<Option<serde_json::Value>>, RenderError>, 980 - Memo<Option<(NotebookView<'static>, Vec<StrongRef<'static>>)>>, 980 + Memo<Option<(NotebookView<'static>, Vec<BookEntryView<'static>>)>>, 981 981 ) { 982 982 let fetcher = use_context::<crate::fetch::Fetcher>(); 983 983 let res = use_server_future(use_reactive!(|(ident, book_title)| { ··· 995 995 let memo = use_memo(use_reactive!(|res| { 996 996 let res = res.as_ref().ok()?; 997 997 if let Some(Some(value)) = &*res.read() { 998 - jacquard::from_json_value::<(NotebookView, Vec<StrongRef>)>(value.clone()).ok() 998 + jacquard::from_json_value::<(NotebookView, Vec<BookEntryView>)>(value.clone()).ok() 999 999 } else { 1000 1000 None 1001 1001 } ··· 1009 1009 ident: ReadSignal<AtIdentifier<'static>>, 1010 1010 book_title: ReadSignal<SmolStr>, 1011 1011 ) -> ( 1012 - Resource<Option<(NotebookView<'static>, Vec<StrongRef<'static>>)>>, 1013 - Memo<Option<(NotebookView<'static>, Vec<StrongRef<'static>>)>>, 1012 + Resource<Option<(NotebookView<'static>, Vec<BookEntryView<'static>>)>>, 1013 + Memo<Option<(NotebookView<'static>, Vec<BookEntryView<'static>>)>>, 1014 1014 ) { 1015 1015 let fetcher = use_context::<crate::fetch::Fetcher>(); 1016 1016 let res = use_resource(move || {
+38 -66
crates/weaver-app/src/fetch.rs
··· 9 9 use jacquard::client::AgentKind; 10 10 use jacquard::error::ClientError; 11 11 use jacquard::error::XrpcResult; 12 + use jacquard::from_data; 13 + use jacquard::from_data_owned; 12 14 use jacquard::identity::JacquardResolver; 13 15 use jacquard::identity::lexicon_resolver::{ 14 16 LexiconResolutionError, LexiconSchemaResolver, ResolvedLexiconSchema, ··· 46 48 }; 47 49 use weaver_common::WeaverError; 48 50 use weaver_common::WeaverExt; 51 + use weaver_common::agent::title_matches; 49 52 50 53 #[derive(Debug, Clone, Deserialize, Serialize)] 51 54 struct UfosRecord { ··· 363 366 #[cfg(feature = "server")] 364 367 book_cache: cache_impl::Cache< 365 368 (AtIdentifier<'static>, SmolStr), 366 - Arc<(NotebookView<'static>, Vec<StrongRef<'static>>)>, 369 + Arc<(NotebookView<'static>, Vec<BookEntryView<'static>>)>, 367 370 >, 368 371 /// Maps notebook title OR path to ident (book_cache accepts either as key) 369 372 #[cfg(feature = "server")] ··· 453 456 &self, 454 457 ident: AtIdentifier<'static>, 455 458 title: SmolStr, 456 - ) -> Result<Option<Arc<(NotebookView<'static>, Vec<StrongRef<'static>>)>>> { 459 + ) -> Result<Option<Arc<(NotebookView<'static>, Vec<BookEntryView<'static>>)>>> { 457 460 #[cfg(feature = "server")] 458 461 if let Some(cached) = cache_impl::get(&self.book_cache, &(ident.clone(), title.clone())) { 459 462 return Ok(Some(cached)); ··· 490 493 pub async fn get_notebook_by_key( 491 494 &self, 492 495 key: &str, 493 - ) -> Result<Option<Arc<(NotebookView<'static>, Vec<StrongRef<'static>>)>>> { 496 + ) -> Result<Option<Arc<(NotebookView<'static>, Vec<BookEntryView<'static>>)>>> { 494 497 let key: SmolStr = key.into(); 495 498 496 499 // Check cache first (key could be title or path) ··· 500 503 501 504 // Fallback: query UFOS and populate caches 502 505 let notebooks = self.fetch_notebooks_from_ufos().await?; 503 - Ok(notebooks.into_iter().find(|arc| { 506 + let notebook = notebooks.into_iter().find(|arc| { 504 507 let (view, _) = arc.as_ref(); 505 
508 view.title.as_deref() == Some(key.as_str()) 506 509 || view.path.as_deref() == Some(key.as_str()) 507 - })) 510 + }); 511 + if let Some(notebook) = notebook { 512 + let ident = notebook.0.uri.authority().clone().into_static(); 513 + return self.get_notebook(ident, key).await; 514 + } 515 + Ok(None) 508 516 } 509 517 510 518 pub async fn get_entry( ··· 522 530 523 531 if let Some(result) = self.get_notebook(ident.clone(), book_title).await? { 524 532 let (notebook, entries) = result.as_ref(); 525 - let client = self.get_client(); 526 - if let Some(entry) = client 527 - .entry_by_title(notebook, entries.as_ref(), &entry_title) 528 - .await 529 - .map_err(|e| dioxus::CapturedError::from_display(e))? 530 - { 531 - let stored = Arc::new(entry); 533 + if let Some(entry) = entries.iter().find(|e| { 534 + if let Some(path) = e.entry.path.as_deref() { 535 + path == entry_title.as_str() 536 + } else if let Some(title) = e.entry.title.as_deref() { 537 + title_matches(title, &entry_title) 538 + } else { 539 + false 540 + } 541 + }) { 542 + let stored = Arc::new(( 543 + entry.clone(), 544 + from_data_owned(entry.entry.record.clone()).expect("should deserialize"), 545 + )); 532 546 #[cfg(feature = "server")] 533 547 cache_impl::insert(&self.entry_cache, (ident, entry_title), stored.clone()); 534 548 Ok(Some(stored)) ··· 583 597 #[cfg(feature = "server")] 584 598 { 585 599 cache_impl::insert(&self.notebook_key_cache, title.clone(), ident.clone()); 600 + #[cfg(not(feature = "use-index"))] 586 601 cache_impl::insert(&self.book_cache, (ident.clone(), title), result.clone()); 602 + 587 603 if let Some(path) = result.0.path.as_ref() { 588 604 let path: SmolStr = path.as_ref().into(); 589 605 cache_impl::insert(&self.notebook_key_cache, path.clone(), ident.clone()); 606 + #[cfg(not(feature = "use-index"))] 590 607 cache_impl::insert(&self.book_cache, (ident, path), result.clone()); 591 608 } 592 609 } ··· 640 657 { 641 658 // Cache by title 642 659 
cache_impl::insert(&self.notebook_key_cache, title.clone(), ident.clone()); 660 + 661 + #[cfg(not(feature = "use-index"))] 643 662 cache_impl::insert( 644 663 &self.book_cache, 645 664 (ident.clone(), title), ··· 653 672 path.clone(), 654 673 ident.clone(), 655 674 ); 675 + 676 + #[cfg(not(feature = "use-index"))] 656 677 cache_impl::insert(&self.book_cache, (ident, path), result.clone()); 657 678 } 658 679 } ··· 815 836 title.clone(), 816 837 ident_static.clone(), 817 838 ); 818 - cache_impl::insert( 819 - &self.book_cache, 820 - (ident_static.clone(), title), 821 - result.clone(), 822 - ); 823 839 if let Some(path) = result.0.path.as_ref() { 824 840 let path: SmolStr = path.as_ref().into(); 825 841 cache_impl::insert( ··· 827 843 path.clone(), 828 844 ident_static.clone(), 829 845 ); 830 - cache_impl::insert(&self.book_cache, (ident_static, path), result.clone()); 831 846 } 832 847 } 833 848 notebooks.push(result); ··· 1062 1077 ident: AtIdentifier<'static>, 1063 1078 book_title: SmolStr, 1064 1079 ) -> Result<Option<Vec<BookEntryView<'static>>>> { 1065 - use jacquard::types::aturi::AtUri; 1066 - 1067 1080 if let Some(result) = self.get_notebook(ident.clone(), book_title).await? 
{ 1068 - let (notebook, entry_refs) = result.as_ref(); 1069 - let mut book_entries = Vec::new(); 1070 - let client = self.get_client(); 1071 - 1072 - for (index, entry_ref) in entry_refs.iter().enumerate() { 1073 - // Try to extract rkey from URI 1074 - let rkey = AtUri::new(entry_ref.uri.as_ref()) 1075 - .ok() 1076 - .and_then(|uri| uri.rkey().map(|r| SmolStr::new(r.as_ref()))); 1077 - 1078 - // Check cache first 1079 - #[cfg(feature = "server")] 1080 - if let Some(ref rkey) = rkey { 1081 - if let Some(cached) = 1082 - cache_impl::get(&self.entry_cache, &(ident.clone(), rkey.clone())) 1083 - { 1084 - book_entries.push(cached.0.clone()); 1085 - continue; 1086 - } 1087 - } 1088 - 1089 - // Fetch if not cached 1090 - if let Ok(book_entry) = client.view_entry(notebook, entry_refs, index).await { 1091 - // Try to populate cache by deserializing Entry from the view's record 1092 - #[cfg(feature = "server")] 1093 - if let Some(rkey) = rkey { 1094 - use jacquard::IntoStatic; 1095 - use weaver_api::sh_weaver::notebook::entry::Entry; 1096 - if let Ok(entry) = 1097 - jacquard::from_data::<Entry<'_>>(&book_entry.entry.record) 1098 - { 1099 - let cached = 1100 - Arc::new((book_entry.clone().into_static(), entry.into_static())); 1101 - cache_impl::insert(&self.entry_cache, (ident.clone(), rkey), cached); 1102 - } 1103 - } 1104 - book_entries.push(book_entry); 1105 - } 1106 - } 1107 - 1108 - Ok(Some(book_entries)) 1081 + Ok(Some(result.as_ref().1.clone())) 1109 1082 } else { 1110 1083 Err(dioxus::CapturedError::from_display("Notebook not found")) 1111 1084 } ··· 1269 1242 }; 1270 1243 1271 1244 // Find entry position in notebook 1272 - let book_entry_view = client 1273 - .entry_in_notebook_by_rkey(&notebook, &entries, &rkey) 1274 - .await 1275 - .map_err(|e| dioxus::CapturedError::from_display(e))?; 1245 + let book_entry_view = entries 1246 + .iter() 1247 + .find(|e| e.entry.uri.rkey().as_ref().map(|k| k.as_ref()) == Some(rkey.as_ref())); 1276 1248 1277 1249 let mut 
book_entry_view = match book_entry_view { 1278 - Some(bev) => bev, 1250 + Some(bev) => bev.clone(), 1279 1251 None => { 1280 1252 // Entry not in this notebook's entry list - return basic view without nav 1281 1253 use weaver_api::sh_weaver::notebook::BookEntryView;
+60 -1
crates/weaver-app/src/views/callback.rs
··· 8 8 smol_str::SmolStr, 9 9 }; 10 10 use tracing::{error, info}; 11 + use weaver_api::sh_weaver::actor::profile::Profile as WeaverProfile; 11 12 12 13 #[component] 13 14 pub fn Callback( ··· 35 36 .callback(callback_params) 36 37 .await?; 37 38 let (did, session_id) = session.session_info().await; 38 - auth.write().set_authenticated(did, session_id); 39 + let did_owned = did.into_static(); 40 + auth.write() 41 + .set_authenticated(did_owned.clone(), session_id); 39 42 fetcher.upgrade_to_authenticated(session).await; 43 + 44 + // Create weaver profile if it doesn't exist 45 + if let Err(e) = ensure_weaver_profile(&fetcher, &did_owned).await { 46 + error!("Failed to ensure weaver profile: {:?}", e); 47 + } 48 + 40 49 Ok::<(), OAuthError>(()) 41 50 } 42 51 }) ··· 83 92 }, 84 93 } 85 94 } 95 + 96 + /// Ensures a weaver profile exists for the authenticated user. 97 + /// If no weaver profile exists, creates one by mirroring the bsky profile. 98 + #[cfg(feature = "web")] 99 + async fn ensure_weaver_profile( 100 + fetcher: &Fetcher, 101 + did: &jacquard::types::string::Did<'_>, 102 + ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { 103 + use jacquard::{ 104 + client::AgentSessionExt, 105 + types::string::{Datetime, RecordKey}, 106 + }; 107 + use weaver_api::app_bsky::actor::profile::Profile as BskyProfile; 108 + 109 + let weaver_uri_str = format!("at://{}/sh.weaver.actor.profile/self", did); 110 + let weaver_uri = WeaverProfile::uri(&weaver_uri_str)?; 111 + 112 + // Check if weaver profile already exists 113 + if fetcher.fetch_record(&weaver_uri).await.is_ok() { 114 + info!("Weaver profile already exists for {}", did); 115 + return Ok(()); 116 + } 117 + 118 + info!( 119 + "No weaver profile found for {}, creating from bsky profile", 120 + did 121 + ); 122 + 123 + // Fetch bsky profile 124 + let bsky_uri_str = format!("at://{}/app.bsky.actor.profile/self", did); 125 + let bsky_uri = BskyProfile::uri(&bsky_uri_str)?; 126 + let bsky_record = 
fetcher.fetch_record(&bsky_uri).await?; 127 + 128 + // Create weaver profile mirroring bsky 129 + let weaver_profile = WeaverProfile::new() 130 + .maybe_display_name(bsky_record.value.display_name.clone()) 131 + .maybe_description(bsky_record.value.description.clone()) 132 + .maybe_avatar(bsky_record.value.avatar.clone()) 133 + .maybe_banner(bsky_record.value.banner.clone()) 134 + .bluesky(true) 135 + .created_at(Datetime::now()) 136 + .build(); 137 + 138 + let self_rkey = RecordKey::any("self").expect("self is valid record key"); 139 + 140 + fetcher.put_record(self_rkey, weaver_profile).await?; 141 + info!("Created weaver profile for {}", did); 142 + 143 + Ok(()) 144 + }
+1 -1
crates/weaver-app/src/views/home.rs
··· 153 153 Some((view, entries)) => rsx! { 154 154 NotebookCard { 155 155 notebook: view.clone(), 156 - entry_refs: entries.clone(), 156 + entries: entries.clone(), 157 157 show_author: Some(true) 158 158 } 159 159 },
+16 -25
crates/weaver-common/src/agent.rs
··· 39 39 } 40 40 41 41 /// Check if a search term matches a value, with fallback to stripped punctuation 42 - fn title_matches(value: &str, search: &str) -> bool { 42 + pub fn title_matches(value: &str, search: &str) -> bool { 43 43 // Exact match first 44 44 if value == search { 45 45 return true; ··· 682 682 ident: &jacquard::types::ident::AtIdentifier<'_>, 683 683 title: &str, 684 684 ) -> impl Future< 685 - Output = Result<Option<(NotebookView<'static>, Vec<StrongRef<'static>>)>, WeaverError>, 685 + Output = Result<Option<(NotebookView<'static>, Vec<BookEntryView<'static>>)>, WeaverError>, 686 686 > 687 687 where 688 688 Self: Sized, ··· 701 701 .map_err(|e| AgentError::from(ClientError::from(e)))?; 702 702 703 703 match resp.into_output() { 704 - Ok(output) => { 705 - // Extract StrongRefs from the BookEntryViews for compatibility 706 - let entries: Vec<StrongRef<'static>> = output 707 - .entries 708 - .iter() 709 - .map(|bev| { 710 - StrongRef::new() 711 - .uri(bev.entry.uri.clone()) 712 - .cid(bev.entry.cid.clone()) 713 - .build() 714 - .into_static() 715 - }) 716 - .collect(); 717 - 718 - Ok(Some((output.notebook.into_static(), entries))) 719 - } 704 + Ok(output) => Ok(Some(( 705 + output.notebook.into_static(), 706 + output.entries.into_static(), 707 + ))), 720 708 Err(_) => Ok(None), 721 709 } 722 710 } ··· 729 717 ident: &jacquard::types::ident::AtIdentifier<'_>, 730 718 title: &str, 731 719 ) -> impl Future< 732 - Output = Result<Option<(NotebookView<'static>, Vec<StrongRef<'static>>)>, WeaverError>, 720 + Output = Result<Option<(NotebookView<'static>, Vec<BookEntryView<'static>>)>, WeaverError>, 733 721 > 734 722 where 735 723 Self: Sized, ··· 817 805 ); 818 806 } 819 807 820 - let entries = notebook 821 - .entry_list 822 - .iter() 823 - .cloned() 824 - .map(IntoStatic::into_static) 825 - .collect(); 808 + // TODO: Fix this - entries building is broken because we need NotebookView 809 + // to call view_entry but we're still building the NotebookView 
810 + let entries = Vec::new(); // Temporarily empty 811 + 812 + // let mut entries = Vec::with_capacity(notebook.entry_list.len()); 813 + // for (index, _) in notebook.entry_list.iter().enumerate() { 814 + // let entry_view = self.view_entry(&notebook_view, &notebook.entry_list, index).await?; 815 + // entries.push(entry_view); 816 + // } 826 817 827 818 // Fetch permissions for this notebook 828 819 let permissions = self.get_permissions_for_resource(&record.uri).await?;
+42 -10
crates/weaver-index/src/clickhouse/queries/notebooks.rs
··· 39 39 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 40 40 pub created_at: chrono::DateTime<chrono::Utc>, 41 41 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 42 + pub updated_at: chrono::DateTime<chrono::Utc>, 43 + #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 42 44 pub indexed_at: chrono::DateTime<chrono::Utc>, 43 45 pub record: SmolStr, 44 46 } ··· 136 138 /// List entries for a specific notebook, ordered by position in the notebook. 137 139 /// 138 140 /// Uses notebook_entries table to get entries that belong to this notebook. 141 + /// Deduplicates entries by rkey, keeping the most recently updated version. 139 142 pub async fn list_notebook_entries( 140 143 &self, 141 144 notebook_did: &str, ··· 143 146 limit: u32, 144 147 cursor: Option<u32>, 145 148 ) -> Result<Vec<EntryRow>, IndexError> { 149 + use std::collections::HashMap; 150 + 146 151 let query = r#" 147 152 SELECT 148 153 e.did AS did, ··· 154 159 e.tags AS tags, 155 160 e.author_dids AS author_dids, 156 161 e.created_at AS created_at, 162 + e.updated_at AS updated_at, 157 163 e.indexed_at AS indexed_at, 158 164 e.record AS record 159 165 FROM notebook_entries ne FINAL 160 - INNER JOIN entries e ON 166 + INNER JOIN entries FINAL AS e ON 161 167 e.did = ne.entry_did 162 168 AND e.rkey = ne.entry_rkey 163 169 AND e.deleted_at = toDateTime64(0, 3) ··· 176 182 .bind(notebook_did) 177 183 .bind(notebook_rkey) 178 184 .bind(cursor_val) 179 - .bind(limit) 185 + // Fetch extra to account for duplicates we'll filter out 186 + .bind(limit * 2) 180 187 .fetch_all::<EntryRow>() 181 188 .await 182 189 .map_err(|e| ClickHouseError::Query { ··· 184 191 source: e, 185 192 })?; 186 193 187 - Ok(rows) 194 + // Dedupe by rkey, keeping the most recently updated version 195 + let mut seen: HashMap<SmolStr, usize> = HashMap::new(); 196 + let mut deduped: Vec<EntryRow> = Vec::with_capacity(rows.len()); 197 + 198 + for row in rows { 199 + if let Some(&existing_idx) 
= seen.get(&row.rkey) { 200 + // Keep the one with the more recent updated_at 201 + if row.updated_at > deduped[existing_idx].updated_at { 202 + deduped[existing_idx] = row; 203 + } 204 + } else { 205 + seen.insert(row.rkey.clone(), deduped.len()); 206 + deduped.push(row); 207 + } 208 + 209 + // Stop once we have enough unique entries 210 + if deduped.len() >= limit as usize { 211 + break; 212 + } 213 + } 214 + 215 + Ok(deduped) 188 216 } 189 217 190 218 /// Get an entry by rkey, picking the most recent version across collaborators. ··· 216 244 tags, 217 245 author_dids, 218 246 created_at, 247 + updated_at, 219 248 indexed_at, 220 249 record 221 250 FROM entries FINAL ··· 263 292 tags, 264 293 author_dids, 265 294 created_at, 295 + updated_at, 266 296 indexed_at, 267 297 record 268 298 FROM entries FINAL ··· 305 335 tags, 306 336 author_dids, 307 337 created_at, 338 + updated_at, 308 339 indexed_at, 309 340 record 310 341 FROM entries FINAL ··· 414 445 ) -> Result<Vec<EntryRow>, IndexError> { 415 446 let query = if cursor.is_some() { 416 447 r#" 417 - SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record 448 + SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record 418 449 FROM ( 419 450 SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record, 420 451 ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn ··· 429 460 "# 430 461 } else { 431 462 r#" 432 - SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record 463 + SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record 433 464 FROM ( 434 465 SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record, 435 466 ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn ··· 592 623 ) -> Result<Vec<EntryRow>, IndexError> { 593 624 let 
base_query = if tags.is_some() && cursor.is_some() { 594 625 r#" 595 - SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record 626 + SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record 596 627 FROM ( 597 628 SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record, 598 629 ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn ··· 607 638 "# 608 639 } else if tags.is_some() { 609 640 r#" 610 - SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record 641 + SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record 611 642 FROM ( 612 643 SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record, 613 644 ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn ··· 621 652 "# 622 653 } else if cursor.is_some() { 623 654 r#" 624 - SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record 655 + SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record 625 656 FROM ( 626 657 SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record, 627 658 ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn ··· 635 666 "# 636 667 } else { 637 668 r#" 638 - SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, indexed_at, record 669 + SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record 639 670 FROM ( 640 671 SELECT did, rkey, cid, uri, title, path, tags, author_dids, created_at, updated_at, indexed_at, record, 641 672 ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY updated_at DESC) as rn ··· 696 727 e.tags AS tags, 697 728 e.author_dids AS author_dids, 698 729 e.created_at AS created_at, 730 + e.updated_at AS 
updated_at, 699 731 e.indexed_at AS indexed_at, 700 732 e.record AS record 701 733 FROM notebook_entries ne FINAL 702 - INNER JOIN entries e ON 734 + INNER JOIN entries FINAL AS e ON 703 735 e.did = ne.entry_did 704 736 AND e.rkey = ne.entry_rkey 705 737 AND e.deleted_at = toDateTime64(0, 3)
+36 -1
crates/weaver-index/src/endpoints/actor.rs
··· 6 6 use jacquard::IntoStatic; 7 7 use jacquard::cowstr::ToCowStr; 8 8 use jacquard::identity::resolver::IdentityResolver; 9 + use jacquard::prelude::*; 9 10 use jacquard::types::ident::AtIdentifier; 10 11 use jacquard::types::string::{AtUri, Cid, Did, Handle, Uri}; 11 12 use jacquard_axum::ExtractXrpc; ··· 52 53 })?; 53 54 54 55 let Some(data) = profile_data else { 55 - return Err(XrpcErrorResponse::not_found("Profile not found")); 56 + // get the bluesky profile 57 + // TODO: either cache this or yell at tap to start tracking their account! 58 + let profile_resp = state 59 + .resolver 60 + .send( 61 + weaver_api::app_bsky::actor::get_profile::GetProfile::new() 62 + .actor(did) 63 + .build(), 64 + ) 65 + .await 66 + .map_err(|e| XrpcErrorResponse::not_found(e.to_string()))?; 67 + let bsky_profile = profile_resp 68 + .into_output() 69 + .map_err(|e| XrpcErrorResponse::not_found(e.to_string()))? 70 + .value; 71 + let inner_profile = ProfileView::new() 72 + .did(bsky_profile.did) 73 + .handle(bsky_profile.handle) 74 + .maybe_display_name(bsky_profile.display_name) 75 + .maybe_description(bsky_profile.description) 76 + .maybe_avatar(bsky_profile.avatar) 77 + .maybe_banner(bsky_profile.banner) 78 + .build(); 79 + 80 + let inner = ProfileDataViewInner::ProfileView(Box::new(inner_profile)); 81 + 82 + let output = ProfileDataView::new().inner(inner).build(); 83 + 84 + return Ok(Json( 85 + GetProfileOutput { 86 + value: output, 87 + extra_data: None, 88 + } 89 + .into_static(), 90 + )); 56 91 }; 57 92 58 93 // Build the response
+1
crates/weaver-index/src/endpoints/bsky.rs
··· 16 16 State(state): State<AppState>, 17 17 ExtractXrpc(args): ExtractXrpc<GetProfileRequest>, 18 18 ) -> Result<Json<GetProfileOutput<'static>>, XrpcErrorResponse> { 19 + // TODO: either cache this or yell at tap to start tracking their account! 19 20 let response = state.resolver.send(args).await.map_err(|e| { 20 21 tracing::warn!("Appview getProfile failed: {}", e); 21 22 XrpcErrorResponse::internal_error("Failed to fetch profile from appview")
+11 -2
crates/weaver-index/src/endpoints/edit.rs
··· 289 289 290 290 /// Handle sh.weaver.edit.listDrafts 291 291 /// 292 - /// Returns draft records for an actor. 292 + /// Returns draft records for an actor. Requires authentication. 293 + /// Only returns drafts if viewer is the actor or has collab permission. 293 294 pub async fn list_drafts( 294 295 State(state): State<AppState>, 295 296 ExtractOptionalServiceAuth(viewer): ExtractOptionalServiceAuth, 296 297 ExtractXrpc(args): ExtractXrpc<ListDraftsRequest>, 297 298 ) -> Result<Json<ListDraftsOutput<'static>>, XrpcErrorResponse> { 298 - let _viewer: Viewer = viewer; 299 + // Require authentication 300 + let viewer = viewer.ok_or_else(|| XrpcErrorResponse::auth_required("Authentication required"))?; 301 + let viewer_did = viewer.did(); 299 302 300 303 let limit = args.limit.unwrap_or(50).min(100).max(1); 301 304 ··· 309 312 310 313 // Resolve actor to DID 311 314 let actor_did = resolve_actor(&state, &args.actor).await?; 315 + 316 + // Permission check: viewer must be the actor (owner access) 317 + // TODO: Add collab grant check for draft sharing 318 + if viewer_did.as_str() != actor_did.as_str() { 319 + return Err(XrpcErrorResponse::forbidden("Cannot view another user's drafts")); 320 + } 312 321 313 322 // Fetch drafts 314 323 let draft_rows = state
+1 -4
crates/weaver-index/src/endpoints/notebook.rs
··· 46 46 let name = args.name.as_ref(); 47 47 48 48 let limit = args.entry_limit.unwrap_or(50).clamp(1, 100) as u32; 49 - let cursor: Option<u32> = args 50 - .entry_cursor 51 - .as_deref() 52 - .and_then(|c| c.parse().ok()); 49 + let cursor: Option<u32> = args.entry_cursor.as_deref().and_then(|c| c.parse().ok()); 53 50 54 51 // Fetch notebook first to get its rkey 55 52 let notebook_row = state
+16
crates/weaver-index/src/endpoints/repo.rs
··· 51 51 message: Some(message.into()), 52 52 } 53 53 } 54 + 55 + pub fn auth_required(message: impl Into<String>) -> Self { 56 + Self { 57 + status: StatusCode::UNAUTHORIZED, 58 + error: "AuthRequired".to_string(), 59 + message: Some(message.into()), 60 + } 61 + } 62 + 63 + pub fn forbidden(message: impl Into<String>) -> Self { 64 + Self { 65 + status: StatusCode::FORBIDDEN, 66 + error: "Forbidden".to_string(), 67 + message: Some(message.into()), 68 + } 69 + } 54 70 } 55 71 56 72 impl IntoResponse for XrpcErrorResponse {
+10 -10
crates/weaver-index/src/landing.html
··· 100 100 <body> 101 101 <pre> 102 102 103 - .dP' db `Yb .dP' 104 - dP' db db 88 dP' 105 - 88 106 - `Yb d888b 'Yb 'Yb 88 d88b d88b 'Yb `Yb dP' 107 - 88P 88 88 88 88P 8Y 8b 88 Yb dP 108 - 88 8P 88 88 88 8P 88 88 YbdP 109 - 88 .dP .8P .8P 88 .dP' .dP' .8P .8P 110 - .88888888b. 8888888888888b. dP' b 111 - Y. ,P 112 - `""'</pre 103 + `Yb. db 104 + db `Yb db db 105 + "Ybaaaaaaaaad8' Yb 106 + .dP' dP' 88 `Yb d888b 'Yb `Yb dP' `Yb.d888b Yb 'Yb `Yb dP' 107 + 88 88 88 88P 88 88 Yb dP 88' 8Y dPYb 88 Yb dP 108 + Y8 Y8 .88 88 8P 88 YbdP 88 8P ,dP Yb 88 YbdP 109 + `Y88P`Y88P'88 88 .dP .8P .8P 88 ,dP .dP' `Yb. .8P .8P 110 + 88 .88888888b. dP' b 88 dP' b 111 + 88 Y. ,P 88 Y. ,P 112 + Y8. `""' .8P `""' </pre 113 113 > 114 114 <h1>Weaver Index</h1> 115 115 <p class="subtitle">AT Protocol Record Index</p>
+83 -3
crates/weaver-index/src/parallel_tap.rs
··· 12 12 }; 13 13 use crate::config::{IndexerConfig, TapConfig}; 14 14 use crate::error::{ClickHouseError, Result}; 15 - use crate::tap::{TapConfig as TapConsumerConfig, TapConsumer, TapEvent}; 15 + use crate::tap::{ 16 + RecordAction, TapConfig as TapConsumerConfig, TapConsumer, TapEvent, TapRecordEvent, 17 + }; 16 18 17 19 /// Tap indexer with multiple parallel websocket connections 18 20 /// ··· 183 185 ); 184 186 let raw_data = format!( 185 187 r#"{{"did":"{}","collection":"{}","rkey":"{}","cid":"{}","error":"serialization_failed"}}"#, 186 - record.did, record.collection, record.rkey, record.cid 188 + record.did, 189 + record.collection, 190 + record.rkey, 191 + record 192 + .cid 193 + .as_ref() 194 + .unwrap_or(&SmolStr::new_static("no cid")) 187 195 ); 188 196 records 189 197 .write_raw_to_dlq( ··· 207 215 len = json.len(), 208 216 "writing record" 209 217 ); 218 + 219 + if record.action == RecordAction::Delete { 220 + let client = client.clone(); 221 + let record_clone = record.clone(); 222 + tokio::spawn(async move { 223 + if let Err(e) = handle_delete(&client, record_clone).await { 224 + warn!(error = ?e, "delete handling failed"); 225 + } 226 + }); 227 + } 210 228 211 229 records 212 230 .write(RawRecordInsert { 213 231 did: record.did.clone(), 214 232 collection: record.collection.clone(), 215 233 rkey: record.rkey.clone(), 216 - cid: record.cid.clone(), 234 + cid: record.cid.clone().unwrap_or_default(), 217 235 rev: record.rev.clone(), 218 236 record: json.to_smolstr(), 219 237 operation: record.action.as_str().to_smolstr(), ··· 333 351 334 352 info!("backfill: all incremental MVs processed"); 335 353 } 354 + 355 + #[derive(Debug, Clone, clickhouse::Row, serde::Deserialize)] 356 + struct LookupRawRecord { 357 + #[allow(dead_code)] 358 + did: SmolStr, 359 + #[allow(dead_code)] 360 + collection: SmolStr, 361 + #[allow(dead_code)] 362 + cid: SmolStr, 363 + #[allow(dead_code)] 364 + record: SmolStr, // JSON string of the original record 365 + } 366 + 367 + 
async fn handle_delete(client: &Client, record: TapRecordEvent) -> Result<()> { 368 + let deadline = Instant::now() + Duration::from_secs(15); 369 + 370 + loop { 371 + // Try to find the record by rkey 372 + let query = format!( 373 + r#" 374 + SELECT did, collection, cid, record 375 + FROM raw_records 376 + WHERE did = '{}' AND rkey = '{}' 377 + ORDER BY event_time DESC 378 + LIMIT 1 379 + "#, 380 + record.did, record.rkey 381 + ); 382 + 383 + let original: Option<LookupRawRecord> = client 384 + .inner() 385 + .query(&query) 386 + .fetch_optional() 387 + .await 388 + .map_err(|e| crate::error::ClickHouseError::Query { 389 + message: "delete lookup failed".into(), 390 + source: e, 391 + })?; 392 + 393 + if let Some(original) = original { 394 + // Found the record - the main insert path already handles creating 395 + // the delete row, so we're done. In phase 2, this is where we'd 396 + // parse original.record and insert count deltas for denormalized tables. 397 + debug!(did = %record.did, cid = %original.cid, "delete found original record"); 398 + return Ok(()); 399 + } 400 + 401 + if Instant::now() > deadline { 402 + // Gave up - create stub tombstone 403 + // The record will be inserted via the main batch path with operation='delete' 404 + // and empty record content, which serves as our stub tombstone 405 + warn!( 406 + did = %record.did, 407 + cid = %original.as_ref().map(|o| o.cid.clone()).unwrap_or(SmolStr::new_static("")), 408 + "delete timeout, stub tombstone will be created" 409 + ); 410 + return Ok(()); 411 + } 412 + 413 + tokio::time::sleep(Duration::from_secs(1)).await; 414 + } 415 + }
+1 -1
crates/weaver-index/src/tap/types.rs
··· 41 41 /// Operation: create, update, delete 42 42 pub action: RecordAction, 43 43 /// Content identifier 44 - pub cid: SmolStr, 44 + pub cid: Option<SmolStr>, 45 45 /// The actual record data (only present for create/update) 46 46 #[serde(default)] 47 47 pub record: Option<serde_json::Value>,
+12 -4
crates/weaver-renderer/src/atproto/writer.rs
··· 246 246 self.write(&html)?; 247 247 } 248 248 }, 249 - Html(html) | InlineHtml(html) => { 249 + Html(html) => { 250 + self.write(&html)?; 251 + } 252 + InlineHtml(html) => { 253 + self.write(r#"<span class="html-embed html-embed-inline">"#)?; 250 254 self.write(&html)?; 255 + self.write("</span>")?; 251 256 } 252 257 SoftBreak => self.write_newline()?, 253 258 HardBreak => self.write("<br />\n")?, ··· 281 286 282 287 fn start_tag(&mut self, tag: Tag<'_>) -> Result<(), W::Error> { 283 288 match tag { 284 - Tag::HtmlBlock => Ok(()), 289 + Tag::HtmlBlock => self.write(r#"<span class="html-embed html-embed-block">"#), 285 290 Tag::Paragraph => { 286 291 if self.end_newline { 287 292 self.write("<p>") ··· 493 498 self.consume_until_end(); 494 499 return self.write(&html); 495 500 } else { 496 - tracing::debug!("[ClientWriter] No embed content from provider for {}", dest_url); 501 + tracing::debug!( 502 + "[ClientWriter] No embed content from provider for {}", 503 + dest_url 504 + ); 497 505 } 498 506 } else { 499 507 tracing::debug!("[ClientWriter] No embed provider available"); ··· 575 583 fn end_tag(&mut self, tag: markdown_weaver::TagEnd) -> Result<(), W::Error> { 576 584 use markdown_weaver::TagEnd; 577 585 match tag { 578 - TagEnd::HtmlBlock => Ok(()), 586 + TagEnd::HtmlBlock => self.write("</span>\n"), 579 587 TagEnd::Paragraph => self.write("</p>\n"), 580 588 TagEnd::Heading(level) => { 581 589 self.write("</")?;
+10
crates/weaver-renderer/src/css.rs
··· 307 307 border-radius: 4px; 308 308 }} 309 309 310 + /* Hygiene for iframes */ 311 + .html-embed-block {{ 312 + max-width: 100%; 313 + height: auto; 314 + display: block; 315 + margin: 1rem 0; 316 + }} 317 + 310 318 /* AT Protocol Embeds - Container */ 311 319 /* Light mode: paper with shadow, dark mode: blueprint with borders */ 312 320 .atproto-embed {{ ··· 434 442 margin-bottom: 0.75rem; 435 443 white-space: pre-wrap; 436 444 }} 445 + 446 + 437 447 438 448 .embed-description {{ 439 449 display: block;
+2 -1
docker-compose.yml
··· 24 24 TAP_LOG_LEVEL: info 25 25 TAP_OUTBOX_PARALLELISM: 5 26 26 #TAP_FULL_NETWORK: true 27 - TAP_SIGNAL_COLLECTION: place.stream.chat.profile 27 + #TAP_SIGNAL_COLLECTION: place.stream.chat.profile 28 + TAP_SIGNAL_COLLECTION: sh.weaver.actor.profile 28 29 TAP_COLLECTION_FILTERS: "sh.weaver.*,app.bsky.actor.profile,sh.tangled.*,pub.leaflet.*,net.anisota.*,place.stream.*" 29 30 healthcheck: 30 31 test: ["CMD", "wget", "-q", "--spider", "http://localhost:2480/health"]
+2 -2
weaver_notes/.obsidian/workspace.json
··· 201 201 }, 202 202 "active": "6029beecc3d03bce", 203 203 "lastOpenFiles": [ 204 + "weaver_index_html.png", 205 + "2025-12-14T23:32:19.png", 204 206 "diff_record.png", 205 207 "Why I rewrote pdsls in Rust (tm).md", 206 208 "Writing the AppView Last.md", ··· 213 215 "invalid_record.png", 214 216 "Pasted image 20251114125031.png", 215 217 "Pasted image 20251114121431.png", 216 - "json_editor_with_errors.png", 217 - "pretty_editor.png", 218 218 "Arch.md", 219 219 "Weaver - Long-form writing.md" 220 220 ]
+18 -4
weaver_notes/Writing the AppView Last.md
··· 1 + 1 2 If you've been to this site before, you may have noticed it loaded a fair bit more quickly this time. That's not really because the web server creating this HTML got a whole lot better. It did require some refactoring, but it was mostly in the vein of taking some code and adding new code that did the same thing gated behind a cargo feature. This did, however, have the side effect of, in the final binary, replacing functions that are literally hundreds of lines, that in turn call functions that may also be hundreds of lines, making several cascading network requests, with functions that look like this, which make by and large a single network request and return exactly what is required. 2 3 3 4 ```rust ··· 29 30 Of course the reason is that I finally got round to building the Weaver AppView. I'm going to be calling mine the Index, because Weaver is about writing and I think "AppView" as a term kind of sucks and "index" is much more elegant, on top of being a good descriptor of what the big backend service now powering Weaver does. ![[at://did:plc:ragtjsm2j2vknwkz3zp4oxrd/app.bsky.feed.post/3lyucxfxq622w]] 30 31 For the uninitiated, because I expect at least some people reading this aren't big into AT Protocol development, an AppView is an instance of the kind of big backend service that Bluesky PBLLC runs which powers essentially every Bluesky client, with a few notable exceptions, such as [Red Dwarf](https://reddwarf.app/), and (partially, eventually more completely) [Blacksky](https://blacksky.community/). It listens to the [Firehose](https://bsky.network/) [event stream](https://atproto.com/specs/event-stream) from the main Bluesky Relay and analyzes the data which comes through that pertains to Bluesky, producing your timeline feeds, figuring out who follows you, who you block and who blocks you (and filtering them out of your view of the app), how many people liked your last post, and so on. 
Because the records in your PDS (and those of all the other people on Bluesky) need context and relationship and so on to give them meaning, and then that context can be passed along to you without your app having to go collect it all. ![[at://did:plc:uu5axsmbm2or2dngy4gwchec/app.bsky.feed.post/3lsc2tzfsys2f]] 31 32 It's a very normal backend with some weird constraints because of the protocol, and in practice it's the thing that separates the day-to-day Bluesky experience from the Mastodon experience the most. It's also by far the most centralising force in the network, because it also does moderation, and because it's quite expensive to run. A full index of all Bluesky activity takes a lot of storage (futur's Zeppelin experiment detailed above took about 16 terabytes of storage using PostgreSQL for the database and cost $200/month to run), and then it takes that much more computing power to calculate all the relationships between the data on the fly as new events come in and then serve personalized versions to everyone that uses it. 33 + 34 + 32 35 It's not the only AppView out there; most atproto apps have something like this. Tangled, Streamplace, Leaflet, and so on all have substantial backends. Some (like Tangled) actually combine the front end you interact with and the AppView into a single service. But in general these are big, complicated persistent services you have to backfill from existing data to bootstrap, and they really strongly shape your app, whether they're literally part of the same executable or hosted on the same server or not. And when I started building Weaver in earnest, not only did I still have a few big unanswered questions about how I wanted Weaver to work, how it needed to work, I also didn't want to fundamentally tie it to some big server, create this centralising force. 
I wanted it to be possible for someone else to run it without being dependent on me personally, ideally possible even if all they had access to was a static site host like GitHub Pages or a browser runtime platform like Cloudflare Workers, so long as someone somewhere was running a couple of generic services. I wanted to be able to distribute the fullstack server version as basically just an executable in a directory of files with no other dependencies, which could easily be run in any container hosting environment with zero persistent storage required. Hell, you could technically serve it as a blob or series of blobs from your PDS with the right entry point if I did my job right. 34 37 ··· 57 60 In contrast, notebook entry records lack links to other parts of the notebook in and of themselves because calculating them would be challenging, and updating one entry would require not just updating the entry itself and notebook it's in, but also neighbouring entries in said notebook. With the shape of collaborative publishing in Weaver, that would result in up to 4 writes to the PDS when you publish an entry, in addition to any blob uploads. And trying to link the other way in edit history (root to edit head) is similarly challenging. 58 61 59 62 I anticipated some of these. but others emerged only because I ran into them while building the web app. I've had to manually fix up records more than once because I made breaking changes to my lexicons after discovering I really wanted X piece of metadata or cross-linkage. If I'd built the index first or alongside—particularly if the index remained a separate service from the web app as I intended it to, to keep the web app simple—it would likely have constrained my choices and potentially cut off certain solutions, due to the time it takes to dump the database and re-run backfill even at a very small scale. Building a big chunk of the front end first told me exactly what the index needed to provide easy access to. 
63 + 64 + You can access it here: [index.weaver.sh](https://index.weaver.sh) 60 65 # ClickHAUS 61 - So what does Weaver's index look like? Well it starts with either the firehose or the new Tap sync tool. The index ingests from either over a WebSocket connection, does a bit of processing (less is required when ingesting from Tap, and that's currently what I've deployed) and then dumps them in the Clickhouse database. I chose it as the primary index database on recommendation from a friend, and after doing a lot of reading. It fits atproto data well, as Graze found. Because it isolates concurrent inserts and selects so that you can just dump data in, while it cleans things up asynchronously after, it does wonderfully when you have a single major input point or a set of them to dump into that fans out, which you can then transform and then read from. 66 + So what does Weaver's index look like? Well it starts with either the firehose or the new [Tap](https://docs.bsky.app/blog/introducing-tap) sync tool. The index ingests from either over a WebSocket connection, does a bit of processing (less is required when ingesting from Tap, and that's currently what I've deployed) and then dumps them in the Clickhouse database. I chose it as the primary index database on recommendation from a friend, and after doing a lot of reading. It fits atproto data well, as Graze found. Because it isolates concurrent inserts and selects so that you can just dump data in, while it cleans things up asynchronously after, it does wonderfully when you have a single major input point or a set of them to dump into that fans out, which you can then transform and then read from. 67 + 68 + I will not claim that the tables you can find in the weaver repository are especially **good** database design overall, but they work, they're very much a work in progress, and we'll see how they scale. Also, Tap makes re-backfilling the data a hell of a lot easier. 
62 69 63 - I will not claim that the tables you can find in the weaver repository are especially **good** database design overall, but they work, and we'll see how they scale. This is one of three main input tables. One for record writes, one for identity events, and one for account events. 70 + This is one of three main input tables. One for record writes, one for identity events, and one for account events. 64 71 ```SQL 65 72 CREATE TABLE IF NOT EXISTS raw_records ( 66 73 did String, ··· 92 99 ENGINE = MergeTree() 93 100 ORDER BY (collection, did, rkey, event_time, indexed_at); 94 101 ``` 95 - From here we fan out into a cascading series of materialized views and other specialised tables. These break out the different record types, calculate metadata, and pull critical fields out of the record JSON for easier querying. Clickhouse's wild-ass compression means we're not too badly off replicating data on disk this way. Seriously, their JSON type ends up being the same size as a CBOR BLOB on disk in my testing, though it *does* have some quirks, as I discovered when I read back Datetime fields and got...not the format I put in. Thankfully there's a config setting for that. ![Clickhouse animation showing parallel inserts into a source table and a transformation query into a materialized view](https://clickhouse.com/docs/assets/images/incremental_materialized_view-1158726e31b08dc9808d96671239467f.gif)We also build out the list of who contributed to a published entry and determine the canonical record for it, so that fetching a fully hydrated entry with all contributor profiles only takes a couple of `SELECT` queries that themselves avoid performing extensive table scans due to reasonable choices of `ORDER BY` fields in the denormalized tables they query. And then I can do quirky things like power a profile fetch endpoint that will provide either a Weaver or a Bluesky profile, while also unifying fields so that we can easily get at the critical stuff in common. 
This is a relatively expensive calculation, but people thankfully don't edit their profiles that often, and this is why we don't keep the stats in the same table. 102 + From here we fan out into a cascading series of materialized views and other specialised tables. These break out the different record types, calculate metadata, and pull critical fields out of the record JSON for easier querying. Clickhouse's wild-ass compression means we're not too badly off replicating data on disk this way. Seriously, their JSON type ends up being the same size as a CBOR BLOB on disk in my testing, though it *does* have some quirks, as I discovered when I read back Datetime fields and got...not the format I put in. Thankfully there's a config setting for that. ![Clickhouse animation showing parallel inserts into a source table and a transformation query into a materialized view](https://clickhouse.com/docs/assets/images/incremental_materialized_view-1158726e31b08dc9808d96671239467f.gif)We also build out the list of who contributed to a published entry and determine the canonical record for it, so that fetching a fully hydrated entry with all contributor profiles only takes a couple of `SELECT` queries that themselves avoid performing extensive table scans due to reasonable choices of `ORDER BY` fields in the denormalized tables they query and are thus very fast. And then I can do quirky things like power a profile fetch endpoint that will provide either a Weaver or a Bluesky profile, while also unifying fields so that we can easily get at the critical stuff in common. This is a relatively expensive calculation, but people thankfully don't edit their profiles that often, and this is why we don't keep the stats in the same table. 96 103 97 104 However, this is ***also*** why Clickhouse will not be the only database used in the index. 98 105 ··· 140 147 141 148 If people have ideas, I'm all ears. 142 149 143 - I hope you found this interesting. I enjoyed writing it out. 
150 + ## Future 151 + Having this available obviously improves the performance of the app, but it also enables a lot of new stuff. I have plans for social features which would have been much harder to implement without it, and can later be backfilled into the non-indexed implementation. I have more substantial rewrites of the data fetching code planned as well, beyond the straightforward replacement I did in this first pass. And there's still a **lot** more to do on the editor before it's done. 152 + 153 + I've been joking about all sorts of ambitious things, but legitimately I think Weaver ends up being almost uniquely flexible and powerful among the atproto-based long-form writing platforms with how it's designed, and in particular how it enables people to create things together, and can end up filling some big shoes, given enough time and development effort. 154 + 155 + I hope you found this interesting. I enjoyed writing it out. There's still a lot more to do, but this was a big milestone for me. 156 + 157 + If you'd like to support this project, there's a GitHub Sponsorship link at the bottom of the page, but honestly I'd love it if you used it to write something.
weaver_notes/weaver_index_html.png

This is a binary file and will not be displayed.