Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

Add support for reverse ordering in link queries (Issue #1) #6

closed opened by seoul.systems targeting main from seoul.systems/microcosm-rs: order_query

The following adds support for reverse-chronological ordering when fetching back-links.

We add a non-required "reverse" query parameter that allows reversing the default order of returned back-links.

Supports both current storage backends (mem_store and rocks_store).

Labels

None yet.

Participants 2
AT URI
at://did:plc:53wellrw53o7sw4zlpfenvuh/sh.tangled.repo.pull/3majn54wt2l22
+405 -42
Diff #1
+31 -4
constellation/src/server/mod.rs
··· 17 use tokio::task::spawn_blocking; 18 use tokio_util::sync::CancellationToken; 19 20 - use crate::storage::{LinkReader, StorageStats}; 21 use crate::{CountsByCount, Did, RecordId}; 22 23 mod acceptable; ··· 239 /// Set the max number of links to return per page of results 240 #[serde(default = "get_default_cursor_limit")] 241 limit: u64, 242 } 243 #[derive(Serialize)] 244 struct OtherSubjectCount { ··· 295 296 let path_to_other = format!(".{}", query.path_to_other); 297 298 let paged = store 299 .get_many_to_many_counts( 300 &query.subject, 301 collection, 302 &path, 303 &path_to_other, 304 limit, 305 cursor_key, 306 &filter_dids, ··· 409 /// Set the max number of links to return per page of results 410 #[serde(default = "get_default_cursor_limit")] 411 limit: u64, 412 - // TODO: allow reverse (er, forward) order as well 413 } 414 #[derive(Template, Serialize)] 415 #[template(path = "get-backlinks.html.j2")] ··· 455 }; 456 let path = format!(".{path}"); 457 458 let paged = store 459 .get_links( 460 &query.subject, 461 collection, 462 &path, 463 limit, 464 until, 465 &filter_dids, ··· 508 from_dids: Option<String>, // comma separated: gross 509 #[serde(default = "get_default_cursor_limit")] 510 limit: u64, 511 - // TODO: allow reverse (er, forward) order as well 512 } 513 #[derive(Template, Serialize)] 514 #[template(path = "links.html.j2")] ··· 557 } 558 } 559 560 let paged = store 561 .get_links( 562 &query.target, 563 &query.collection, 564 &query.path, 565 limit, 566 until, 567 &filter_dids, ··· 594 path: String, 595 cursor: Option<OpaqueApiCursor>, 596 limit: Option<u64>, 597 - // TODO: allow reverse (er, forward) order as well 598 } 599 #[derive(Template, Serialize)] 600 #[template(path = "dids.html.j2")]
··· 17 use tokio::task::spawn_blocking; 18 use tokio_util::sync::CancellationToken; 19 20 + use crate::storage::{LinkReader, Order, StorageStats}; 21 use crate::{CountsByCount, Did, RecordId}; 22 23 mod acceptable; ··· 239 /// Set the max number of links to return per page of results 240 #[serde(default = "get_default_cursor_limit")] 241 limit: u64, 242 + /// Allow returning links in reverse order (default: false) 243 + #[serde(default)] 244 + reverse: bool, 245 } 246 #[derive(Serialize)] 247 struct OtherSubjectCount { ··· 298 299 let path_to_other = format!(".{}", query.path_to_other); 300 301 + let order = if query.reverse { 302 + Order::OldestToNewest 303 + } else { 304 + Order::NewestToOldest 305 + }; 306 + 307 let paged = store 308 .get_many_to_many_counts( 309 &query.subject, 310 collection, 311 &path, 312 &path_to_other, 313 + order, 314 limit, 315 cursor_key, 316 &filter_dids, ··· 419 /// Set the max number of links to return per page of results 420 #[serde(default = "get_default_cursor_limit")] 421 limit: u64, 422 + /// Allow returning links in reverse order (default: false) 423 + #[serde(default)] 424 + reverse: bool, 425 } 426 #[derive(Template, Serialize)] 427 #[template(path = "get-backlinks.html.j2")] ··· 467 }; 468 let path = format!(".{path}"); 469 470 + let order = if query.reverse { 471 + Order::OldestToNewest 472 + } else { 473 + Order::NewestToOldest 474 + }; 475 + 476 let paged = store 477 .get_links( 478 &query.subject, 479 collection, 480 &path, 481 + order, 482 limit, 483 until, 484 &filter_dids, ··· 527 from_dids: Option<String>, // comma separated: gross 528 #[serde(default = "get_default_cursor_limit")] 529 limit: u64, 530 + /// Allow returning links in reverse order (default: false) 531 + #[serde(default)] 532 + reverse: bool, 533 } 534 #[derive(Template, Serialize)] 535 #[template(path = "links.html.j2")] ··· 578 } 579 } 580 581 + let order = if query.reverse { 582 + Order::OldestToNewest 583 + } else { 584 + Order::NewestToOldest 585 + }; 586 + 587 let paged = store 588 .get_links( 589 &query.target, 590 &query.collection, 591 &query.path, 592 + order, 593 limit, 594 until, 595 &filter_dids, ··· 622 path: String, 623 cursor: Option<OpaqueApiCursor>, 624 limit: Option<u64>, 625 } 626 #[derive(Template, Serialize)] 627 #[template(path = "dids.html.j2")]
+50 -12
constellation/src/storage/mem_store.rs
··· 1 use super::{ 2 - LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats, 3 }; 4 use crate::{ActionableEvent, CountsByCount, Did, RecordId}; 5 use anyhow::Result; ··· 140 collection: &str, 141 path: &str, 142 path_to_other: &str, 143 limit: u64, 144 after: Option<String>, 145 filter_dids: &HashSet<Did>, ··· 157 let filter_to_targets: HashSet<Target> = 158 HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s))); 159 160 - let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new(); 161 - for (did, rkey) in linkers.iter().flatten().cloned() { 162 if !filter_dids.is_empty() && !filter_dids.contains(&did) { 163 continue; 164 } ··· 184 .take(1) 185 .next() 186 { 187 - let e = grouped_counts.entry(fwd_target.clone()).or_default(); 188 e.0 += 1; 189 e.1.insert(did.clone()); 190 } 191 } 192 let mut items: Vec<(String, u64, u64)> = grouped_counts 193 .iter() 194 - .map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64)) 195 .collect(); 196 - items.sort(); 197 items = items 198 .into_iter() 199 .skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false)) ··· 239 target: &str, 240 collection: &str, 241 path: &str, 242 limit: u64, 243 until: Option<u64>, 244 filter_dids: &HashSet<Did>, ··· 276 }; 277 278 let total = did_rkeys.len(); 279 - let end = until 280 - .map(|u| std::cmp::min(u as usize, total)) 281 - .unwrap_or(total); 282 - let begin = end.saturating_sub(limit as usize); 283 - let next = if begin == 0 { None } else { Some(begin as u64) }; 284 285 let alive = did_rkeys.iter().flatten().count(); 286 let gone = total - alive; 287 288 - let items: Vec<_> = did_rkeys[begin..end] 289 .iter() 290 .rev() 291 .flatten() ··· 296 collection: collection.to_string(), 297 }) 298 .collect(); 299 300 Ok(PagedAppendingCollection { 301 version: (total as u64, gone as u64),
··· 1 use super::{ 2 + LinkReader, LinkStorage, Order, PagedAppendingCollection, PagedOrderedCollection, 3 + StorageStats, 4 }; 5 use crate::{ActionableEvent, CountsByCount, Did, RecordId}; 6 use anyhow::Result; ··· 141 collection: &str, 142 path: &str, 143 path_to_other: &str, 144 + order: Order, 145 limit: u64, 146 after: Option<String>, 147 filter_dids: &HashSet<Did>, ··· 159 let filter_to_targets: HashSet<Target> = 160 HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s))); 161 162 + // the last type field here acts as an index to allow keeping track of the order in which 163 + // we encountred single elements 164 + let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>, usize)> = HashMap::new(); 165 + for (idx, (did, rkey)) in linkers.iter().flatten().cloned().enumerate() { 166 if !filter_dids.is_empty() && !filter_dids.contains(&did) { 167 continue; 168 } ··· 188 .take(1) 189 .next() 190 { 191 + let e = 192 + grouped_counts 193 + .entry(fwd_target.clone()) 194 + .or_insert((0, HashSet::new(), idx)); 195 e.0 += 1; 196 e.1.insert(did.clone()); 197 } 198 } 199 let mut items: Vec<(String, u64, u64)> = grouped_counts 200 .iter() 201 + .map(|(k, (n, u, _))| (k.0.clone(), *n, u.len() as u64)) 202 .collect(); 203 + // Sort based on order: OldestToNewest uses descending order, NewestToOldest uses ascending 204 + match order { 205 + Order::OldestToNewest => items.sort_by(|a, b| b.cmp(a)), 206 + Order::NewestToOldest => items.sort(), 207 + } 208 items = items 209 .into_iter() 210 .skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false)) ··· 250 target: &str, 251 collection: &str, 252 path: &str, 253 + order: Order, 254 limit: u64, 255 until: Option<u64>, 256 filter_dids: &HashSet<Did>, ··· 288 }; 289 290 let total = did_rkeys.len(); 291 + 292 + let begin: usize; 293 + let end: usize; 294 + let next: Option<u64>; 295 + 296 + match order { 297 + // OldestToNewest: start from the beginning, paginate forward 298 + Order::OldestToNewest => { 299 + begin = until.map(|u| (u) as usize).unwrap_or(0); 300 + end = std::cmp::min(begin + limit as usize, total); 301 + 302 + next = if end < total { 303 + Some(end as u64 + 1) 304 + } else { 305 + None 306 + }; 307 + } 308 + // NewestToOldest: start from the end, paginate backward 309 + Order::NewestToOldest => { 310 + end = until 311 + .map(|u| std::cmp::min(u as usize, total)) 312 + .unwrap_or(total); 313 + begin = end.saturating_sub(limit as usize); 314 + next = if begin == 0 { None } else { Some(begin as u64) }; 315 + } 316 + } 317 318 let alive = did_rkeys.iter().flatten().count(); 319 let gone = total - alive; 320 321 + let mut items: Vec<_> = did_rkeys[begin..end] 322 .iter() 323 .rev() 324 .flatten() ··· 329 collection: collection.to_string(), 330 }) 331 .collect(); 332 + 333 + // For OldestToNewest, reverse the items to maintain forward chronological order 334 + if order == Order::OldestToNewest { 335 + items.reverse(); 336 + } 337 338 Ok(PagedAppendingCollection { 339 version: (total as u64, gone as u64),
+256 -10
constellation/src/storage/mod.rs
··· 11 #[cfg(feature = "rocks")] 12 pub use rocks_store::RocksStorage; 13 14 #[derive(Debug, PartialEq)] 15 pub struct PagedAppendingCollection<T> { 16 pub version: (u64, u64), // (collection length, deleted item count) // TODO: change to (total, active)? since dedups isn't "deleted" ··· 72 collection: &str, 73 path: &str, 74 path_to_other: &str, 75 limit: u64, 76 after: Option<String>, 77 filter_dids: &HashSet<Did>, ··· 87 target: &str, 88 collection: &str, 89 path: &str, 90 limit: u64, 91 until: Option<u64>, 92 filter_dids: &HashSet<Did>, ··· 180 "a.com", 181 "app.t.c", 182 ".abc.uri", 183 100, 184 None, 185 &HashSet::default() ··· 683 "a.com", 684 "app.t.c", 685 ".abc.uri", 686 100, 687 None, 688 &HashSet::default() ··· 727 0, 728 )?; 729 } 730 - let links = 731 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 732 let dids = storage.get_distinct_dids("a.com", "app.t.c", ".abc.uri", 2, None)?; 733 assert_eq!( 734 links, ··· 763 "a.com", 764 "app.t.c", 765 ".abc.uri", 766 2, 767 links.next, 768 &HashSet::default(), ··· 801 "a.com", 802 "app.t.c", 803 ".abc.uri", 804 2, 805 links.next, 806 &HashSet::default(), ··· 831 assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 832 }); 833 834 test_each_storage!(get_filtered_links, |storage| { 835 let links = storage.get_links( 836 "a.com", 837 "app.t.c", 838 ".abc.uri", 839 2, 840 None, 841 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 869 "a.com", 870 "app.t.c", 871 ".abc.uri", 872 2, 873 None, 874 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 891 "a.com", 892 "app.t.c", 893 ".abc.uri", 894 2, 895 None, 896 &HashSet::from([Did("did:plc:someone-else".to_string())]), ··· 938 "a.com", 939 "app.t.c", 940 ".abc.uri", 941 2, 942 None, 943 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 967 "a.com", 968 "app.t.c", 969 ".abc.uri", 970 2, 971 None, 972 &HashSet::from([ ··· 999 "a.com", 1000 "app.t.c", 1001 ".abc.uri", 1002 2, 1003 None, 1004 &HashSet::from([Did("did:plc:someone-unknown".to_string())]), ··· 1031 0, 1032 )?; 1033 } 1034 - let links = 1035 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1036 assert_eq!( 1037 links, 1038 PagedAppendingCollection { ··· 1057 "a.com", 1058 "app.t.c", 1059 ".abc.uri", 1060 2, 1061 links.next, 1062 &HashSet::default(), ··· 1101 0, 1102 )?; 1103 } 1104 - let links = 1105 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1106 assert_eq!( 1107 links, 1108 PagedAppendingCollection { ··· 1141 "a.com", 1142 "app.t.c", 1143 ".abc.uri", 1144 2, 1145 links.next, 1146 &HashSet::default(), ··· 1185 0, 1186 )?; 1187 } 1188 - let links = 1189 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1190 assert_eq!( 1191 links, 1192 PagedAppendingCollection { ··· 1219 "a.com", 1220 "app.t.c", 1221 ".abc.uri", 1222 2, 1223 links.next, 1224 &HashSet::default(), ··· 1256 0, 1257 )?; 1258 } 1259 - let links = 1260 - storage.get_links("a.com", "app.t.c", ".abc.uri", 2, None, &HashSet::default())?; 1261 assert_eq!( 1262 links, 1263 PagedAppendingCollection { ··· 1286 "a.com", 1287 "app.t.c", 1288 ".abc.uri", 1289 2, 1290 links.next, 1291 &HashSet::default(), ··· 1367 "a.b.c", 1368 ".d.e", 1369 ".f.g", 1370 10, 1371 None, 1372 &HashSet::new(), ··· 1410 "app.t.c", 1411 ".abc.uri", 1412 ".def.uri", 1413 10, 1414 None, 1415 &HashSet::new(), ··· 1509 "app.t.c", 1510 ".abc.uri", 1511 ".def.uri", 1512 10, 1513 None, 1514 &HashSet::new(), ··· 1525 "app.t.c", 1526 ".abc.uri", 1527 ".def.uri", 1528 10, 1529 None, 1530 &HashSet::from_iter([Did("did:plc:fdsa".to_string())]), ··· 1541 "app.t.c", 1542 ".abc.uri", 1543 ".def.uri", 1544 10, 1545 None, 1546 &HashSet::new(), ··· 1551 next: None, 1552 } 1553 ); 1554 }); 1555 }
··· 11 #[cfg(feature = "rocks")] 12 pub use rocks_store::RocksStorage; 13 14 + /// Ordering for paginated link queries 15 + #[derive(Debug, Clone, Copy, PartialEq, Eq)] 16 + pub enum Order { 17 + /// Newest links first (default) 18 + NewestToOldest, 19 + /// Oldest links first 20 + OldestToNewest, 21 + } 22 + 23 #[derive(Debug, PartialEq)] 24 pub struct PagedAppendingCollection<T> { 25 pub version: (u64, u64), // (collection length, deleted item count) // TODO: change to (total, active)? since dedups isn't "deleted" ··· 81 collection: &str, 82 path: &str, 83 path_to_other: &str, 84 + order: Order, 85 limit: u64, 86 after: Option<String>, 87 filter_dids: &HashSet<Did>, ··· 97 target: &str, 98 collection: &str, 99 path: &str, 100 + order: Order, 101 limit: u64, 102 until: Option<u64>, 103 filter_dids: &HashSet<Did>, ··· 191 "a.com", 192 "app.t.c", 193 ".abc.uri", 194 + Order::NewestToOldest, 195 100, 196 None, 197 &HashSet::default() ··· 695 "a.com", 696 "app.t.c", 697 ".abc.uri", 698 + Order::NewestToOldest, 699 100, 700 None, 701 &HashSet::default() ··· 740 0, 741 )?; 742 } 743 + let links = storage.get_links( 744 + "a.com", 745 + "app.t.c", 746 + ".abc.uri", 747 + Order::NewestToOldest, 748 + 2, 749 + None, 750 + &HashSet::default(), 751 + )?; 752 let dids = storage.get_distinct_dids("a.com", "app.t.c", ".abc.uri", 2, None)?; 753 assert_eq!( 754 links, ··· 783 "a.com", 784 "app.t.c", 785 ".abc.uri", 786 + Order::NewestToOldest, 787 2, 788 links.next, 789 &HashSet::default(), ··· 822 "a.com", 823 "app.t.c", 824 ".abc.uri", 825 + Order::NewestToOldest, 826 2, 827 links.next, 828 &HashSet::default(), ··· 853 assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 854 }); 855 856 + test_each_storage!(get_links_reverse_order, |storage| { 857 + for i in 1..=5 { 858 + storage.push( 859 + &ActionableEvent::CreateLinks { 860 + record_id: RecordId { 861 + did: format!("did:plc:asdf-{i}").into(), 862 + collection: "app.t.c".into(), 863 + rkey: "asdf".into(), 864 + }, 865 + links: vec![CollectedLink { 866 + target: Link::Uri("a.com".into()), 867 + path: ".abc.uri".into(), 868 + }], 869 + }, 870 + 0, 871 + )?; 872 + } 873 + 874 + // Test OldestToNewest order (oldest first) 875 + let links = storage.get_links( 876 + "a.com", 877 + "app.t.c", 878 + ".abc.uri", 879 + Order::OldestToNewest, 880 + 2, 881 + None, 882 + &HashSet::default(), 883 + )?; 884 + assert_eq!( 885 + links, 886 + PagedAppendingCollection { 887 + version: (5, 0), 888 + items: vec![ 889 + RecordId { 890 + did: "did:plc:asdf-1".into(), 891 + collection: "app.t.c".into(), 892 + rkey: "asdf".into(), 893 + }, 894 + RecordId { 895 + did: "did:plc:asdf-2".into(), 896 + collection: "app.t.c".into(), 897 + rkey: "asdf".into(), 898 + }, 899 + ], 900 + next: Some(3), 901 + total: 5, 902 + } 903 + ); 904 + // Test NewestToOldest order (newest first) 905 + let links = storage.get_links( 906 + "a.com", 907 + "app.t.c", 908 + ".abc.uri", 909 + Order::NewestToOldest, 910 + 2, 911 + None, 912 + &HashSet::default(), 913 + )?; 914 + assert_eq!( 915 + links, 916 + PagedAppendingCollection { 917 + version: (5, 0), 918 + items: vec![ 919 + RecordId { 920 + did: "did:plc:asdf-5".into(), 921 + collection: "app.t.c".into(), 922 + rkey: "asdf".into(), 923 + }, 924 + RecordId { 925 + did: "did:plc:asdf-4".into(), 926 + collection: "app.t.c".into(), 927 + rkey: "asdf".into(), 928 + }, 929 + ], 930 + next: Some(3), 931 + total: 5, 932 + } 933 + ); 934 + assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5); 935 + }); 936 + 937 test_each_storage!(get_filtered_links, |storage| { 938 let links = storage.get_links( 939 "a.com", 940 "app.t.c", 941 ".abc.uri", 942 + Order::NewestToOldest, 943 2, 944 None, 945 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 973 "a.com", 974 "app.t.c", 975 ".abc.uri", 976 + Order::NewestToOldest, 977 2, 978 None, 979 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 996 "a.com", 997 "app.t.c", 998 ".abc.uri", 999 + Order::NewestToOldest, 1000 2, 1001 None, 1002 &HashSet::from([Did("did:plc:someone-else".to_string())]), ··· 1044 "a.com", 1045 "app.t.c", 1046 ".abc.uri", 1047 + Order::NewestToOldest, 1048 2, 1049 None, 1050 &HashSet::from([Did("did:plc:linker".to_string())]), ··· 1074 "a.com", 1075 "app.t.c", 1076 ".abc.uri", 1077 + Order::NewestToOldest, 1078 2, 1079 None, 1080 &HashSet::from([ ··· 1107 "a.com", 1108 "app.t.c", 1109 ".abc.uri", 1110 + Order::NewestToOldest, 1111 2, 1112 None, 1113 &HashSet::from([Did("did:plc:someone-unknown".to_string())]), ··· 1140 0, 1141 )?; 1142 } 1143 + let links = storage.get_links( 1144 + "a.com", 1145 + "app.t.c", 1146 + ".abc.uri", 1147 + Order::NewestToOldest, 1148 + 2, 1149 + None, 1150 + &HashSet::default(), 1151 + )?; 1152 assert_eq!( 1153 links, 1154 PagedAppendingCollection { ··· 1173 "a.com", 1174 "app.t.c", 1175 ".abc.uri", 1176 + Order::NewestToOldest, 1177 2, 1178 links.next, 1179 &HashSet::default(), ··· 1218 0, 1219 )?; 1220 } 1221 + let links = storage.get_links( 1222 + "a.com", 1223 + "app.t.c", 1224 + ".abc.uri", 1225 + Order::NewestToOldest, 1226 + 2, 1227 + None, 1228 + &HashSet::default(), 1229 + )?; 1230 assert_eq!( 1231 links, 1232 PagedAppendingCollection { ··· 1265 "a.com", 1266 "app.t.c", 1267 ".abc.uri", 1268 + Order::NewestToOldest, 1269 2, 1270 links.next, 1271 &HashSet::default(), ··· 1310 0, 1311 )?; 1312 } 1313 + let links = storage.get_links( 1314 + "a.com", 1315 + "app.t.c", 1316 + ".abc.uri", 1317 + Order::NewestToOldest, 1318 + 2, 1319 + None, 1320 + &HashSet::default(), 1321 + )?; 1322 assert_eq!( 1323 links, 1324 PagedAppendingCollection { ··· 1351 "a.com", 1352 "app.t.c", 1353 ".abc.uri", 1354 + Order::NewestToOldest, 1355 2, 1356 links.next, 1357 &HashSet::default(), ··· 1389 0, 1390 )?; 1391 } 1392 + let links = storage.get_links( 1393 + "a.com", 1394 + "app.t.c", 1395 + ".abc.uri", 1396 + Order::NewestToOldest, 1397 + 2, 1398 + None, 1399 + &HashSet::default(), 1400 + )?; 1401 assert_eq!( 1402 links, 1403 PagedAppendingCollection { ··· 1426 "a.com", 1427 "app.t.c", 1428 ".abc.uri", 1429 + Order::NewestToOldest, 1430 2, 1431 links.next, 1432 &HashSet::default(), ··· 1508 "a.b.c", 1509 ".d.e", 1510 ".f.g", 1511 + Order::NewestToOldest, 1512 10, 1513 None, 1514 &HashSet::new(), ··· 1552 "app.t.c", 1553 ".abc.uri", 1554 ".def.uri", 1555 + Order::NewestToOldest, 1556 10, 1557 None, 1558 &HashSet::new(), ··· 1652 "app.t.c", 1653 ".abc.uri", 1654 ".def.uri", 1655 + Order::NewestToOldest, 1656 10, 1657 None, 1658 &HashSet::new(), ··· 1669 "app.t.c", 1670 ".abc.uri", 1671 ".def.uri", 1672 + Order::NewestToOldest, 1673 10, 1674 None, 1675 &HashSet::from_iter([Did("did:plc:fdsa".to_string())]), ··· 1686 "app.t.c", 1687 ".abc.uri", 1688 ".def.uri", 1689 + Order::NewestToOldest, 1690 10, 1691 None, 1692 &HashSet::new(), ··· 1697 next: None, 1698 } 1699 ); 1700 + }); 1701 + 1702 + test_each_storage!(get_m2m_counts_reverse_order, |storage| { 1703 + // Create links from different DIDs to different targets 1704 + storage.push( 1705 + &ActionableEvent::CreateLinks { 1706 + record_id: RecordId { 1707 + did: "did:plc:user1".into(), 1708 + collection: "app.t.c".into(), 1709 + rkey: "post1".into(), 1710 + }, 1711 + links: vec![ 1712 + CollectedLink { 1713 + target: Link::Uri("a.com".into()), 1714 + path: ".abc.uri".into(), 1715 + }, 1716 + CollectedLink { 1717 + target: Link::Uri("b.com".into()), 1718 + path: ".def.uri".into(), 1719 + }, 1720 + ], 1721 + }, 1722 + 0, 1723 + )?; 1724 + storage.push( 1725 + &ActionableEvent::CreateLinks { 1726 + record_id: RecordId { 1727 + did: "did:plc:user2".into(), 1728 + collection: "app.t.c".into(), 1729 + rkey: "post1".into(), 1730 + }, 1731 + links: vec![ 1732 + CollectedLink { 1733 + target: Link::Uri("a.com".into()), 1734 + path: ".abc.uri".into(), 1735 + }, 1736 + CollectedLink { 1737 + target: Link::Uri("c.com".into()), 1738 + path: ".def.uri".into(), 1739 + }, 1740 + ], 1741 + }, 1742 + 1, 1743 + )?; 1744 + storage.push( 1745 + &ActionableEvent::CreateLinks { 1746 + record_id: RecordId { 1747 + did: "did:plc:user3".into(), 1748 + collection: "app.t.c".into(), 1749 + rkey: "post1".into(), 1750 + }, 1751 + links: vec![ 1752 + CollectedLink { 1753 + target: Link::Uri("a.com".into()), 1754 + path: ".abc.uri".into(), 1755 + }, 1756 + CollectedLink { 1757 + target: Link::Uri("d.com".into()), 1758 + path: ".def.uri".into(), 1759 + }, 1760 + ], 1761 + }, 1762 + 2, 1763 + )?; 1764 + 1765 + // Test NewestToOldest order (default order - by target ascending) 1766 + let counts = storage.get_many_to_many_counts( 1767 + "a.com", 1768 + "app.t.c", 1769 + ".abc.uri", 1770 + ".def.uri", 1771 + Order::NewestToOldest, 1772 + 10, 1773 + None, 1774 + &HashSet::new(), 1775 + &HashSet::new(), 1776 + )?; 1777 + assert_eq!(counts.items.len(), 3); 1778 + // Should be sorted by target in ascending order (alphabetical) 1779 + assert_eq!(counts.items[0].0, "b.com"); 1780 + assert_eq!(counts.items[1].0, "c.com"); 1781 + assert_eq!(counts.items[2].0, "d.com"); 1782 + 1783 + // Test OldestToNewest order (descending order - by target descending) 1784 + let counts = storage.get_many_to_many_counts( 1785 + "a.com", 1786 + "app.t.c", 1787 + ".abc.uri", 1788 + ".def.uri", 1789 + Order::OldestToNewest, 1790 + 10, 1791 + None, 1792 + &HashSet::new(), 1793 + &HashSet::new(), 1794 + )?; 1795 + assert_eq!(counts.items.len(), 3); 1796 + // Should be sorted by target in descending order (reverse alphabetical) 1797 + assert_eq!(counts.items[0].0, "d.com"); 1798 + assert_eq!(counts.items[1].0, "c.com"); 1799 + assert_eq!(counts.items[2].0, "b.com"); 1800 }); 1801 }
+41 -6
constellation/src/storage/rocks_store.rs
··· 1 use super::{ 2 - ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, 3 - StorageStats, 4 }; 5 use crate::{CountsByCount, Did, RecordId}; 6 use anyhow::{bail, Result}; ··· 941 collection: &str, 942 path: &str, 943 path_to_other: &str, 944 limit: u64, 945 after: Option<String>, 946 filter_dids: &HashSet<Did>, ··· 1071 } 1072 1073 let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len()); 1074 for (target_id, (n, dids)) in &grouped_counts { 1075 let Some(target) = self 1076 .target_id_table ··· 1082 items.push((target.0 .0, *n, dids.len() as u64)); 1083 } 1084 1085 let next = if grouped_counts.len() as u64 >= limit { 1086 // yeah.... it's a number saved as a string......sorry 1087 grouped_counts ··· 1127 target: &str, 1128 collection: &str, 1129 path: &str, 1130 limit: u64, 1131 until: Option<u64>, 1132 filter_dids: &HashSet<Did>, ··· 1167 1168 let (alive, gone) = linkers.count(); 1169 let total = alive + gone; 1170 - let end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize; 1171 - let begin = end.saturating_sub(limit as usize); 1172 - let next = if begin == 0 { None } else { Some(begin as u64) }; 1173 1174 - let did_id_rkeys = linkers.0[begin..end].iter().rev().collect::<Vec<_>>(); 1175 1176 let mut items = Vec::with_capacity(did_id_rkeys.len()); 1177 // TODO: use get-many (or multi-get or whatever it's called)
··· 1 use super::{ 2 + ActionableEvent, LinkReader, LinkStorage, Order, PagedAppendingCollection, 3 + PagedOrderedCollection, StorageStats, 4 }; 5 use crate::{CountsByCount, Did, RecordId}; 6 use anyhow::{bail, Result}; ··· 941 collection: &str, 942 path: &str, 943 path_to_other: &str, 944 + order: Order, 945 limit: u64, 946 after: Option<String>, 947 filter_dids: &HashSet<Did>, ··· 1072 } 1073 1074 let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len()); 1075 + 1076 for (target_id, (n, dids)) in &grouped_counts { 1077 let Some(target) = self 1078 .target_id_table ··· 1084 items.push((target.0 .0, *n, dids.len() as u64)); 1085 } 1086 1087 + // Sort based on order: OldestToNewest uses descending order, NewestToOldest uses ascending 1088 + match order { 1089 + Order::OldestToNewest => items.sort_by(|a, b| b.cmp(a)), // descending 1090 + Order::NewestToOldest => items.sort(), // ascending 1091 + } 1092 + 1093 let next = if grouped_counts.len() as u64 >= limit { 1094 // yeah.... it's a number saved as a string......sorry 1095 grouped_counts ··· 1135 target: &str, 1136 collection: &str, 1137 path: &str, 1138 + order: Order, 1139 limit: u64, 1140 until: Option<u64>, 1141 filter_dids: &HashSet<Did>, ··· 1176 1177 let (alive, gone) = linkers.count(); 1178 let total = alive + gone; 1179 + 1180 + let end: usize; 1181 + let begin: usize; 1182 + let next: Option<u64>; 1183 + 1184 + match order { 1185 + // OldestToNewest: start from the beginning, paginate forward 1186 + Order::OldestToNewest => { 1187 + begin = until.map(|u| (u - 1) as usize).unwrap_or(0); 1188 + end = std::cmp::min(begin + limit as usize, total as usize); 1189 + 1190 + next = if end < total as usize { 1191 + Some(end as u64 + 1) 1192 + } else { 1193 + None 1194 + } 1195 + } 1196 + // NewestToOldest: start from the end, paginate backward 1197 + Order::NewestToOldest => { 1198 + end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize; 1199 + begin = end.saturating_sub(limit as usize); 1200 + next = if begin == 0 { None } else { Some(begin as u64) }; 1201 + } 1202 + } 1203 1204 + let mut did_id_rkeys = linkers.0[begin..end].iter().rev().collect::<Vec<_>>(); 1205 + 1206 + // For OldestToNewest, reverse the items to maintain forward chronological order 1207 + if order == Order::OldestToNewest { 1208 + did_id_rkeys.reverse(); 1209 + } 1210 1211 let mut items = Vec::with_capacity(did_id_rkeys.len()); 1212 // TODO: use get-many (or multi-get or whatever it's called)
+4
constellation/templates/base.html.j2
··· 40 padding: 0.5em 0.3em; 41 max-width: 100%; 42 } 43 .stat { 44 color: #f90; 45 font-size: 1.618rem;
··· 40 padding: 0.5em 0.3em; 41 max-width: 100%; 42 } 43 + pre.code input { 44 + margin: 0; 45 + padding: 0; 46 + } 47 .stat { 48 color: #f90; 49 font-size: 1.618rem;
+2 -1
constellation/templates/get-backlinks.html.j2
··· 6 7 {% block content %} 8 9 - {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit) %} 10 11 <h2> 12 Links to <code>{{ query.subject }}</code> ··· 40 <input type="hidden" name="did" value="{{ did }}" /> 41 {% endfor %} 42 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 43 <button type="submit">next page&hellip;</button> 44 </form> 45 {% else %}
··· 6 7 {% block content %} 8 9 + {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit, query.reverse) %} 10 11 <h2> 12 Links to <code>{{ query.subject }}</code> ··· 40 <input type="hidden" name="did" value="{{ did }}" /> 41 {% endfor %} 42 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 43 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 44 <button type="submit">next page&hellip;</button> 45 </form> 46 {% else %}
+2
constellation/templates/get-many-to-many-counts.html.j2
··· 13 query.did, 14 query.other_subject, 15 query.limit, 16 ) %} 17 18 <h2> ··· 53 {% endfor %} 54 <input type="hidden" name="limit" value="{{ query.limit }}" /> 55 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 56 <button type="submit">next page&hellip;</button> 57 </form> 58 {% else %}
··· 13 query.did, 14 query.other_subject, 15 query.limit, 16 + query.reverse, 17 ) %} 18 19 <h2> ··· 54 {% endfor %} 55 <input type="hidden" name="limit" value="{{ query.limit }}" /> 56 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 57 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 58 <button type="submit">next page&hellip;</button> 59 </form> 60 {% else %}
+7 -2
constellation/templates/hello.html.j2
··· 49 <li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li> 50 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 51 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 52 </ul> 53 54 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 55 - {% call try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16) %} 56 57 58 <h3 class="route"><code>GET /xrpc/blue.microcosm.links.getManyToManyCounts</code></h3> ··· 68 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 69 <li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple users. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li> 70 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 71 </ul> 72 73 <p style="margin-bottom: 0"><strong>Try it:</strong></p> ··· 78 [""], 79 [""], 80 25, 81 ) %} 82 83 ··· 96 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 97 <li><p><code>from_dids</code> [deprecated]: optional. Use <code>did</code> instead. Example: <code>from_dids=did:plc:vc7f4oafdgxsihk4cry2xpze,did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 98 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 99 </ul> 100 101 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 102 - {% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16) %} 103 104 105 <h3 class="route"><code>GET /links/distinct-dids</code></h3>
··· 49 <li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li> 50 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 51 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 52 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 53 </ul> 54 55 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 56 + {% call 57 + try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16, false) %} 58 59 60 <h3 class="route"><code>GET /xrpc/blue.microcosm.links.getManyToManyCounts</code></h3> ··· 70 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 71 <li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple users. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li> 72 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 73 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 74 </ul> 75 76 <p style="margin-bottom: 0"><strong>Try it:</strong></p> ··· 81 [""], 82 [""], 83 25, 84 + false, 85 ) %} 86 87 ··· 100 <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 101 <li><p><code>from_dids</code> [deprecated]: optional. Use <code>did</code> instead. Example: <code>from_dids=did:plc:vc7f4oafdgxsihk4cry2xpze,did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li> 102 <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li> 103 + <li><p><code>reverse</code>: optional, return links in reverse order. Default: <code>false</code></p></li> 104 </ul> 105 106 <p style="margin-bottom: 0"><strong>Try it:</strong></p> 107 + {% call try_it::links("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like", ".subject.uri", [""], 16, false) %} 108 109 110 <h3 class="route"><code>GET /links/distinct-dids</code></h3>
+2 -1
constellation/templates/links.html.j2
··· 6 7 {% block content %} 8 9 - {% call try_it::links(query.target, query.collection, query.path, query.did, query.limit) %} 10 11 <h2> 12 Links to <code>{{ query.target }}</code> ··· 37 <input type="hidden" name="collection" value="{{ query.collection }}" /> 38 <input type="hidden" name="path" value="{{ query.path }}" /> 39 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 40 <button type="submit">next page&hellip;</button> 41 </form> 42 {% else %}
··· 6 7 {% block content %} 8 9 + {% call try_it::links(query.target, query.collection, query.path, query.did, query.limit, query.reverse) %} 10 11 <h2> 12 Links to <code>{{ query.target }}</code> ··· 37 <input type="hidden" name="collection" value="{{ query.collection }}" /> 38 <input type="hidden" name="path" value="{{ query.path }}" /> 39 <input type="hidden" name="cursor" value={{ c|json|safe }} /> 40 + <input type="hidden" name="reverse" value="{{ query.reverse }}"> 41 <button type="submit">next page&hellip;</button> 42 </form> 43 {% else %}
+10 -6
constellation/templates/try-it-macros.html.j2
··· 1 - {% macro get_backlinks(subject, source, dids, limit) %} 2 <form method="get" action="/xrpc/blue.microcosm.links.getBacklinks"> 3 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getBacklinks 4 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 6 {%- for did in dids %}{% if !did.is_empty() %} 7 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 8 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 9 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 10 </form> 11 <script> 12 const addDidButton = document.getElementById('add-did'); ··· 24 </script> 25 {% endmacro %} 26 27 - {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit) %} 28 <form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts"> 29 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getManyToManyCounts 30 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 36 {%- for otherSubject in otherSubjects %}{% if !otherSubject.is_empty() %} 37 &otherSubject= <input type="text" name="did" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." />{% endif %}{% endfor %} 38 <span id="m2m-did-placeholder"></span> <button id="m2m-add-did">+ did filter</button> 39 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 40 </form> 41 <script> 42 const m2mAddDidButton = document.getElementById('m2m-add-did'); ··· 66 </script> 67 {% endmacro %} 68 69 - {% macro links(target, collection, path, dids, limit) %} 70 <form method="get" action="/links"> 71 <pre class="code"><strong>GET</strong> /links 72 ?target= <input type="text" name="target" value="{{ target }}" placeholder="target" /> ··· 75 {%- for did in dids %}{% if !did.is_empty() %} 76 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 77 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 78 - &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 79 </form> 80 <script> 81 const addDidButton = document.getElementById('add-did');
··· 1 + {% macro get_backlinks(subject, source, dids, limit, reverse) %} 2 <form method="get" action="/xrpc/blue.microcosm.links.getBacklinks"> 3 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getBacklinks 4 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 6 {%- for did in dids %}{% if !did.is_empty() %} 7 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 8 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 9 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 10 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"><button type="submit">get links</button></pre> 11 </form> 12 <script> 13 const addDidButton = document.getElementById('add-did'); ··· 25 </script> 26 {% endmacro %} 27 28 + {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit, reverse) %} 29 <form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts"> 30 <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getManyToManyCounts 31 ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." /> ··· 37 {%- for otherSubject in otherSubjects %}{% if !otherSubject.is_empty() %} 38 &otherSubject= <input type="text" name="did" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." />{% endif %}{% endfor %} 39 <span id="m2m-did-placeholder"></span> <button id="m2m-add-did">+ did filter</button> 40 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 41 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"><button type="submit">get links</button></pre> 42 </form> 43 <script> 44 const m2mAddDidButton = document.getElementById('m2m-add-did'); ··· 68 </script> 69 {% endmacro %} 70 71 + {% macro links(target, collection, path, dids, limit, reverse) %} 72 <form method="get" action="/links"> 73 <pre class="code"><strong>GET</strong> /links 74 ?target= <input type="text" name="target" value="{{ target }}" placeholder="target" /> ··· 77 {%- for did in dids %}{% if !did.is_empty() %} 78 &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %} 79 <span id="did-placeholder"></span> <button id="add-did">+ did filter</button> 80 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> 81 + &reverse= <input type="checkbox" name="reverse" value="true" checked="false"> 82 + <button type="submit">get links</button></pre> 83 </form> 84 <script> 85 const addDidButton = document.getElementById('add-did');

History

2 rounds 23 comments
sign up or login to add to the discussion
6 commits
expand
Add reverse ordering support to link query endpoints
Fix failing tests
Format tests
Add tests for reverse ordering in link queries
Fix pagination logic for reverse-ordered link queries
Replace boolean reverse parameter with Order enum
expand 16 comments

Damn, I just realized I have not yet added a lexicon for the new endpoint. Will do that now. At least the endpoint logic itself should not be affected by that and thus be ready for review, given its similarity to the existing getManyToManyCounts endpoint.

As Lexicons do not offer support for tuples as primitive field types I had to convert the return to use a new RecordsBySubject struct instead, which closely matches what you already did for the m2m counts endpoint.

The comments obviously don't belong here... please refer to the comments made for the PR #7

alright! reading back my own code gave me such a headache with this!

i'm reverting a few parts of this:

  • deprecated links endpoint: going to hard-code it to the existing order (deprecated endpoints don't get new features)

  • reverse ordering on many-to-many counts.

the many-to-many counts endpoint is a weird one to think about because it's not obvious what an "order" on it means. especially since the counts are aggregated, which row should be first in the response?

the current behaviour is deterministic but arbitrary: items are sorted by the other subject's target id (ie., the first time we saw the joined thingy), which is potentially unrelated to the actual many-to-many records themselves. if someone later creates a many-to-many record linking an earlier other subject, it'll become first in the list.

so: the determinism of that other-subject-target-id sorting is what enables paging to work at this endpoint, but it doesn't impose a client-meaningful order of items in the response. reversing an arbitrary order just gives you a different arbitrary order.

hopefully that makes sense ha

(additionally: there is a bit of optimization in the main join loop in the rocksdb implementation that assumes target-id-order to save work from unused pages, and i could be wrong, but i think it wouldn't stay valid under reverse paging)

(oh but i do think we can add a reverse option to the distinct-dids endpoint! probably under a new xrpc version.)

despite the above, this is merged!

i did a couple edits locally:

Just so that I understand you correctly here: We decided to not proceed with the implementation of a reverse order parameter for many-to-many related things as the current order is deterministic but arbitrary in nature, and thus reversing is essentially not providing any more information that is meaningful in the end.

If it's not too much trouble, do you mind explaining why you used Iterator::skip and ::take for the slicing? Is this simply more idiomatic or more performant even? :)

Really liked the way you resolved empty results with the empty impl by the way. Definitely less verbose and even clearer than what I opted for haha

many-to-many: yes exactly. it's not clear what expectation we would be trying to meet for a client requesting the many-to-many counts in reverse.

i'm open to being wrong here! when thinking about ordering, i was focused on the order of the many-to-many join-records themselves. which is where things felt meaningless/arbitrary because a) they're aggregated together (at this endpoint) and b) their order of creation is independent of the order this endpoint returns its results in.

but maybe having results ordered by the other-target-id does have client-relevant meaning. joined subjects are sorted by their age in a global sense?

thinking through it with concrete examples, i'm more inclined to bring it back:

  • joining tangled issues to a label: issues are ordered oldest-to-newest (independent from when the label was added)
  • joining bluesky users to a bluesky list: users are ordered by the age of their account (again independent from when they were added to the list)

hmm.

the thing that still feels off to me for these is the fact that this endpoint is an aggregate of many-to-many counts. that implies to me that the count is the thing of interest, and the order of responses in relation to the count is arbitrary again.

trying to keep the non-aggregated version of this endpoint in view, does ordering by the linked subject age make sense then? (ordering by the link actually feels more natural to me in that case) (i think i just talked myself out of and then back into bringing back your ordering here!!)

skip and take: yeah, it's a little more functional-style which works well for my brain. i like a big pipeline of chained transformations operating on sequences so i took the chance to do a little refactor with it. especially since it's immediately setting up an iterator right after slicing, it feels more cohesive to me.

it can be technically the other things too:

indexing ranges like thing[start..end] can panic if either index is out of bounds. in some cases the compiler can prove we already did the bounds checks and remove the panic hooks, but i suspect it can't here. even though we can prove that the indices are in range by reasoning about the code, it's nice not to have things-that-can-panic in the code.

i think it eliminated a Vec::reverse() in favour of an Iterator::rev(), which is potentially a very tiny efficiency improvement

m2m: I think this just boils down to how we communicate this fact to users of our API imo. As your internal back and forth shows there can be valid arguments in both ways. I think most users might naively - as I did at first - simply expect the links/records to be returned in the order they were created in. But, clearly documenting this in the endpoint description seems like a good idea here to make sure we don't leave any kind of ambiguity on the table.

skip and take: That's actually a great argument and I tend to agree with you on both fronts. Having worked professionally more with Typescript than, say, C, I always liked that you could chain transformations to a specific object together (map, filter and so on). The additional safety we get here from using iterators to slice data is new for me but definitely worth it.

m2m (again): Depending on how you see this I might reopen the PR again and introduce the parameter again. I let you make the final judgement call ofc :)

thanks for following up! i really appreciate the discussion!

100% agree the most important thing is communicating and setting accurate expectations in the api docs regardless of how this lands.

I really had convinced myself that the order was "meaningless" for clients, but then every example i was thinking of was like, welllll it kind of is actually meaningful. After a few days I'm still feeling that way, so I'm interested in bringing this back.

(the other thing that got me was switching to the other M2M PR, and remembering that this endpoint is useful as a "distinct" version of that query. it's not only useful for its counts)

i do think there's some extra logic needed in one of the loops that currently tries to avoid some work based on cursor assumptions.

aaaaaand i think there might be a bug in the existing logic, so this will be extra fun sorry! (i will see if i can finally get that to a proper repro or disprove it, so we can hopefully fix that first)

Sounds great! I think no matter how you look at it, the order is not entirely arbitrary as we collect results on the order they're stored.

Do you mind elaborating a bit further on your last two remarks before I open the PR again?! Might be easier for me to know what to look for then!

closed without merging
5 commits
expand
Add reverse ordering support to link query endpoints
Fix failing tests
Format tests
Add tests for reverse ordering in link queries
Fix pagination logic for reverse-ordered link queries
expand 7 comments

Looks like we might need a rebase, seems like this picked up (and is trying to undo) the changes from your other two PRs

✅ naming the query param reverse sounds good to me since it's consistent with com.atproto.repo.listRecords (and i assume/hopefully most other atproto lexicons)

i think we double-reverse for reverse-order backlinks:

https://tangled.org/microcosm.blue/microcosm-rs/pulls/6/round/0#constellation%2fsrc%2fstorage%2frocks_store.rs-N1200

could probably work it so we only call reverse once (when "forward". not confusing at all.)

Looks like we might need a rebase, seems like this picked up (and is trying to undo) the changes from your other two PRs

I think merging upstream into the fork might work as well and makes things easier for you as the reviewer. I don't mind rebasing ofc.

i think we double-reverse for reverse-order backlinks:

Yeah the existing .rev() calls were a bit confusing at first. Maybe changing the default order to return the oldest links per default might make things a bit clearer? Though I think this is mostly helping us as the maintainers as users might expect to get served the newest ones most of the time.

Another option could be introducing an "order" query parameter with two possible values asc and desc (or something similar) to make things a bit more clear what the default order is, but then we would lose the consistency between our endpoint and com.atproto.repo.listRecords.

We could reduce the number of times we reverse using the following I think:

let mut did_id_rkeys = linkers.0[begin..end].iter().collect::<Vec<_>>();

if !reverse {
    did_id_rkeys.reverse();
}

Some comments might be justified though. That conditional looks super confusing lol

Upon thinking about this a bit more I think we should keep the reverse query parameter as is and don't touch the default order as this would be an unnecessary API break. Instead we could just introduce an enum and convert the query parameter to an enum value immediately after receiving. Other operations would be clearer then. We should still add some more context somewhere in the comments though. I had something like this in mind. What do you think?

// As backlinks are stored chronologically (oldest → newest) we need to reverse for newest-first queries
pub enum Order {
    NewestToOldest,  // default (reverse=false)
    OldestToNewest,  // reverse=true
}

impl From<bool> for Order {
    fn from(reverse: bool) -> Self {
        if reverse {
            Order::OldestToNewest
        } else {
            Order::NewestToOldest
        }
    }
}

// In server/mod.rs, where we receive the query parameter
fn get_links(
    accept: ExtractAccept,
    query: axum_extra::extract::Query<GetLinkItemsQuery>,
    store: impl LinkReader,
) -> Result<impl IntoResponse, http::StatusCode> {
    // Convert boolean to enum right at the boundary
    let order = Order::from(query.reverse);

    // Now pass the enum to storage layer
    let paged = store.get_links(
        &query.target,
        &query.collection,
        &query.path,
        order,  // ← Clean enum instead of mysterious boolean
        limit,
        until,
        &filter_dids,
    )?;
    // ...
}

// In rocks_store.rs get_links() - this replaces the confusing double-reverse!
let did_id_rkeys = match order {
    Order::OldestToNewest => {
        begin = until.map(|u| (u - 1) as usize).unwrap_or(0);
        end = std::cmp::min(begin + limit as usize, total as usize);
        next = if end < total as usize {
            Some(end as u64 + 1)
        } else {
            None
        };
        // No reversal - storage is already chronological
        linkers.0[begin..end].iter().collect::<Vec<_>>()
    }
    Order::NewestToOldest => {
        end = until.map(|u| std::cmp::min(u, total)).unwrap_or(total) as usize;
        begin = end.saturating_sub(limit as usize);
        next = if begin == 0 { None } else { Some(begin as u64) };
        // Reverse chronological storage to get newest-to-oldest
        linkers.0[begin..end].iter().rev().collect::<Vec<_>>()
    }
};

Thanks for the follow-up!

For the API, I agree with where you landed: keep it newest-first by default, optional reverse param being true makes it chronological. Small nit would be to skip the From<bool> impl since a bool itself doesn't inherently carry directional meaning; the endpoint gives it that meaning, so we can put the let order = if query.reverse { Order::OldestToNewest } ... directly in the endpoint.

i think you're right to just put the whole construction of the vec and cursor into branches on Order. part of me wants to try and encapsulate it a bit and switch the indexing to use linkers.0.iter().skip(begin).take(end) or whatever but the .rev() in there will still require a branch and/or make type things annoying and probably all of it less clear. i like your code.

comments about order are excellent thank you :)

Rebased the PR to match the current state of main and introduced the Order enum to make the sorting of items a bit clearer throughout the handlers that use the reverse parameter already (followed your suggestion regarding the From<bool> and put the definition of the order parameter directly in the endpoint itself before passing it as a parameter to the underlying handler).