Parakeet is a Rust-based Bluesky AppView aiming to implement most of the functionality required to support the Bluesky client.


Changed files (+5632 -1085):
consumer
dataloader-rs
lexica
migrations/
    2025-02-16-142357_posts
    2025-08-03-125504_mutes
    2025-09-02-190833_bookmarks
    2025-09-17-190406_viewer-interactions
    2025-09-24-205239_profiles-4224
    2025-09-27-171241_post-tweaks
parakeet
parakeet-db
parakeet-index
+1
.gitlab-ci.yml
···
    - component: $CI_SERVER_FQDN/to-be-continuous/docker/gitlab-ci-docker@6.1.7
      inputs:
        hadolint-args: --ignore DL3008
+       trivy-disabled: true

  .docker-base:
    parallel:
+394 -257
Cargo.lock
(Machine-generated lockfile churn, summarized.)

The vendored dataloader drops its async-std dependency, removing the whole async-std/smol tree from the lockfile: async-channel, async-executor, async-global-executor, async-io, async-lock, async-std, async-task, blocking, concurrent-queue, event-listener, event-listener-strategy, futures-lite, gloo-timers, kv-log-macro, parking, piper, polling, and value-bag.

New entries come from the OpenTelemetry work and the new downloader: opentelemetry 0.31.0 (plus opentelemetry-http, opentelemetry-otlp, opentelemetry-proto, opentelemetry-semantic-conventions, and opentelemetry_sdk), tracing-opentelemetry 0.32.0, tracing-opentelemetry-instrumentation-sdk, axum-tracing-opentelemetry, and tonic-tracing-opentelemetry (which pulls in tonic 0.14.2, tonic-prost, and prost/prost-derive 0.14.1 alongside the existing 0.13 versions); flume 0.11.1 (with nanorand); quinn, quinn-proto, and quinn-udp, with rustls, rustls-native-certs, and tokio-rustls joining reqwest's dependency list; and assorted utilities: cfg_aliases, lru-slab, matchers (with regex-automata 0.1.10 / regex-syntax 0.6.29), rustc-hash 2.1.1, tracing-serde, and web-time.

Workspace crate changes: consumer gains flume; lexica gains cid; parakeet gains axum-tracing-opentelemetry, the opentelemetry crates, redis, serde_ipld_dagcbor, tower, and tracing-opentelemetry; parakeet-db gains serde; parakeet-index gains the opentelemetry crates, tonic-tracing-opentelemetry, tower, and tracing-opentelemetry; tracing-subscriber now includes its env-filter and json feature dependencies (matchers, regex, serde, serde_json, tracing-serde).
+176
LICENSE-APACHE
(Full text of the Apache License, Version 2.0, added verbatim.)
+21
LICENSE-MIT
(Standard MIT License text, Copyright (c) 2025 Parakeet Project, added verbatim.)
+81
README.md
# Parakeet

Parakeet is a [Bluesky](https://bsky.app) [AppView](https://atproto.wiki/en/wiki/reference/core-architecture/appview)
aiming to implement most of the functionality required to support the Bluesky client. Notably not implemented is a CDN.

## Status and Roadmap
Most common functionality works. Notable omissions: like/repost/follow statuses are missing, blocks and mutes are not
applied, labels might not track CIDs properly, and label redaction doesn't work at all (beware!).

Future work is tracked in issues, but the highlights are below. Help would be highly appreciated.
- Notifications
- Search
- Pinned Posts
- The Timeline
- Monitoring: metrics, tracing, and health checks.

## The Code
Parakeet is implemented in Rust, using Postgres as the database, Redis for caching and queue processing, RocksDB for
aggregation, and Diesel for migrations and querying.

This repo is one big Rust workspace, containing nearly everything required to run and support the AppView.

### Packages
- consumer: Relay indexer, label consumer, and backfiller. Takes raw records in from repos and stores them.
- dataloader-rs: a vendored fork of https://github.com/cksac/dataloader-rs, with some tweaks to fit caching requirements.
- did-resolver: A did:plc and did:web resolver using hickory and reqwest. Supports custom PLC directories.
- lexica: Rust types for the relevant lexicons[sic] for Bluesky.
- parakeet: The core AppView server code, built on Axum and Diesel.
- parakeet-db: Database types and models, plus the Diesel schema.
- parakeet-index: Stats aggregator based on RocksDB. Uses gRPC with tonic.
- parakeet-lexgen: A WIP code generator for Lexicon in Rust. Not in use.

There is also a dependency on a fork of [jsonwebtoken](https://gitlab.com/parakeet-social/jsonwebtoken) until upstream
supports ES256K.

## Running
Prebuilt Docker images are published (semi-)automatically by GitLab CI at https://gitlab.com/parakeet-social/parakeet.
Use `registry.gitlab.com/parakeet-social/parakeet/[package]:[branch]` in your docker-compose.yml. There is currently no
versioning until the project is more stable (sorry). You can also just build with cargo.

To run, you'll need Postgres (version 16 or higher), Redis (or a Redis-like), consumer, parakeet, and parakeet-index.

### Configuring
There are quite a lot of environment variables, although sensible defaults are provided where possible. Variables are
prefixed with `PK`, `PKC`, or `PKI` depending on whether they're used by parakeet, consumer, or parakeet-index,
respectively. Some are common to two or three parts and are marked accordingly. (A sketch of how this naming scheme can
be loaded follows the table.)

| Variable | Default | Description |
|----------|---------|-------------|
| (PK/PKC)_INDEX_URI | n/a | Required. URI of the parakeet-index instance, in the format `[host]:[port]`. |
| (PK/PKC)_REDIS_URI | n/a | Required. URI of Redis (or compatible), in the format `redis://[host]:[port]`. |
| (PK/PKC)_PLC_DIRECTORY | `https://plc.directory` | Optional. A PLC mirror or different instance to use when resolving did:plc. |
| PKC_DATABASE__URL | n/a | Required. URI of Postgres, in the format `postgres://[user]:[pass]@[host]:[port]/[db]`. |
| PKC_UA_CONTACT | n/a | Recommended. Some contact details (email / bluesky handle / website) to add to the User-Agent. |
| PKC_LABEL_SOURCE | n/a | Required if consuming labels. A labeler or label relay to consume. |
| PKC_RESUME_PATH | n/a | Required if consuming the relay or label firehose. Where to store the cursor data. |
| PKC_INDEXER__RELAY_SOURCE | n/a | Required if consuming a relay. Relay to consume from. |
| PKC_INDEXER__HISTORY_MODE | n/a | Required if consuming a relay. `backfill_history` or `realtime`, depending on whether you plan to backfill when consuming record data from a relay. |
| PKC_INDEXER__INDEXER_WORKERS | 4 | How many workers to spread indexing work between. 4 or 6 usually works, depending on load. Ensure you have enough DB connections available. |
| PKC_INDEXER__START_COMMIT_SEQ | n/a | Optionally, the relay sequence to start consuming from. Overridden by the data in PKC_RESUME_PATH, so clear that first if you reset. |
| PKC_INDEXER__SKIP_HANDLE_VALIDATION | false | Whether the indexer should SKIP validating handles from `#identity` events. |
| PKC_INDEXER__REQUEST_BACKFILL | false | Whether the indexer should request backfill when relevant. Only applies when `backfill_history` is set. You likely want TRUE unless you're manually controlling backfill queues. |
| PKC_BACKFILL__WORKERS | 4 | How many workers to use when backfilling into the DB. Ensure you have enough DB connections available, as one is created per worker. |
| PKC_BACKFILL__SKIP_AGGREGATION | false | Whether to skip sending aggregation to parakeet-index. Does not remove the index requirement. Useful when developing. |
| PKC_BACKFILL__DOWNLOAD_WORKERS | 25 | How many workers to use to download repos for backfilling. |
| PKC_BACKFILL__DOWNLOAD_BUFFER | 25000 | How many repos to download and queue. |
| PKC_BACKFILL__DOWNLOAD_TMP_DIR | n/a | Where to download repos to. Ensure there is enough space. |
| (PK/PKI)_SERVER__BIND_ADDRESS | `0.0.0.0` | Address for the server to bind to. For parakeet-index outside of Docker, you probably want loopback, as there is no auth. |
| (PK/PKI)_SERVER__PORT | PK: 6000, PKI: 6001 | Port for the server to bind to. |
| (PK/PKI)_DATABASE_URL | n/a | Required. URI of Postgres, in the format `postgres://[user]:[pass]@[host]:[port]/[db]`. |
| PK_SERVICE__DID | n/a | DID for the AppView, in did:web. (did:plc is possible but untested.) |
| PK_SERVICE__PUBLIC_KEY | n/a | Public key for the AppView. Unsure if actually used, but may be required by the PDS. |
| PK_SERVICE__ENDPOINT | n/a | HTTPS publicly accessible endpoint for the AppView. |
| PK_TRUSTED_VERIFIERS | n/a | Optionally, trusted verifiers to use. For many, join with `,`. |
| PK_CDN__BASE | `https://cdn.bsky.app` | Optionally, base URL for a Bluesky-compatible CDN. |
| PK_CDN__VIDEO_BASE | `https://video.bsky.app` | Optionally, base URL for a Bluesky-compatible video CDN. |
| PK_DID_ALLOWLIST | n/a | Optional. If set, controls which DIDs can access the AppView. For many, join with `,`. |
| PK_MIGRATE | false | Set to TRUE to run database migrations automatically on start. |
| PKI_INDEX_DB_PATH | n/a | Required. Location to store the index database. |
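A note on the table above: the double-underscore names (e.g. PKC_DATABASE__URL) match figment's convention for nested keys, and the consumer's Cargo.toml (below) enables figment's `env` feature. As a minimal, hypothetical sketch of how such variables could be loaded (the struct and field names here are illustrative, not Parakeet's actual config types):

```rust
use figment::{providers::Env, Figment};
use serde::Deserialize;

#[derive(Deserialize)]
struct DatabaseConfig {
    url: String, // PKC_DATABASE__URL
}

#[derive(Deserialize)]
struct ConsumerConfig {
    database: DatabaseConfig,
    ua_contact: Option<String>, // PKC_UA_CONTACT
}

fn load_config() -> Result<ConsumerConfig, figment::Error> {
    // `prefixed` strips the PKC_ prefix; `split("__")` turns
    // DATABASE__URL into the nested key `database.url`.
    Figment::new()
        .merge(Env::prefixed("PKC_").split("__"))
        .extract()
}
```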
+2 -1
consumer/Cargo.toml
···
  did-resolver = { path = "../did-resolver" }
  eyre = "0.6.12"
  figment = { version = "0.10.19", features = ["env", "toml"] }
+ flume = { version = "0.11", features = ["async"] }
  foldhash = "0.1.4"
  futures = "0.3.31"
  ipld-core = "0.4.1"
···
  tokio-tungstenite = { version = "0.26.1", features = ["native-tls"] }
  tokio-util = { version = "0.7.14", features = ["io", "rt"] }
  tracing = "0.1.40"
- tracing-subscriber = "0.3.18"
+ tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] }
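The new flume dependency is what backs the downloader added in this MR (consumer/src/backfill/downloader.rs, below): unlike tokio's mpsc, flume's async receivers are cloneable, so one dispatcher can fan work out to a pool of workers over a single bounded channel with backpressure. A self-contained sketch of that pattern (names are hypothetical; assumes tokio with the `rt-multi-thread` and `macros` features, and flume's `async` feature):

```rust
#[tokio::main]
async fn main() {
    // Bounded MPMC channel: send_async waits when the buffer is full.
    let (tx, rx) = flume::bounded::<String>(64);

    let mut handles = Vec::new();
    for id in 0..4 {
        let rx = rx.clone(); // receivers are cloneable, so workers share the queue
        handles.push(tokio::spawn(async move {
            // recv_async returns Err once every sender has been dropped
            // and the buffer has drained.
            while let Ok(job) = rx.recv_async().await {
                println!("worker {id} handling {job}");
            }
        }));
    }
    drop(rx); // keep only the worker clones alive

    for n in 0..10 {
        tx.send_async(format!("job-{n}")).await.unwrap();
    }
    drop(tx); // close the channel; workers drain and exit

    for h in handles {
        h.await.unwrap();
    }
}
```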
+11
consumer/justfile
@release:
    cargo build --release

@lint:
    cargo clippy

@run +params:
    cargo run -- {{params}}

@docker platform='linux/amd64' branch='main':
    docker buildx build --platform {{platform}} -t registry.gitlab.com/parakeet-social/parakeet/consumer:{{branch}} . -f consumer/Dockerfile
-1
consumer/run.sh
- cargo run
+340
consumer/src/backfill/downloader.rs
··· 1 + use super::{DL_DONE_KEY, PDS_SERVICE_ID}; 2 + use crate::db; 3 + use chrono::prelude::*; 4 + use deadpool_postgres::{Client as PgClient, Pool}; 5 + use did_resolver::Resolver; 6 + use futures::TryStreamExt; 7 + use metrics::{counter, histogram}; 8 + use parakeet_db::types::{ActorStatus, ActorSyncState}; 9 + use redis::aio::MultiplexedConnection; 10 + use redis::AsyncTypedCommands; 11 + use reqwest::header::HeaderMap; 12 + use reqwest::Client as HttpClient; 13 + use std::path::{Path, PathBuf}; 14 + use std::sync::Arc; 15 + use tokio::sync::watch::Receiver as WatchReceiver; 16 + use tokio::time::{Duration, Instant}; 17 + use tokio_postgres::types::Type; 18 + use tokio_util::io::StreamReader; 19 + use tokio_util::task::TaskTracker; 20 + use tracing::instrument; 21 + 22 + const BF_RESET_KEY: &str = "bf_download_ratelimit_reset"; 23 + const BF_REM_KEY: &str = "bf_download_ratelimit_rem"; 24 + const DL_DUP_KEY: &str = "bf_downloaded"; 25 + 26 + pub async fn downloader( 27 + mut rc: MultiplexedConnection, 28 + pool: Pool, 29 + resolver: Arc<Resolver>, 30 + tmp_dir: PathBuf, 31 + concurrency: usize, 32 + buffer: usize, 33 + tracker: TaskTracker, 34 + stop: WatchReceiver<bool>, 35 + ) { 36 + let (tx, rx) = flume::bounded(64); 37 + let mut conn = pool.get().await.unwrap(); 38 + 39 + let http = HttpClient::new(); 40 + 41 + for _ in 0..concurrency { 42 + tracker.spawn(download_thread( 43 + rc.clone(), 44 + pool.clone(), 45 + resolver.clone(), 46 + http.clone(), 47 + rx.clone(), 48 + tmp_dir.clone(), 49 + )); 50 + } 51 + 52 + let status_stmt = conn.prepare_typed_cached( 53 + "INSERT INTO actors (did, sync_state, last_indexed) VALUES ($1, 'processing', NOW()) ON CONFLICT (did) DO UPDATE SET sync_state = 'processing', last_indexed=NOW()", 54 + &[Type::TEXT] 55 + ).await.unwrap(); 56 + 57 + loop { 58 + if stop.has_changed().unwrap_or(true) { 59 + tracing::info!("stopping downloader"); 60 + break; 61 + } 62 + 63 + if let Ok(count) = rc.llen(DL_DONE_KEY).await { 64 + if count > buffer { 65 + tracing::info!("waiting due to full buffer"); 66 + tokio::time::sleep(Duration::from_secs(5)).await; 67 + continue; 68 + } 69 + } 70 + 71 + let did: String = match rc.lpop("backfill_queue", None).await { 72 + Ok(Some(did)) => did, 73 + Ok(None) => { 74 + tokio::time::sleep(Duration::from_millis(250)).await; 75 + continue; 76 + } 77 + Err(e) => { 78 + tracing::error!("failed to get item from backfill queue: {e}"); 79 + continue; 80 + } 81 + }; 82 + 83 + tracing::trace!("resolving repo {did}"); 84 + 85 + // has the repo already been downloaded? 
86 + if rc.sismember(DL_DUP_KEY, &did).await.unwrap_or_default() { 87 + tracing::info!("skipping duplicate repo {did}"); 88 + continue; 89 + } 90 + 91 + // check if they're already synced in DB too 92 + match db::actor_get_statuses(&mut conn, &did).await { 93 + Ok(Some((_, state))) => { 94 + if state == ActorSyncState::Synced || state == ActorSyncState::Processing { 95 + tracing::info!("skipping duplicate repo {did}"); 96 + continue; 97 + } 98 + } 99 + Ok(None) => {} 100 + Err(e) => { 101 + tracing::error!(did, "failed to check current repo status: {e}"); 102 + db::backfill_job_write(&mut conn, &did, "failed.resolve") 103 + .await 104 + .unwrap(); 105 + } 106 + } 107 + 108 + match resolver.resolve_did(&did).await { 109 + Ok(Some(did_doc)) => { 110 + let Some(service) = did_doc.find_service_by_id(PDS_SERVICE_ID) else { 111 + tracing::warn!("bad DID doc for {did}"); 112 + db::backfill_job_write(&mut conn, &did, "failed.resolve.did_svc") 113 + .await 114 + .unwrap(); 115 + continue; 116 + }; 117 + let service = service.service_endpoint.clone(); 118 + 119 + // set the repo to processing 120 + if let Err(e) = conn.execute(&status_stmt, &[&did]).await { 121 + tracing::error!("failed to update repo status for {did}: {e}"); 122 + continue; 123 + } 124 + 125 + let handle = did_doc 126 + .also_known_as 127 + .and_then(|akas| akas.first().map(|v| v[5..].to_owned())); 128 + 129 + tracing::trace!("resolved repo {did} {service}"); 130 + if let Err(e) = tx.send_async((service, did, handle)).await { 131 + tracing::error!("failed to send: {e}"); 132 + } 133 + } 134 + Ok(None) => { 135 + tracing::warn!(did, "bad/missing DID doc"); 136 + db::actor_set_sync_status(&mut conn, &did, ActorSyncState::Dirty, Utc::now()) 137 + .await 138 + .unwrap(); 139 + db::backfill_job_write(&mut conn, &did, "failed.resolve.did_doc") 140 + .await 141 + .unwrap(); 142 + } 143 + Err(e) => { 144 + tracing::error!(did, "failed to resolve DID doc: {e}"); 145 + db::actor_set_sync_status(&mut conn, &did, ActorSyncState::Dirty, Utc::now()) 146 + .await 147 + .unwrap(); 148 + db::backfill_job_write(&mut conn, &did, "failed.resolve.did") 149 + .await 150 + .unwrap(); 151 + } 152 + } 153 + } 154 + } 155 + 156 + async fn download_thread( 157 + mut rc: MultiplexedConnection, 158 + pool: Pool, 159 + resolver: Arc<Resolver>, 160 + http: reqwest::Client, 161 + rx: flume::Receiver<(String, String, Option<String>)>, 162 + tmp_dir: PathBuf, 163 + ) { 164 + tracing::debug!("spawning thread"); 165 + 166 + // this will return Err(_) and exit when all senders (only held above) are dropped 167 + while let Ok((pds, did, maybe_handle)) = rx.recv_async().await { 168 + if let Err(e) = enforce_ratelimit(&mut rc, &pds).await { 169 + tracing::error!("ratelimiter error: {e}"); 170 + continue; 171 + }; 172 + 173 + { 174 + tracing::trace!("getting DB conn..."); 175 + let mut conn = pool.get().await.unwrap(); 176 + tracing::trace!("got DB conn..."); 177 + match check_and_update_repo_status(&http, &mut conn, &pds, &did).await { 178 + Ok(true) => {} 179 + Ok(false) => continue, 180 + Err(e) => { 181 + tracing::error!(pds, did, "failed to check repo status: {e}"); 182 + db::backfill_job_write(&mut conn, &did, "failed.resolve.status") 183 + .await 184 + .unwrap(); 185 + continue; 186 + } 187 + } 188 + 189 + tracing::debug!("trying to resolve handle..."); 190 + if let Some(handle) = maybe_handle { 191 + if let Err(e) = resolve_and_set_handle(&conn, &resolver, &did, &handle).await { 192 + tracing::error!(pds, did, "failed to resolve handle: {e}"); 193 + 
db::backfill_job_write(&mut conn, &did, "failed.resolve.handle") 194 + .await 195 + .unwrap(); 196 + } 197 + } 198 + } 199 + 200 + let start = Instant::now(); 201 + 202 + tracing::trace!("downloading repo {did}"); 203 + 204 + match download_car(&http, &tmp_dir, &pds, &did).await { 205 + Ok(Some((rem, reset))) => { 206 + let _ = rc.zadd(BF_REM_KEY, &pds, rem).await; 207 + let _ = rc.zadd(BF_RESET_KEY, &pds, reset).await; 208 + } 209 + Ok(_) => tracing::debug!(pds, "got response with no ratelimit headers."), 210 + Err(e) => { 211 + tracing::error!(pds, did, "failed to download repo: {e}"); 212 + continue; 213 + } 214 + } 215 + 216 + histogram!("backfill_download_dur", "pds" => pds).record(start.elapsed().as_secs_f64()); 217 + 218 + let _ = rc.sadd(DL_DUP_KEY, &did).await; 219 + if let Err(e) = rc.rpush(DL_DONE_KEY, &did).await { 220 + tracing::error!(did, "failed to mark download complete: {e}"); 221 + } else { 222 + counter!("backfill_downloaded").increment(1); 223 + } 224 + } 225 + 226 + tracing::debug!("thread exiting"); 227 + } 228 + 229 + async fn enforce_ratelimit(rc: &mut MultiplexedConnection, pds: &str) -> eyre::Result<()> { 230 + let score = rc.zscore(BF_REM_KEY, pds).await?; 231 + 232 + if let Some(rem) = score { 233 + if (rem as i32) < 100 { 234 + // if we've got None for some reason, just hope that the next req will contain the reset header. 235 + if let Some(at) = rc.zscore(BF_RESET_KEY, pds).await? { 236 + tracing::debug!("rate limit for {pds} resets at {at}"); 237 + let time = chrono::DateTime::from_timestamp(at as i64, 0).unwrap(); 238 + let delta = (time - Utc::now()).num_milliseconds().max(0); 239 + 240 + tokio::time::sleep(Duration::from_millis(delta as u64)).await; 241 + }; 242 + } 243 + } 244 + 245 + Ok(()) 246 + } 247 + 248 + // you wouldn't... 249 + #[instrument(skip(http, tmp_dir, pds))] 250 + async fn download_car( 251 + http: &HttpClient, 252 + tmp_dir: &Path, 253 + pds: &str, 254 + did: &str, 255 + ) -> eyre::Result<Option<(i32, i32)>> { 256 + let res = http 257 + .get(format!("{pds}/xrpc/com.atproto.sync.getRepo?did={did}")) 258 + .send() 259 + .await? 260 + .error_for_status()?; 261 + 262 + let mut file = tokio::fs::File::create_new(tmp_dir.join(did)).await?; 263 + 264 + let headers = res.headers(); 265 + let ratelimit_rem = header_to_int(headers, "ratelimit-remaining"); 266 + let ratelimit_reset = header_to_int(headers, "ratelimit-reset"); 267 + 268 + let strm = res.bytes_stream().map_err(std::io::Error::other); 269 + let mut reader = StreamReader::new(strm); 270 + 271 + tokio::io::copy(&mut reader, &mut file).await?; 272 + 273 + Ok(ratelimit_rem.zip(ratelimit_reset)) 274 + } 275 + 276 + // there's no ratelimit handling here because we pretty much always call download_car after. 277 + #[instrument(skip(http, conn, pds))] 278 + async fn check_and_update_repo_status( 279 + http: &HttpClient, 280 + conn: &mut PgClient, 281 + pds: &str, 282 + repo: &str, 283 + ) -> eyre::Result<bool> { 284 + match super::check_pds_repo_status(http, pds, repo).await? { 285 + Some(status) => { 286 + if !status.active { 287 + tracing::debug!("repo is inactive"); 288 + 289 + let status = status 290 + .status 291 + .unwrap_or(crate::firehose::AtpAccountStatus::Deleted); 292 + conn.execute( 293 + "UPDATE actors SET sync_state='dirty', status=$2 WHERE did=$1", 294 + &[&repo, &ActorStatus::from(status)], 295 + ) 296 + .await?; 297 + 298 + Ok(false) 299 + } else { 300 + Ok(true) 301 + } 302 + } 303 + None => { 304 + // this repo can't be found - set dirty and assume deleted. 
305 + tracing::debug!("repo was deleted"); 306 + conn.execute( 307 + "UPDATE actors SET sync_state='dirty', status='deleted' WHERE did=$1", 308 + &[&repo], 309 + ) 310 + .await?; 311 + 312 + Ok(false) 313 + } 314 + } 315 + } 316 + 317 + async fn resolve_and_set_handle( 318 + conn: &PgClient, 319 + resolver: &Resolver, 320 + did: &str, 321 + handle: &str, 322 + ) -> eyre::Result<()> { 323 + if let Some(handle_did) = resolver.resolve_handle(handle).await? { 324 + if handle_did == did { 325 + conn.execute("UPDATE actors SET handle=$2 WHERE did=$1", &[&did, &handle]) 326 + .await?; 327 + } else { 328 + tracing::warn!("requested DID ({did}) doesn't match handle"); 329 + } 330 + } 331 + 332 + Ok(()) 333 + } 334 + 335 + fn header_to_int(headers: &HeaderMap, name: &str) -> Option<i32> { 336 + headers 337 + .get(name) 338 + .and_then(|v| v.to_str().ok()) 339 + .and_then(|v| v.parse().ok()) 340 + }
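A note on the downloader above: the resolver loop and download_thread form a bounded fan-out, where one producer resolves DIDs and pushes (pds, did, handle) tuples into a flume channel that download_workers tasks drain until every sender is dropped. A minimal sketch of that pattern under the same crates (tokio + flume); the names and bodies here are illustrative, not the Parakeet API:

#[tokio::main]
async fn main() {
    // Bounded like download_buffer: send_async() waits when the queue
    // is full, giving natural backpressure on the resolver side.
    let (tx, rx) = flume::bounded::<(String, String)>(25_000);

    let mut workers = Vec::new();
    for _ in 0..25 {
        let rx = rx.clone();
        workers.push(tokio::spawn(async move {
            // recv_async() returns Err once the last sender is dropped,
            // which is how download_thread knows to exit.
            while let Ok((pds, did)) = rx.recv_async().await {
                println!("downloading {did} from {pds}");
            }
        }));
    }
    drop(rx);

    tx.send_async(("https://pds.example".into(), "did:plc:abc".into()))
        .await
        .unwrap();
    drop(tx); // close the channel so the workers drain and exit

    for w in workers {
        let _ = w.await;
    }
}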
+51 -126
consumer/src/backfill/mod.rs
··· 6 6 use deadpool_postgres::{Object, Pool, Transaction}; 7 7 use did_resolver::Resolver; 8 8 use ipld_core::cid::Cid; 9 + use lexica::StrongRef; 9 10 use metrics::counter; 10 11 use parakeet_db::types::{ActorStatus, ActorSyncState}; 11 12 use redis::aio::MultiplexedConnection; 12 - use redis::{AsyncCommands, Direction}; 13 + use redis::AsyncTypedCommands; 13 14 use reqwest::{Client, StatusCode}; 15 + use std::path::PathBuf; 14 16 use std::str::FromStr; 15 17 use std::sync::Arc; 16 18 use tokio::sync::watch::Receiver as WatchReceiver; ··· 18 20 use tokio_util::task::TaskTracker; 19 21 use tracing::instrument; 20 22 23 + mod downloader; 21 24 mod repo; 22 25 mod types; 23 26 27 + const DL_DONE_KEY: &str = "bf_download_complete"; 24 28 const PDS_SERVICE_ID: &str = "#atproto_pds"; 25 29 // There's a 4MiB limit on parakeet-index, so break delta batches up if there's loads. 26 30 // this should be plenty low enough to not trigger the size limit. (59k did slightly) ··· 28 32 29 33 #[derive(Clone)] 30 34 pub struct BackfillManagerInner { 31 - resolver: Arc<Resolver>, 32 - client: Client, 33 35 index_client: Option<parakeet_index::Client>, 34 - opts: BackfillConfig, 36 + tmp_dir: PathBuf, 35 37 } 36 38 37 39 pub struct BackfillManager { 38 40 pool: Pool, 39 41 redis: MultiplexedConnection, 42 + resolver: Arc<Resolver>, 40 43 semaphore: Arc<Semaphore>, 44 + opts: BackfillConfig, 41 45 inner: BackfillManagerInner, 42 46 } 43 47 ··· 49 53 index_client: Option<parakeet_index::Client>, 50 54 opts: BackfillConfig, 51 55 ) -> eyre::Result<Self> { 52 - let client = Client::builder().brotli(true).build()?; 53 - let semaphore = Arc::new(Semaphore::new(opts.backfill_workers as usize)); 56 + let semaphore = Arc::new(Semaphore::new(opts.workers as usize)); 54 57 55 58 Ok(BackfillManager { 56 59 pool, 57 60 redis, 61 + resolver, 58 62 semaphore, 59 63 inner: BackfillManagerInner { 60 - resolver, 61 - client, 62 64 index_client, 63 - opts, 65 + tmp_dir: PathBuf::from_str(&opts.download_tmp_dir)?, 64 66 }, 67 + opts, 65 68 }) 66 69 } 67 70 68 71 pub async fn run(mut self, stop: WatchReceiver<bool>) -> eyre::Result<()> { 69 72 let tracker = TaskTracker::new(); 70 73 74 + tracker.spawn(downloader::downloader( 75 + self.redis.clone(), 76 + self.pool.clone(), 77 + self.resolver, 78 + self.inner.tmp_dir.clone(), 79 + self.opts.download_workers, 80 + self.opts.download_buffer, 81 + tracker.clone(), 82 + stop.clone(), 83 + )); 84 + 71 85 loop { 72 86 if stop.has_changed().unwrap_or(true) { 73 87 tracker.close(); 88 + tracing::info!("stopping backfiller"); 74 89 break; 75 90 } 76 91 77 - let Some(job) = self 78 - .redis 79 - .lmove::<_, _, Option<String>>( 80 - "backfill_queue", 81 - "backfill_processing", 82 - Direction::Left, 83 - Direction::Right, 84 - ) 85 - .await? 86 - else { 92 + let Some(job): Option<String> = self.redis.lpop(DL_DONE_KEY, None).await? 
else { 87 93 tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; 88 94 continue; 89 95 }; ··· 92 98 93 99 let mut inner = self.inner.clone(); 94 100 let mut conn = self.pool.get().await?; 95 - let mut redis = self.redis.clone(); 101 + let mut rc = self.redis.clone(); 96 102 97 103 tracker.spawn(async move { 98 104 let _p = p; 99 105 tracing::trace!("backfilling {job}"); 100 106 101 - if let Err(e) = backfill_actor(&mut conn, &mut inner, &job).await { 107 + if let Err(e) = backfill_actor(&mut conn, &mut rc, &mut inner, &job).await { 102 108 tracing::error!(did = &job, "backfill failed: {e}"); 103 109 counter!("backfill_failure").increment(1); 104 110 105 - db::backfill_job_write(&mut conn, &job, "failed") 111 + db::backfill_job_write(&mut conn, &job, "failed.write") 106 112 .await 107 113 .unwrap(); 108 114 } else { ··· 113 119 .unwrap(); 114 120 } 115 121 116 - redis 117 - .lrem::<_, _, i32>("backfill_processing", 1, &job) 118 - .await 119 - .unwrap(); 122 + if let Err(e) = tokio::fs::remove_file(inner.tmp_dir.join(&job)).await { 123 + tracing::error!(did = &job, "failed to remove file: {e}"); 124 + } 120 125 }); 121 126 } 122 127 ··· 126 131 } 127 132 } 128 133 129 - #[instrument(skip(conn, inner))] 134 + #[instrument(skip(conn, rc, inner))] 130 135 async fn backfill_actor( 131 136 conn: &mut Object, 137 + rc: &mut MultiplexedConnection, 132 138 inner: &mut BackfillManagerInner, 133 139 did: &str, 134 140 ) -> eyre::Result<()> { 135 - let Some((status, sync_state)) = db::actor_get_statuses(conn, did).await? else { 136 - tracing::error!("skipping backfill on unknown repo"); 137 - return Ok(()); 138 - }; 139 - 140 - if sync_state != ActorSyncState::Dirty || status != ActorStatus::Active { 141 - tracing::debug!("skipping non-dirty or inactive repo"); 142 - return Ok(()); 143 - } 144 - 145 - // resolve the did to a PDS (also validates the handle) 146 - let Some(did_doc) = inner.resolver.resolve_did(did).await? else { 147 - eyre::bail!("missing did doc"); 148 - }; 149 - 150 - let Some(service) = did_doc.find_service_by_id(PDS_SERVICE_ID) else { 151 - eyre::bail!("DID doc contained no service endpoint"); 152 - }; 153 - 154 - let pds_url = service.service_endpoint.clone(); 155 - 156 - // check the repo status before we attempt to resolve the handle. There's a case where we can't 157 - // resolve the handle in the DID doc because the acc is already deleted. 158 - let Some(repo_status) = check_pds_repo_status(&inner.client, &pds_url, did).await? else { 159 - // this repo can't be found - set dirty and assume deleted. 160 - tracing::debug!("repo was deleted"); 161 - db::actor_upsert( 162 - conn, 163 - did, 164 - ActorStatus::Deleted, 165 - ActorSyncState::Dirty, 166 - Utc::now(), 167 - ) 168 - .await?; 169 - return Ok(()); 170 - }; 171 - 172 - if !repo_status.active { 173 - tracing::debug!("repo is inactive"); 174 - let status = repo_status 175 - .status 176 - .unwrap_or(crate::firehose::AtpAccountStatus::Deleted); 177 - db::actor_upsert(conn, did, status.into(), ActorSyncState::Dirty, Utc::now()).await?; 178 - return Ok(()); 179 - } 180 - 181 - if !inner.opts.skip_handle_validation { 182 - // at this point, the account will be active and we can attempt to resolve the handle. 
183 - let Some(handle) = did_doc 184 - .also_known_as 185 - .and_then(|aka| aka.first().cloned()) 186 - .and_then(|handle| handle.strip_prefix("at://").map(String::from)) 187 - else { 188 - eyre::bail!("DID doc contained no handle"); 189 - }; 190 - 191 - // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems 192 - // like a way to end up with really sus handles. 193 - if let Some(handle_did) = inner.resolver.resolve_handle(&handle).await? { 194 - if handle_did != did { 195 - tracing::warn!("requested DID doesn't match handle"); 196 - } else { 197 - // set the handle from above 198 - db::actor_upsert_handle( 199 - conn, 200 - did, 201 - ActorSyncState::Processing, 202 - Some(handle), 203 - Utc::now(), 204 - ) 205 - .await?; 206 - } 207 - } 208 - } 209 - 210 - // now we can start actually backfilling 211 - db::actor_set_sync_status(conn, did, ActorSyncState::Processing, Utc::now()).await?; 212 - 213 141 let mut t = conn.transaction().await?; 214 142 t.execute("SET CONSTRAINTS ALL DEFERRED", &[]).await?; 215 143 216 - tracing::trace!("pulling repo"); 144 + tracing::trace!("loading repo"); 217 145 218 - let (commit, mut deltas, copies) = 219 - repo::stream_and_insert_repo(&mut t, &inner.client, did, &pds_url).await?; 146 + let (commit, mut deltas, copies) = repo::insert_repo(&mut t, rc, &inner.tmp_dir, did).await?; 220 147 221 148 db::actor_set_repo_state(&mut t, did, &commit.rev, commit.data).await?; 222 149 ··· 228 155 ) 229 156 .await?; 230 157 231 - handle_backfill_rows(&mut t, &mut deltas, did, &commit.rev).await?; 158 + handle_backfill_rows(&mut t, rc, &mut deltas, did, &commit.rev).await?; 232 159 233 160 tracing::trace!("insertion finished"); 234 161 ··· 268 195 269 196 async fn handle_backfill_rows( 270 197 conn: &mut Transaction<'_>, 198 + rc: &mut MultiplexedConnection, 271 199 deltas: &mut impl AggregateDeltaStore, 272 200 repo: &str, 273 201 rev: &str, 274 - ) -> Result<(), tokio_postgres::Error> { 202 + ) -> eyre::Result<()> { 275 203 // `pull_backfill_rows` filters out anything before the last commit we pulled 276 204 let backfill_rows = db::backfill_rows_get(conn, repo, rev).await?; 277 205 278 206 for row in backfill_rows { 279 207 // blindly unwrap-ing this CID as we've already parsed it and re-serialized it 280 - let repo_cid = Cid::from_str(&row.cid).unwrap(); 208 + let repo_cid = Cid::from_str(&row.cid)?; 281 209 db::actor_set_repo_state(conn, repo, &row.repo_ver, repo_cid).await?; 282 210 283 211 // again, we've serialized this. 284 - let items: Vec<BackfillItem> = serde_json::from_value(row.data).unwrap(); 212 + let items: Vec<BackfillItem> = serde_json::from_value(row.data)?; 285 213 286 214 for item in items { 287 215 let Some((_, rkey)) = item.at_uri.rsplit_once("/") else { ··· 294 222 continue; 295 223 }; 296 224 297 - indexer::index_op(conn, deltas, repo, cid, record, &item.at_uri, rkey).await? 225 + indexer::index_op(conn, rc, deltas, repo, cid, record, &item.at_uri, rkey) 226 + .await? 
298 227 } 299 228 BackfillItemInner::Delete => { 300 229 indexer::index_op_delete( 301 230 conn, 231 + rc, 302 232 deltas, 303 233 repo, 304 234 item.collection, ··· 338 268 339 269 #[derive(Debug, Default)] 340 270 struct CopyStore { 341 - likes: Vec<( 342 - String, 343 - records::StrongRef, 344 - Option<records::StrongRef>, 345 - DateTime<Utc>, 346 - )>, 271 + likes: Vec<(String, StrongRef, Option<StrongRef>, DateTime<Utc>)>, 347 272 posts: Vec<(String, Cid, records::AppBskyFeedPost)>, 348 - reposts: Vec<( 349 - String, 350 - records::StrongRef, 351 - Option<records::StrongRef>, 352 - DateTime<Utc>, 353 - )>, 273 + reposts: Vec<(String, StrongRef, Option<StrongRef>, DateTime<Utc>)>, 354 274 blocks: Vec<(String, String, DateTime<Utc>)>, 355 275 follows: Vec<(String, String, DateTime<Utc>)>, 356 276 list_items: Vec<(String, records::AppBskyGraphListItem)>, 357 277 verifications: Vec<(String, Cid, records::AppBskyGraphVerification)>, 278 + threadgates: Vec<(String, Cid, records::AppBskyFeedThreadgate)>, // not COPY'd but needs to be kept until last. 358 279 records: Vec<(String, Cid)>, 359 280 } 360 281 361 282 impl CopyStore { 362 283 async fn submit(self, t: &mut Transaction<'_>, did: &str) -> Result<(), tokio_postgres::Error> { 363 284 db::copy::copy_likes(t, did, self.likes).await?; 364 - db::copy::copy_posts(t, did, self.posts).await?; 365 285 db::copy::copy_reposts(t, did, self.reposts).await?; 366 286 db::copy::copy_blocks(t, did, self.blocks).await?; 367 287 db::copy::copy_follows(t, did, self.follows).await?; 368 288 db::copy::copy_list_items(t, self.list_items).await?; 369 289 db::copy::copy_verification(t, did, self.verifications).await?; 290 + db::copy::copy_posts(t, did, self.posts).await?; 291 + for (at_uri, cid, record) in self.threadgates { 292 + db::threadgate_enforce_backfill(t, did, &record).await?; 293 + db::threadgate_upsert(t, &at_uri, cid, record).await?; 294 + } 370 295 db::copy::copy_records(t, did, self.records).await?; 371 296 372 297 Ok(())
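The run() loop above holds a semaphore permit (the `_p` binding) inside each spawned task so that at most `workers` backfills run at once, while the TaskTracker gives shutdown something to wait on. A small sketch of that combination; the permit acquisition itself sits in elided context, so this is an assumed reconstruction rather than the exact code:

use std::sync::Arc;
use tokio::sync::Semaphore;
use tokio_util::task::TaskTracker;

#[tokio::main]
async fn main() {
    let semaphore = Arc::new(Semaphore::new(4)); // opts.workers
    let tracker = TaskTracker::new();

    for job in ["did:plc:a", "did:plc:b", "did:plc:c"] {
        // Waits here whenever four jobs are already in flight.
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        tracker.spawn(async move {
            let _permit = permit; // released when the task finishes
            println!("backfilling {job}");
        });
    }

    tracker.close();
    tracker.wait().await; // graceful shutdown: drain in-flight jobs
}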
+30 -28
consumer/src/backfill/repo.rs
··· 1 1 use super::{ 2 - types::{CarCommitEntry, CarEntry}, 2 + types::{CarCommitEntry, CarEntry, CarRecordEntry}, 3 3 CopyStore, 4 4 }; 5 5 use crate::indexer::records; 6 6 use crate::indexer::types::{AggregateDeltaStore, RecordTypes}; 7 + use crate::utils::at_uri_is_by; 7 8 use crate::{db, indexer}; 8 9 use deadpool_postgres::Transaction; 9 - use futures::TryStreamExt; 10 10 use ipld_core::cid::Cid; 11 11 use iroh_car::CarReader; 12 12 use metrics::counter; 13 13 use parakeet_index::AggregateType; 14 - use reqwest::Client; 14 + use redis::aio::MultiplexedConnection; 15 15 use std::collections::HashMap; 16 - use std::io::ErrorKind; 16 + use std::path::Path; 17 17 use tokio::io::BufReader; 18 - use tokio_util::io::StreamReader; 19 18 20 19 type BackfillDeltaStore = HashMap<(String, i32), i32>; 21 20 22 - pub async fn stream_and_insert_repo( 21 + pub async fn insert_repo( 23 22 t: &mut Transaction<'_>, 24 - client: &Client, 23 + rc: &mut MultiplexedConnection, 24 + tmp_dir: &Path, 25 25 repo: &str, 26 - pds: &str, 27 26 ) -> eyre::Result<(CarCommitEntry, BackfillDeltaStore, CopyStore)> { 28 - let res = client 29 - .get(format!("{pds}/xrpc/com.atproto.sync.getRepo?did={repo}")) 30 - .send() 31 - .await? 32 - .error_for_status()?; 33 - 34 - let strm = res 35 - .bytes_stream() 36 - .map_err(|err| std::io::Error::new(ErrorKind::Other, err)); 37 - let reader = StreamReader::new(strm); 38 - let mut car_stream = CarReader::new(BufReader::new(reader)).await?; 27 + let car = tokio::fs::File::open(tmp_dir.join(repo)).await?; 28 + let mut car_stream = CarReader::new(BufReader::new(car)).await?; 39 29 40 30 // the root should be the commit block 41 31 let root = car_stream.header().roots().first().cloned().unwrap(); ··· 63 53 64 54 match block { 65 55 CarEntry::Commit(_) => { 66 - tracing::warn!("got commit entry that was not in root") 56 + tracing::debug!("got commit entry that was not in root") 67 57 } 68 - CarEntry::Record(record) => { 58 + CarEntry::Record(CarRecordEntry::Known(record)) => { 69 59 if let Some(path) = mst_nodes.remove(&cid) { 70 - record_index(t, &mut copies, &mut deltas, repo, &path, cid, record).await?; 60 + record_index(t, rc, &mut copies, &mut deltas, repo, &path, cid, record).await?; 71 61 } else { 72 62 records.insert(cid, record); 73 63 } 64 + } 65 + CarEntry::Record(CarRecordEntry::Other { ty }) => { 66 + tracing::debug!("repo contains unknown record type: {ty} ({cid})"); 74 67 } 75 68 CarEntry::Mst(mst) => { 76 69 let mut out = Vec::with_capacity(mst.e.len()); ··· 97 90 98 91 for (cid, record) in records { 99 92 if let Some(path) = mst_nodes.remove(&cid) { 100 - record_index(t, &mut copies, &mut deltas, repo, &path, cid, record).await?; 93 + record_index(t, rc, &mut copies, &mut deltas, repo, &path, cid, record).await?; 101 94 } else { 102 95 tracing::warn!("couldn't find MST node for record {cid}") 103 96 } 104 97 } 105 98 106 - let commit = commit.unwrap(); 99 + let Some(commit) = commit else { 100 + eyre::bail!("repo contained no commit?"); 101 + }; 107 102 108 103 Ok((commit, deltas, copies)) 109 104 } 110 105 111 106 async fn record_index( 112 107 t: &mut Transaction<'_>, 108 + rc: &mut MultiplexedConnection, 113 109 copies: &mut CopyStore, 114 110 deltas: &mut BackfillDeltaStore, 115 111 did: &str, ··· 151 147 db::maintain_self_labels(t, did, Some(cid), &at_uri, labels).await?; 152 148 } 153 149 if let Some(embed) = rec.embed.clone().and_then(|embed| embed.into_bsky()) { 154 - db::post_embed_insert(t, &at_uri, embed, rec.created_at).await?; 150 + 
db::post_embed_insert(t, &at_uri, embed, rec.created_at, true).await?; 155 151 } 156 152 157 153 deltas.incr(did, AggregateType::ProfilePost).await; ··· 173 169 .reposts 174 170 .push((rkey.to_string(), rec.subject, rec.via, rec.created_at)); 175 171 } 172 + RecordTypes::AppBskyFeedThreadgate(record) => { 173 + if !at_uri_is_by(&record.post, did) { 174 + return Ok(()); 175 + } 176 + 177 + copies.push_record(&at_uri, cid); 178 + copies.threadgates.push((at_uri, cid, record)); 179 + } 176 180 RecordTypes::AppBskyGraphBlock(rec) => { 177 181 copies.push_record(&at_uri, cid); 178 182 copies ··· 191 195 RecordTypes::AppBskyGraphListItem(rec) => { 192 196 let split_aturi = rec.list.rsplitn(4, '/').collect::<Vec<_>>(); 193 197 if did != split_aturi[2] { 194 - // it's also probably a bad idea to log *all* the attempts to do this... 195 - tracing::warn!("tried to create a listitem on a list we don't control!"); 196 198 return Ok(()); 197 199 } 198 200 ··· 203 205 copies.push_record(&at_uri, cid); 204 206 copies.verifications.push((at_uri, cid, rec)); 205 207 } 206 - _ => indexer::index_op(t, deltas, did, cid, record, &at_uri, rkey).await?, 208 + _ => indexer::index_op(t, rc, deltas, did, cid, record, &at_uri, rkey).await?, 207 209 } 208 210 209 211 Ok(())
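insert_repo now reads the CAR from disk instead of streaming it off the wire. The shape of a CAR walk with iroh_car looks roughly like the sketch below (error handling trimmed, and next_block() is assumed from iroh_car's reader API):

use iroh_car::CarReader;
use tokio::io::BufReader;

async fn walk_car(path: &std::path::Path) -> eyre::Result<()> {
    let file = tokio::fs::File::open(path).await?;
    let mut reader = CarReader::new(BufReader::new(file)).await?;

    // For an atproto repo the single root is the signed commit block.
    let root = reader.header().roots().first().cloned();
    println!("root: {root:?}");

    // Every other block is an MST node or a record. Blocks can arrive
    // in any order, hence the two-pass matching insert_repo does above.
    while let Some((cid, bytes)) = reader.next_block().await? {
        println!("block {cid}: {} bytes", bytes.len());
    }
    Ok(())
}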
+11 -1
consumer/src/backfill/types.rs
··· 8 8 pub enum CarEntry { 9 9 Mst(CarMstEntry), 10 10 Commit(CarCommitEntry), 11 - Record(RecordTypes), 11 + Record(CarRecordEntry), 12 12 } 13 13 14 14 #[derive(Debug, Deserialize)] ··· 33 33 pub rev: String, 34 34 pub prev: Option<Cid>, 35 35 pub sig: ByteBuf, 36 + } 37 + 38 + #[derive(Debug, Deserialize)] 39 + #[serde(untagged)] 40 + pub enum CarRecordEntry { 41 + Known(RecordTypes), 42 + Other { 43 + #[serde(rename = "$type")] 44 + ty: String, 45 + }, 36 46 } 37 47 38 48 #[derive(Debug, Deserialize)]
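CarRecordEntry leans on serde's untagged fallback: variants are tried in order, so anything that isn't a known record lands in Other with only its $type captured. The behaviour is easiest to see with JSON stand-ins (the real data is DAG-CBOR and the real first variant is RecordTypes, but the fallback works the same way):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Entry {
    Known { text: String },
    Other {
        #[serde(rename = "$type")]
        ty: String,
    },
}

fn main() {
    let known: Entry = serde_json::from_str(r#"{"text": "hello"}"#).unwrap();
    let unknown: Entry =
        serde_json::from_str(r#"{"$type": "com.example.widget", "size": 3}"#).unwrap();
    println!("{known:?} / {unknown:?}"); // Known { .. } / Other { ty: "com.example.widget" }
}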
+26 -4
consumer/src/config.rs
··· 13 13 14 14 #[derive(Debug, Deserialize)] 15 15 pub struct Config { 16 + #[serde(flatten)] 17 + pub instruments: ConfigInstruments, 16 18 pub index_uri: String, 17 19 pub database: deadpool_postgres::Config, 18 20 pub redis_uri: String, ··· 27 29 pub indexer: Option<IndexerConfig>, 28 30 /// Configuration items specific to backfill 29 31 pub backfill: Option<BackfillConfig>, 32 + } 33 + 34 + #[derive(Debug, Deserialize)] 35 + pub struct ConfigInstruments { 36 + #[serde(default)] 37 + pub log_json: bool, 30 38 } 31 39 32 40 #[derive(Debug, Deserialize)] ··· 34 42 pub relay_source: String, 35 43 pub history_mode: HistoryMode, 36 44 #[serde(default = "default_indexer_workers")] 37 - pub indexer_workers: u8, 45 + pub workers: u8, 38 46 pub start_commit_seq: Option<u64>, 39 47 /// Whether to resolve handles as part of `#identity` events. 40 48 /// You can use this to move handle resolution out of event handling and into another place. 41 49 #[serde(default)] 42 50 pub skip_handle_validation: bool, 51 + /// Whether to submit backfill requests for new repos. (Only when history_mode == BackfillHistory). 52 + #[serde(default)] 53 + pub request_backfill: bool, 43 54 } 44 55 45 56 #[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Deserialize)] ··· 54 65 #[derive(Clone, Debug, Deserialize)] 55 66 pub struct BackfillConfig { 56 67 #[serde(default = "default_backfill_workers")] 57 - pub backfill_workers: u8, 68 + pub workers: u8, 58 69 #[serde(default)] 59 70 pub skip_aggregation: bool, 60 - #[serde(default)] 61 - pub skip_handle_validation: bool, 71 + #[serde(default = "default_download_workers")] 72 + pub download_workers: usize, 73 + #[serde(default = "default_download_buffer")] 74 + pub download_buffer: usize, 75 + pub download_tmp_dir: String, 62 76 } 63 77 64 78 fn default_backfill_workers() -> u8 { ··· 68 82 fn default_indexer_workers() -> u8 { 69 83 4 70 84 } 85 + 86 + fn default_download_workers() -> usize { 87 + 25 88 + } 89 + 90 + fn default_download_buffer() -> usize { 91 + 25_000 92 + }
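With the new fields, a backfill section now needs download_tmp_dir (no default) and can size the download pool separately from the indexing workers. A sketch of what the config accepts, assuming a TOML config file (the loader isn't part of this diff); the defaults mirror the functions above:

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct BackfillConfig {
    #[serde(default = "default_workers")]
    workers: u8, // default 4
    #[serde(default)]
    skip_aggregation: bool,
    #[serde(default = "default_download_workers")]
    download_workers: usize, // default 25
    #[serde(default = "default_download_buffer")]
    download_buffer: usize, // default 25_000
    download_tmp_dir: String, // required: no default
}

fn default_workers() -> u8 { 4 }
fn default_download_workers() -> usize { 25 }
fn default_download_buffer() -> usize { 25_000 }

fn main() {
    let cfg: BackfillConfig = toml::from_str(
        r#"
        workers = 8
        download_workers = 50
        download_tmp_dir = "/tmp/parakeet-backfill"
        "#,
    )
    .unwrap();
    println!("{cfg:?}"); // download_buffer falls back to 25000
}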
+3 -3
consumer/src/db/actor.rs
··· 69 69 ) 70 70 .await?; 71 71 72 - Ok(res.map(|v| (v.get(0), v.get(1)))) 72 + res.map(|v| Ok((v.try_get(0)?, v.try_get(1)?))).transpose() 73 73 } 74 74 75 75 pub async fn actor_get_repo_status<C: GenericClient>( ··· 83 83 ) 84 84 .await?; 85 85 86 - Ok(res.map(|v| (v.get(0), v.get(1)))) 86 + res.map(|v| Ok((v.try_get(0)?, v.try_get(1)?))).transpose() 87 87 } 88 88 89 89 pub async fn actor_get_statuses<C: GenericClient>( ··· 97 97 ) 98 98 .await?; 99 99 100 - Ok(res.map(|v| (v.get(0), v.get(1)))) 100 + res.map(|v| Ok((v.try_get(0)?, v.try_get(1)?))).transpose() 101 101 }
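The change from get() to try_get() in these accessors trades a panic on an unexpected column type for a proper error, and transpose() is what flips the resulting Option<Result<_>> back into the Result<Option<_>> the callers expect. The same pattern in miniature:

fn lookup(found: Option<&str>) -> Result<Option<i32>, std::num::ParseIntError> {
    found
        .map(|v| v.parse::<i32>()) // Option<Result<i32, _>>
        .transpose()               // Result<Option<i32>, _>
}

fn main() {
    assert_eq!(lookup(Some("42")), Ok(Some(42)));
    assert_eq!(lookup(None), Ok(None));
    assert!(lookup(Some("nope")).is_err());
}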
+11 -10
consumer/src/db/backfill.rs
··· 19 19 status: &str, 20 20 ) -> PgExecResult { 21 21 conn.execute( 22 - "INSERT INTO backfill_jobs (did, status) VALUES ($1, $2)", 22 + "INSERT INTO backfill_jobs (did, status) VALUES ($1, $2) ON CONFLICT (did) DO UPDATE SET status = $2, updated_at = NOW()", 23 23 &[&did, &status], 24 24 ) 25 25 .await ··· 51 51 ) 52 52 .await?; 53 53 54 - Ok(res 55 - .into_iter() 56 - .map(|row| BackfillRow { 57 - repo: row.get(0), 58 - repo_ver: row.get(1), 59 - cid: row.get(2), 60 - data: row.get(3), 61 - indexed_at: row.get(4), 54 + res.into_iter() 55 + .map(|row| { 56 + Ok(BackfillRow { 57 + repo: row.try_get(0)?, 58 + repo_ver: row.try_get(1)?, 59 + cid: row.try_get(2)?, 60 + data: row.try_get(3)?, 61 + indexed_at: row.try_get(4)?, 62 + }) 62 63 }) 63 - .collect()) 64 + .collect() 64 65 } 65 66 66 67 pub async fn backfill_delete_rows<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
+40 -9
consumer/src/db/copy.rs
··· 1 1 use super::PgExecResult; 2 2 use crate::indexer::records; 3 - use crate::utils::strongref_to_parts; 3 + use crate::utils::{extract_mentions_and_tags, merge_tags, strongref_to_parts}; 4 4 use chrono::prelude::*; 5 5 use deadpool_postgres::Transaction; 6 6 use futures::pin_mut; 7 7 use ipld_core::cid::Cid; 8 + use lexica::StrongRef; 8 9 use tokio_postgres::binary_copy::BinaryCopyInWriter; 9 10 use tokio_postgres::types::Type; 10 11 ··· 18 19 Type::TEXT, 19 20 Type::TIMESTAMP, 20 21 ]; 21 - type StrongRefRow = ( 22 - String, 23 - records::StrongRef, 24 - Option<records::StrongRef>, 25 - DateTime<Utc>, 26 - ); 22 + type StrongRefRow = (String, StrongRef, Option<StrongRef>, DateTime<Utc>); 27 23 28 24 // SubjectRefs are used in both blocks and follows 29 25 const SUBJECT_TYPES: &[Type] = &[Type::TEXT, Type::TEXT, Type::TEXT, Type::TIMESTAMP]; ··· 123 119 .await 124 120 } 125 121 126 - const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, created_at) FROM STDIN (FORMAT binary)"; 122 + const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, mentions, created_at) FROM STDIN (FORMAT binary)"; 127 123 const POST_TYPES: &[Type] = &[ 128 124 Type::TEXT, 129 125 Type::TEXT, ··· 139 135 Type::TEXT, 140 136 Type::TEXT, 141 137 Type::TEXT, 138 + Type::TEXT_ARRAY, 142 139 Type::TIMESTAMP, 143 140 ]; 144 141 pub async fn copy_posts( ··· 163 160 164 161 for (at_uri, cid, post) in data { 165 162 let record = serde_json::to_value(&post).unwrap(); 163 + let (mentions, tags) = post 164 + .facets 165 + .as_ref() 166 + .map(|v| extract_mentions_and_tags(v)) 167 + .unzip(); 166 168 let facets = post.facets.and_then(|v| serde_json::to_value(v).ok()); 167 169 let embed = post.embed.as_ref().map(|v| v.as_str()); 168 170 let embed_subtype = post.embed.as_ref().and_then(|v| v.subtype()); 169 171 let (parent_uri, parent_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.parent)); 170 172 let (root_uri, root_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.root)); 171 173 174 + let tags = merge_tags(tags, post.tags); 175 + 172 176 let writer = writer.as_mut(); 173 177 writer 174 178 .write(&[ ··· 179 183 &post.text, 180 184 &facets, 181 185 &post.langs.unwrap_or_default(), 182 - &post.tags.unwrap_or_default(), 186 + &tags, 183 187 &parent_uri, 184 188 &parent_cid, 185 189 &root_uri, 186 190 &root_cid, 187 191 &embed, 188 192 &embed_subtype, 193 + &mentions, 189 194 &post.created_at.naive_utc(), 190 195 ]) 191 196 .await?; 192 197 } 193 198 194 199 writer.finish().await?; 200 + 201 + let threadgated: Vec<(String, String, DateTime<Utc>)> = conn 202 + .query( 203 + "SELECT root_uri, p.at_uri, p.created_at FROM posts_tmp p INNER JOIN threadgates t ON root_uri = post_uri WHERE t.allow IS NOT NULL", 204 + &[], 205 + ) 206 + .await? 
207 + .into_iter() 208 + .map(|v| Ok((v.try_get(0)?, v.try_get(1)?, v.try_get(2)?))).collect::<Result<_, _>>()?; 209 + 210 + for (root, post, created_at) in threadgated { 211 + match super::post_enforce_threadgate(conn, &root, did, created_at, true).await { 212 + Ok(true) => { 213 + conn.execute( 214 + "UPDATE posts_tmp SET violates_threadgate=TRUE WHERE at_uri=$1", 215 + &[&post], 216 + ) 217 + .await?; 218 + } 219 + Ok(false) => continue, 220 + Err(e) => { 221 + tracing::error!("failed to check threadgate enforcement: {e}"); 222 + continue; 223 + } 224 + } 225 + } 195 226 196 227 conn.execute("INSERT INTO posts (SELECT * FROM posts_tmp)", &[]) 197 228 .await
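copy_posts now mirrors post_insert's mentions/tags handling, and the Option::unzip() it uses turns one optional tuple into a tuple of optionals. A tiny sketch; the partition below is a hypothetical stand-in for extract_mentions_and_tags, which isn't shown in this diff:

fn main() {
    let facets: Option<Vec<String>> = Some(vec!["@alice".into(), "#rust".into()]);

    let (mentions, tags): (Option<Vec<String>>, Option<Vec<String>>) = facets
        .map(|fs| fs.into_iter().partition(|f| f.starts_with('@')))
        .unzip();

    assert_eq!(mentions, Some(vec!["@alice".to_string()]));
    assert_eq!(tags, Some(vec!["#rust".to_string()]));
}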
+213
consumer/src/db/gates.rs
··· 1 + use super::{PgExecResult, PgResult}; 2 + use crate::indexer::records::{ 3 + AppBskyFeedThreadgate, ThreadgateRule, THREADGATE_RULE_FOLLOWER, THREADGATE_RULE_FOLLOWING, 4 + THREADGATE_RULE_LIST, THREADGATE_RULE_MENTION, 5 + }; 6 + use chrono::prelude::*; 7 + use chrono::{DateTime, Utc}; 8 + use deadpool_postgres::GenericClient; 9 + use std::collections::HashSet; 10 + 11 + pub async fn post_enforce_threadgate<C: GenericClient>( 12 + conn: &mut C, 13 + root: &str, 14 + post_author: &str, 15 + post_created_at: DateTime<Utc>, 16 + is_backfill: bool, 17 + ) -> PgResult<bool> { 18 + // check if the root and the current post are the same author 19 + // strip "at://" then break into parts by '/' 20 + let parts = root[5..].split('/').collect::<Vec<_>>(); 21 + let root_author = parts[0]; 22 + if root_author == post_author { 23 + return Ok(false); 24 + } 25 + 26 + let tg_data = super::threadgate_get(conn, root).await?; 27 + 28 + let Some((created_at, allow, allow_lists)) = tg_data else { 29 + return Ok(false); 30 + }; 31 + 32 + // when backfilling, there's no point continuing if the record is dated before the threadgate 33 + if is_backfill && post_created_at < created_at { 34 + return Ok(false); 35 + } 36 + 37 + if allow.is_empty() { 38 + return Ok(true); 39 + } 40 + 41 + let allow: HashSet<String> = HashSet::from_iter(allow); 42 + 43 + if allow.contains(THREADGATE_RULE_FOLLOWER) || allow.contains(THREADGATE_RULE_FOLLOWING) { 44 + let profile_state: Option<(bool, bool)> = conn 45 + .query_opt( 46 + "SELECT following IS NOT NULL, followed IS NOT NULL FROM profile_states WHERE did=$1 AND subject=$2", 47 + &[&root_author, &post_author], 48 + ) 49 + .await? 50 + .map(|v| Ok((v.try_get(0)?, v.try_get(1)?))).transpose()?; 51 + 52 + if let Some((following, followed)) = profile_state { 53 + if allow.contains(THREADGATE_RULE_FOLLOWER) && followed { 54 + return Ok(false); 55 + } 56 + 57 + if allow.contains(THREADGATE_RULE_FOLLOWING) && following { 58 + return Ok(false); 59 + } 60 + } 61 + } 62 + 63 + // check mentions 64 + if allow.contains(THREADGATE_RULE_MENTION) { 65 + let mentions: Vec<String> = conn 66 + .query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root]) 67 + .await? 68 + .and_then(|r| r.try_get::<_, Option<_>>(0).transpose()) 69 + .transpose()? 70 + .unwrap_or_default(); 71 + 72 + if mentions.contains(&post_author.to_owned()) { 73 + return Ok(false); 74 + } 75 + } 76 + 77 + if allow.contains(THREADGATE_RULE_LIST) { 78 + if allow_lists.is_empty() { 79 + return Ok(true); 80 + } 81 + 82 + let count: i64 = conn 83 + .query_one( 84 + "SELECT count(*) FROM list_items WHERE list_uri=ANY($1) AND subject=$2", 85 + &[&allow_lists, &post_author], 86 + ) 87 + .await? 88 + .try_get(0)?; 89 + if count != 0 { 90 + return Ok(false); 91 + } 92 + } 93 + 94 + Ok(true) 95 + } 96 + 97 + pub async fn postgate_maintain_detaches<C: GenericClient>( 98 + conn: &mut C, 99 + post: &str, 100 + detached: &[String], 101 + disable_effective: Option<NaiveDateTime>, 102 + ) -> PgExecResult { 103 + conn.execute( 104 + "SELECT maintain_postgates($1, $2, $3)", 105 + &[&post, &detached, &disable_effective], 106 + ) 107 + .await 108 + } 109 + 110 + // variant of post_enforce_threadgate that runs when backfilling to clean up any posts already in DB 111 + pub async fn threadgate_enforce_backfill<C: GenericClient>( 112 + conn: &mut C, 113 + root_author: &str, 114 + threadgate: &AppBskyFeedThreadgate, 115 + ) -> PgExecResult { 116 + // pull out allow - if it's None we can skip this gate. 
117 + let Some(allow) = threadgate.allow.as_ref() else { 118 + return Ok(0); 119 + }; 120 + 121 + let root = &threadgate.post; 122 + 123 + if allow.is_empty() { 124 + // blind update everything 125 + return conn.execute( 126 + "UPDATE posts SET violates_threadgate=TRUE WHERE root_uri=$1 AND did != $2 AND created_at >= $3", 127 + &[&root, &root_author, &threadgate.created_at], 128 + ).await; 129 + } 130 + 131 + // pull authors with our root_uri where the author is not the root author and are dated after created_at 132 + // this is mutable because we'll remove ALLOWED dids 133 + let mut dids: HashSet<String> = conn 134 + .query( 135 + "SELECT DISTINCT did FROM posts WHERE root_uri=$1 AND did != $2 AND created_at >= $3", 136 + &[&root, &root_author, &threadgate.created_at], 137 + ) 138 + .await? 139 + .into_iter() 140 + .map(|row| row.try_get(0)) 141 + .collect::<Result<_, _>>()?; 142 + 143 + // this will be empty if there are no replies. 144 + if dids.is_empty() { 145 + return Ok(0); 146 + } 147 + 148 + let allowed_lists = allow 149 + .iter() 150 + .filter_map(|rule| match rule { 151 + ThreadgateRule::List { list } => Some(list), 152 + _ => None, 153 + }) 154 + .collect::<Vec<_>>(); 155 + 156 + let allow: HashSet<_> = HashSet::from_iter(allow.iter().map(|v| v.as_str())); 157 + 158 + if allow.contains(THREADGATE_RULE_FOLLOWER) && !dids.is_empty() { 159 + let current_dids: Vec<_> = dids.iter().collect(); 160 + 161 + let res = conn.query( 162 + "SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND followed IS NOT NULL", 163 + &[&root_author, &current_dids] 164 + ).await?.into_iter().map(|row| row.try_get(0)).collect::<Result<HashSet<_>, _>>()?; 165 + 166 + dids = &dids - &res; 167 + } 168 + 169 + if allow.contains(THREADGATE_RULE_FOLLOWING) && !dids.is_empty() { 170 + let current_dids: Vec<_> = dids.iter().collect(); 171 + 172 + let res = conn.query( 173 + "SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND following IS NOT NULL", 174 + &[&root_author, &current_dids] 175 + ).await?.into_iter().map(|row| row.try_get(0)).collect::<Result<_, _>>()?; 176 + 177 + dids = &dids - &res; 178 + } 179 + 180 + if allow.contains(THREADGATE_RULE_MENTION) && !dids.is_empty() { 181 + let mentions: Vec<String> = conn 182 + .query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root]) 183 + .await? 184 + .and_then(|r| r.try_get::<_, Option<_>>(0).transpose()) 185 + .transpose()? 186 + .unwrap_or_default(); 187 + 188 + dids = &dids - &HashSet::from_iter(mentions); 189 + } 190 + 191 + if allow.contains(THREADGATE_RULE_LIST) && !dids.is_empty() { 192 + let current_dids: Vec<_> = dids.iter().collect(); 193 + 194 + let res = conn 195 + .query( 196 + "SELECT subject FROM list_items WHERE list_uri = ANY($1) AND subject = ANY($2)", 197 + &[&allowed_lists, &current_dids], 198 + ) 199 + .await? 200 + .into_iter() 201 + .map(|row| row.try_get(0)) 202 + .collect::<Result<_, _>>()?; 203 + 204 + dids = &dids - &res; 205 + } 206 + 207 + let dids = dids.into_iter().collect::<Vec<_>>(); 208 + 209 + conn.execute( 210 + "UPDATE posts SET violates_threadgate=TRUE WHERE root_uri = $1 AND did = ANY($2) AND created_at >= $3", 211 + &[&threadgate.post, &dids, &threadgate.created_at] 212 + ).await 213 + }
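threadgate_enforce_backfill above starts from the full set of repliers and subtracts each allowed group in turn; whoever remains violates the gate. The `&dids - &res` syntax is std's HashSet difference, shown here with illustrative DIDs:

use std::collections::HashSet;

fn main() {
    let repliers: HashSet<String> =
        HashSet::from(["did:plc:a".into(), "did:plc:b".into(), "did:plc:c".into()]);
    let followers: HashSet<String> = HashSet::from(["did:plc:b".into()]);

    // One rule's pass: remove the DIDs it allows, keep the rest.
    let remaining = &repliers - &followers;
    assert_eq!(remaining.len(), 2);
    assert!(!remaining.contains("did:plc:b"));
}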
+1 -1
consumer/src/db/labels.rs
··· 32 32 let default_setting = definition 33 33 .and_then(|v| v.default_setting) 34 34 .map(|v| v.to_string()); 35 - let adult_only = definition.and_then(|v| v.adult_only); 35 + let adult_only = definition.and_then(|v| v.adult_only).unwrap_or_default(); 36 36 let locales = definition.and_then(|v| serde_json::to_value(&v.locales).ok()); 37 37 38 38 conn.execute(
+2
consumer/src/db/mod.rs
··· 7 7 mod actor; 8 8 mod backfill; 9 9 pub mod copy; 10 + mod gates; 10 11 mod labels; 11 12 mod record; 12 13 13 14 pub use actor::*; 14 15 pub use backfill::*; 16 + pub use gates::*; 15 17 pub use labels::*; 16 18 pub use record::*;
+135 -53
consumer/src/db/record.rs
··· 1 1 use super::{PgExecResult, PgOptResult, PgResult}; 2 2 use crate::indexer::records::*; 3 - use crate::utils::{blob_ref, strongref_to_parts}; 3 + use crate::utils::{blob_ref, extract_mentions_and_tags, merge_tags, strongref_to_parts}; 4 4 use chrono::prelude::*; 5 5 use deadpool_postgres::GenericClient; 6 6 use ipld_core::cid::Cid; 7 + use lexica::community_lexicon::bookmarks::Bookmark; 8 + use std::collections::HashSet; 7 9 8 10 pub async fn record_upsert<C: GenericClient>( 9 11 conn: &mut C, ··· 22 24 .await 23 25 } 24 26 27 + pub async fn bookmark_upsert<C: GenericClient>( 28 + conn: &mut C, 29 + rkey: &str, 30 + repo: &str, 31 + rec: Bookmark, 32 + ) -> PgExecResult { 33 + // strip "at://" then break into parts by '/' 34 + let rec_type = match rec.subject.strip_prefix("at://") { 35 + Some(at_uri) => at_uri.split('/').collect::<Vec<_>>()[1], 36 + None => "$uri", 37 + }; 38 + 39 + conn.execute( 40 + include_str!("sql/bookmarks_upsert.sql"), 41 + &[ 42 + &repo, 43 + &rkey, 44 + &rec.subject, 45 + &rec_type, 46 + &rec.tags, 47 + &rec.created_at, 48 + ], 49 + ) 50 + .await 51 + } 52 + 53 + pub async fn bookmark_delete<C: GenericClient>( 54 + conn: &mut C, 55 + rkey: &str, 56 + repo: &str, 57 + ) -> PgExecResult { 58 + conn.execute( 59 + "DELETE FROM bookmarks WHERE rkey=$1 AND did=$2", 60 + &[&rkey, &repo], 61 + ) 62 + .await 63 + } 64 + 25 65 pub async fn block_insert<C: GenericClient>( 26 66 conn: &mut C, 27 67 rkey: &str, ··· 87 127 ], 88 128 ) 89 129 .await 90 - .map(|r| r.get::<_, i32>(0) == 0) 130 + .and_then(|r| Ok(r.try_get::<_, i32>(0)? == 0)) 91 131 } 92 132 93 133 pub async fn feedgen_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult { ··· 119 159 ) 120 160 .await?; 121 161 122 - Ok(res.map(|v| v.get(0))) 162 + res.map(|v| v.try_get(0)).transpose() 123 163 } 124 164 125 165 pub async fn labeler_upsert<C: GenericClient>( ··· 184 224 ) 185 225 .await?; 186 226 187 - Ok(res.map(|v| v.get(0))) 227 + res.map(|v| v.try_get(0)).transpose() 188 228 } 189 229 190 230 pub async fn list_upsert<C: GenericClient>( ··· 215 255 ], 216 256 ) 217 257 .await 218 - .map(|r| r.get::<_, i32>(0) == 0) 258 + .and_then(|r| Ok(r.try_get::<_, i32>(0)? == 0)) 219 259 } 220 260 221 261 pub async fn list_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult { ··· 278 318 repo: &str, 279 319 cid: Cid, 280 320 rec: AppBskyFeedPost, 321 + is_backfill: bool, 281 322 ) -> PgExecResult { 282 323 let cid = cid.to_string(); 283 324 let record = serde_json::to_value(&rec).unwrap(); 325 + let (mentions, tags) = rec 326 + .facets 327 + .as_ref() 328 + .map(|v| extract_mentions_and_tags(v)) 329 + .unzip(); 284 330 let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok()); 285 331 let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent)); 286 332 let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root)); 287 333 let embed = rec.embed.as_ref().map(|v| v.as_str()); 288 334 let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype()); 289 335 336 + // if there is a root, we need to check for the presence of a threadgate. 337 + let violates_threadgate = match &root_uri { 338 + Some(root) => { 339 + super::post_enforce_threadgate(conn, root, repo, rec.created_at, is_backfill).await? 
340 + } 341 + None => false, 342 + }; 343 + 344 + let tags = merge_tags(tags, rec.tags); 345 + 290 346 let count = conn 291 347 .execute( 292 348 include_str!("sql/post_insert.sql"), ··· 298 354 &rec.text, 299 355 &facets, 300 356 &rec.langs.unwrap_or_default(), 301 - &rec.tags.unwrap_or_default(), 357 + &tags, 302 358 &parent_uri, 303 359 &parent_cid, 304 360 &root_uri, 305 361 &root_cid, 306 362 &embed, 307 363 &embed_subtype, 364 + &mentions, 365 + &violates_threadgate, 308 366 &rec.created_at, 309 367 ], 310 368 ) 311 369 .await?; 312 370 313 371 if let Some(embed) = rec.embed.and_then(|embed| embed.into_bsky()) { 314 - post_embed_insert(conn, at_uri, embed, rec.created_at).await?; 372 + post_embed_insert(conn, at_uri, embed, rec.created_at, is_backfill).await?; 315 373 } 316 374 317 375 Ok(count) ··· 333 391 ) 334 392 .await?; 335 393 336 - Ok(res.map(|row| (row.get(0), row.get(1)))) 394 + res.map(|row| Ok((row.try_get(0)?, row.try_get(1)?))) 395 + .transpose() 337 396 } 338 397 339 398 pub async fn post_embed_insert<C: GenericClient>( ··· 341 400 post: &str, 342 401 embed: AppBskyEmbed, 343 402 created_at: DateTime<Utc>, 403 + is_backfill: bool, 344 404 ) -> PgExecResult { 345 405 match embed { 346 406 AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await, 347 407 AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await, 348 408 AppBskyEmbed::External(embed) => post_embed_external_insert(conn, post, embed).await, 349 409 AppBskyEmbed::Record(embed) => { 350 - post_embed_record_insert(conn, post, embed, created_at).await 410 + post_embed_record_insert(conn, post, embed, created_at, is_backfill).await 351 411 } 352 412 AppBskyEmbed::RecordWithMedia(embed) => { 353 - post_embed_record_insert(conn, post, embed.record, created_at).await?; 413 + post_embed_record_insert(conn, post, embed.record, created_at, is_backfill).await?; 354 414 match *embed.media { 355 415 AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await, 356 416 AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await, ··· 371 431 let stmt = conn.prepare("INSERT INTO post_embed_images (post_uri, seq, cid, mime_type, alt, width, height) VALUES ($1, $2, $3, $4, $5, $6, $7)").await?; 372 432 373 433 for (idx, image) in embed.images.iter().enumerate() { 374 - let cid = image.image.r#ref.to_string(); 434 + let cid = image.image.cid.to_string(); 375 435 let width = image.aspect_ratio.as_ref().map(|v| v.width); 376 436 let height = image.aspect_ratio.as_ref().map(|v| v.height); 377 437 ··· 398 458 post: &str, 399 459 embed: AppBskyEmbedVideo, 400 460 ) -> PgExecResult { 401 - let cid = embed.video.r#ref.to_string(); 461 + let cid = embed.video.cid.to_string(); 402 462 let width = embed.aspect_ratio.as_ref().map(|v| v.width); 403 463 let height = embed.aspect_ratio.as_ref().map(|v| v.height); 404 464 ··· 411 471 let stmt = conn.prepare_cached("INSERT INTO post_embed_video_captions (post_uri, cid, mime_type, language) VALUES ($1, $2, $3, $4)").await?; 412 472 413 473 for caption in captions { 414 - let cid = caption.file.r#ref.to_string(); 474 + let cid = caption.file.cid.to_string(); 415 475 conn.execute( 416 476 &stmt, 417 477 &[&post, &cid, &caption.file.mime_type, &caption.lang], ··· 429 489 embed: AppBskyEmbedExternal, 430 490 ) -> PgExecResult { 431 491 let thumb_mime = embed.external.thumb.as_ref().map(|v| v.mime_type.clone()); 432 - let thumb_cid = embed.external.thumb.as_ref().map(|v| v.r#ref.to_string()); 492 + let thumb_cid = 
embed.external.thumb.as_ref().map(|v| v.cid.to_string()); 433 493 434 494 conn.execute( 435 495 "INSERT INTO post_embed_ext (post_uri, uri, title, description, thumb_mime_type, thumb_cid) VALUES ($1, $2, $3, $4, $5, $6)", ··· 437 497 ).await 438 498 } 439 499 500 + const PG_DISABLE_RULE: &str = "app.bsky.feed.postgate#disableRule"; 440 501 async fn post_embed_record_insert<C: GenericClient>( 441 502 conn: &mut C, 442 503 post: &str, 443 504 embed: AppBskyEmbedRecord, 444 505 post_created_at: DateTime<Utc>, 506 + is_backfill: bool, 445 507 ) -> PgExecResult { 446 508 // strip "at://" then break into parts by '/' 447 509 let parts = embed.record.uri[5..].split('/').collect::<Vec<_>>(); 448 510 449 511 let detached = if parts[1] == "app.bsky.feed.post" { 450 - let postgate_effective: Option<DateTime<Utc>> = conn 451 - .query_opt( 452 - "SELECT created_at FROM postgates WHERE post_uri=$1", 453 - &[&post], 454 - ) 455 - .await? 456 - .map(|v| v.get(0)); 512 + let pg_data = postgate_get(conn, post).await?; 457 513 458 - postgate_effective 459 - .map(|v| Utc::now().min(post_created_at) > v) 460 - .unwrap_or_default() 514 + if let Some((effective, detached, rules)) = pg_data { 515 + let detached: HashSet<String> = HashSet::from_iter(detached); 516 + let rules: HashSet<String> = HashSet::from_iter(rules); 517 + let compare_date = match is_backfill { 518 + true => post_created_at, 519 + false => Utc::now(), 520 + }; 521 + 522 + detached.contains(post) || (rules.contains(PG_DISABLE_RULE) && compare_date > effective) 523 + } else { 524 + false 525 + } 461 526 } else { 462 527 false 463 528 }; ··· 468 533 ).await 469 534 } 470 535 536 + async fn postgate_get<C: GenericClient>( 537 + conn: &mut C, 538 + post: &str, 539 + ) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> { 540 + conn.query_opt( 541 + "SELECT created_at, detached, rules FROM postgates WHERE post_uri=$1", 542 + &[&post], 543 + ) 544 + .await? 545 + .map(|v| Ok((v.try_get(0)?, v.try_get(1)?, v.try_get(2)?))) 546 + .transpose() 547 + } 548 + 471 549 pub async fn postgate_upsert<C: GenericClient>( 472 550 conn: &mut C, 473 551 at_uri: &str, ··· 499 577 .await 500 578 } 501 579 502 - pub async fn postgate_maintain_detaches<C: GenericClient>( 503 - conn: &mut C, 504 - post: &str, 505 - detached: &[String], 506 - disable_effective: Option<NaiveDateTime>, 507 - ) -> PgExecResult { 508 - conn.execute( 509 - "SELECT maintain_postgates($1, $2, $3)", 510 - &[&post, &detached, &disable_effective], 511 - ) 512 - .await 513 - } 514 - 515 580 pub async fn profile_upsert<C: GenericClient>( 516 581 conn: &mut C, 517 582 repo: &str, ··· 537 602 &pinned_cid, 538 603 &joined_sp_uri, 539 604 &joined_sp_cid, 605 + &rec.pronouns, 606 + &rec.website, 540 607 &rec.created_at.unwrap_or(Utc::now()).naive_utc(), 541 608 ], 542 609 ) ··· 583 650 ) 584 651 .await?; 585 652 586 - Ok(res.map(|v| v.get(0))) 653 + res.map(|v| v.try_get(0)).transpose() 587 654 } 588 655 589 656 pub async fn starter_pack_upsert<C: GenericClient>( ··· 618 685 ], 619 686 ) 620 687 .await 621 - .map(|r| r.get::<_, i32>(0) == 0) 688 + .and_then(|r| Ok(r.try_get::<_, i32>(0)? 
== 0)) 622 689 } 623 690 624 691 pub async fn starter_pack_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult { ··· 634 701 let record = serde_json::to_value(&rec).unwrap(); 635 702 let thumb = rec.embed.as_ref().and_then(|v| v.external.thumb.clone()); 636 703 let thumb_mime = thumb.as_ref().map(|v| v.mime_type.clone()); 637 - let thumb_cid = thumb.as_ref().map(|v| v.r#ref.to_string()); 704 + let thumb_cid = thumb.as_ref().map(|v| v.cid.to_string()); 638 705 639 706 conn.execute( 640 707 include_str!("sql/status_upsert.sql"), ··· 659 726 .await 660 727 } 661 728 729 + pub async fn threadgate_get<C: GenericClient>( 730 + conn: &mut C, 731 + post: &str, 732 + ) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> { 733 + conn 734 + .query_opt( 735 + "SELECT created_at, allow, allowed_lists FROM threadgates WHERE post_uri=$1 AND allow IS NOT NULL", 736 + &[&post], 737 + ) 738 + .await? 739 + .map(|v| Ok((v.try_get(0)?, v.try_get(1)?, v.try_get(2)?))).transpose() 740 + } 741 + 662 742 pub async fn threadgate_upsert<C: GenericClient>( 663 743 conn: &mut C, 664 744 at_uri: &str, ··· 667 747 ) -> PgExecResult { 668 748 let record = serde_json::to_value(&rec).unwrap(); 669 749 670 - let allowed_lists = rec 671 - .allow 672 - .iter() 673 - .filter_map(|rule| match rule { 674 - ThreadgateRule::List { list } => Some(list.clone()), 675 - _ => None, 676 - }) 677 - .collect::<Vec<_>>(); 750 + let allowed_lists = rec.allow.as_ref().map(|allow| { 751 + allow 752 + .iter() 753 + .filter_map(|rule| match rule { 754 + ThreadgateRule::List { list } => Some(list.clone()), 755 + _ => None, 756 + }) 757 + .collect::<Vec<_>>() 758 + }); 678 759 679 - let allow = rec 680 - .allow 681 - .into_iter() 682 - .map(|v| v.as_str().to_string()) 683 - .collect::<Vec<_>>(); 760 + let allow = rec.allow.map(|allow| { 761 + allow 762 + .into_iter() 763 + .map(|v| v.as_str().to_string()) 764 + .collect::<Vec<_>>() 765 + }); 684 766 685 767 conn.execute( 686 768 include_str!("sql/threadgate_upsert.sql"),
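Several functions in record.rs repeat the "strip at:// then split" dance (bookmark_upsert for the subject's collection, post_embed_record_insert for the embedded record's). Written out once as an illustrative helper, not part of the diff:

fn at_uri_parts(uri: &str) -> Option<(&str, &str, &str)> {
    let rest = uri.strip_prefix("at://")?;
    let mut it = rest.splitn(3, '/');
    Some((it.next()?, it.next()?, it.next()?)) // (did, collection, rkey)
}

fn main() {
    let (did, collection, rkey) =
        at_uri_parts("at://did:plc:abc/app.bsky.feed.post/3kxyz").unwrap();
    assert_eq!(did, "did:plc:abc");
    assert_eq!(collection, "app.bsky.feed.post");
    assert_eq!(rkey, "3kxyz");
}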
+5
consumer/src/db/sql/bookmarks_upsert.sql
··· 1 + INSERT INTO bookmarks (did, rkey, subject, subject_type, tags, created_at) 2 + VALUES ($1, $2, $3, $4, $5, $6) 3 + ON CONFLICT (did, rkey) DO UPDATE SET subject=EXCLUDED.subject, 4 + subject_type=EXCLUDED.subject_type, 5 + tags=EXCLUDED.tags
+2 -2
consumer/src/db/sql/post_insert.sql
··· 1 1 INSERT INTO posts (at_uri, did, cid, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, 2 - root_cid, embed, embed_subtype, created_at) 3 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) 2 + root_cid, embed, embed_subtype, mentions, violates_threadgate, created_at) 3 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) 4 4 ON CONFLICT DO NOTHING
+4 -2
consumer/src/db/sql/profile_upsert.sql
··· 1 1 INSERT INTO profiles (did, cid, avatar_cid, banner_cid, display_name, description, pinned_uri, pinned_cid, 2 - joined_sp_uri, joined_sp_cid, created_at) 3 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) 2 + joined_sp_uri, joined_sp_cid, pronouns, website, created_at) 3 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) 4 4 ON CONFLICT (did) DO UPDATE SET cid=EXCLUDED.cid, 5 5 avatar_cid=EXCLUDED.avatar_cid, 6 6 banner_cid=EXCLUDED.banner_cid, ··· 10 10 pinned_cid=EXCLUDED.pinned_cid, 11 11 joined_sp_uri=EXCLUDED.joined_sp_uri, 12 12 joined_sp_cid=EXCLUDED.joined_sp_cid, 13 + pronouns=EXCLUDED.pronouns, 14 + website=EXCLUDED.website, 13 15 indexed_at=NOW()
+14
consumer/src/firehose/mod.rs
··· 117 117 118 118 FirehoseEvent::Label(event) 119 119 } 120 + "#sync" => { 121 + counter!("firehose_events.total", "event" => "sync").increment(1); 122 + let event: AtpSyncEvent = serde_ipld_dagcbor::from_reader(&mut reader)?; 123 + 124 + // increment the seq 125 + if self.seq < event.seq { 126 + self.seq = event.seq; 127 + } else { 128 + tracing::error!("Event sequence was not greater than previous seq, exiting. {} <= {}", event.seq, self.seq); 129 + return Ok(FirehoseOutput::Close); 130 + } 131 + 132 + FirehoseEvent::Sync(event) 133 + } 120 134 _ => { 121 135 tracing::warn!("unknown event type {ty}"); 122 136 return Ok(FirehoseOutput::Continue);
+23
consumer/src/firehose/types.rs
··· 31 31 Account(AtpAccountEvent), 32 32 Commit(AtpCommitEvent), 33 33 Label(AtpLabelEvent), 34 + Sync(AtpSyncEvent), 34 35 } 35 36 36 37 #[derive(Debug, Deserialize)] ··· 48 49 Suspended, 49 50 Deleted, 50 51 Deactivated, 52 + Throttled, 53 + Desynchronized, 51 54 } 52 55 53 56 impl AtpAccountStatus { ··· 57 60 AtpAccountStatus::Suspended => "suspended", 58 61 AtpAccountStatus::Deleted => "deleted", 59 62 AtpAccountStatus::Deactivated => "deactivated", 63 + AtpAccountStatus::Throttled => "throttled", 64 + AtpAccountStatus::Desynchronized => "desynchronized", 60 65 } 61 66 } 62 67 } ··· 68 73 AtpAccountStatus::Suspended => parakeet_db::types::ActorStatus::Suspended, 69 74 AtpAccountStatus::Deleted => parakeet_db::types::ActorStatus::Deleted, 70 75 AtpAccountStatus::Deactivated => parakeet_db::types::ActorStatus::Deactivated, 76 + AtpAccountStatus::Throttled | AtpAccountStatus::Desynchronized => { 77 + parakeet_db::types::ActorStatus::Active 78 + } 71 79 } 72 80 } 73 81 } ··· 90 98 pub since: Option<String>, 91 99 pub commit: Cid, 92 100 #[serde(rename = "tooBig")] 101 + #[deprecated] 93 102 pub too_big: bool, 94 103 #[serde(default)] 95 104 pub blocks: ByteBuf, 96 105 #[serde(default)] 97 106 pub ops: Vec<CommitOp>, 98 107 #[serde(default)] 108 + #[deprecated] 99 109 pub blobs: Vec<Cid>, 110 + #[serde(rename = "prevData")] 111 + pub prev_data: Option<Cid>, 100 112 } 101 113 102 114 #[derive(Debug, Deserialize)] 103 115 pub struct CommitOp { 104 116 pub action: String, 105 117 pub cid: Option<Cid>, 118 + pub prev: Option<Cid>, 106 119 pub path: String, 107 120 } 108 121 ··· 124 137 pub seq: u64, 125 138 pub labels: Vec<AtpLabel>, 126 139 } 140 + 141 + #[derive(Debug, Deserialize)] 142 + pub struct AtpSyncEvent { 143 + pub seq: u64, 144 + pub did: String, 145 + pub time: DateTime<Utc>, 146 + pub rev: String, 147 + #[serde(default)] 148 + pub blocks: ByteBuf, 149 + }
+112 -30
consumer/src/indexer/mod.rs
··· 1 1 use crate::config::HistoryMode; 2 2 use crate::db; 3 3 use crate::firehose::{ 4 - AtpAccountEvent, AtpCommitEvent, AtpIdentityEvent, CommitOp, FirehoseConsumer, FirehoseEvent, 5 - FirehoseOutput, 4 + AtpAccountEvent, AtpCommitEvent, AtpIdentityEvent, AtpSyncEvent, CommitOp, FirehoseConsumer, 5 + FirehoseEvent, FirehoseOutput, 6 6 }; 7 7 use crate::indexer::types::{ 8 8 AggregateDeltaStore, BackfillItem, BackfillItemInner, CollectionType, RecordTypes, 9 9 }; 10 + use crate::utils::at_uri_is_by; 10 11 use deadpool_postgres::{Object, Pool, Transaction}; 11 12 use did_resolver::Resolver; 12 13 use foldhash::quality::RandomState; ··· 30 31 pub struct RelayIndexerOpts { 31 32 pub history_mode: HistoryMode, 32 33 pub skip_handle_validation: bool, 34 + pub request_backfill: bool, 33 35 } 34 36 35 37 #[derive(Clone)] ··· 38 40 resolver: Arc<Resolver>, 39 41 do_backfill: bool, 40 42 do_handle_res: bool, 43 + req_backfill: bool, 41 44 } 42 45 43 46 pub struct RelayIndexer { ··· 66 69 state: RelayIndexerState { 67 70 resolver, 68 71 do_backfill: opts.history_mode == HistoryMode::BackfillHistory, 72 + req_backfill: opts.request_backfill, 69 73 do_handle_res: !opts.skip_handle_validation, 70 74 idxc_tx, 71 75 }, ··· 104 108 FirehoseEvent::Commit(commit) => { 105 109 index_commit(&mut state, &mut conn, &mut rc, commit).await 106 110 } 111 + FirehoseEvent::Sync(sync) => { 112 + process_sync(&state, &mut conn, &mut rc, sync).await 113 + } 107 114 FirehoseEvent::Label(_) => unreachable!(), 108 115 }; 109 116 ··· 185 192 FirehoseEvent::Identity(identity) => self.hasher.hash_one(&identity.did) % threads, 186 193 FirehoseEvent::Account(account) => self.hasher.hash_one(&account.did) % threads, 187 194 FirehoseEvent::Commit(commit) => self.hasher.hash_one(&commit.repo) % threads, 195 + FirehoseEvent::Sync(sync) => self.hasher.hash_one(&sync.did) % threads, 188 196 FirehoseEvent::Label(_) => { 189 197 // We handle all labels through direct connections to labelers 190 198 tracing::warn!("got #labels from the relay"); ··· 198 206 } 199 207 } 200 208 209 + #[instrument(skip_all, fields(seq = sync.seq, repo = sync.did))] 210 + async fn process_sync( 211 + state: &RelayIndexerState, 212 + conn: &mut Object, 213 + rc: &mut MultiplexedConnection, 214 + sync: AtpSyncEvent, 215 + ) -> eyre::Result<()> { 216 + let Some((sync_state, Some(current_rev))) = db::actor_get_repo_status(conn, &sync.did).await? 217 + else { 218 + return Ok(()); 219 + }; 220 + 221 + // don't care if we're not synced. also no point if !do_backfill bc we might not have a worker 222 + if sync_state == ActorSyncState::Synced && state.do_backfill && sync.rev > current_rev { 223 + tracing::debug!("triggering backfill due to #sync"); 224 + rc.rpush::<_, _, i32>("backfill_queue", sync.did).await?; 225 + } 226 + 227 + Ok(()) 228 + } 229 + 201 230 #[instrument(skip_all, fields(seq = identity.seq, repo = identity.did))] 202 231 async fn index_identity( 203 232 state: &RelayIndexerState, ··· 275 304 .map(ActorStatus::from) 276 305 .unwrap_or(ActorStatus::Active); 277 306 278 - let trigger_bf = if state.do_backfill && status == ActorStatus::Active { 307 + let trigger_bf = if state.do_backfill && state.req_backfill && status == ActorStatus::Active { 279 308 // check old status - if they exist (Some(*)), AND were previously != Active but not Deleted, 280 309 // AND have a rev == null, then trigger backfill. 281 310 db::actor_get_status_and_rev(conn, &account.did) ··· 325 354 // backfill for them and they can be marked active and indexed normally. 
326 355 // TODO: bridgy doesn't implement since atm - we need a special case 327 356 if commit.since.is_some() { 328 - if state.do_backfill { 357 + if state.do_backfill && state.req_backfill { 329 358 rc.rpush::<_, _, i32>("backfill_queue", commit.repo).await?; 330 359 } 331 360 return Ok(()); ··· 356 385 .await?; 357 386 358 387 if trigger_backfill { 359 - rc.rpush::<_, _, i32>("backfill_queue", commit.repo).await?; 388 + if state.req_backfill { 389 + rc.rpush::<_, _, i32>("backfill_queue", commit.repo).await?; 390 + } 360 391 return Ok(()); 361 392 } 362 393 ··· 389 420 db::actor_set_repo_state(&mut t, &commit.repo, &commit.rev, commit.commit).await?; 390 421 391 422 for op in &commit.ops { 392 - process_op(&mut t, &mut state.idxc_tx, &commit.repo, op, &blocks).await?; 423 + process_op(&mut t, rc, &mut state.idxc_tx, &commit.repo, op, &blocks).await?; 393 424 } 394 425 395 426 t.commit().await?; ··· 456 487 #[inline(always)] 457 488 async fn process_op( 458 489 conn: &mut Transaction<'_>, 490 + rc: &mut MultiplexedConnection, 459 491 deltas: &mut impl AggregateDeltaStore, 460 492 repo: &str, 461 493 op: &CommitOp, 462 494 blocks: &HashMap<Cid, Vec<u8>>, 463 - ) -> Result<(), tokio_postgres::Error> { 495 + ) -> eyre::Result<()> { 464 496 let Some((collection_raw, rkey)) = op.path.split_once("/") else { 465 497 tracing::warn!("op contained invalid path {}", op.path); 466 498 return Ok(()); ··· 485 517 return Ok(()); 486 518 }; 487 519 488 - index_op(conn, deltas, repo, cid, decoded, &full_path, rkey).await?; 520 + index_op(conn, rc, deltas, repo, cid, decoded, &full_path, rkey).await?; 489 521 } else if op.action == "delete" { 490 - index_op_delete(conn, deltas, repo, collection, &full_path, rkey).await?; 522 + index_op_delete(conn, rc, deltas, repo, collection, &full_path, rkey).await?; 491 523 } else { 492 524 tracing::warn!("op contained invalid action {}", op.action); 493 525 } ··· 512 544 513 545 pub async fn index_op( 514 546 conn: &mut Transaction<'_>, 547 + rc: &mut MultiplexedConnection, 515 548 deltas: &mut impl AggregateDeltaStore, 516 549 repo: &str, 517 550 cid: Cid, 518 551 record: RecordTypes, 519 552 at_uri: &str, 520 553 rkey: &str, 521 - ) -> Result<(), tokio_postgres::Error> { 554 + ) -> eyre::Result<()> { 522 555 match record { 523 - RecordTypes::AppBskyActorProfile(record) => { 556 + RecordTypes::AppBskyActorProfile(mut record) => { 524 557 if rkey == "self" { 525 558 let labels = record.labels.clone(); 559 + 560 + // don't allow pinned posts that aren't by us. 
561 + if let Some(pinned) = &record.pinned_post { 562 + if !at_uri_is_by(&pinned.uri, repo) { 563 + record.pinned_post = None; 564 + } 565 + } 566 + 526 567 db::profile_upsert(conn, repo, cid, record).await?; 527 568 528 569 if let Some(labels) = labels { 529 570 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; 530 571 } 572 + 573 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 531 574 } 532 575 } 533 576 RecordTypes::AppBskyActorStatus(record) => { 534 577 if rkey == "self" { 535 578 db::status_upsert(conn, repo, record).await?; 579 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 536 580 } 537 581 } 538 582 RecordTypes::AppBskyFeedGenerator(record) => { ··· 545 589 546 590 if did_insert { 547 591 deltas.incr(repo, AggregateType::ProfileFeed).await; 592 + } else { 593 + redis::AsyncTypedCommands::del(rc, format!("feedgen#{at_uri}")).await?; 548 594 } 549 595 } 550 596 RecordTypes::AppBskyFeedLike(record) => { ··· 579 625 }); 580 626 581 627 let labels = record.labels.clone(); 582 - db::post_insert(conn, at_uri, repo, cid, record).await?; 628 + db::post_insert(conn, at_uri, repo, cid, record, false).await?; 583 629 if let Some(labels) = labels { 584 630 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; 585 631 } ··· 593 639 } 594 640 } 595 641 RecordTypes::AppBskyFeedPostgate(record) => { 596 - let split_aturi = record.post.rsplitn(4, '/').collect::<Vec<_>>(); 597 - if repo != split_aturi[2] { 598 - tracing::warn!("tried to create a postgate on a post we don't control!"); 642 + if !at_uri_is_by(&record.post, repo) { 599 643 return Ok(()); 600 644 } 601 645 ··· 613 657 disable_effective, 614 658 ) 615 659 .await?; 660 + 661 + // TODO: should we purge embed#{at_uri} for everything in detached_embeding_uris? 662 + // maybe postgate_maintain_detaches should return a list of uris? 616 663 } 617 664 RecordTypes::AppBskyFeedRepost(record) => { 618 665 deltas ··· 621 668 db::repost_insert(conn, rkey, repo, record).await?; 622 669 } 623 670 RecordTypes::AppBskyFeedThreadgate(record) => { 624 - let split_aturi = record.post.rsplitn(4, '/').collect::<Vec<_>>(); 625 - if repo != split_aturi[2] { 626 - tracing::warn!("tried to create a threadgate on a post we don't control!"); 671 + if !at_uri_is_by(&record.post, repo) { 627 672 return Ok(()); 628 673 } 629 674 630 675 db::threadgate_upsert(conn, at_uri, cid, record).await?; 676 + redis::AsyncTypedCommands::del(rc, format!("post#{at_uri}")).await?; 631 677 } 632 678 RecordTypes::AppBskyGraphBlock(record) => { 633 679 db::block_insert(conn, rkey, repo, record).await?; ··· 653 699 654 700 if did_insert { 655 701 deltas.incr(repo, AggregateType::ProfileList).await; 702 + } else { 703 + redis::AsyncTypedCommands::del(rc, format!("list#{at_uri}")).await?; 656 704 } 657 705 } 658 706 RecordTypes::AppBskyGraphListBlock(record) => { 659 707 db::list_block_insert(conn, at_uri, repo, record).await?; 660 708 } 661 709 RecordTypes::AppBskyGraphListItem(record) => { 662 - let split_aturi = record.list.rsplitn(4, '/').collect::<Vec<_>>(); 663 - if repo != split_aturi[2] { 664 - // it's also probably a bad idea to log *all* the attempts to do this... 
665 - tracing::warn!("tried to create a listitem on a list we don't control!"); 710 + if !at_uri_is_by(&record.list, repo) { 666 711 return Ok(()); 667 712 } 668 713 714 + redis::AsyncTypedCommands::del(rc, format!("list#{}", &record.list)).await?; 669 715 db::list_item_insert(conn, at_uri, record).await?; 670 716 } 671 717 RecordTypes::AppBskyGraphStarterPack(record) => { ··· 673 719 674 720 if did_insert { 675 721 deltas.incr(repo, AggregateType::ProfileStarterpack).await; 722 + } else { 723 + redis::AsyncTypedCommands::del(rc, format!("starterpacks#{at_uri}")).await?; 676 724 } 677 725 } 678 726 RecordTypes::AppBskyGraphVerification(record) => { ··· 686 734 if let Some(labels) = labels { 687 735 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; 688 736 } 737 + 738 + redis::AsyncTypedCommands::del(rc, format!("labeler#{repo}")).await?; 689 739 } 690 740 } 691 741 RecordTypes::AppBskyNotificationDeclaration(record) => { 692 742 if rkey == "self" { 693 743 db::notif_decl_upsert(conn, repo, record).await?; 744 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 694 745 } 695 746 } 696 747 RecordTypes::ChatBskyActorDeclaration(record) => { 697 748 if rkey == "self" { 698 749 db::chat_decl_upsert(conn, repo, record).await?; 750 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 699 751 } 700 752 } 753 + RecordTypes::CommunityLexiconBookmark(record) => { 754 + db::bookmark_upsert(conn, rkey, repo, record).await?; 755 + } 701 756 } 702 757 703 758 db::record_upsert(conn, at_uri, repo, cid).await?; ··· 707 762 708 763 pub async fn index_op_delete( 709 764 conn: &mut Transaction<'_>, 765 + rc: &mut MultiplexedConnection, 710 766 deltas: &mut impl AggregateDeltaStore, 711 767 repo: &str, 712 768 collection: CollectionType, 713 769 at_uri: &str, 714 770 rkey: &str, 715 - ) -> Result<(), tokio_postgres::Error> { 771 + ) -> eyre::Result<()> { 716 772 match collection { 717 - CollectionType::BskyProfile => db::profile_delete(conn, repo).await?, 718 - CollectionType::BskyStatus => db::status_delete(conn, repo).await?, 773 + CollectionType::BskyProfile => { 774 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 775 + db::profile_delete(conn, repo).await? 776 + } 777 + CollectionType::BskyStatus => { 778 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 779 + db::status_delete(conn, repo).await? 780 + } 719 781 CollectionType::BskyBlock => db::block_delete(conn, rkey, repo).await?, 720 782 CollectionType::BskyFeedGen => { 783 + redis::AsyncTypedCommands::del(rc, format!("feedgen#{at_uri}")).await?; 721 784 let count = db::feedgen_delete(conn, at_uri).await?; 722 785 deltas 723 786 .add_delta(repo, AggregateType::ProfileFeed, -(count as i32)) ··· 734 797 let post_info = db::post_get_info_for_delete(conn, at_uri).await?; 735 798 736 799 db::post_delete(conn, at_uri).await?; 800 + redis::AsyncTypedCommands::del(rc, format!("post#{at_uri}")).await?; 737 801 738 802 if let Some((reply_to, embed)) = post_info { 739 803 deltas.decr(repo, AggregateType::ProfilePost).await; ··· 754 818 } 755 819 0 756 820 } 757 - CollectionType::BskyFeedThreadgate => db::threadgate_delete(conn, at_uri).await?, 821 + CollectionType::BskyFeedThreadgate => { 822 + redis::AsyncTypedCommands::del(rc, format!("post#{at_uri}")).await?; 823 + db::threadgate_delete(conn, at_uri).await? 824 + } 758 825 CollectionType::BskyFollow => { 759 826 if let Some(followee) = db::follow_delete(conn, rkey, repo).await? 
{ 760 827 deltas.decr(&followee, AggregateType::Follower).await; ··· 763 830 0 764 831 } 765 832 CollectionType::BskyList => { 833 + redis::AsyncTypedCommands::del(rc, format!("list#{at_uri}")).await?; 766 834 let count = db::list_delete(conn, at_uri).await?; 767 835 deltas 768 836 .add_delta(repo, AggregateType::ProfileList, -(count as i32)) ··· 770 838 count 771 839 } 772 840 CollectionType::BskyListBlock => db::list_block_delete(conn, at_uri).await?, 773 - CollectionType::BskyListItem => db::list_item_delete(conn, at_uri).await?, 841 + CollectionType::BskyListItem => { 842 + redis::AsyncTypedCommands::del(rc, format!("list#{at_uri}")).await?; 843 + db::list_item_delete(conn, at_uri).await? 844 + } 774 845 CollectionType::BskyStarterPack => { 846 + redis::AsyncTypedCommands::del(rc, format!("starterpacks#{at_uri}")).await?; 775 847 let count = db::starter_pack_delete(conn, at_uri).await?; 776 848 deltas 777 849 .add_delta(repo, AggregateType::ProfileStarterpack, -(count as i32)) ··· 779 851 count 780 852 } 781 853 CollectionType::BskyVerification => db::verification_delete(conn, at_uri).await?, 782 - CollectionType::BskyLabelerService => db::labeler_delete(conn, at_uri).await?, 783 - CollectionType::BskyNotificationDeclaration => db::notif_decl_delete(conn, repo).await?, 784 - CollectionType::ChatActorDecl => db::chat_decl_delete(conn, repo).await?, 854 + CollectionType::BskyLabelerService => { 855 + redis::AsyncTypedCommands::del(rc, format!("labeler#{repo}")).await?; 856 + db::labeler_delete(conn, at_uri).await? 857 + } 858 + CollectionType::BskyNotificationDeclaration => { 859 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 860 + db::notif_decl_delete(conn, repo).await? 861 + } 862 + CollectionType::ChatActorDecl => { 863 + redis::AsyncTypedCommands::del(rc, format!("profile#{repo}")).await?; 864 + db::chat_decl_delete(conn, repo).await? 865 + } 866 + CollectionType::CommunityLexiconBookmark => db::bookmark_delete(conn, rkey, repo).await?, 785 867 _ => unreachable!(), 786 868 }; 787 869
+15 -28
consumer/src/indexer/records.rs
··· 1 1 use crate::utils; 2 2 use chrono::{DateTime, Utc}; 3 - use ipld_core::cid::Cid; 4 3 use lexica::app_bsky::actor::{ChatAllowIncoming, ProfileAllowSubscriptions, Status}; 5 4 use lexica::app_bsky::embed::AspectRatio; 6 5 use lexica::app_bsky::labeler::LabelerPolicy; 7 6 use lexica::app_bsky::richtext::FacetMain; 8 7 use lexica::com_atproto::label::SelfLabels; 9 8 use lexica::com_atproto::moderation::{ReasonType, SubjectType}; 9 + use lexica::{Blob, StrongRef}; 10 10 use serde::{Deserialize, Serialize}; 11 11 use serde_with::serde_as; 12 12 13 - #[derive(Clone, Debug, Deserialize, Serialize)] 14 - pub struct StrongRef { 15 - #[serde( 16 - deserialize_with = "utils::cid_from_string", 17 - serialize_with = "utils::cid_as_str" 18 - )] 19 - pub cid: Cid, 20 - pub uri: String, 21 - } 22 - 23 - #[derive(Clone, Debug, Deserialize, Serialize)] 24 - #[serde(tag = "$type")] 25 - #[serde(rename = "blob")] 26 - #[serde(rename_all = "camelCase")] 27 - pub struct Blob { 28 - pub mime_type: String, 29 - #[serde(serialize_with = "utils::cid_as_link")] 30 - pub r#ref: Cid, 31 - pub size: i32, 32 - } 33 - 34 13 #[derive(Debug, Deserialize, Serialize)] 35 14 #[serde(rename_all = "camelCase")] 36 15 #[serde_as] ··· 44 23 pub labels: Option<SelfLabels>, 45 24 pub joined_via_starter_pack: Option<StrongRef>, 46 25 pub pinned_post: Option<StrongRef>, 26 + #[serde_as(as = "utils::safe_string")] 27 + pub pronouns: Option<String>, 28 + #[serde_as(as = "utils::safe_string")] 29 + pub website: Option<String>, 47 30 pub created_at: Option<DateTime<Utc>>, 48 31 } 49 32 ··· 284 267 pub struct AppBskyFeedThreadgate { 285 268 pub post: String, 286 269 pub created_at: DateTime<Utc>, 287 - #[serde(default)] 288 - pub allow: Vec<ThreadgateRule>, 270 + pub allow: Option<Vec<ThreadgateRule>>, 289 271 #[serde(default)] 290 272 pub hidden_replies: Vec<String>, 291 273 } 274 + 275 + pub const THREADGATE_RULE_MENTION: &str = "app.bsky.feed.threadgate#mentionRule"; 276 + pub const THREADGATE_RULE_FOLLOWER: &str = "app.bsky.feed.threadgate#followerRule"; 277 + pub const THREADGATE_RULE_FOLLOWING: &str = "app.bsky.feed.threadgate#followingRule"; 278 + pub const THREADGATE_RULE_LIST: &str = "app.bsky.feed.threadgate#listRule"; 292 279 293 280 #[derive(Debug, Deserialize, Serialize)] 294 281 #[serde(tag = "$type")] ··· 306 293 impl ThreadgateRule { 307 294 pub fn as_str(&self) -> &'static str { 308 295 match self { 309 - ThreadgateRule::Mention => "app.bsky.feed.threadgate#mentionRule", 310 - ThreadgateRule::Follower => "app.bsky.feed.threadgate#followerRule", 311 - ThreadgateRule::Following => "app.bsky.feed.threadgate#followingRule", 312 - ThreadgateRule::List { .. } => "app.bsky.feed.threadgate#listRule", 296 + ThreadgateRule::Mention => THREADGATE_RULE_MENTION, 297 + ThreadgateRule::Follower => THREADGATE_RULE_FOLLOWER, 298 + ThreadgateRule::Following => THREADGATE_RULE_FOLLOWING, 299 + ThreadgateRule::List { .. } => THREADGATE_RULE_LIST, 313 300 } 314 301 } 315 302 }
+5
consumer/src/indexer/types.rs
··· 41 41 AppBskyNotificationDeclaration(records::AppBskyNotificationDeclaration), 42 42 #[serde(rename = "chat.bsky.actor.declaration")] 43 43 ChatBskyActorDeclaration(records::ChatBskyActorDeclaration), 44 + #[serde(rename = "community.lexicon.bookmarks.bookmark")] 45 + CommunityLexiconBookmark(lexica::community_lexicon::bookmarks::Bookmark), 44 46 } 45 47 46 48 #[derive(Debug, PartialOrd, PartialEq, Deserialize, Serialize)] ··· 63 65 BskyLabelerService, 64 66 BskyNotificationDeclaration, 65 67 ChatActorDecl, 68 + CommunityLexiconBookmark, 66 69 Unsupported, 67 70 } 68 71 ··· 87 90 "app.bsky.labeler.service" => CollectionType::BskyLabelerService, 88 91 "app.bsky.notification.declaration" => CollectionType::BskyNotificationDeclaration, 89 92 "chat.bsky.actor.declaration" => CollectionType::ChatActorDecl, 93 + "community.lexicon.bookmarks.bookmark" => CollectionType::CommunityLexiconBookmark, 90 94 _ => CollectionType::Unsupported, 91 95 } 92 96 } ··· 111 115 CollectionType::BskyVerification => false, 112 116 CollectionType::BskyLabelerService => true, 113 117 CollectionType::BskyNotificationDeclaration => true, 118 + CollectionType::CommunityLexiconBookmark => true, 114 119 CollectionType::Unsupported => false, 115 120 } 116 121 }
+25
consumer/src/instrumentation.rs
··· 1 + use tracing::Subscriber; 2 + use tracing_subscriber::filter::Filtered; 3 + use tracing_subscriber::layer::SubscriberExt; 4 + use tracing_subscriber::registry::LookupSpan; 5 + use tracing_subscriber::util::SubscriberInitExt; 6 + use tracing_subscriber::{EnvFilter, Layer}; 7 + 8 + pub fn init_instruments(cfg: &crate::config::ConfigInstruments) { 9 + let log_layer = init_log(cfg.log_json); 10 + 11 + tracing_subscriber::registry().with(log_layer).init(); 12 + } 13 + 14 + fn init_log<S>(json: bool) -> Filtered<Box<dyn Layer<S> + Send + Sync>, EnvFilter, S> 15 + where 16 + S: Subscriber + for<'span> LookupSpan<'span>, 17 + { 18 + let stdout_filter = EnvFilter::from_default_env(); 19 + 20 + match json { 21 + true => tracing_subscriber::fmt::layer().json().boxed(), 22 + false => tracing_subscriber::fmt::layer().boxed(), 23 + } 24 + .with_filter(stdout_filter) 25 + }
+4 -2
consumer/src/main.rs
··· 12 12 mod db; 13 13 mod firehose; 14 14 mod indexer; 15 + mod instrumentation; 15 16 mod label_indexer; 16 17 mod utils; 17 18 18 19 #[tokio::main] 19 20 async fn main() -> eyre::Result<()> { 20 - tracing_subscriber::fmt::init(); 21 21 PrometheusBuilder::new().install()?; 22 22 23 23 let cli = cmd::parse(); 24 24 let conf = config::load_config()?; 25 25 26 + instrumentation::init_instruments(&conf.instruments); 26 27 let user_agent = build_ua(&conf.ua_contact); 27 28 28 29 let pool = conf.database.create_pool(Some(Runtime::Tokio1), NoTls)?; ··· 115 116 let indexer_opts = indexer::RelayIndexerOpts { 116 117 history_mode: indexer_cfg.history_mode, 117 118 skip_handle_validation: indexer_cfg.skip_handle_validation, 119 + request_backfill: indexer_cfg.request_backfill, 118 120 }; 119 121 120 122 let relay_indexer = indexer::RelayIndexer::new( ··· 128 130 ) 129 131 .await?; 130 132 131 - tracker.spawn(relay_indexer.run(indexer_cfg.indexer_workers, stop)); 133 + tracker.spawn(relay_indexer.run(indexer_cfg.workers, stop)); 132 134 tracker.spawn(index_transport(index_client, idxc_rx)); 133 135 } 134 136
+42 -36
consumer/src/utils.rs
··· 1 - use ipld_core::cid::Cid;
2 - use serde::{Deserialize, Deserializer, Serialize, Serializer};
1 + use lexica::app_bsky::richtext::{Facet, FacetMain, FacetOuter};
2 + use lexica::{Blob, StrongRef};
3 + use serde::{Deserialize, Deserializer};
3 4 
4 5 // see https://deer.social/profile/did:plc:63y3oh7iakdueqhlj6trojbq/post/3ltuv4skhqs2h
5 6 pub fn safe_string<'de, D: Deserializer<'de>>(deserializer: D) -> Result<String, D::Error> {
··· 8 9 Ok(str.replace('\u{0000}', ""))
9 10 }
10 11 
11 - pub fn cid_from_string<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Cid, D::Error> {
12 - let str = String::deserialize(deserializer)?;
13 - 
14 - Cid::try_from(str).map_err(serde::de::Error::custom)
15 - }
16 - 
17 - pub fn cid_as_str<S>(inp: &Cid, serializer: S) -> Result<S::Ok, S::Error>
18 - where
19 - S: Serializer,
20 - {
21 - inp.to_string().serialize(serializer)
12 + pub fn blob_ref(blob: Option<Blob>) -> Option<String> {
13 + blob.map(|blob| blob.cid.to_string())
22 14 }
23 15 
24 - #[derive(Debug, Deserialize, Serialize)]
25 - pub struct LinkRef {
26 - #[serde(rename = "$link")]
27 - link: String,
28 - }
29 - 
30 - pub fn cid_as_link<S>(inp: &Cid, serializer: S) -> Result<S::Ok, S::Error>
31 - where
32 - S: Serializer,
33 - {
34 - LinkRef {
35 - link: inp.to_string(),
36 - }
37 - .serialize(serializer)
38 - }
39 - 
40 - pub fn blob_ref(blob: Option<crate::indexer::records::Blob>) -> Option<String> {
41 - blob.map(|blob| blob.r#ref.to_string())
42 - }
43 - 
44 - pub fn strongref_to_parts(
45 - strongref: Option<&crate::indexer::records::StrongRef>,
46 - ) -> (Option<String>, Option<String>) {
16 + pub fn strongref_to_parts(strongref: Option<&StrongRef>) -> (Option<String>, Option<String>) {
47 17 strongref
48 18 .map(|sr| (sr.uri.clone(), sr.cid.to_string()))
49 19 .unzip()
··· 64 34 None
65 35 }
66 36 }
37 + 
38 + pub fn at_uri_is_by(uri: &str, did: &str) -> bool {
39 + let split_aturi = uri.rsplitn(4, '/').collect::<Vec<_>>();
40 + 
41 + split_aturi.get(2).is_some_and(|&seg| seg == did)
42 + }
43 + 
44 + pub fn extract_mentions_and_tags(from: &[FacetMain]) -> (Vec<String>, Vec<String>) {
45 + let (mentions, tags) = from
46 + .iter()
47 + .flat_map(|v| {
48 + v.features.iter().map(|facet| match facet {
49 + FacetOuter::Bsky(Facet::Mention { did }) => (Some(did), None),
50 + FacetOuter::Bsky(Facet::Tag { tag }) => (None, Some(tag)),
51 + _ => (None, None),
52 + })
53 + })
54 + .unzip::<_, _, Vec<_>, Vec<_>>();
55 + 
56 + let mentions = mentions.into_iter().flatten().cloned().collect();
57 + let tags = tags.into_iter().flatten().cloned().collect();
58 + 
59 + (mentions, tags)
60 + }
61 + 
62 + pub fn merge_tags<T>(t1: Option<Vec<T>>, t2: Option<Vec<T>>) -> Vec<T> {
63 + match (t1, t2) {
64 + (Some(t1), None) => t1,
65 + (None, Some(t2)) => t2,
66 + (Some(mut t1), Some(t2)) => {
67 + t1.extend(t2);
68 + t1
69 + }
70 + _ => Vec::default(),
71 + }
72 + }
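`at_uri_is_by` replaces the repeated `rsplitn` ownership checks in the indexer (and now rejects malformed at-uris via `.get(2)` rather than indexing, which would panic). A quick illustrative test, not part of this MR, assuming it lives beside the function in `consumer/src/utils.rs`:

```rust
#[cfg(test)]
mod tests {
    use super::at_uri_is_by;

    #[test]
    fn at_uri_is_by_matches_repo_did() {
        // at-uri layout: at://<did>/<collection>/<rkey>, so rsplitn(4, '/')
        // yields [rkey, collection, did, "at:/"] and index 2 is the repo DID.
        assert!(at_uri_is_by(
            "at://did:plc:abc123/app.bsky.feed.post/3kxyz",
            "did:plc:abc123"
        ));
        // malformed uris have no segment 2 and are rejected instead of panicking
        assert!(!at_uri_is_by("not-an-at-uri", "did:plc:abc123"));
    }
}
```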
+2 -12
dataloader-rs/Cargo.toml
··· 2 2 name = "dataloader" 3 3 version = "0.18.0" 4 4 edition = "2021" 5 - authors = ["cksac <cs.cksac@gmail.com>", "Lily"] 5 + authors = ["cksac <cs.cksac@gmail.com>", "Mia"] 6 6 description = "Rust implementation of Facebook's DataLoader using async-await." 7 7 keywords = ["batcher", "dataloader", "cache"] 8 8 categories = ["asynchronous", "caching"] ··· 15 15 [badges] 16 16 travis-ci = { repository = "/cksac/dataloader-rs" } 17 17 18 - [features] 19 - default = ["runtime-async-std"] 20 - runtime-async-std = [ 21 - "async-std", 22 - ] 23 - runtime-tokio = [ 24 - "tokio" 25 - ] 26 - 27 18 [dependencies] 28 - async-std = { version = "1", optional = true } 29 - tokio = { version = "1", features = [ "sync", "rt" ], optional = true } 19 + tokio = { version = "1", features = [ "sync", "rt" ] } 30 20 31 21 [dev-dependencies] 32 22 futures = "0.3"
+200
dataloader-rs/src/async_cached.rs
··· 1 + use crate::runtime::{Arc, Mutex};
2 + use crate::{yield_fn, BatchFn, WaitForWorkFn};
3 + use std::collections::{HashMap, HashSet};
4 + use std::hash::Hash;
5 + use std::iter::IntoIterator;
6 + 
7 + pub trait AsyncCache {
8 + type Key;
9 + type Val;
10 + async fn get(&mut self, key: &Self::Key) -> Option<Self::Val>;
11 + async fn insert(&mut self, key: Self::Key, val: Self::Val);
12 + async fn remove(&mut self, key: &Self::Key) -> Option<Self::Val>;
13 + async fn clear(&mut self);
14 + }
15 + 
16 + struct State<K, V, C>
17 + where
18 + C: AsyncCache<Key = K, Val = V>,
19 + {
20 + completed: C,
21 + pending: HashSet<K>,
22 + }
23 + 
24 + impl<K: Eq + Hash, V, C> State<K, V, C>
25 + where
26 + C: AsyncCache<Key = K, Val = V>,
27 + {
28 + fn with_cache(cache: C) -> Self {
29 + State {
30 + completed: cache,
31 + pending: HashSet::new(),
32 + }
33 + }
34 + }
35 + 
36 + #[derive(Clone)]
37 + pub struct Loader<K, V, F, C>
38 + where
39 + K: Eq + Hash + Clone,
40 + V: Clone,
41 + F: BatchFn<K, V>,
42 + C: AsyncCache<Key = K, Val = V>,
43 + {
44 + state: Arc<Mutex<State<K, V, C>>>,
45 + load_fn: Arc<Mutex<F>>,
46 + wait_for_work_fn: Arc<dyn WaitForWorkFn>,
47 + max_batch_size: usize,
48 + }
49 + 
50 + impl<K, V, F, C> Loader<K, V, F, C>
51 + where
52 + K: Eq + Hash + Clone,
53 + V: Clone,
54 + F: BatchFn<K, V>,
55 + C: AsyncCache<Key = K, Val = V>,
56 + {
57 + pub fn new(load_fn: F, cache: C) -> Self {
58 + Loader {
59 + state: Arc::new(Mutex::new(State::with_cache(cache))),
60 + load_fn: Arc::new(Mutex::new(load_fn)),
61 + max_batch_size: 200,
62 + wait_for_work_fn: Arc::new(yield_fn(10)),
63 + }
64 + }
65 + 
66 + pub fn with_max_batch_size(mut self, max_batch_size: usize) -> Self {
67 + self.max_batch_size = max_batch_size;
68 + self
69 + }
70 + 
71 + pub fn with_yield_count(mut self, yield_count: usize) -> Self {
72 + self.wait_for_work_fn = Arc::new(yield_fn(yield_count));
73 + self
74 + }
75 + 
76 + /// Replaces the yield-for-work behavior with an arbitrary future. Rather than yielding
77 + /// to the runtime repeatedly, this will generate and `.await` a future of your choice.
78 + /// ***This is incompatible with*** [`Self::with_yield_count()`].
79 + pub fn with_custom_wait_for_work(mut self, wait_for_work_fn: impl WaitForWorkFn) -> Self { 80 + self.wait_for_work_fn = Arc::new(wait_for_work_fn); 81 + self 82 + } 83 + 84 + pub fn max_batch_size(&self) -> usize { 85 + self.max_batch_size 86 + } 87 + 88 + pub async fn load(&self, key: K) -> Option<V> { 89 + let mut state = self.state.lock().await; 90 + if let Some(v) = state.completed.get(&key).await { 91 + return Some(v.clone()); 92 + } 93 + 94 + if !state.pending.contains(&key) { 95 + state.pending.insert(key.clone()); 96 + if state.pending.len() >= self.max_batch_size { 97 + let keys = state.pending.drain().collect::<Vec<K>>(); 98 + let mut load_fn = self.load_fn.lock().await; 99 + let load_ret = load_fn.load(keys.as_ref()).await; 100 + drop(load_fn); 101 + for (k, v) in load_ret.into_iter() { 102 + state.completed.insert(k, v).await; 103 + } 104 + return state.completed.get(&key).await.clone(); 105 + } 106 + } 107 + drop(state); 108 + 109 + (self.wait_for_work_fn)().await; 110 + 111 + let mut state = self.state.lock().await; 112 + if let Some(v) = state.completed.get(&key).await { 113 + return Some(v.clone()); 114 + } 115 + 116 + if !state.pending.is_empty() { 117 + let keys = state.pending.drain().collect::<Vec<K>>(); 118 + let mut load_fn = self.load_fn.lock().await; 119 + let load_ret = load_fn.load(keys.as_ref()).await; 120 + drop(load_fn); 121 + for (k, v) in load_ret.into_iter() { 122 + state.completed.insert(k, v).await; 123 + } 124 + } 125 + 126 + state.completed.get(&key).await.clone() 127 + } 128 + 129 + pub async fn load_many(&self, keys: Vec<K>) -> HashMap<K, V> { 130 + let mut state = self.state.lock().await; 131 + let mut ret = HashMap::new(); 132 + let mut rest = Vec::new(); 133 + for key in keys.into_iter() { 134 + if let Some(v) = state.completed.get(&key).await.clone() { 135 + ret.insert(key, v); 136 + continue; 137 + } 138 + if !state.pending.contains(&key) { 139 + state.pending.insert(key.clone()); 140 + 141 + if state.pending.len() >= self.max_batch_size { 142 + let keys = state.pending.drain().collect::<Vec<K>>(); 143 + let mut load_fn = self.load_fn.lock().await; 144 + let load_ret = load_fn.load(keys.as_ref()).await; 145 + drop(load_fn); 146 + for (k, v) in load_ret.into_iter() { 147 + state.completed.insert(k, v).await; 148 + } 149 + } 150 + } 151 + rest.push(key); 152 + } 153 + drop(state); 154 + 155 + (self.wait_for_work_fn)().await; 156 + 157 + if !rest.is_empty() { 158 + let mut state = self.state.lock().await; 159 + if !state.pending.is_empty() { 160 + let keys = state.pending.drain().collect::<Vec<K>>(); 161 + let mut load_fn = self.load_fn.lock().await; 162 + let load_ret = load_fn.load(keys.as_ref()).await; 163 + drop(load_fn); 164 + for (k, v) in load_ret.into_iter() { 165 + state.completed.insert(k, v).await; 166 + } 167 + } 168 + 169 + for key in rest.into_iter() { 170 + if let Some(v) = state.completed.get(&key).await.clone() { 171 + ret.insert(key, v); 172 + } 173 + } 174 + } 175 + 176 + ret 177 + } 178 + 179 + pub async fn prime(&self, key: K, val: V) { 180 + let mut state = self.state.lock().await; 181 + state.completed.insert(key, val).await; 182 + } 183 + 184 + pub async fn prime_many(&self, values: impl IntoIterator<Item = (K, V)>) { 185 + let mut state = self.state.lock().await; 186 + for (k, v) in values.into_iter() { 187 + state.completed.insert(k, v).await; 188 + } 189 + } 190 + 191 + pub async fn clear(&self, key: K) { 192 + let mut state = self.state.lock().await; 193 + state.completed.remove(&key).await; 194 + } 195 + 196 + pub 
async fn clear_all(&self) { 197 + let mut state = self.state.lock().await; 198 + state.completed.clear().await 199 + } 200 + }
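The new `async_cached::Loader` batches pending keys like the existing loaders but stores completed loads in a pluggable `AsyncCache` instead of an in-process map. A minimal sketch of the contract, assuming the crate's `BatchFn` signature shown in `lib.rs`; the `MemCache` and `EchoBatcher` types here are hypothetical stand-ins, not part of this MR:

```rust
use dataloader::async_cached::{AsyncCache, Loader};
use dataloader::BatchFn;
use std::collections::HashMap;

// Simplest possible backing cache: an in-memory map.
struct MemCache(HashMap<String, String>);

impl AsyncCache for MemCache {
    type Key = String;
    type Val = String;
    async fn get(&mut self, key: &String) -> Option<String> {
        self.0.get(key).cloned()
    }
    async fn insert(&mut self, key: String, val: String) {
        self.0.insert(key, val);
    }
    async fn remove(&mut self, key: &String) -> Option<String> {
        self.0.remove(key)
    }
    async fn clear(&mut self) {
        self.0.clear();
    }
}

struct EchoBatcher;

impl BatchFn<String, String> for EchoBatcher {
    async fn load(&mut self, keys: &[String]) -> HashMap<String, String> {
        // one round trip for the whole accumulated batch; here we just echo the key
        keys.iter()
            .map(|k| (k.clone(), format!("value for {k}")))
            .collect()
    }
}

#[tokio::main]
async fn main() {
    let loader = Loader::new(EchoBatcher, MemCache(HashMap::new())).with_yield_count(10);
    assert_eq!(loader.load("a".into()).await.as_deref(), Some("value for a"));
}
```

Note that unlike the existing `cached::Loader`, `load` here returns `Option<V>`: a key the batch fn doesn't resolve simply comes back as `None`.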
+1
dataloader-rs/src/lib.rs
··· 1 1 #![allow(async_fn_in_trait)] 2 2 3 + pub mod async_cached; 3 4 mod batch_fn; 4 5 pub mod cached; 5 6 pub mod non_cached;
-13
dataloader-rs/src/runtime.rs
··· 1 - // runtime-async-std 2 - #[cfg(feature = "runtime-async-std")] 3 - pub type Arc<T> = async_std::sync::Arc<T>; 4 - 5 - #[cfg(feature = "runtime-async-std")] 6 - pub type Mutex<T> = async_std::sync::Mutex<T>; 7 - 8 - #[cfg(feature = "runtime-async-std")] 9 - pub use async_std::task::yield_now; 10 - 11 1 // runtime-tokio 12 - #[cfg(feature = "runtime-tokio")] 13 2 pub type Arc<T> = std::sync::Arc<T>; 14 3 15 - #[cfg(feature = "runtime-tokio")] 16 4 pub type Mutex<T> = tokio::sync::Mutex<T>; 17 5 18 - #[cfg(feature = "runtime-tokio")] 19 6 pub use tokio::task::yield_now;
+23
justfile
··· 1 + mod consumer
2 + mod parakeet
3 + mod parakeet-index
4 + 
5 + alias run-consumer := consumer::run
6 + alias run-parakeet := parakeet::run
7 + alias run-index := parakeet-index::run
8 + 
9 + @reset-db:
10 + echo "Resetting and redoing Diesel migrations..."
11 + diesel migration redo -a --locked-schema
12 + 
13 + @reset-redis:
14 + echo "Resetting Redis lists..."
15 + redis-cli DEL backfill_queue backfill_processing bf_downloaded
16 + 
17 + @reset-and-backfill *dids: reset-db reset-redis
18 + for PARAMETER_VALUE in {{dids}}; do \
19 + psql parakeet -c "INSERT INTO actors (did) VALUES ('$PARAMETER_VALUE');" > /dev/null 2>&1; \
20 + redis-cli LPUSH backfill_queue "$PARAMETER_VALUE" > /dev/null 2>&1; \
21 + done
22 + RUST_LOG=info,consumer=trace,consumer::firehose=info \
23 + just run-consumer --backfill
+1
lexica/Cargo.toml
··· 5 5 6 6 [dependencies] 7 7 chrono = { version = "0.4.39", features = ["serde"] } 8 + cid = { version = "0.11", features = ["serde"] } 8 9 serde = { version = "1.0.216", features = ["derive"] } 9 10 serde_json = "1.0.134"
+36 -6
lexica/src/app_bsky/actor.rs
··· 1 1 use crate::app_bsky::embed::External; 2 + use crate::app_bsky::graph::ListViewBasic; 2 3 use crate::com_atproto::label::Label; 3 4 use chrono::prelude::*; 4 5 use serde::{Deserialize, Serialize}; 5 6 use std::fmt::Display; 6 7 use std::str::FromStr; 8 + 9 + #[derive(Clone, Default, Debug, Serialize)] 10 + #[serde(rename_all = "camelCase")] 11 + pub struct ProfileViewerState { 12 + pub muted: bool, 13 + #[serde(skip_serializing_if = "Option::is_none")] 14 + pub muted_by_list: Option<ListViewBasic>, 15 + pub blocked_by: bool, 16 + #[serde(skip_serializing_if = "Option::is_none")] 17 + pub blocking: Option<String>, 18 + #[serde(skip_serializing_if = "Option::is_none")] 19 + pub blocking_by_list: Option<ListViewBasic>, 20 + #[serde(skip_serializing_if = "Option::is_none")] 21 + pub following: Option<String>, 22 + #[serde(skip_serializing_if = "Option::is_none")] 23 + pub followed_by: Option<String>, 24 + // #[serde(skip_serializing_if = "Option::is_none")] 25 + // pub known_followers: Option<()>, 26 + // #[serde(skip_serializing_if = "Option::is_none")] 27 + // pub activity_subscriptions: Option<()>, 28 + } 7 29 8 30 #[derive(Clone, Default, Debug, Serialize)] 9 31 #[serde(rename_all = "camelCase")] ··· 130 152 pub avatar: Option<String>, 131 153 #[serde(skip_serializing_if = "Option::is_none")] 132 154 pub associated: Option<ProfileAssociated>, 133 - // #[serde(skip_serializing_if = "Option::is_none")] 134 - // pub viewer: Option<()>, 155 + #[serde(skip_serializing_if = "Option::is_none")] 156 + pub viewer: Option<ProfileViewerState>, 135 157 #[serde(skip_serializing_if = "Vec::is_empty")] 136 158 pub labels: Vec<Label>, 137 159 #[serde(skip_serializing_if = "Option::is_none")] 138 160 pub verification: Option<VerificationState>, 139 161 #[serde(skip_serializing_if = "Option::is_none")] 140 162 pub status: Option<StatusView>, 163 + #[serde(skip_serializing_if = "Option::is_none")] 164 + pub pronouns: Option<String>, 141 165 142 166 pub created_at: DateTime<Utc>, 143 167 } ··· 156 180 pub avatar: Option<String>, 157 181 #[serde(skip_serializing_if = "Option::is_none")] 158 182 pub associated: Option<ProfileAssociated>, 159 - // #[serde(skip_serializing_if = "Option::is_none")] 160 - // pub viewer: Option<()>, 183 + #[serde(skip_serializing_if = "Option::is_none")] 184 + pub viewer: Option<ProfileViewerState>, 161 185 #[serde(skip_serializing_if = "Vec::is_empty")] 162 186 pub labels: Vec<Label>, 163 187 #[serde(skip_serializing_if = "Option::is_none")] 164 188 pub verification: Option<VerificationState>, 165 189 #[serde(skip_serializing_if = "Option::is_none")] 166 190 pub status: Option<StatusView>, 191 + #[serde(skip_serializing_if = "Option::is_none")] 192 + pub pronouns: Option<String>, 167 193 168 194 pub created_at: DateTime<Utc>, 169 195 pub indexed_at: NaiveDateTime, ··· 189 215 pub associated: Option<ProfileAssociated>, 190 216 // #[serde(skip_serializing_if = "Option::is_none")] 191 217 // pub joined_via_starter_pack: Option<()>, 192 - // #[serde(skip_serializing_if = "Option::is_none")] 193 - // pub viewer: Option<()>, 218 + #[serde(skip_serializing_if = "Option::is_none")] 219 + pub viewer: Option<ProfileViewerState>, 194 220 #[serde(skip_serializing_if = "Vec::is_empty")] 195 221 pub labels: Vec<Label>, 196 222 // #[serde(skip_serializing_if = "Option::is_none")] ··· 199 225 pub verification: Option<VerificationState>, 200 226 #[serde(skip_serializing_if = "Option::is_none")] 201 227 pub status: Option<StatusView>, 228 + #[serde(skip_serializing_if = "Option::is_none")] 
229 + pub pronouns: Option<String>, 230 + #[serde(skip_serializing_if = "Option::is_none")] 231 + pub website: Option<String>, 202 232 203 233 pub created_at: DateTime<Utc>, 204 234 pub indexed_at: NaiveDateTime,
+32
lexica/src/app_bsky/bookmark.rs
··· 1 + use crate::app_bsky::feed::{BlockedAuthor, PostView}; 2 + use crate::StrongRef; 3 + use chrono::prelude::*; 4 + use serde::Serialize; 5 + 6 + #[derive(Clone, Debug, Serialize)] 7 + #[serde(rename_all = "camelCase")] 8 + pub struct BookmarkView { 9 + pub subject: StrongRef, 10 + pub item: BookmarkViewItem, 11 + pub created_at: DateTime<Utc>, 12 + } 13 + 14 + #[derive(Clone, Debug, Serialize)] 15 + #[serde(tag = "$type")] 16 + // This is technically the same as ReplyRefPost atm, but just in case... 17 + pub enum BookmarkViewItem { 18 + #[serde(rename = "app.bsky.feed.defs#postView")] 19 + Post(PostView), 20 + #[serde(rename = "app.bsky.feed.defs#notFoundPost")] 21 + NotFound { 22 + uri: String, 23 + #[serde(rename = "notFound")] 24 + not_found: bool, 25 + }, 26 + #[serde(rename = "app.bsky.feed.defs#blockedPost")] 27 + Blocked { 28 + uri: String, 29 + blocked: bool, 30 + author: BlockedAuthor, 31 + }, 32 + }
+29 -10
lexica/src/app_bsky/feed.rs
··· 1 1 use super::RecordStats; 2 - use crate::app_bsky::actor::{ProfileView, ProfileViewBasic}; 2 + use crate::app_bsky::actor::{ProfileView, ProfileViewBasic, ProfileViewerState}; 3 3 use crate::app_bsky::embed::Embed; 4 4 use crate::app_bsky::graph::ListViewBasic; 5 5 use crate::app_bsky::richtext::FacetMain; ··· 8 8 use serde::{Deserialize, Serialize}; 9 9 use std::str::FromStr; 10 10 11 + #[derive(Clone, Default, Debug, Serialize)] 12 + #[serde(rename_all = "camelCase")] 13 + pub struct PostViewerState { 14 + #[serde(skip_serializing_if = "Option::is_none")] 15 + pub repost: Option<String>, 16 + #[serde(skip_serializing_if = "Option::is_none")] 17 + pub like: Option<String>, 18 + pub bookmarked: bool, 19 + pub thread_muted: bool, 20 + pub reply_disabled: bool, 21 + pub embedding_disabled: bool, 22 + pub pinned: bool, 23 + } 24 + 11 25 #[derive(Clone, Debug, Serialize)] 12 26 #[serde(rename_all = "camelCase")] 13 27 pub struct PostView { ··· 23 37 24 38 #[serde(skip_serializing_if = "Vec::is_empty")] 25 39 pub labels: Vec<Label>, 26 - // #[serde(skip_serializing_if = "Option::is_none")] 27 - // pub viewer: Option<()>, 40 + #[serde(skip_serializing_if = "Option::is_none")] 41 + pub viewer: Option<PostViewerState>, 28 42 #[serde(skip_serializing_if = "Option::is_none")] 29 43 pub threadgate: Option<ThreadgateView>, 30 44 ··· 123 137 124 138 #[derive(Clone, Debug, Serialize)] 125 139 pub struct BlockedAuthor { 126 - pub uri: String, 127 - // pub viewer: Option<()>, 140 + pub did: String, 141 + pub viewer: Option<ProfileViewerState>, 142 + } 143 + 144 + #[derive(Clone, Default, Debug, Serialize)] 145 + #[serde(rename_all = "camelCase")] 146 + pub struct GeneratorViewerState { 147 + #[serde(skip_serializing_if = "Option::is_none")] 148 + pub like: Option<String>, 128 149 } 129 150 130 151 #[derive(Clone, Debug, Serialize)] ··· 148 169 pub accepts_interactions: bool, 149 170 #[serde(skip_serializing_if = "Vec::is_empty")] 150 171 pub labels: Vec<Label>, 151 - // #[serde(skip_serializing_if = "Option::is_none")] 152 - // pub viewer: Option<()>, 172 + #[serde(skip_serializing_if = "Option::is_none")] 173 + pub viewer: Option<GeneratorViewerState>, 153 174 #[serde(skip_serializing_if = "Option::is_none")] 154 175 pub content_mode: Option<GeneratorContentMode>, 155 176 ··· 219 240 #[serde(rename = "app.bsky.feed.defs#skeletonReasonPin")] 220 241 Pin {}, 221 242 #[serde(rename = "app.bsky.feed.defs#skeletonReasonRepost")] 222 - Repost { 223 - repost: String, 224 - }, 243 + Repost { repost: String }, 225 244 }
+12 -4
lexica/src/app_bsky/graph.rs
··· 6 6 use serde::{Deserialize, Serialize}; 7 7 use std::str::FromStr; 8 8 9 + #[derive(Clone, Default, Debug, Serialize)] 10 + #[serde(rename_all = "camelCase")] 11 + pub struct ListViewerState { 12 + pub muted: bool, 13 + #[serde(skip_serializing_if = "Option::is_none")] 14 + pub blocked: Option<String>, 15 + } 16 + 9 17 #[derive(Clone, Debug, Serialize)] 10 18 #[serde(rename_all = "camelCase")] 11 19 pub struct ListViewBasic { ··· 18 26 pub avatar: Option<String>, 19 27 pub list_item_count: i64, 20 28 21 - // #[serde(skip_serializing_if = "Option::is_none")] 22 - // pub viewer: Option<()>, 29 + #[serde(skip_serializing_if = "Option::is_none")] 30 + pub viewer: Option<ListViewerState>, 23 31 #[serde(skip_serializing_if = "Vec::is_empty")] 24 32 pub labels: Vec<Label>, 25 33 ··· 44 52 pub avatar: Option<String>, 45 53 pub list_item_count: i64, 46 54 47 - // #[serde(skip_serializing_if = "Option::is_none")] 48 - // pub viewer: Option<()>, 55 + #[serde(skip_serializing_if = "Option::is_none")] 56 + pub viewer: Option<ListViewerState>, 49 57 #[serde(skip_serializing_if = "Vec::is_empty")] 50 58 pub labels: Vec<Label>, 51 59
+11 -4
lexica/src/app_bsky/labeler.rs
··· 4 4 use chrono::prelude::*; 5 5 use serde::{Deserialize, Serialize}; 6 6 7 + #[derive(Clone, Default, Debug, Serialize)] 8 + #[serde(rename_all = "camelCase")] 9 + pub struct LabelerViewerState { 10 + #[serde(skip_serializing_if = "Option::is_none")] 11 + pub like: Option<String>, 12 + } 13 + 7 14 #[derive(Clone, Debug, Serialize)] 8 15 #[serde(rename_all = "camelCase")] 9 16 pub struct LabelerView { ··· 12 19 pub creator: ProfileView, 13 20 14 21 pub like_count: i64, 15 - // #[serde(skip_serializing_if = "Option::is_none")] 16 - // pub viewer: Option<()>, 22 + #[serde(skip_serializing_if = "Option::is_none")] 23 + pub viewer: Option<LabelerViewerState>, 17 24 #[serde(skip_serializing_if = "Vec::is_empty")] 18 25 pub labels: Vec<Label>, 19 26 pub indexed_at: DateTime<Utc>, ··· 27 34 pub creator: ProfileView, 28 35 29 36 pub like_count: i64, 30 - // #[serde(skip_serializing_if = "Option::is_none")] 31 - // pub viewer: Option<()>, 37 + #[serde(skip_serializing_if = "Option::is_none")] 38 + pub viewer: Option<LabelerViewerState>, 32 39 #[serde(skip_serializing_if = "Vec::is_empty")] 33 40 pub labels: Vec<Label>, 34 41 pub policies: LabelerPolicy,
+2
lexica/src/app_bsky/mod.rs
··· 1 1 use serde::Serialize; 2 2 3 3 pub mod actor; 4 + pub mod bookmark; 4 5 pub mod embed; 5 6 pub mod feed; 6 7 pub mod graph; 7 8 pub mod labeler; 8 9 pub mod richtext; 10 + pub mod unspecced; 9 11 10 12 #[derive(Clone, Default, Debug, Serialize)] 11 13 #[serde(rename_all = "camelCase")]
+33
lexica/src/app_bsky/unspecced.rs
··· 1 + use crate::app_bsky::feed::{BlockedAuthor, PostView}; 2 + use serde::Serialize; 3 + 4 + #[derive(Clone, Debug, Serialize)] 5 + pub struct ThreadV2Item { 6 + pub uri: String, 7 + pub depth: i32, 8 + pub value: ThreadV2ItemType, 9 + } 10 + 11 + #[derive(Clone, Debug, Serialize)] 12 + #[serde(tag = "$type")] 13 + pub enum ThreadV2ItemType { 14 + #[serde(rename = "app.bsky.unspecced.defs#threadItemPost")] 15 + Post(ThreadItemPost), 16 + #[serde(rename = "app.bsky.unspecced.defs#threadItemNoUnauthenticated")] 17 + NoUnauthenticated {}, 18 + #[serde(rename = "app.bsky.unspecced.defs#threadItemNotFound")] 19 + NotFound {}, 20 + #[serde(rename = "app.bsky.unspecced.defs#threadItemBlocked")] 21 + Blocked { author: BlockedAuthor }, 22 + } 23 + 24 + #[derive(Clone, Debug, Serialize)] 25 + #[serde(rename_all = "camelCase")] 26 + pub struct ThreadItemPost { 27 + pub post: PostView, 28 + pub more_parents: bool, 29 + pub more_replies: i32, 30 + pub op_thread: bool, 31 + pub hidden_by_threadgate: bool, 32 + pub muted_by_viewer: bool, 33 + }
+14
lexica/src/community_lexicon/bookmarks.rs
··· 1 + use chrono::prelude::*; 2 + use serde::{Deserialize, Serialize}; 3 + 4 + #[derive(Clone, Debug, Deserialize, Serialize)] 5 + #[serde(tag = "$type")] 6 + #[serde(rename = "community.lexicon.bookmarks.bookmark")] 7 + #[serde(rename_all = "camelCase")] 8 + pub struct Bookmark { 9 + pub subject: String, 10 + #[serde(default)] 11 + #[serde(skip_serializing_if = "Vec::is_empty")] 12 + pub tags: Vec<String>, 13 + pub created_at: DateTime<Utc>, 14 + }
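For reference, a wire-format record that round-trips through this new type; the JSON values below are made up, only the shape follows the struct above (`$type` matches the serde tag, `tags` is optional via `#[serde(default)]`, and `createdAt` parses as RFC 3339):

```rust
use lexica::community_lexicon::bookmarks::Bookmark;

fn parse_bookmark() -> serde_json::Result<Bookmark> {
    serde_json::from_str(
        r#"{
            "$type": "community.lexicon.bookmarks.bookmark",
            "subject": "at://did:plc:example/app.bsky.feed.post/3kexample",
            "tags": ["reading-list"],
            "createdAt": "2025-09-02T19:08:33Z"
        }"#,
    )
}
```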
+1
lexica/src/community_lexicon/mod.rs
··· 1 + pub mod bookmarks;
+35 -1
lexica/src/lib.rs
··· 1 - use serde::Serialize; 1 + use cid::Cid; 2 + use serde::{Deserialize, Serialize}; 3 + 4 + pub use utils::LinkRef; 2 5 3 6 pub mod app_bsky; 4 7 pub mod com_atproto; 8 + pub mod community_lexicon; 9 + mod utils; 5 10 6 11 #[derive(Clone, Debug, Serialize)] 7 12 pub struct JsonBytes { 8 13 #[serde(rename = "$bytes")] 9 14 pub bytes: String, 10 15 } 16 + 17 + #[derive(Clone, Debug, Deserialize, Serialize)] 18 + pub struct StrongRef { 19 + #[serde( 20 + deserialize_with = "utils::cid_from_string", 21 + serialize_with = "utils::cid_as_str" 22 + )] 23 + pub cid: Cid, 24 + pub uri: String, 25 + } 26 + 27 + impl StrongRef { 28 + pub fn new_from_str(uri: String, cid: &str) -> Result<Self, cid::Error> { 29 + let cid = cid.parse()?; 30 + Ok(StrongRef { uri, cid }) 31 + } 32 + } 33 + 34 + #[derive(Clone, Debug, Deserialize, Serialize)] 35 + #[serde(tag = "$type")] 36 + #[serde(rename = "blob")] 37 + #[serde(rename_all = "camelCase")] 38 + pub struct Blob { 39 + pub mime_type: String, 40 + #[serde(rename = "ref")] 41 + #[serde(serialize_with = "utils::cid_as_link")] 42 + pub cid: Cid, 43 + pub size: i32, 44 + }
+31
lexica/src/utils.rs
··· 1 + use cid::Cid; 2 + use serde::{Deserialize, Deserializer, Serialize, Serializer}; 3 + 4 + pub fn cid_from_string<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Cid, D::Error> { 5 + let str = String::deserialize(deserializer)?; 6 + 7 + Cid::try_from(str).map_err(serde::de::Error::custom) 8 + } 9 + 10 + pub fn cid_as_str<S>(inp: &Cid, serializer: S) -> Result<S::Ok, S::Error> 11 + where 12 + S: Serializer, 13 + { 14 + inp.to_string().serialize(serializer) 15 + } 16 + 17 + #[derive(Debug, Deserialize, Serialize)] 18 + pub struct LinkRef { 19 + #[serde(rename = "$link")] 20 + link: String, 21 + } 22 + 23 + pub fn cid_as_link<S>(inp: &Cid, serializer: S) -> Result<S::Ok, S::Error> 24 + where 25 + S: Serializer, 26 + { 27 + LinkRef { 28 + link: inp.to_string(), 29 + } 30 + .serialize(serializer) 31 + }
+2 -2
migrations/2025-02-16-142357_posts/up.sql
··· 123 123 post_uri text not null, 124 124 125 125 hidden_replies text[] not null, 126 - allow text[] not null, 127 - allowed_lists text[] not null, 126 + allow text[], 127 + allowed_lists text[], 128 128 129 129 record jsonb not null, 130 130
+2
migrations/2025-08-03-125504_mutes/down.sql
··· 1 + drop table list_mutes; 2 + drop table mutes;
+22
migrations/2025-08-03-125504_mutes/up.sql
··· 1 + create table list_mutes 2 + ( 3 + did text not null references actors (did), 4 + list_uri text not null, 5 + created_at timestamptz not null default now(), 6 + 7 + primary key (did, list_uri) 8 + ); 9 + 10 + create index listmutes_list_index on list_mutes using hash (list_uri); 11 + create index listmutes_did_index on list_mutes using hash (did); 12 + 13 + create table mutes 14 + ( 15 + did text not null references actors (did), 16 + subject text not null, 17 + created_at timestamptz not null default now(), 18 + 19 + primary key (did, subject) 20 + ); 21 + 22 + create index mutes_subject_index on mutes (subject);
+1
migrations/2025-09-02-190833_bookmarks/down.sql
··· 1 + drop table bookmarks;
+19
migrations/2025-09-02-190833_bookmarks/up.sql
··· 1 + create table bookmarks 2 + ( 3 + did text not null references actors (did), 4 + rkey text, 5 + subject text not null, 6 + subject_cid text, 7 + subject_type text not null, 8 + tags text[] not null default ARRAY []::text[], 9 + 10 + created_at timestamptz not null default now(), 11 + 12 + primary key (did, subject) 13 + ); 14 + 15 + create index bookmarks_rkey_index on bookmarks (rkey); 16 + create index bookmarks_subject_index on bookmarks (subject); 17 + create index bookmarks_subject_type_index on bookmarks (subject_type); 18 + create index bookmarks_tags_index on bookmarks using gin (tags); 19 + create unique index bookmarks_rkey_ui on bookmarks (did, rkey);
+17
migrations/2025-09-17-190406_viewer-interactions/down.sql
··· 1 + drop trigger t_profile_state_ins on follows; 2 + drop trigger t_profile_state_del on follows; 3 + drop trigger t_profile_state_ins on blocks; 4 + drop trigger t_profile_state_del on blocks; 5 + drop trigger t_profile_state_ins on mutes; 6 + drop trigger t_profile_state_del on mutes; 7 + 8 + drop function f_profile_state_ins_follow; 9 + drop function f_profile_state_del_follow; 10 + drop function f_profile_state_ins_block; 11 + drop function f_profile_state_del_block; 12 + drop function f_profile_state_ins_mute; 13 + drop function f_profile_state_del_mute; 14 + 15 + drop view v_list_mutes_exp; 16 + drop view v_list_block_exp; 17 + drop table profile_states;
+146
migrations/2025-09-17-190406_viewer-interactions/up.sql
··· 1 + create table profile_states 2 + ( 3 + did text not null, 4 + subject text not null, 5 + muting bool not null default false, -- subj muted by did 6 + blocked bool not null default false, -- did blocked by subj 7 + blocking text, -- subj blocked by did 8 + following text, -- rkey of follow record (did->subj) 9 + followed text, -- rkey of follow record (subj->did) 10 + 11 + primary key (did, subject) 12 + ); 13 + 14 + create index profilestates_did_index on profile_states using hash (did); 15 + create index profilestates_sub_index on profile_states using hash (subject); 16 + 17 + create view v_list_block_exp as 18 + ( 19 + select lb.list_uri, did, li.subject 20 + from list_blocks lb 21 + inner join list_items li on lb.list_uri = li.list_uri 22 + ); 23 + 24 + create view v_list_mutes_exp as 25 + ( 26 + select lm.list_uri, did, li.subject 27 + from list_mutes lm 28 + inner join list_items li on lm.list_uri = li.list_uri 29 + ); 30 + 31 + -- profile_states follow triggers 32 + create function f_profile_state_ins_follow() returns trigger 33 + language plpgsql as 34 + $$ 35 + begin 36 + insert into profile_states (did, subject, following) 37 + VALUES (NEW.did, NEW.subject, NEW.rkey) 38 + ON CONFLICT (did, subject) DO UPDATE SET following=excluded.following; 39 + 40 + insert into profile_states (did, subject, followed) 41 + VALUES (NEW.subject, NEW.did, NEW.rkey) 42 + ON CONFLICT (did, subject) DO UPDATE SET followed=excluded.followed; 43 + 44 + return NEW; 45 + end; 46 + $$; 47 + 48 + create trigger t_profile_state_ins 49 + before insert 50 + on follows 51 + for each row 52 + execute procedure f_profile_state_ins_follow(); 53 + 54 + create function f_profile_state_del_follow() returns trigger 55 + language plpgsql as 56 + $$ 57 + begin 58 + update profile_states set following = null where did = OLD.did and subject = OLD.subject; 59 + update profile_states set followed = null where did = OLD.subject and subject = OLD.did; 60 + 61 + return OLD; 62 + end; 63 + $$; 64 + 65 + create trigger t_profile_state_del 66 + before delete 67 + on follows 68 + for each row 69 + execute procedure f_profile_state_del_follow(); 70 + 71 + -- profile_states block triggers 72 + 73 + create function f_profile_state_ins_block() returns trigger 74 + language plpgsql as 75 + $$ 76 + begin 77 + insert into profile_states (did, subject, blocking) 78 + VALUES (NEW.did, NEW.subject, NEW.rkey) 79 + ON CONFLICT (did, subject) DO UPDATE SET blocking=excluded.blocking; 80 + 81 + insert into profile_states (did, subject, blocked) 82 + VALUES (NEW.subject, NEW.did, TRUE) 83 + ON CONFLICT (did, subject) DO UPDATE SET blocked=excluded.blocked; 84 + 85 + return NEW; 86 + end; 87 + $$; 88 + 89 + create trigger t_profile_state_ins 90 + before insert 91 + on blocks 92 + for each row 93 + execute procedure f_profile_state_ins_block(); 94 + 95 + create function f_profile_state_del_block() returns trigger 96 + language plpgsql as 97 + $$ 98 + begin 99 + update profile_states set blocking = null where did = OLD.did and subject = OLD.subject; 100 + update profile_states set blocked = FALSE where did = OLD.subject and subject = OLD.did; 101 + 102 + return OLD; 103 + end; 104 + $$; 105 + 106 + create trigger t_profile_state_del 107 + before delete 108 + on blocks 109 + for each row 110 + execute procedure f_profile_state_del_block(); 111 + 112 + -- profile_states mutes triggers 113 + 114 + create function f_profile_state_ins_mute() returns trigger 115 + language plpgsql as 116 + $$ 117 + begin 118 + insert into profile_states (did, 
subject, muting) 119 + VALUES (NEW.did, NEW.subject, TRUE) 120 + ON CONFLICT (did, subject) DO UPDATE SET muting=excluded.muting; 121 + 122 + return NEW; 123 + end; 124 + $$; 125 + 126 + create trigger t_profile_state_ins 127 + before insert 128 + on mutes 129 + for each row 130 + execute procedure f_profile_state_ins_mute(); 131 + 132 + create function f_profile_state_del_mute() returns trigger 133 + language plpgsql as 134 + $$ 135 + begin 136 + update profile_states set muting = false where did = OLD.did and subject = OLD.subject; 137 + 138 + return OLD; 139 + end; 140 + $$; 141 + 142 + create trigger t_profile_state_del 143 + before delete 144 + on mutes 145 + for each row 146 + execute procedure f_profile_state_del_mute();
+3
migrations/2025-09-24-205239_profiles-4224/down.sql
··· 1 + alter table profiles 2 + drop column pronouns, 3 + drop column website;
+3
migrations/2025-09-24-205239_profiles-4224/up.sql
··· 1 + alter table profiles 2 + add column pronouns text, 3 + add column website text;
+15
migrations/2025-09-27-171241_post-tweaks/down.sql
··· 1 + alter table posts 2 + drop column mentions, 3 + drop column violates_threadgate; 4 + 5 + drop trigger t_author_feed_ins_post on posts; 6 + drop trigger t_author_feed_del_post on posts; 7 + drop trigger t_author_feed_ins_repost on reposts; 8 + drop trigger t_author_feed_del_repost on reposts; 9 + 10 + drop function f_author_feed_ins_post; 11 + drop function f_author_feed_del_post; 12 + drop function f_author_feed_ins_repost; 13 + drop function f_author_feed_del_repost; 14 + 15 + drop table author_feeds;
+79
migrations/2025-09-27-171241_post-tweaks/up.sql
··· 1 + alter table posts
2 + add column mentions text[],
3 + add column violates_threadgate bool not null default false;
4 + 
5 + create table author_feeds
6 + (
7 + uri text primary key,
8 + cid text not null,
9 + post text not null,
10 + did text not null,
11 + typ text not null,
12 + sort_at timestamptz not null
13 + );
14 + 
15 + -- author_feeds post triggers
16 + create function f_author_feed_ins_post() returns trigger
17 + language plpgsql as
18 + $$
19 + begin
20 + insert into author_feeds (uri, cid, post, did, typ, sort_at)
21 + VALUES (NEW.at_uri, NEW.cid, NEW.at_uri, NEW.did, 'post', NEW.created_at)
22 + on conflict do nothing;
23 + return NEW;
24 + end;
25 + $$;
26 + 
27 + create trigger t_author_feed_ins_post
28 + before insert
29 + on posts
30 + for each row
31 + execute procedure f_author_feed_ins_post();
32 + 
33 + create function f_author_feed_del_post() returns trigger
34 + language plpgsql as
35 + $$
36 + begin
37 + delete from author_feeds where did = OLD.did and uri = OLD.at_uri and typ = 'post';
38 + return OLD;
39 + end;
40 + $$;
41 + 
42 + create trigger t_author_feed_del_post
43 + before delete
44 + on posts
45 + for each row
46 + execute procedure f_author_feed_del_post();
47 + 
48 + -- author_feeds repost triggers
49 + create function f_author_feed_ins_repost() returns trigger
50 + language plpgsql as
51 + $$
52 + begin
53 + insert into author_feeds (uri, cid, post, did, typ, sort_at)
54 + VALUES ('at://' || NEW.did || '/app.bsky.feed.repost/' || NEW.rkey, NEW.post_cid, NEW.post, NEW.did, 'repost', NEW.created_at)
55 + on conflict do nothing;
56 + return NEW;
57 + end;
58 + $$;
59 + 
60 + create trigger t_author_feed_ins_repost
61 + before insert
62 + on reposts
63 + for each row
64 + execute procedure f_author_feed_ins_repost();
65 + 
66 + create function f_author_feed_del_repost() returns trigger
67 + language plpgsql as
68 + $$
69 + begin
70 + delete from author_feeds where did = OLD.did and post = OLD.post and typ = 'repost';
71 + return OLD;
72 + end;
73 + $$;
74 + 
75 + create trigger t_author_feed_del_repost
76 + before delete
77 + on reposts
78 + for each row
79 + execute procedure f_author_feed_del_repost();
+11 -3
parakeet/Cargo.toml
··· 6 6 [dependencies] 7 7 async-recursion = "1.1.1" 8 8 axum = { version = "0.8", features = ["json"] } 9 + axum-tracing-opentelemetry = "0.32" 9 10 axum-extra = { version = "0.10.0", features = ["query", "typed-header"] } 10 11 base64 = "0.22" 11 12 chrono = { version = "0.4.39", features = ["serde"] } 12 - dataloader = { path = "../dataloader-rs", default-features = false, features = ["runtime-tokio"] } 13 + dataloader = { path = "../dataloader-rs" } 13 14 deadpool = { version = "0.12.1", features = ["managed"] } 14 15 did-resolver = { path = "../did-resolver" } 15 16 diesel = { version = "2.2.6", features = ["chrono", "serde_json"] } ··· 21 22 jsonwebtoken = { git = "https://gitlab.com/parakeet-social/jsonwebtoken", branch = "es256k" } 22 23 lexica = { path = "../lexica" } 23 24 multibase = "0.9.1" 25 + opentelemetry = "0.31.0" 26 + opentelemetry-otlp = "0.31.0" 27 + opentelemetry_sdk = "0.31.0" 24 28 parakeet-db = { path = "../parakeet-db" } 25 - parakeet-index = { path = "../parakeet-index" } 29 + parakeet-index = { path = "../parakeet-index", features = ["otel"] } 30 + redis = { version = "0.32", features = ["tokio-native-tls-comp"] } 26 31 reqwest = { version = "0.12", features = ["json"] } 27 32 serde = { version = "1.0.217", features = ["derive"] } 33 + serde_ipld_dagcbor = "0.6.1" 28 34 serde_json = "1.0.134" 29 35 tokio = { version = "1.42.0", features = ["full"] } 36 + tower = "0.5" 30 37 tower-http = { version = "0.6.2", features = ["cors", "trace"] } 31 38 tracing = "0.1.40" 32 - tracing-subscriber = "0.3.18" 39 + tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } 40 + tracing-opentelemetry = "0.32"
+11
parakeet/justfile
··· 1 + @release: 2 + cargo build --release 3 + 4 + @lint: 5 + cargo clippy 6 + 7 + @run *params: 8 + cargo run -- {{params}} 9 + 10 + @docker platform='linux/amd64' branch='main': 11 + docker buildx build --platform {{platform}} -t registry.gitlab.com/parakeet-social/parakeet/parakeet:{{branch}} . -f parakeet/Dockerfile
-1
parakeet/run.sh
··· 1 - cargo run
+150
parakeet/src/cache.rs
··· 1 + use dataloader::async_cached::AsyncCache;
2 + use redis::aio::MultiplexedConnection;
3 + use redis::AsyncTypedCommands;
4 + use serde::{Deserialize, Serialize};
5 + use std::marker::PhantomData;
6 + 
7 + /// General Loader Cache
8 + pub struct LoaderCache<V> {
9 + conn: MultiplexedConnection,
10 + exp: Option<u64>,
11 + _phantom: PhantomData<V>,
12 + }
13 + 
14 + impl<V> LoaderCache<V> {
15 + pub fn new(conn: &MultiplexedConnection, exp: Option<u64>) -> Self {
16 + LoaderCache::<V> {
17 + conn: conn.clone(),
18 + exp,
19 + _phantom: PhantomData,
20 + }
21 + }
22 + }
23 + 
24 + impl<V> AsyncCache for LoaderCache<V>
25 + where
26 + V: for<'a> Deserialize<'a> + Serialize,
27 + {
28 + type Key = String;
29 + type Val = V;
30 + 
31 + async fn get(&mut self, key: &Self::Key) -> Option<Self::Val> {
32 + let res: Option<Vec<u8>> = redis::AsyncCommands::get(&mut self.conn, key).await.ok()?;
33 + 
34 + match serde_ipld_dagcbor::from_slice(&res?) {
35 + Ok(v) => Some(v),
36 + Err(err) => {
37 + tracing::error!(key, "failed to decode cache value: {err}");
38 + None
39 + }
40 + }
41 + }
42 + 
43 + async fn insert(&mut self, key: Self::Key, val: Self::Val) {
44 + let data = match serde_ipld_dagcbor::to_vec(&val) {
45 + Ok(data) => data,
46 + Err(err) => {
47 + tracing::error!(key, "failed to encode cache value: {err}");
48 + return;
49 + }
50 + };
51 + 
52 + if let Some(exp) = self.exp {
53 + self.conn.set_ex(key, data, exp).await.unwrap();
54 + } else {
55 + self.conn.set(key, data).await.unwrap();
56 + }
57 + }
58 + 
59 + async fn remove(&mut self, key: &Self::Key) -> Option<Self::Val> {
60 + let res: Option<Vec<u8>> = redis::AsyncCommands::get_del(&mut self.conn, key)
61 + .await
62 + .ok()?;
63 + 
64 + match serde_ipld_dagcbor::from_slice(&res?) {
65 + Ok(v) => Some(v),
66 + Err(err) => {
67 + tracing::error!(key, "failed to decode cache value: {err}");
68 + None
69 + }
70 + }
71 + }
72 + 
73 + async fn clear(&mut self) {}
74 + }
75 + 
76 + /// A Loader Cache with a key prefix
77 + pub struct PrefixedLoaderCache<V> {
78 + conn: MultiplexedConnection,
79 + prefix: String,
80 + exp: Option<u64>,
81 + _phantom: PhantomData<V>,
82 + }
83 + 
84 + impl<V> PrefixedLoaderCache<V> {
85 + pub fn new(conn: &MultiplexedConnection, prefix: String, exp: Option<u64>) -> Self {
86 + PrefixedLoaderCache {
87 + conn: conn.clone(),
88 + prefix,
89 + exp,
90 + _phantom: PhantomData,
91 + }
92 + }
93 + }
94 + 
95 + impl<V> AsyncCache for PrefixedLoaderCache<V>
96 + where
97 + V: for<'a> Deserialize<'a> + Serialize,
98 + {
99 + type Key = String;
100 + type Val = V;
101 + 
102 + async fn get(&mut self, key: &Self::Key) -> Option<Self::Val> {
103 + let key = format!("{}#{}", self.prefix, key);
104 + 
105 + let res: Option<Vec<u8>> = redis::AsyncCommands::get(&mut self.conn, &key).await.ok()?;
106 + 
107 + match serde_ipld_dagcbor::from_slice(&res?)
{ 108 + Ok(v) => Some(v), 109 + Err(err) => { 110 + tracing::error!(key, "failed to decode cache value: {err}"); 111 + None 112 + } 113 + } 114 + } 115 + 116 + async fn insert(&mut self, key: Self::Key, val: Self::Val) { 117 + let key = format!("{}#{}", self.prefix, key); 118 + let data = match serde_ipld_dagcbor::to_vec(&val) { 119 + Ok(data) => data, 120 + Err(err) => { 121 + tracing::error!(key = &key, "failed to encode cache value: {err}"); 122 + return; 123 + } 124 + }; 125 + 126 + if let Some(exp) = self.exp { 127 + self.conn.set_ex(key, data, exp).await.unwrap(); 128 + } else { 129 + self.conn.set(key, data).await.unwrap(); 130 + } 131 + } 132 + 133 + async fn remove(&mut self, key: &Self::Key) -> Option<Self::Val> { 134 + let key = format!("{}#{}", self.prefix, key); 135 + 136 + let res: Option<Vec<u8>> = redis::AsyncCommands::get_del(&mut self.conn, &key) 137 + .await 138 + .ok()?; 139 + 140 + match serde_ipld_dagcbor::from_slice(&res?) { 141 + Ok(v) => Some(v), 142 + Err(err) => { 143 + tracing::error!(key, "failed to decode cache value: {err}"); 144 + None 145 + } 146 + } 147 + } 148 + 149 + async fn clear(&mut self) {} 150 + }
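These caches slot into the vendored dataloader's `async_cached::Loader`, and `PrefixedLoaderCache` produces the same `{prefix}#{key}` names the consumer now purges on writes (e.g. `profile#{did}`). A hypothetical wiring sketch; `ProfileBatcher` and `ProfileRow` are stand-ins, the TTL is illustrative, and the redis constructor is assumed to be the usual multiplexed one:

```rust
use dataloader::async_cached::Loader;
use dataloader::BatchFn;
use redis::RedisResult;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use crate::cache::PrefixedLoaderCache;

// Stand-in value type; anything Serialize + Deserialize + Clone works.
#[derive(Clone, Serialize, Deserialize)]
struct ProfileRow {
    did: String,
    handle: Option<String>,
}

// Stand-in batch fn; the real one would query Postgres for the missing DIDs.
struct ProfileBatcher;

impl BatchFn<String, ProfileRow> for ProfileBatcher {
    async fn load(&mut self, keys: &[String]) -> HashMap<String, ProfileRow> {
        keys.iter()
            .map(|did| (did.clone(), ProfileRow { did: did.clone(), handle: None }))
            .collect()
    }
}

async fn build_profile_loader(
    redis_uri: &str,
) -> RedisResult<Loader<String, ProfileRow, ProfileBatcher, PrefixedLoaderCache<ProfileRow>>> {
    let conn = redis::Client::open(redis_uri)?
        .get_multiplexed_async_connection()
        .await?;
    // keys become `profile#{did}`, so the consumer's DEL-on-write invalidates them;
    // the 60s TTL is a safety net in case an invalidation is ever missed
    let cache = PrefixedLoaderCache::new(&conn, "profile".to_string(), Some(60));
    Ok(Loader::new(ProfileBatcher, cache))
}
```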
+11
parakeet/src/config.rs
··· 13 13 14 14 #[derive(Debug, Deserialize)] 15 15 pub struct Config { 16 + #[serde(flatten)] 17 + pub instruments: ConfigInstruments, 16 18 pub index_uri: String, 17 19 pub database_url: String, 20 + pub redis_uri: String, 18 21 #[serde(default)] 19 22 pub server: ConfigServer, 20 23 pub service: ConfigService, ··· 26 29 pub did_allowlist: Option<Vec<String>>, 27 30 #[serde(default)] 28 31 pub migrate: bool, 32 + } 33 + 34 + #[derive(Debug, Deserialize)] 35 + pub struct ConfigInstruments { 36 + #[serde(default)] 37 + pub otel_enable: bool, 38 + #[serde(default)] 39 + pub log_json: bool, 29 40 } 30 41 31 42 #[derive(Debug, Deserialize)]
+297
parakeet/src/db.rs
··· 1 1 use diesel::prelude::*; 2 + use diesel::sql_types::{Array, Bool, Integer, Nullable, Text}; 2 3 use diesel_async::{AsyncPgConnection, RunQueryDsl}; 4 + use parakeet_db::models::TextArray; 3 5 use parakeet_db::{schema, types}; 6 + use tracing::instrument; 4 7 8 + #[instrument(skip_all)] 5 9 pub async fn get_actor_status( 6 10 conn: &mut AsyncPgConnection, 7 11 did: &str, ··· 13 17 .await 14 18 .optional() 15 19 } 20 + 21 + #[derive(Clone, Debug, QueryableByName)] 22 + #[diesel(check_for_backend(diesel::pg::Pg))] 23 + pub struct ProfileStateRet { 24 + #[diesel(sql_type = Text)] 25 + pub did: String, 26 + #[diesel(sql_type = Text)] 27 + pub subject: String, 28 + #[diesel(sql_type = Nullable<Bool>)] 29 + pub muting: Option<bool>, 30 + #[diesel(sql_type = Nullable<Bool>)] 31 + pub blocked: Option<bool>, 32 + #[diesel(sql_type = Nullable<Text>)] 33 + pub blocking: Option<String>, 34 + #[diesel(sql_type = Nullable<Text>)] 35 + pub following: Option<String>, 36 + #[diesel(sql_type = Nullable<Text>)] 37 + pub followed: Option<String>, 38 + #[diesel(sql_type = Nullable<Text>)] 39 + pub list_block: Option<String>, 40 + #[diesel(sql_type = Nullable<Text>)] 41 + pub list_mute: Option<String>, 42 + } 43 + 44 + #[instrument(skip_all)] 45 + pub async fn get_profile_state( 46 + conn: &mut AsyncPgConnection, 47 + did: &str, 48 + sub: &str, 49 + ) -> QueryResult<Option<ProfileStateRet>> { 50 + diesel::sql_query(include_str!("sql/profile_state.sql")) 51 + .bind::<Text, _>(did) 52 + .bind::<Array<Text>, _>(vec![sub]) 53 + .get_result::<ProfileStateRet>(conn) 54 + .await 55 + .optional() 56 + } 57 + 58 + #[instrument(skip_all)] 59 + pub async fn get_profile_states( 60 + conn: &mut AsyncPgConnection, 61 + did: &str, 62 + sub: &[String], 63 + ) -> QueryResult<Vec<ProfileStateRet>> { 64 + diesel::sql_query(include_str!("sql/profile_state.sql")) 65 + .bind::<Text, _>(did) 66 + .bind::<Array<Text>, _>(sub) 67 + .load::<ProfileStateRet>(conn) 68 + .await 69 + } 70 + 71 + #[derive(Clone, Debug, QueryableByName)] 72 + #[diesel(check_for_backend(diesel::pg::Pg))] 73 + pub struct PostStateRet { 74 + #[diesel(sql_type = diesel::sql_types::Text)] 75 + pub at_uri: String, 76 + #[diesel(sql_type = diesel::sql_types::Text)] 77 + pub did: String, 78 + #[diesel(sql_type = diesel::sql_types::Text)] 79 + pub cid: String, 80 + #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)] 81 + pub like_rkey: Option<String>, 82 + #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)] 83 + pub repost_rkey: Option<String>, 84 + #[diesel(sql_type = diesel::sql_types::Bool)] 85 + pub bookmarked: bool, 86 + // #[diesel(sql_type = diesel::sql_types::Bool)] 87 + // pub muted: bool, 88 + #[diesel(sql_type = diesel::sql_types::Bool)] 89 + pub embed_disabled: bool, 90 + #[diesel(sql_type = diesel::sql_types::Bool)] 91 + pub pinned: bool, 92 + } 93 + 94 + #[instrument(skip_all)] 95 + pub async fn get_post_state( 96 + conn: &mut AsyncPgConnection, 97 + did: &str, 98 + subject: &str, 99 + ) -> QueryResult<Option<PostStateRet>> { 100 + diesel::sql_query(include_str!("sql/post_state.sql")) 101 + .bind::<Text, _>(did) 102 + .bind::<Array<Text>, _>(vec![subject]) 103 + .get_result::<PostStateRet>(conn) 104 + .await 105 + .optional() 106 + } 107 + 108 + #[instrument(skip_all)] 109 + pub async fn get_post_states( 110 + conn: &mut AsyncPgConnection, 111 + did: &str, 112 + sub: &[String], 113 + ) -> QueryResult<Vec<PostStateRet>> { 114 + diesel::sql_query(include_str!("sql/post_state.sql")) 115 + .bind::<Text, 
_>(did) 116 + .bind::<Array<Text>, _>(sub) 117 + .load::<PostStateRet>(conn) 118 + .await 119 + } 120 + 121 + #[derive(Clone, Debug, QueryableByName)] 122 + #[diesel(check_for_backend(diesel::pg::Pg))] 123 + pub struct ListStateRet { 124 + #[diesel(sql_type = Text)] 125 + pub at_uri: String, 126 + #[diesel(sql_type = Bool)] 127 + pub muted: bool, 128 + #[diesel(sql_type = Nullable<Text>)] 129 + pub block: Option<String>, 130 + } 131 + 132 + #[instrument(skip_all)] 133 + pub async fn get_list_state( 134 + conn: &mut AsyncPgConnection, 135 + did: &str, 136 + subject: &str, 137 + ) -> QueryResult<Option<ListStateRet>> { 138 + diesel::sql_query(include_str!("sql/list_states.sql")) 139 + .bind::<Text, _>(did) 140 + .bind::<Array<Text>, _>(vec![subject]) 141 + .get_result::<ListStateRet>(conn) 142 + .await 143 + .optional() 144 + } 145 + 146 + #[instrument(skip_all)] 147 + pub async fn get_list_states( 148 + conn: &mut AsyncPgConnection, 149 + did: &str, 150 + sub: &[String], 151 + ) -> QueryResult<Vec<ListStateRet>> { 152 + diesel::sql_query(include_str!("sql/list_states.sql")) 153 + .bind::<Text, _>(did) 154 + .bind::<Array<Text>, _>(sub) 155 + .load::<ListStateRet>(conn) 156 + .await 157 + } 158 + 159 + #[instrument(skip_all)] 160 + pub async fn get_like_state( 161 + conn: &mut AsyncPgConnection, 162 + did: &str, 163 + subject: &str, 164 + ) -> QueryResult<Option<(String, String)>> { 165 + schema::likes::table 166 + .select((schema::likes::did, schema::likes::rkey)) 167 + .filter( 168 + schema::likes::did 169 + .eq(did) 170 + .and(schema::likes::subject.eq(subject)), 171 + ) 172 + .get_result(conn) 173 + .await 174 + .optional() 175 + } 176 + 177 + #[instrument(skip_all)] 178 + pub async fn get_like_states( 179 + conn: &mut AsyncPgConnection, 180 + did: &str, 181 + sub: &[String], 182 + ) -> QueryResult<Vec<(String, String, String)>> { 183 + schema::likes::table 184 + .select(( 185 + schema::likes::subject, 186 + schema::likes::did, 187 + schema::likes::rkey, 188 + )) 189 + .filter( 190 + schema::likes::did 191 + .eq(did) 192 + .and(schema::likes::subject.eq_any(sub)), 193 + ) 194 + .load(conn) 195 + .await 196 + } 197 + 198 + #[instrument(skip_all)] 199 + pub async fn get_pinned_post_uri( 200 + conn: &mut AsyncPgConnection, 201 + did: &str, 202 + ) -> QueryResult<Option<String>> { 203 + schema::profiles::table 204 + .select(schema::profiles::pinned_uri.assume_not_null()) 205 + .filter( 206 + schema::profiles::did 207 + .eq(did) 208 + .and(schema::profiles::pinned_uri.is_not_null()), 209 + ) 210 + .get_result(conn) 211 + .await 212 + .optional() 213 + } 214 + 215 + #[derive(Debug, QueryableByName)] 216 + #[diesel(check_for_backend(diesel::pg::Pg))] 217 + #[allow(unused)] 218 + pub struct ThreadItem { 219 + #[diesel(sql_type = Text)] 220 + pub at_uri: String, 221 + #[diesel(sql_type = Nullable<Text>)] 222 + pub parent_uri: Option<String>, 223 + #[diesel(sql_type = Nullable<Text>)] 224 + pub root_uri: Option<String>, 225 + #[diesel(sql_type = Integer)] 226 + pub depth: i32, 227 + } 228 + 229 + #[instrument(skip_all)] 230 + pub async fn get_thread_children( 231 + conn: &mut AsyncPgConnection, 232 + uri: &str, 233 + depth: i32, 234 + ) -> QueryResult<Vec<ThreadItem>> { 235 + diesel::sql_query(include_str!("sql/thread.sql")) 236 + .bind::<Text, _>(uri) 237 + .bind::<Integer, _>(depth) 238 + .load(conn) 239 + .await 240 + } 241 + 242 + #[instrument(skip_all)] 243 + pub async fn get_thread_children_branching( 244 + conn: &mut AsyncPgConnection, 245 + uri: &str, 246 + depth: i32, 247 + 
branching_factor: i32, 248 + ) -> QueryResult<Vec<ThreadItem>> { 249 + diesel::sql_query(include_str!("sql/thread_branching.sql")) 250 + .bind::<Text, _>(uri) 251 + .bind::<Integer, _>(depth) 252 + .bind::<Integer, _>(branching_factor) 253 + .load(conn) 254 + .await 255 + } 256 + 257 + #[derive(Debug, QueryableByName)] 258 + #[diesel(check_for_backend(diesel::pg::Pg))] 259 + pub struct HiddenThreadChildItem { 260 + #[diesel(sql_type = Text)] 261 + pub at_uri: String, 262 + } 263 + 264 + #[instrument(skip_all)] 265 + pub async fn get_thread_children_hidden( 266 + conn: &mut AsyncPgConnection, 267 + uri: &str, 268 + root: &str, 269 + ) -> QueryResult<Vec<HiddenThreadChildItem>> { 270 + diesel::sql_query(include_str!("sql/thread_v2_hidden_children.sql")) 271 + .bind::<Text, _>(uri) 272 + .bind::<Text, _>(root) 273 + .load(conn) 274 + .await 275 + } 276 + 277 + #[instrument(skip_all)] 278 + pub async fn get_thread_parents( 279 + conn: &mut AsyncPgConnection, 280 + uri: &str, 281 + height: i32, 282 + ) -> QueryResult<Vec<ThreadItem>> { 283 + diesel::sql_query(include_str!("sql/thread_parent.sql")) 284 + .bind::<Text, _>(uri) 285 + .bind::<Integer, _>(height) 286 + .load(conn) 287 + .await 288 + } 289 + 290 + #[instrument(skip_all)] 291 + pub async fn get_root_post(conn: &mut AsyncPgConnection, uri: &str) -> QueryResult<Option<String>> { 292 + schema::posts::table 293 + .select(schema::posts::root_uri) 294 + .find(&uri) 295 + .get_result(conn) 296 + .await 297 + .optional() 298 + .map(|v| v.flatten()) 299 + } 300 + 301 + #[instrument(skip_all)] 302 + pub async fn get_threadgate_hiddens( 303 + conn: &mut AsyncPgConnection, 304 + uri: &str, 305 + ) -> QueryResult<Option<TextArray>> { 306 + schema::threadgates::table 307 + .select(schema::threadgates::hidden_replies) 308 + .find(&uri) 309 + .get_result(conn) 310 + .await 311 + .optional() 312 + }
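Note: the single-subject wrappers above reuse the same SQL as the batched variants by binding a one-element text[]. A minimal call-site sketch of the batched form; the pool handle, viewer DID, and subjects are placeholders, and the sketch assumes it sits alongside db.rs:

    use std::collections::HashMap;

    async fn viewer_states_example(
        pool: &diesel_async::pooled_connection::deadpool::Pool<diesel_async::AsyncPgConnection>,
    ) -> diesel::QueryResult<()> {
        let mut conn = pool.get().await.expect("pool checkout");

        // One round trip for a whole page of subjects: profile_state.sql
        // receives the viewer DID as $1 and the subject list as a text[] bind.
        let subjects = vec!["did:plc:alice".to_string(), "did:plc:bob".to_string()];
        let states = get_profile_states(&mut conn, "did:plc:viewer", &subjects).await?;

        // Key the rows by subject so hydration can join them back to its inputs.
        let by_subject: HashMap<String, ProfileStateRet> =
            states.into_iter().map(|s| (s.subject.clone(), s)).collect();
        assert!(by_subject.len() <= subjects.len());
        Ok(())
    }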
+3
parakeet/src/hydration/embed.rs
··· 8 8 use lexica::app_bsky::feed::PostView; 9 9 use parakeet_db::models; 10 10 use std::collections::HashMap; 11 + use tracing::instrument; 11 12 12 13 fn build_aspect_ratio(height: Option<i32>, width: Option<i32>) -> Option<AspectRatio> { 13 14 height ··· 176 177 out 177 178 } 178 179 180 + #[instrument(skip_all)] 179 181 pub async fn hydrate_embed(&self, post: String) -> Option<Embed> { 180 182 let (embed, author) = self.loaders.embed.load(post).await?; 181 183 ··· 195 197 } 196 198 } 197 199 200 + #[instrument(skip_all)] 198 201 pub async fn hydrate_embeds(&self, posts: Vec<String>) -> HashMap<String, Embed> { 199 202 let embeds = self.loaders.embed.load_many(posts).await; 200 203
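The `#[instrument(skip_all)]` attribute added throughout these hydration files opens a tracing span per call without recording any argument as a span field, which matters here because the arguments are large (vectors of AT URIs, loader handles). A generic sketch of the pattern:

    use tracing::instrument;

    // skip_all: the span carries only its name; no argument is
    // Debug-formatted into span fields on every call.
    #[instrument(skip_all)]
    async fn hydrate_example(uris: Vec<String>) {
        // Opt back in per-event where a value is actually useful.
        tracing::debug!(count = uris.len(), "hydrating");
    }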
+52 -6
parakeet/src/hydration/feedgen.rs
··· 1 1 use crate::hydration::map_labels; 2 2 use crate::xrpc::cdn::BskyCdn; 3 3 use lexica::app_bsky::actor::ProfileView; 4 - use lexica::app_bsky::feed::{GeneratorContentMode, GeneratorView}; 4 + use lexica::app_bsky::feed::{GeneratorContentMode, GeneratorView, GeneratorViewerState}; 5 5 use parakeet_db::models; 6 6 use std::collections::HashMap; 7 7 use std::str::FromStr; 8 + use tracing::instrument; 9 + 10 + fn build_viewer((did, rkey): (String, String)) -> GeneratorViewerState { 11 + GeneratorViewerState { 12 + like: Some(format!("at://{did}/app.bsky.feed.like/{rkey}")), 13 + } 14 + } 8 15 9 16 fn build_feedgen( 10 17 feedgen: models::FeedGen, 11 18 creator: ProfileView, 12 19 labels: Vec<models::Label>, 13 20 likes: Option<i32>, 21 + viewer: Option<GeneratorViewerState>, 14 22 cdn: &BskyCdn, 15 23 ) -> GeneratorView { 16 24 let content_mode = feedgen ··· 35 43 like_count: likes.unwrap_or_default() as i64, 36 44 accepts_interactions: feedgen.accepts_interactions, 37 45 labels: map_labels(labels), 46 + viewer, 38 47 content_mode, 39 48 indexed_at: feedgen.created_at, 40 49 } 41 50 } 42 51 43 52 impl super::StatefulHydrator<'_> { 53 + #[instrument(skip_all)] 44 54 pub async fn hydrate_feedgen(&self, feedgen: String) -> Option<GeneratorView> { 45 55 let labels = self.get_label(&feedgen).await; 46 - let (feedgen, likes) = self.loaders.feedgen.load(feedgen).await?; 56 + let viewer = self.get_feedgen_viewer_state(&feedgen).await; 57 + let likes = self.loaders.like.load(feedgen.clone()).await; 58 + let feedgen = self.loaders.feedgen.load(feedgen).await?; 47 59 let profile = self.hydrate_profile(feedgen.owner.clone()).await?; 48 60 49 - Some(build_feedgen(feedgen, profile, labels, likes, &self.cdn)) 61 + Some(build_feedgen( 62 + feedgen, profile, labels, likes, viewer, &self.cdn, 63 + )) 50 64 } 51 65 66 + #[instrument(skip_all)] 52 67 pub async fn hydrate_feedgens(&self, feedgens: Vec<String>) -> HashMap<String, GeneratorView> { 53 68 let labels = self.get_label_many(&feedgens).await; 69 + let viewers = self.get_feedgen_viewer_states(&feedgens).await; 70 + let mut likes = self.loaders.like.load_many(feedgens.clone()).await; 54 71 let feedgens = self.loaders.feedgen.load_many(feedgens).await; 55 72 56 73 let creators = feedgens 57 74 .values() 58 - .map(|(feedgen, _)| feedgen.owner.clone()) 75 + .map(|feedgen| feedgen.owner.clone()) 59 76 .collect(); 60 77 61 78 let creators = self.hydrate_profiles(creators).await; 62 79 63 80 feedgens 64 81 .into_iter() 65 - .filter_map(|(uri, (feedgen, likes))| { 82 + .filter_map(|(uri, feedgen)| { 66 83 let creator = creators.get(&feedgen.owner).cloned()?; 84 + let viewer = viewers.get(&uri).cloned(); 67 85 let labels = labels.get(&uri).cloned().unwrap_or_default(); 86 + let likes = likes.remove(&uri); 68 87 69 88 Some(( 70 89 uri, 71 - build_feedgen(feedgen, creator, labels, likes, &self.cdn), 90 + build_feedgen(feedgen, creator, labels, likes, viewer, &self.cdn), 72 91 )) 73 92 }) 74 93 .collect() 94 + } 95 + 96 + #[instrument(skip_all)] 97 + async fn get_feedgen_viewer_state(&self, subject: &str) -> Option<GeneratorViewerState> { 98 + if let Some(viewer) = &self.current_actor { 99 + let data = self.loaders.like_state.get(viewer, subject).await?; 100 + 101 + Some(build_viewer(data)) 102 + } else { 103 + None 104 + } 105 + } 106 + 107 + #[instrument(skip_all)] 108 + async fn get_feedgen_viewer_states( 109 + &self, 110 + subjects: &[String], 111 + ) -> HashMap<String, GeneratorViewerState> { 112 + if let Some(viewer) = &self.current_actor { 113 + let data 
= self.loaders.like_state.get_many(viewer, subjects).await; 114 + 115 + data.into_iter() 116 + .map(|(k, state)| (k, build_viewer(state))) 117 + .collect() 118 + } else { 119 + HashMap::new() 120 + } 75 121 } 76 122 }
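`build_viewer` reconstructs the viewer's like-record AT URI from the (did, rkey) pair returned by the like-state loader; the collection segment is fixed. A sketch with placeholder values (module-internal helper, so this assumes it runs inside hydration::feedgen):

    let state = build_viewer(("did:plc:alice".to_string(), "3kabc123".to_string()));
    assert_eq!(
        state.like.as_deref(),
        Some("at://did:plc:alice/app.bsky.feed.like/3kabc123")
    );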
+88 -27
parakeet/src/hydration/labeler.rs
··· 1 1 use crate::hydration::{map_labels, StatefulHydrator}; 2 2 use lexica::app_bsky::actor::ProfileView; 3 - use lexica::app_bsky::labeler::{LabelerPolicy, LabelerView, LabelerViewDetailed}; 3 + use lexica::app_bsky::labeler::{ 4 + LabelerPolicy, LabelerView, LabelerViewDetailed, LabelerViewerState, 5 + }; 4 6 use lexica::com_atproto::label::{Blurs, LabelValueDefinition, Severity}; 5 7 use lexica::com_atproto::moderation::{ReasonType, SubjectType}; 6 8 use parakeet_db::models; 7 9 use std::collections::HashMap; 8 10 use std::str::FromStr; 11 + use tracing::instrument; 12 + 13 + fn build_viewer((did, rkey): (String, String)) -> LabelerViewerState { 14 + LabelerViewerState { 15 + like: Some(format!("at://{did}/app.bsky.feed.like/{rkey}")), 16 + } 17 + } 9 18 10 19 fn build_view( 11 20 labeler: models::LabelerService, 12 21 creator: ProfileView, 13 22 labels: Vec<models::Label>, 23 + viewer: Option<LabelerViewerState>, 14 24 likes: Option<i32>, 15 25 ) -> LabelerView { 16 26 LabelerView { ··· 18 28 cid: labeler.cid, 19 29 creator, 20 30 like_count: likes.unwrap_or_default() as i64, 31 + viewer, 21 32 labels: map_labels(labels), 22 33 indexed_at: labeler.indexed_at.and_utc(), 23 34 } ··· 28 39 defs: Vec<models::LabelDefinition>, 29 40 creator: ProfileView, 30 41 labels: Vec<models::Label>, 42 + viewer: Option<LabelerViewerState>, 31 43 likes: Option<i32>, 32 44 ) -> LabelerViewDetailed { 33 45 let reason_types = labeler.reasons.map(|v| { 34 - v.into_iter() 35 - .flatten() 36 - .filter_map(|v| ReasonType::from_str(&v).ok()) 46 + v.iter() 47 + .filter_map(|v| ReasonType::from_str(v).ok()) 37 48 .collect() 38 49 }); 39 50 ··· 63 74 }) 64 75 .collect(); 65 76 let subject_types = labeler.subject_types.map(|v| { 66 - v.into_iter() 67 - .flatten() 68 - .filter_map(|v| SubjectType::from_str(&v).ok()) 77 + v.iter() 78 + .filter_map(|v| SubjectType::from_str(v).ok()) 69 79 .collect() 70 80 }); 71 - let subject_collections = labeler 72 - .subject_collections 73 - .map(|v| v.into_iter().flatten().collect()); 81 + let subject_collections = labeler.subject_collections.map(Vec::from); 74 82 75 83 LabelerViewDetailed { 76 84 uri: format!("at://{}/app.bsky.labeler.service/self", labeler.did), 77 85 cid: labeler.cid, 78 86 creator, 79 87 like_count: likes.unwrap_or_default() as i64, 88 + viewer, 80 89 policies: LabelerPolicy { 81 90 label_values, 82 91 label_value_definitions, ··· 90 99 } 91 100 92 101 impl StatefulHydrator<'_> { 102 + #[instrument(skip_all)] 93 103 pub async fn hydrate_labeler(&self, labeler: String) -> Option<LabelerView> { 94 104 let labels = self.get_label(&labeler).await; 95 - let (labeler, _, likes) = self.loaders.labeler.load(labeler).await?; 105 + let viewer = self.get_labeler_viewer_state(&labeler).await; 106 + let likes = self.loaders.like.load(make_labeler_uri(&labeler)).await; 107 + let (labeler, _) = self.loaders.labeler.load(labeler).await?; 96 108 let creator = self.hydrate_profile(labeler.did.clone()).await?; 97 109 98 - Some(build_view(labeler, creator, labels, likes)) 110 + Some(build_view(labeler, creator, labels, viewer, likes)) 99 111 } 100 112 113 + #[instrument(skip_all)] 101 114 pub async fn hydrate_labelers(&self, labelers: Vec<String>) -> HashMap<String, LabelerView> { 102 115 let labels = self.get_label_many(&labelers).await; 103 116 let labelers = self.loaders.labeler.load_many(labelers).await; 104 117 105 - let creators = labelers 118 + let (creators, uris) = labelers 106 119 .values() 107 - .map(|(labeler, _, _)| labeler.did.clone()) 108 - .collect(); 120 + 
.map(|(labeler, _)| (labeler.did.clone(), make_labeler_uri(&labeler.did))) 121 + .unzip::<_, _, Vec<_>, Vec<_>>(); 122 + let viewers = self.get_labeler_viewer_states(&uris).await; 109 123 let creators = self.hydrate_profiles(creators).await; 124 + let mut likes = self.loaders.like.load_many(uris.clone()).await; 110 125 111 126 labelers 112 127 .into_iter() 113 - .filter_map(|(k, (labeler, _, likes))| { 128 + .filter_map(|(k, (labeler, _))| { 114 129 let creator = creators.get(&labeler.did).cloned()?; 115 130 let labels = labels.get(&k).cloned().unwrap_or_default(); 131 + let likes = likes.remove(&make_labeler_uri(&labeler.did)); 132 + let viewer = viewers.get(&make_labeler_uri(&k)).cloned(); 116 133 117 - Some((k, build_view(labeler, creator, labels, likes))) 134 + Some((k, build_view(labeler, creator, labels, viewer, likes))) 118 135 }) 119 136 .collect() 120 137 } 121 138 139 + #[instrument(skip_all)] 122 140 pub async fn hydrate_labeler_detailed(&self, labeler: String) -> Option<LabelerViewDetailed> { 123 141 let labels = self.get_label(&labeler).await; 124 - let (labeler, defs, likes) = self.loaders.labeler.load(labeler).await?; 142 + let viewer = self.get_labeler_viewer_state(&labeler).await; 143 + let likes = self.loaders.like.load(make_labeler_uri(&labeler)).await; 144 + let (labeler, defs) = self.loaders.labeler.load(labeler).await?; 125 145 let creator = self.hydrate_profile(labeler.did.clone()).await?; 126 146 127 - Some(build_view_detailed(labeler, defs, creator, labels, likes)) 147 + Some(build_view_detailed( 148 + labeler, defs, creator, labels, viewer, likes, 149 + )) 128 150 } 129 151 152 + #[instrument(skip_all)] 130 153 pub async fn hydrate_labelers_detailed( 131 154 &self, 132 155 labelers: Vec<String>, ··· 134 157 let labels = self.get_label_many(&labelers).await; 135 158 let labelers = self.loaders.labeler.load_many(labelers).await; 136 159 137 - let creators = labelers 160 + let (creators, uris) = labelers 138 161 .values() 139 - .map(|(labeler, _, _)| labeler.did.clone()) 140 - .collect(); 162 + .map(|(labeler, _)| (labeler.did.clone(), make_labeler_uri(&labeler.did))) 163 + .unzip::<_, _, Vec<_>, Vec<_>>(); 164 + let viewers = self.get_labeler_viewer_states(&uris).await; 141 165 let creators = self.hydrate_profiles(creators).await; 166 + let mut likes = self.loaders.like.load_many(uris.clone()).await; 142 167 143 168 labelers 144 169 .into_iter() 145 - .filter_map(|(k, (labeler, defs, likes))| { 170 + .filter_map(|(k, (labeler, defs))| { 146 171 let creator = creators.get(&labeler.did).cloned()?; 147 172 let labels = labels.get(&k).cloned().unwrap_or_default(); 173 + let likes = likes.remove(&make_labeler_uri(&labeler.did)); 174 + let viewer = viewers.get(&make_labeler_uri(&k)).cloned(); 148 175 149 - Some(( 150 - k, 151 - build_view_detailed(labeler, defs, creator, labels, likes), 152 - )) 176 + let view = build_view_detailed(labeler, defs, creator, labels, viewer, likes); 177 + 178 + Some((k, view)) 153 179 }) 154 180 .collect() 155 181 } 182 + 183 + #[instrument(skip_all)] 184 + async fn get_labeler_viewer_state(&self, subject: &str) -> Option<LabelerViewerState> { 185 + if let Some(viewer) = &self.current_actor { 186 + let data = self 187 + .loaders 188 + .like_state 189 + .get(viewer, &make_labeler_uri(subject)) 190 + .await?; 191 + 192 + Some(build_viewer(data)) 193 + } else { 194 + None 195 + } 196 + } 197 + 198 + #[instrument(skip_all)] 199 + async fn get_labeler_viewer_states( 200 + &self, 201 + subjects: &[String], 202 + ) -> HashMap<String, 
LabelerViewerState> { 203 + if let Some(viewer) = &self.current_actor { 204 + let data = self.loaders.like_state.get_many(viewer, subjects).await; 205 + 206 + data.into_iter() 207 + .map(|(k, state)| (k, build_viewer(state))) 208 + .collect() 209 + } else { 210 + HashMap::new() 211 + } 212 + } 213 + } 214 + 215 + fn make_labeler_uri(did: &str) -> String { 216 + format!("at://{did}/app.bsky.labeler.service/self") 156 217 }
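Labelers are keyed by DID in the loaders, while likes and like state are keyed by the service record's AT URI; `make_labeler_uri` bridges the two, matching the second bind in both the singular and batched state lookups:

    assert_eq!(
        make_labeler_uri("did:plc:labeler"),
        "at://did:plc:labeler/app.bsky.labeler.service/self"
    );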
+64 -5
parakeet/src/hydration/list.rs
··· 1 + use crate::db::ListStateRet; 1 2 use crate::hydration::{map_labels, StatefulHydrator}; 2 3 use crate::xrpc::cdn::BskyCdn; 3 4 use lexica::app_bsky::actor::ProfileView; 4 - use lexica::app_bsky::graph::{ListPurpose, ListView, ListViewBasic}; 5 + use lexica::app_bsky::graph::{ListPurpose, ListView, ListViewBasic, ListViewerState}; 5 6 use parakeet_db::models; 6 7 use std::collections::HashMap; 7 8 use std::str::FromStr; 9 + use tracing::instrument; 10 + 11 + fn build_viewer(data: ListStateRet) -> ListViewerState { 12 + ListViewerState { 13 + muted: data.muted, 14 + blocked: data.block, 15 + } 16 + } 8 17 9 18 fn build_basic( 10 19 list: models::List, 11 20 list_item_count: i64, 12 21 labels: Vec<models::Label>, 22 + viewer: Option<ListViewerState>, 13 23 cdn: &BskyCdn, 14 24 ) -> Option<ListViewBasic> { 15 25 let purpose = ListPurpose::from_str(&list.list_type).ok()?; ··· 22 32 purpose, 23 33 avatar, 24 34 list_item_count, 35 + viewer, 25 36 labels: map_labels(labels), 26 37 indexed_at: list.created_at, 27 38 }) ··· 32 43 list_item_count: i64, 33 44 creator: ProfileView, 34 45 labels: Vec<models::Label>, 46 + viewer: Option<ListViewerState>, 35 47 cdn: &BskyCdn, 36 48 ) -> Option<ListView> { 37 49 let purpose = ListPurpose::from_str(&list.list_type).ok()?; ··· 51 63 description_facets, 52 64 avatar, 53 65 list_item_count, 66 + viewer, 54 67 labels: map_labels(labels), 55 68 indexed_at: list.created_at, 56 69 }) 57 70 } 58 71 59 72 impl StatefulHydrator<'_> { 73 + #[instrument(skip_all)] 60 74 pub async fn hydrate_list_basic(&self, list: String) -> Option<ListViewBasic> { 61 75 let labels = self.get_label(&list).await; 76 + let viewer = self.get_list_viewer_state(&list).await; 62 77 let (list, count) = self.loaders.list.load(list).await?; 63 78 64 - build_basic(list, count, labels, &self.cdn) 79 + build_basic(list, count, labels, viewer, &self.cdn) 65 80 } 66 81 82 + #[instrument(skip_all)] 67 83 pub async fn hydrate_lists_basic(&self, lists: Vec<String>) -> HashMap<String, ListViewBasic> { 84 + if lists.is_empty() { 85 + return HashMap::new(); 86 + } 87 + 68 88 let labels = self.get_label_many(&lists).await; 89 + let viewers = self.get_list_viewer_states(&lists).await; 69 90 let lists = self.loaders.list.load_many(lists).await; 70 91 71 92 lists 72 93 .into_iter() 73 94 .filter_map(|(uri, (list, count))| { 74 95 let labels = labels.get(&uri).cloned().unwrap_or_default(); 96 + let viewer = viewers.get(&uri).cloned(); 75 97 76 - build_basic(list, count, labels, &self.cdn).map(|v| (uri, v)) 98 + build_basic(list, count, labels, viewer, &self.cdn).map(|v| (uri, v)) 77 99 }) 78 100 .collect() 79 101 } 80 102 103 + #[instrument(skip_all)] 81 104 pub async fn hydrate_list(&self, list: String) -> Option<ListView> { 82 105 let labels = self.get_label(&list).await; 106 + let viewer = self.get_list_viewer_state(&list).await; 83 107 let (list, count) = self.loaders.list.load(list).await?; 84 108 let profile = self.hydrate_profile(list.owner.clone()).await?; 85 109 86 - build_listview(list, count, profile, labels, &self.cdn) 110 + build_listview(list, count, profile, labels, viewer, &self.cdn) 87 111 } 88 112 113 + #[instrument(skip_all)] 89 114 pub async fn hydrate_lists(&self, lists: Vec<String>) -> HashMap<String, ListView> { 115 + if lists.is_empty() { 116 + return HashMap::new(); 117 + } 118 + 90 119 let labels = self.get_label_many(&lists).await; 120 + let viewers = self.get_list_viewer_states(&lists).await; 91 121 let lists = self.loaders.list.load_many(lists).await; 92 122 93 123 let 
creators = lists.values().map(|(list, _)| list.owner.clone()).collect(); ··· 97 127 .into_iter() 98 128 .filter_map(|(uri, (list, count))| { 99 129 let creator = creators.get(&list.owner)?; 130 + let viewer = viewers.get(&uri).cloned(); 100 131 let labels = labels.get(&uri).cloned().unwrap_or_default(); 101 132 102 - build_listview(list, count, creator.to_owned(), labels, &self.cdn).map(|v| (uri, v)) 133 + build_listview(list, count, creator.to_owned(), labels, viewer, &self.cdn) 134 + .map(|v| (uri, v)) 103 135 }) 104 136 .collect() 137 + } 138 + 139 + #[instrument(skip_all)] 140 + async fn get_list_viewer_state(&self, subject: &str) -> Option<ListViewerState> { 141 + if let Some(viewer) = &self.current_actor { 142 + let data = self.loaders.list_state.get(viewer, subject).await?; 143 + 144 + Some(build_viewer(data)) 145 + } else { 146 + None 147 + } 148 + } 149 + 150 + #[instrument(skip_all)] 151 + async fn get_list_viewer_states( 152 + &self, 153 + subjects: &[String], 154 + ) -> HashMap<String, ListViewerState> { 155 + if let Some(viewer) = &self.current_actor { 156 + let data = self.loaders.list_state.get_many(viewer, subjects).await; 157 + 158 + data.into_iter() 159 + .map(|(k, state)| (k, build_viewer(state))) 160 + .collect() 161 + } else { 162 + HashMap::new() 163 + } 105 164 } 106 165 }
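The new empty-input guards let callers pass whatever URI set they collected without pre-checking; no label, viewer-state, or loader round trips happen for an empty page. A sketch, assuming an async context with a `StatefulHydrator` in scope as `hydrator`:

    let views = hydrator.hydrate_lists_basic(Vec::new()).await;
    assert!(views.is_empty()); // short-circuits before touching any loader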
+4
parakeet/src/hydration/mod.rs
··· 63 63 } 64 64 } 65 65 66 + #[tracing::instrument(skip_all)] 66 67 async fn get_label(&self, uri: &str) -> Vec<parakeet_db::models::Label> { 67 68 self.loaders.label.load(uri, self.accept_labelers).await 68 69 } 69 70 71 + #[tracing::instrument(skip_all)] 70 72 async fn get_profile_label(&self, did: &str) -> Vec<parakeet_db::models::Label> { 71 73 let uris = &[ 72 74 did.to_string(), ··· 80 82 .collect() 81 83 } 82 84 85 + #[tracing::instrument(skip_all)] 83 86 async fn get_label_many( 84 87 &self, 85 88 uris: &[String], ··· 90 93 .await 91 94 } 92 95 96 + #[tracing::instrument(skip_all)] 93 97 async fn get_profile_label_many( 94 98 &self, 95 99 uris: &[String],
+255 -82
parakeet/src/hydration/posts.rs
··· 1 + use crate::db::PostStateRet; 1 2 use crate::hydration::{map_labels, StatefulHydrator}; 2 3 use lexica::app_bsky::actor::ProfileViewBasic; 3 4 use lexica::app_bsky::embed::Embed; 4 - use lexica::app_bsky::feed::{FeedViewPost, PostView, ReplyRef, ReplyRefPost, ThreadgateView}; 5 + use lexica::app_bsky::feed::{ 6 + BlockedAuthor, FeedReasonRepost, FeedViewPost, FeedViewPostReason, PostView, PostViewerState, 7 + ReplyRef, ReplyRefPost, ThreadgateView, 8 + }; 5 9 use lexica::app_bsky::graph::ListViewBasic; 6 10 use lexica::app_bsky::RecordStats; 7 11 use parakeet_db::models; 8 12 use parakeet_index::PostStats; 9 13 use std::collections::HashMap; 14 + use tracing::instrument; 15 + 16 + fn build_viewer(did: &str, data: PostStateRet) -> PostViewerState { 17 + let is_me = did == data.did; 18 + 19 + let repost = data 20 + .repost_rkey 21 + .map(|rkey| format!("at://{did}/app.bsky.feed.repost/{rkey}")); 22 + let like = data 23 + .like_rkey 24 + .map(|rkey| format!("at://{did}/app.bsky.feed.like/{rkey}")); 25 + 26 + PostViewerState { 27 + repost, 28 + like, 29 + bookmarked: data.bookmarked, 30 + thread_muted: false, // todo when we have thread mutes 31 + reply_disabled: false, 32 + embedding_disabled: data.embed_disabled && !is_me, // poster can always bypass embed disabled. 33 + pinned: data.pinned, 34 + } 35 + } 36 + 37 + type HydratePostsRet = ( 38 + models::Post, 39 + ProfileViewBasic, 40 + Vec<models::Label>, 41 + Option<Embed>, 42 + Option<ThreadgateView>, 43 + Option<PostViewerState>, 44 + Option<PostStats>, 45 + ); 10 46 11 47 fn build_postview( 12 - post: models::Post, 13 - author: ProfileViewBasic, 14 - labels: Vec<models::Label>, 15 - embed: Option<Embed>, 16 - threadgate: Option<ThreadgateView>, 17 - stats: Option<PostStats>, 48 + (post, author, labels, embed, threadgate, viewer, stats): HydratePostsRet, 18 49 ) -> PostView { 19 50 let stats = stats 20 51 .map(|stats| RecordStats { ··· 33 64 embed, 34 65 stats, 35 66 labels: map_labels(labels), 67 + viewer, 36 68 threadgate, 37 69 indexed_at: post.created_at, 38 70 } ··· 51 83 } 52 84 53 85 impl StatefulHydrator<'_> { 86 + #[instrument(skip_all)] 54 87 async fn hydrate_threadgate( 55 88 &self, 56 89 threadgate: Option<models::Threadgate>, 57 90 ) -> Option<ThreadgateView> { 58 91 let threadgate = threadgate?; 59 92 60 - let lists = threadgate 61 - .allowed_lists 62 - .iter() 63 - .flatten() 64 - .cloned() 65 - .collect::<Vec<_>>(); 93 + let lists = match threadgate.allowed_lists.as_ref() { 94 + Some(allowed_lists) => allowed_lists.clone().into(), 95 + None => Vec::new(), 96 + }; 66 97 let lists = self.hydrate_lists_basic(lists).await; 67 98 68 99 Some(build_threadgate_view( ··· 71 102 )) 72 103 } 73 104 105 + #[instrument(skip_all)] 74 106 async fn hydrate_threadgates( 75 107 &self, 76 108 threadgates: Vec<models::Threadgate>, 77 109 ) -> HashMap<String, ThreadgateView> { 78 110 let lists = threadgates.iter().fold(Vec::new(), |mut acc, c| { 79 - acc.extend(c.allowed_lists.iter().flatten().cloned()); 111 + if let Some(lists) = &c.allowed_lists { 112 + acc.extend(lists.clone().0); 113 + } 80 114 acc 81 115 }); 82 116 let lists = self.hydrate_lists_basic(lists).await; ··· 84 118 threadgates 85 119 .into_iter() 86 120 .map(|threadgate| { 87 - let this_lists = threadgate 88 - .allowed_lists 89 - .iter() 90 - .filter_map(|v| v.clone().and_then(|v| lists.get(&v).cloned())) 91 - .collect(); 121 + let this_lists = match &threadgate.allowed_lists { 122 + Some(allowed_lists) => allowed_lists 123 + .iter() 124 + .filter_map(|v| 
lists.get(v).cloned()) 125 + .collect(), 126 + None => Vec::new(), 127 + }; 92 128 93 129 ( 94 130 threadgate.at_uri.clone(), ··· 98 134 .collect() 99 135 } 100 136 137 + #[instrument(skip_all)] 101 138 pub async fn hydrate_post(&self, post: String) -> Option<PostView> { 102 - let (post, threadgate, stats) = self.loaders.posts.load(post).await?; 139 + let stats = self.loaders.post_stats.load(post.clone()).await; 140 + let (post, threadgate) = self.loaders.posts.load(post).await?; 141 + let viewer = self.get_post_viewer_state(&post.at_uri).await; 103 142 let embed = self.hydrate_embed(post.at_uri.clone()).await; 104 143 let author = self.hydrate_profile_basic(post.did.clone()).await?; 105 144 let threadgate = self.hydrate_threadgate(threadgate).await; 106 145 let labels = self.get_label(&post.at_uri).await; 107 146 108 - Some(build_postview( 109 - post, author, labels, embed, threadgate, stats, 110 - )) 147 + Some(build_postview(( 148 + post, author, labels, embed, threadgate, viewer, stats, 149 + ))) 111 150 } 112 151 113 - pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> { 152 + #[instrument(skip_all)] 153 + async fn hydrate_posts_inner(&self, posts: Vec<String>) -> HashMap<String, HydratePostsRet> { 154 + let stats = self.loaders.post_stats.load_many(posts.clone()).await; 114 155 let posts = self.loaders.posts.load_many(posts).await; 115 156 116 157 let (authors, post_uris) = posts 117 158 .values() 118 - .map(|(post, _, _)| (post.did.clone(), post.at_uri.clone())) 159 + .map(|(post, _)| (post.did.clone(), post.at_uri.clone())) 119 160 .unzip::<_, _, Vec<_>, Vec<_>>(); 120 161 let authors = self.hydrate_profiles_basic(authors).await; 121 162 122 - let post_labels = self.get_label_many(&post_uris).await; 163 + let mut post_labels = self.get_label_many(&post_uris).await; 164 + let mut viewer_data = self.get_post_viewer_states(&post_uris).await; 123 165 124 166 let threadgates = posts 125 167 .values() 126 - .filter_map(|(_, threadgate, _)| threadgate.clone()) 168 + .filter_map(|(_, threadgate)| threadgate.clone()) 127 169 .collect(); 128 170 let threadgates = self.hydrate_threadgates(threadgates).await; 129 171 130 - let embeds = self.hydrate_embeds(post_uris).await; 172 + let mut embeds = self.hydrate_embeds(post_uris).await; 131 173 132 174 posts 133 175 .into_iter() 134 - .filter_map(|(uri, (post, threadgate, stats))| { 135 - let author = authors.get(&post.did)?; 136 - let embed = embeds.get(&uri).cloned(); 176 + .filter_map(|(uri, (post, threadgate))| { 177 + let author = authors.get(&post.did)?.clone(); 178 + let embed = embeds.remove(&uri); 137 179 let threadgate = threadgate.and_then(|tg| threadgates.get(&tg.at_uri).cloned()); 138 - let labels = post_labels.get(&uri).cloned().unwrap_or_default(); 180 + let labels = post_labels.remove(&uri).unwrap_or_default(); 181 + let stats = stats.get(&uri).cloned(); 182 + let viewer = viewer_data.remove(&uri); 139 183 140 184 Some(( 141 185 uri, 142 - build_postview(post, author.to_owned(), labels, embed, threadgate, stats), 186 + (post, author, labels, embed, threadgate, viewer, stats), 143 187 )) 144 188 }) 145 189 .collect() 146 190 } 147 191 148 - pub async fn hydrate_feed_posts(&self, posts: Vec<String>) -> HashMap<String, FeedViewPost> { 149 - let posts = self.loaders.posts.load_many(posts).await; 150 - 151 - let (authors, post_uris) = posts 152 - .values() 153 - .map(|(post, _, _)| (post.did.clone(), post.at_uri.clone())) 154 - .unzip::<_, _, Vec<_>, Vec<_>>(); 155 - let authors = 
self.hydrate_profiles_basic(authors).await; 156 - 157 - let post_labels = self.get_label_many(&post_uris).await; 192 + #[instrument(skip_all)] 193 + pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> { 194 + self.hydrate_posts_inner(posts) 195 + .await 196 + .into_iter() 197 + .map(|(uri, data)| (uri, build_postview(data))) 198 + .collect() 199 + } 158 200 159 - let embeds = self.hydrate_embeds(post_uris).await; 201 + #[instrument(skip_all)] 202 + pub async fn hydrate_feed_posts( 203 + &self, 204 + posts: Vec<RawFeedItem>, 205 + author_threads_only: bool, 206 + ) -> Vec<FeedViewPost> { 207 + let post_uris = posts 208 + .iter() 209 + .map(|item| item.post_uri().to_string()) 210 + .collect::<Vec<_>>(); 211 + let mut posts_hyd = self.hydrate_posts_inner(post_uris).await; 160 212 161 - let reply_refs = posts 213 + // we shouldn't show the parent when the post violates a threadgate. 214 + let reply_refs = posts_hyd 162 215 .values() 163 - .flat_map(|(post, _, _)| [post.parent_uri.clone(), post.root_uri.clone()]) 216 + .filter(|(post, ..)| !post.violates_threadgate) 217 + .flat_map(|(post, ..)| [post.parent_uri.clone(), post.root_uri.clone()]) 164 218 .flatten() 165 219 .collect::<Vec<_>>(); 166 - 167 220 let reply_posts = self.hydrate_posts(reply_refs).await; 168 221 222 + let repost_profiles = posts 223 + .iter() 224 + .filter_map(|item| item.repost_by()) 225 + .collect::<Vec<_>>(); 226 + let profiles_hydrated = self.hydrate_profiles_basic(repost_profiles).await; 227 + 169 228 posts 170 229 .into_iter() 171 - .filter_map(|(post_uri, (post, _, stats))| { 172 - let author = authors.get(&post.did)?; 230 + .filter_map(|item| { 231 + let post = posts_hyd.remove(item.post_uri())?; 232 + let context = item.context(); 233 + 234 + let reply = if let RawFeedItem::Post { .. 
} = item { 235 + let root_uri = post.0.root_uri.as_ref(); 236 + let parent_uri = post.0.parent_uri.as_ref(); 237 + 238 + let (root, parent) = if author_threads_only { 239 + if root_uri.is_some() && parent_uri.is_some() { 240 + let root = root_uri.and_then(|uri| posts_hyd.get(uri))?; 241 + let parent = parent_uri.and_then(|uri| posts_hyd.get(uri))?; 242 + 243 + let root = build_postview(root.clone()); 244 + let parent = build_postview(parent.clone()); 245 + 246 + (Some(root), Some(parent)) 247 + } else { 248 + (None, None) 249 + } 250 + } else { 251 + let root = root_uri.and_then(|uri| reply_posts.get(uri)).cloned(); 252 + let parent = parent_uri.and_then(|uri| reply_posts.get(uri)).cloned(); 173 253 174 - let root = post.root_uri.as_ref().and_then(|uri| reply_posts.get(uri)); 175 - let parent = post 176 - .parent_uri 177 - .as_ref() 178 - .and_then(|uri| reply_posts.get(uri)); 254 + (root, parent) 255 + }; 179 256 180 - let reply = if post.parent_uri.is_some() && post.root_uri.is_some() { 181 - Some(ReplyRef { 182 - root: root.cloned().map(ReplyRefPost::Post).unwrap_or( 183 - ReplyRefPost::NotFound { 184 - uri: post.root_uri.as_ref().unwrap().clone(), 185 - not_found: true, 186 - }, 187 - ), 188 - parent: parent.cloned().map(ReplyRefPost::Post).unwrap_or( 189 - ReplyRefPost::NotFound { 190 - uri: post.parent_uri.as_ref().unwrap().clone(), 191 - not_found: true, 192 - }, 193 - ), 194 - grandparent_author: None, 195 - }) 257 + if root_uri.is_some() || parent_uri.is_some() { 258 + Some(ReplyRef { 259 + root: root.map(postview_to_replyref).unwrap_or( 260 + ReplyRefPost::NotFound { 261 + uri: root_uri.unwrap().to_owned(), 262 + not_found: true, 263 + }, 264 + ), 265 + parent: parent.map(postview_to_replyref).unwrap_or( 266 + ReplyRefPost::NotFound { 267 + uri: parent_uri.unwrap().to_owned(), 268 + not_found: true, 269 + }, 270 + ), 271 + grandparent_author: None, 272 + }) 273 + } else { 274 + None 275 + } 196 276 } else { 197 277 None 198 278 }; 199 279 200 - let embed = embeds.get(&post_uri).cloned(); 201 - let labels = post_labels.get(&post_uri).cloned().unwrap_or_default(); 202 - let post = build_postview(post, author.to_owned(), labels, embed, None, stats); 280 + let reason = match item { 281 + RawFeedItem::Repost { uri, by, at, .. } => { 282 + Some(FeedViewPostReason::Repost(FeedReasonRepost { 283 + by: profiles_hydrated.get(&by).cloned()?, 284 + uri: Some(uri), 285 + cid: None, 286 + indexed_at: at, 287 + })) 288 + } 289 + RawFeedItem::Pin { .. 
} => Some(FeedViewPostReason::Pin), 290 + _ => None, 291 + }; 203 292 204 - Some(( 205 - post_uri, 206 - FeedViewPost { 207 - post, 208 - reply, 209 - reason: None, 210 - feed_context: None, 211 - }, 212 - )) 293 + let post = build_postview(post); 294 + 295 + Some(FeedViewPost { 296 + post, 297 + reply, 298 + reason, 299 + feed_context: context, 300 + }) 213 301 }) 214 302 .collect() 303 + } 304 + 305 + #[instrument(skip_all)] 306 + async fn get_post_viewer_state(&self, subject: &str) -> Option<PostViewerState> { 307 + if let Some(viewer) = &self.current_actor { 308 + let data = self.loaders.post_state.get(viewer, subject).await?; 309 + 310 + Some(build_viewer(viewer, data)) 311 + } else { 312 + None 313 + } 314 + } 315 + 316 + #[instrument(skip_all)] 317 + async fn get_post_viewer_states( 318 + &self, 319 + subjects: &[String], 320 + ) -> HashMap<String, PostViewerState> { 321 + if let Some(viewer) = &self.current_actor { 322 + let data = self.loaders.post_state.get_many(viewer, subjects).await; 323 + 324 + data.into_iter() 325 + .map(|(k, state)| (k, build_viewer(viewer, state))) 326 + .collect() 327 + } else { 328 + HashMap::new() 329 + } 330 + } 331 + } 332 + 333 + fn postview_to_replyref(post: PostView) -> ReplyRefPost { 334 + match &post.author.viewer { 335 + Some(v) if v.blocked_by || v.blocking.is_some() => ReplyRefPost::Blocked { 336 + uri: post.uri, 337 + blocked: true, 338 + author: BlockedAuthor { 339 + did: post.author.did.clone(), 340 + viewer: post.author.viewer, 341 + }, 342 + }, 343 + _ => ReplyRefPost::Post(post), 344 + } 345 + } 346 + 347 + #[derive(Debug)] 348 + pub enum RawFeedItem { 349 + Pin { 350 + uri: String, 351 + context: Option<String>, 352 + }, 353 + Post { 354 + uri: String, 355 + context: Option<String>, 356 + }, 357 + Repost { 358 + uri: String, 359 + post: String, 360 + by: String, 361 + at: chrono::DateTime<chrono::Utc>, 362 + context: Option<String>, 363 + }, 364 + } 365 + 366 + impl RawFeedItem { 367 + fn post_uri(&self) -> &str { 368 + match self { 369 + RawFeedItem::Pin { uri, .. } => uri, 370 + RawFeedItem::Post { uri, .. } => uri, 371 + RawFeedItem::Repost { post, .. } => post, 372 + } 373 + } 374 + 375 + fn repost_by(&self) -> Option<String> { 376 + match self { 377 + RawFeedItem::Repost { by, .. } => Some(by.clone()), 378 + _ => None, 379 + } 380 + } 381 + 382 + fn context(&self) -> Option<String> { 383 + match self { 384 + RawFeedItem::Pin { context, .. } => context.clone(), 385 + RawFeedItem::Post { context, .. } => context.clone(), 386 + RawFeedItem::Repost { context, .. } => context.clone(), 387 + } 215 388 } 216 389 }
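`RawFeedItem` lets feed endpoints describe each skeleton row (pin, plain post, or repost) before hydration; `hydrate_feed_posts` then resolves reply refs and repost attribution in bulk. A call-site sketch; the URIs are hypothetical and the module path and `hydrator` binding are assumptions:

    use crate::hydration::posts::RawFeedItem;

    let items = vec![
        RawFeedItem::Pin {
            uri: "at://did:plc:alice/app.bsky.feed.post/3kpinned".into(),
            context: None,
        },
        RawFeedItem::Repost {
            uri: "at://did:plc:bob/app.bsky.feed.repost/3krepost".into(),
            post: "at://did:plc:alice/app.bsky.feed.post/3kpost".into(),
            by: "did:plc:bob".into(),
            at: chrono::Utc::now(),
            context: Some("feed-context".into()),
        },
    ];

    // author_threads_only = false: reply parents come from the extra
    // reply-ref hydration pass rather than from the feed page itself.
    let feed = hydrator.hydrate_feed_posts(items, false).await;
    assert!(feed.len() <= 2); // items drop out if their post fails to hydrate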
+148 -11
parakeet/src/hydration/profile.rs
··· 1 + use crate::db::ProfileStateRet; 1 2 use crate::hydration::map_labels; 2 3 use crate::loaders::ProfileLoaderRet; 3 4 use crate::xrpc::cdn::BskyCdn; ··· 5 6 use chrono::TimeDelta; 6 7 use lexica::app_bsky::actor::*; 7 8 use lexica::app_bsky::embed::External; 9 + use lexica::app_bsky::graph::ListViewBasic; 8 10 use parakeet_db::models; 9 11 use parakeet_index::ProfileStats; 10 12 use std::collections::HashMap; 11 13 use std::str::FromStr; 12 14 use std::sync::OnceLock; 15 + use tracing::instrument; 13 16 14 17 pub static TRUSTED_VERIFIERS: OnceLock<Vec<String>> = OnceLock::new(); 15 18 ··· 34 37 }) 35 38 } else { 36 39 None 40 + } 41 + } 42 + 43 + fn build_viewer( 44 + data: ProfileStateRet, 45 + list_mute: Option<ListViewBasic>, 46 + list_block: Option<ListViewBasic>, 47 + ) -> ProfileViewerState { 48 + let following = data 49 + .following 50 + .map(|rkey| format!("at://{}/app.bsky.graph.follow/{rkey}", data.did)); 51 + let followed_by = data 52 + .followed 53 + .map(|rkey| format!("at://{}/app.bsky.graph.follow/{rkey}", data.subject)); 54 + 55 + let blocking = data.list_block.or(data 56 + .blocking 57 + .map(|rkey| format!("at://{}/app.bsky.graph.block/{rkey}", data.did))); 58 + 59 + ProfileViewerState { 60 + muted: data.muting.unwrap_or_default(), 61 + muted_by_list: list_mute, 62 + blocked_by: data.blocked.unwrap_or_default(), // TODO: this doesn't factor for blocklists atm 63 + blocking, 64 + blocking_by_list: list_block, 65 + following, 66 + followed_by, 37 67 } 38 68 } 39 69 ··· 152 182 } 153 183 154 184 fn build_basic( 155 - (handle, profile, chat_decl, is_labeler, stats, status, notif_decl): ProfileLoaderRet, 185 + (handle, profile, chat_decl, is_labeler, status, notif_decl): ProfileLoaderRet, 186 + stats: Option<ProfileStats>, 156 187 labels: Vec<models::Label>, 157 188 verifications: Option<Vec<models::VerificationEntry>>, 189 + viewer: Option<ProfileViewerState>, 158 190 cdn: &BskyCdn, 159 191 ) -> ProfileViewBasic { 160 192 let associated = build_associated(chat_decl, is_labeler, stats, notif_decl); ··· 168 200 display_name: profile.display_name, 169 201 avatar, 170 202 associated, 203 + viewer, 171 204 labels: map_labels(labels), 172 205 verification, 173 206 status, 207 + pronouns: profile.pronouns, 174 208 created_at: profile.created_at.and_utc(), 175 209 } 176 210 } 177 211 178 212 fn build_profile( 179 - (handle, profile, chat_decl, is_labeler, stats, status, notif_decl): ProfileLoaderRet, 213 + (handle, profile, chat_decl, is_labeler, status, notif_decl): ProfileLoaderRet, 214 + stats: Option<ProfileStats>, 180 215 labels: Vec<models::Label>, 181 216 verifications: Option<Vec<models::VerificationEntry>>, 217 + viewer: Option<ProfileViewerState>, 182 218 cdn: &BskyCdn, 183 219 ) -> ProfileView { 184 220 let associated = build_associated(chat_decl, is_labeler, stats, notif_decl); ··· 193 229 description: profile.description, 194 230 avatar, 195 231 associated, 232 + viewer, 196 233 labels: map_labels(labels), 197 234 verification, 198 235 status, 236 + pronouns: profile.pronouns, 199 237 created_at: profile.created_at.and_utc(), 200 238 indexed_at: profile.indexed_at, 201 239 } 202 240 } 203 241 204 242 fn build_detailed( 205 - (handle, profile, chat_decl, is_labeler, stats, status, notif_decl): ProfileLoaderRet, 243 + (handle, profile, chat_decl, is_labeler, status, notif_decl): ProfileLoaderRet, 244 + stats: Option<ProfileStats>, 206 245 labels: Vec<models::Label>, 207 246 verifications: Option<Vec<models::VerificationEntry>>, 247 + viewer: 
Option<ProfileViewerState>, 208 248 cdn: &BskyCdn, 209 249 ) -> ProfileViewDetailed { 210 250 let associated = build_associated(chat_decl, is_labeler, stats, notif_decl); ··· 223 263 followers_count: stats.map(|v| v.followers as i64).unwrap_or_default(), 224 264 follows_count: stats.map(|v| v.following as i64).unwrap_or_default(), 225 265 associated, 266 + viewer, 226 267 labels: map_labels(labels), 227 268 verification, 228 269 status, 270 + pronouns: profile.pronouns, 271 + website: profile.website, 229 272 created_at: profile.created_at.and_utc(), 230 273 indexed_at: profile.indexed_at, 231 274 } 232 275 } 233 276 234 277 impl super::StatefulHydrator<'_> { 278 + #[instrument(skip_all)] 235 279 pub async fn hydrate_profile_basic(&self, did: String) -> Option<ProfileViewBasic> { 236 280 let labels = self.get_profile_label(&did).await; 281 + let viewer = self.get_profile_viewer_state(&did).await; 237 282 let verif = self.loaders.verification.load(did.clone()).await; 283 + let stats = self.loaders.profile_stats.load(did.clone()).await; 238 284 let profile_info = self.loaders.profile.load(did).await?; 239 285 240 - Some(build_basic(profile_info, labels, verif, &self.cdn)) 286 + Some(build_basic( 287 + profile_info, 288 + stats, 289 + labels, 290 + verif, 291 + viewer, 292 + &self.cdn, 293 + )) 241 294 } 242 295 296 + #[instrument(skip_all)] 243 297 pub async fn hydrate_profiles_basic( 244 298 &self, 245 299 dids: Vec<String>, 246 300 ) -> HashMap<String, ProfileViewBasic> { 247 301 let labels = self.get_profile_label_many(&dids).await; 302 + let viewers = self.get_profile_viewer_states(&dids).await; 248 303 let verif = self.loaders.verification.load_many(dids.clone()).await; 304 + let stats = self.loaders.profile_stats.load_many(dids.clone()).await; 249 305 let profiles = self.loaders.profile.load_many(dids).await; 250 306 251 307 profiles ··· 253 309 .map(|(k, profile_info)| { 254 310 let labels = labels.get(&k).cloned().unwrap_or_default(); 255 311 let verif = verif.get(&k).cloned(); 312 + let viewer = viewers.get(&k).cloned(); 313 + let stats = stats.get(&k).cloned(); 256 314 257 - let v = build_basic(profile_info, labels, verif, &self.cdn); 315 + let v = build_basic(profile_info, stats, labels, verif, viewer, &self.cdn); 258 316 (k, v) 259 317 }) 260 318 .collect() 261 319 } 262 320 321 + #[instrument(skip_all)] 263 322 pub async fn hydrate_profile(&self, did: String) -> Option<ProfileView> { 264 323 let labels = self.get_profile_label(&did).await; 265 - 324 + let viewer = self.get_profile_viewer_state(&did).await; 266 325 let verif = self.loaders.verification.load(did.clone()).await; 326 + let stats = self.loaders.profile_stats.load(did.clone()).await; 267 327 let profile_info = self.loaders.profile.load(did).await?; 268 328 269 - Some(build_profile(profile_info, labels, verif, &self.cdn)) 329 + Some(build_profile( 330 + profile_info, 331 + stats, 332 + labels, 333 + verif, 334 + viewer, 335 + &self.cdn, 336 + )) 270 337 } 271 338 339 + #[instrument(skip_all)] 272 340 pub async fn hydrate_profiles(&self, dids: Vec<String>) -> HashMap<String, ProfileView> { 273 341 let labels = self.get_profile_label_many(&dids).await; 342 + let viewers = self.get_profile_viewer_states(&dids).await; 274 343 let verif = self.loaders.verification.load_many(dids.clone()).await; 344 + let stats = self.loaders.profile_stats.load_many(dids.clone()).await; 275 345 let profiles = self.loaders.profile.load_many(dids).await; 276 346 277 347 profiles ··· 279 349 .map(|(k, profile_info)| { 280 350 let labels = 
labels.get(&k).cloned().unwrap_or_default(); 281 351 let verif = verif.get(&k).cloned(); 352 + let viewer = viewers.get(&k).cloned(); 353 + let stats = stats.get(&k).cloned(); 282 354 283 - let v = build_profile(profile_info, labels, verif, &self.cdn); 355 + let v = build_profile(profile_info, stats, labels, verif, viewer, &self.cdn); 284 356 (k, v) 285 357 }) 286 358 .collect() 287 359 } 288 360 361 + #[instrument(skip_all)] 289 362 pub async fn hydrate_profile_detailed(&self, did: String) -> Option<ProfileViewDetailed> { 290 363 let labels = self.get_profile_label(&did).await; 291 - 364 + let viewer = self.get_profile_viewer_state(&did).await; 292 365 let verif = self.loaders.verification.load(did.clone()).await; 366 + let stats = self.loaders.profile_stats.load(did.clone()).await; 293 367 let profile_info = self.loaders.profile.load(did).await?; 294 368 295 - Some(build_detailed(profile_info, labels, verif, &self.cdn)) 369 + Some(build_detailed( 370 + profile_info, 371 + stats, 372 + labels, 373 + verif, 374 + viewer, 375 + &self.cdn, 376 + )) 296 377 } 297 378 379 + #[instrument(skip_all)] 298 380 pub async fn hydrate_profiles_detailed( 299 381 &self, 300 382 dids: Vec<String>, 301 383 ) -> HashMap<String, ProfileViewDetailed> { 302 384 let labels = self.get_profile_label_many(&dids).await; 385 + let viewers = self.get_profile_viewer_states(&dids).await; 303 386 let verif = self.loaders.verification.load_many(dids.clone()).await; 387 + let stats = self.loaders.profile_stats.load_many(dids.clone()).await; 304 388 let profiles = self.loaders.profile.load_many(dids).await; 305 389 306 390 profiles ··· 308 392 .map(|(k, profile_info)| { 309 393 let labels = labels.get(&k).cloned().unwrap_or_default(); 310 394 let verif = verif.get(&k).cloned(); 395 + let viewer = viewers.get(&k).cloned(); 396 + let stats = stats.get(&k).cloned(); 311 397 312 - let v = build_detailed(profile_info, labels, verif, &self.cdn); 398 + let v = build_detailed(profile_info, stats, labels, verif, viewer, &self.cdn); 313 399 (k, v) 314 400 }) 315 401 .collect() 402 + } 403 + 404 + #[instrument(skip_all)] 405 + async fn get_profile_viewer_state(&self, subject: &str) -> Option<ProfileViewerState> { 406 + if let Some(viewer) = &self.current_actor { 407 + let data = self.loaders.profile_state.get(viewer, subject).await?; 408 + 409 + let list_block = match &data.list_block { 410 + Some(uri) => self.hydrate_list_basic(uri.clone()).await, 411 + None => None, 412 + }; 413 + let list_mute = match &data.list_mute { 414 + Some(uri) => self.hydrate_list_basic(uri.clone()).await, 415 + None => None, 416 + }; 417 + 418 + Some(build_viewer(data, list_mute, list_block)) 419 + } else { 420 + None 421 + } 422 + } 423 + 424 + #[instrument(skip_all)] 425 + async fn get_profile_viewer_states( 426 + &self, 427 + dids: &[String], 428 + ) -> HashMap<String, ProfileViewerState> { 429 + if let Some(viewer) = &self.current_actor { 430 + let data = self.loaders.profile_state.get_many(viewer, dids).await; 431 + let lists = data 432 + .values() 433 + .flat_map(|v| [&v.list_block, &v.list_mute]) 434 + .flatten() 435 + .cloned() 436 + .collect(); 437 + let lists = self.hydrate_lists_basic(lists).await; 438 + 439 + data.into_iter() 440 + .map(|(k, state)| { 441 + let list_mute = state.list_mute.as_ref().and_then(|v| lists.get(v).cloned()); 442 + let list_block = state 443 + .list_block 444 + .as_ref() 445 + .and_then(|v| lists.get(v).cloned()); 446 + 447 + (k, build_viewer(state, list_mute, list_block)) 448 + }) 449 + .collect() 450 + } else { 451 + 
HashMap::new() 452 + } 316 453 } 317 454 }
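`build_viewer` derives the two follow URIs from opposite sides of the relationship row: `following` lives in the viewer's repo, `followed_by` in the subject's. A sketch with placeholder values (module-internal helper, so this assumes it runs inside hydration::profile):

    let row = crate::db::ProfileStateRet {
        did: "did:plc:viewer".to_string(),
        subject: "did:plc:subject".to_string(),
        muting: Some(false),
        blocked: None,
        blocking: None,
        following: Some("3kfollow".to_string()),
        followed: Some("3kfollowback".to_string()),
        list_block: None,
        list_mute: None,
    };

    let state = build_viewer(row, None, None);
    assert_eq!(
        state.following.as_deref(),
        Some("at://did:plc:viewer/app.bsky.graph.follow/3kfollow")
    );
    assert_eq!(
        state.followed_by.as_deref(),
        Some("at://did:plc:subject/app.bsky.graph.follow/3kfollowback")
    );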
+11 -9
parakeet/src/hydration/starter_packs.rs
··· 4 4 use lexica::app_bsky::graph::{ListViewBasic, StarterPackView, StarterPackViewBasic}; 5 5 use parakeet_db::models; 6 6 use std::collections::HashMap; 7 + use tracing::instrument; 7 8 8 9 fn build_basic( 9 10 starter_pack: models::StaterPack, ··· 50 51 } 51 52 52 53 impl StatefulHydrator<'_> { 54 + #[instrument(skip_all)] 53 55 pub async fn hydrate_starterpack_basic(&self, pack: String) -> Option<StarterPackViewBasic> { 54 56 let labels = self.get_label(&pack).await; 55 57 let sp = self.loaders.starterpacks.load(pack).await?; ··· 59 61 Some(build_basic(sp, creator, labels, list_item_count)) 60 62 } 61 63 64 + #[instrument(skip_all)] 62 65 pub async fn hydrate_starterpacks_basic( 63 66 &self, 64 67 packs: Vec<String>, ··· 86 89 .collect() 87 90 } 88 91 92 + #[instrument(skip_all)] 89 93 pub async fn hydrate_starterpack(&self, pack: String) -> Option<StarterPackView> { 90 94 let labels = self.get_label(&pack).await; 91 95 let sp = self.loaders.starterpacks.load(pack).await?; ··· 93 97 let creator = self.hydrate_profile_basic(sp.owner.clone()).await?; 94 98 let list = self.hydrate_list_basic(sp.list.clone()).await; 95 99 96 - let feeds = sp 97 - .feeds 98 - .clone() 99 - .unwrap_or_default() 100 - .into_iter() 101 - .flatten() 100 + let feeds = sp.feeds.clone().unwrap_or_default(); 101 + let feeds = self 102 + .hydrate_feedgens(feeds.into()) 103 + .await 104 + .into_values() 102 105 .collect(); 103 - let feeds = self.hydrate_feedgens(feeds).await.into_values().collect(); 104 106 105 107 Some(build_spview(sp, creator, labels, list, feeds)) 106 108 } 107 109 110 + #[instrument(skip_all)] 108 111 pub async fn hydrate_starterpacks( 109 112 &self, 110 113 packs: Vec<String>, ··· 119 122 let feeds = packs 120 123 .values() 121 124 .filter_map(|pack| pack.feeds.clone()) 122 - .flat_map(|feeds| feeds.into_iter().flatten()) 125 + .flat_map(Vec::from) 123 126 .collect(); 124 127 125 128 let creators = self.hydrate_profiles_basic(creators).await; ··· 133 136 let list = lists.get(&pack.list).cloned(); 134 137 let feeds = pack.feeds.as_ref().map(|v| { 135 138 v.iter() 136 - .flatten() 137 139 .filter_map(|feed| feeds.get(feed).cloned()) 138 140 .collect() 139 141 });
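The conversions here (`feeds.into()`, `flat_map(Vec::from)`) rely on `TextArray` converting into `Vec<String>`, replacing the old nested `Option`/`flatten()` shape. A hedged sketch of the pattern, assuming only the `From`/`Into` impls these hydrators already use:

    use parakeet_db::models::TextArray;

    // Assumes Vec<String>: From<TextArray>, as used throughout this diff.
    fn feed_uris(feeds: Option<TextArray>) -> Vec<String> {
        feeds.map(Vec::from).unwrap_or_default()
    }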
+57
parakeet/src/instrumentation.rs
··· 1 + use opentelemetry::trace::TracerProvider; 2 + use opentelemetry_otlp::{Protocol, SpanExporter, WithExportConfig}; 3 + use opentelemetry_sdk::trace::{Sampler, SdkTracer, SdkTracerProvider}; 4 + use tracing::Subscriber; 5 + use tracing_opentelemetry::OpenTelemetryLayer; 6 + use tracing_subscriber::filter::Filtered; 7 + use tracing_subscriber::layer::SubscriberExt; 8 + use tracing_subscriber::registry::LookupSpan; 9 + use tracing_subscriber::util::SubscriberInitExt; 10 + use tracing_subscriber::{EnvFilter, Layer}; 11 + 12 + pub fn init_instruments(cfg: &crate::config::ConfigInstruments) { 13 + let otel_layer = cfg.otel_enable.then(init_otel); 14 + let log_layer = init_log(cfg.log_json); 15 + 16 + tracing_subscriber::registry() 17 + .with(log_layer) 18 + .with(otel_layer) 19 + .init(); 20 + } 21 + 22 + fn init_otel<S>() -> Filtered<OpenTelemetryLayer<S, SdkTracer>, EnvFilter, S> 23 + where 24 + S: Subscriber + for<'span> LookupSpan<'span>, 25 + { 26 + let span_exporter = SpanExporter::builder() 27 + .with_http() 28 + .with_protocol(Protocol::HttpBinary) 29 + .build() 30 + .unwrap(); 31 + 32 + let tracer_provider = SdkTracerProvider::builder() 33 + .with_batch_exporter(span_exporter) 34 + .with_sampler(Sampler::AlwaysOn) 35 + .build(); 36 + 37 + opentelemetry::global::set_tracer_provider(tracer_provider.clone()); 38 + 39 + let tracer = tracer_provider.tracer("parakeet"); 40 + let otel_filter = EnvFilter::new("info,otel::tracing=trace,tower_http=off"); 41 + 42 + OpenTelemetryLayer::new(tracer).with_filter(otel_filter) 43 + } 44 + 45 + fn init_log<S>(json: bool) -> Filtered<Box<dyn Layer<S> + Send + Sync>, EnvFilter, S> 46 + where 47 + S: Subscriber + for<'span> LookupSpan<'span>, 48 + { 49 + let stdout_filter = 50 + EnvFilter::from_default_env().add_directive("otel::tracing=off".parse().unwrap()); 51 + 52 + match json { 53 + true => tracing_subscriber::fmt::layer().json().boxed(), 54 + false => tracing_subscriber::fmt::layer().boxed(), 55 + } 56 + .with_filter(stdout_filter) 57 + }
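Wiring is a single call at startup: the OTLP HTTP exporter picks up the standard OpenTelemetry environment variables (e.g. OTEL_EXPORTER_OTLP_ENDPOINT), and the stdout layer follows RUST_LOG via `EnvFilter::from_default_env()`. A sketch; `cfg` comes from the existing config parsing, and only the two fields `init_instruments` reads are relevant:

    fn setup(cfg: &crate::config::ConfigInstruments) {
        // Installs the fmt layer (JSON or plain per cfg.log_json) and,
        // when cfg.otel_enable is set, the OTLP span exporter.
        crate::instrumentation::init_instruments(cfg);
        tracing::info!("instrumentation initialised");
    }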
+277 -111
parakeet/src/loaders.rs
··· 1 + use crate::cache::PrefixedLoaderCache; 2 + use crate::db; 1 3 use crate::xrpc::extract::LabelConfigItem; 2 - use dataloader::cached::Loader; 4 + use dataloader::async_cached::Loader; 5 + use dataloader::non_cached::Loader as NonCachedLoader; 3 6 use dataloader::BatchFn; 7 + use diesel::dsl::sql; 4 8 use diesel::prelude::*; 5 9 use diesel_async::pooled_connection::deadpool::Pool; 6 10 use diesel_async::{AsyncPgConnection, RunQueryDsl}; 7 11 use itertools::Itertools; 8 12 use lexica::app_bsky::actor::{ChatAllowIncoming, ProfileAllowSubscriptions}; 9 13 use parakeet_db::{models, schema}; 14 + use redis::aio::MultiplexedConnection; 15 + use serde::{Deserialize, Serialize}; 10 16 use std::collections::HashMap; 11 17 use std::str::FromStr; 18 + use tracing::instrument; 19 + 20 + type CachingLoader<K, V, L> = Loader<K, V, L, PrefixedLoaderCache<V>>; 21 + 22 + fn new_plc_loader<V, F>( 23 + load_fn: F, 24 + conn: &MultiplexedConnection, 25 + prefix: &str, 26 + exp: u64, 27 + ) -> Loader<String, V, F, PrefixedLoaderCache<V>> 28 + where 29 + V: Clone + Serialize + for<'a> Deserialize<'a>, 30 + F: BatchFn<String, V>, 31 + { 32 + Loader::new( 33 + load_fn, 34 + PrefixedLoaderCache::new(conn, prefix.to_string(), Some(exp)), 35 + ) 36 + } 12 37 13 38 pub struct Dataloaders { 14 - pub embed: Loader<String, (EmbedLoaderRet, String), EmbedLoader>, 15 - pub feedgen: Loader<String, FeedGenLoaderRet, FeedGenLoader>, 16 - pub handle: Loader<String, String, HandleLoader>, 39 + pub embed: CachingLoader<String, (EmbedLoaderRet, String), EmbedLoader>, 40 + pub feedgen: CachingLoader<String, models::FeedGen, FeedGenLoader>, 41 + pub handle: CachingLoader<String, String, HandleLoader>, 17 42 pub label: LabelLoader, 18 - pub labeler: Loader<String, LabelServiceLoaderRet, LabelServiceLoader>, 19 - pub list: Loader<String, ListLoaderRet, ListLoader>, 20 - pub posts: Loader<String, PostLoaderRet, PostLoader>, 21 - pub profile: Loader<String, ProfileLoaderRet, ProfileLoader>, 22 - pub starterpacks: Loader<String, StarterPackLoaderRet, StarterPackLoader>, 23 - pub verification: Loader<String, Vec<models::VerificationEntry>, VerificationLoader>, 43 + pub labeler: CachingLoader<String, LabelServiceLoaderRet, LabelServiceLoader>, 44 + pub list: CachingLoader<String, ListLoaderRet, ListLoader>, 45 + pub list_state: ListStateLoader, 46 + pub like: NonCachedLoader<String, i32, LikeLoader>, 47 + pub like_state: LikeRecordLoader, 48 + pub posts: CachingLoader<String, PostLoaderRet, PostLoader>, 49 + pub post_stats: NonCachedLoader<String, parakeet_index::PostStats, PostStatsLoader>, 50 + pub post_state: PostStateLoader, 51 + pub profile: CachingLoader<String, ProfileLoaderRet, ProfileLoader>, 52 + pub profile_stats: NonCachedLoader<String, parakeet_index::ProfileStats, ProfileStatsLoader>, 53 + pub profile_state: ProfileStateLoader, 54 + pub starterpacks: CachingLoader<String, StarterPackLoaderRet, StarterPackLoader>, 55 + pub verification: CachingLoader<String, Vec<models::VerificationEntry>, VerificationLoader>, 24 56 } 25 57 26 58 impl Dataloaders { 27 - // for the moment, we set up memory cached loaders 28 - // we should build a redis/valkey backend at some point in the future. 
29 - pub fn new(pool: Pool<AsyncPgConnection>, idxc: parakeet_index::Client) -> Dataloaders { 59 + #[rustfmt::skip] 60 + pub fn new( 61 + pool: Pool<AsyncPgConnection>, 62 + rc: MultiplexedConnection, 63 + idxc: parakeet_index::Client, 64 + ) -> Dataloaders { 30 65 Dataloaders { 31 - embed: Loader::new(EmbedLoader(pool.clone())), 32 - feedgen: Loader::new(FeedGenLoader(pool.clone(), idxc.clone())), 33 - handle: Loader::new(HandleLoader(pool.clone())), 66 + embed: new_plc_loader(EmbedLoader(pool.clone()), &rc, "embed", 3600), 67 + feedgen: new_plc_loader(FeedGenLoader(pool.clone()), &rc, "feedgen", 600), 68 + handle: new_plc_loader(HandleLoader(pool.clone()), &rc, "handle", 60), 34 69 label: LabelLoader(pool.clone()), // CARE: never cache this. 35 - labeler: Loader::new(LabelServiceLoader(pool.clone(), idxc.clone())), 36 - list: Loader::new(ListLoader(pool.clone())), 37 - posts: Loader::new(PostLoader(pool.clone(), idxc.clone())), 38 - profile: Loader::new(ProfileLoader(pool.clone(), idxc.clone())), 39 - starterpacks: Loader::new(StarterPackLoader(pool.clone())), 40 - verification: Loader::new(VerificationLoader(pool.clone())), 70 + labeler: new_plc_loader(LabelServiceLoader(pool.clone()), &rc, "labeler", 600), 71 + like: NonCachedLoader::new(LikeLoader(idxc.clone())), 72 + like_state: LikeRecordLoader(pool.clone()), 73 + list: new_plc_loader(ListLoader(pool.clone()), &rc, "list", 600), 74 + list_state: ListStateLoader(pool.clone()), 75 + posts: new_plc_loader(PostLoader(pool.clone()), &rc, "post", 3600), 76 + post_stats: NonCachedLoader::new(PostStatsLoader(idxc.clone())), 77 + post_state: PostStateLoader(pool.clone()), 78 + profile: new_plc_loader(ProfileLoader(pool.clone()), &rc, "profile", 3600), 79 + profile_stats: NonCachedLoader::new(ProfileStatsLoader(idxc.clone())), 80 + profile_state: ProfileStateLoader(pool.clone()), 81 + starterpacks: new_plc_loader(StarterPackLoader(pool.clone()), &rc, "starterpacks", 600), 82 + verification: new_plc_loader(VerificationLoader(pool.clone()), &rc, "verification", 60), 83 + } 84 + } 85 + } 86 + 87 + pub struct LikeLoader(parakeet_index::Client); 88 + impl BatchFn<String, i32> for LikeLoader { 89 + #[instrument(name = "LikeLoader", skip_all)] 90 + async fn load(&mut self, keys: &[String]) -> HashMap<String, i32> { 91 + let res = self 92 + .0 93 + .get_like_count_many(parakeet_index::GetStatsManyReq { 94 + uris: keys.to_vec(), 95 + }) 96 + .await 97 + .map(|v| v.into_inner()); 98 + 99 + match res { 100 + Ok(data) => data 101 + .entries 102 + .into_iter() 103 + .map(|(k, v)| (k, v.likes)) 104 + .collect(), 105 + Err(_) => HashMap::new(), 106 + } 107 + } 108 + } 109 + 110 + pub struct LikeRecordLoader(Pool<AsyncPgConnection>); 111 + impl LikeRecordLoader { 112 + #[instrument(name = "LikeRecordLoader::get", skip_all)] 113 + pub async fn get(&self, did: &str, subject: &str) -> Option<(String, String)> { 114 + let mut conn = self.0.get().await.unwrap(); 115 + 116 + db::get_like_state(&mut conn, did, subject) 117 + .await 118 + .unwrap_or_else(|e| { 119 + tracing::error!("like state load failed: {e}"); 120 + None 121 + }) 122 + } 123 + 124 + #[instrument(name = "LikeRecordLoader::get_many", skip_all)] 125 + pub async fn get_many( 126 + &self, 127 + did: &str, 128 + subjects: &[String], 129 + ) -> HashMap<String, (String, String)> { 130 + let mut conn = self.0.get().await.unwrap(); 131 + 132 + match db::get_like_states(&mut conn, did, subjects).await { 133 + Ok(res) => { 134 + HashMap::from_iter(res.into_iter().map(|(sub, did, rkey)| (sub, (did, rkey)))) 
135 + } 136 + Err(e) => { 137 + tracing::error!("like state load failed: {e}"); 138 + HashMap::new() 139 + } 41 140 } 42 141 } 43 142 } 44 143 45 144 pub struct HandleLoader(Pool<AsyncPgConnection>); 46 145 impl BatchFn<String, String> for HandleLoader { 146 + #[instrument(name = "HandleLoader", skip_all)] 47 147 async fn load(&mut self, keys: &[String]) -> HashMap<String, String> { 48 148 let mut conn = self.0.get().await.unwrap(); 49 149 ··· 66 166 } 67 167 } 68 168 69 - pub struct ProfileLoader(Pool<AsyncPgConnection>, parakeet_index::Client); 169 + pub struct ProfileLoader(Pool<AsyncPgConnection>); 70 170 pub type ProfileLoaderRet = ( 71 171 Option<String>, 72 172 models::Profile, 73 173 Option<ChatAllowIncoming>, 74 174 bool, 75 - Option<parakeet_index::ProfileStats>, 76 175 Option<models::Status>, 77 176 Option<ProfileAllowSubscriptions>, 78 177 ); 79 178 impl BatchFn<String, ProfileLoaderRet> for ProfileLoader { 179 + #[instrument(name = "ProfileLoader", skip_all)] 80 180 async fn load(&mut self, keys: &[String]) -> HashMap<String, ProfileLoaderRet> { 81 181 let mut conn = self.0.get().await.unwrap(); 82 182 ··· 115 215 )>(&mut conn) 116 216 .await; 117 217 118 - let stats_req = parakeet_index::GetStatsManyReq { 119 - uris: keys.to_vec(), 120 - }; 121 - let mut stats = self 122 - .1 123 - .get_profile_stats_many(stats_req) 124 - .await 125 - .unwrap() 126 - .into_inner() 127 - .entries; 128 - 129 218 match res { 130 219 Ok(res) => HashMap::from_iter(res.into_iter().map( 131 220 |(did, handle, profile, chat_decl, labeler_cid, status, notif_decl)| { ··· 133 222 let notif_decl = 134 223 notif_decl.and_then(|v| ProfileAllowSubscriptions::from_str(&v).ok()); 135 224 let is_labeler = labeler_cid.is_some(); 136 - let maybe_stats = stats.remove(&did); 137 225 138 - let val = ( 139 - handle, 140 - profile, 141 - chat_decl, 142 - is_labeler, 143 - maybe_stats, 144 - status, 145 - notif_decl, 146 - ); 226 + let val = (handle, profile, chat_decl, is_labeler, status, notif_decl); 147 227 148 228 (did, val) 149 229 }, ··· 156 236 } 157 237 } 158 238 239 + pub struct ProfileStatsLoader(parakeet_index::Client); 240 + impl BatchFn<String, parakeet_index::ProfileStats> for ProfileStatsLoader { 241 + #[instrument(name = "ProfileStatsLoader", skip_all)] 242 + async fn load(&mut self, keys: &[String]) -> HashMap<String, parakeet_index::ProfileStats> { 243 + let stats_req = parakeet_index::GetStatsManyReq { 244 + uris: keys.to_vec(), 245 + }; 246 + 247 + self.0 248 + .get_profile_stats_many(stats_req) 249 + .await 250 + .unwrap() 251 + .into_inner() 252 + .entries 253 + } 254 + } 255 + 256 + pub struct ProfileStateLoader(Pool<AsyncPgConnection>); 257 + impl ProfileStateLoader { 258 + #[instrument(name = "ProfileStateLoader::get", skip_all)] 259 + pub async fn get(&self, did: &str, subject: &str) -> Option<db::ProfileStateRet> { 260 + let mut conn = self.0.get().await.unwrap(); 261 + 262 + db::get_profile_state(&mut conn, did, subject) 263 + .await 264 + .unwrap_or_else(|e| { 265 + tracing::error!("profile state load failed: {e}"); 266 + None 267 + }) 268 + } 269 + 270 + #[instrument(name = "ProfileStateLoader::get_many", skip_all)] 271 + pub async fn get_many( 272 + &self, 273 + did: &str, 274 + subjects: &[String], 275 + ) -> HashMap<String, db::ProfileStateRet> { 276 + let mut conn = self.0.get().await.unwrap(); 277 + 278 + match db::get_profile_states(&mut conn, did, subjects).await { 279 + Ok(res) => HashMap::from_iter(res.into_iter().map(|v| (v.subject.clone(), v))), 280 + Err(e) => { 281 + 
tracing::error!("profile state load failed: {e}"); 282 + HashMap::new() 283 + } 284 + } 285 + } 286 + } 287 + 159 288 pub struct ListLoader(Pool<AsyncPgConnection>); 160 289 type ListLoaderRet = (models::List, i64); 161 290 impl BatchFn<String, ListLoaderRet> for ListLoader { 291 + #[instrument(name = "ListLoaderRet", skip_all)] 162 292 async fn load(&mut self, keys: &[String]) -> HashMap<String, ListLoaderRet> { 163 293 let mut conn = self.0.get().await.unwrap(); 164 294 ··· 188 318 } 189 319 } 190 320 191 - pub struct FeedGenLoader(Pool<AsyncPgConnection>, parakeet_index::Client); 192 - type FeedGenLoaderRet = (models::FeedGen, Option<i32>); 193 - impl BatchFn<String, FeedGenLoaderRet> for FeedGenLoader { 194 - async fn load(&mut self, keys: &[String]) -> HashMap<String, FeedGenLoaderRet> { 321 + pub struct ListStateLoader(Pool<AsyncPgConnection>); 322 + impl ListStateLoader { 323 + #[instrument(name = "ListStateLoader::get", skip_all)] 324 + pub async fn get(&self, did: &str, subject: &str) -> Option<db::ListStateRet> { 325 + let mut conn = self.0.get().await.unwrap(); 326 + 327 + db::get_list_state(&mut conn, did, subject) 328 + .await 329 + .unwrap_or_else(|e| { 330 + tracing::error!("list state load failed: {e}"); 331 + None 332 + }) 333 + } 334 + 335 + #[instrument(name = "ListStateLoader::get_many", skip_all)] 336 + pub async fn get_many( 337 + &self, 338 + did: &str, 339 + subjects: &[String], 340 + ) -> HashMap<String, db::ListStateRet> { 341 + let mut conn = self.0.get().await.unwrap(); 342 + 343 + match db::get_list_states(&mut conn, did, subjects).await { 344 + Ok(res) => HashMap::from_iter(res.into_iter().map(|v| (v.at_uri.clone(), v))), 345 + Err(e) => { 346 + tracing::error!("list state load failed: {e}"); 347 + HashMap::new() 348 + } 349 + } 350 + } 351 + } 352 + 353 + pub struct FeedGenLoader(Pool<AsyncPgConnection>); 354 + impl BatchFn<String, models::FeedGen> for FeedGenLoader { 355 + #[instrument(name = "FeedGenLoader", skip_all)] 356 + async fn load(&mut self, keys: &[String]) -> HashMap<String, models::FeedGen> { 195 357 let mut conn = self.0.get().await.unwrap(); 196 358 197 359 let res = schema::feedgens::table ··· 200 362 .load(&mut conn) 201 363 .await; 202 364 203 - let stats_req = parakeet_index::GetStatsManyReq { 204 - uris: keys.to_vec(), 205 - }; 206 - let mut stats = self 207 - .1 208 - .get_like_count_many(stats_req) 209 - .await 210 - .unwrap() 211 - .into_inner() 212 - .entries; 213 - 214 365 match res { 215 - Ok(res) => HashMap::from_iter(res.into_iter().map(|feedgen| { 216 - let likes = stats.remove(&feedgen.at_uri).map(|v| v.likes); 217 - 218 - (feedgen.at_uri.clone(), (feedgen, likes)) 219 - })), 366 + Ok(res) => HashMap::from_iter( 367 + res.into_iter() 368 + .map(|feedgen| (feedgen.at_uri.clone(), feedgen)), 369 + ), 220 370 Err(e) => { 221 371 tracing::error!("feedgen load failed: {e}"); 222 372 HashMap::new() ··· 225 375 } 226 376 } 227 377 228 - pub struct PostLoader(Pool<AsyncPgConnection>, parakeet_index::Client); 229 - type PostLoaderRet = ( 230 - models::Post, 231 - Option<models::Threadgate>, 232 - Option<parakeet_index::PostStats>, 233 - ); 378 + pub struct PostLoader(Pool<AsyncPgConnection>); 379 + type PostLoaderRet = (models::Post, Option<models::Threadgate>); 234 380 impl BatchFn<String, PostLoaderRet> for PostLoader { 381 + #[instrument(name = "PostLoader", skip_all)] 235 382 async fn load(&mut self, keys: &[String]) -> HashMap<String, PostLoaderRet> { 236 383 let mut conn = self.0.get().await.unwrap(); 237 384 238 385 let res = 
schema::posts::table 239 - .left_join(schema::threadgates::table) 386 + .left_join(schema::threadgates::table.on( 387 + schema::threadgates::post_uri.eq(sql("coalesce(posts.root_uri, posts.at_uri)")), 388 + )) 240 389 .select(( 241 390 models::Post::as_select(), 242 391 Option::<models::Threadgate>::as_select(), ··· 245 394 .load(&mut conn) 246 395 .await; 247 396 397 + match res { 398 + Ok(res) => HashMap::from_iter( 399 + res.into_iter() 400 + .map(|(post, threadgate)| (post.at_uri.clone(), (post, threadgate))), 401 + ), 402 + Err(e) => { 403 + tracing::error!("post load failed: {e}"); 404 + HashMap::new() 405 + } 406 + } 407 + } 408 + } 409 + 410 + pub struct PostStatsLoader(parakeet_index::Client); 411 + impl BatchFn<String, parakeet_index::PostStats> for PostStatsLoader { 412 + #[instrument(name = "PostStatsLoader", skip_all)] 413 + async fn load(&mut self, keys: &[String]) -> HashMap<String, parakeet_index::PostStats> { 248 414 let stats_req = parakeet_index::GetStatsManyReq { 249 415 uris: keys.to_vec(), 250 416 }; 251 - let mut stats = self 252 - .1 417 + 418 + self.0 253 419 .get_post_stats_many(stats_req) 254 420 .await 255 421 .unwrap() 256 422 .into_inner() 257 - .entries; 423 + .entries 424 + } 425 + } 258 426 259 - match res { 260 - Ok(res) => HashMap::from_iter(res.into_iter().map(|(post, threadgate)| { 261 - let maybe_stats = stats.remove(&post.at_uri); 427 + pub struct PostStateLoader(Pool<AsyncPgConnection>); 428 + impl PostStateLoader { 429 + #[instrument(name = "PostStateLoader::get", skip_all)] 430 + pub async fn get(&self, did: &str, subject: &str) -> Option<db::PostStateRet> { 431 + let mut conn = self.0.get().await.unwrap(); 432 + 433 + db::get_post_state(&mut conn, did, subject) 434 + .await 435 + .unwrap_or_else(|e| { 436 + tracing::error!("post state load failed: {e}"); 437 + None 438 + }) 439 + } 440 + 441 + #[instrument(name = "PostStateLoader::get_many", skip_all)] 442 + pub async fn get_many( 443 + &self, 444 + did: &str, 445 + subjects: &[String], 446 + ) -> HashMap<String, db::PostStateRet> { 447 + let mut conn = self.0.get().await.unwrap(); 262 448 263 - (post.at_uri.clone(), (post, threadgate, maybe_stats)) 264 - })), 449 + match db::get_post_states(&mut conn, did, subjects).await { 450 + Ok(res) => HashMap::from_iter(res.into_iter().map(|v| (v.at_uri.clone(), v))), 265 451 Err(e) => { 266 - tracing::error!("post load failed: {e}"); 452 + tracing::error!("post state load failed: {e}"); 267 453 HashMap::new() 268 454 } 269 455 } ··· 271 457 } 272 458 273 459 pub struct EmbedLoader(Pool<AsyncPgConnection>); 274 - #[derive(Debug, Clone)] 460 + #[derive(Debug, Clone, Serialize, Deserialize)] 275 461 pub enum EmbedLoaderRet { 276 462 Images(Vec<models::PostEmbedImage>), 277 463 Video(models::PostEmbedVideo), ··· 280 466 RecordWithMedia(models::PostEmbedRecord, Box<EmbedLoaderRet>), 281 467 } 282 468 impl BatchFn<String, (EmbedLoaderRet, String)> for EmbedLoader { 469 + #[instrument(name = "EmbedLoader", skip_all)] 283 470 async fn load(&mut self, keys: &[String]) -> HashMap<String, (EmbedLoaderRet, String)> { 284 471 let mut conn = self.0.get().await.unwrap(); 285 472 ··· 362 549 pub struct StarterPackLoader(Pool<AsyncPgConnection>); 363 550 type StarterPackLoaderRet = models::StaterPack; 364 551 impl BatchFn<String, StarterPackLoaderRet> for StarterPackLoader { 552 + #[instrument(name = "StarterPackLoader", skip_all)] 365 553 async fn load(&mut self, keys: &[String]) -> HashMap<String, StarterPackLoaderRet> { 366 554 let mut conn = 
self.0.get().await.unwrap(); 367 555 ··· 384 572 } 385 573 } 386 574 387 - pub struct LabelServiceLoader(Pool<AsyncPgConnection>, parakeet_index::Client); 388 - type LabelServiceLoaderRet = ( 389 - models::LabelerService, 390 - Vec<models::LabelDefinition>, 391 - Option<i32>, 392 - ); 575 + pub struct LabelServiceLoader(Pool<AsyncPgConnection>); 576 + type LabelServiceLoaderRet = (models::LabelerService, Vec<models::LabelDefinition>); 393 577 impl BatchFn<String, LabelServiceLoaderRet> for LabelServiceLoader { 578 + #[instrument(name = "LabelServiceLoader", skip_all)] 394 579 async fn load(&mut self, keys: &[String]) -> HashMap<String, LabelServiceLoaderRet> { 395 580 let mut conn = self.0.get().await.unwrap(); 396 581 ··· 408 593 409 594 let defs = defs.grouped_by(&labelers); 410 595 411 - let uris = keys 412 - .iter() 413 - .map(|v| format!("at://{v}/app.bsky.labeler.service/self")) 414 - .collect(); 415 - let stats_req = parakeet_index::GetStatsManyReq { uris }; 416 - let mut stats = self 417 - .1 418 - .get_like_count_many(stats_req) 419 - .await 420 - .unwrap() 421 - .into_inner() 422 - .entries; 423 - 424 596 labelers 425 597 .into_iter() 426 598 .zip(defs) 427 - .map(|(labeler, defs)| { 428 - let likes = stats 429 - .remove(&format!( 430 - "at://{}/app.bsky.labeler.service/self", 431 - &labeler.did 432 - )) 433 - .map(|v| v.likes); 434 - 435 - (labeler.did.clone(), (labeler, defs, likes)) 436 - }) 599 + .map(|(labeler, defs)| (labeler.did.clone(), (labeler, defs))) 437 600 .collect() 438 601 } 439 602 } ··· 451 614 // but it should live here anyway 452 615 pub struct LabelLoader(Pool<AsyncPgConnection>); 453 616 impl LabelLoader { 617 + #[instrument(name = "LabelLoader::load", skip_all)] 454 618 pub async fn load(&self, uri: &str, services: &[LabelConfigItem]) -> Vec<models::Label> { 455 619 let mut conn = self.0.get().await.unwrap(); 456 620 ··· 470 634 }) 471 635 } 472 636 637 + #[instrument(name = "LabelLoader::load_many", skip_all)] 473 638 pub async fn load_many( 474 639 &self, 475 640 uris: &[String], ··· 504 669 505 670 pub struct VerificationLoader(Pool<AsyncPgConnection>); 506 671 impl BatchFn<String, Vec<models::VerificationEntry>> for VerificationLoader { 672 + #[instrument(name = "VerificationLoader", skip_all)] 507 673 async fn load(&mut self, keys: &[String]) -> HashMap<String, Vec<models::VerificationEntry>> { 508 674 let mut conn = self.0.get().await.unwrap(); 509 675
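Note on the loader rework above: the per-loader in-memory caches are replaced by a shared Redis/Valkey cache. `new_plc_loader` wraps each `BatchFn` in a `PrefixedLoaderCache`, which namespaces keys by a prefix and applies a per-loader TTL (3600s for posts and profiles, 60s for handles and verifications), while the stats and state loaders deliberately stay non-cached. The cache type lives in `crate::cache`, which this diff does not show; the sketch below guesses its shape from the constructor call and the `Serialize`/`Deserialize` bounds. The cache interface expected by the vendored dataloader fork, the serde_json encoding, and the `SETEX`-based expiry are all assumptions.

use redis::aio::MultiplexedConnection;
use redis::AsyncCommands;
use serde::{de::DeserializeOwned, Serialize};

// Hypothetical sketch of crate::cache::PrefixedLoaderCache; the real cache
// interface required by the vendored dataloader fork may differ.
pub struct PrefixedLoaderCache<V> {
    conn: MultiplexedConnection,
    prefix: String,
    exp: Option<u64>, // TTL in seconds; None = no expiry
    _marker: std::marker::PhantomData<V>,
}

impl<V: Clone + Serialize + DeserializeOwned> PrefixedLoaderCache<V> {
    pub fn new(conn: &MultiplexedConnection, prefix: String, exp: Option<u64>) -> Self {
        Self {
            conn: conn.clone(), // multiplexed connections are cheap to clone
            prefix,
            exp,
            _marker: std::marker::PhantomData,
        }
    }

    fn key(&self, k: &str) -> String {
        format!("{}:{}", self.prefix, k)
    }

    pub async fn get(&mut self, k: &str) -> Option<V> {
        // assumed encoding: serde_json values stored under "<prefix>:<key>"
        let raw: Option<String> = self.conn.get(self.key(k)).await.ok()?;
        raw.and_then(|s| serde_json::from_str(&s).ok())
    }

    pub async fn insert(&mut self, k: &str, v: &V) {
        let Ok(raw) = serde_json::to_string(v) else { return };
        let _ = match self.exp {
            Some(exp) => self.conn.set_ex::<_, _, ()>(self.key(k), raw, exp).await,
            None => self.conn.set::<_, _, ()>(self.key(k), raw).await,
        };
    }
}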
+22 -5
parakeet/src/main.rs
··· 1 + use axum_tracing_opentelemetry::middleware::{OtelAxumLayer, OtelInResponseLayer}; 1 2 use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; 2 3 use diesel_async::pooled_connection::deadpool::Pool; 3 4 use diesel_async::pooled_connection::AsyncDieselConnectionManager; 4 5 use diesel_async::AsyncPgConnection; 5 6 use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness}; 7 + use redis::aio::MultiplexedConnection; 6 8 use std::sync::Arc; 7 9 use tower_http::cors::{AllowHeaders, AllowOrigin, CorsLayer}; 8 10 use tower_http::trace::TraceLayer; 9 11 10 12 const MIGRATIONS: EmbeddedMigrations = embed_migrations!(); 11 13 14 + mod cache; 12 15 mod config; 13 16 mod db; 14 17 mod hydration; 18 + mod instrumentation; 15 19 mod loaders; 16 20 mod xrpc; 17 21 18 22 #[derive(Clone)] 19 23 pub struct GlobalState { 20 24 pub pool: Pool<AsyncPgConnection>, 25 + pub redis_mp: MultiplexedConnection, 21 26 pub dataloaders: Arc<loaders::Dataloaders>, 22 27 pub resolver: Arc<did_resolver::Resolver>, 23 28 pub index_client: parakeet_index::Client, ··· 28 33 29 34 #[tokio::main] 30 35 async fn main() -> eyre::Result<()> { 31 - tracing_subscriber::fmt::init(); 32 - 33 36 let conf = config::load_config()?; 34 37 38 + instrumentation::init_instruments(&conf.instruments); 39 + 35 40 let db_mgr = AsyncDieselConnectionManager::<AsyncPgConnection>::new(&conf.database_url); 36 41 let pool = Pool::builder(db_mgr).build()?; 37 42 ··· 46 51 tracing::info!("database migrations complete"); 47 52 } 48 53 49 - let index_client = parakeet_index::Client::connect(conf.index_uri).await?; 54 + let redis_client = redis::Client::open(conf.redis_uri)?; 55 + let redis_mp = redis_client.get_multiplexed_tokio_connection().await?; 56 + 57 + let index_client = parakeet_index::connect_with_otel(conf.index_uri) 58 + .await 59 + .map_err(|e| eyre::eyre!(e))?; 50 60 51 61 let dataloaders = Arc::new(loaders::Dataloaders::new( 52 62 pool.clone(), 63 + redis_mp.clone(), 53 64 index_client.clone(), 54 65 )); 55 66 let resolver = Arc::new(did_resolver::Resolver::new(did_resolver::ResolverOpts { ··· 72 83 73 84 let did_doc = did_web_doc(&conf.service); 74 85 86 + let mw = tower::ServiceBuilder::new() 87 + .option_layer(conf.instruments.otel_enable.then(OtelInResponseLayer::default)) 88 + .option_layer(conf.instruments.otel_enable.then(OtelAxumLayer::default)) 89 + .layer(TraceLayer::new_for_http()) 90 + .layer(cors); 91 + 75 92 let app = axum::Router::new() 76 93 .nest("/xrpc", xrpc::xrpc_routes()) 77 94 .route( 78 95 "/.well-known/did.json", 79 96 axum::routing::get(async || axum::Json(did_doc)), 80 97 ) 81 - .layer(TraceLayer::new_for_http()) 82 - .layer(cors) 98 + .layer(mw) 83 99 .with_state(GlobalState { 84 100 pool, 101 + redis_mp, 85 102 dataloaders, 86 103 resolver, 87 104 index_client,
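The middleware stack above is now assembled once with a `ServiceBuilder`: `conf.instruments.otel_enable.then(...)` yields an `Option` of each OpenTelemetry layer, and `option_layer` degrades `None` into a pass-through identity layer. A minimal self-contained sketch of the same toggle pattern (the `otel_enable` flag name comes from this diff; the route and layers here are illustrative, not the real Parakeet stack):

use axum::{routing::get, Router};
use tower_http::trace::TraceLayer;

// Sketch: conditionally-enabled middleware via ServiceBuilder::option_layer.
fn build_router(otel_enable: bool) -> Router {
    let mw = tower::ServiceBuilder::new()
        // bool::then yields Some(layer) only when the flag is set;
        // option_layer turns None into a pass-through identity layer.
        .option_layer(otel_enable.then(TraceLayer::new_for_http))
        .layer(tower_http::cors::CorsLayer::permissive());

    Router::new()
        .route("/healthz", get(|| async { "ok" }))
        .layer(mw)
}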
+5
parakeet/src/sql/list_states.sql
··· 1 + select l.at_uri, lb.at_uri as block, lm.did is not null as muted 2 + from lists l 3 + left join list_blocks lb on l.at_uri = lb.list_uri and lb.did = $1 4 + left join list_mutes lm on l.at_uri = lm.list_uri and lm.did = $1 5 + where l.at_uri = any ($2) and (lm.did is not null or lb.at_uri is not null)
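`list_states.sql` returns one row per list in `$2` that the viewer `$1` has blocked or muted; lists with no viewer state are filtered out entirely, so the resulting map is sparse and a missing key means "no state". The `ListStateRet` struct and `get_list_states` wrapper live in `db.rs`, outside this diff; a plausible sketch in the same `sql_query` style this changeset uses elsewhere (field names follow the SQL column aliases; the include path is an assumption):

use diesel::prelude::*;
use diesel::sql_types::{Array, Bool, Nullable, Text};
use diesel_async::{AsyncPgConnection, RunQueryDsl};

// Guessed shape of db::ListStateRet; names mirror the SQL column aliases.
#[derive(Debug, QueryableByName)]
pub struct ListStateRet {
    #[diesel(sql_type = Text)]
    pub at_uri: String,
    #[diesel(sql_type = Nullable<Text>)]
    pub block: Option<String>, // at-uri of the viewer's listblock record, if any
    #[diesel(sql_type = Bool)]
    pub muted: bool,
}

pub async fn get_list_states(
    conn: &mut AsyncPgConnection,
    did: &str,
    subjects: &[String],
) -> QueryResult<Vec<ListStateRet>> {
    // include path assumes this lives in parakeet/src/db.rs
    diesel::sql_query(include_str!("sql/list_states.sql"))
        .bind::<Text, _>(did)
        .bind::<Array<Text>, _>(subjects)
        .load(conn)
        .await
}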
+16
parakeet/src/sql/post_state.sql
··· 1 + select bq.*, coalesce(bq.at_uri = pinned_uri, false) as pinned 2 + from (select p.at_uri, 3 + p.did, 4 + p.cid, 5 + l.rkey as like_rkey, 6 + r.rkey as repost_rkey, 7 + b.did is not null as bookmarked, 8 + coalesce(pg.rules && ARRAY ['app.bsky.feed.postgate#disableRule'], false) as embed_disabled 9 + from posts p 10 + left join likes l on l.subject = p.at_uri and l.did = $1 11 + left join reposts r on r.post = p.at_uri and r.did = $1 12 + left join bookmarks b on b.subject = p.at_uri and b.did = $1 13 + left join postgates pg on pg.post_uri = p.at_uri 14 + where p.at_uri = any ($2) 15 + and (l.rkey is not null or r.rkey is not null or b.did is not null or pg.rules is not null)) bq, 16 + (select pinned_uri, pinned_cid from profiles where did = $1) pp;
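`post_state.sql` batches every per-viewer flag for a set of posts in one round trip: like/repost rkeys (present only when the viewer has those records), a bookmark bit, whether a postgate disables embedding, and, via the cross join on the viewer's `profiles` row, whether each post is the viewer's pinned post. The inner `where` keeps only posts with at least one piece of state, so `PostStateLoader::get_many` yields a sparse map and callers treat a missing key as "no interaction". Illustrative usage, assuming `PostStateRet` exposes the `like_rkey` column as an `Option<String>` (a hypothetical field access):

// Sketch: consuming the sparse per-viewer post state map.
async fn viewer_liked(
    loaders: &crate::loaders::Dataloaders,
    viewer: &str,
    uris: &[String],
) -> Vec<bool> {
    let states = loaders.post_state.get_many(viewer, uris).await;
    uris.iter()
        // a missing entry means the viewer never interacted with the post
        .map(|uri| states.get(uri).is_some_and(|s| s.like_rkey.is_some()))
        .collect()
}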
+20
parakeet/src/sql/profile_state.sql
··· 1 + with vlb as (select * from v_list_block_exp where did = $1 and subject = any ($2)), 2 + vlm as (select * from v_list_mutes_exp where did = $1 and subject = any ($2)), 3 + ps as (select * from profile_states where did = $1 and subject = any ($2)), 4 + vlb2 as (select subject as did, did as subject, list_uri is not null as blocked 5 + from v_list_block_exp 6 + where did = any ($2) 7 + and subject = $1) 8 + select distinct on (did, subject) did, 9 + subject, 10 + muting, 11 + ps.blocked or vlb2.blocked as blocked, 12 + blocking, 13 + following, 14 + followed, 15 + vlb.list_uri as list_block, 16 + vlm.list_uri as list_mute 17 + from ps 18 + full join vlb using (did, subject) 19 + full join vlm using (did, subject) 20 + full join vlb2 using (did, subject);
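`profile_state.sql` merges three state sources with full joins: the direct `profile_states` rows, list-derived blocks and mutes expanded through the `v_list_block_exp` / `v_list_mutes_exp` views, and, in `vlb2`, list blocks pointing the other way, so `blocked` is true when either side blocks via a list. Because of the full joins every output column can be NULL, which is why the handler code in this diff reads `psr.blocked.unwrap_or_default()` rather than a plain bool. An abridged guess at a matching row struct (the real `ProfileStateRet` is in `db.rs`, outside this diff; the `did`, `following`, and `followed` columns are omitted here):

use diesel::prelude::*;
use diesel::sql_types::{Bool, Nullable, Text};

// Abridged sketch of db::ProfileStateRet: full joins make columns nullable.
#[derive(Debug, QueryableByName)]
pub struct ProfileStateRet {
    #[diesel(sql_type = Text)]
    pub subject: String,
    #[diesel(sql_type = Nullable<Bool>)]
    pub muting: Option<bool>,
    #[diesel(sql_type = Nullable<Bool>)]
    pub blocked: Option<bool>, // subject blocks viewer, directly or via list
    #[diesel(sql_type = Nullable<Text>)]
    pub blocking: Option<String>, // viewer's own block record, if any
    #[diesel(sql_type = Nullable<Text>)]
    pub list_block: Option<String>,
    #[diesel(sql_type = Nullable<Text>)]
    pub list_mute: Option<String>,
}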
+3 -3
parakeet/src/sql/thread.sql
··· 1 - with recursive thread as (select at_uri, parent_uri, root_uri, 0 as depth 1 + with recursive thread as (select at_uri, parent_uri, root_uri, 1 as depth 2 2 from posts 3 - where parent_uri = $1 3 + where parent_uri = $1 and violates_threadgate = FALSE 4 4 union all 5 5 select p.at_uri, p.parent_uri, p.root_uri, thread.depth + 1 6 6 from posts p 7 7 join thread on p.parent_uri = thread.at_uri 8 - where thread.depth <= $2) 8 + where thread.depth <= $2 and p.violates_threadgate = FALSE) 9 9 select * 10 10 from thread 11 11 order by depth desc;
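The replies CTE above now seeds direct replies at depth 1 (it previously started at 0) and drops threadgate-violating posts at both the seed and the recursive step. Its companion `ThreadItem` row struct was removed from `posts.rs` later in this diff and has presumably moved to `crate::db` beside a `get_thread_children` wrapper; a sketch consistent with the call site `crate::db::get_thread_children(&mut conn, &uri, depth as i32)` and the `item.root_uri` access in `thread_v2.rs` (so the field that was commented out in the old struct must now be live):

use diesel::prelude::*;
use diesel_async::{AsyncPgConnection, RunQueryDsl};

// ThreadItem as it presumably now looks in db.rs.
#[derive(Debug, QueryableByName)]
#[diesel(check_for_backend(diesel::pg::Pg))]
pub struct ThreadItem {
    #[diesel(sql_type = diesel::sql_types::Text)]
    pub at_uri: String,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    pub parent_uri: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
    pub root_uri: Option<String>,
    #[diesel(sql_type = diesel::sql_types::Integer)]
    pub depth: i32,
}

pub async fn get_thread_children(
    conn: &mut AsyncPgConnection,
    uri: &str,
    depth: i32,
) -> QueryResult<Vec<ThreadItem>> {
    // include path assumes this lives in parakeet/src/db.rs
    diesel::sql_query(include_str!("sql/thread.sql"))
        .bind::<diesel::sql_types::Text, _>(uri)
        .bind::<diesel::sql_types::Integer, _>(depth)
        .load(conn)
        .await
}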
+13
parakeet/src/sql/thread_branching.sql
··· 1 + with recursive thread as (select at_uri, parent_uri, root_uri, 1 as depth 2 + from posts 3 + where parent_uri = $1 4 + and violates_threadgate = FALSE 5 + union all 6 + (select p.at_uri, p.parent_uri, p.root_uri, thread.depth + 1 7 + from posts p 8 + join thread on p.parent_uri = thread.at_uri 9 + where thread.depth <= $2 10 + and violates_threadgate = FALSE 11 + limit $3)) 12 + select * 13 + from thread;
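`thread_branching.sql` is the v2 variant: `$3` caps how many children each recursion step may add, bounding fan-out on very wide threads. The handler later in this diff passes `branching_factor + 1` (and `THREAD_PARENTS + 1` for the parents query), fetching one sentinel row beyond the cap so it can tell "exactly N" apart from "more than N" (`root_has_more = parents.len() > THREAD_PARENTS`). A generic sketch of that sentinel pattern:

// Sketch: fetch limit+1 rows, then split off the "has more" signal.
fn truncate_with_flag<T>(mut rows: Vec<T>, limit: usize) -> (Vec<T>, bool) {
    let has_more = rows.len() > limit; // the extra sentinel row, if present
    rows.truncate(limit);
    (rows, has_more)
}

// e.g. let (parents, root_has_more) = truncate_with_flag(parents, THREAD_PARENTS);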
+4 -2
parakeet/src/sql/thread_parent.sql
··· 1 1 with recursive parents as (select at_uri, cid, parent_uri, root_uri, 0 as depth 2 2 from posts 3 - where at_uri = (select parent_uri from posts where at_uri = $1) 3 + where 4 + at_uri = (select parent_uri from posts where at_uri = $1 and violates_threadgate = FALSE) 4 5 union all 5 6 select p.at_uri, p.cid, p.parent_uri, p.root_uri, parents.depth + 1 6 7 from posts p 7 8 join parents on p.at_uri = parents.parent_uri 8 - where parents.depth <= $2) 9 + where parents.depth <= $2 10 + and p.violates_threadgate = FALSE) 9 11 select * 10 12 from parents 11 13 order by depth desc;
+6
parakeet/src/sql/thread_v2_hidden_children.sql
··· 1 + select at_uri 2 + from posts 3 + where parent_uri = $1 4 + and at_uri = any (select unnest(hidden_replies) 5 + from threadgates 6 + where post_uri = $2)
+161
parakeet/src/xrpc/app_bsky/bookmark.rs
··· 1 + use crate::hydration::StatefulHydrator; 2 + use crate::xrpc::error::XrpcResult; 3 + use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; 4 + use crate::xrpc::{datetime_cursor, CursorQuery}; 5 + use crate::GlobalState; 6 + use axum::extract::{Query, State}; 7 + use axum::Json; 8 + use diesel::prelude::*; 9 + use diesel_async::RunQueryDsl; 10 + use lexica::app_bsky::bookmark::{BookmarkView, BookmarkViewItem}; 11 + use lexica::app_bsky::feed::{BlockedAuthor, PostView}; 12 + use lexica::StrongRef; 13 + use parakeet_db::{models, schema}; 14 + use serde::{Deserialize, Serialize}; 15 + 16 + const BSKY_ALLOWED_TYPES: &[&str] = &["app.bsky.feed.post"]; 17 + 18 + #[derive(Debug, Deserialize)] 19 + pub struct CreateBookmarkReq { 20 + pub uri: String, 21 + pub cid: String, 22 + } 23 + 24 + pub async fn create_bookmark( 25 + State(state): State<GlobalState>, 26 + auth: AtpAuth, 27 + Json(form): Json<CreateBookmarkReq>, 28 + ) -> XrpcResult<()> { 29 + let mut conn = state.pool.get().await?; 30 + 31 + // strip "at://" then break into parts by '/' 32 + let parts = form.uri[5..].split('/').collect::<Vec<_>>(); 33 + 34 + let data = models::NewBookmark { 35 + did: &auth.0, 36 + rkey: None, 37 + subject: &form.uri, 38 + subject_cid: Some(form.cid), 39 + subject_type: parts[1], 40 + tags: vec![], 41 + }; 42 + 43 + diesel::insert_into(schema::bookmarks::table) 44 + .values(&data) 45 + .on_conflict_do_nothing() 46 + .execute(&mut conn) 47 + .await?; 48 + 49 + Ok(()) 50 + } 51 + 52 + #[derive(Debug, Deserialize)] 53 + pub struct DeleteBookmarkReq { 54 + pub uri: String, 55 + } 56 + 57 + pub async fn delete_bookmark( 58 + State(state): State<GlobalState>, 59 + auth: AtpAuth, 60 + Json(form): Json<DeleteBookmarkReq>, 61 + ) -> XrpcResult<()> { 62 + let mut conn = state.pool.get().await?; 63 + 64 + diesel::delete(schema::bookmarks::table) 65 + .filter( 66 + schema::bookmarks::did 67 + .eq(&auth.0) 68 + .and(schema::bookmarks::subject.eq(&form.uri)), 69 + ) 70 + .execute(&mut conn) 71 + .await?; 72 + 73 + Ok(()) 74 + } 75 + 76 + #[derive(Debug, Serialize)] 77 + pub struct GetBookmarksRes { 78 + #[serde(skip_serializing_if = "Option::is_none")] 79 + cursor: Option<String>, 80 + bookmarks: Vec<BookmarkView>, 81 + } 82 + 83 + pub async fn get_bookmarks( 84 + State(state): State<GlobalState>, 85 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 86 + auth: AtpAuth, 87 + Query(query): Query<CursorQuery>, 88 + ) -> XrpcResult<Json<GetBookmarksRes>> { 89 + let mut conn = state.pool.get().await?; 90 + let did = auth.0.clone(); 91 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, Some(auth)); 92 + 93 + let limit = query.limit.unwrap_or(50).clamp(1, 100); 94 + 95 + let mut bookmarks_query = schema::bookmarks::table 96 + .select(models::Bookmark::as_select()) 97 + .filter(schema::bookmarks::did.eq(&did)) 98 + .filter(schema::bookmarks::subject_type.eq_any(BSKY_ALLOWED_TYPES)) 99 + .into_boxed(); 100 + 101 + if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 102 + bookmarks_query = bookmarks_query.filter(schema::bookmarks::created_at.lt(cursor)); 103 + } 104 + 105 + let results = bookmarks_query 106 + .order(schema::bookmarks::created_at.desc()) 107 + .limit(limit as i64) 108 + .load(&mut conn) 109 + .await?; 110 + 111 + let cursor = results 112 + .last() 113 + .map(|bm| bm.created_at.timestamp_millis().to_string()); 114 + 115 + let uris = results.iter().map(|bm| bm.subject.clone()).collect(); 116 + 117 + let mut posts = hyd.hydrate_posts(uris).await; 118 + 119 + let bookmarks = 
results 120 + .into_iter() 121 + .filter_map(|bookmark| { 122 + let maybe_item = posts.remove(&bookmark.subject); 123 + let maybe_cid = maybe_item.as_ref().map(|v| v.cid.clone()); 124 + 125 + // ensure that either the cid is set in the bookmark record *or* in the post record 126 + // otherwise just ditch. we should have one. 127 + let cid = bookmark.subject_cid.or(maybe_cid)?; 128 + 129 + let item = maybe_item 130 + .map(postview_to_bvi) 131 + .unwrap_or(BookmarkViewItem::NotFound { 132 + uri: bookmark.subject.clone(), 133 + not_found: true, 134 + }); 135 + 136 + let subject = StrongRef::new_from_str(bookmark.subject, &cid).ok()?; 137 + 138 + Some(BookmarkView { 139 + subject, 140 + item, 141 + created_at: bookmark.created_at, 142 + }) 143 + }) 144 + .collect(); 145 + 146 + Ok(Json(GetBookmarksRes { cursor, bookmarks })) 147 + } 148 + 149 + fn postview_to_bvi(post: PostView) -> BookmarkViewItem { 150 + match &post.author.viewer { 151 + Some(v) if v.blocked_by || v.blocking.is_some() => BookmarkViewItem::Blocked { 152 + uri: post.uri, 153 + blocked: true, 154 + author: BlockedAuthor { 155 + did: post.author.did.clone(), 156 + viewer: post.author.viewer, 157 + }, 158 + }, 159 + _ => BookmarkViewItem::Post(post), 160 + } 161 + }
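In `create_bookmark` above, `subject_type` comes from slicing off the `at://` scheme and splitting the URI, so `parts[0]` is the DID and `parts[1]` the collection NSID; `get_bookmarks` later filters to `BSKY_ALLOWED_TYPES` (`app.bsky.feed.post` only). Note that the direct `form.uri[5..]` slice will panic on a URI shorter than the scheme; a defensive variant of the same parse could look like this (hypothetical helper, not part of the diff):

// "at://did:plc:abc/app.bsky.feed.post/3kxyz" -> Some("app.bsky.feed.post")
fn subject_type(uri: &str) -> Option<&str> {
    uri.strip_prefix("at://")?.split('/').nth(1)
}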
+7 -8
parakeet/src/xrpc/app_bsky/feed/likes.rs
··· 1 + use crate::hydration::posts::RawFeedItem; 1 2 use crate::hydration::StatefulHydrator; 2 3 use crate::xrpc::error::{Error, XrpcResult}; 3 4 use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; ··· 57 58 .last() 58 59 .map(|(last, _)| last.timestamp_millis().to_string()); 59 60 60 - let at_uris = results 61 + let raw_feed = results 61 62 .iter() 62 - .map(|(_, uri)| uri.clone()) 63 + .map(|(_, uri)| RawFeedItem::Post { 64 + uri: uri.clone(), 65 + context: None, 66 + }) 63 67 .collect::<Vec<_>>(); 64 68 65 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 66 - 67 - let feed: Vec<_> = results 68 - .into_iter() 69 - .filter_map(|(_, uri)| posts.remove(&uri)) 70 - .collect(); 69 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 71 70 72 71 Ok(Json(FeedRes { cursor, feed })) 73 72 }
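`hydrate_feed_posts` now consumes pre-shaped `RawFeedItem`s rather than bare URIs, pushing repost/pin attribution and feed-context passthrough into the hydrator. The enum is defined in `crate::hydration::posts`, which this diff omits; from its three construction sites in `likes.rs` and `posts.rs` the shape is roughly as follows (field types are inferred and may differ):

use chrono::{DateTime, Utc};

// Inferred sketch; the real definition lives in hydration/posts.rs.
pub enum RawFeedItem {
    Post {
        uri: String,
        context: Option<String>, // feed_context passthrough
    },
    Repost {
        uri: String,  // at-uri of the repost record
        post: String, // at-uri of the reposted post
        by: String,   // DID of the reposter
        at: DateTime<Utc>,
        context: Option<String>,
    },
    Pin {
        uri: String,
        context: Option<String>,
    },
}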
+158 -122
parakeet/src/xrpc/app_bsky/feed/posts.rs
··· 1 + use crate::hydration::posts::RawFeedItem; 1 2 use crate::hydration::StatefulHydrator; 2 3 use crate::xrpc::app_bsky::graph::lists::ListWithCursorQuery; 3 4 use crate::xrpc::error::{Error, XrpcResult}; ··· 16 17 use diesel_async::{AsyncPgConnection, RunQueryDsl}; 17 18 use lexica::app_bsky::actor::ProfileView; 18 19 use lexica::app_bsky::feed::{ 19 - FeedReasonRepost, FeedSkeletonResponse, FeedViewPost, FeedViewPostReason, PostView, 20 - SkeletonReason, ThreadViewPost, ThreadViewPostType, ThreadgateView, 20 + BlockedAuthor, FeedSkeletonResponse, FeedViewPost, PostView, SkeletonReason, ThreadViewPost, 21 + ThreadViewPostType, ThreadgateView, 21 22 }; 22 - use parakeet_db::schema; 23 + use parakeet_db::{models, schema}; 23 24 use reqwest::Url; 24 25 use serde::{Deserialize, Serialize}; 25 26 use std::collections::HashMap; 27 + use tracing::instrument; 26 28 27 29 const FEEDGEN_SERVICE_ID: &str = "#bsky_fg"; 28 30 ··· 113 115 114 116 let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth); 115 117 116 - let at_uris = skeleton.feed.iter().map(|v| v.post.clone()).collect(); 117 118 let repost_skeleton = skeleton 118 119 .feed 119 120 .iter() ··· 122 123 _ => None, 123 124 }) 124 125 .collect::<Vec<_>>(); 125 - 126 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 127 - let mut repost_data = get_skeleton_repost_data(&mut conn, &hyd, repost_skeleton).await; 126 + let mut repost_data = get_skeleton_repost_data(&mut conn, repost_skeleton).await; 128 127 129 - let feed = skeleton 128 + let raw_feed = skeleton 130 129 .feed 131 130 .into_iter() 132 - .filter_map(|item| { 133 - let mut post = posts.remove(&item.post)?; 134 - let reason = match item.reason { 135 - Some(SkeletonReason::Repost { repost }) => { 136 - repost_data.remove(&repost).map(FeedViewPostReason::Repost) 137 - } 138 - Some(SkeletonReason::Pin {}) => Some(FeedViewPostReason::Pin), 139 - _ => None, 140 - }; 141 - 142 - post.reason = reason; 143 - post.feed_context = item.feed_context; 144 - 145 - Some(post) 131 + .filter_map(|v| match v.reason { 132 + Some(SkeletonReason::Repost { repost }) => { 133 + repost_data 134 + .remove_entry(&repost) 135 + .map(|(uri, (by, at))| RawFeedItem::Repost { 136 + uri, 137 + post: v.post, 138 + by, 139 + at: at.and_utc(), 140 + context: v.feed_context, 141 + }) 142 + } 143 + Some(SkeletonReason::Pin {}) => Some(RawFeedItem::Pin { 144 + uri: v.post, 145 + context: v.feed_context, 146 + }), 147 + None => Some(RawFeedItem::Post { 148 + uri: v.post, 149 + context: v.feed_context, 150 + }), 146 151 }) 147 152 .collect(); 148 153 154 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 155 + 149 156 Ok(Json(FeedRes { 150 157 cursor: skeleton.cursor, 151 158 feed, 152 159 })) 153 160 } 154 161 155 - #[derive(Debug, Deserialize)] 162 + #[derive(Debug, Default, Eq, PartialEq, Deserialize)] 156 163 #[serde(rename_all = "snake_case")] 164 + #[allow(clippy::enum_variant_names)] 157 165 pub enum GetAuthorFeedFilter { 166 + #[default] 158 167 PostsWithReplies, 159 168 PostsNoReplies, 160 169 PostsWithMedia, 161 170 PostsAndAuthorThreads, 162 171 PostsWithVideo, 163 - } 164 - 165 - impl Default for GetAuthorFeedFilter { 166 - fn default() -> Self { 167 - Self::PostsWithReplies 168 - } 169 172 } 170 173 171 174 #[derive(Debug, Deserialize)] ··· 187 190 Query(query): Query<GetAuthorFeedQuery>, 188 191 ) -> XrpcResult<Json<FeedRes>> { 189 192 let mut conn = state.pool.get().await?; 190 - let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, 
maybe_auth); 191 193 192 194 let did = get_actor_did(&state.dataloaders, query.actor.clone()).await?; 193 195 194 196 check_actor_status(&mut conn, &did).await?; 195 197 198 + // check if we block the actor or if they block us 199 + if let Some(auth) = &maybe_auth { 200 + if let Some(psr) = crate::db::get_profile_state(&mut conn, &auth.0, &did).await? { 201 + if psr.blocked.unwrap_or_default() { 202 + // they block us 203 + return Err(Error::new(StatusCode::BAD_REQUEST, "BlockedByActor", None)); 204 + } else if psr.blocking.is_some() { 205 + // we block them 206 + return Err(Error::new(StatusCode::BAD_REQUEST, "BlockedActor", None)); 207 + } 208 + } 209 + } 210 + 211 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth); 212 + 213 + let pin = match query.include_pins && query.cursor.is_none() { 214 + false => None, 215 + true => crate::db::get_pinned_post_uri(&mut conn, &did).await?, 216 + }; 217 + 196 218 let limit = query.limit.unwrap_or(50).clamp(1, 100); 197 219 198 - let mut posts_query = schema::posts::table 199 - .select((schema::posts::created_at, schema::posts::at_uri)) 200 - .filter(schema::posts::did.eq(did)) 220 + let mut posts_query = schema::author_feeds::table 221 + .select(models::AuthorFeedItem::as_select()) 222 + .left_join(schema::posts::table.on(schema::posts::at_uri.eq(schema::author_feeds::post))) 223 + .filter(schema::author_feeds::did.eq(&did)) 201 224 .into_boxed(); 202 225 203 226 if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 204 - posts_query = posts_query.filter(schema::posts::created_at.lt(cursor)); 227 + posts_query = posts_query.filter(schema::author_feeds::sort_at.lt(cursor)); 205 228 } 206 229 230 + let author_threads_only = query.filter == GetAuthorFeedFilter::PostsAndAuthorThreads; 207 231 posts_query = match query.filter { 208 - GetAuthorFeedFilter::PostsWithReplies => posts_query, 232 + GetAuthorFeedFilter::PostsWithReplies => { 233 + posts_query.filter(schema::author_feeds::typ.eq("post")) 234 + } 209 235 GetAuthorFeedFilter::PostsNoReplies => { 210 236 posts_query.filter(schema::posts::parent_uri.is_null()) 211 237 } 212 - GetAuthorFeedFilter::PostsWithMedia => posts_query.filter(embed_type_filter(&[ 213 - "app.bsky.embed.video", 214 - "app.bsky.embed.images", 215 - ])), 238 + GetAuthorFeedFilter::PostsWithMedia => posts_query.filter( 239 + embed_type_filter(&["app.bsky.embed.video", "app.bsky.embed.images"]) 240 + .and(schema::author_feeds::typ.eq("post")), 241 + ), 216 242 GetAuthorFeedFilter::PostsAndAuthorThreads => posts_query.filter( 217 243 (schema::posts::parent_uri 218 - .like(format!("at://{}/%", &query.actor)) 244 + .like(format!("at://{did}/%")) 219 245 .or(schema::posts::parent_uri.is_null())) 220 246 .and( 221 247 schema::posts::root_uri 222 - .like(format!("at://{}/%", &query.actor)) 248 + .like(format!("at://{did}/%")) 223 249 .or(schema::posts::root_uri.is_null()), 224 250 ), 225 251 ), 226 - GetAuthorFeedFilter::PostsWithVideo => { 227 - posts_query.filter(embed_type_filter(&["app.bsky.embed.video"])) 228 - } 252 + GetAuthorFeedFilter::PostsWithVideo => posts_query.filter( 253 + embed_type_filter(&["app.bsky.embed.video"]).and(schema::author_feeds::typ.eq("post")), 254 + ), 229 255 }; 230 256 231 257 let results = posts_query 232 - .order(schema::posts::created_at.desc()) 258 + .order(schema::author_feeds::sort_at.desc()) 233 259 .limit(limit as i64) 234 - .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 260 + .load(&mut conn) 235 261 .await?; 236 262 237 263 let cursor = 
results 238 264 .last() 239 - .map(|(last, _)| last.timestamp_millis().to_string()); 265 + .map(|item| item.sort_at.timestamp_millis().to_string()); 240 266 241 - let at_uris = results 242 - .iter() 243 - .map(|(_, uri)| uri.clone()) 267 + let mut raw_feed = results 268 + .into_iter() 269 + .filter_map(|item| match &*item.typ { 270 + "post" => Some(RawFeedItem::Post { 271 + uri: item.post, 272 + context: None, 273 + }), 274 + "repost" => Some(RawFeedItem::Repost { 275 + uri: item.uri, 276 + post: item.post, 277 + by: item.did, 278 + at: item.sort_at, 279 + context: None, 280 + }), 281 + _ => None, 282 + }) 244 283 .collect::<Vec<_>>(); 245 284 246 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 285 + if let Some(post) = pin { 286 + raw_feed.insert( 287 + 0, 288 + RawFeedItem::Pin { 289 + uri: post, 290 + context: None, 291 + }, 292 + ); 293 + } 247 294 248 - let feed = results 249 - .into_iter() 250 - .filter_map(|(_, uri)| posts.remove(&uri)) 251 - .collect(); 295 + let feed = hyd.hydrate_feed_posts(raw_feed, author_threads_only).await; 252 296 253 297 Ok(Json(FeedRes { cursor, feed })) 254 298 } ··· 291 335 .last() 292 336 .map(|(last, _)| last.timestamp_millis().to_string()); 293 337 294 - let at_uris = results 338 + let raw_feed = results 295 339 .iter() 296 - .map(|(_, uri)| uri.clone()) 340 + .map(|(_, uri)| RawFeedItem::Post { 341 + uri: uri.clone(), 342 + context: None, 343 + }) 297 344 .collect::<Vec<_>>(); 298 345 299 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 300 - 301 - let feed = results 302 - .into_iter() 303 - .filter_map(|(_, uri)| posts.remove(&uri)) 304 - .collect(); 346 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 305 347 306 348 Ok(Json(FeedRes { cursor, feed })) 307 349 } ··· 321 363 pub threadgate: Option<ThreadgateView>, 322 364 } 323 365 324 - #[derive(Debug, QueryableByName)] 325 - #[diesel(check_for_backend(diesel::pg::Pg))] 326 - struct ThreadItem { 327 - #[diesel(sql_type = diesel::sql_types::Text)] 328 - at_uri: String, 329 - #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)] 330 - parent_uri: Option<String>, 331 - // #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)] 332 - // root_uri: Option<String>, 333 - #[diesel(sql_type = diesel::sql_types::Integer)] 334 - depth: i32, 335 - } 336 - 337 366 pub async fn get_post_thread( 338 367 State(state): State<GlobalState>, 339 368 AtpAcceptLabelers(labelers): AtpAcceptLabelers, ··· 347 376 let depth = query.depth.unwrap_or(6).clamp(0, 1000); 348 377 let parent_height = query.parent_height.unwrap_or(80).clamp(0, 1000); 349 378 350 - let replies = diesel::sql_query(include_str!("../../../sql/thread.sql")) 351 - .bind::<diesel::sql_types::Text, _>(&uri) 352 - .bind::<diesel::sql_types::Integer, _>(depth as i32) 353 - .load::<ThreadItem>(&mut conn) 354 - .await?; 379 + let root = hyd 380 + .hydrate_post(uri.clone()) 381 + .await 382 + .ok_or(Error::not_found())?; 383 + let threadgate = root.threadgate.clone(); 355 384 356 - let parents = diesel::sql_query(include_str!("../../../sql/thread_parent.sql")) 357 - .bind::<diesel::sql_types::Text, _>(&uri) 358 - .bind::<diesel::sql_types::Integer, _>(parent_height as i32) 359 - .load::<ThreadItem>(&mut conn) 360 - .await?; 385 + if let Some(viewer) = &root.author.viewer { 386 + if viewer.blocked_by || viewer.blocking.is_some() { 387 + return Ok(Json(GetPostThreadRes { 388 + thread: ThreadViewPostType::Blocked { 389 + uri, 390 + blocked: true, 391 + author: BlockedAuthor { 392 + did: 
root.author.did, 393 + viewer: root.author.viewer, 394 + }, 395 + }, 396 + threadgate, 397 + })); 398 + } 399 + } 400 + 401 + let replies = crate::db::get_thread_children(&mut conn, &uri, depth as i32).await?; 402 + let parents = crate::db::get_thread_parents(&mut conn, &uri, parent_height as i32).await?; 361 403 362 404 let reply_uris = replies.iter().map(|item| item.at_uri.clone()).collect(); 363 405 let parent_uris = parents.iter().map(|item| item.at_uri.clone()).collect(); 364 406 365 - let root = hyd 366 - .hydrate_post(uri.clone()) 367 - .await 368 - .ok_or(Error::not_found())?; 369 407 let mut replies_hydrated = hyd.hydrate_posts(reply_uris).await; 370 408 let mut parents_hydrated = hyd.hydrate_posts(parent_uris).await; 371 409 ··· 381 419 continue; 382 420 }; 383 421 384 - entry.push(ThreadViewPostType::Post(Box::new(ThreadViewPost { 385 - post, 386 - parent: None, 387 - replies: this_post_replies, 388 - }))); 422 + entry.push(postview_to_tvpt(post, None, this_post_replies)); 389 423 } 390 424 391 425 let mut root_parent = None; ··· 394 428 395 429 let parent = parents_hydrated 396 430 .remove(&parent.at_uri) 397 - .map(|post| { 398 - ThreadViewPostType::Post(Box::new(ThreadViewPost { 399 - post, 400 - parent: p2, 401 - replies: vec![], 402 - })) 403 - }) 431 + .map(|post| postview_to_tvpt(post, p2, Vec::default())) 404 432 .unwrap_or(ThreadViewPostType::NotFound { 405 433 uri: parent.at_uri.clone(), 406 434 not_found: true, ··· 411 439 412 440 let replies = tmpbuf.remove(&root.uri).unwrap_or_default(); 413 441 414 - let threadgate = root.threadgate.clone(); 415 - 416 442 Ok(Json(GetPostThreadRes { 417 443 threadgate, 418 444 thread: ThreadViewPostType::Post(Box::new(ThreadViewPost { ··· 521 547 } 522 548 523 549 #[derive(Debug, Serialize)] 550 + #[serde(rename_all = "camelCase")] 524 551 pub struct GetRepostedByRes { 525 552 pub uri: String, 526 553 #[serde(skip_serializing_if = "Option::is_none")] ··· 587 614 .or(schema::posts::embed_subtype.eq_any(filter)) 588 615 } 589 616 617 + #[instrument(skip_all)] 590 618 async fn get_feed_skeleton( 591 619 feed: &str, 592 620 service: &str, ··· 628 656 } 629 657 } 630 658 631 - async fn get_skeleton_repost_data<'a>( 659 + #[instrument(skip_all)] 660 + async fn get_skeleton_repost_data( 632 661 conn: &mut AsyncPgConnection, 633 - hyd: &StatefulHydrator<'a>, 634 662 reposts: Vec<String>, 635 - ) -> HashMap<String, FeedReasonRepost> { 663 + ) -> HashMap<String, (String, NaiveDateTime)> { 636 664 let Ok(repost_data) = schema::records::table 637 665 .select(( 638 666 schema::records::at_uri, ··· 645 673 else { 646 674 return HashMap::new(); 647 675 }; 648 - 649 - let profiles = repost_data.iter().map(|(_, did, _)| did.clone()).collect(); 650 - let profiles = hyd.hydrate_profiles_basic(profiles).await; 651 676 652 677 repost_data 653 678 .into_iter() 654 - .filter_map(|(uri, did, indexed_at)| { 655 - let by = profiles.get(&did).cloned()?; 679 + .map(|(uri, did, at)| (uri, (did, at))) 680 + .collect() 681 + } 656 682 657 - let repost = FeedReasonRepost { 658 - by, 659 - uri: Some(uri.clone()), 660 - cid: None, // okay, we do have this, but the app doesn't seem to be bothered about not setting it. 
661 - indexed_at: indexed_at.and_utc(), 662 - }; 663 - 664 - Some((uri, repost)) 665 - }) 666 - .collect() 683 + fn postview_to_tvpt( 684 + post: PostView, 685 + parent: Option<ThreadViewPostType>, 686 + replies: Vec<ThreadViewPostType>, 687 + ) -> ThreadViewPostType { 688 + match &post.author.viewer { 689 + Some(v) if v.blocked_by || v.blocking.is_some() => ThreadViewPostType::Blocked { 690 + uri: post.uri.clone(), 691 + blocked: true, 692 + author: BlockedAuthor { 693 + did: post.author.did, 694 + viewer: post.author.viewer, 695 + }, 696 + }, 697 + _ => ThreadViewPostType::Post(Box::new(ThreadViewPost { 698 + post, 699 + parent, 700 + replies, 701 + })), 702 + } 667 703 }
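`getAuthorFeed` above now reads a dedicated `author_feeds` projection, where posts and reposts are pre-merged under a single `sort_at` ordering column (also the cursor source), and the handler rejects requests early when a block exists in either direction (`BlockedByActor` / `BlockedActor`). The `models::AuthorFeedItem` row lives in `parakeet-db`, outside this diff; the fields the handler touches suggest roughly this shape (column order and extra columns may differ):

use chrono::{DateTime, Utc};
use diesel::prelude::*;

// Guessed author_feeds row model, written as it might appear in parakeet-db.
#[derive(Queryable, Selectable)]
#[diesel(table_name = crate::schema::author_feeds)]
pub struct AuthorFeedItem {
    pub uri: String,            // record at-uri (the repost uri for repost rows)
    pub did: String,            // feed owner, or reposter for repost rows
    pub post: String,           // subject post at-uri
    pub typ: String,            // "post" | "repost"
    pub sort_at: DateTime<Utc>, // ordering timestamp, also the cursor source
}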
+87 -4
parakeet/src/xrpc/app_bsky/graph/lists.rs
··· 1 1 use crate::hydration::StatefulHydrator; 2 2 use crate::xrpc::error::{Error, XrpcResult}; 3 3 use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; 4 - use crate::xrpc::{check_actor_status, datetime_cursor, get_actor_did, ActorWithCursorQuery}; 4 + use crate::xrpc::{ 5 + check_actor_status, datetime_cursor, get_actor_did, ActorWithCursorQuery, CursorQuery, 6 + }; 5 7 use crate::GlobalState; 6 8 use axum::extract::{Query, State}; 7 9 use axum::Json; ··· 19 21 } 20 22 21 23 #[derive(Debug, Serialize)] 22 - pub struct AppBskyGraphGetListsRes { 24 + pub struct GetListsRes { 23 25 #[serde(skip_serializing_if = "Option::is_none")] 24 26 cursor: Option<String>, 25 27 lists: Vec<ListView>, ··· 30 32 AtpAcceptLabelers(labelers): AtpAcceptLabelers, 31 33 maybe_auth: Option<AtpAuth>, 32 34 Query(query): Query<ActorWithCursorQuery>, 33 - ) -> XrpcResult<Json<AppBskyGraphGetListsRes>> { 35 + ) -> XrpcResult<Json<GetListsRes>> { 34 36 let mut conn = state.pool.get().await?; 35 37 let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth); 36 38 ··· 68 70 .filter_map(|(_, uri)| lists.remove(&uri)) 69 71 .collect(); 70 72 71 - Ok(Json(AppBskyGraphGetListsRes { cursor, lists })) 73 + Ok(Json(GetListsRes { cursor, lists })) 72 74 } 73 75 74 76 #[derive(Debug, Serialize)] ··· 135 137 items, 136 138 })) 137 139 } 140 + 141 + pub async fn get_list_mutes( 142 + State(state): State<GlobalState>, 143 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 144 + auth: AtpAuth, 145 + Query(query): Query<CursorQuery>, 146 + ) -> XrpcResult<Json<GetListsRes>> { 147 + let mut conn = state.pool.get().await?; 148 + let did = auth.0.clone(); 149 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, Some(auth)); 150 + 151 + let limit = query.limit.unwrap_or(50).clamp(1, 100); 152 + 153 + let mut mutes_query = schema::list_mutes::table 154 + .select((schema::list_mutes::created_at, schema::list_mutes::list_uri)) 155 + .filter(schema::list_mutes::did.eq(did)) 156 + .into_boxed(); 157 + 158 + if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 159 + mutes_query = mutes_query.filter(schema::list_mutes::created_at.lt(cursor)); 160 + } 161 + 162 + let results = mutes_query 163 + .order(schema::list_mutes::created_at.desc()) 164 + .limit(limit as i64) 165 + .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 166 + .await?; 167 + 168 + let cursor = results 169 + .last() 170 + .map(|(last, _)| last.timestamp_millis().to_string()); 171 + 172 + let uris = results.iter().map(|(_, uri)| uri.clone()).collect(); 173 + 174 + let lists = hyd.hydrate_lists(uris).await; 175 + let lists = lists.into_values().collect::<Vec<_>>(); 176 + 177 + Ok(Json(GetListsRes { cursor, lists })) 178 + } 179 + 180 + pub async fn get_list_blocks( 181 + State(state): State<GlobalState>, 182 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 183 + auth: AtpAuth, 184 + Query(query): Query<CursorQuery>, 185 + ) -> XrpcResult<Json<GetListsRes>> { 186 + let mut conn = state.pool.get().await?; 187 + let did = auth.0.clone(); 188 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, Some(auth)); 189 + 190 + let limit = query.limit.unwrap_or(50).clamp(1, 100); 191 + 192 + let mut blocks_query = schema::list_blocks::table 193 + .select(( 194 + schema::list_blocks::created_at, 195 + schema::list_blocks::list_uri, 196 + )) 197 + .filter(schema::list_blocks::did.eq(did)) 198 + .into_boxed(); 199 + 200 + if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 201 + 
blocks_query = blocks_query.filter(schema::list_blocks::created_at.lt(cursor)); 202 + } 203 + 204 + let results = blocks_query 205 + .order(schema::list_blocks::created_at.desc()) 206 + .limit(limit as i64) 207 + .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 208 + .await?; 209 + 210 + let cursor = results 211 + .last() 212 + .map(|(last, _)| last.timestamp_millis().to_string()); 213 + 214 + let uris = results.iter().map(|(_, uri)| uri.clone()).collect(); 215 + 216 + let lists = hyd.hydrate_lists(uris).await; 217 + let lists = lists.into_values().collect::<Vec<_>>(); 218 + 219 + Ok(Json(GetListsRes { cursor, lists })) 220 + }
+1
parakeet/src/xrpc/app_bsky/graph/mod.rs
··· 1 1 pub mod lists; 2 + pub mod mutes; 2 3 pub mod relations; 3 4 pub mod starter_packs;
+148
parakeet/src/xrpc/app_bsky/graph/mutes.rs
··· 1 + use crate::hydration::StatefulHydrator; 2 + use crate::xrpc::error::XrpcResult; 3 + use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; 4 + use crate::xrpc::{datetime_cursor, CursorQuery}; 5 + use crate::GlobalState; 6 + use axum::extract::{Query, State}; 7 + use axum::Json; 8 + use diesel::prelude::*; 9 + use diesel_async::RunQueryDsl; 10 + use lexica::app_bsky::actor::ProfileView; 11 + use parakeet_db::{models, schema}; 12 + use serde::{Deserialize, Serialize}; 13 + 14 + #[derive(Debug, Serialize)] 15 + pub struct GetMutesRes { 16 + #[serde(skip_serializing_if = "Option::is_none")] 17 + cursor: Option<String>, 18 + mutes: Vec<ProfileView>, 19 + } 20 + 21 + pub async fn get_mutes( 22 + State(state): State<GlobalState>, 23 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 24 + auth: AtpAuth, 25 + Query(query): Query<CursorQuery>, 26 + ) -> XrpcResult<Json<GetMutesRes>> { 27 + let mut conn = state.pool.get().await?; 28 + let did = auth.0.clone(); 29 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, Some(auth)); 30 + 31 + let limit = query.limit.unwrap_or(50).clamp(1, 100); 32 + 33 + let mut muted_query = schema::mutes::table 34 + .select((schema::mutes::created_at, schema::mutes::subject)) 35 + .filter(schema::mutes::did.eq(did)) 36 + .into_boxed(); 37 + 38 + if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 39 + muted_query = muted_query.filter(schema::mutes::created_at.lt(cursor)); 40 + } 41 + 42 + let results = muted_query 43 + .order(schema::mutes::created_at.desc()) 44 + .limit(limit as i64) 45 + .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 46 + .await?; 47 + 48 + let cursor = results 49 + .last() 50 + .map(|(last, _)| last.timestamp_millis().to_string()); 51 + 52 + let dids = results.iter().map(|(_, did)| did.clone()).collect(); 53 + 54 + let profiles = hyd.hydrate_profiles(dids).await; 55 + let mutes = profiles.into_values().collect::<Vec<_>>(); 56 + 57 + Ok(Json(GetMutesRes { cursor, mutes })) 58 + } 59 + 60 + #[derive(Debug, Deserialize)] 61 + pub struct MuteActorReq { 62 + pub actor: String, 63 + } 64 + 65 + #[derive(Debug, Deserialize)] 66 + pub struct MuteActorListReq { 67 + pub list: String, 68 + } 69 + 70 + pub async fn mute_actor( 71 + State(state): State<GlobalState>, 72 + auth: AtpAuth, 73 + Json(form): Json<MuteActorReq>, 74 + ) -> XrpcResult<()> { 75 + let mut conn = state.pool.get().await?; 76 + 77 + let data = models::NewMute { 78 + did: &auth.0, 79 + subject: &form.actor, 80 + }; 81 + 82 + diesel::insert_into(schema::mutes::table) 83 + .values(&data) 84 + .on_conflict_do_nothing() 85 + .execute(&mut conn) 86 + .await?; 87 + 88 + Ok(()) 89 + } 90 + 91 + pub async fn mute_actor_list( 92 + State(state): State<GlobalState>, 93 + auth: AtpAuth, 94 + Json(form): Json<MuteActorListReq>, 95 + ) -> XrpcResult<()> { 96 + let mut conn = state.pool.get().await?; 97 + 98 + let data = models::NewListMute { 99 + did: &auth.0, 100 + list_uri: &form.list, 101 + }; 102 + 103 + diesel::insert_into(schema::list_mutes::table) 104 + .values(&data) 105 + .on_conflict_do_nothing() 106 + .execute(&mut conn) 107 + .await?; 108 + 109 + Ok(()) 110 + } 111 + 112 + pub async fn unmute_actor( 113 + State(state): State<GlobalState>, 114 + auth: AtpAuth, 115 + Json(form): Json<MuteActorReq>, 116 + ) -> XrpcResult<()> { 117 + let mut conn = state.pool.get().await?; 118 + 119 + diesel::delete(schema::mutes::table) 120 + .filter( 121 + schema::mutes::did 122 + .eq(&auth.0) 123 + .and(schema::mutes::subject.eq(&form.actor)), 124 + ) 125 + 
.execute(&mut conn) 126 + .await?; 127 + 128 + Ok(()) 129 + } 130 + 131 + pub async fn unmute_actor_list( 132 + State(state): State<GlobalState>, 133 + auth: AtpAuth, 134 + Json(form): Json<MuteActorListReq>, 135 + ) -> XrpcResult<()> { 136 + let mut conn = state.pool.get().await?; 137 + 138 + diesel::delete(schema::list_mutes::table) 139 + .filter( 140 + schema::list_mutes::did 141 + .eq(&auth.0) 142 + .and(schema::list_mutes::list_uri.eq(&form.list)), 143 + ) 144 + .execute(&mut conn) 145 + .await?; 146 + 147 + Ok(()) 148 + }
+47 -1
parakeet/src/xrpc/app_bsky/graph/relations.rs
··· 1 1 use crate::hydration::StatefulHydrator; 2 2 use crate::xrpc::error::{Error, XrpcResult}; 3 3 use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; 4 - use crate::xrpc::{datetime_cursor, get_actor_did, ActorWithCursorQuery}; 4 + use crate::xrpc::{datetime_cursor, get_actor_did, ActorWithCursorQuery, CursorQuery}; 5 5 use crate::GlobalState; 6 6 use axum::extract::{Query, State}; 7 7 use axum::Json; ··· 10 10 use lexica::app_bsky::actor::ProfileView; 11 11 use parakeet_db::schema; 12 12 use serde::Serialize; 13 + 14 + #[derive(Debug, Serialize)] 15 + pub struct GetBlocksRes { 16 + #[serde(skip_serializing_if = "Option::is_none")] 17 + cursor: Option<String>, 18 + blocks: Vec<ProfileView>, 19 + } 20 + 21 + pub async fn get_blocks( 22 + State(state): State<GlobalState>, 23 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 24 + auth: AtpAuth, 25 + Query(query): Query<CursorQuery>, 26 + ) -> XrpcResult<Json<GetBlocksRes>> { 27 + let mut conn = state.pool.get().await?; 28 + let did = auth.0.clone(); 29 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, Some(auth)); 30 + 31 + let limit = query.limit.unwrap_or(50).clamp(1, 100); 32 + 33 + let mut blocked_query = schema::blocks::table 34 + .select((schema::blocks::created_at, schema::blocks::subject)) 35 + .filter(schema::blocks::did.eq(did)) 36 + .into_boxed(); 37 + 38 + if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 39 + blocked_query = blocked_query.filter(schema::blocks::created_at.lt(cursor)); 40 + } 41 + 42 + let results = blocked_query 43 + .order(schema::blocks::created_at.desc()) 44 + .limit(limit as i64) 45 + .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 46 + .await?; 47 + 48 + let cursor = results 49 + .last() 50 + .map(|(last, _)| last.timestamp_millis().to_string()); 51 + 52 + let dids = results.iter().map(|(_, did)| did.clone()).collect(); 53 + 54 + let profiles = hyd.hydrate_profiles(dids).await; 55 + let blocks = profiles.into_values().collect::<Vec<_>>(); 56 + 57 + Ok(Json(GetBlocksRes { cursor, blocks })) 58 + } 13 59 14 60 #[derive(Debug, Serialize)] 15 61 pub struct AppBskyGraphGetFollowersRes {
+43 -3
parakeet/src/xrpc/app_bsky/mod.rs
··· 1 - use axum::routing::get; 1 + use axum::routing::{get, post}; 2 2 use axum::Router; 3 3 4 4 mod actor; 5 + mod bookmark; 5 6 mod feed; 6 7 mod graph; 7 8 mod labeler; 9 + mod unspecced; 8 10 9 11 #[rustfmt::skip] 10 12 pub fn routes() -> Router<crate::GlobalState> { 11 13 Router::new() 14 + .route("/app.bsky.actor.getPreferences", get(not_implemented)) 15 + .route("/app.bsky.actor.putPreferences", post(not_implemented)) 12 16 .route("/app.bsky.actor.getProfile", get(actor::get_profile)) 13 17 .route("/app.bsky.actor.getProfiles", get(actor::get_profiles)) 18 + // TODO: app.bsky.actor.getSuggestions (recs) 19 + // TODO: app.bsky.actor.searchActor (search) 20 + // TODO: app.bsky.actor.searchActorTypeahead (search) 21 + .route("/app.bsky.bookmark.createBookmark", post(bookmark::create_bookmark)) 22 + .route("/app.bsky.bookmark.deleteBookmark", post(bookmark::delete_bookmark)) 23 + .route("/app.bsky.bookmark.getBookmarks", get(bookmark::get_bookmarks)) 14 24 .route("/app.bsky.feed.getActorFeeds", get(feed::feedgen::get_actor_feeds)) 15 25 .route("/app.bsky.feed.getActorLikes", get(feed::likes::get_actor_likes)) 16 26 .route("/app.bsky.feed.getAuthorFeed", get(feed::posts::get_author_feed)) 17 27 .route("/app.bsky.feed.getFeed", get(feed::posts::get_feed)) 28 + .route("/app.bsky.feed.getFeedGenerator", get(feed::feedgen::get_feed_generator)) 29 + .route("/app.bsky.feed.getFeedGenerators", get(feed::feedgen::get_feed_generators)) 18 30 .route("/app.bsky.feed.getLikes", get(feed::likes::get_likes)) 19 31 .route("/app.bsky.feed.getListFeed", get(feed::posts::get_list_feed)) 20 32 .route("/app.bsky.feed.getPostThread", get(feed::posts::get_post_thread)) 21 33 .route("/app.bsky.feed.getPosts", get(feed::posts::get_posts)) 22 34 .route("/app.bsky.feed.getQuotes", get(feed::posts::get_quotes)) 23 35 .route("/app.bsky.feed.getRepostedBy", get(feed::posts::get_reposted_by)) 24 - .route("/app.bsky.feed.getFeedGenerator", get(feed::feedgen::get_feed_generator)) 25 - .route("/app.bsky.feed.getFeedGenerators", get(feed::feedgen::get_feed_generators)) 36 + // TODO: app.bsky.feed.getSuggestedFeeds (recs) 37 + // TODO: app.bsky.feed.getTimeline (complicated) 38 + // TODO: app.bsky.feed.searchPosts (search) 26 39 .route("/app.bsky.graph.getActorStarterPacks", get(graph::starter_packs::get_actor_starter_packs)) 40 + .route("/app.bsky.graph.getBlocks", get(graph::relations::get_blocks)) 27 41 .route("/app.bsky.graph.getFollowers", get(graph::relations::get_followers)) 28 42 .route("/app.bsky.graph.getFollows", get(graph::relations::get_follows)) 43 + // TODO: app.bsky.graph.getKnownFollowers 29 44 .route("/app.bsky.graph.getList", get(graph::lists::get_list)) 45 + .route("/app.bsky.graph.getListBlocks", get(graph::lists::get_list_blocks)) 46 + .route("/app.bsky.graph.getListMutes", get(graph::lists::get_list_mutes)) 30 47 .route("/app.bsky.graph.getLists", get(graph::lists::get_lists)) 48 + .route("/app.bsky.graph.getMutes", get(graph::mutes::get_mutes)) 49 + // TODO: app.bsky.graph.getRelationships 31 50 .route("/app.bsky.graph.getStarterPack", get(graph::starter_packs::get_starter_pack)) 32 51 .route("/app.bsky.graph.getStarterPacks", get(graph::starter_packs::get_starter_packs)) 52 + // TODO: app.bsky.graph.getSuggestedFollows (recs) 53 + .route("/app.bsky.graph.muteActor", post(graph::mutes::mute_actor)) 54 + .route("/app.bsky.graph.muteActorList", post(graph::mutes::mute_actor_list)) 55 + // TODO: app.bsky.graph.muteThread (notifs) 56 + // TODO: app.bsky.graph.searchStarterPacks (search) 57 + 
.route("/app.bsky.graph.unmuteActor", post(graph::mutes::unmute_actor)) 58 + .route("/app.bsky.graph.unmuteActorList", post(graph::mutes::unmute_actor_list)) 59 + // TODO: app.bsky.graph.unmuteThread (notifs) 33 60 .route("/app.bsky.labeler.getServices", get(labeler::get_services)) 61 + // TODO: app.bsky.notification.getPreferences 62 + // TODO: app.bsky.notification.getUnreadCount 63 + // TODO: app.bsky.notification.listActivitySubscriptions 64 + // TODO: app.bsky.notification.listNotifications 65 + // TODO: app.bsky.notification.putActivitySubscriptions 66 + // TODO: app.bsky.notification.putPreferences 67 + // TODO: app.bsky.notification.putPreferencesV2 68 + .route("/app.bsky.unspecced.getPostThreadV2", get(unspecced::thread_v2::get_post_thread_v2)) 69 + .route("/app.bsky.unspecced.getPostThreadOtherV2", get(unspecced::thread_v2::get_post_thread_other_v2)) 70 + } 71 + 72 + async fn not_implemented() -> axum::http::StatusCode { 73 + axum::http::StatusCode::NOT_IMPLEMENTED 34 74 }
+1
parakeet/src/xrpc/app_bsky/unspecced/mod.rs
··· 1 + pub mod thread_v2;
+379
parakeet/src/xrpc/app_bsky/unspecced/thread_v2.rs
··· 1 + use crate::db::ThreadItem; 2 + use crate::hydration::StatefulHydrator; 3 + use crate::xrpc::error::{Error, XrpcResult}; 4 + use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; 5 + use crate::xrpc::normalise_at_uri; 6 + use crate::GlobalState; 7 + use axum::extract::{Query, State}; 8 + use axum::Json; 9 + use itertools::Itertools; 10 + use lexica::app_bsky::feed::{BlockedAuthor, PostView, ThreadgateView}; 11 + use lexica::app_bsky::unspecced::{ThreadItemPost, ThreadV2Item, ThreadV2ItemType}; 12 + use serde::{Deserialize, Serialize}; 13 + use std::cmp::Ordering; 14 + use std::collections::{HashMap, HashSet}; 15 + 16 + const THREAD_PARENTS: usize = 50; 17 + const DEFAULT_BRANCHING: u32 = 10; 18 + const DEFAULT_DEPTH: u32 = 6; 19 + 20 + #[derive(Copy, Clone, Debug, Default, Deserialize)] 21 + #[serde(rename_all = "lowercase")] 22 + pub enum PostThreadSort { 23 + Newest, 24 + #[default] 25 + Oldest, 26 + Top, 27 + } 28 + 29 + #[derive(Debug, Deserialize)] 30 + #[serde(rename_all = "camelCase")] 31 + pub struct GetPostThreadV2Req { 32 + pub anchor: String, 33 + pub above: Option<bool>, 34 + pub below: Option<u32>, 35 + pub branching_factor: Option<u32>, 36 + #[serde(default)] 37 + pub sort: PostThreadSort, 38 + } 39 + 40 + #[derive(Debug, Serialize)] 41 + #[serde(rename_all = "camelCase")] 42 + pub struct GetPostThreadV2Res { 43 + pub thread: Vec<ThreadV2Item>, 44 + #[serde(skip_serializing_if = "Option::is_none")] 45 + pub threadgate: Option<ThreadgateView>, 46 + pub has_other_replies: bool, 47 + } 48 + 49 + pub async fn get_post_thread_v2( 50 + State(state): State<GlobalState>, 51 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 52 + maybe_auth: Option<AtpAuth>, 53 + Query(query): Query<GetPostThreadV2Req>, 54 + ) -> XrpcResult<Json<GetPostThreadV2Res>> { 55 + let mut conn = state.pool.get().await?; 56 + let maybe_did = maybe_auth.clone().map(|v| v.0); 57 + let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth); 58 + 59 + let uri = normalise_at_uri(&state.dataloaders, &query.anchor).await?; 60 + let depth = query.below.unwrap_or(DEFAULT_DEPTH).clamp(0, 20) as i32; 61 + let branching_factor = query 62 + .branching_factor 63 + .unwrap_or(DEFAULT_BRANCHING) 64 + .clamp(0, 100) as i32; 65 + 66 + let anchor = hyd 67 + .hydrate_post(uri.clone()) 68 + .await 69 + .ok_or(Error::not_found())?; 70 + 71 + if let Some(v) = &anchor.author.viewer { 72 + if v.blocked_by || v.blocking.is_some() { 73 + let block = ThreadV2ItemType::Blocked { 74 + author: BlockedAuthor { 75 + did: anchor.author.did, 76 + viewer: anchor.author.viewer, 77 + }, 78 + }; 79 + 80 + return Ok(Json(GetPostThreadV2Res { 81 + thread: vec![ThreadV2Item { 82 + uri, 83 + depth: 0, 84 + value: block, 85 + }], 86 + threadgate: anchor.threadgate, 87 + has_other_replies: false, 88 + })); 89 + } 90 + } 91 + 92 + // get the root post URI (if there is one) and return its author's DID. 93 + let root_uri = crate::db::get_root_post(&mut conn, &uri) 94 + .await? 95 + .unwrap_or(uri.clone()); 96 + let root_did = root_uri[5..].split('/').collect::<Vec<_>>()[0]; 97 + 98 + let replies = 99 + crate::db::get_thread_children_branching(&mut conn, &uri, depth, branching_factor + 1) 100 + .await?; 101 + let reply_uris = replies 102 + .iter() 103 + .map(|item| item.at_uri.clone()) 104 + .collect::<Vec<_>>(); 105 + 106 + // bluesky seems to use -50 atm. we get 1 extra to know if to set more_parents. 
107 + let parents = match query.above.unwrap_or(true) { 108 + true => crate::db::get_thread_parents(&mut conn, &uri, THREAD_PARENTS as i32 + 1).await?, 109 + false => vec![], 110 + }; 111 + let parent_uris = parents 112 + .iter() 113 + .map(|item| item.at_uri.clone()) 114 + .collect::<Vec<_>>(); 115 + 116 + let (mut replies_hyd, mut parents_hyd) = tokio::join!( 117 + hyd.hydrate_posts(reply_uris), 118 + hyd.hydrate_posts(parent_uris), 119 + ); 120 + 121 + let threadgate = anchor.threadgate.clone(); 122 + let hidden: HashSet<_, std::hash::RandomState> = match &threadgate { 123 + Some(tg) => crate::db::get_threadgate_hiddens(&mut conn, &tg.uri).await?, 124 + None => None, 125 + } 126 + .map(|hiddens| HashSet::from_iter(Vec::from(hiddens))) 127 + .unwrap_or_default(); 128 + 129 + let root_has_more = parents.len() > THREAD_PARENTS; 130 + let mut is_op_thread = true; 131 + 132 + let mut thread = Vec::with_capacity(1 + replies.len() + parents.len()); 133 + 134 + thread.extend( 135 + parents 136 + .into_iter() 137 + .tail(THREAD_PARENTS) 138 + .enumerate() 139 + .map(|(idx, item)| { 140 + let value = parents_hyd 141 + .remove(&item.at_uri) 142 + .map(|post| { 143 + if let Some(v) = &post.author.viewer { 144 + if v.blocked_by || v.blocking.is_some() { 145 + return ThreadV2ItemType::Blocked { 146 + author: BlockedAuthor { 147 + did: post.author.did, 148 + viewer: post.author.viewer, 149 + }, 150 + }; 151 + } 152 + } 153 + 154 + let op_thread = (is_op_thread 155 + || item.root_uri.is_none() && item.parent_uri.is_none()) 156 + && post.author.did == root_did; 157 + 158 + ThreadV2ItemType::Post(ThreadItemPost { 159 + post, 160 + more_parents: idx == 0 && root_has_more, 161 + more_replies: 0, 162 + op_thread, 163 + hidden_by_threadgate: false, 164 + muted_by_viewer: false, 165 + }) 166 + }) 167 + .unwrap_or(ThreadV2ItemType::NotFound {}); 168 + 169 + ThreadV2Item { 170 + uri: item.at_uri, 171 + depth: -item.depth - 1, 172 + value, 173 + } 174 + }), 175 + ); 176 + 177 + is_op_thread = is_op_thread && anchor.author.did == root_did; 178 + thread.push(ThreadV2Item { 179 + uri: uri.clone(), 180 + depth: 0, 181 + value: ThreadV2ItemType::Post(ThreadItemPost { 182 + post: anchor, 183 + more_parents: false, 184 + more_replies: 0, 185 + op_thread: is_op_thread, 186 + hidden_by_threadgate: false, 187 + muted_by_viewer: false, 188 + }), 189 + }); 190 + 191 + let mut replies_grouped = replies 192 + .into_iter() 193 + .into_group_map_by(|item| item.parent_uri.clone().unwrap_or_default()); 194 + 195 + // start with the anchor 196 + let (children, has_other_replies) = build_thread_children( 197 + &mut replies_grouped, 198 + &mut replies_hyd, 199 + &hidden, 200 + &uri, 201 + is_op_thread, 202 + 1, 203 + &BuildThreadChildrenOpts { 204 + root_did, 205 + sort: query.sort, 206 + maybe_did: &maybe_did, 207 + max_depth: depth, 208 + }, 209 + ); 210 + thread.extend(children); 211 + 212 + Ok(Json(GetPostThreadV2Res { 213 + thread, 214 + threadgate, 215 + has_other_replies, 216 + })) 217 + } 218 + 219 + #[derive(Debug, Deserialize)] 220 + #[serde(rename_all = "camelCase")] 221 + pub struct GetPostThreadOtherV2Req { 222 + pub anchor: String, 223 + } 224 + 225 + #[derive(Debug, Serialize)] 226 + #[serde(rename_all = "camelCase")] 227 + pub struct GetPostThreadOtherV2Res { 228 + pub thread: Vec<ThreadV2Item>, 229 + } 230 + 231 + pub async fn get_post_thread_other_v2( 232 + State(state): State<GlobalState>, 233 + AtpAcceptLabelers(labelers): AtpAcceptLabelers, 234 + maybe_auth: Option<AtpAuth>, 235 + Query(query): 
Query<GetPostThreadOtherV2Req>,
236 + ) -> XrpcResult<Json<GetPostThreadOtherV2Res>> {
237 +     let mut conn = state.pool.get().await?;
238 +     let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth);
239 +
240 +     let uri = normalise_at_uri(&state.dataloaders, &query.anchor).await?;
241 +
242 +     let root = crate::db::get_root_post(&mut conn, &uri)
243 +         .await?
244 +         .unwrap_or(uri.clone());
245 +
246 +     // this only returns immediate children (depth==1) where hiddenByThreadgate=TRUE
247 +     let replies = crate::db::get_thread_children_hidden(&mut conn, &uri, &root).await?;
248 +     let reply_uris = replies
249 +         .into_iter()
250 +         .map(|item| item.at_uri)
251 +         .collect::<Vec<_>>();
252 +     let thread = hyd
253 +         .hydrate_posts(reply_uris)
254 +         .await
255 +         .into_iter()
256 +         .filter(|(_, post)| !matches!(&post.author.viewer, Some(viewer) if viewer.blocked_by || viewer.blocking.is_some()))
257 +         .map(|(uri, post)| {
258 +             let post = ThreadItemPost {
259 +                 post,
260 +                 more_parents: false,
261 +                 more_replies: 0,
262 +                 op_thread: false,
263 +                 hidden_by_threadgate: true,
264 +                 muted_by_viewer: false,
265 +             };
266 +
267 +             ThreadV2Item {
268 +                 uri,
269 +                 depth: 1,
270 +                 value: ThreadV2ItemType::Post(post),
271 +             }
272 +         })
273 +         .collect();
274 +
275 +     Ok(Json(GetPostThreadOtherV2Res { thread }))
276 + }
277 +
278 + #[derive(Debug)]
279 + struct BuildThreadChildrenOpts<'a> {
280 +     root_did: &'a str,
281 +     sort: PostThreadSort,
282 +     maybe_did: &'a Option<String>,
283 +     max_depth: i32,
284 + }
285 +
286 + fn build_thread_children(
287 +     grouped_replies: &mut HashMap<String, Vec<ThreadItem>>,
288 +     replies_hyd: &mut HashMap<String, PostView>,
289 +     hidden: &HashSet<String>,
290 +     parent: &str,
291 +     is_op_thread: bool,
292 +     depth: i32,
293 +     opts: &BuildThreadChildrenOpts,
294 + ) -> (Vec<ThreadV2Item>, bool) {
295 +     let mut has_other_replies = false;
296 +
297 +     let Some(replies) = grouped_replies.remove(parent) else {
298 +         return (Vec::default(), has_other_replies);
299 +     };
300 +
301 +     let replies = replies
302 +         .into_iter()
303 +         .filter_map(|item| replies_hyd.remove(&item.at_uri))
304 +         .sorted_by(sort_replies(&opts.sort));
305 +
306 +     let mut out = Vec::new();
307 +
308 +     for post in replies {
309 +         let reply_count = grouped_replies
310 +             .get(&post.uri)
311 +             .map(|v| v.len())
312 +             .unwrap_or_default();
313 +         let at_max = depth == opts.max_depth;
314 +         let more_replies = if at_max { reply_count } else { 0 };
315 +         let op_thread = is_op_thread && post.author.did == opts.root_did;
316 +
317 +         // don't push replies with a block relation; Bluesky doesn't emit a Blocked item for replies.
318 +         if let Some(v) = &post.author.viewer {
319 +             if v.blocked_by || v.blocking.is_some() {
320 +                 continue;
321 +             }
322 +         }
323 +
324 +         // check if the post is hidden AND we're NOT the author (hidden posts still show for their author)
325 +         if hidden.contains(&post.uri) && !did_is_cur(opts.maybe_did, &post.author.did) {
326 +             // post is hidden - don't push it to the thread.
327 +             if depth == 1 {
328 +                 has_other_replies = true;
329 +             }
330 +             continue;
331 +         }
332 +
333 +         let uri = post.uri.clone();
334 +         out.push(ThreadV2Item {
335 +             uri: uri.clone(),
336 +             depth,
337 +             value: ThreadV2ItemType::Post(ThreadItemPost {
338 +                 post,
339 +                 more_parents: false,
340 +                 more_replies: more_replies as i32,
341 +                 op_thread,
342 +                 hidden_by_threadgate: false,
343 +                 muted_by_viewer: false,
344 +             }),
345 +         });
346 +
347 +         if !at_max {
348 +             // we don't care about has_other_replies when recursing
349 +             let (children, _) = build_thread_children(
350 +                 grouped_replies,
351 +                 replies_hyd,
352 +                 hidden,
353 +                 &uri,
354 +                 op_thread,
355 +                 depth + 1,
356 +                 opts,
357 +             );
358 +
359 +             out.extend(children);
360 +         }
361 +     }
362 +
363 +     (out, has_other_replies)
364 + }
365 +
366 + fn sort_replies(sort: &PostThreadSort) -> impl Fn(&PostView, &PostView) -> Ordering + use<'_> {
367 +     move |a: &PostView, b: &PostView| match sort {
368 +         PostThreadSort::Newest => b.indexed_at.cmp(&a.indexed_at),
369 +         PostThreadSort::Oldest => a.indexed_at.cmp(&b.indexed_at),
370 +         PostThreadSort::Top => b.stats.like_count.cmp(&a.stats.like_count),
371 +     }
372 + }
373 +
374 + fn did_is_cur(cur: &Option<String>, did: &str) -> bool {
375 +     cur.as_deref() == Some(did)
376 + }
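Note on the depth numbering above: parents are emitted with negative depths (depth: -item.depth - 1), the anchor at 0, and replies counting up from 1. An illustrative slice of a response, with invented URIs:

    depth -2  at://did:plc:alice/app.bsky.feed.post/root
    depth -1  at://did:plc:bob/app.bsky.feed.post/parent
    depth  0  at://did:plc:carol/app.bsky.feed.post/anchor   (the query anchor)
    depth  1  at://did:plc:dan/app.bsky.feed.post/reply
    depth  2  at://did:plc:erin/app.bsky.feed.post/nested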
+2
parakeet/src/xrpc/com_atproto/mod.rs
··· 2 2 use axum::Router; 3 3 4 4 mod identity; 5 + mod repo; 5 6 6 7 #[rustfmt::skip] 7 8 pub fn routes() -> Router<crate::GlobalState> { 8 9 Router::new() 9 10 .route("/com.atproto.identity.resolveHandle", get(identity::resolve_handle)) 11 + .route("/com.atproto.repo.getRecord", get(repo::get_record)) 10 12 }
+77
parakeet/src/xrpc/com_atproto/repo.rs
··· 1 + use crate::xrpc::check_actor_status;
2 + use crate::xrpc::error::{Error, XrpcResult};
3 + use crate::xrpc::extract::AtpAuth;
4 + use crate::GlobalState;
5 + use axum::extract::{Query, State};
6 + use axum::Json;
7 + use diesel::prelude::*;
8 + use diesel_async::RunQueryDsl;
9 + use parakeet_db::schema;
10 + use serde::{Deserialize, Serialize};
11 + use serde_json::Value;
12 +
13 + #[derive(Debug, Deserialize)]
14 + pub struct GetRecordQuery {
15 +     pub repo: String,
16 +     pub collection: String,
17 +     pub rkey: String,
18 + }
19 +
20 + #[derive(Debug, Serialize)]
21 + pub struct GetRecordRes {
22 +     pub uri: String,
23 +     pub cid: String,
24 +     pub value: Value,
25 + }
26 +
27 + pub async fn get_record(
28 +     State(state): State<GlobalState>,
29 +     _maybe_auth: Option<AtpAuth>,
30 +     Query(query): Query<GetRecordQuery>,
31 + ) -> XrpcResult<Json<GetRecordRes>> {
32 +     let mut conn = state.pool.get().await?;
33 +
34 +     check_actor_status(&mut conn, &query.repo).await?;
35 +
36 +     let at_uri = format!("at://{}/{}/{}", &query.repo, &query.collection, &query.rkey);
37 +
38 +     let (cid, value) = match query.collection.as_str() {
39 +         "app.bsky.feed.generator" => {
40 +             // we don't store the full record, so this returns a partial value
41 +             let (cid, service_did) = schema::feedgens::table
42 +                 .select((schema::feedgens::cid, schema::feedgens::service_did))
43 +                 .find(&at_uri)
44 +                 .get_result::<(String, String)>(&mut conn)
45 +                 .await?;
46 +
47 +             (
48 +                 cid,
49 +                 serde_json::json!({
50 +                     "$type": "app.bsky.feed.generator",
51 +                     "did": service_did,
52 +                 }),
53 +             )
54 +         }
55 +         "app.bsky.feed.post" => {
56 +             schema::posts::table
57 +                 .select((schema::posts::cid, schema::posts::record))
58 +                 .find(&at_uri)
59 +                 .get_result::<(String, Value)>(&mut conn)
60 +                 .await?
61 +         }
62 +         "app.bsky.graph.starterpack" => {
63 +             schema::starterpacks::table
64 +                 .select((schema::starterpacks::cid, schema::starterpacks::record))
65 +                 .find(&at_uri)
66 +                 .get_result::<(String, Value)>(&mut conn)
67 +                 .await?
68 +         }
69 +         _ => return Err(Error::invalid_request(None)),
70 +     };
71 +
72 +     Ok(Json(GetRecordRes {
73 +         uri: at_uri,
74 +         cid,
75 +         value,
76 +     }))
77 + }
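A usage sketch for the new endpoint (repo DID and rkey invented for illustration):

    GET /xrpc/com.atproto.repo.getRecord?repo=did:plc:example&collection=app.bsky.feed.post&rkey=3kabc

This builds the at-uri at://did:plc:example/app.bsky.feed.post/3kabc, fetches cid and record from the posts table, and responds with { uri, cid, value }; any collection other than the three matched above is rejected with invalid_request.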
+69
parakeet/src/xrpc/community_lexicon/bookmarks.rs
··· 1 + use crate::xrpc::datetime_cursor; 2 + use crate::xrpc::error::XrpcResult; 3 + use crate::xrpc::extract::AtpAuth; 4 + use crate::GlobalState; 5 + use axum::extract::{Query, State}; 6 + use axum::Json; 7 + use diesel::prelude::*; 8 + use diesel_async::RunQueryDsl; 9 + use lexica::community_lexicon::bookmarks::Bookmark; 10 + use parakeet_db::{models, schema}; 11 + use serde::{Deserialize, Serialize}; 12 + 13 + #[derive(Debug, Deserialize)] 14 + pub struct BookmarkCursorQuery { 15 + pub tags: Option<Vec<String>>, 16 + pub limit: Option<u8>, 17 + pub cursor: Option<String>, 18 + } 19 + 20 + #[derive(Debug, Serialize)] 21 + pub struct GetActorBookmarksRes { 22 + #[serde(skip_serializing_if = "Option::is_none")] 23 + cursor: Option<String>, 24 + bookmarks: Vec<Bookmark>, 25 + } 26 + 27 + pub async fn get_actor_bookmarks( 28 + State(state): State<GlobalState>, 29 + auth: AtpAuth, 30 + Query(query): Query<BookmarkCursorQuery>, 31 + ) -> XrpcResult<Json<GetActorBookmarksRes>> { 32 + let mut conn = state.pool.get().await?; 33 + 34 + let limit = query.limit.unwrap_or(50).clamp(1, 100); 35 + 36 + let mut bookmarks_query = schema::bookmarks::table 37 + .select(models::Bookmark::as_select()) 38 + .filter(schema::bookmarks::did.eq(&auth.0)) 39 + .into_boxed(); 40 + 41 + if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 42 + bookmarks_query = bookmarks_query.filter(schema::bookmarks::created_at.lt(cursor)); 43 + } 44 + 45 + if let Some(tags) = query.tags { 46 + bookmarks_query = bookmarks_query.filter(schema::bookmarks::tags.contains(tags)); 47 + } 48 + 49 + let results = bookmarks_query 50 + .order(schema::bookmarks::created_at.desc()) 51 + .limit(limit as i64) 52 + .load(&mut conn) 53 + .await?; 54 + 55 + let cursor = results 56 + .last() 57 + .map(|bm| bm.created_at.timestamp_millis().to_string()); 58 + 59 + let bookmarks = results 60 + .into_iter() 61 + .map(|bookmark| Bookmark { 62 + subject: bookmark.subject, 63 + tags: bookmark.tags.into(), 64 + created_at: bookmark.created_at, 65 + }) 66 + .collect(); 67 + 68 + Ok(Json(GetActorBookmarksRes { cursor, bookmarks })) 69 + }
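Pagination here is a created_at keyset: the returned cursor is the last row's millisecond timestamp, and datetime_cursor (from xrpc/mod.rs) turns it back into a DateTime for the created_at < cursor filter. A sketch of a client paging through (values invented):

    GET /xrpc/community.lexicon.bookmarks.getActorBookmarks?limit=50
      -> { "cursor": "1727286000000", "bookmarks": [ ... ] }
    GET /xrpc/community.lexicon.bookmarks.getActorBookmarks?limit=50&cursor=1727286000000
      -> strictly older bookmarks, repeating until a page comes back empty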
+10
parakeet/src/xrpc/community_lexicon/mod.rs
··· 1 + use axum::routing::get; 2 + use axum::Router; 3 + 4 + pub mod bookmarks; 5 + 6 + #[rustfmt::skip] 7 + pub fn routes() -> Router<crate::GlobalState> { 8 + Router::new() 9 + .route("/community.lexicon.bookmarks.getActorBookmarks", get(bookmarks::get_actor_bookmarks)) 10 + }
+3
parakeet/src/xrpc/jwt.rs
··· 4 4 use std::collections::HashMap; 5 5 use std::sync::{Arc, LazyLock}; 6 6 use tokio::sync::RwLock; 7 + use tracing::instrument; 7 8 8 9 static DUMMY_KEY: LazyLock<DecodingKey> = LazyLock::new(|| DecodingKey::from_secret(&[])); 9 10 static NO_VERIFY: LazyLock<Validation> = LazyLock::new(|| { ··· 38 39 } 39 40 } 40 41 42 + #[instrument(skip_all)] 41 43 pub async fn resolve_and_verify_jwt(&self, token: &str, aud: Option<&str>) -> Option<Claims> { 42 44 // first we need to decode without verifying, to get iss. 43 45 let unsafe_data = jsonwebtoken::decode::<Claims>(token, &DUMMY_KEY, &NO_VERIFY).ok()?; ··· 56 58 self.verify_jwt_multibase_with_alg(token, &multibase_key, unsafe_data.header.alg, aud) 57 59 } 58 60 61 + #[instrument(skip_all)] 59 62 async fn resolve_key(&self, did: &str) -> Option<String> { 60 63 tracing::trace!("resolving multikey for {did}"); 61 64 let did_doc = self.resolver.resolve_did(did).await.ok()??;
+8
parakeet/src/xrpc/mod.rs
··· 8 8 mod app_bsky; 9 9 pub mod cdn; 10 10 mod com_atproto; 11 + mod community_lexicon; 11 12 mod error; 12 13 pub mod extract; 13 14 pub mod jwt; ··· 16 17 Router::new() 17 18 .merge(app_bsky::routes()) 18 19 .merge(com_atproto::routes()) 20 + .merge(community_lexicon::routes()) 19 21 } 20 22 21 23 fn datetime_cursor(cursor: Option<&String>) -> Option<chrono::DateTime<chrono::Utc>> { ··· 92 94 )), 93 95 Some(parakeet_db::types::ActorStatus::Deleted) | None => Err(Error::not_found()), 94 96 } 97 + } 98 + 99 + #[derive(Debug, Deserialize)] 100 + pub struct CursorQuery { 101 + pub limit: Option<u8>, 102 + pub cursor: Option<String>, 95 103 } 96 104 97 105 #[derive(Debug, Deserialize)]
+1
parakeet-db/Cargo.toml
··· 7 7 chrono = { version = "0.4.39", features = ["serde"] } 8 8 diesel = { version = "2.2.6", features = ["chrono", "serde_json"], optional = true } 9 9 postgres-types = { version = "0.2.9", optional = true } 10 + serde = { version = "1.0.217", features = ["derive"] } 10 11 serde_json = "1.0.134" 11 12 12 13 [features]
+132 -26
parakeet-db/src/models.rs
··· 1 1 use crate::types::*; 2 2 use chrono::prelude::*; 3 3 use diesel::prelude::*; 4 + use serde::{Deserialize, Serialize}; 4 5 5 6 #[derive(Debug, Queryable, Selectable, Identifiable)] 6 7 #[diesel(table_name = crate::schema::actors)] ··· 16 17 pub last_indexed: Option<NaiveDateTime>, 17 18 } 18 19 19 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 20 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 20 21 #[diesel(table_name = crate::schema::profiles)] 21 22 #[diesel(primary_key(did))] 22 23 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 36 37 pub joined_sp_uri: Option<String>, 37 38 pub joined_sp_cid: Option<String>, 38 39 40 + pub pronouns: Option<String>, 41 + pub website: Option<String>, 42 + 39 43 pub created_at: NaiveDateTime, 40 44 pub indexed_at: NaiveDateTime, 41 45 } 42 46 43 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 47 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 44 48 #[diesel(table_name = crate::schema::lists)] 45 49 #[diesel(primary_key(at_uri))] 46 50 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 85 89 pub indexed_at: NaiveDateTime, 86 90 } 87 91 88 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 92 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 89 93 #[diesel(table_name = crate::schema::feedgens)] 90 94 #[diesel(primary_key(at_uri))] 91 95 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 121 125 pub indexed_at: NaiveDateTime, 122 126 } 123 127 124 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 128 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 125 129 #[diesel(table_name = crate::schema::posts)] 126 130 #[diesel(primary_key(at_uri))] 127 131 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 133 137 134 138 pub content: String, 135 139 pub facets: Option<serde_json::Value>, 136 - pub languages: Vec<Option<String>>, 137 - pub tags: Vec<Option<String>>, 140 + pub languages: not_null_vec::TextArray, 141 + pub tags: not_null_vec::TextArray, 138 142 139 143 pub parent_uri: Option<String>, 140 144 pub parent_cid: Option<String>, ··· 144 148 pub embed: Option<String>, 145 149 pub embed_subtype: Option<String>, 146 150 151 + pub mentions: Option<not_null_vec::TextArray>, 152 + pub violates_threadgate: bool, 153 + 147 154 pub created_at: DateTime<Utc>, 148 155 pub indexed_at: NaiveDateTime, 149 156 } 150 157 151 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 158 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 152 159 #[diesel(table_name = crate::schema::post_embed_images)] 153 160 #[diesel(primary_key(post_uri, seq))] 154 161 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 164 171 pub height: Option<i32>, 165 172 } 166 173 167 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 174 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 168 175 #[diesel(table_name = crate::schema::post_embed_video)] 169 176 #[diesel(primary_key(post_uri))] 170 177 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 179 186 pub height: Option<i32>, 180 187 } 181 188 182 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 189 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 183 190 #[diesel(table_name = crate::schema::post_embed_video_captions)] 184 191 #[diesel(primary_key(post_uri, language))] 185 192 
#[diesel(check_for_backend(diesel::pg::Pg))] ··· 191 198 pub cid: String, 192 199 } 193 200 194 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 201 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 195 202 #[diesel(table_name = crate::schema::post_embed_ext)] 196 203 #[diesel(primary_key(post_uri))] 197 204 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 206 213 pub thumb_cid: Option<String>, 207 214 } 208 215 209 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 216 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 210 217 #[diesel(table_name = crate::schema::post_embed_record)] 211 218 #[diesel(primary_key(post_uri))] 212 219 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 229 236 pub cid: String, 230 237 pub post_uri: String, 231 238 232 - pub detached: Vec<Option<String>>, 233 - pub rules: Vec<Option<String>>, 239 + pub detached: not_null_vec::TextArray, 240 + pub rules: not_null_vec::TextArray, 234 241 235 242 pub created_at: DateTime<Utc>, 236 243 pub indexed_at: NaiveDateTime, 237 244 } 238 245 239 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 246 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 240 247 #[diesel(table_name = crate::schema::threadgates)] 241 248 #[diesel(primary_key(post_uri))] 242 249 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 245 252 pub cid: String, 246 253 pub post_uri: String, 247 254 248 - pub hidden_replies: Vec<Option<String>>, 249 - pub allow: Vec<Option<String>>, 250 - pub allowed_lists: Vec<Option<String>>, 255 + pub hidden_replies: not_null_vec::TextArray, 256 + pub allow: Option<not_null_vec::TextArray>, 257 + pub allowed_lists: Option<not_null_vec::TextArray>, 251 258 252 259 pub record: serde_json::Value, 253 260 ··· 255 262 pub indexed_at: NaiveDateTime, 256 263 } 257 264 258 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 265 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 259 266 #[diesel(table_name = crate::schema::starterpacks)] 260 267 #[diesel(primary_key(at_uri))] 261 268 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 269 276 pub description: Option<String>, 270 277 pub description_facets: Option<serde_json::Value>, 271 278 pub list: String, 272 - pub feeds: Option<Vec<Option<String>>>, 279 + pub feeds: Option<not_null_vec::TextArray>, 273 280 274 281 pub created_at: DateTime<Utc>, 275 282 pub indexed_at: NaiveDateTime, 276 283 } 277 284 278 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 285 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 279 286 #[diesel(table_name = crate::schema::labelers)] 280 287 #[diesel(primary_key(did))] 281 288 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 283 290 pub did: String, 284 291 pub cid: String, 285 292 286 - pub reasons: Option<Vec<Option<String>>>, 287 - pub subject_types: Option<Vec<Option<String>>>, 288 - pub subject_collections: Option<Vec<Option<String>>>, 293 + pub reasons: Option<not_null_vec::TextArray>, 294 + pub subject_types: Option<not_null_vec::TextArray>, 295 + pub subject_collections: Option<not_null_vec::TextArray>, 289 296 290 297 pub created_at: NaiveDateTime, 291 298 pub indexed_at: NaiveDateTime, 292 299 } 293 300 294 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable, Associations)] 301 + #[derive( 302 + Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable, Associations, 303 + )] 295 
304 #[diesel(table_name = crate::schema::labeler_defs)] 296 305 #[diesel(belongs_to(LabelerService, foreign_key = labeler))] 297 306 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 329 338 pub indexed_at: NaiveDateTime, 330 339 } 331 340 332 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 341 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 333 342 #[diesel(table_name = crate::schema::verification)] 334 343 #[diesel(primary_key(at_uri))] 335 344 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 346 355 pub indexed_at: NaiveDateTime, 347 356 } 348 357 349 - #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 358 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 350 359 #[diesel(table_name = crate::schema::statuses)] 351 360 #[diesel(primary_key(did))] 352 361 #[diesel(check_for_backend(diesel::pg::Pg))] ··· 366 375 pub created_at: DateTime<Utc>, 367 376 pub indexed_at: NaiveDateTime, 368 377 } 378 + 379 + #[derive(Debug, Insertable, AsChangeset)] 380 + #[diesel(table_name = crate::schema::mutes)] 381 + #[diesel(check_for_backend(diesel::pg::Pg))] 382 + pub struct NewMute<'a> { 383 + pub did: &'a str, 384 + pub subject: &'a str, 385 + } 386 + 387 + #[derive(Debug, Insertable, AsChangeset)] 388 + #[diesel(table_name = crate::schema::list_mutes)] 389 + #[diesel(check_for_backend(diesel::pg::Pg))] 390 + pub struct NewListMute<'a> { 391 + pub did: &'a str, 392 + pub list_uri: &'a str, 393 + } 394 + 395 + #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 396 + #[diesel(table_name = crate::schema::bookmarks)] 397 + #[diesel(primary_key(did, subject, subject_cid))] 398 + #[diesel(check_for_backend(diesel::pg::Pg))] 399 + pub struct Bookmark { 400 + pub did: String, 401 + pub rkey: Option<String>, 402 + pub subject: String, 403 + pub subject_cid: Option<String>, 404 + pub subject_type: String, 405 + pub tags: not_null_vec::TextArray, 406 + pub created_at: DateTime<Utc>, 407 + } 408 + 409 + #[derive(Debug, Insertable, AsChangeset)] 410 + #[diesel(table_name = crate::schema::bookmarks)] 411 + #[diesel(check_for_backend(diesel::pg::Pg))] 412 + pub struct NewBookmark<'a> { 413 + pub did: &'a str, 414 + pub rkey: Option<String>, 415 + pub subject: &'a str, 416 + pub subject_cid: Option<String>, 417 + pub subject_type: &'a str, 418 + pub tags: Vec<String>, 419 + } 420 + 421 + #[derive(Debug, Queryable, Selectable, Identifiable)] 422 + #[diesel(table_name = crate::schema::author_feeds)] 423 + #[diesel(primary_key(uri))] 424 + #[diesel(check_for_backend(diesel::pg::Pg))] 425 + pub struct AuthorFeedItem { 426 + pub uri: String, 427 + pub cid: String, 428 + pub post: String, 429 + pub did: String, 430 + pub typ: String, 431 + pub sort_at: DateTime<Utc>, 432 + } 433 + 434 + pub use not_null_vec::TextArray; 435 + mod not_null_vec { 436 + use diesel::deserialize::FromSql; 437 + use diesel::pg::Pg; 438 + use diesel::sql_types::{Array, Nullable, Text}; 439 + use diesel::{deserialize, FromSqlRow}; 440 + use serde::{Deserialize, Serialize}; 441 + use std::ops::{Deref, DerefMut}; 442 + 443 + #[derive(Clone, Debug, Default, Serialize, Deserialize, FromSqlRow)] 444 + #[diesel(sql_type = Array<Nullable<Text>>)] 445 + pub struct TextArray(pub Vec<String>); 446 + 447 + impl FromSql<Array<Nullable<Text>>, Pg> for TextArray { 448 + fn from_sql(bytes: diesel::pg::PgValue<'_>) -> deserialize::Result<Self> { 449 + let vec_with_nulls = 450 + <Vec<Option<String>> as FromSql<Array<Nullable<Text>>, 
Pg>>::from_sql(bytes)?; 451 + Ok(TextArray(vec_with_nulls.into_iter().flatten().collect())) 452 + } 453 + } 454 + 455 + impl Deref for TextArray { 456 + type Target = Vec<String>; 457 + 458 + fn deref(&self) -> &Self::Target { 459 + &self.0 460 + } 461 + } 462 + 463 + impl DerefMut for TextArray { 464 + fn deref_mut(&mut self) -> &mut Self::Target { 465 + &mut self.0 466 + } 467 + } 468 + 469 + impl From<TextArray> for Vec<String> { 470 + fn from(v: TextArray) -> Vec<String> { 471 + v.0 472 + } 473 + } 474 + }
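TextArray exists because diesel maps Postgres text[] columns as Array<Nullable<Text>>, i.e. Vec<Option<String>>, even when no element is ever NULL. The FromSql impl above flattens NULLs away on load, so call sites get a plain Vec<String>. A minimal sketch of the ergonomics (values invented):

    // a text[] of {'rust', NULL, 'atproto'} loads as TextArray(vec!["rust", "atproto"])
    let tags = TextArray(vec!["rust".into(), "atproto".into()]);
    assert_eq!(tags.len(), 2);             // Deref exposes Vec<String> methods
    let owned: Vec<String> = tags.into();  // From<TextArray> for Vec<String>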
+65 -2
parakeet-db/src/schema.rs
··· 13 13 } 14 14 15 15 diesel::table! { 16 + author_feeds (uri) { 17 + uri -> Text, 18 + cid -> Text, 19 + post -> Text, 20 + did -> Text, 21 + typ -> Text, 22 + sort_at -> Timestamptz, 23 + } 24 + } 25 + 26 + diesel::table! { 16 27 backfill (repo, repo_ver) { 17 28 repo -> Text, 18 29 repo_ver -> Text, ··· 38 49 rkey -> Text, 39 50 did -> Text, 40 51 subject -> Text, 52 + created_at -> Timestamptz, 53 + } 54 + } 55 + 56 + diesel::table! { 57 + bookmarks (did, subject) { 58 + did -> Text, 59 + rkey -> Nullable<Text>, 60 + subject -> Text, 61 + subject_cid -> Nullable<Text>, 62 + subject_type -> Text, 63 + tags -> Array<Nullable<Text>>, 41 64 created_at -> Timestamptz, 42 65 } 43 66 } ··· 151 174 } 152 175 153 176 diesel::table! { 177 + list_mutes (did, list_uri) { 178 + did -> Text, 179 + list_uri -> Text, 180 + created_at -> Timestamptz, 181 + } 182 + } 183 + 184 + diesel::table! { 154 185 lists (at_uri) { 155 186 at_uri -> Text, 156 187 owner -> Text, ··· 166 197 } 167 198 168 199 diesel::table! { 200 + mutes (did, subject) { 201 + did -> Text, 202 + subject -> Text, 203 + created_at -> Timestamptz, 204 + } 205 + } 206 + 207 + diesel::table! { 169 208 notif_decl (did) { 170 209 did -> Text, 171 210 allow_subscriptions -> Nullable<Text>, ··· 256 295 embed_subtype -> Nullable<Text>, 257 296 created_at -> Timestamptz, 258 297 indexed_at -> Timestamp, 298 + mentions -> Nullable<Array<Nullable<Text>>>, 299 + violates_threadgate -> Bool, 300 + } 301 + } 302 + 303 + diesel::table! { 304 + profile_states (did, subject) { 305 + did -> Text, 306 + subject -> Text, 307 + muting -> Bool, 308 + blocked -> Bool, 309 + blocking -> Nullable<Text>, 310 + following -> Nullable<Text>, 311 + followed -> Nullable<Text>, 259 312 } 260 313 } 261 314 ··· 273 326 joined_sp_cid -> Nullable<Text>, 274 327 created_at -> Timestamp, 275 328 indexed_at -> Timestamp, 329 + pronouns -> Nullable<Text>, 330 + website -> Nullable<Text>, 276 331 } 277 332 } 278 333 ··· 336 391 cid -> Text, 337 392 post_uri -> Text, 338 393 hidden_replies -> Array<Nullable<Text>>, 339 - allow -> Array<Nullable<Text>>, 340 - allowed_lists -> Array<Nullable<Text>>, 394 + allow -> Nullable<Array<Nullable<Text>>>, 395 + allowed_lists -> Nullable<Array<Nullable<Text>>>, 341 396 record -> Jsonb, 342 397 created_at -> Timestamptz, 343 398 indexed_at -> Timestamp, ··· 359 414 360 415 diesel::joinable!(backfill -> actors (repo)); 361 416 diesel::joinable!(blocks -> actors (did)); 417 + diesel::joinable!(bookmarks -> actors (did)); 362 418 diesel::joinable!(chat_decls -> actors (did)); 363 419 diesel::joinable!(feedgens -> actors (owner)); 364 420 diesel::joinable!(follows -> actors (did)); ··· 366 422 diesel::joinable!(labelers -> actors (did)); 367 423 diesel::joinable!(likes -> actors (did)); 368 424 diesel::joinable!(list_blocks -> actors (did)); 425 + diesel::joinable!(list_mutes -> actors (did)); 369 426 diesel::joinable!(lists -> actors (owner)); 427 + diesel::joinable!(mutes -> actors (did)); 370 428 diesel::joinable!(notif_decl -> actors (did)); 371 429 diesel::joinable!(post_embed_ext -> posts (post_uri)); 372 430 diesel::joinable!(post_embed_images -> posts (post_uri)); ··· 384 442 385 443 diesel::allow_tables_to_appear_in_same_query!( 386 444 actors, 445 + author_feeds, 387 446 backfill, 388 447 backfill_jobs, 389 448 blocks, 449 + bookmarks, 390 450 chat_decls, 391 451 feedgens, 392 452 follows, ··· 396 456 likes, 397 457 list_blocks, 398 458 list_items, 459 + list_mutes, 399 460 lists, 461 + mutes, 400 462 notif_decl, 401 463 
post_embed_ext, 402 464 post_embed_images, ··· 405 467 post_embed_video_captions, 406 468 postgates, 407 469 posts, 470 + profile_states, 408 471 profiles, 409 472 records, 410 473 reposts,
+1 -1
parakeet-db/src/types.rs
··· 85 85 use std::io::Write; 86 86 let val = self.to_string(); 87 87 88 - out.write(val.as_bytes())?; 88 + out.write_all(val.as_bytes())?; 89 89 Ok(diesel::serialize::IsNull::No) 90 90 } 91 91 }
+24 -2
parakeet-index/Cargo.toml
··· 10 10 [dependencies] 11 11 tonic = "0.13.0" 12 12 prost = "0.13.5" 13 + tonic-tracing-opentelemetry = { version = "0.32", optional = true } 14 + tower = { version = "0.5", optional = true } 13 15 14 16 eyre = { version = "0.6.12", optional = true } 15 17 figment = { version = "0.10.19", features = ["env", "toml"], optional = true } 16 18 itertools = { version = "0.14.0", optional = true } 19 + opentelemetry = { version = "0.31.0", optional = true } 20 + opentelemetry-otlp = { version = "0.31.0", features = ["reqwest-rustls"], optional = true } 21 + opentelemetry_sdk = { version = "0.31.0", optional = true } 17 22 rocksdb = { version = "0.23", default-features = false, features = ["lz4", "bindgen-runtime"], optional = true } 18 23 serde = { version = "1.0.217", features = ["derive"], optional = true } 19 24 tokio = { version = "1.42.0", features = ["full"], optional = true } 20 25 tonic-health = { version = "0.13.0", optional = true } 21 26 tracing = { version = "0.1.40", optional = true } 22 - tracing-subscriber = { version = "0.3.18", optional = true } 27 + tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"], optional = true } 28 + tracing-opentelemetry = { version = "0.32", optional = true } 23 29 24 30 [build-dependencies] 25 31 tonic-build = "0.13.0" 26 32 27 33 [features] 28 - server = ["dep:eyre", "dep:figment", "dep:itertools", "dep:rocksdb", "dep:serde", "dep:tokio", "dep:tonic-health", "dep:tracing", "dep:tracing-subscriber"] 34 + otel = ["dep:tonic-tracing-opentelemetry", "dep:tower"] 35 + server = [ 36 + "dep:eyre", 37 + "dep:figment", 38 + "dep:itertools", 39 + "dep:opentelemetry", 40 + "dep:opentelemetry-otlp", 41 + "dep:opentelemetry_sdk", 42 + "dep:rocksdb", 43 + "dep:serde", 44 + "dep:tokio", 45 + "dep:tonic-health", 46 + "otel", 47 + "dep:tracing", 48 + "dep:tracing-subscriber", 49 + "dep:tracing-opentelemetry" 50 + ]
+11
parakeet-index/justfile
··· 1 + @release: 2 + cargo build --release --features server 3 + 4 + @lint: 5 + cargo clippy 6 + 7 + @run *params: 8 + cargo run --features server -- {{params}} 9 + 10 + @docker platform='linux/amd64' branch='main': 11 + docker buildx build --platform {{platform}} -t registry.gitlab.com/parakeet-social/parakeet/parakeet-index:{{branch}} . -f parakeet-index/Dockerfile
-1
parakeet-index/run.sh
··· 1 - cargo run --features server
+20 -1
parakeet-index/src/lib.rs
··· 1 + use tonic::transport::Channel; 2 + 1 3 #[allow(clippy::all)] 2 4 pub mod index { 3 5 tonic::include_proto!("parakeet"); 4 6 } 5 7 6 8 pub use index::*; 7 - pub type Client = index_client::IndexClient<tonic::transport::Channel>; 9 + #[cfg(not(feature = "otel"))] 10 + pub type Client = index_client::IndexClient<Channel>; 11 + #[cfg(feature = "otel")] 12 + pub type Client = index_client::IndexClient< 13 + tonic_tracing_opentelemetry::middleware::client::OtelGrpcService<Channel>, 14 + >; 8 15 9 16 #[cfg(feature = "server")] 10 17 pub mod server; 18 + 19 + #[cfg(feature = "otel")] 20 + pub async fn connect_with_otel( 21 + uri: String, 22 + ) -> Result<Client, Box<dyn std::error::Error + Send + Sync>> { 23 + let channel = Channel::from_shared(uri)?.connect().await?; 24 + let channel = tower::ServiceBuilder::new() 25 + .layer(tonic_tracing_opentelemetry::middleware::client::OtelGrpcLayer) 26 + .service(channel); 27 + 28 + Ok(index_client::IndexClient::new(channel)) 29 + }
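A usage sketch for the traced client (endpoint URI invented; assumes a tokio runtime and the otel feature):

    let client: parakeet_index::Client =
        parakeet_index::connect_with_otel("http://parakeet-index:50051".to_string()).await?;

Without the otel feature, Client is the plain IndexClient<Channel> and callers connect through tonic directly; the layered variant wraps the channel so each outgoing gRPC call carries an OpenTelemetry span.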
+9 -3
parakeet-index/src/main.rs
··· 1 1 use parakeet_index::index_server::IndexServer; 2 2 use parakeet_index::server::service::Service; 3 - use parakeet_index::server::{GlobalState, config}; 3 + use parakeet_index::server::{GlobalState, config, instrumentation}; 4 4 use std::sync::Arc; 5 5 use tonic::transport::Server; 6 + use tonic_tracing_opentelemetry::middleware::server::OtelGrpcLayer; 6 7 7 8 #[tokio::main] 8 9 async fn main() -> eyre::Result<()> { 9 - tracing_subscriber::fmt::init(); 10 - 11 10 let conf = config::load_config()?; 12 11 12 + instrumentation::init_instruments(&conf.instruments); 13 + 13 14 let db_root = conf.index_db_path.parse()?; 14 15 let addr = std::net::SocketAddr::new(conf.server.bind_address.parse()?, conf.server.port); 15 16 let state = Arc::new(GlobalState::new(db_root)?); ··· 18 19 reporter.set_serving::<IndexServer<Service>>().await; 19 20 20 21 let service = Service::new(state.clone()); 22 + 23 + let mw = tower::ServiceBuilder::new() 24 + .option_layer(conf.instruments.otel_enable.then(OtelGrpcLayer::default)); 25 + 21 26 Server::builder() 27 + .layer(mw) 22 28 .add_service(health_service) 23 29 .add_service(IndexServer::new(service)) 24 30 .serve(addr)
+10
parakeet-index/src/server/config.rs
··· 13 13 14 14 #[derive(Debug, Deserialize)] 15 15 pub struct Config { 16 + #[serde(flatten)] 17 + pub instruments: ConfigInstruments, 16 18 pub database_url: String, 17 19 pub index_db_path: String, 18 20 #[serde(default)] 19 21 pub server: ConfigServer, 22 + } 23 + 24 + #[derive(Debug, Deserialize)] 25 + pub struct ConfigInstruments { 26 + #[serde(default)] 27 + pub otel_enable: bool, 28 + #[serde(default)] 29 + pub log_json: bool, 20 30 } 21 31 22 32 #[derive(Debug, Deserialize)]
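Since ConfigInstruments is flattened into Config, the two switches sit at the top level of the existing config sources (figment's toml and env providers). A sketch of a TOML config, with illustrative paths and URL:

    database_url = "postgres://parakeet@localhost/parakeet"
    index_db_path = "/var/lib/parakeet-index"
    otel_enable = true
    log_json = false

Both flags default to false when omitted.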
+57
parakeet-index/src/server/instrumentation.rs
··· 1 + use opentelemetry::trace::TracerProvider; 2 + use opentelemetry_otlp::{Protocol, SpanExporter, WithExportConfig}; 3 + use opentelemetry_sdk::trace::{Sampler, SdkTracer, SdkTracerProvider}; 4 + use tracing::Subscriber; 5 + use tracing_opentelemetry::OpenTelemetryLayer; 6 + use tracing_subscriber::filter::Filtered; 7 + use tracing_subscriber::layer::SubscriberExt; 8 + use tracing_subscriber::registry::LookupSpan; 9 + use tracing_subscriber::util::SubscriberInitExt; 10 + use tracing_subscriber::{EnvFilter, Layer}; 11 + 12 + pub fn init_instruments(cfg: &super::config::ConfigInstruments) { 13 + let otel_layer = cfg.otel_enable.then(init_otel); 14 + let log_layer = init_log(cfg.log_json); 15 + 16 + tracing_subscriber::registry() 17 + .with(log_layer) 18 + .with(otel_layer) 19 + .init(); 20 + } 21 + 22 + fn init_otel<S>() -> Filtered<OpenTelemetryLayer<S, SdkTracer>, EnvFilter, S> 23 + where 24 + S: Subscriber + for<'span> LookupSpan<'span>, 25 + { 26 + let span_exporter = SpanExporter::builder() 27 + .with_http() 28 + .with_protocol(Protocol::HttpBinary) 29 + .build() 30 + .unwrap(); 31 + 32 + let tracer_provider = SdkTracerProvider::builder() 33 + .with_batch_exporter(span_exporter) 34 + .with_sampler(Sampler::AlwaysOn) 35 + .build(); 36 + 37 + opentelemetry::global::set_tracer_provider(tracer_provider.clone()); 38 + 39 + let tracer = tracer_provider.tracer("parakeet"); 40 + let otel_filter = EnvFilter::new("info,otel::tracing=trace"); 41 + 42 + OpenTelemetryLayer::new(tracer).with_filter(otel_filter) 43 + } 44 + 45 + fn init_log<S>(json: bool) -> Filtered<Box<dyn Layer<S> + Send + Sync>, EnvFilter, S> 46 + where 47 + S: Subscriber + for<'span> LookupSpan<'span>, 48 + { 49 + let stdout_filter = 50 + EnvFilter::from_default_env().add_directive("otel::tracing=off".parse().unwrap()); 51 + 52 + match json { 53 + true => tracing_subscriber::fmt::layer().json().boxed(), 54 + false => tracing_subscriber::fmt::layer().boxed(), 55 + } 56 + .with_filter(stdout_filter) 57 + }
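The stdout layer's level comes from RUST_LOG via EnvFilter::from_default_env, with otel::tracing forced off so exporter-internal events never reach the logs; for example, RUST_LOG=info,parakeet_index=debug (an illustrative setting) tunes the fmt layer, while the OTel layer keeps its fixed info,otel::tracing=trace filter regardless.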
+1
parakeet-index/src/server/mod.rs
··· 2 2 3 3 pub mod config; 4 4 pub mod db; 5 + pub mod instrumentation; 5 6 pub mod service; 6 7 mod utils; 7 8