backfill works, sqlite infra + observability set up, and now works as a nice sub in for record fetches, and acts as a cache.

Orual 247da442 40eee0db

+2544 -116
+358 -55
Cargo.lock
··· 330 330 331 331 [[package]] 332 332 name = "async-compression" 333 - version = "0.4.35" 333 + version = "0.4.36" 334 334 source = "registry+https://github.com/rust-lang/crates.io-index" 335 - checksum = "07a926debf178f2d355197f9caddb08e54a9329d44748034bba349c5848cb519" 335 + checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" 336 336 dependencies = [ 337 337 "compression-codecs", 338 338 "compression-core", ··· 473 473 version = "1.5.0" 474 474 source = "registry+https://github.com/rust-lang/crates.io-index" 475 475 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 476 + 477 + [[package]] 478 + name = "aws-lc-rs" 479 + version = "1.15.1" 480 + source = "registry+https://github.com/rust-lang/crates.io-index" 481 + checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" 482 + dependencies = [ 483 + "aws-lc-sys", 484 + "zeroize", 485 + ] 486 + 487 + [[package]] 488 + name = "aws-lc-sys" 489 + version = "0.34.0" 490 + source = "registry+https://github.com/rust-lang/crates.io-index" 491 + checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" 492 + dependencies = [ 493 + "cc", 494 + "cmake", 495 + "dunce", 496 + "fs_extra", 497 + ] 476 498 477 499 [[package]] 478 500 name = "axum" ··· 1211 1233 ] 1212 1234 1213 1235 [[package]] 1236 + name = "cmake" 1237 + version = "0.1.54" 1238 + source = "registry+https://github.com/rust-lang/crates.io-index" 1239 + checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" 1240 + dependencies = [ 1241 + "cc", 1242 + ] 1243 + 1244 + [[package]] 1214 1245 name = "cobs" 1215 1246 version = "0.3.0" 1216 1247 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1272 1303 1273 1304 [[package]] 1274 1305 name = "compression-codecs" 1275 - version = "0.4.34" 1306 + version = "0.4.35" 1276 1307 source = "registry+https://github.com/rust-lang/crates.io-index" 1277 - checksum = 
"34a3cbbb8b6eca96f3a5c4bf6938d5b27ced3675d69f95bb51948722870bc323" 1308 + checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" 1278 1309 dependencies = [ 1279 1310 "compression-core", 1280 1311 "flate2", ··· 2774 2805 [[package]] 2775 2806 name = "dioxus-primitives" 2776 2807 version = "0.0.1" 2777 - source = "git+https://github.com/DioxusLabs/components#f165b24277a8f04a2d78a6303effb9ba3a86c285" 2808 + source = "git+https://github.com/DioxusLabs/components#7e5862e574aeceb3a3a021d042c165a839f1860b" 2778 2809 dependencies = [ 2779 2810 "dioxus 0.7.2", 2780 - "dioxus-time", 2811 + "dioxus-sdk-time", 2781 2812 "lazy-js-bundle 0.6.2", 2782 2813 "num-integer", 2783 2814 "time", ··· 2994 3025 ] 2995 3026 2996 3027 [[package]] 2997 - name = "dioxus-time" 2998 - version = "0.7.0" 2999 - source = "git+https://github.com/ealmloff/dioxus-std?branch=0.7#8c868ac1d60e3232e3f16f6195d6deb3c016de17" 3000 - dependencies = [ 3001 - "dioxus 0.7.2", 3002 - "futures", 3003 - "gloo-timers", 3004 - "tokio", 3005 - ] 3006 - 3007 - [[package]] 3008 3028 name = "dioxus-use-js-macro" 3009 3029 version = "0.1.0" 3010 3030 dependencies = [ ··· 3499 3519 ] 3500 3520 3501 3521 [[package]] 3522 + name = "fallible-iterator" 3523 + version = "0.3.0" 3524 + source = "registry+https://github.com/rust-lang/crates.io-index" 3525 + checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" 3526 + 3527 + [[package]] 3528 + name = "fallible-streaming-iterator" 3529 + version = "0.1.9" 3530 + source = "registry+https://github.com/rust-lang/crates.io-index" 3531 + checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 3532 + 3533 + [[package]] 3502 3534 name = "fancy-regex" 3503 3535 version = "0.16.2" 3504 3536 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3697 3729 "swc_macros_common", 3698 3730 "syn 2.0.111", 3699 3731 ] 3732 + 3733 + [[package]] 3734 + name = "fs_extra" 3735 + version = "1.3.0" 3736 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 3737 + checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" 3700 3738 3701 3739 [[package]] 3702 3740 name = "futf" ··· 4126 4164 checksum = "0bb0228f477c0900c880fd78c8759b95c7636dbd7842707f49e132378aa2acdc" 4127 4165 dependencies = [ 4128 4166 "heck 0.4.1", 4129 - "proc-macro-crate 2.0.0", 4167 + "proc-macro-crate 2.0.2", 4130 4168 "proc-macro-error", 4131 4169 "proc-macro2", 4132 4170 "quote", ··· 4789 4827 "hyper", 4790 4828 "hyper-util", 4791 4829 "rustls", 4830 + "rustls-native-certs", 4792 4831 "rustls-pki-types", 4793 4832 "tokio", 4794 4833 "tokio-rustls", ··· 4797 4836 ] 4798 4837 4799 4838 [[package]] 4839 + name = "hyper-tls" 4840 + version = "0.6.0" 4841 + source = "registry+https://github.com/rust-lang/crates.io-index" 4842 + checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" 4843 + dependencies = [ 4844 + "bytes", 4845 + "http-body-util", 4846 + "hyper", 4847 + "hyper-util", 4848 + "native-tls", 4849 + "tokio", 4850 + "tokio-native-tls", 4851 + "tower-service", 4852 + ] 4853 + 4854 + [[package]] 4800 4855 name = "hyper-util" 4801 4856 version = "0.1.19" 4802 4857 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4895 4950 4896 4951 [[package]] 4897 4952 name = "icu_properties" 4898 - version = "2.1.1" 4953 + version = "2.1.2" 4899 4954 source = "registry+https://github.com/rust-lang/crates.io-index" 4900 - checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" 4955 + checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" 4901 4956 dependencies = [ 4902 4957 "icu_collections", 4903 4958 "icu_locale_core", ··· 4909 4964 4910 4965 [[package]] 4911 4966 name = "icu_properties_data" 4912 - version = "2.1.1" 4967 + version = "2.1.2" 4913 4968 source = "registry+https://github.com/rust-lang/crates.io-index" 4914 - checksum = 
"02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" 4969 + checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" 4915 4970 4916 4971 [[package]] 4917 4972 name = "icu_provider" ··· 5000 5055 dependencies = [ 5001 5056 "bitmaps", 5002 5057 "rand_core 0.6.4", 5003 - "rand_xoshiro", 5058 + "rand_xoshiro 0.6.0", 5004 5059 "serde", 5005 5060 "sized-chunks", 5006 5061 "typenum", ··· 5495 5550 ] 5496 5551 5497 5552 [[package]] 5553 + name = "itertools" 5554 + version = "0.14.0" 5555 + source = "registry+https://github.com/rust-lang/crates.io-index" 5556 + checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" 5557 + dependencies = [ 5558 + "either", 5559 + ] 5560 + 5561 + [[package]] 5498 5562 name = "itoa" 5499 5563 version = "1.0.15" 5500 5564 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5503 5567 [[package]] 5504 5568 name = "jacquard" 5505 5569 version = "0.9.4" 5570 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5506 5571 dependencies = [ 5507 5572 "bytes", 5508 5573 "getrandom 0.2.16", ··· 5534 5599 [[package]] 5535 5600 name = "jacquard-api" 5536 5601 version = "0.9.2" 5602 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5537 5603 dependencies = [ 5538 5604 "bon", 5539 5605 "bytes", ··· 5552 5618 [[package]] 5553 5619 name = "jacquard-axum" 5554 5620 version = "0.9.2" 5621 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5555 5622 dependencies = [ 5556 5623 "axum", 5557 5624 "bytes", ··· 5573 5640 [[package]] 5574 5641 name = "jacquard-common" 5575 5642 version = "0.9.2" 5643 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5576 5644 dependencies = [ 5577 5645 "base64 0.22.1", 5578 5646 "bon", ··· 5620 5688 [[package]] 5621 5689 name = 
"jacquard-derive" 5622 5690 version = "0.9.4" 5691 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5623 5692 dependencies = [ 5624 5693 "heck 0.5.0", 5625 5694 "jacquard-lexicon", ··· 5631 5700 [[package]] 5632 5701 name = "jacquard-identity" 5633 5702 version = "0.9.2" 5703 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5634 5704 dependencies = [ 5635 5705 "bon", 5636 5706 "bytes", ··· 5659 5729 [[package]] 5660 5730 name = "jacquard-lexicon" 5661 5731 version = "0.9.2" 5732 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5662 5733 dependencies = [ 5663 5734 "cid", 5664 5735 "dashmap 6.1.0", ··· 5684 5755 [[package]] 5685 5756 name = "jacquard-oauth" 5686 5757 version = "0.9.2" 5758 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5687 5759 dependencies = [ 5688 5760 "base64 0.22.1", 5689 5761 "bytes", ··· 5716 5788 [[package]] 5717 5789 name = "jacquard-repo" 5718 5790 version = "0.9.4" 5791 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 5719 5792 dependencies = [ 5720 5793 "bytes", 5721 5794 "cid", ··· 6004 6077 ] 6005 6078 6006 6079 [[package]] 6080 + name = "libsqlite3-sys" 6081 + version = "0.35.0" 6082 + source = "registry+https://github.com/rust-lang/crates.io-index" 6083 + checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" 6084 + dependencies = [ 6085 + "cc", 6086 + "pkg-config", 6087 + "vcpkg", 6088 + ] 6089 + 6090 + [[package]] 6007 6091 name = "libxdo" 6008 6092 version = "0.6.0" 6009 6093 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 6060 6144 version = "0.4.29" 6061 6145 source = "registry+https://github.com/rust-lang/crates.io-index" 6062 6146 checksum = 
"5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" 6147 + 6148 + [[package]] 6149 + name = "loki-api" 6150 + version = "0.1.3" 6151 + source = "registry+https://github.com/rust-lang/crates.io-index" 6152 + checksum = "bdc38a304f59a03e6efa3876766a48c70a766a93f88341c3fff4212834b8e327" 6153 + dependencies = [ 6154 + "prost", 6155 + "prost-types", 6156 + ] 6063 6157 6064 6158 [[package]] 6065 6159 name = "lol_alloc" ··· 6093 6187 6094 6188 [[package]] 6095 6189 name = "loro" 6096 - version = "1.10.0" 6190 + version = "1.10.3" 6097 6191 source = "registry+https://github.com/rust-lang/crates.io-index" 6098 - checksum = "88dccd9df337cf38accfa64bd2267edfd2eeb17a459d38ab0f7ac80eb878cdc5" 6192 + checksum = "d75216d8f99725531a30f7b00901ee154a4f8a9b7f125bfe032e197d4c7ffb8c" 6099 6193 dependencies = [ 6100 6194 "enum-as-inner 0.6.1", 6101 6195 "generic-btree", ··· 6139 6233 6140 6234 [[package]] 6141 6235 name = "loro-internal" 6142 - version = "1.10.0" 6236 + version = "1.10.3" 6143 6237 source = "registry+https://github.com/rust-lang/crates.io-index" 6144 - checksum = "9e5c014162198a558f119e67287f042c37feb55e82b50d97d6ac2a90333995dd" 6238 + checksum = "f447044ec3d3ba572623859add3334bd87b84340ee5fdf00315bfee0e3ad3e3f" 6145 6239 dependencies = [ 6146 6240 "append-only-bytes", 6147 6241 "arref", ··· 6386 6480 [[package]] 6387 6481 name = "markdown-weaver" 6388 6482 version = "0.13.0" 6483 + source = "git+https://github.com/rsform/markdown-weaver#52075e20a194375f1bd4a0c78201ce3b3a52c82d" 6389 6484 dependencies = [ 6390 6485 "bitflags 2.10.0", 6391 6486 "getopts", ··· 6398 6493 [[package]] 6399 6494 name = "markdown-weaver-escape" 6400 6495 version = "0.11.0" 6496 + source = "git+https://github.com/rsform/markdown-weaver#52075e20a194375f1bd4a0c78201ce3b3a52c82d" 6401 6497 6402 6498 [[package]] 6403 6499 name = "markup5ever" ··· 6522 6618 ] 6523 6619 6524 6620 [[package]] 6621 + name = "metrics" 6622 + version = "0.24.3" 6623 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 6624 + checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" 6625 + dependencies = [ 6626 + "ahash", 6627 + "portable-atomic", 6628 + ] 6629 + 6630 + [[package]] 6631 + name = "metrics-exporter-prometheus" 6632 + version = "0.17.2" 6633 + source = "registry+https://github.com/rust-lang/crates.io-index" 6634 + checksum = "2b166dea96003ee2531cf14833efedced545751d800f03535801d833313f8c15" 6635 + dependencies = [ 6636 + "base64 0.22.1", 6637 + "http-body-util", 6638 + "hyper", 6639 + "hyper-rustls", 6640 + "hyper-util", 6641 + "indexmap 2.12.1", 6642 + "ipnet", 6643 + "metrics", 6644 + "metrics-util", 6645 + "quanta", 6646 + "thiserror 2.0.17", 6647 + "tokio", 6648 + "tracing", 6649 + ] 6650 + 6651 + [[package]] 6652 + name = "metrics-util" 6653 + version = "0.20.1" 6654 + source = "registry+https://github.com/rust-lang/crates.io-index" 6655 + checksum = "cdfb1365fea27e6dd9dc1dbc19f570198bc86914533ad639dae939635f096be4" 6656 + dependencies = [ 6657 + "crossbeam-epoch", 6658 + "crossbeam-utils", 6659 + "hashbrown 0.16.1", 6660 + "metrics", 6661 + "quanta", 6662 + "rand 0.9.2", 6663 + "rand_xoshiro 0.7.0", 6664 + "sketches-ddsketch", 6665 + ] 6666 + 6667 + [[package]] 6525 6668 name = "miette" 6526 6669 version = "5.10.0" 6527 6670 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 6603 6746 [[package]] 6604 6747 name = "mini-moka" 6605 6748 version = "0.10.99" 6749 + source = "git+https://tangled.org/@nonbinary.computer/jacquard#5bd87b462e1b020feb232fefaf4926ff004eefe4" 6606 6750 dependencies = [ 6607 6751 "crossbeam-channel", 6608 6752 "crossbeam-utils", ··· 8014 8158 8015 8159 [[package]] 8016 8160 name = "proc-macro-crate" 8017 - version = "2.0.0" 8161 + version = "2.0.2" 8018 8162 source = "registry+https://github.com/rust-lang/crates.io-index" 8019 - checksum = "7e8366a6159044a37876a2b9817124296703c586a5c92e2c53751fa06d8d43e8" 8163 + checksum = 
"b00f26d3400549137f92511a46ac1cd8ce37cb5598a96d382381458b992a5d24" 8020 8164 dependencies = [ 8021 - "toml_edit 0.20.7", 8165 + "toml_datetime 0.6.3", 8166 + "toml_edit 0.20.2", 8022 8167 ] 8023 8168 8024 8169 [[package]] ··· 8083 8228 ] 8084 8229 8085 8230 [[package]] 8231 + name = "prost" 8232 + version = "0.13.5" 8233 + source = "registry+https://github.com/rust-lang/crates.io-index" 8234 + checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" 8235 + dependencies = [ 8236 + "bytes", 8237 + "prost-derive", 8238 + ] 8239 + 8240 + [[package]] 8241 + name = "prost-derive" 8242 + version = "0.13.5" 8243 + source = "registry+https://github.com/rust-lang/crates.io-index" 8244 + checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" 8245 + dependencies = [ 8246 + "anyhow", 8247 + "itertools 0.14.0", 8248 + "proc-macro2", 8249 + "quote", 8250 + "syn 2.0.111", 8251 + ] 8252 + 8253 + [[package]] 8254 + name = "prost-types" 8255 + version = "0.13.5" 8256 + source = "registry+https://github.com/rust-lang/crates.io-index" 8257 + checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" 8258 + dependencies = [ 8259 + "prost", 8260 + ] 8261 + 8262 + [[package]] 8086 8263 name = "psl-types" 8087 8264 version = "2.0.11" 8088 8265 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 8361 8538 checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" 8362 8539 dependencies = [ 8363 8540 "rand_core 0.6.4", 8541 + ] 8542 + 8543 + [[package]] 8544 + name = "rand_xoshiro" 8545 + version = "0.7.0" 8546 + source = "registry+https://github.com/rust-lang/crates.io-index" 8547 + checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" 8548 + dependencies = [ 8549 + "rand_core 0.9.3", 8364 8550 ] 8365 8551 8366 8552 [[package]] ··· 8467 8653 8468 8654 [[package]] 8469 8655 name = "reqwest" 8470 - version = "0.12.24" 8656 + version = "0.12.25" 8471 8657 source = 
"registry+https://github.com/rust-lang/crates.io-index" 8472 - checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" 8658 + checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a" 8473 8659 dependencies = [ 8474 - "async-compression", 8475 8660 "base64 0.22.1", 8476 8661 "bytes", 8477 8662 "cookie", ··· 8485 8670 "http-body-util", 8486 8671 "hyper", 8487 8672 "hyper-rustls", 8673 + "hyper-tls", 8488 8674 "hyper-util", 8489 8675 "js-sys", 8490 8676 "log", 8491 8677 "mime", 8492 8678 "mime_guess", 8679 + "native-tls", 8493 8680 "percent-encoding", 8494 8681 "pin-project-lite", 8495 8682 "quinn", ··· 8500 8687 "serde_urlencoded", 8501 8688 "sync_wrapper", 8502 8689 "tokio", 8690 + "tokio-native-tls", 8503 8691 "tokio-rustls", 8504 8692 "tokio-util", 8505 8693 "tower", ··· 8644 8832 ] 8645 8833 8646 8834 [[package]] 8835 + name = "rusqlite" 8836 + version = "0.37.0" 8837 + source = "registry+https://github.com/rust-lang/crates.io-index" 8838 + checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f" 8839 + dependencies = [ 8840 + "bitflags 2.10.0", 8841 + "fallible-iterator", 8842 + "fallible-streaming-iterator", 8843 + "hashlink", 8844 + "libsqlite3-sys", 8845 + "smallvec", 8846 + ] 8847 + 8848 + [[package]] 8849 + name = "rusqlite_migration" 8850 + version = "2.3.0" 8851 + source = "registry+https://github.com/rust-lang/crates.io-index" 8852 + checksum = "3fc9767ae49274bafd3e55be9d30405a033b7a59548327d87fd4971fbb58e264" 8853 + dependencies = [ 8854 + "include_dir", 8855 + "log", 8856 + "rusqlite", 8857 + ] 8858 + 8859 + [[package]] 8647 8860 name = "rustc-demangle" 8648 8861 version = "0.1.26" 8649 8862 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 8689 8902 source = "registry+https://github.com/rust-lang/crates.io-index" 8690 8903 checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" 8691 8904 dependencies = [ 8905 + "aws-lc-rs", 8692 8906 "log", 8693 
8907 "once_cell", 8694 8908 "ring", ··· 8753 8967 source = "registry+https://github.com/rust-lang/crates.io-index" 8754 8968 checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" 8755 8969 dependencies = [ 8970 + "aws-lc-rs", 8756 8971 "ring", 8757 8972 "rustls-pki-types", 8758 8973 "untrusted", ··· 9428 9643 ] 9429 9644 9430 9645 [[package]] 9646 + name = "sketches-ddsketch" 9647 + version = "0.3.0" 9648 + source = "registry+https://github.com/rust-lang/crates.io-index" 9649 + checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" 9650 + 9651 + [[package]] 9431 9652 name = "slab" 9432 9653 version = "0.4.11" 9433 9654 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 9507 9728 "borsh", 9508 9729 "serde_core", 9509 9730 ] 9731 + 9732 + [[package]] 9733 + name = "snap" 9734 + version = "1.1.1" 9735 + source = "registry+https://github.com/rust-lang/crates.io-index" 9736 + checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" 9510 9737 9511 9738 [[package]] 9512 9739 name = "socket2" ··· 10343 10570 ] 10344 10571 10345 10572 [[package]] 10573 + name = "tokio-native-tls" 10574 + version = "0.3.1" 10575 + source = "registry+https://github.com/rust-lang/crates.io-index" 10576 + checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 10577 + dependencies = [ 10578 + "native-tls", 10579 + "tokio", 10580 + ] 10581 + 10582 + [[package]] 10346 10583 name = "tokio-rustls" 10347 10584 version = "0.26.4" 10348 10585 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 10382 10619 10383 10620 [[package]] 10384 10621 name = "tokio-tungstenite" 10622 + version = "0.26.2" 10623 + source = "registry+https://github.com/rust-lang/crates.io-index" 10624 + checksum = "7a9daff607c6d2bf6c16fd681ccb7eecc83e4e2cdc1ca067ffaadfca5de7f084" 10625 + dependencies = [ 10626 + "futures-util", 10627 + "log", 10628 + "native-tls", 10629 + "tokio", 10630 + "tokio-native-tls", 
10631 + "tungstenite 0.26.2", 10632 + ] 10633 + 10634 + [[package]] 10635 + name = "tokio-tungstenite" 10385 10636 version = "0.27.0" 10386 10637 source = "registry+https://github.com/rust-lang/crates.io-index" 10387 10638 checksum = "489a59b6730eda1b0171fcfda8b121f4bee2b35cba8645ca35c5f7ba3eb736c1" ··· 10462 10713 10463 10714 [[package]] 10464 10715 name = "toml" 10465 - version = "0.8.23" 10716 + version = "0.8.2" 10466 10717 source = "registry+https://github.com/rust-lang/crates.io-index" 10467 - checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" 10718 + checksum = "185d8ab0dfbb35cf1399a6344d8484209c088f75f8f68230da55d48d95d43e3d" 10468 10719 dependencies = [ 10469 10720 "serde", 10470 10721 "serde_spanned", 10471 - "toml_datetime 0.6.11", 10472 - "toml_edit 0.22.27", 10722 + "toml_datetime 0.6.3", 10723 + "toml_edit 0.20.2", 10473 10724 ] 10474 10725 10475 10726 [[package]] 10476 10727 name = "toml_datetime" 10477 - version = "0.6.11" 10728 + version = "0.6.3" 10478 10729 source = "registry+https://github.com/rust-lang/crates.io-index" 10479 - checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" 10730 + checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" 10480 10731 dependencies = [ 10481 10732 "serde", 10482 10733 ] ··· 10497 10748 checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" 10498 10749 dependencies = [ 10499 10750 "indexmap 2.12.1", 10500 - "toml_datetime 0.6.11", 10751 + "toml_datetime 0.6.3", 10501 10752 "winnow 0.5.40", 10502 10753 ] 10503 10754 10504 10755 [[package]] 10505 10756 name = "toml_edit" 10506 - version = "0.20.7" 10757 + version = "0.20.2" 10507 10758 source = "registry+https://github.com/rust-lang/crates.io-index" 10508 - checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" 10509 - dependencies = [ 10510 - "indexmap 2.12.1", 10511 - "toml_datetime 0.6.11", 10512 - "winnow 0.5.40", 10513 - ] 10514 - 10515 - 
[[package]] 10516 - name = "toml_edit" 10517 - version = "0.22.27" 10518 - source = "registry+https://github.com/rust-lang/crates.io-index" 10519 - checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" 10759 + checksum = "396e4d48bbb2b7554c944bde63101b5ae446cff6ec4a24227428f15eb72ef338" 10520 10760 dependencies = [ 10521 10761 "indexmap 2.12.1", 10522 10762 "serde", 10523 10763 "serde_spanned", 10524 - "toml_datetime 0.6.11", 10525 - "winnow 0.7.14", 10764 + "toml_datetime 0.6.3", 10765 + "winnow 0.5.40", 10526 10766 ] 10527 10767 10528 10768 [[package]] ··· 10568 10808 source = "registry+https://github.com/rust-lang/crates.io-index" 10569 10809 checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" 10570 10810 dependencies = [ 10811 + "async-compression", 10571 10812 "bitflags 2.10.0", 10572 10813 "bytes", 10573 10814 "futures-core", ··· 10657 10898 ] 10658 10899 10659 10900 [[package]] 10901 + name = "tracing-loki" 10902 + version = "0.2.6" 10903 + source = "registry+https://github.com/rust-lang/crates.io-index" 10904 + checksum = "ba3beec919fbdf99d719de8eda6adae3281f8a5b71ae40431f44dc7423053d34" 10905 + dependencies = [ 10906 + "loki-api", 10907 + "reqwest", 10908 + "serde", 10909 + "serde_json", 10910 + "snap", 10911 + "tokio", 10912 + "tokio-stream", 10913 + "tracing", 10914 + "tracing-core", 10915 + "tracing-log", 10916 + "tracing-serde", 10917 + "tracing-subscriber", 10918 + "url", 10919 + ] 10920 + 10921 + [[package]] 10922 + name = "tracing-serde" 10923 + version = "0.2.0" 10924 + source = "registry+https://github.com/rust-lang/crates.io-index" 10925 + checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" 10926 + dependencies = [ 10927 + "serde", 10928 + "tracing-core", 10929 + ] 10930 + 10931 + [[package]] 10660 10932 name = "tracing-subscriber" 10661 10933 version = "0.3.22" 10662 10934 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 10777 11049 
"rustls-pki-types", 10778 11050 "sha1 0.10.6", 10779 11051 "thiserror 1.0.69", 11052 + "utf-8", 11053 + ] 11054 + 11055 + [[package]] 11056 + name = "tungstenite" 11057 + version = "0.26.2" 11058 + source = "registry+https://github.com/rust-lang/crates.io-index" 11059 + checksum = "4793cb5e56680ecbb1d843515b23b6de9a75eb04b66643e256a396d43be33c13" 11060 + dependencies = [ 11061 + "bytes", 11062 + "data-encoding", 11063 + "http", 11064 + "httparse", 11065 + "log", 11066 + "native-tls", 11067 + "rand 0.9.2", 11068 + "sha1 0.10.6", 11069 + "thiserror 2.0.17", 10780 11070 "utf-8", 10781 11071 ] 10782 11072 ··· 11447 11737 "js-sys", 11448 11738 "markdown-weaver", 11449 11739 "markdown-weaver-escape", 11740 + "metrics", 11741 + "metrics-exporter-prometheus", 11450 11742 "miette 7.6.0", 11451 11743 "mime-sniffer", 11452 11744 "n0-future 0.1.3", ··· 11463 11755 "thiserror 2.0.17", 11464 11756 "tokio", 11465 11757 "tracing", 11758 + "tracing-loki", 11759 + "tracing-subscriber", 11466 11760 "trait-variant", 11467 11761 "wasm-bindgen", 11468 11762 "wasm-bindgen-futures", ··· 11476 11770 name = "weaver-index" 11477 11771 version = "0.1.0" 11478 11772 dependencies = [ 11773 + "axum", 11479 11774 "base64 0.22.1", 11480 11775 "bytes", 11481 11776 "chrono", ··· 11485 11780 "clickhouse", 11486 11781 "dashmap 6.1.0", 11487 11782 "dotenvy", 11783 + "futures-util", 11488 11784 "humansize", 11489 11785 "include_dir", 11490 11786 "jacquard", 11787 + "jacquard-axum", 11491 11788 "jacquard-common", 11492 11789 "jacquard-repo", 11493 11790 "miette 7.6.0", 11494 11791 "n0-future 0.1.3", 11792 + "rusqlite", 11793 + "rusqlite_migration", 11495 11794 "serde", 11496 11795 "serde_ipld_dagcbor", 11497 11796 "serde_json", 11498 11797 "smol_str", 11499 11798 "thiserror 2.0.17", 11500 11799 "tokio", 11800 + "tokio-tungstenite 0.26.2", 11801 + "tower", 11802 + "tower-http", 11501 11803 "tracing", 11502 11804 "tracing-subscriber", 11503 11805 "url", 11504 11806 "weaver-api", 11807 + "weaver-common", 
11505 11808 ] 11506 11809 11507 11810 [[package]]
+19 -18
Cargo.toml
··· 28 28 syntect = { version = "5.2.0", default-features = false } 29 29 n0-future = "=0.1.3" 30 30 tracing = { version = "0.1.41", default-features = false, features = ["std"] } 31 - # markdown-weaver = { git = "https://github.com/rsform/markdown-weaver" } 32 - # markdown-weaver-escape = { git = "https://github.com/rsform/markdown-weaver" } 33 - markdown-weaver = { path = "../markdown-weaver/markdown-weaver" } 34 - markdown-weaver-escape = { path = "../markdown-weaver/markdown-weaver-escape" } 31 + markdown-weaver = { git = "https://github.com/rsform/markdown-weaver" } 32 + markdown-weaver-escape = { git = "https://github.com/rsform/markdown-weaver" } 33 + # markdown-weaver = { path = "../markdown-weaver/markdown-weaver" } 34 + # markdown-weaver-escape = { path = "../markdown-weaver/markdown-weaver-escape" } 35 35 36 - # jacquard = { git = "https://tangled.org/@nonbinary.computer/jacquard", default-features = false, features = ["derive", "api_bluesky", "tracing"] } 37 - # jacquard-identity = { git = "https://tangled.org/@nonbinary.computer/jacquard", features = ["cache"] } 38 - # jacquard-common = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 39 - # jacquard-axum = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 40 - # jacquard-derive = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 41 - # jacquard-lexicon = { git = "https://tangled.org/@nonbinary.computer/jacquard", default-features = false } 36 + jacquard = { git = "https://tangled.org/@nonbinary.computer/jacquard", default-features = false, features = ["derive", "api_bluesky", "tracing"] } 37 + jacquard-identity = { git = "https://tangled.org/@nonbinary.computer/jacquard", features = ["cache"] } 38 + jacquard-common = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 39 + jacquard-axum = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 40 + jacquard-derive = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 41 + jacquard-lexicon = { 
git = "https://tangled.org/@nonbinary.computer/jacquard", default-features = false } 42 + jacquard-repo = { git = "https://tangled.org/@nonbinary.computer/jacquard" } 42 43 43 - jacquard = { path = "../jacquard/crates/jacquard", default-features = false, features = ["derive", "api_bluesky", "tracing", "cache"] } 44 - jacquard-identity = { path = "../jacquard/crates/jacquard-identity", features = ["cache"] } 45 - jacquard-api = { path = "../jacquard/crates/jacquard-api" } 46 - jacquard-common = { path = "../jacquard/crates/jacquard-common" } 47 - jacquard-axum = {path = "../jacquard/crates/jacquard-axum" } 48 - jacquard-derive = { path = "../jacquard/crates/jacquard-derive" } 49 - jacquard-lexicon = { path = "../jacquard/crates/jacquard-lexicon", default-features = false } 50 - jacquard-repo = { path = "../jacquard/crates/jacquard-repo" } 44 + # jacquard = { path = "../jacquard/crates/jacquard", default-features = false, features = ["derive", "api_bluesky", "tracing", "cache"] } 45 + # jacquard-identity = { path = "../jacquard/crates/jacquard-identity", features = ["cache"] } 46 + # jacquard-api = { path = "../jacquard/crates/jacquard-api" } 47 + # jacquard-common = { path = "../jacquard/crates/jacquard-common" } 48 + # jacquard-axum = {path = "../jacquard/crates/jacquard-axum" } 49 + # jacquard-derive = { path = "../jacquard/crates/jacquard-derive" } 50 + # jacquard-lexicon = { path = "../jacquard/crates/jacquard-lexicon", default-features = false } 51 + # jacquard-repo = { path = "../jacquard/crates/jacquard-repo" } 51 52 52 53 # jacquard = { path = "../jacquard-facet/crates/jacquard", default-features = false, features = ["derive", "api_bluesky", "tracing", "serde"] } 53 54 # jacquard-identity = { path = "../jacquard-facet/crates/jacquard-identity", features = ["cache"] }
+7
crates/weaver-common/Cargo.toml
··· 10 10 dev = [] 11 11 native = ["jacquard/dns"] 12 12 iroh = ["dep:iroh", "dep:iroh-gossip", "dep:iroh-tickets"] 13 + telemetry = ["dep:metrics", "dep:metrics-exporter-prometheus", "dep:tracing-subscriber", "dep:tracing-loki"] 13 14 14 15 [dependencies] 15 16 n0-future = { workspace = true } ··· 42 43 iroh = { version = "0.95", default-features = false, optional = true } 43 44 iroh-gossip = { version = "0.95", default-features = false, features = ["net"], optional = true } 44 45 iroh-tickets = { version = "0.2", optional = true } 46 + 47 + # Telemetry (optional, native-only) 48 + metrics = { version = "0.24.2", optional = true } 49 + metrics-exporter-prometheus = { version = "0.17.2", optional = true } 50 + tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"], optional = true } 51 + tracing-loki = { version = "0.2", optional = true } 45 52 46 53 getrandom = { version = "0.3", features = [] } 47 54 ring = { version = "0.17", default-features = false }
+2
crates/weaver-common/src/lib.rs
··· 4 4 pub mod constellation; 5 5 pub mod error; 6 6 pub mod resolve; 7 + #[cfg(feature = "telemetry")] 8 + pub mod telemetry; 7 9 pub mod transport; 8 10 pub mod worker_rt; 9 11
+170
crates/weaver-common/src/telemetry.rs
··· 1 + //! Telemetry infrastructure for weaver services. 2 + //! 3 + //! Provides: 4 + //! - Prometheus metrics with `/metrics` endpoint 5 + //! - Tracing with pretty console output + optional Loki push 6 + //! 7 + //! # Usage 8 + //! 9 + //! ```ignore 10 + //! use weaver_common::telemetry::{self, TelemetryConfig}; 11 + //! 12 + //! #[tokio::main] 13 + //! async fn main() { 14 + //! // Initialize telemetry (metrics + tracing) 15 + //! let config = TelemetryConfig::from_env("weaver-index"); 16 + //! telemetry::init(config).await; 17 + //! 18 + //! // Mount the metrics endpoint in your axum router 19 + //! let app = Router::new() 20 + //! .route("/metrics", get(|| async { telemetry::render() })); 21 + //! 22 + //! // Use metrics 23 + //! metrics::counter!("requests_total").increment(1); 24 + //! 25 + //! // Use tracing (goes to both console and loki if configured) 26 + //! tracing::info!("server started"); 27 + //! } 28 + //! ``` 29 + 30 + use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle}; 31 + use std::sync::OnceLock; 32 + use tracing::Level; 33 + use tracing_subscriber::layer::SubscriberExt; 34 + use tracing_subscriber::util::SubscriberInitExt; 35 + use tracing_subscriber::{EnvFilter, Layer}; 36 + 37 + static PROMETHEUS_HANDLE: OnceLock<PrometheusHandle> = OnceLock::new(); 38 + 39 + /// Telemetry configuration 40 + #[derive(Debug, Clone)] 41 + pub struct TelemetryConfig { 42 + /// Service name for labeling (e.g., "weaver-index", "weaver-app") 43 + pub service_name: String, 44 + /// Loki push URL (e.g., "http://localhost:3100"). None disables Loki. 45 + pub loki_url: Option<String>, 46 + /// Console log level (default: INFO, DEBUG in debug builds) 47 + pub console_level: Level, 48 + } 49 + 50 + impl TelemetryConfig { 51 + /// Load config from environment variables. 
52 + /// 53 + /// - `LOKI_URL`: Loki push endpoint (optional) 54 + /// - `RUST_LOG`: Standard env filter (optional, overrides console_level) 55 + pub fn from_env(service_name: impl Into<String>) -> Self { 56 + let console_level = if cfg!(debug_assertions) { 57 + Level::DEBUG 58 + } else { 59 + Level::INFO 60 + }; 61 + 62 + Self { 63 + service_name: service_name.into(), 64 + loki_url: std::env::var("LOKI_URL").ok(), 65 + console_level, 66 + } 67 + } 68 + } 69 + 70 + /// Initialize telemetry (metrics + tracing). 71 + /// 72 + /// Call once at application startup. If `LOKI_URL` is set, spawns a background 73 + /// task to push logs to Loki. 74 + pub async fn init(config: TelemetryConfig) { 75 + // Initialize prometheus metrics 76 + init_metrics(); 77 + 78 + // Initialize tracing 79 + init_tracing(config).await; 80 + } 81 + 82 + /// Initialize just the prometheus metrics recorder. 83 + pub fn init_metrics() -> &'static PrometheusHandle { 84 + PROMETHEUS_HANDLE.get_or_init(|| { 85 + PrometheusBuilder::new() 86 + .install_recorder() 87 + .expect("failed to install prometheus recorder") 88 + }) 89 + } 90 + 91 + /// Initialize tracing with console + optional Loki layers. 
92 + async fn init_tracing(config: TelemetryConfig) { 93 + let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { 94 + EnvFilter::new(format!( 95 + "{}", 96 + config.console_level.as_str().to_lowercase() 97 + )) 98 + }); 99 + 100 + // Pretty console layer for human-readable stdout 101 + let console_layer = tracing_subscriber::fmt::layer() 102 + .with_target(true) 103 + .with_thread_ids(false) 104 + .with_file(false) 105 + .with_line_number(false) 106 + .compact() 107 + .with_filter(env_filter); 108 + 109 + // Optional Loki layer for structured logs 110 + if let Some(loki_url) = config.loki_url { 111 + match tracing_loki::url::Url::parse(&loki_url) { 112 + Ok(url) => { 113 + let (loki_layer, loki_task) = tracing_loki::builder() 114 + .label("service", config.service_name.clone()) 115 + .expect("invalid label") 116 + .build_url(url) 117 + .expect("failed to build loki layer"); 118 + 119 + tracing_subscriber::registry() 120 + .with(console_layer) 121 + .with(loki_layer) 122 + .init(); 123 + 124 + // Spawn the background task that pushes to Loki 125 + tokio::spawn(loki_task); 126 + 127 + tracing::info!( 128 + service = %config.service_name, 129 + loki_url = %loki_url, 130 + "telemetry initialized with loki" 131 + ); 132 + } 133 + Err(e) => { 134 + // Invalid URL - fall back to console only 135 + tracing_subscriber::registry().with(console_layer).init(); 136 + 137 + tracing::warn!( 138 + error = %e, 139 + loki_url = %loki_url, 140 + "invalid LOKI_URL, falling back to console only" 141 + ); 142 + } 143 + } 144 + } else { 145 + // No Loki URL - console only 146 + tracing_subscriber::registry().with(console_layer).init(); 147 + 148 + tracing::debug!( 149 + service = %config.service_name, 150 + "telemetry initialized (console only, set LOKI_URL to enable loki)" 151 + ); 152 + } 153 + } 154 + 155 + /// Get the prometheus handle. 
156 + pub fn handle() -> &'static PrometheusHandle { 157 + PROMETHEUS_HANDLE.get_or_init(|| { 158 + PrometheusBuilder::new() 159 + .install_recorder() 160 + .expect("failed to install prometheus recorder") 161 + }) 162 + } 163 + 164 + /// Render metrics in prometheus text format. 165 + pub fn render() -> String { 166 + handle().render() 167 + } 168 + 169 + // Re-export the metrics crate for convenience 170 + pub use metrics::{counter, gauge, histogram};
+16 -1
crates/weaver-index/Cargo.toml
··· 21 21 [dependencies] 22 22 # Internal 23 23 weaver-api = { path = "../weaver-api", features = ["streaming"] } 24 + weaver-common = { path = "../weaver-common", features = ["telemetry"] } 24 25 25 26 # AT Protocol / Jacquard 26 - jacquard = { workspace = true, features = ["websocket", "zstd"] } 27 + jacquard = { workspace = true, features = ["websocket", "zstd", "dns", "cache"] } 27 28 jacquard-common = { workspace = true } 28 29 jacquard-repo = { workspace = true } 30 + jacquard-axum = { workspace = true } 29 31 30 32 # ClickHouse 31 33 clickhouse = { version = "0.14", features = ["inserter", "chrono", "rustls-tls-ring", "rustls-tls-webpki-roots"] } ··· 66 68 base64 = "0.22" 67 69 dashmap = "6" 68 70 include_dir = "0.7.4" 71 + 72 + # WebSocket (for tap consumer) 73 + tokio-tungstenite = { version = "0.26", features = ["native-tls"] } 74 + futures-util = "0.3" 75 + 76 + # HTTP server 77 + axum = { version = "0.8.7", features = ["macros"] } 78 + tower = "0.5.2" 79 + tower-http = { version = "0.6.7", features = ["trace", "cors"] } 80 + 81 + # SQLite (shard storage) 82 + rusqlite = { version = "0.37.0", features = ["bundled"] } 83 + rusqlite_migration = { version = "2.1.0", features = ["from-directory"] }
+39
crates/weaver-index/Dockerfile
# Build stage
FROM rust:1.91-trixie AS builder

WORKDIR /build

# Install build dependencies (OpenSSL headers for native-tls crates)
RUN apt-get update && apt-get install -y \
    pkg-config \
    libssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy workspace files
COPY Cargo.toml Cargo.lock ./
COPY crates ./crates

# Build release binary.
# --locked: fail instead of silently regenerating Cargo.lock, so the image
# builds against exactly the dependency versions committed to the repo.
RUN cargo build --release --locked -p weaver-index --bin indexer

# Runtime stage
FROM debian:trixie-slim

# Runtime deps: TLS trust store + OpenSSL shared library
RUN apt-get update && apt-get install -y \
    ca-certificates \
    libssl3 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/target/release/indexer /app/indexer

# Default environment variables (overridable with `docker run -e ...`)
ENV RUST_LOG=info
ENV RUST_BACKTRACE=1
ENV CLICKHOUSE_URL=http://clickhouse:8123
ENV CLICKHOUSE_DATABASE=default

# Run the indexer
ENTRYPOINT ["/app/indexer"]
+4
crates/weaver-index/migrations/clickhouse/001_raw_records.sql
··· 35 35 -- Populated by async batch validation, not in hot path 36 36 validation_state LowCardinality(String) DEFAULT 'unchecked', 37 37 38 + -- Whether this came from live firehose (true) or backfill (false) 39 + -- Backfill events may not reflect current state until repo is fully synced 40 + is_live Bool DEFAULT true, 41 + 38 42 -- Materialized AT URI for convenience 39 43 uri String MATERIALIZED concat('at://', did, '/', collection, '/', rkey), 40 44
+2 -8
crates/weaver-index/src/bin/storage_benchmark.rs
··· 178 178 let firehose_config = FirehoseConfig::from_env()?; 179 179 180 180 info!( 181 - "Connecting to ClickHouse at {} (database: {})", 181 + "Connecting to ClickHouse at:\n{} (database: {})", 182 182 ch_config.url, ch_config.database 183 183 ); 184 184 let client = Client::new(&ch_config)?; ··· 189 189 drop_benchmark_tables(&client).await?; 190 190 } 191 191 192 - // Create tables 193 192 info!("Creating benchmark tables..."); 194 193 create_benchmark_tables(&client).await?; 195 194 196 - // Create inserters 197 195 let mut json_inserter = client.inserter::<RawRecordJson>(TABLE_JSON); 198 196 let mut cbor_inserter = client.inserter::<RawRecordCbor>(TABLE_CBOR); 199 197 200 - // Connect to firehose 201 - info!("Connecting to firehose at {}", firehose_config.relay_url); 198 + info!("Connecting to firehose at:\n {}", firehose_config.relay_url); 202 199 let consumer = FirehoseConsumer::new(firehose_config); 203 200 let mut stream = consumer.connect().await?; 204 201 ··· 353 350 } 354 351 } 355 352 356 - // Final flush 357 353 info!("Flushing remaining records..."); 358 354 json_inserter 359 355 .end() ··· 370 366 source: e, 371 367 })?; 372 368 373 - // Final report 374 369 info!("\n========== FINAL RESULTS =========="); 375 370 report_progress( 376 371 &client, ··· 453 448 errors 454 449 ); 455 450 456 - // Lag info - critical for detecting if we're falling behind 457 451 if lag.sample_count > 0 { 458 452 info!( 459 453 " Lag: current={:.1}s, min={:.1}s, max={:.1}s (window)",
+132 -25
crates/weaver-index/src/bin/weaver_indexer.rs
··· 1 1 use clap::{Parser, Subcommand}; 2 - use miette::IntoDiagnostic; 3 - use tracing::{Level, info, warn}; 4 - use tracing_subscriber::EnvFilter; 2 + use tracing::{error, info, warn}; 5 3 use weaver_index::clickhouse::{Client, Migrator, Tables}; 6 - use weaver_index::config::{ClickHouseConfig, FirehoseConfig, IndexerConfig}; 4 + use weaver_index::config::{ 5 + ClickHouseConfig, FirehoseConfig, IndexerConfig, ShardConfig, SourceMode, TapConfig, 6 + }; 7 7 use weaver_index::firehose::FirehoseConsumer; 8 - use weaver_index::{Indexer, load_cursor}; 8 + use weaver_index::server::{AppState, ServerConfig, TelemetryConfig, telemetry}; 9 + use weaver_index::{FirehoseIndexer, TapIndexer, load_cursor}; 9 10 10 11 #[derive(Parser)] 11 12 #[command(name = "indexer")] ··· 31 32 /// Check database connectivity 32 33 Health, 33 34 34 - /// Start the indexer service (not yet implemented) 35 + /// Start the full service (indexer + HTTP server) 35 36 Run, 37 + 38 + /// Start only the HTTP server (no indexing) 39 + Serve, 40 + 41 + /// Start only the indexer (no HTTP server) 42 + Index, 36 43 } 37 44 38 45 #[tokio::main] 39 46 async fn main() -> miette::Result<()> { 40 47 dotenvy::dotenv().ok(); 41 48 42 - let console_level = if cfg!(debug_assertions) { 43 - Level::DEBUG 44 - } else { 45 - Level::INFO 46 - }; 47 - 48 - tracing_subscriber::fmt() 49 - .with_env_filter( 50 - tracing_subscriber::EnvFilter::builder() 51 - .from_env_lossy() 52 - .add_directive(console_level.into()) 53 - .add_directive("hyper_util=info".parse().into_diagnostic()?), 54 - ) 55 - .init(); 49 + // Initialize telemetry (metrics + tracing with optional Loki) 50 + let telemetry_config = TelemetryConfig::from_env("weaver-index"); 51 + telemetry::init(telemetry_config).await; 56 52 57 53 let args = Args::parse(); 58 54 59 55 match args.command { 60 56 Command::Migrate { dry_run, reset } => run_migrate(dry_run, reset).await, 61 57 Command::Health => run_health().await, 62 - Command::Run => run_indexer().await, 58 + 
Command::Run => run_full().await, 59 + Command::Serve => run_server_only().await, 60 + Command::Index => run_indexer_only().await, 63 61 } 64 62 } 65 63 ··· 126 124 Ok(()) 127 125 } 128 126 129 - async fn run_indexer() -> miette::Result<()> { 127 + /// Run both indexer and HTTP server concurrently (production mode) 128 + async fn run_full() -> miette::Result<()> { 129 + let ch_config = ClickHouseConfig::from_env()?; 130 + let shard_config = ShardConfig::from_env(); 131 + let server_config = ServerConfig::from_env(); 132 + let indexer_config = IndexerConfig::from_env(); 133 + let source_mode = SourceMode::from_env(); 134 + 135 + info!( 136 + "Connecting to ClickHouse at {} (database: {})", 137 + ch_config.url, ch_config.database 138 + ); 139 + info!("SQLite shards at {}", shard_config.base_path.display()); 140 + 141 + // Create separate clients for indexer and server 142 + let indexer_client = Client::new(&ch_config)?; 143 + let server_client = Client::new(&ch_config)?; 144 + 145 + // Build AppState for server 146 + let state = AppState::new(server_client, shard_config); 147 + 148 + // Spawn the indexer task 149 + let indexer_handle = match source_mode { 150 + SourceMode::Firehose => { 151 + let mut firehose_config = FirehoseConfig::from_env()?; 152 + if firehose_config.cursor.is_none() { 153 + if let Some(cursor) = load_cursor(&indexer_client).await? 
{ 154 + firehose_config.cursor = Some(cursor); 155 + } 156 + } 157 + info!( 158 + "Connecting to firehose at {} (cursor: {:?})", 159 + firehose_config.relay_url, firehose_config.cursor 160 + ); 161 + let consumer = FirehoseConsumer::new(firehose_config); 162 + let indexer = FirehoseIndexer::new(indexer_client, consumer, indexer_config).await?; 163 + info!("Starting firehose indexer"); 164 + tokio::spawn(async move { indexer.run().await }) 165 + } 166 + SourceMode::Tap => { 167 + let tap_config = TapConfig::from_env()?; 168 + let indexer = TapIndexer::new(indexer_client, tap_config, indexer_config); 169 + info!("Starting tap indexer"); 170 + tokio::spawn(async move { indexer.run().await }) 171 + } 172 + }; 173 + 174 + // Run server, monitoring indexer health 175 + tokio::select! { 176 + result = weaver_index::server::run(state, server_config) => { 177 + result?; 178 + } 179 + result = indexer_handle => { 180 + match result { 181 + Ok(Ok(())) => info!("Indexer completed"), 182 + Ok(Err(e)) => error!("Indexer failed: {}", e), 183 + Err(e) => error!("Indexer task panicked: {}", e), 184 + } 185 + } 186 + } 187 + 188 + Ok(()) 189 + } 190 + 191 + /// Run only the indexer (no HTTP server) 192 + async fn run_indexer_only() -> miette::Result<()> { 130 193 let ch_config = ClickHouseConfig::from_env()?; 131 - let mut firehose_config = FirehoseConfig::from_env()?; 132 194 let indexer_config = IndexerConfig::from_env(); 195 + let source_mode = SourceMode::from_env(); 133 196 134 197 info!( 135 198 "Connecting to ClickHouse at {} (database: {})", ··· 137 200 ); 138 201 let client = Client::new(&ch_config)?; 139 202 203 + match source_mode { 204 + SourceMode::Firehose => run_firehose_indexer(client, indexer_config).await, 205 + SourceMode::Tap => { 206 + let tap_config = TapConfig::from_env()?; 207 + run_tap_indexer(client, tap_config, indexer_config).await 208 + } 209 + } 210 + } 211 + 212 + async fn run_firehose_indexer(client: Client, indexer_config: IndexerConfig) -> 
miette::Result<()> { 213 + let mut firehose_config = FirehoseConfig::from_env()?; 214 + 140 215 // Load cursor from ClickHouse if not overridden by env var 141 216 if firehose_config.cursor.is_none() { 142 217 if let Some(cursor) = load_cursor(&client).await? { ··· 150 225 ); 151 226 let consumer = FirehoseConsumer::new(firehose_config); 152 227 153 - let indexer = Indexer::new(client, consumer, indexer_config).await?; 228 + let indexer = FirehoseIndexer::new(client, consumer, indexer_config).await?; 229 + 230 + info!("Starting firehose indexer"); 231 + indexer.run().await?; 154 232 155 - info!("Starting indexer"); 233 + Ok(()) 234 + } 235 + 236 + async fn run_tap_indexer( 237 + client: Client, 238 + tap_config: TapConfig, 239 + indexer_config: IndexerConfig, 240 + ) -> miette::Result<()> { 241 + let indexer = TapIndexer::new(client, tap_config, indexer_config); 242 + 243 + info!("Starting tap indexer"); 156 244 indexer.run().await?; 157 245 158 246 Ok(()) 159 247 } 248 + 249 + async fn run_server_only() -> miette::Result<()> { 250 + let ch_config = ClickHouseConfig::from_env()?; 251 + let shard_config = ShardConfig::from_env(); 252 + let server_config = ServerConfig::from_env(); 253 + 254 + info!( 255 + "Connecting to ClickHouse at {} (database: {})", 256 + ch_config.url, ch_config.database 257 + ); 258 + info!("SQLite shards at {}", shard_config.base_path.display()); 259 + 260 + let client = Client::new(&ch_config)?; 261 + 262 + let state = AppState::new(client, shard_config); 263 + weaver_index::server::run(state, server_config).await?; 264 + 265 + Ok(()) 266 + }
+172 -1
crates/weaver-index/src/clickhouse/client.rs
··· 1 + use std::time::Duration; 2 + 1 3 use crate::config::ClickHouseConfig; 2 4 use crate::error::{ClickHouseError, IndexError}; 3 5 use clickhouse::Row; 4 6 use clickhouse::inserter::Inserter; 7 + use serde::Deserialize; 5 8 6 9 /// ClickHouse client wrapper with connection pooling and batched inserts 7 10 pub struct Client { ··· 19 22 // Enable JSON type support (treated as string at transport level) 20 23 .with_option("allow_experimental_json_type", "1") 21 24 .with_option("input_format_binary_read_json_as_string", "1") 22 - .with_option("output_format_binary_write_json_as_string", "1"); 25 + .with_option("output_format_binary_write_json_as_string", "1") 26 + .with_option("send_timeout", "120") 27 + .with_option("receive_timeout", "120"); 23 28 24 29 Ok(Self { inner }) 25 30 } ··· 45 50 .inserter(table) 46 51 .with_max_rows(1000) 47 52 .with_period_bias(0.1) 53 + .with_period(Some(Duration::from_secs(1))) 48 54 .with_max_bytes(1_048_576) 49 55 } 50 56 ··· 90 96 pub fn inner(&self) -> &clickhouse::Client { 91 97 &self.inner 92 98 } 99 + 100 + /// Get a single record by (did, collection, rkey) 101 + /// 102 + /// Returns the latest non-deleted version from raw_records. 103 + pub async fn get_record( 104 + &self, 105 + did: &str, 106 + collection: &str, 107 + rkey: &str, 108 + ) -> Result<Option<RecordRow>, IndexError> { 109 + // FINAL ensures ReplacingMergeTree deduplication is applied 110 + let query = r#" 111 + SELECT cid, record 112 + FROM raw_records FINAL 113 + WHERE did = ? 114 + AND collection = ? 115 + AND rkey = ? 
116 + AND operation != 'delete' 117 + ORDER BY event_time DESC 118 + LIMIT 1 119 + "#; 120 + 121 + let row = self 122 + .inner 123 + .query(query) 124 + .bind(did) 125 + .bind(collection) 126 + .bind(rkey) 127 + .fetch_optional::<RecordRow>() 128 + .await 129 + .map_err(|e| ClickHouseError::Query { 130 + message: "failed to get record".into(), 131 + source: e, 132 + })?; 133 + 134 + Ok(row) 135 + } 136 + 137 + /// Insert a single record (for cache-on-miss) 138 + /// 139 + /// Used when fetching a record from upstream that wasn't in our cache. 140 + pub async fn insert_record( 141 + &self, 142 + did: &str, 143 + collection: &str, 144 + rkey: &str, 145 + cid: &str, 146 + record_json: &str, 147 + ) -> Result<(), IndexError> { 148 + use crate::clickhouse::schema::RawRecordInsert; 149 + use chrono::DateTime; 150 + use smol_str::SmolStr; 151 + 152 + let row = RawRecordInsert { 153 + did: SmolStr::new(did), 154 + collection: SmolStr::new(collection), 155 + rkey: SmolStr::new(rkey), 156 + cid: SmolStr::new(cid), 157 + rev: SmolStr::new_static(""), // Unknown from upstream fetch 158 + record: SmolStr::new(record_json), 159 + operation: SmolStr::new_static("cache"), // Distinguish from firehose ops 160 + seq: 0, // Not from firehose 161 + event_time: DateTime::UNIX_EPOCH, // Sort behind canonical firehose data 162 + is_live: false, // Fetched on-demand, not from firehose 163 + }; 164 + 165 + let mut insert = self 166 + .inner 167 + .insert::<RawRecordInsert>("raw_records") 168 + .await 169 + .map_err(|e| ClickHouseError::Insert { 170 + message: "failed to create insert".into(), 171 + source: e, 172 + })?; 173 + 174 + insert 175 + .write(&row) 176 + .await 177 + .map_err(|e| ClickHouseError::Insert { 178 + message: "failed to write record".into(), 179 + source: e, 180 + })?; 181 + 182 + insert.end().await.map_err(|e| ClickHouseError::Insert { 183 + message: "failed to flush insert".into(), 184 + source: e, 185 + })?; 186 + 187 + Ok(()) 188 + } 189 + 190 + /// List records for 
a repo+collection 191 + /// 192 + /// Returns non-deleted records ordered by rkey, with cursor-based pagination. 193 + pub async fn list_records( 194 + &self, 195 + did: &str, 196 + collection: &str, 197 + limit: u32, 198 + cursor: Option<&str>, 199 + reverse: bool, 200 + ) -> Result<Vec<RecordListRow>, IndexError> { 201 + let order = if reverse { "DESC" } else { "ASC" }; 202 + let cursor_op = if reverse { "<" } else { ">" }; 203 + 204 + // Build query with optional cursor 205 + let query = if cursor.is_some() { 206 + format!( 207 + r#" 208 + SELECT rkey, cid, record 209 + FROM raw_records FINAL 210 + WHERE did = ? 211 + AND collection = ? 212 + AND rkey {cursor_op} ? 213 + AND operation != 'delete' 214 + ORDER BY rkey {order} 215 + LIMIT ? 216 + "#, 217 + ) 218 + } else { 219 + format!( 220 + r#" 221 + SELECT rkey, cid, record 222 + FROM raw_records FINAL 223 + WHERE did = ? 224 + AND collection = ? 225 + AND operation != 'delete' 226 + ORDER BY rkey {order} 227 + LIMIT ? 228 + "#, 229 + ) 230 + }; 231 + 232 + let mut q = self.inner.query(&query).bind(did).bind(collection); 233 + 234 + if let Some(cursor_rkey) = cursor { 235 + q = q.bind(cursor_rkey); 236 + } 237 + 238 + let rows = q 239 + .bind(limit) 240 + .fetch_all::<RecordListRow>() 241 + .await 242 + .map_err(|e| ClickHouseError::Query { 243 + message: "failed to list records".into(), 244 + source: e, 245 + })?; 246 + 247 + Ok(rows) 248 + } 93 249 } 94 250 95 251 /// Table size statistics from system.parts ··· 121 277 } 122 278 } 123 279 } 280 + 281 + /// Single record from raw_records (for getRecord) 282 + #[derive(Debug, Clone, Row, Deserialize)] 283 + pub struct RecordRow { 284 + pub cid: String, 285 + pub record: String, // JSON string 286 + } 287 + 288 + /// Record with rkey from raw_records (for listRecords) 289 + #[derive(Debug, Clone, Row, Deserialize)] 290 + pub struct RecordListRow { 291 + pub rkey: String, 292 + pub cid: String, 293 + pub record: String, // JSON string 294 + }
+7
crates/weaver-index/src/clickhouse/schema.rs
··· 30 30 31 31 /// Validation states for records 32 32 pub mod validation { 33 + #[allow(dead_code)] 33 34 pub const UNCHECKED: &str = "unchecked"; 35 + #[allow(dead_code)] 34 36 pub const VALID: &str = "valid"; 37 + #[allow(dead_code)] 35 38 pub const INVALID_REV: &str = "invalid_rev"; 39 + #[allow(dead_code)] 36 40 pub const INVALID_GAP: &str = "invalid_gap"; 41 + #[allow(dead_code)] 37 42 pub const INVALID_ACCOUNT: &str = "invalid_account"; 38 43 } 39 44 ··· 51 56 pub seq: u64, 52 57 #[serde(with = "clickhouse::serde::chrono::datetime64::millis")] 53 58 pub event_time: DateTime<Utc>, 59 + /// Whether this came from live firehose (true) or backfill (false) 60 + pub is_live: bool, 54 61 // Note: indexed_at has DEFAULT now64(3), omit from insert 55 62 // Note: validation_state has DEFAULT 'unchecked', omit from insert 56 63 }
+85
crates/weaver-index/src/config.rs
··· 202 202 } 203 203 } 204 204 205 + /// Tap connection configuration 206 + #[derive(Debug, Clone)] 207 + pub struct TapConfig { 208 + pub url: Url, 209 + pub send_acks: bool, 210 + } 211 + 212 + impl TapConfig { 213 + /// Default tap URL (local) 214 + pub const DEFAULT_URL: &'static str = "ws://localhost:2480/channel"; 215 + 216 + /// Load configuration from environment variables. 217 + /// 218 + /// Optional env vars: 219 + /// - `TAP_URL`: Tap WebSocket URL (default: ws://localhost:2480/channel) 220 + /// - `TAP_SEND_ACKS`: Whether to send acks (default: true) 221 + pub fn from_env() -> Result<Self, IndexError> { 222 + let url_str = std::env::var("TAP_URL").unwrap_or_else(|_| Self::DEFAULT_URL.to_string()); 223 + 224 + let url = Url::parse(&url_str).map_err(|e| ConfigError::UrlParse { 225 + url: url_str, 226 + message: e.to_string(), 227 + })?; 228 + 229 + let send_acks = std::env::var("TAP_SEND_ACKS") 230 + .map(|s| s.to_lowercase() != "false") 231 + .unwrap_or(true); 232 + 233 + Ok(Self { url, send_acks }) 234 + } 235 + } 236 + 237 + /// Source mode for the indexer 238 + #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] 239 + pub enum SourceMode { 240 + /// Direct firehose connection 241 + #[default] 242 + Firehose, 243 + /// Consume from tap 244 + Tap, 245 + } 246 + 247 + impl SourceMode { 248 + pub fn from_env() -> Self { 249 + match std::env::var("INDEXER_SOURCE").as_deref() { 250 + Ok("tap") => SourceMode::Tap, 251 + _ => SourceMode::Firehose, 252 + } 253 + } 254 + } 255 + 256 + /// SQLite shard configuration 257 + #[derive(Debug, Clone)] 258 + pub struct ShardConfig { 259 + pub base_path: std::path::PathBuf, 260 + } 261 + 262 + impl Default for ShardConfig { 263 + fn default() -> Self { 264 + Self { 265 + base_path: std::path::PathBuf::from("./shards"), 266 + } 267 + } 268 + } 269 + 270 + impl ShardConfig { 271 + /// Load configuration from environment variables. 
272 + /// 273 + /// Optional env vars: 274 + /// - `SHARD_BASE_PATH`: Base directory for SQLite shards (default: ./shards) 275 + pub fn from_env() -> Self { 276 + let base_path = std::env::var("SHARD_BASE_PATH") 277 + .map(std::path::PathBuf::from) 278 + .unwrap_or_else(|_| std::path::PathBuf::from("./shards")); 279 + 280 + Self { base_path } 281 + } 282 + } 283 + 205 284 /// Combined configuration for the indexer 206 285 #[derive(Debug, Clone)] 207 286 pub struct Config { 208 287 pub clickhouse: ClickHouseConfig, 209 288 pub firehose: FirehoseConfig, 289 + pub tap: TapConfig, 210 290 pub indexer: IndexerConfig, 291 + pub shard: ShardConfig, 292 + pub source: SourceMode, 211 293 } 212 294 213 295 impl Config { ··· 216 298 Ok(Self { 217 299 clickhouse: ClickHouseConfig::from_env()?, 218 300 firehose: FirehoseConfig::from_env()?, 301 + tap: TapConfig::from_env()?, 219 302 indexer: IndexerConfig::from_env(), 303 + shard: ShardConfig::from_env(), 304 + source: SourceMode::from_env(), 220 305 }) 221 306 } 222 307 }
+3
crates/weaver-index/src/endpoints/mod.rs
··· 1 + //! XRPC endpoint handlers for the appview. 2 + 3 + pub mod repo;
+248
crates/weaver-index/src/endpoints/repo.rs
//! com.atproto.repo.* endpoint handlers
//!
//! These serve as a record cache, reading from the raw_records table
//! populated by firehose/tap ingestion. On cache miss, fetches from
//! upstream via Slingshot and caches the result.

use axum::{Json, extract::State, http::StatusCode, response::IntoResponse};
use jacquard::IntoStatic;
use jacquard::api::com_atproto::repo::{
    get_record::{GetRecordOutput, GetRecordRequest},
    list_records::{ListRecordsOutput, ListRecordsRequest, Record},
};
use jacquard::client::AgentSessionExt;
use jacquard::identity::resolver::IdentityResolver;
use jacquard::types::ident::AtIdentifier;
use jacquard::types::string::{AtUri, Cid};
use jacquard::types::value::Data;
use jacquard_axum::ExtractXrpc;
use serde_json::json;

use crate::server::AppState;

/// Error response for XRPC endpoints.
///
/// Serialized as the conventional XRPC error body `{"error", "message"}`.
pub struct XrpcErrorResponse {
    // HTTP status code to return
    pub status: StatusCode,
    // Machine-readable error name (e.g. "RecordNotFound")
    pub error: String,
    // Optional human-readable detail
    pub message: Option<String>,
}

impl XrpcErrorResponse {
    /// 404 with error name "RecordNotFound".
    pub fn not_found(message: impl Into<String>) -> Self {
        Self {
            status: StatusCode::NOT_FOUND,
            error: "RecordNotFound".to_string(),
            message: Some(message.into()),
        }
    }

    /// 400 with error name "InvalidRequest".
    pub fn invalid_request(message: impl Into<String>) -> Self {
        Self {
            status: StatusCode::BAD_REQUEST,
            error: "InvalidRequest".to_string(),
            message: Some(message.into()),
        }
    }

    /// 500 with error name "InternalServerError".
    pub fn internal_error(message: impl Into<String>) -> Self {
        Self {
            status: StatusCode::INTERNAL_SERVER_ERROR,
            error: "InternalServerError".to_string(),
            message: Some(message.into()),
        }
    }
}

impl IntoResponse for XrpcErrorResponse {
    fn into_response(self) -> axum::response::Response {
        // NOTE(review): when `message` is None this serializes as
        // `"message": null` rather than omitting the key — confirm that is
        // acceptable to XRPC clients.
        let body = json!({
            "error": self.error,
            "message": self.message,
        });
        (self.status, Json(body)).into_response()
    }
}

/// Handle com.atproto.repo.getRecord
///
/// Fetches a single record from the raw_records cache. On cache miss,
/// fetches from upstream via Slingshot and caches the result.
///
/// Errors: InvalidRequest on unresolvable handle, RecordNotFound when the
/// upstream fetch fails, InternalServerError on database/parse failures.
// NOTE(review): the lexicon's optional `cid` parameter (match-on-cid), if
// present on GetRecordRequest, is not honored here — verify against callers.
pub async fn get_record(
    State(state): State<AppState>,
    ExtractXrpc(args): ExtractXrpc<GetRecordRequest>,
) -> Result<Json<GetRecordOutput<'static>>, XrpcErrorResponse> {
    // Resolve identifier to DID (handles go through the identity resolver)
    let did = match &args.repo {
        AtIdentifier::Did(did) => did.clone(),
        AtIdentifier::Handle(handle) => {
            state.resolver.resolve_handle(handle).await.map_err(|e| {
                tracing::warn!("Handle resolution failed for {}: {}", handle, e);
                XrpcErrorResponse::invalid_request(format!("Could not resolve handle: {}", handle))
            })?
        }
    };

    let collection = args.collection.as_str();
    let rkey: &str = args.rkey.as_ref();

    // Query ClickHouse for the record
    let cached = state
        .clickhouse
        .get_record(did.as_str(), collection, rkey)
        .await
        .map_err(|e| {
            tracing::error!("ClickHouse query failed: {}", e);
            XrpcErrorResponse::internal_error("Database query failed")
        })?;

    if let Some(row) = cached {
        // Cache hit - return from ClickHouse
        // `row.record` is the JSON string stored in raw_records
        let value: Data<'_> = serde_json::from_str(&row.record).map_err(|e| {
            tracing::error!("Failed to parse record JSON: {}", e);
            XrpcErrorResponse::internal_error("Failed to parse stored record")
        })?;

        let uri_str = format!("at://{}/{}/{}", did, collection, rkey);
        let uri = AtUri::new_owned(uri_str).map_err(|e| {
            tracing::error!("Failed to construct AT URI: {}", e);
            XrpcErrorResponse::internal_error("Failed to construct URI")
        })?;

        // NOTE(review): Cid::new is fed the raw bytes of the stored CID
        // string — confirm it parses the textual form rather than expecting
        // binary CID bytes.
        let cid = Cid::new(row.cid.as_bytes()).map_err(|e| {
            tracing::error!("Invalid CID in database: {}", e);
            XrpcErrorResponse::internal_error("Invalid CID stored")
        })?;

        return Ok(Json(
            GetRecordOutput {
                cid: Some(cid),
                uri,
                value,
                extra_data: None,
            }
            .into_static(),
        ));
    }

    // Cache miss - fetch from Slingshot
    tracing::debug!(
        "Cache miss for {}/{}/{}, fetching from Slingshot",
        did,
        collection,
        rkey
    );

    let uri_str = format!("at://{}/{}/{}", did, collection, rkey);
    let uri = AtUri::new_owned(uri_str.clone()).map_err(|e| {
        tracing::error!("Failed to construct AT URI: {}", e);
        XrpcErrorResponse::internal_error("Failed to construct URI")
    })?;

    // Upstream failures are surfaced to the client as RecordNotFound
    let upstream = state
        .resolver
        .fetch_record_slingshot(&uri)
        .await
        .map_err(|e| {
            tracing::warn!("Slingshot fetch failed for {}: {}", uri, e);
            XrpcErrorResponse::not_found("Record not found")
        })?;

    // Cache the fetched record (fire-and-forget, don't block response)
    // Serialization/CID failures degrade to empty strings and simply skip
    // the write-back below rather than failing the request.
    let cid_str = upstream
        .cid
        .as_ref()
        .map(|c| c.as_str().to_string())
        .unwrap_or_default();
    let record_json = serde_json::to_string(&upstream.value).unwrap_or_default();

    if !cid_str.is_empty() && !record_json.is_empty() {
        // Clone owned copies so the spawned task outlives this handler
        let clickhouse = state.clickhouse.clone();
        let did_str = did.as_str().to_string();
        let collection_str = collection.to_string();
        let rkey_str = rkey.to_string();

        tokio::spawn(async move {
            if let Err(e) = clickhouse
                .insert_record(&did_str, &collection_str, &rkey_str, &cid_str, &record_json)
                .await
            {
                tracing::warn!("Failed to cache fetched record: {}", e);
            }
        });
    }

    Ok(Json(upstream))
}

/// Handle com.atproto.repo.listRecords
///
/// Lists records for a repo+collection from the raw_records cache.
/// Cursor-based pagination on rkey; no upstream fallback on empty results.
pub async fn list_records(
    State(state): State<AppState>,
    ExtractXrpc(args): ExtractXrpc<ListRecordsRequest>,
) -> Result<Json<ListRecordsOutput<'static>>, XrpcErrorResponse> {
    // Resolve identifier to DID (handles go through the identity resolver)
    let did = match &args.repo {
        AtIdentifier::Did(did) => did.clone(),
        AtIdentifier::Handle(handle) => {
            state.resolver.resolve_handle(handle).await.map_err(|e| {
                tracing::warn!("Handle resolution failed for {}: {}", handle, e);
                XrpcErrorResponse::invalid_request(format!("Could not resolve handle: {}", handle))
            })?
        }
    };

    let collection = args.collection.as_str();
    // Default page size 50, clamped to the lexicon's 1..=100 range
    let limit = args.limit.unwrap_or(50).clamp(1, 100) as u32;
    let cursor = args.cursor.as_deref();
    let reverse = args.reverse.unwrap_or(false);

    // Query ClickHouse for records
    let rows = state
        .clickhouse
        .list_records(did.as_str(), collection, limit, cursor, reverse)
        .await
        .map_err(|e| {
            tracing::error!("ClickHouse query failed: {}", e);
            XrpcErrorResponse::internal_error("Database query failed")
        })?;

    // Convert rows to Record output
    let mut records = Vec::with_capacity(rows.len());
    for row in &rows {
        let value: Data<'_> = serde_json::from_str(&row.record).map_err(|e| {
            tracing::error!("Failed to parse record JSON: {}", e);
            XrpcErrorResponse::internal_error("Failed to parse stored record")
        })?;

        let uri_str = format!("at://{}/{}/{}", did, collection, row.rkey);
        let uri = AtUri::new_owned(uri_str)
            .map_err(|_| XrpcErrorResponse::internal_error("Failed to construct URI"))?;

        // NOTE(review): same Cid::new string-bytes question as get_record
        let cid = Cid::new(row.cid.as_bytes())
            .map_err(|_| XrpcErrorResponse::internal_error("Invalid CID stored"))?;

        records.push(
            Record {
                uri,
                cid,
                value,
                extra_data: None,
            }
            .into_static(),
        );
    }

    // Cursor is the rkey of the last record, if we have more.
    // NOTE(review): when the final page has exactly `limit` rows this still
    // emits a cursor, so clients may issue one extra (empty) request.
    let next_cursor = if records.len() == limit as usize {
        rows.last().map(|r| r.rkey.clone().into())
    } else {
        None
    };

    Ok(Json(ListRecordsOutput {
        records,
        cursor: next_cursor,
        extra_data: None,
    }))
}
+69
crates/weaver-index/src/error.rs
··· 1 + use std::path::PathBuf; 2 + 1 3 use miette::Diagnostic; 2 4 use thiserror::Error; 3 5 ··· 19 21 #[error(transparent)] 20 22 #[diagnostic(transparent)] 21 23 Config(#[from] ConfigError), 24 + 25 + #[error(transparent)] 26 + #[diagnostic(transparent)] 27 + Server(#[from] ServerError), 28 + 29 + #[error(transparent)] 30 + #[diagnostic(transparent)] 31 + Sqlite(#[from] SqliteError), 32 + } 33 + 34 + /// HTTP server errors 35 + #[derive(Debug, Error, Diagnostic)] 36 + pub enum ServerError { 37 + #[error("failed to bind to {addr}")] 38 + #[diagnostic(code(server::bind))] 39 + Bind { 40 + addr: std::net::SocketAddr, 41 + #[source] 42 + source: std::io::Error, 43 + }, 44 + 45 + #[error("server terminated unexpectedly")] 46 + #[diagnostic(code(server::serve))] 47 + Serve { 48 + #[source] 49 + source: std::io::Error, 50 + }, 51 + } 52 + 53 + /// SQLite shard errors 54 + #[derive(Debug, Error, Diagnostic)] 55 + pub enum SqliteError { 56 + #[error("failed to open database at {}", path.display())] 57 + #[diagnostic(code(sqlite::open))] 58 + Open { 59 + path: PathBuf, 60 + #[source] 61 + source: rusqlite::Error, 62 + }, 63 + 64 + #[error("failed to create directory {}", path.display())] 65 + #[diagnostic(code(sqlite::io))] 66 + Io { 67 + path: PathBuf, 68 + #[source] 69 + source: std::io::Error, 70 + }, 71 + 72 + #[error("failed to set pragma {pragma}")] 73 + #[diagnostic(code(sqlite::pragma))] 74 + Pragma { 75 + pragma: &'static str, 76 + #[source] 77 + source: rusqlite::Error, 78 + }, 79 + 80 + #[error("migration failed: {message}")] 81 + #[diagnostic(code(sqlite::migration))] 82 + Migration { message: String }, 83 + 84 + #[error("query failed: {message}")] 85 + #[diagnostic(code(sqlite::query))] 86 + Query { message: String }, 87 + 88 + #[error("shard lock poisoned")] 89 + #[diagnostic(code(sqlite::lock))] 90 + LockPoisoned, 22 91 } 23 92 24 93 /// ClickHouse database errors
+255 -7
crates/weaver-index/src/indexer.rs
··· 5 5 use dashmap::DashMap; 6 6 use n0_future::StreamExt; 7 7 use smol_str::{SmolStr, ToSmolStr}; 8 - use tracing::{debug, info, warn}; 8 + use tracing::{debug, info, trace, warn}; 9 9 10 10 use chrono::DateTime; 11 11 ··· 13 13 AccountRevState, Client, FirehoseCursor, RawAccountEvent, RawIdentityEvent, RawRecordInsert, 14 14 }; 15 15 use crate::config::IndexerConfig; 16 - use crate::error::{IndexError, Result}; 16 + use crate::config::TapConfig; 17 + use crate::error::{ClickHouseError, IndexError, Result}; 17 18 use crate::firehose::{ 18 19 Account, Commit, ExtractedRecord, FirehoseConsumer, Identity, MessageStream, 19 20 SubscribeReposMessage, extract_records, 20 21 }; 22 + use crate::tap::{TapConfig as TapConsumerConfig, TapConsumer, TapEvent}; 21 23 22 24 /// Default consumer ID for cursor tracking 23 25 const CONSUMER_ID: &str = "main"; ··· 160 162 } 161 163 } 162 164 163 - /// Main indexer that consumes firehose and writes to ClickHouse 164 - pub struct Indexer { 165 + /// Firehose indexer that consumes AT Protocol firehose and writes to ClickHouse 166 + pub struct FirehoseIndexer { 165 167 client: Arc<Client>, 166 168 consumer: FirehoseConsumer, 167 169 rev_cache: RevCache, 168 170 config: IndexerConfig, 169 171 } 170 172 171 - impl Indexer { 172 - /// Create a new indexer 173 + impl FirehoseIndexer { 174 + /// Create a new firehose indexer 173 175 pub async fn new( 174 176 client: Client, 175 177 consumer: FirehoseConsumer, ··· 226 228 227 229 info!("starting indexer loop"); 228 230 229 - while let Some(result) = stream.next().await { 231 + loop { 232 + // Get time until next required flush - must commit before socket timeout (30s) 233 + let records_time = records.time_left().unwrap_or(Duration::from_secs(10)); 234 + let identities_time = identities.time_left().unwrap_or(Duration::from_secs(10)); 235 + let accounts_time = accounts.time_left().unwrap_or(Duration::from_secs(10)); 236 + let time_left = records_time.min(identities_time).min(accounts_time); 
237 + 238 + let result = 239 + match tokio::time::timeout(time_left, stream.next()).await { 240 + Ok(Some(result)) => result, 241 + Ok(None) => { 242 + // Stream ended 243 + break; 244 + } 245 + Err(_) => { 246 + // Timeout - flush inserters to keep INSERT alive 247 + debug!("flush timeout, committing inserters"); 248 + records.commit().await.map_err(|e| { 249 + crate::error::ClickHouseError::Query { 250 + message: "periodic records commit failed".into(), 251 + source: e, 252 + } 253 + })?; 254 + identities.commit().await.map_err(|e| { 255 + crate::error::ClickHouseError::Query { 256 + message: "periodic identities commit failed".into(), 257 + source: e, 258 + } 259 + })?; 260 + accounts.commit().await.map_err(|e| { 261 + crate::error::ClickHouseError::Query { 262 + message: "periodic accounts commit failed".into(), 263 + source: e, 264 + } 265 + })?; 266 + continue; 267 + } 268 + }; 269 + 230 270 let msg = match result { 231 271 Ok(msg) => msg, 232 272 Err(e) => { ··· 381 421 operation: record.operation.clone(), 382 422 seq: record.seq as u64, 383 423 event_time: record.event_time, 424 + is_live: true, 384 425 }) 385 426 .await 386 427 .map_err(|e| crate::error::ClickHouseError::Query { ··· 455 496 /// Minimal struct for delete lookups - just the fields we need to process the delete 456 497 #[derive(Debug, Clone, clickhouse::Row, serde::Deserialize)] 457 498 struct LookupRawRecord { 499 + #[allow(dead_code)] 458 500 did: SmolStr, 501 + #[allow(dead_code)] 459 502 collection: SmolStr, 503 + #[allow(dead_code)] 460 504 rkey: SmolStr, 505 + #[allow(dead_code)] 461 506 record: SmolStr, // JSON string of the original record 462 507 } 463 508 ··· 510 555 tokio::time::sleep(Duration::from_secs(1)).await; 511 556 } 512 557 } 558 + 559 + // ============================================================================ 560 + // TapIndexer - consumes from tap websocket 561 + // ============================================================================ 562 + 563 + /// 
Consumer ID for tap cursor tracking 564 + const TAP_CONSUMER_ID: &str = "tap"; 565 + 566 + /// Tap indexer that consumes from tap websocket and writes to ClickHouse 567 + pub struct TapIndexer { 568 + client: Arc<Client>, 569 + tap_config: TapConfig, 570 + config: IndexerConfig, 571 + } 572 + 573 + impl TapIndexer { 574 + /// Create a new tap indexer 575 + pub fn new(client: Client, tap_config: TapConfig, config: IndexerConfig) -> Self { 576 + Self { 577 + client: Arc::new(client), 578 + tap_config, 579 + config, 580 + } 581 + } 582 + 583 + /// Save tap cursor to ClickHouse for visibility 584 + async fn save_cursor(&self, seq: u64) -> Result<()> { 585 + let query = format!( 586 + "INSERT INTO firehose_cursor (consumer_id, seq, event_time) VALUES ('{}', {}, now64(3))", 587 + TAP_CONSUMER_ID, seq 588 + ); 589 + 590 + self.client.execute(&query).await?; 591 + debug!(seq, "saved tap cursor"); 592 + Ok(()) 593 + } 594 + 595 + /// Run the tap indexer loop 596 + pub async fn run(&self) -> Result<()> { 597 + info!(url = %self.tap_config.url, "connecting to tap..."); 598 + 599 + let consumer_config = TapConsumerConfig::new(self.tap_config.url.clone()) 600 + .with_acks(self.tap_config.send_acks); 601 + let consumer = TapConsumer::new(consumer_config); 602 + 603 + let (mut events, ack_tx) = consumer.connect().await?; 604 + 605 + let mut records = self.client.inserter::<RawRecordInsert>("raw_records"); 606 + let mut identities = self 607 + .client 608 + .inserter::<RawIdentityEvent>("raw_identity_events"); 609 + 610 + let mut processed: u64 = 0; 611 + let mut last_seq: u64 = 0; 612 + let mut last_stats = Instant::now(); 613 + let mut last_cursor_save = Instant::now(); 614 + 615 + info!("starting tap indexer loop"); 616 + 617 + loop { 618 + // Get time until next required flush - must commit before socket timeout (30s) 619 + let records_time = records.time_left().unwrap_or(Duration::from_secs(10)); 620 + let identities_time = 
identities.time_left().unwrap_or(Duration::from_secs(10)); 621 + let time_left = records_time.min(identities_time); 622 + 623 + let event = match tokio::time::timeout(time_left, events.recv()).await { 624 + Ok(Some(event)) => event, 625 + Ok(None) => { 626 + // Channel closed, exit loop 627 + break; 628 + } 629 + Err(_) => { 630 + // Timeout - flush inserters to keep INSERT alive 631 + trace!("flush timeout, committing inserters"); 632 + records.commit().await.map_err(|e| ClickHouseError::Query { 633 + message: "periodic records commit failed".into(), 634 + source: e, 635 + })?; 636 + identities 637 + .commit() 638 + .await 639 + .map_err(|e| ClickHouseError::Query { 640 + message: "periodic identities commit failed".into(), 641 + source: e, 642 + })?; 643 + continue; 644 + } 645 + }; 646 + 647 + let event_id = event.id(); 648 + last_seq = event_id; 649 + 650 + match event { 651 + TapEvent::Record(envelope) => { 652 + let record = &envelope.record; 653 + 654 + // Collection filter 655 + if !self.config.collections.matches(&record.collection) { 656 + // Still ack even if filtered 657 + let _ = ack_tx.send(event_id).await; 658 + continue; 659 + } 660 + 661 + let json = record 662 + .record 663 + .as_ref() 664 + .map(|v| serde_json::to_string(v).unwrap_or_default()) 665 + .unwrap_or_default(); 666 + 667 + debug!( 668 + op = record.action.as_str(), 669 + id = event_id, 670 + len = json.len(), 671 + "writing record" 672 + ); 673 + 674 + records 675 + .write(&RawRecordInsert { 676 + did: record.did.clone(), 677 + collection: record.collection.clone(), 678 + rkey: record.rkey.clone(), 679 + cid: record.cid.clone(), 680 + rev: record.rev.clone(), 681 + record: json.to_smolstr(), 682 + operation: record.action.as_str().to_smolstr(), 683 + seq: event_id, 684 + event_time: Utc::now(), 685 + is_live: record.live, 686 + }) 687 + .await 688 + .map_err(|e| ClickHouseError::Query { 689 + message: "record write failed".into(), 690 + source: e, 691 + })?; 692 + 
records.commit().await.map_err(|e| ClickHouseError::Query { 693 + message: format!("record commit failed for id {}", event_id), 694 + source: e, 695 + })?; 696 + 697 + processed += 1; 698 + } 699 + TapEvent::Identity(envelope) => { 700 + let identity = &envelope.identity; 701 + 702 + identities 703 + .write(&RawIdentityEvent { 704 + did: identity.did.clone(), 705 + handle: identity.handle.clone(), 706 + seq: event_id, 707 + event_time: Utc::now(), 708 + }) 709 + .await 710 + .map_err(|e| ClickHouseError::Query { 711 + message: "identity write failed".into(), 712 + source: e, 713 + })?; 714 + identities 715 + .commit() 716 + .await 717 + .map_err(|e| ClickHouseError::Query { 718 + message: "identity commit failed".into(), 719 + source: e, 720 + })?; 721 + } 722 + } 723 + 724 + // Send ack after successful write+commit 725 + let _ = ack_tx.send(event_id).await; 726 + 727 + // Periodic stats 728 + if last_stats.elapsed() >= Duration::from_secs(10) { 729 + info!(processed, last_seq, "tap indexer stats"); 730 + last_stats = Instant::now(); 731 + } 732 + 733 + // Save cursor every 30s for visibility 734 + if last_cursor_save.elapsed() >= Duration::from_secs(30) && last_seq > 0 { 735 + if let Err(e) = self.save_cursor(last_seq).await { 736 + warn!(error = ?e, "failed to save tap cursor"); 737 + } 738 + last_cursor_save = Instant::now(); 739 + } 740 + } 741 + 742 + // Final flush 743 + records.end().await.map_err(|e| ClickHouseError::Query { 744 + message: "final records flush failed".into(), 745 + source: e, 746 + })?; 747 + identities.end().await.map_err(|e| ClickHouseError::Query { 748 + message: "final identities flush failed".into(), 749 + source: e, 750 + })?; 751 + 752 + // Final cursor save 753 + if last_seq > 0 { 754 + self.save_cursor(last_seq).await?; 755 + } 756 + 757 + info!(last_seq, "tap stream ended"); 758 + Ok(()) 759 + } 760 + }
+7 -1
crates/weaver-index/src/lib.rs
··· 1 1 pub mod clickhouse; 2 2 pub mod config; 3 + pub mod endpoints; 3 4 pub mod error; 4 5 pub mod firehose; 5 6 pub mod indexer; 7 + pub mod server; 8 + pub mod sqlite; 9 + pub mod tap; 6 10 7 11 pub use config::Config; 8 12 pub use error::{IndexError, Result}; 9 - pub use indexer::{load_cursor, Indexer}; 13 + pub use indexer::{FirehoseIndexer, TapIndexer, load_cursor}; 14 + pub use server::{AppState, ServerConfig}; 15 + pub use sqlite::{ShardKey, ShardRouter, SqliteShard};
+141
crates/weaver-index/src/server.rs
··· 1 + use std::net::SocketAddr; 2 + use std::sync::Arc; 3 + 4 + use axum::{Json, Router, extract::State, http::StatusCode, response::IntoResponse, routing::get}; 5 + use jacquard::api::com_atproto::repo::{ 6 + get_record::GetRecordRequest, list_records::ListRecordsRequest, 7 + }; 8 + use jacquard::client::UnauthenticatedSession; 9 + use jacquard::identity::JacquardResolver; 10 + use jacquard_axum::IntoRouter; 11 + use serde::Serialize; 12 + use tower_http::trace::TraceLayer; 13 + use tracing::info; 14 + 15 + use crate::clickhouse::Client; 16 + use crate::config::ShardConfig; 17 + use crate::endpoints::repo; 18 + use crate::error::{IndexError, ServerError}; 19 + use crate::sqlite::ShardRouter; 20 + 21 + pub use weaver_common::telemetry::{self, TelemetryConfig}; 22 + 23 + /// Identity resolver type (unauthenticated, just for handle/DID resolution) 24 + pub type Resolver = UnauthenticatedSession<JacquardResolver>; 25 + 26 + /// Shared application state 27 + #[derive(Clone)] 28 + pub struct AppState { 29 + pub clickhouse: Arc<Client>, 30 + pub shards: Arc<ShardRouter>, 31 + pub resolver: Resolver, 32 + } 33 + 34 + impl AppState { 35 + pub fn new(clickhouse: Client, shard_config: ShardConfig) -> Self { 36 + Self { 37 + clickhouse: Arc::new(clickhouse), 38 + shards: Arc::new(ShardRouter::new(shard_config.base_path)), 39 + resolver: UnauthenticatedSession::new_slingshot(), 40 + } 41 + } 42 + } 43 + 44 + /// Build the axum router with all XRPC endpoints 45 + pub fn router(state: AppState) -> Router { 46 + Router::new() 47 + .route("/xrpc/_health", get(health)) 48 + .route("/metrics", get(metrics)) 49 + // com.atproto.repo.* endpoints (record cache) 50 + .merge(GetRecordRequest::into_router(repo::get_record)) 51 + .merge(ListRecordsRequest::into_router(repo::list_records)) 52 + .layer(TraceLayer::new_for_http()) 53 + .with_state(state) 54 + } 55 + 56 + /// Prometheus metrics endpoint 57 + async fn metrics() -> String { 58 + telemetry::render() 59 + } 60 + 61 + /// Health 
check response 62 + #[derive(Serialize)] 63 + struct HealthResponse { 64 + status: &'static str, 65 + clickhouse: bool, 66 + shard_count: usize, 67 + } 68 + 69 + /// Health check endpoint 70 + /// 71 + /// Returns 200 OK with stats if healthy, 503 if ClickHouse unreachable. 72 + async fn health(State(state): State<AppState>) -> impl IntoResponse { 73 + let clickhouse_ok = state.clickhouse.execute("SELECT 1").await.is_ok(); 74 + let shard_count = state.shards.shard_count(); 75 + 76 + let response = HealthResponse { 77 + status: if clickhouse_ok { "ok" } else { "degraded" }, 78 + clickhouse: clickhouse_ok, 79 + shard_count, 80 + }; 81 + 82 + let status = if clickhouse_ok { 83 + StatusCode::OK 84 + } else { 85 + StatusCode::SERVICE_UNAVAILABLE 86 + }; 87 + 88 + (status, Json(response)) 89 + } 90 + 91 + /// Server configuration 92 + #[derive(Debug, Clone)] 93 + pub struct ServerConfig { 94 + pub host: String, 95 + pub port: u16, 96 + } 97 + 98 + impl Default for ServerConfig { 99 + fn default() -> Self { 100 + Self { 101 + host: "0.0.0.0".to_string(), 102 + port: 3000, 103 + } 104 + } 105 + } 106 + 107 + impl ServerConfig { 108 + pub fn from_env() -> Self { 109 + let host = std::env::var("SERVER_HOST").unwrap_or_else(|_| "0.0.0.0".to_string()); 110 + let port = std::env::var("SERVER_PORT") 111 + .ok() 112 + .and_then(|s| s.parse().ok()) 113 + .unwrap_or(3000); 114 + 115 + Self { host, port } 116 + } 117 + 118 + pub fn addr(&self) -> SocketAddr { 119 + format!("{}:{}", self.host, self.port) 120 + .parse() 121 + .expect("valid socket address") 122 + } 123 + } 124 + 125 + /// Run the HTTP server 126 + pub async fn run(state: AppState, config: ServerConfig) -> Result<(), IndexError> { 127 + let addr = config.addr(); 128 + let app = router(state); 129 + 130 + info!("Starting HTTP server on {}", addr); 131 + 132 + let listener = tokio::net::TcpListener::bind(addr) 133 + .await 134 + .map_err(|e| ServerError::Bind { addr, source: e })?; 135 + 136 + axum::serve(listener, app) 
137 + .await 138 + .map_err(|e| ServerError::Serve { source: e })?; 139 + 140 + Ok(()) 141 + }
+209
crates/weaver-index/src/sqlite.rs
··· 1 + use std::collections::hash_map::DefaultHasher; 2 + use std::fs; 3 + use std::hash::{Hash, Hasher}; 4 + use std::path::{Path, PathBuf}; 5 + use std::sync::Mutex; 6 + use std::time::Instant; 7 + 8 + use dashmap::DashMap; 9 + use rusqlite::Connection; 10 + use rusqlite_migration::{M, Migrations}; 11 + use smol_str::SmolStr; 12 + 13 + use crate::error::{IndexError, SqliteError}; 14 + 15 + /// Key for shard routing - (collection, rkey) tuple 16 + #[derive(Debug, Clone, Hash, PartialEq, Eq)] 17 + pub struct ShardKey { 18 + pub collection: SmolStr, 19 + pub rkey: SmolStr, 20 + } 21 + 22 + impl ShardKey { 23 + pub fn new(collection: impl Into<SmolStr>, rkey: impl Into<SmolStr>) -> Self { 24 + Self { 25 + collection: collection.into(), 26 + rkey: rkey.into(), 27 + } 28 + } 29 + 30 + fn hash_prefix(&self) -> String { 31 + let mut hasher = DefaultHasher::new(); 32 + self.hash(&mut hasher); 33 + let hash = hasher.finish(); 34 + format!("{:02x}", (hash & 0xFF) as u8) 35 + } 36 + 37 + /// Directory path: {base}/{hash(collection,rkey)[0..2]}/{rkey}/ 38 + fn dir_path(&self, base: &Path) -> PathBuf { 39 + base.join(self.hash_prefix()).join(self.rkey.as_str()) 40 + } 41 + 42 + pub fn collection(&self) -> &str { 43 + &self.collection 44 + } 45 + 46 + pub fn rkey(&self) -> &str { 47 + &self.rkey 48 + } 49 + } 50 + 51 + /// A single SQLite shard for a resource 52 + pub struct SqliteShard { 53 + conn: Mutex<Connection>, 54 + path: PathBuf, 55 + last_accessed: Mutex<Instant>, 56 + } 57 + 58 + impl SqliteShard { 59 + const DB_FILENAME: &'static str = "store.sqlite"; 60 + 61 + fn open(dir: &Path) -> Result<Self, IndexError> { 62 + fs::create_dir_all(dir).map_err(|e| SqliteError::Io { 63 + path: dir.to_path_buf(), 64 + source: e, 65 + })?; 66 + 67 + let db_path = dir.join(Self::DB_FILENAME); 68 + let mut conn = Connection::open(&db_path).map_err(|e| SqliteError::Open { 69 + path: db_path.clone(), 70 + source: e, 71 + })?; 72 + 73 + // Enable WAL mode for better concurrency 74 + 
conn.pragma_update(None, "journal_mode", "WAL") 75 + .map_err(|e| SqliteError::Pragma { 76 + pragma: "journal_mode", 77 + source: e, 78 + })?; 79 + 80 + // Run migrations 81 + // PERF: rusqlite_migration checks user_version pragma, which is fast when 82 + // no migrations needed. If shard open becomes a bottleneck, consider adding 83 + // a signal file (e.g., .schema_v{N}) to skip migration check entirely. 84 + Self::migrations() 85 + .to_latest(&mut conn) 86 + .map_err(|e| SqliteError::Migration { 87 + message: e.to_string(), 88 + })?; 89 + 90 + Ok(Self { 91 + conn: Mutex::new(conn), 92 + path: db_path, 93 + last_accessed: Mutex::new(Instant::now()), 94 + }) 95 + } 96 + 97 + fn migrations() -> Migrations<'static> { 98 + Migrations::new(vec![ 99 + M::up(include_str!("sqlite/migrations/001_edit_graph.sql")), 100 + M::up(include_str!("sqlite/migrations/002_collaboration.sql")), 101 + M::up(include_str!("sqlite/migrations/003_permissions.sql")), 102 + ]) 103 + } 104 + 105 + pub fn path(&self) -> &Path { 106 + &self.path 107 + } 108 + 109 + pub fn touch(&self) { 110 + if let Ok(mut last) = self.last_accessed.lock() { 111 + *last = Instant::now(); 112 + } 113 + } 114 + 115 + pub fn last_accessed(&self) -> Instant { 116 + self.last_accessed 117 + .lock() 118 + .map(|t| *t) 119 + .unwrap_or_else(|_| Instant::now()) 120 + } 121 + 122 + /// Execute a read operation on the shard 123 + pub fn read<F, T>(&self, f: F) -> Result<T, IndexError> 124 + where 125 + F: FnOnce(&Connection) -> Result<T, rusqlite::Error>, 126 + { 127 + self.touch(); 128 + let conn = self.conn.lock().map_err(|_| SqliteError::LockPoisoned)?; 129 + f(&conn).map_err(|e| { 130 + SqliteError::Query { 131 + message: e.to_string(), 132 + } 133 + .into() 134 + }) 135 + } 136 + 137 + /// Execute a write operation on the shard 138 + pub fn write<F, T>(&self, f: F) -> Result<T, IndexError> 139 + where 140 + F: FnOnce(&Connection) -> Result<T, rusqlite::Error>, 141 + { 142 + self.touch(); 143 + let conn = 
self.conn.lock().map_err(|_| SqliteError::LockPoisoned)?; 144 + f(&conn).map_err(|e| { 145 + SqliteError::Query { 146 + message: e.to_string(), 147 + } 148 + .into() 149 + }) 150 + } 151 + } 152 + 153 + /// Routes resources to their SQLite shards 154 + pub struct ShardRouter { 155 + base_path: PathBuf, 156 + shards: DashMap<ShardKey, std::sync::Arc<SqliteShard>>, 157 + } 158 + 159 + impl ShardRouter { 160 + pub fn new(base_path: impl Into<PathBuf>) -> Self { 161 + Self { 162 + base_path: base_path.into(), 163 + shards: DashMap::new(), 164 + } 165 + } 166 + 167 + /// Get or create a shard for the given key 168 + pub fn get_or_create(&self, key: &ShardKey) -> Result<std::sync::Arc<SqliteShard>, IndexError> { 169 + // Fast path: already cached 170 + if let Some(shard) = self.shards.get(key) { 171 + shard.touch(); 172 + return Ok(shard.clone()); 173 + } 174 + 175 + // Slow path: create new shard 176 + let dir = key.dir_path(&self.base_path); 177 + let shard = std::sync::Arc::new(SqliteShard::open(&dir)?); 178 + self.shards.insert(key.clone(), shard.clone()); 179 + 180 + Ok(shard) 181 + } 182 + 183 + /// Get an existing shard without creating 184 + pub fn get(&self, key: &ShardKey) -> Option<std::sync::Arc<SqliteShard>> { 185 + self.shards.get(key).map(|s| { 186 + s.touch(); 187 + s.clone() 188 + }) 189 + } 190 + 191 + /// Number of active shards 192 + pub fn shard_count(&self) -> usize { 193 + self.shards.len() 194 + } 195 + 196 + /// Iterate over shards that haven't been accessed since the given instant 197 + pub fn idle_shards(&self, since: Instant) -> Vec<ShardKey> { 198 + self.shards 199 + .iter() 200 + .filter(|entry| entry.value().last_accessed() < since) 201 + .map(|entry| entry.key().clone()) 202 + .collect() 203 + } 204 + 205 + /// Remove a shard from the cache (for eviction) 206 + pub fn evict(&self, key: &ShardKey) -> Option<std::sync::Arc<SqliteShard>> { 207 + self.shards.remove(key).map(|(_, shard)| shard) 208 + } 209 + }
+66
crates/weaver-index/src/sqlite/migrations/001_edit_graph.sql
-- Edit graph storage (roots and diffs).
-- Models edits as a DAG so future merge support needs no schema change.

-- One row per edit record (root or diff), keyed by the record's identity.
CREATE TABLE edit_nodes (
    -- Edit record identity (decomposed)
    did TEXT NOT NULL,
    collection TEXT NOT NULL,      -- 'sh.weaver.edit.root' or 'sh.weaver.edit.diff'
    rkey TEXT NOT NULL,

    -- Resource being edited (decomposed)
    resource_did TEXT NOT NULL,
    resource_collection TEXT NOT NULL,
    resource_rkey TEXT NOT NULL,

    node_type TEXT NOT NULL,       -- 'root' | 'diff'
    created_at TEXT NOT NULL,
    indexed_at TEXT NOT NULL,

    PRIMARY KEY (did, collection, rkey)
);

-- Lookups of all edits touching one resource.
CREATE INDEX idx_edit_nodes_resource ON edit_nodes(resource_did, resource_collection, resource_rkey);
-- Lookups of all edits by one author.
CREATE INDEX idx_edit_nodes_author ON edit_nodes(did);

-- Parent/child links between edit nodes; multiple parents allow a DAG.
CREATE TABLE edit_edges (
    -- Child reference (decomposed)
    child_did TEXT NOT NULL,
    child_collection TEXT NOT NULL,
    child_rkey TEXT NOT NULL,

    -- Parent reference (decomposed)
    parent_did TEXT NOT NULL,
    parent_collection TEXT NOT NULL,
    parent_rkey TEXT NOT NULL,

    edge_type TEXT NOT NULL,       -- 'prev' | 'merge' (future)

    PRIMARY KEY (child_did, child_collection, child_rkey, parent_did, parent_collection, parent_rkey),
    FOREIGN KEY (child_did, child_collection, child_rkey) REFERENCES edit_nodes(did, collection, rkey),
    FOREIGN KEY (parent_did, parent_collection, parent_rkey) REFERENCES edit_nodes(did, collection, rkey)
);

-- Walk the graph downward (find children of a given parent).
CREATE INDEX idx_edit_edges_parent ON edit_edges(parent_did, parent_collection, parent_rkey);

-- Fast path: one row per resource tracking its latest root and current head,
-- avoiding a graph traversal on the hot read path.
CREATE TABLE edit_heads (
    -- Resource identity (decomposed)
    resource_did TEXT NOT NULL,
    resource_collection TEXT NOT NULL,
    resource_rkey TEXT NOT NULL,

    -- Latest root reference (decomposed); NULLs mean not yet known
    root_did TEXT,
    root_collection TEXT,
    root_rkey TEXT,

    -- Current head reference (decomposed); NULLs mean not yet known
    head_did TEXT,
    head_collection TEXT,
    head_rkey TEXT,

    updated_at TEXT NOT NULL,

    PRIMARY KEY (resource_did, resource_collection, resource_rkey)
);
+73
crates/weaver-index/src/sqlite/migrations/002_collaboration.sql
-- Collaboration state: confirmed collaborators, live sessions, and
-- invites that have not yet been accepted.

-- Valid collaborators: each row pairs an invite record with its accept record.
CREATE TABLE collaborators (
    -- Resource reference (decomposed)
    resource_did TEXT NOT NULL,
    resource_collection TEXT NOT NULL,
    resource_rkey TEXT NOT NULL,

    collaborator_did TEXT NOT NULL,

    -- Invite record reference (decomposed)
    invite_did TEXT NOT NULL,
    invite_rkey TEXT NOT NULL,

    -- Accept record reference (decomposed)
    accept_did TEXT NOT NULL,
    accept_rkey TEXT NOT NULL,

    scope TEXT NOT NULL,           -- 'direct' | 'inherited'
    granted_at TEXT NOT NULL,
    indexed_at TEXT NOT NULL,

    PRIMARY KEY (resource_did, resource_collection, resource_rkey, collaborator_did)
);

-- Reverse lookup: everything one DID collaborates on.
CREATE INDEX idx_collaborators_did ON collaborators(collaborator_did);

-- Active sessions (TTL-based; rows are cleaned up once expired).
CREATE TABLE sessions (
    -- Session record identity (decomposed)
    did TEXT NOT NULL,
    rkey TEXT NOT NULL,

    -- Resource reference (decomposed)
    resource_did TEXT NOT NULL,
    resource_collection TEXT NOT NULL,
    resource_rkey TEXT NOT NULL,

    participant_did TEXT NOT NULL,
    node_id TEXT NOT NULL,
    relay_url TEXT,                -- NULL if no relay
    created_at TEXT NOT NULL,
    expires_at TEXT,               -- NULL = no expiry
    indexed_at TEXT NOT NULL,

    PRIMARY KEY (did, rkey)
);

-- All sessions on one resource.
CREATE INDEX idx_sessions_resource ON sessions(resource_did, resource_collection, resource_rkey);
-- Expiry sweep.
CREATE INDEX idx_sessions_expires ON sessions(expires_at);

-- Pending invites: an invite record with no matching accept yet.
CREATE TABLE pending_invites (
    -- Invite record identity (decomposed)
    did TEXT NOT NULL,             -- inviter DID
    rkey TEXT NOT NULL,

    -- Resource reference (decomposed)
    resource_did TEXT NOT NULL,
    resource_collection TEXT NOT NULL,
    resource_rkey TEXT NOT NULL,

    inviter_did TEXT NOT NULL,     -- same as did
    invitee_did TEXT NOT NULL,
    message TEXT,                  -- NULL if no message
    expires_at TEXT,               -- NULL = no expiry
    created_at TEXT NOT NULL,
    indexed_at TEXT NOT NULL,

    PRIMARY KEY (did, rkey)
);

-- All pending invites on one resource.
CREATE INDEX idx_pending_invites_resource ON pending_invites(resource_did, resource_collection, resource_rkey);
-- Invites awaiting a given invitee.
CREATE INDEX idx_pending_invites_invitee ON pending_invites(invitee_did);
+24
crates/weaver-index/src/sqlite/migrations/003_permissions.sql
-- Permissions cache.
-- Local, per-shard cache of permissions for collab-related hot paths.
-- ClickHouse remains authoritative; rows here are populated on demand for
-- resources that are actively being worked on.
CREATE TABLE permissions (
    -- Resource reference (decomposed)
    resource_did TEXT NOT NULL,
    resource_collection TEXT NOT NULL,
    resource_rkey TEXT NOT NULL,

    did TEXT NOT NULL,             -- user who has permission

    scope TEXT NOT NULL,           -- 'owner' | 'direct' | 'inherited'

    -- Source reference (decomposed): the resource itself for 'owner',
    -- the granting invite record for everything else.
    source_did TEXT NOT NULL,
    source_collection TEXT NOT NULL,
    source_rkey TEXT NOT NULL,

    granted_at TEXT NOT NULL,

    PRIMARY KEY (resource_did, resource_collection, resource_rkey, did)
);

-- Reverse lookup: everything one DID can access.
CREATE INDEX idx_permissions_did ON permissions(did);
+5
crates/weaver-index/src/tap.rs
··· 1 + mod consumer; 2 + mod types; 3 + 4 + pub use consumer::{TapConfig, TapConsumer}; 5 + pub use types::*;
+254
crates/weaver-index/src/tap/consumer.rs
··· 1 + use std::time::Duration; 2 + 3 + use futures_util::{SinkExt, StreamExt}; 4 + use tokio::sync::mpsc; 5 + use tokio_tungstenite::{connect_async, tungstenite::Message}; 6 + use tracing::{debug, error, info, trace, warn}; 7 + use url::Url; 8 + 9 + use crate::error::IndexError; 10 + 11 + use super::{TapAck, TapEvent}; 12 + 13 + /// Messages sent to the writer task 14 + enum WriteCommand { 15 + #[allow(dead_code)] 16 + Ack(u64), 17 + Pong(bytes::Bytes), 18 + } 19 + 20 + /// Configuration for tap consumer 21 + #[derive(Debug, Clone)] 22 + pub struct TapConfig { 23 + /// WebSocket URL for tap (e.g., ws://localhost:2480/channel) 24 + pub url: Url, 25 + /// Whether to send acks (disable for fire-and-forget mode) 26 + pub send_acks: bool, 27 + /// Reconnect delay on connection failure 28 + pub reconnect_delay: Duration, 29 + } 30 + 31 + impl TapConfig { 32 + pub fn new(url: Url) -> Self { 33 + Self { 34 + url, 35 + send_acks: true, 36 + reconnect_delay: Duration::from_secs(5), 37 + } 38 + } 39 + 40 + pub fn with_acks(mut self, send_acks: bool) -> Self { 41 + self.send_acks = send_acks; 42 + self 43 + } 44 + } 45 + 46 + /// Consumer that connects to tap's websocket and yields events 47 + pub struct TapConsumer { 48 + config: TapConfig, 49 + } 50 + 51 + impl TapConsumer { 52 + pub fn new(config: TapConfig) -> Self { 53 + Self { config } 54 + } 55 + 56 + /// Connect to tap and return channels for events and acks 57 + /// 58 + /// Returns a receiver for events and a sender for acks. 59 + /// The consumer handles reconnection internally. 
60 + pub async fn connect( 61 + &self, 62 + ) -> Result<(mpsc::Receiver<TapEvent>, mpsc::Sender<u64>), IndexError> { 63 + let (event_tx, event_rx) = mpsc::channel::<TapEvent>(10000); 64 + let (ack_tx, ack_rx) = mpsc::channel::<u64>(10000); 65 + 66 + let config = self.config.clone(); 67 + tokio::spawn(async move { 68 + run_connection_loop(config, event_tx, ack_rx).await; 69 + }); 70 + 71 + Ok((event_rx, ack_tx)) 72 + } 73 + } 74 + 75 + async fn run_connection_loop( 76 + config: TapConfig, 77 + event_tx: mpsc::Sender<TapEvent>, 78 + ack_rx: mpsc::Receiver<u64>, 79 + ) { 80 + loop { 81 + info!(url = %config.url, "connecting to tap"); 82 + 83 + match connect_async(config.url.as_str()).await { 84 + Ok((ws_stream, _response)) => { 85 + info!("connected to tap"); 86 + 87 + let (write, read) = ws_stream.split(); 88 + 89 + // Channel for reader -> writer communication (pongs, etc) 90 + let (write_tx, write_rx) = mpsc::channel::<WriteCommand>(10000); 91 + 92 + // Spawn writer task 93 + let send_acks = config.send_acks; 94 + let writer_handle = tokio::spawn(run_writer(write, write_rx, ack_rx, send_acks)); 95 + 96 + // Run reader in current task 97 + let reader_result = run_reader(read, event_tx.clone(), write_tx, send_acks).await; 98 + 99 + // Reader finished - abort writer and wait for it 100 + writer_handle.abort(); 101 + let _ = writer_handle.await; 102 + 103 + // Get back the ack_rx from... wait, we moved it. Need to restructure. 
// NOTE(review): this chunk begins mid-function — the consumer's
// connect/reconnect loop opens above the visible region; indentation of the
// fragment below is reconstructed to match that nesting.

                // For now, if reader dies we'll reconnect with a fresh ack channel state

                // Map the reader's exit reason to a log line and a control-flow
                // decision: transport-level failures reconnect, a dead downstream
                // channel stops the consumer entirely.
                match reader_result {
                    ReaderResult::Closed => {
                        // Clean websocket close (or stream end); fall through to reconnect.
                        info!("tap connection closed");
                    }
                    ReaderResult::Error(e) => {
                        // Transport/protocol error; fall through to reconnect.
                        warn!(error = %e, "tap reader error");
                    }
                    ReaderResult::ChannelClosed => {
                        // The downstream TapEvent receiver was dropped — reconnecting
                        // would be pointless, so stop the whole consumer task.
                        error!("event channel closed, stopping tap consumer");
                        return;
                    }
                }

                // We lost the ack_rx to the writer task, need to break out
                // and let caller reconnect if needed
                break;
            }
            Err(e) => {
                // Connection attempt failed; retried after the delay below.
                error!(error = ?e, "failed to connect to tap");
            }
        }

        // Reconnect after delay
        info!(delay = ?config.reconnect_delay, "reconnecting to tap");
        tokio::time::sleep(config.reconnect_delay).await;
    }
}

/// Why [`run_reader`] returned.
enum ReaderResult {
    /// Peer sent a websocket `Close` frame, or the stream simply ended.
    Closed,
    /// Transport error from the websocket, or the writer-side command
    /// channel went away (carries a human-readable description).
    Error(String),
    /// The downstream `TapEvent` channel was closed: nobody is consuming
    /// events anymore, so the whole consumer should stop.
    ChannelClosed,
}

/// Drains websocket frames from `read` until the stream ends or errors.
///
/// * `Text` frames are parsed as [`TapEvent`] JSON and forwarded on `event_tx`;
///   frames that fail to parse are logged and skipped (best-effort).
/// * `Ping` frames are answered indirectly: a `Pong` request is queued on
///   `write_tx` for the writer task, which owns the sink.
/// * `Close` frames — and stream end — yield [`ReaderResult::Closed`].
/// * Binary/pong/other frames are ignored.
///
/// `send_acks` only affects logging here; actual acking is done by the
/// writer task.
async fn run_reader<S>(
    mut read: S,
    event_tx: mpsc::Sender<TapEvent>,
    write_tx: mpsc::Sender<WriteCommand>,
    send_acks: bool,
) -> ReaderResult
where
    S: StreamExt<Item = Result<Message, tokio_tungstenite::tungstenite::Error>> + Unpin,
{
    while let Some(msg) = read.next().await {
        match msg {
            Ok(Message::Text(text)) => match serde_json::from_str::<TapEvent>(&text) {
                Ok(event) => {
                    // Capture the id before the event is moved into the channel.
                    let event_id = event.id();
                    if event_tx.send(event).await.is_err() {
                        // Receiver dropped — downstream is gone.
                        return ReaderResult::ChannelClosed;
                    }

                    if !send_acks {
                        debug!(id = event_id, "event received (fire-and-forget)");
                    }
                }
                Err(e) => {
                    // Malformed event: log and keep reading rather than
                    // tearing down the connection.
                    warn!(error = ?e, text = %text, "failed to parse tap event");
                }
            },
            Ok(Message::Ping(data)) => {
                // Delegate the pong to the writer task, which owns the sink.
                if write_tx.send(WriteCommand::Pong(data)).await.is_err() {
                    return ReaderResult::Error("writer channel closed".into());
                }
            }
            Ok(Message::Close(_)) => {
                return ReaderResult::Closed;
            }
            Ok(_) => {
                // Ignore binary, pong, etc.
            }
            Err(e) => {
                return ReaderResult::Error(e.to_string());
            }
        }
    }
    // Stream ended without an explicit Close frame.
    ReaderResult::Closed
}

/// Owns the websocket sink: services pong/ack commands forwarded by the
/// reader task (`write_rx`) and acks produced by the indexer (`ack_rx`).
///
/// Returns when either channel closes or a sink send fails; the caller
/// treats any return as a dead connection.
///
/// NOTE(review): when `send_acks` is false the `ack_rx` arm is never polled,
/// so anything sending on that channel could fill its buffer and stall —
/// confirm the indexer does not send acks in fire-and-forget mode.
async fn run_writer<S>(
    mut write: S,
    mut write_rx: mpsc::Receiver<WriteCommand>,
    mut ack_rx: mpsc::Receiver<u64>,
    send_acks: bool,
) where
    S: SinkExt<Message> + Unpin,
    S::Error: std::fmt::Display,
{
    loop {
        tokio::select! {
            // `biased` gives reader-originated commands (pongs) priority over
            // indexer acks, keeping the connection alive under ack backlog.
            biased;

            // Handle pongs and other write commands from reader
            cmd = write_rx.recv() => {
                match cmd {
                    Some(WriteCommand::Pong(data)) => {
                        if let Err(e) = write.send(Message::Pong(data)).await {
                            warn!(error = %e, "failed to send pong");
                            return;
                        }
                    }
                    Some(WriteCommand::Ack(id)) => {
                        // Acks routed through the command channel are still
                        // gated on `send_acks`, same as the ack_rx arm below.
                        if send_acks {
                            if let Err(e) = send_ack(&mut write, id).await {
                                warn!(error = %e, id, "failed to send ack");
                                return;
                            }
                        }
                    }
                    None => {
                        // Reader closed the channel, we're done
                        return;
                    }
                }
            }

            // Handle acks from the indexer
            id = ack_rx.recv(), if send_acks => {
                match id {
                    Some(id) => {
                        if let Err(e) = send_ack(&mut write, id).await {
                            warn!(error = %e, id, "failed to send ack");
                            return;
                        }
                    }
                    None => {
                        // Ack channel closed, indexer is done
                        return;
                    }
                }
            }
        }
    }
}

/// Serializes a [`TapAck`] for `id` as JSON and sends it as a `Text` frame.
///
/// Both serialization and sink errors are flattened to `String` so the
/// caller can log them uniformly.
async fn send_ack<S>(write: &mut S, id: u64) -> Result<(), String>
where
    S: SinkExt<Message> + Unpin,
    S::Error: std::fmt::Display,
{
    let ack = TapAck::new(id);
    let json = serde_json::to_string(&ack).map_err(|e| e.to_string())?;
    write
        .send(Message::Text(json.into()))
        .await
        .map_err(|e| e.to_string())?;
    trace!(id, "sent ack");
    Ok(())
}
+113
crates/weaver-index/src/tap/types.rs
use serde::{Deserialize, Serialize};
use smol_str::SmolStr;

/// Event received from tap's websocket channel
///
/// Internally tagged JSON: `{"type": "record", ...}` deserializes into
/// `Record`, `{"type": "identity", ...}` into `Identity` (variant names are
/// lowercased by the `rename_all` attribute).
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum TapEvent {
    Record(TapRecordEnvelope),
    Identity(TapIdentityEnvelope),
}

impl TapEvent {
    /// Sequence id of the event, regardless of variant — used when acking
    /// back to tap.
    pub fn id(&self) -> u64 {
        match self {
            TapEvent::Record(r) => r.id,
            TapEvent::Identity(i) => i.id,
        }
    }
}

/// Envelope for record events
#[derive(Debug, Clone, Deserialize)]
pub struct TapRecordEnvelope {
    /// Sequence id assigned by tap; echoed back in acks.
    pub id: u64,
    /// The record payload itself.
    pub record: TapRecordEvent,
}

/// Record event from tap
#[derive(Debug, Clone, Deserialize)]
pub struct TapRecordEvent {
    /// Whether this is a live event (true) or backfill (false)
    pub live: bool,
    /// Repository revision
    pub rev: SmolStr,
    /// DID of the account
    pub did: SmolStr,
    /// Collection NSID (e.g., "app.bsky.feed.post")
    pub collection: SmolStr,
    /// Record key
    pub rkey: SmolStr,
    /// Operation: create, update, delete
    pub action: RecordAction,
    /// Content identifier
    pub cid: SmolStr,
    /// The actual record data (only present for create/update);
    /// `#[serde(default)]` makes a missing field deserialize as `None`.
    #[serde(default)]
    pub record: Option<serde_json::Value>,
}

/// Record operation type
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RecordAction {
    Create,
    Update,
    Delete,
}

impl RecordAction {
    /// Lowercase wire/storage name of the action, matching the serde
    /// `rename_all = "lowercase"` representation.
    pub fn as_str(&self) -> &'static str {
        match self {
            RecordAction::Create => "create",
            RecordAction::Update => "update",
            RecordAction::Delete => "delete",
        }
    }
}

/// Envelope for identity events
#[derive(Debug, Clone, Deserialize)]
pub struct TapIdentityEnvelope {
    /// Sequence id assigned by tap; echoed back in acks.
    pub id: u64,
    /// The identity payload itself.
    pub identity: TapIdentityEvent,
}

/// Identity event from tap (handle or status changes)
#[derive(Debug, Clone, Deserialize)]
pub struct TapIdentityEvent {
    /// DID of the account this identity event concerns.
    pub did: SmolStr,
    /// Current handle for the account.
    pub handle: SmolStr,
    /// Whether the account is currently active.
    pub is_active: bool,
    /// Detailed account status (see [`AccountStatus`]).
    pub status: AccountStatus,
}

/// Account status
///
/// `#[serde(other)]` on `Unknown` makes deserialization total: any status
/// string not listed here maps to `Unknown` instead of failing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum AccountStatus {
    Active,
    Deactivated,
    Suspended,
    Deleted,
    Takendown,
    #[serde(other)]
    Unknown,
}

/// Ack message to send back to tap
///
/// Serializes as `{"type": "ack", "id": N}`.
#[derive(Debug, Clone, Serialize)]
pub struct TapAck {
    /// Always the literal string "ack" (serialized under the key "type").
    #[serde(rename = "type")]
    pub msg_type: &'static str,
    /// Sequence id of the event being acknowledged.
    pub id: u64,
}

impl TapAck {
    /// Builds an ack for event `id`.
    pub fn new(id: u64) -> Self {
        Self {
            msg_type: "ack",
            id,
        }
    }
}
+64
docker-compose.yml
services:
  # Tap - AT Protocol sync utility
  # Build from local indigo checkout, or use pre-built image
  tap:
    container_name: weaver-tap
    build:
      # Build from local indigo checkout on sync-tool branch
      # git clone https://github.com/bluesky-social/indigo.git && git checkout sync-tool
      context: ../../Git_Repos/indigo
      dockerfile: cmd/tap/Dockerfile
    ports:
      - "2480:2480"
    volumes:
      - tap_data:/data/tap
    environment:
      TAP_DATABASE_URL: sqlite:///data/tap/tap.db
      TAP_BIND: ":2480"
      TAP_DISABLE_ACKS: "false"
      TAP_LOG_LEVEL: info
      # Filter to weaver collections only
      TAP_SIGNAL_COLLECTION: sh.weaver.edit.root
      TAP_COLLECTION_FILTERS: "sh.weaver.*,app.bsky.actor.profile"
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:2480/health"]
      interval: 20s
      timeout: 5s
      retries: 3

  # Weaver indexer - consumes from tap or direct firehose
  indexer:
    container_name: weaver-indexer
    build:
      context: .
      dockerfile: crates/weaver-index/Dockerfile
    command: ["run"]
    ports:
      - "3000:3000"
    environment:
      RUST_LOG: debug,weaver_index=debug
      # ClickHouse connection (set these for your cloud/homelab instance)
      CLICKHOUSE_URL: ${CLICKHOUSE_URL}
      CLICKHOUSE_DATABASE: ${CLICKHOUSE_DATABASE:-weaver}
      CLICKHOUSE_USER: ${CLICKHOUSE_USER}
      CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD}
      # Source mode: "firehose" or "tap"
      INDEXER_SOURCE: tap
      # Tap connection (when INDEXER_SOURCE=tap)
      TAP_URL: ws://tap:2480/channel
      TAP_SEND_ACKS: "true"
      # Firehose connection (when INDEXER_SOURCE=firehose)
      FIREHOSE_RELAY_URL: wss://bsky.network
      # Collection filters
      INDEXER_COLLECTIONS: "sh.weaver.*,app.bsky.actor.profile"
    depends_on:
      tap:
        condition: service_healthy
    healthcheck:
      # FIX: probe the indexer's own port (3000), not tap's (2480). The
      # healthcheck runs *inside* this container, where nothing listens on
      # 2480, so the previous check could never succeed and the service
      # would always report unhealthy.
      # NOTE(review): assumes wget is present in the indexer image and that
      # it serves /xrpc/_health on 3000 — confirm against the Dockerfile.
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/xrpc/_health"]
      interval: 20s
      timeout: 5s
      retries: 3

volumes:
  tap_data: