Parakeet is a Rust-based Bluesky AppView aiming to implement most of the functionality required to support the Bluesky client

feat(consumer): Consumer Performance Improvements

Changed files
+2136 -1500
consumer
migrations
2025-01-29-213341_follows_and_blocks
2025-02-07-203450_lists
2025-02-16-142357_posts
2025-04-05-114428_likes_and_reposts
2025-04-18-185717_verification
parakeet-db
+186 -59
Cargo.lock
··· 39 39 ] 40 40 41 41 [[package]] 42 + name = "alloc-no-stdlib" 43 + version = "2.0.4" 44 + source = "registry+https://github.com/rust-lang/crates.io-index" 45 + checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" 46 + 47 + [[package]] 48 + name = "alloc-stdlib" 49 + version = "0.2.2" 50 + source = "registry+https://github.com/rust-lang/crates.io-index" 51 + checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" 52 + dependencies = [ 53 + "alloc-no-stdlib", 54 + ] 55 + 56 + [[package]] 42 57 name = "android-tzdata" 43 58 version = "0.1.1" 44 59 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 130 145 "event-listener-strategy", 131 146 "futures-core", 132 147 "pin-project-lite", 148 + ] 149 + 150 + [[package]] 151 + name = "async-compression" 152 + version = "0.4.22" 153 + source = "registry+https://github.com/rust-lang/crates.io-index" 154 + checksum = "59a194f9d963d8099596278594b3107448656ba73831c9d8c783e613ce86da64" 155 + dependencies = [ 156 + "brotli", 157 + "futures-core", 158 + "memchr", 159 + "pin-project-lite", 160 + "tokio", 133 161 ] 134 162 135 163 [[package]] ··· 472 500 ] 473 501 474 502 [[package]] 503 + name = "brotli" 504 + version = "7.0.0" 505 + source = "registry+https://github.com/rust-lang/crates.io-index" 506 + checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" 507 + dependencies = [ 508 + "alloc-no-stdlib", 509 + "alloc-stdlib", 510 + "brotli-decompressor", 511 + ] 512 + 513 + [[package]] 514 + name = "brotli-decompressor" 515 + version = "4.0.3" 516 + source = "registry+https://github.com/rust-lang/crates.io-index" 517 + checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" 518 + dependencies = [ 519 + "alloc-no-stdlib", 520 + "alloc-stdlib", 521 + ] 522 + 523 + [[package]] 475 524 name = "bumpalo" 476 525 version = "3.16.0" 477 526 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 688 737 "chrono", 689 
738 "ciborium", 690 739 "clap", 740 + "deadpool-postgres", 691 741 "did-resolver", 692 - "diesel", 693 - "diesel-async", 694 742 "eyre", 695 743 "figment", 696 - "flume", 697 744 "foldhash", 698 745 "futures", 699 746 "ipld-core", ··· 715 762 "tokio-postgres", 716 763 "tokio-stream", 717 764 "tokio-tungstenite", 765 + "tokio-util", 718 766 "tracing", 719 767 "tracing-subscriber", 720 768 ] ··· 800 848 checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" 801 849 dependencies = [ 802 850 "generic-array", 803 - "rand_core", 851 + "rand_core 0.6.4", 804 852 "subtle", 805 853 "zeroize", 806 854 ] ··· 920 968 dependencies = [ 921 969 "deadpool-runtime", 922 970 "num_cpus", 971 + "serde", 923 972 "tokio", 924 973 ] 925 974 926 975 [[package]] 976 + name = "deadpool-postgres" 977 + version = "0.14.1" 978 + source = "registry+https://github.com/rust-lang/crates.io-index" 979 + checksum = "3d697d376cbfa018c23eb4caab1fd1883dd9c906a8c034e8d9a3cb06a7e0bef9" 980 + dependencies = [ 981 + "async-trait", 982 + "deadpool", 983 + "getrandom 0.2.15", 984 + "serde", 985 + "tokio", 986 + "tokio-postgres", 987 + "tracing", 988 + ] 989 + 990 + [[package]] 927 991 name = "deadpool-runtime" 928 992 version = "0.1.4" 929 993 source = "registry+https://github.com/rust-lang/crates.io-index" 930 994 checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" 995 + dependencies = [ 996 + "tokio", 997 + ] 931 998 932 999 [[package]] 933 1000 name = "der" ··· 1114 1181 "hkdf", 1115 1182 "pem-rfc7468", 1116 1183 "pkcs8", 1117 - "rand_core", 1184 + "rand_core 0.6.4", 1118 1185 "sec1", 1119 1186 "subtle", 1120 1187 "zeroize", ··· 1212 1279 source = "registry+https://github.com/rust-lang/crates.io-index" 1213 1280 checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" 1214 1281 dependencies = [ 1215 - "rand_core", 1282 + "rand_core 0.6.4", 1216 1283 "subtle", 1217 1284 ] 1218 1285 ··· 1241 1308 version = "0.5.7" 1242 1309 source = 
"registry+https://github.com/rust-lang/crates.io-index" 1243 1310 checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" 1244 - 1245 - [[package]] 1246 - name = "flume" 1247 - version = "0.11.1" 1248 - source = "registry+https://github.com/rust-lang/crates.io-index" 1249 - checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" 1250 - dependencies = [ 1251 - "futures-core", 1252 - "futures-sink", 1253 - "nanorand", 1254 - "spin", 1255 - ] 1256 1311 1257 1312 [[package]] 1258 1313 name = "fnv" ··· 1437 1492 "cfg-if", 1438 1493 "js-sys", 1439 1494 "libc", 1440 - "wasi", 1495 + "wasi 0.11.0+wasi-snapshot-preview1", 1441 1496 "wasm-bindgen", 1442 1497 ] 1443 1498 1444 1499 [[package]] 1500 + name = "getrandom" 1501 + version = "0.3.3" 1502 + source = "registry+https://github.com/rust-lang/crates.io-index" 1503 + checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" 1504 + dependencies = [ 1505 + "cfg-if", 1506 + "libc", 1507 + "r-efi", 1508 + "wasi 0.14.2+wasi-0.2.4", 1509 + ] 1510 + 1511 + [[package]] 1445 1512 name = "gimli" 1446 1513 version = "0.31.1" 1447 1514 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1472 1539 checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" 1473 1540 dependencies = [ 1474 1541 "ff", 1475 - "rand_core", 1542 + "rand_core 0.6.4", 1476 1543 "subtle", 1477 1544 ] 1478 1545 ··· 1572 1639 "idna", 1573 1640 "ipnet", 1574 1641 "once_cell", 1575 - "rand", 1642 + "rand 0.8.5", 1576 1643 "thiserror 1.0.69", 1577 1644 "tinyvec", 1578 1645 "tokio", ··· 1593 1660 "lru-cache", 1594 1661 "once_cell", 1595 1662 "parking_lot 0.12.3", 1596 - "rand", 1663 + "rand 0.8.5", 1597 1664 "resolv-conf", 1598 1665 "smallvec", 1599 1666 "thiserror 1.0.69", ··· 2072 2139 dependencies = [ 2073 2140 "base64 0.22.1", 2074 2141 "ed25519-dalek", 2075 - "getrandom", 2142 + "getrandom 0.2.15", 2076 2143 "hmac", 2077 2144 "js-sys", 2078 2145 "k256", 2079 2146 
"p256", 2080 2147 "p384", 2081 2148 "pem", 2082 - "rand", 2083 - "rand_core", 2149 + "rand 0.8.5", 2150 + "rand_core 0.6.4", 2084 2151 "rsa", 2085 2152 "serde", 2086 2153 "serde_json", ··· 2273 2340 "hashbrown", 2274 2341 "metrics", 2275 2342 "quanta", 2276 - "rand", 2343 + "rand 0.8.5", 2277 2344 "rand_xoshiro", 2278 2345 "sketches-ddsketch", 2279 2346 ] ··· 2306 2373 checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" 2307 2374 dependencies = [ 2308 2375 "libc", 2309 - "wasi", 2376 + "wasi 0.11.0+wasi-snapshot-preview1", 2310 2377 "windows-sys 0.52.0", 2311 2378 ] 2312 2379 ··· 2339 2406 checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" 2340 2407 2341 2408 [[package]] 2342 - name = "nanorand" 2343 - version = "0.7.0" 2344 - source = "registry+https://github.com/rust-lang/crates.io-index" 2345 - checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" 2346 - dependencies = [ 2347 - "getrandom", 2348 - ] 2349 - 2350 - [[package]] 2351 2409 name = "native-tls" 2352 2410 version = "0.2.12" 2353 2411 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2406 2464 "num-integer", 2407 2465 "num-iter", 2408 2466 "num-traits", 2409 - "rand", 2467 + "rand 0.8.5", 2410 2468 "smallvec", 2411 2469 "zeroize", 2412 2470 ] ··· 2581 2639 dependencies = [ 2582 2640 "chrono", 2583 2641 "diesel", 2642 + "postgres-types", 2584 2643 "serde_json", 2585 2644 ] 2586 2645 ··· 2843 2902 2844 2903 [[package]] 2845 2904 name = "postgres-protocol" 2846 - version = "0.6.7" 2905 + version = "0.6.8" 2847 2906 source = "registry+https://github.com/rust-lang/crates.io-index" 2848 - checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23" 2907 + checksum = "76ff0abab4a9b844b93ef7b81f1efc0a366062aaef2cd702c76256b5dc075c54" 2849 2908 dependencies = [ 2850 2909 "base64 0.22.1", 2851 2910 "byteorder", ··· 2854 2913 "hmac", 2855 2914 "md-5", 2856 2915 "memchr", 2857 - "rand", 2916 + "rand 
0.9.1", 2858 2917 "sha2", 2859 2918 "stringprep", 2860 2919 ] 2861 2920 2862 2921 [[package]] 2863 2922 name = "postgres-types" 2864 - version = "0.2.8" 2923 + version = "0.2.9" 2865 2924 source = "registry+https://github.com/rust-lang/crates.io-index" 2866 - checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f" 2925 + checksum = "613283563cd90e1dfc3518d548caee47e0e725455ed619881f5cf21f36de4b48" 2867 2926 dependencies = [ 2868 2927 "bytes", 2869 2928 "chrono", 2870 2929 "fallible-iterator", 2871 2930 "postgres-protocol", 2931 + "serde", 2932 + "serde_json", 2872 2933 ] 2873 2934 2874 2935 [[package]] ··· 2989 3050 "libc", 2990 3051 "once_cell", 2991 3052 "raw-cpuid", 2992 - "wasi", 3053 + "wasi 0.11.0+wasi-snapshot-preview1", 2993 3054 "web-sys", 2994 3055 "winapi", 2995 3056 ] ··· 3010 3071 ] 3011 3072 3012 3073 [[package]] 3074 + name = "r-efi" 3075 + version = "5.2.0" 3076 + source = "registry+https://github.com/rust-lang/crates.io-index" 3077 + checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" 3078 + 3079 + [[package]] 3013 3080 name = "rand" 3014 3081 version = "0.8.5" 3015 3082 source = "registry+https://github.com/rust-lang/crates.io-index" 3016 3083 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 3017 3084 dependencies = [ 3018 3085 "libc", 3019 - "rand_chacha", 3020 - "rand_core", 3086 + "rand_chacha 0.3.1", 3087 + "rand_core 0.6.4", 3088 + ] 3089 + 3090 + [[package]] 3091 + name = "rand" 3092 + version = "0.9.1" 3093 + source = "registry+https://github.com/rust-lang/crates.io-index" 3094 + checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" 3095 + dependencies = [ 3096 + "rand_chacha 0.9.0", 3097 + "rand_core 0.9.3", 3021 3098 ] 3022 3099 3023 3100 [[package]] ··· 3027 3104 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 3028 3105 dependencies = [ 3029 3106 "ppv-lite86", 3030 - "rand_core", 3107 + "rand_core 0.6.4", 
3108 + ] 3109 + 3110 + [[package]] 3111 + name = "rand_chacha" 3112 + version = "0.9.0" 3113 + source = "registry+https://github.com/rust-lang/crates.io-index" 3114 + checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 3115 + dependencies = [ 3116 + "ppv-lite86", 3117 + "rand_core 0.9.3", 3031 3118 ] 3032 3119 3033 3120 [[package]] ··· 3036 3123 source = "registry+https://github.com/rust-lang/crates.io-index" 3037 3124 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 3038 3125 dependencies = [ 3039 - "getrandom", 3126 + "getrandom 0.2.15", 3127 + ] 3128 + 3129 + [[package]] 3130 + name = "rand_core" 3131 + version = "0.9.3" 3132 + source = "registry+https://github.com/rust-lang/crates.io-index" 3133 + checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 3134 + dependencies = [ 3135 + "getrandom 0.3.3", 3040 3136 ] 3041 3137 3042 3138 [[package]] ··· 3045 3141 source = "registry+https://github.com/rust-lang/crates.io-index" 3046 3142 checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" 3047 3143 dependencies = [ 3048 - "rand_core", 3144 + "rand_core 0.6.4", 3049 3145 ] 3050 3146 3051 3147 [[package]] ··· 3134 3230 source = "registry+https://github.com/rust-lang/crates.io-index" 3135 3231 checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" 3136 3232 dependencies = [ 3233 + "async-compression", 3137 3234 "base64 0.22.1", 3138 3235 "bytes", 3139 3236 "encoding_rs", ··· 3163 3260 "system-configuration", 3164 3261 "tokio", 3165 3262 "tokio-native-tls", 3263 + "tokio-util", 3166 3264 "tower", 3167 3265 "tower-service", 3168 3266 "url", 3169 3267 "wasm-bindgen", 3170 3268 "wasm-bindgen-futures", 3269 + "wasm-streams", 3171 3270 "web-sys", 3172 3271 "windows-registry", 3173 3272 ] ··· 3200 3299 dependencies = [ 3201 3300 "cc", 3202 3301 "cfg-if", 3203 - "getrandom", 3302 + "getrandom 0.2.15", 3204 3303 "libc", 3205 3304 "spin", 3206 3305 
"untrusted", ··· 3220 3319 "num-traits", 3221 3320 "pkcs1", 3222 3321 "pkcs8", 3223 - "rand_core", 3322 + "rand_core 0.6.4", 3224 3323 "signature", 3225 3324 "spki", 3226 3325 "subtle", ··· 3571 3670 checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" 3572 3671 dependencies = [ 3573 3672 "digest", 3574 - "rand_core", 3673 + "rand_core 0.6.4", 3575 3674 ] 3576 3675 3577 3676 [[package]] ··· 3644 3743 version = "0.9.8" 3645 3744 source = "registry+https://github.com/rust-lang/crates.io-index" 3646 3745 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 3647 - dependencies = [ 3648 - "lock_api", 3649 - ] 3650 3746 3651 3747 [[package]] 3652 3748 name = "spki" ··· 3747 3843 dependencies = [ 3748 3844 "cfg-if", 3749 3845 "fastrand", 3750 - "getrandom", 3846 + "getrandom 0.2.15", 3751 3847 "once_cell", 3752 3848 "rustix", 3753 3849 "windows-sys 0.59.0", ··· 3917 4013 "pin-project-lite", 3918 4014 "postgres-protocol", 3919 4015 "postgres-types", 3920 - "rand", 4016 + "rand 0.8.5", 3921 4017 "socket2", 3922 4018 "tokio", 3923 4019 "tokio-util", ··· 3962 4058 3963 4059 [[package]] 3964 4060 name = "tokio-util" 3965 - version = "0.7.13" 4061 + version = "0.7.15" 3966 4062 source = "registry+https://github.com/rust-lang/crates.io-index" 3967 - checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" 4063 + checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" 3968 4064 dependencies = [ 3969 4065 "bytes", 3970 4066 "futures-core", ··· 4186 4282 "httparse", 4187 4283 "log", 4188 4284 "native-tls", 4189 - "rand", 4285 + "rand 0.8.5", 4190 4286 "sha1", 4191 4287 "thiserror 2.0.12", 4192 4288 "utf-8", ··· 4337 4433 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 4338 4434 4339 4435 [[package]] 4436 + name = "wasi" 4437 + version = "0.14.2+wasi-0.2.4" 4438 + source = "registry+https://github.com/rust-lang/crates.io-index" 4439 + checksum = 
"9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" 4440 + dependencies = [ 4441 + "wit-bindgen-rt", 4442 + ] 4443 + 4444 + [[package]] 4340 4445 name = "wasite" 4341 4446 version = "0.1.0" 4342 4447 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 4411 4516 checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 4412 4517 dependencies = [ 4413 4518 "unicode-ident", 4519 + ] 4520 + 4521 + [[package]] 4522 + name = "wasm-streams" 4523 + version = "0.4.2" 4524 + source = "registry+https://github.com/rust-lang/crates.io-index" 4525 + checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" 4526 + dependencies = [ 4527 + "futures-util", 4528 + "js-sys", 4529 + "wasm-bindgen", 4530 + "wasm-bindgen-futures", 4531 + "web-sys", 4414 4532 ] 4415 4533 4416 4534 [[package]] ··· 4687 4805 dependencies = [ 4688 4806 "cfg-if", 4689 4807 "windows-sys 0.48.0", 4808 + ] 4809 + 4810 + [[package]] 4811 + name = "wit-bindgen-rt" 4812 + version = "0.39.0" 4813 + source = "registry+https://github.com/rust-lang/crates.io-index" 4814 + checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" 4815 + dependencies = [ 4816 + "bitflags 2.8.0", 4690 4817 ] 4691 4818 4692 4819 [[package]]
+5 -6
consumer/Cargo.toml
··· 7 7 chrono = { version = "0.4.39", features = ["serde"] } 8 8 ciborium = "0.2.2" 9 9 clap = { version = "4.5.34", features = ["derive"] } 10 + deadpool-postgres = { version = "0.14.1", features = ["serde"] } 10 11 did-resolver = { path = "../did-resolver" } 11 - diesel = { version = "2.2.6", features = ["chrono", "serde_json"] } 12 - diesel-async = { version = "0.5.2", features = ["deadpool", "postgres"] } 13 12 eyre = "0.6.12" 14 13 figment = { version = "0.10.19", features = ["env", "toml"] } 15 - flume = { version = "0.11.1", features = ["async"] } 16 14 foldhash = "0.1.4" 17 15 futures = "0.3.31" 18 16 ipld-core = "0.4.1" ··· 20 18 lexica = { path = "../lexica" } 21 19 metrics = "0.24.1" 22 20 metrics-exporter-prometheus = "0.16.2" 23 - parakeet-db = { path = "../parakeet-db" } 21 + parakeet-db = { path = "../parakeet-db", default-features = false, features = ["postgres"] } 24 22 parakeet-index = { path = "../parakeet-index" } 25 23 redis = { version = "0.31", features = ["tokio-native-tls-comp"] } 26 - reqwest = { version = "0.12.12", features = ["native-tls"] } 24 + reqwest = { version = "0.12.12", features = ["native-tls", "brotli", "stream"] } 27 25 serde = { version = "1.0.217", features = ["derive"] } 28 26 serde_bytes = "0.11" 29 27 serde_ipld_dagcbor = "0.6.1" ··· 31 29 sled = "0.34.7" 32 30 thiserror = "2" 33 31 tokio = { version = "1.42.0", features = ["full"] } 34 - tokio-postgres = { version = "0.7.12", features = ["with-chrono-0_4"] } 32 + tokio-postgres = { version = "0.7.12", features = ["with-chrono-0_4", "with-serde_json-1"] } 35 33 tokio-stream = "0.1.17" 36 34 tokio-tungstenite = { version = "0.26.1", features = ["native-tls"] } 35 + tokio-util = { version = "0.7.14", features = ["io"] } 37 36 tracing = "0.1.40" 38 37 tracing-subscriber = "0.3.18"
-99
consumer/src/backfill/db.rs
··· 1 - use diesel::prelude::*; 2 - use diesel_async::{AsyncPgConnection, RunQueryDsl}; 3 - use parakeet_db::{models, schema, types}; 4 - 5 - pub async fn write_backfill_job( 6 - conn: &mut AsyncPgConnection, 7 - repo: &str, 8 - status: &str, 9 - ) -> QueryResult<usize> { 10 - diesel::insert_into(schema::backfill_jobs::table) 11 - .values(( 12 - schema::backfill_jobs::did.eq(repo), 13 - schema::backfill_jobs::status.eq(status), 14 - )) 15 - .on_conflict_do_nothing() 16 - .execute(conn) 17 - .await 18 - } 19 - 20 - pub async fn get_actor_status( 21 - conn: &mut AsyncPgConnection, 22 - did: &str, 23 - ) -> QueryResult<(types::ActorStatus, types::ActorSyncState)> { 24 - schema::actors::table 25 - .select((schema::actors::status, schema::actors::sync_state)) 26 - .find(&did) 27 - .get_result(conn) 28 - .await 29 - } 30 - 31 - pub async fn update_repo_sync_state( 32 - conn: &mut AsyncPgConnection, 33 - did: &str, 34 - sync_state: types::ActorSyncState, 35 - ) -> QueryResult<usize> { 36 - diesel::update(schema::actors::table) 37 - .set(schema::actors::sync_state.eq(sync_state)) 38 - .filter(schema::actors::did.eq(did)) 39 - .execute(conn) 40 - .await 41 - } 42 - 43 - pub async fn update_handle( 44 - conn: &mut AsyncPgConnection, 45 - did: &str, 46 - handle: Option<String>, 47 - ) -> QueryResult<usize> { 48 - diesel::update(schema::actors::table) 49 - .set(schema::actors::handle.eq(handle)) 50 - .filter(schema::actors::did.eq(did)) 51 - .execute(conn) 52 - .await 53 - } 54 - 55 - pub async fn update_actor_status( 56 - conn: &mut AsyncPgConnection, 57 - did: &str, 58 - status: types::ActorStatus, 59 - sync_state: types::ActorSyncState, 60 - ) -> QueryResult<usize> { 61 - diesel::update(schema::actors::table) 62 - .set(( 63 - schema::actors::status.eq(status), 64 - schema::actors::sync_state.eq(sync_state), 65 - )) 66 - .filter(schema::actors::did.eq(did)) 67 - .execute(conn) 68 - .await 69 - } 70 - 71 - pub async fn defer(conn: &mut AsyncPgConnection) -> QueryResult<usize> 
{ 72 - diesel::sql_query("SET CONSTRAINTS ALL DEFERRED") 73 - .execute(conn) 74 - .await 75 - } 76 - 77 - pub async fn pull_backfill_rows( 78 - conn: &mut AsyncPgConnection, 79 - repo: &str, 80 - rev: &str, 81 - ) -> QueryResult<Vec<models::BackfillRow>> { 82 - schema::backfill::table 83 - .select(models::BackfillRow::as_select()) 84 - .filter( 85 - schema::backfill::repo 86 - .eq(repo) 87 - .and(schema::backfill::repo_ver.gt(rev)), 88 - ) 89 - .order(schema::backfill::repo_ver) 90 - .load(conn) 91 - .await 92 - } 93 - 94 - pub async fn clear_backfill_rows(conn: &mut AsyncPgConnection, repo: &str) -> QueryResult<usize> { 95 - diesel::delete(schema::backfill::table) 96 - .filter(schema::backfill::repo.eq(repo)) 97 - .execute(conn) 98 - .await 99 - }
+138 -85
consumer/src/backfill/mod.rs
··· 1 + use crate::config::BackfillConfig; 2 + use crate::db; 1 3 use crate::indexer::types::{AggregateDeltaStore, BackfillItem, BackfillItemInner}; 2 - use crate::indexer::{self, db as indexer_db}; 4 + use crate::indexer::{self, records}; 5 + use chrono::prelude::*; 6 + use deadpool_postgres::{Object, Pool, Transaction}; 3 7 use did_resolver::Resolver; 4 - use diesel_async::pooled_connection::deadpool::Pool; 5 - use diesel_async::AsyncPgConnection; 6 8 use ipld_core::cid::Cid; 7 9 use metrics::counter; 8 10 use parakeet_db::types::{ActorStatus, ActorSyncState}; 9 11 use redis::aio::MultiplexedConnection; 10 12 use redis::{AsyncCommands, Direction}; 11 13 use reqwest::{Client, StatusCode}; 12 - use std::collections::HashMap; 13 14 use std::str::FromStr; 14 15 use std::sync::Arc; 15 16 use tokio::sync::Semaphore; 16 17 use tracing::instrument; 17 18 18 - mod db; 19 19 mod repo; 20 20 mod types; 21 21 ··· 28 28 pub struct BackfillManagerInner { 29 29 resolver: Arc<Resolver>, 30 30 client: Client, 31 - index_client: parakeet_index::Client, 31 + index_client: Option<parakeet_index::Client>, 32 + opts: BackfillConfig, 32 33 } 33 34 34 35 pub struct BackfillManager { 35 - pool: Pool<AsyncPgConnection>, 36 + pool: Pool, 36 37 redis: MultiplexedConnection, 37 38 semaphore: Arc<Semaphore>, 38 39 inner: BackfillManagerInner, ··· 40 41 41 42 impl BackfillManager { 42 43 pub async fn new( 43 - pool: Pool<AsyncPgConnection>, 44 + pool: Pool, 44 45 redis: MultiplexedConnection, 45 46 resolver: Arc<Resolver>, 46 - index_client: parakeet_index::Client, 47 - threads: u8, 47 + index_client: Option<parakeet_index::Client>, 48 + opts: BackfillConfig, 48 49 ) -> eyre::Result<Self> { 49 - let client = Client::new(); 50 - let semaphore = Arc::new(Semaphore::new(threads as usize)); 50 + let client = Client::builder().brotli(true).build()?; 51 + let semaphore = Arc::new(Semaphore::new(opts.backfill_workers as usize)); 51 52 52 53 Ok(BackfillManager { 53 54 pool, ··· 57 58 resolver, 58 59 
client, 59 60 index_client, 61 + opts, 60 62 }, 61 63 }) 62 64 } ··· 93 95 tracing::error!(did = &job, "backfill failed: {e}"); 94 96 counter!("backfill_failure").increment(1); 95 97 96 - db::write_backfill_job(&mut conn, &job, "failed") 98 + db::backfill_job_write(&mut conn, &job, "failed") 97 99 .await 98 100 .unwrap(); 99 101 } else { 100 102 counter!("backfill_success").increment(1); 101 103 102 - db::write_backfill_job(&mut conn, &job, "successful") 104 + db::backfill_job_write(&mut conn, &job, "successful") 103 105 .await 104 106 .unwrap(); 105 107 } ··· 119 121 120 122 #[instrument(skip(conn, inner))] 121 123 async fn backfill_actor( 122 - conn: &mut AsyncPgConnection, 124 + conn: &mut Object, 123 125 inner: &mut BackfillManagerInner, 124 126 did: &str, 125 127 ) -> eyre::Result<()> { 126 - let (status, sync_state) = db::get_actor_status(conn, did).await?; 128 + let Some((status, sync_state)) = db::actor_get_statuses(conn, did).await? else { 129 + tracing::error!("skipping backfill on unknown repo"); 130 + return Ok(()); 131 + }; 127 132 128 133 if sync_state != ActorSyncState::Dirty || status != ActorStatus::Active { 129 134 tracing::debug!("skipping non-dirty or inactive repo"); ··· 135 140 eyre::bail!("missing did doc"); 136 141 }; 137 142 138 - let Some(handle) = did_doc 139 - .also_known_as 140 - .and_then(|aka| aka.first().cloned()) 141 - .and_then(|handle| handle.strip_prefix("at://").map(String::from)) 142 - else { 143 - eyre::bail!("DID doc contained no handle"); 144 - }; 145 143 let Some(service) = did_doc 146 144 .service 147 145 .and_then(|services| services.into_iter().find(|svc| svc.id == PDS_SERVICE_ID)) ··· 156 154 let Some(repo_status) = check_pds_repo_status(&inner.client, &pds_url, did).await? else { 157 155 // this repo can't be found - set dirty and assume deleted. 
158 156 tracing::debug!("repo was deleted"); 159 - db::update_actor_status(conn, did, ActorStatus::Deleted, ActorSyncState::Dirty).await?; 157 + db::actor_upsert( 158 + conn, 159 + did, 160 + ActorStatus::Deleted, 161 + ActorSyncState::Dirty, 162 + Utc::now(), 163 + ) 164 + .await?; 160 165 return Ok(()); 161 166 }; 162 167 ··· 165 170 let status = repo_status 166 171 .status 167 172 .unwrap_or(crate::firehose::AtpAccountStatus::Deleted); 168 - db::update_actor_status(conn, did, status.into(), ActorSyncState::Dirty).await?; 173 + db::actor_upsert( 174 + conn, 175 + did, 176 + status.into(), 177 + ActorSyncState::Dirty, 178 + Utc::now(), 179 + ) 180 + .await?; 169 181 return Ok(()); 170 182 } 171 183 172 - // at this point, the account will be active and we can attempt to resolve the handle. 184 + if !inner.opts.skip_handle_validation { 185 + // at this point, the account will be active and we can attempt to resolve the handle. 186 + let Some(handle) = did_doc 187 + .also_known_as 188 + .and_then(|aka| aka.first().cloned()) 189 + .and_then(|handle| handle.strip_prefix("at://").map(String::from)) 190 + else { 191 + eyre::bail!("DID doc contained no handle"); 192 + }; 173 193 174 - // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems 175 - // like a way to end up with really sus handles. 176 - let Some(handle_did) = inner.resolver.resolve_handle(&handle).await? else { 177 - eyre::bail!("Failed to resolve did for handle {handle}"); 178 - }; 194 + // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems 195 + // like a way to end up with really sus handles. 196 + let Some(handle_did) = inner.resolver.resolve_handle(&handle).await? 
else { 197 + eyre::bail!("Failed to resolve did for handle {handle}"); 198 + }; 179 199 180 - if handle_did != did { 181 - eyre::bail!("requested DID doesn't match handle"); 182 - } 200 + if handle_did != did { 201 + eyre::bail!("requested DID doesn't match handle"); 202 + } 183 203 184 - // set the handle from above 185 - db::update_handle(conn, did, Some(handle)).await?; 204 + // set the handle from above 205 + db::actor_upsert_handle( 206 + conn, 207 + did, 208 + ActorSyncState::Processing, 209 + Some(handle), 210 + Utc::now(), 211 + ) 212 + .await?; 213 + } 186 214 187 215 // now we can start actually backfilling 188 - db::update_repo_sync_state(conn, did, ActorSyncState::Processing).await?; 216 + db::actor_set_sync_status(conn, did, ActorSyncState::Processing, Utc::now()).await?; 217 + 218 + let mut t = conn.transaction().await?; 219 + t.execute("SET CONSTRAINTS ALL DEFERRED", &[]).await?; 189 220 190 221 tracing::trace!("pulling repo"); 191 222 192 - let (rev, cid, records) = repo::pull_repo(&inner.client, did, &pds_url).await?; 223 + let (commit, mut deltas, copies) = 224 + repo::stream_and_insert_repo(&mut t, &inner.client, did, &pds_url).await?; 193 225 194 - tracing::trace!("repo pulled - inserting"); 226 + db::actor_set_repo_state(&mut t, did, &commit.rev, commit.data).await?; 195 227 196 - let mut delta_store = HashMap::new(); 228 + copies.submit(&mut t, did).await?; 197 229 198 - db::defer(conn).await?; 199 - 200 - indexer_db::update_repo_version(conn, did, &rev, cid).await?; 201 - 202 - for (path, (cid, record)) in records { 203 - let Some((collection, rkey)) = path.split_once("/") else { 204 - tracing::warn!("record contained invalid path {}", path); 205 - return Err(diesel::result::Error::RollbackTransaction.into()); 206 - }; 207 - 208 - counter!("backfilled_commits", "collection" => collection.to_string()).increment(1); 209 - 210 - let full_path = format!("at://{did}/{path}"); 211 - 212 - indexer::index_op(conn, &mut delta_store, did, cid, record, 
&full_path, rkey).await? 213 - } 214 - 215 - db::update_repo_sync_state(conn, did, ActorSyncState::Synced).await?; 230 + t.execute( 231 + "UPDATE actors SET sync_state=$2, last_indexed=$3 WHERE did=$1", 232 + &[&did, &ActorSyncState::Synced, &Utc::now().naive_utc()], 233 + ) 234 + .await?; 216 235 217 - handle_backfill_rows(conn, &mut delta_store, did, &rev).await?; 236 + handle_backfill_rows(&mut t, &mut deltas, did, &commit.rev).await?; 218 237 219 238 tracing::trace!("insertion finished"); 220 239 221 - // submit the deltas 222 - let delta_store = delta_store 223 - .into_iter() 224 - .map(|((uri, typ), delta)| parakeet_index::AggregateDeltaReq { 225 - typ, 226 - uri: uri.to_string(), 227 - delta, 228 - }) 229 - .collect::<Vec<_>>(); 240 + if let Some(index_client) = &mut inner.index_client { 241 + // submit the deltas 242 + let delta_store = deltas 243 + .into_iter() 244 + .map(|((uri, typ), delta)| parakeet_index::AggregateDeltaReq { 245 + typ, 246 + uri: uri.to_string(), 247 + delta, 248 + }) 249 + .collect::<Vec<_>>(); 230 250 231 - let mut read = 0; 251 + let mut read = 0; 232 252 233 - while read < delta_store.len() { 234 - let rem = delta_store.len() - read; 235 - let take = DELTA_BATCH_SIZE.min(rem); 253 + while read < delta_store.len() { 254 + let rem = delta_store.len() - read; 255 + let take = DELTA_BATCH_SIZE.min(rem); 236 256 237 - tracing::debug!("reading & submitting {take} deltas"); 257 + tracing::debug!("reading & submitting {take} deltas"); 238 258 239 - let deltas = delta_store[read..read + take].to_vec(); 240 - inner 241 - .index_client 242 - .submit_aggregate_delta_batch(parakeet_index::AggregateDeltaBatchReq { deltas }) 243 - .await?; 259 + let deltas = delta_store[read..read + take].to_vec(); 260 + index_client 261 + .submit_aggregate_delta_batch(parakeet_index::AggregateDeltaBatchReq { deltas }) 262 + .await?; 244 263 245 - read += take; 246 - tracing::debug!("read {read} of {} deltas", delta_store.len()); 264 + read += take; 265 + 
tracing::debug!("read {read} of {} deltas", delta_store.len()); 266 + } 247 267 } 268 + 269 + t.commit().await?; 248 270 249 271 Ok(()) 250 272 } 251 273 252 274 async fn handle_backfill_rows( 253 - conn: &mut AsyncPgConnection, 275 + conn: &mut Transaction<'_>, 254 276 deltas: &mut impl AggregateDeltaStore, 255 277 repo: &str, 256 278 rev: &str, 257 - ) -> diesel::QueryResult<()> { 279 + ) -> Result<(), tokio_postgres::Error> { 258 280 // `pull_backfill_rows` filters out anything before the last commit we pulled 259 - let backfill_rows = db::pull_backfill_rows(conn, repo, rev).await?; 281 + let backfill_rows = db::backfill_rows_get(conn, repo, rev).await?; 260 282 261 283 for row in backfill_rows { 262 284 // blindly unwrap-ing this CID as we've already parsed it and re-serialized it 263 285 let repo_cid = Cid::from_str(&row.cid).unwrap(); 264 - indexer_db::update_repo_version(conn, repo, &row.repo_ver, repo_cid).await?; 286 + db::actor_set_repo_state(conn, repo, &row.repo_ver, repo_cid).await?; 265 287 266 288 // again, we've serialized this. 267 289 let items: Vec<BackfillItem> = serde_json::from_value(row.data).unwrap(); ··· 288 310 } 289 311 290 312 // finally, clear the backfill table entries for this actor 291 - db::clear_backfill_rows(conn, repo).await?; 313 + db::backfill_delete_rows(conn, repo).await?; 292 314 293 315 Ok(()) 294 316 } ··· 311 333 312 334 Ok(res.json().await?) 
313 335 } 336 + 337 + #[derive(Debug, Default)] 338 + struct CopyStore { 339 + likes: Vec<(String, records::StrongRef, DateTime<Utc>)>, 340 + posts: Vec<(String, Cid, records::AppBskyFeedPost)>, 341 + reposts: Vec<(String, records::StrongRef, DateTime<Utc>)>, 342 + blocks: Vec<(String, String, DateTime<Utc>)>, 343 + follows: Vec<(String, String, DateTime<Utc>)>, 344 + list_items: Vec<(String, records::AppBskyGraphListItem)>, 345 + verifications: Vec<(String, Cid, records::AppBskyGraphVerification)>, 346 + records: Vec<(String, Cid)>, 347 + } 348 + 349 + impl CopyStore { 350 + async fn submit(self, t: &mut Transaction<'_>, did: &str) -> Result<(), tokio_postgres::Error> { 351 + db::copy::copy_likes(t, did, self.likes).await?; 352 + db::copy::copy_posts(t, did, self.posts).await?; 353 + db::copy::copy_reposts(t, did, self.reposts).await?; 354 + db::copy::copy_blocks(t, did, self.blocks).await?; 355 + db::copy::copy_follows(t, did, self.follows).await?; 356 + db::copy::copy_list_items(t, self.list_items).await?; 357 + db::copy::copy_verification(t, did, self.verifications).await?; 358 + db::copy::copy_records(t, did, self.records).await?; 359 + 360 + Ok(()) 361 + } 362 + 363 + fn push_record(&mut self, at_uri: &str, cid: Cid) { 364 + self.records.push((at_uri.to_string(), cid)) 365 + } 366 + }
+158 -44
consumer/src/backfill/repo.rs
··· 1 - use super::types::{CarCommitEntry, CarEntry}; 2 - use crate::indexer::types::RecordTypes; 3 - use futures::{StreamExt, TryStreamExt}; 1 + use super::{ 2 + types::{CarCommitEntry, CarEntry}, 3 + CopyStore, 4 + }; 5 + use crate::indexer::records; 6 + use crate::indexer::types::{AggregateDeltaStore, RecordTypes}; 7 + use crate::{db, indexer}; 8 + use deadpool_postgres::Transaction; 9 + use futures::TryStreamExt; 4 10 use ipld_core::cid::Cid; 5 11 use iroh_car::CarReader; 12 + use metrics::counter; 13 + use parakeet_index::AggregateType; 6 14 use reqwest::Client; 7 15 use std::collections::HashMap; 16 + use std::io::ErrorKind; 17 + use tokio::io::BufReader; 18 + use tokio_util::io::StreamReader; 8 19 9 - pub async fn pull_repo<'a>( 20 + type BackfillDeltaStore = HashMap<(String, i32), i32>; 21 + 22 + pub async fn stream_and_insert_repo( 23 + t: &mut Transaction<'_>, 10 24 client: &Client, 11 25 repo: &str, 12 26 pds: &str, 13 - ) -> eyre::Result<(String, Cid, HashMap<String, (Cid, RecordTypes)>)> { 27 + ) -> eyre::Result<(CarCommitEntry, BackfillDeltaStore, CopyStore)> { 14 28 let res = client 15 29 .get(format!("{pds}/xrpc/com.atproto.sync.getRepo?did={repo}")) 16 30 .send() 17 31 .await? 
18 32 .error_for_status()?; 19 33 20 - let body = res.bytes().await?; 34 + let strm = res 35 + .bytes_stream() 36 + .map_err(|err| std::io::Error::new(ErrorKind::Other, err)); 37 + let reader = StreamReader::new(strm); 38 + let mut car_stream = CarReader::new(BufReader::new(reader)).await?; 21 39 22 - let (commit, records) = read_car(&body).await?; 40 + // the root should be the commit block 41 + let root = car_stream.header().roots().first().cloned().unwrap(); 23 42 24 - Ok((commit.rev, commit.data, records)) 25 - } 43 + let mut commit = None; 44 + let mut mst_nodes: HashMap<Cid, String> = HashMap::new(); 45 + let mut records: HashMap<Cid, RecordTypes> = HashMap::new(); 46 + let mut deltas = HashMap::new(); 47 + let mut copies = CopyStore::default(); 26 48 27 - // beware: this is probably: 1. insecure, 2. slow, 3. a/n other crimes 28 - async fn read_car( 29 - data: &[u8], 30 - ) -> eyre::Result<(CarCommitEntry, HashMap<String, (Cid, RecordTypes)>)> { 31 - let car = CarReader::new(data).await?; 49 + while let Some((cid, block)) = car_stream.next_block().await? 
{ 50 + let Ok(block) = serde_ipld_dagcbor::from_slice::<CarEntry>(&block) else { 51 + tracing::warn!("failed to parse block {cid}"); 52 + continue; 53 + }; 32 54 33 - let entries = car 34 - .stream() 35 - .map_ok( 36 - |(cid, block)| match serde_ipld_dagcbor::from_slice::<CarEntry>(&block) { 37 - Ok(decoded) => Some((cid, decoded)), 38 - Err(_) => None, 39 - }, 40 - ) 41 - .filter_map(|v| async move { v.ok().flatten() }) 42 - .collect::<HashMap<_, _>>() 43 - .await; 55 + if root == cid { 56 + if let CarEntry::Commit(commit_entry) = block { 57 + commit = Some(commit_entry); 58 + } else { 59 + tracing::warn!("root did not point to a commit entry"); 60 + } 61 + continue; 62 + } 44 63 45 - let mut commit = None; 46 - let mut mst_nodes = Vec::new(); 47 - let mut records = HashMap::new(); 48 - 49 - for (cid, entry) in entries { 50 - match entry { 51 - CarEntry::Record(rec) => { 52 - records.insert(cid, rec); 64 + match block { 65 + CarEntry::Commit(_) => { 66 + tracing::warn!("got commit entry that was not in root") 67 + } 68 + CarEntry::Record(record) => { 69 + if let Some(path) = mst_nodes.remove(&cid) { 70 + record_index(t, &mut copies, &mut deltas, repo, &path, cid, record).await?; 71 + } else { 72 + records.insert(cid, record); 73 + } 53 74 } 54 75 CarEntry::Mst(mst) => { 55 76 let mut out = Vec::with_capacity(mst.e.len()); ··· 60 81 let key = if node.p == 0 { 61 82 ks.to_string() 62 83 } else { 63 - let (prev, _): &(String, Cid) = out.last().unwrap(); 84 + let (_, prev): &(Cid, String) = out.last().unwrap(); 64 85 let prefix = &prev[..node.p as usize]; 65 86 66 87 format!("{prefix}{ks}") 67 88 }; 68 89 69 - out.push((key, node.v)); 90 + out.push((node.v, key.to_string())); 70 91 } 71 92 72 93 mst_nodes.extend(out); 73 94 } 74 - CarEntry::Commit(car_commit) => { 75 - commit = Some(car_commit); 76 - } 77 95 } 78 96 } 79 97 80 - let records_out = mst_nodes 81 - .into_iter() 82 - .filter_map(|(key, cid)| records.remove(&cid).map(|v| (key, (cid, v)))) 83 - 
.collect::<HashMap<_, _>>(); 98 + for (cid, record) in records {
99 + if let Some(path) = mst_nodes.remove(&cid) {
100 + record_index(t, &mut copies, &mut deltas, repo, &path, cid, record).await?;
101 + } else {
102 + tracing::warn!("couldn't find MST node for record {cid}")
103 + }
104 + }
84 105
85 - let commit = commit.ok_or(eyre::eyre!("no commit found"))?;
106 + let commit = commit.ok_or_else(|| eyre::eyre!("no commit found"))?;
107 +
108 + Ok((commit, deltas, copies))
109 + }
110 +
111 + async fn record_index(
112 + t: &mut Transaction<'_>,
113 + copies: &mut CopyStore,
114 + deltas: &mut BackfillDeltaStore,
115 + did: &str,
116 + path: &str,
117 + cid: Cid,
118 + record: RecordTypes,
119 + ) -> eyre::Result<()> {
120 + let Some((collection_raw, rkey)) = path.split_once("/") else {
121 + tracing::warn!("op contained invalid path {path}");
122 + return Ok(());
123 + };
124 +
125 + counter!("backfilled_commits", "collection" => collection_raw.to_string()).increment(1);
126 +
127 + let at_uri = format!("at://{did}/{path}");
128 +
129 + match record {
130 + RecordTypes::AppBskyFeedLike(rec) => {
131 + deltas.incr(&rec.subject.uri, AggregateType::Like).await;
132 +
133 + copies.push_record(&at_uri, cid);
134 + copies.likes.push((at_uri, rec.subject, rec.created_at));
135 + }
136 + RecordTypes::AppBskyFeedPost(rec) => {
137 + let maybe_reply = rec.reply.as_ref().map(|v| v.parent.uri.clone());
138 + let maybe_embed = rec
139 + .embed
140 + .as_ref()
141 + .and_then(|v| v.as_bsky())
142 + .and_then(|v| match v {
143 + records::AppBskyEmbed::Record(r) => Some(r.record.uri.clone()),
144 + records::AppBskyEmbed::RecordWithMedia(r) => Some(r.record.record.uri.clone()),
145 + _ => None,
146 + });
147 +
148 + if let Some(labels) = rec.labels.clone() {
149 + db::maintain_self_labels(t, did, Some(cid), &at_uri, labels).await?;
150 + }
151 + if let Some(embed) = rec.embed.clone().and_then(|embed| embed.into_bsky()) {
152 + db::post_embed_insert(t, &at_uri, embed, rec.created_at).await?;
153 + }
154 +
155 +
deltas.incr(did, AggregateType::ProfilePost).await; 156 + if let Some(reply) = maybe_reply { 157 + deltas.incr(&reply, AggregateType::Reply).await; 158 + } 159 + if let Some(embed) = maybe_embed { 160 + deltas.incr(&embed, AggregateType::Embed).await; 161 + } 162 + 163 + copies.push_record(&at_uri, cid); 164 + copies.posts.push((at_uri, cid, rec)); 165 + } 166 + RecordTypes::AppBskyFeedRepost(rec) => { 167 + deltas.incr(&rec.subject.uri, AggregateType::Repost).await; 86 168 87 - Ok((commit, records_out)) 169 + copies.push_record(&at_uri, cid); 170 + copies.reposts.push((at_uri, rec.subject, rec.created_at)); 171 + } 172 + RecordTypes::AppBskyGraphBlock(rec) => { 173 + copies.push_record(&at_uri, cid); 174 + copies.blocks.push((at_uri, rec.subject, rec.created_at)); 175 + } 176 + RecordTypes::AppBskyGraphFollow(rec) => { 177 + deltas.incr(did, AggregateType::Follow).await; 178 + deltas.incr(&rec.subject, AggregateType::Follower).await; 179 + 180 + copies.push_record(&at_uri, cid); 181 + copies.follows.push((at_uri, rec.subject, rec.created_at)); 182 + } 183 + RecordTypes::AppBskyGraphListItem(rec) => { 184 + let split_aturi = rec.list.rsplitn(4, '/').collect::<Vec<_>>(); 185 + if did != split_aturi[2] { 186 + // it's also probably a bad idea to log *all* the attempts to do this... 187 + tracing::warn!("tried to create a listitem on a list we don't control!"); 188 + return Ok(()); 189 + } 190 + 191 + copies.push_record(&at_uri, cid); 192 + copies.list_items.push((at_uri, rec)); 193 + } 194 + RecordTypes::AppBskyGraphVerification(rec) => { 195 + copies.push_record(&at_uri, cid); 196 + copies.verifications.push((at_uri, cid, rec)); 197 + } 198 + _ => indexer::index_op(t, deltas, did, cid, record, &at_uri, rkey).await?, 199 + } 200 + 201 + Ok(()) 88 202 }
+13 -3
consumer/src/config.rs
··· 14 14 #[derive(Debug, Deserialize)] 15 15 pub struct Config { 16 16 pub index_uri: String, 17 - pub database_url: String, 17 + pub database: deadpool_postgres::Config, 18 18 pub redis_uri: String, 19 19 pub plc_directory: Option<String>, 20 20 /// Adds contact details (email / bluesky handle / website) to the UA header. 21 21 pub ua_contact: Option<String>, 22 - #[serde(default = "default_backfill_workers")] 23 - pub backfill_workers: u8, 24 22 /// DIDs of label services to force subscription to. 25 23 #[serde(default)] 26 24 pub initial_label_services: Vec<String>, ··· 29 27 30 28 /// Configuration items specific to indexer 31 29 pub indexer: Option<IndexerConfig>, 30 + /// Configuration items specific to backfill 31 + pub backfill: Option<BackfillConfig>, 32 32 } 33 33 34 34 #[derive(Debug, Deserialize)] ··· 51 51 BackfillHistory, 52 52 /// Discover new accounts as they come and do not import history 53 53 Realtime, 54 + } 55 + 56 + #[derive(Clone, Debug, Deserialize)] 57 + pub struct BackfillConfig { 58 + #[serde(default = "default_backfill_workers")] 59 + pub backfill_workers: u8, 60 + #[serde(default)] 61 + pub skip_aggregation: bool, 62 + #[serde(default)] 63 + pub skip_handle_validation: bool, 54 64 } 55 65 56 66 fn default_backfill_workers() -> u8 {
+101
consumer/src/db/actor.rs
··· 1 + use super::{PgExecResult, PgOptResult}; 2 + use chrono::{DateTime, Utc}; 3 + use deadpool_postgres::GenericClient; 4 + use ipld_core::cid::Cid; 5 + use parakeet_db::types::{ActorStatus, ActorSyncState}; 6 + 7 + pub async fn actor_upsert<C: GenericClient>( 8 + conn: &mut C, 9 + did: &str, 10 + status: ActorStatus, 11 + sync_state: ActorSyncState, 12 + time: DateTime<Utc>, 13 + ) -> PgExecResult { 14 + conn.execute( 15 + r#"INSERT INTO actors (did, status, sync_state, last_indexed) VALUES ($1, $2, $3, $4) 16 + ON CONFLICT (did) DO UPDATE SET status=EXCLUDED.status, last_indexed=EXCLUDED.last_indexed"#, 17 + &[&did, &status, &sync_state, &time.naive_utc()], 18 + ).await 19 + } 20 + 21 + pub async fn actor_upsert_handle<C: GenericClient>( 22 + conn: &mut C, 23 + did: &str, 24 + sync_state: ActorSyncState, 25 + handle: Option<String>, 26 + time: DateTime<Utc>, 27 + ) -> PgExecResult { 28 + conn.execute( 29 + r#"INSERT INTO actors (did, handle, sync_state, last_indexed) VALUES ($1, $2, $3, $4) 30 + ON CONFLICT (did) DO UPDATE SET handle=EXCLUDED.handle, last_indexed=EXCLUDED.last_indexed"#, 31 + &[&did, &handle, &sync_state, &time.naive_utc()] 32 + ).await 33 + } 34 + 35 + pub async fn actor_set_sync_status<C: GenericClient>( 36 + conn: &mut C, 37 + did: &str, 38 + sync_state: ActorSyncState, 39 + time: DateTime<Utc>, 40 + ) -> PgExecResult { 41 + conn.execute( 42 + "UPDATE actors SET sync_state=$2, last_indexed=$3 WHERE did=$1", 43 + &[&did, &sync_state, &time.naive_utc()], 44 + ) 45 + .await 46 + } 47 + 48 + pub async fn actor_set_repo_state<C: GenericClient>( 49 + conn: &mut C, 50 + did: &str, 51 + rev: &str, 52 + cid: Cid, 53 + ) -> PgExecResult { 54 + conn.execute( 55 + "UPDATE actors SET repo_rev=$2, repo_cid=$3 WHERE did=$1", 56 + &[&did, &rev, &cid.to_string()], 57 + ) 58 + .await 59 + } 60 + 61 + pub async fn actor_get_status_and_rev<C: GenericClient>( 62 + conn: &mut C, 63 + did: &str, 64 + ) -> PgOptResult<(ActorStatus, Option<String>)> { 65 + let res 
= conn 66 + .query_opt( 67 + "SELECT status, repo_rev FROM actors WHERE did=$1 LIMIT 1", 68 + &[&did], 69 + ) 70 + .await?; 71 + 72 + Ok(res.map(|v| (v.get(0), v.get(1)))) 73 + } 74 + 75 + pub async fn actor_get_repo_status<C: GenericClient>( 76 + conn: &mut C, 77 + did: &str, 78 + ) -> PgOptResult<(ActorSyncState, Option<String>)> { 79 + let res = conn 80 + .query_opt( 81 + "SELECT sync_state, repo_rev FROM actors WHERE did=$1 LIMIT 1", 82 + &[&did], 83 + ) 84 + .await?; 85 + 86 + Ok(res.map(|v| (v.get(0), v.get(1)))) 87 + } 88 + 89 + pub async fn actor_get_statuses<C: GenericClient>( 90 + conn: &mut C, 91 + did: &str, 92 + ) -> PgOptResult<(ActorStatus, ActorSyncState)> { 93 + let res = conn 94 + .query_opt( 95 + "SELECT status, sync_state FROM actors WHERE did=$1 LIMIT 1", 96 + &[&did], 97 + ) 98 + .await?; 99 + 100 + Ok(res.map(|v| (v.get(0), v.get(1)))) 101 + }
+65
consumer/src/db/backfill.rs
··· 1 + use super::{PgExecResult, PgResult}; 2 + use chrono::NaiveDateTime; 3 + use deadpool_postgres::GenericClient; 4 + use ipld_core::cid::Cid; 5 + 6 + pub struct BackfillRow { 7 + pub repo: String, 8 + pub repo_ver: String, 9 + pub cid: String, 10 + 11 + pub data: serde_json::Value, 12 + 13 + pub indexed_at: NaiveDateTime, 14 + } 15 + 16 + pub async fn backfill_job_write<C: GenericClient>(conn: &mut C, did: &str, status: &str) -> PgExecResult { 17 + conn.execute( 18 + "INSERT INTO backfill_jobs (did, status) VALUES ($1, $2)", 19 + &[&did, &status], 20 + ) 21 + .await 22 + } 23 + 24 + pub async fn backfill_write_row<C: GenericClient>( 25 + conn: &mut C, 26 + repo: &str, 27 + rev: &str, 28 + cid: Cid, 29 + data: serde_json::Value, 30 + ) -> PgExecResult { 31 + conn.execute( 32 + "INSERT INTO backfill (repo, repo_ver, cid, data) VALUES ($1, $2, $3, $4)", 33 + &[&repo, &rev, &cid.to_string(), &data], 34 + ) 35 + .await 36 + } 37 + 38 + pub async fn backfill_rows_get<C: GenericClient>( 39 + conn: &mut C, 40 + repo: &str, 41 + rev: &str, 42 + ) -> PgResult<Vec<BackfillRow>> { 43 + let res = conn 44 + .query( 45 + "SELECT * FROM backfill WHERE repo=$1 AND repo_ver > $2 ORDER BY repo_ver", 46 + &[&repo, &rev], 47 + ) 48 + .await?; 49 + 50 + Ok(res 51 + .into_iter() 52 + .map(|row| BackfillRow { 53 + repo: row.get(0), 54 + repo_ver: row.get(1), 55 + cid: row.get(2), 56 + data: row.get(3), 57 + indexed_at: row.get(4), 58 + }) 59 + .collect()) 60 + } 61 + 62 + pub async fn backfill_delete_rows<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult { 63 + conn.execute("DELETE FROM backfill WHERE repo=$1", &[&repo]) 64 + .await 65 + }
+385
consumer/src/db/copy.rs
··· 1 + use super::PgExecResult; 2 + use crate::indexer::records; 3 + use crate::utils::strongref_to_parts; 4 + use chrono::prelude::*; 5 + use deadpool_postgres::Transaction; 6 + use futures::pin_mut; 7 + use ipld_core::cid::Cid; 8 + use tokio_postgres::binary_copy::BinaryCopyInWriter; 9 + use tokio_postgres::types::Type; 10 + 11 + // StrongRefs are used in both likes and reposts 12 + const STRONGREF_TYPES: &[Type] = &[ 13 + Type::TEXT, 14 + Type::TEXT, 15 + Type::TEXT, 16 + Type::TEXT, 17 + Type::TIMESTAMP, 18 + ]; 19 + type StrongRefRow = (String, records::StrongRef, DateTime<Utc>); 20 + 21 + // SubjectRefs are used in both blocks and follows 22 + const SUBJECT_TYPES: &[Type] = &[Type::TEXT, Type::TEXT, Type::TEXT, Type::TIMESTAMP]; 23 + type SubjectRefRow = (String, String, DateTime<Utc>); 24 + 25 + pub async fn copy_likes( 26 + conn: &mut Transaction<'_>, 27 + did: &str, 28 + data: Vec<StrongRefRow>, 29 + ) -> PgExecResult { 30 + if data.is_empty() { 31 + return Ok(0); 32 + } 33 + 34 + conn.execute( 35 + "CREATE TEMP TABLE likes_tmp (LIKE likes INCLUDING DEFAULTS) ON COMMIT DROP", 36 + &[], 37 + ) 38 + .await?; 39 + 40 + let writer = conn 41 + .copy_in( 42 + "COPY likes_tmp (at_uri, did, subject, subject_cid, created_at) FROM STDIN (FORMAT binary)", 43 + ) 44 + .await?; 45 + let writer = BinaryCopyInWriter::new(writer, STRONGREF_TYPES); 46 + 47 + pin_mut!(writer); 48 + 49 + for row in data { 50 + let writer = writer.as_mut(); 51 + writer 52 + .write(&[ 53 + &row.0, 54 + &did, 55 + &row.1.uri, 56 + &row.1.cid.to_string(), 57 + &row.2.naive_utc(), 58 + ]) 59 + .await?; 60 + } 61 + 62 + writer.finish().await?; 63 + 64 + conn.execute("INSERT INTO likes (SELECT * FROM likes_tmp)", &[]) 65 + .await 66 + } 67 + 68 + pub async fn copy_reposts( 69 + conn: &mut Transaction<'_>, 70 + did: &str, 71 + data: Vec<StrongRefRow>, 72 + ) -> PgExecResult { 73 + if data.is_empty() { 74 + return Ok(0); 75 + } 76 + 77 + conn.execute( 78 + "CREATE TEMP TABLE reposts_tmp (LIKE 
reposts INCLUDING DEFAULTS) ON COMMIT DROP", 79 + &[], 80 + ) 81 + .await?; 82 + 83 + let writer = conn 84 + .copy_in( 85 + "COPY reposts_tmp (at_uri, did, post, post_cid, created_at) FROM STDIN (FORMAT binary)", 86 + ) 87 + .await?; 88 + let writer = BinaryCopyInWriter::new(writer, STRONGREF_TYPES); 89 + 90 + pin_mut!(writer); 91 + 92 + for row in data { 93 + let writer = writer.as_mut(); 94 + writer 95 + .write(&[ 96 + &row.0, 97 + &did, 98 + &row.1.uri, 99 + &row.1.cid.to_string(), 100 + &row.2.naive_utc(), 101 + ]) 102 + .await?; 103 + } 104 + 105 + writer.finish().await?; 106 + 107 + conn.execute("INSERT INTO reposts (SELECT * FROM reposts_tmp)", &[]) 108 + .await 109 + } 110 + 111 + const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, created_at) FROM STDIN (FORMAT binary)"; 112 + const POST_TYPES: &[Type] = &[ 113 + Type::TEXT, 114 + Type::TEXT, 115 + Type::TEXT, 116 + Type::JSONB, 117 + Type::TEXT, 118 + Type::JSONB, 119 + Type::TEXT_ARRAY, 120 + Type::TEXT_ARRAY, 121 + Type::TEXT, 122 + Type::TEXT, 123 + Type::TEXT, 124 + Type::TEXT, 125 + Type::TEXT, 126 + Type::TEXT, 127 + Type::TIMESTAMP, 128 + ]; 129 + pub async fn copy_posts( 130 + conn: &mut Transaction<'_>, 131 + did: &str, 132 + data: Vec<(String, Cid, records::AppBskyFeedPost)>, 133 + ) -> PgExecResult { 134 + if data.is_empty() { 135 + return Ok(0); 136 + } 137 + 138 + conn.execute( 139 + "CREATE TEMP TABLE posts_tmp (LIKE posts INCLUDING DEFAULTS) ON COMMIT DROP", 140 + &[], 141 + ) 142 + .await?; 143 + 144 + let writer = conn.copy_in(POST_STMT).await?; 145 + let writer = BinaryCopyInWriter::new(writer, POST_TYPES); 146 + 147 + pin_mut!(writer); 148 + 149 + for (at_uri, cid, post) in data { 150 + let record = serde_json::to_value(&post).unwrap(); 151 + let facets = post.facets.and_then(|v| serde_json::to_value(v).ok()); 152 + let embed = post.embed.as_ref().map(|v| v.as_str()); 153 + 
let embed_subtype = post.embed.as_ref().and_then(|v| v.subtype()); 154 + let (parent_uri, parent_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.parent)); 155 + let (root_uri, root_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.root)); 156 + 157 + let writer = writer.as_mut(); 158 + writer 159 + .write(&[ 160 + &at_uri, 161 + &cid.to_string(), 162 + &did, 163 + &record, 164 + &post.text, 165 + &facets, 166 + &post.langs.unwrap_or_default(), 167 + &post.tags.unwrap_or_default(), 168 + &parent_uri, 169 + &parent_cid, 170 + &root_uri, 171 + &root_cid, 172 + &embed, 173 + &embed_subtype, 174 + &post.created_at.naive_utc(), 175 + ]) 176 + .await?; 177 + } 178 + 179 + writer.finish().await?; 180 + 181 + conn.execute("INSERT INTO posts (SELECT * FROM posts_tmp)", &[]) 182 + .await 183 + } 184 + 185 + pub async fn copy_blocks( 186 + conn: &mut Transaction<'_>, 187 + did: &str, 188 + data: Vec<SubjectRefRow>, 189 + ) -> PgExecResult { 190 + if data.is_empty() { 191 + return Ok(0); 192 + } 193 + 194 + conn.execute( 195 + "CREATE TEMP TABLE blocks_tmp (LIKE blocks INCLUDING DEFAULTS) ON COMMIT DROP", 196 + &[], 197 + ) 198 + .await?; 199 + 200 + let writer = conn 201 + .copy_in("COPY blocks_tmp (at_uri, did, subject, created_at) FROM STDIN (FORMAT binary)") 202 + .await?; 203 + let writer = BinaryCopyInWriter::new(writer, SUBJECT_TYPES); 204 + 205 + pin_mut!(writer); 206 + 207 + for row in data { 208 + let writer = writer.as_mut(); 209 + writer 210 + .write(&[&row.0, &did, &row.1, &row.2.naive_utc()]) 211 + .await?; 212 + } 213 + 214 + writer.finish().await?; 215 + 216 + conn.execute("INSERT INTO blocks (SELECT * FROM blocks_tmp)", &[]) 217 + .await 218 + } 219 + 220 + pub async fn copy_list_items( 221 + conn: &mut Transaction<'_>, 222 + data: Vec<(String, records::AppBskyGraphListItem)>, 223 + ) -> PgExecResult { 224 + if data.is_empty() { 225 + return Ok(0); 226 + } 227 + 228 + conn.execute( 229 + "CREATE TEMP TABLE list_items_tmp (LIKE list_items 
INCLUDING DEFAULTS) ON COMMIT DROP", 230 + &[], 231 + ) 232 + .await?; 233 + 234 + let writer = conn 235 + .copy_in( 236 + "COPY list_items_tmp (at_uri, list_uri, subject, created_at) FROM STDIN (FORMAT binary)", 237 + ) 238 + .await?; 239 + let writer = BinaryCopyInWriter::new( 240 + writer, 241 + &[Type::TEXT, Type::TEXT, Type::TEXT, Type::TIMESTAMP], 242 + ); 243 + 244 + pin_mut!(writer); 245 + 246 + for (at_uri, record) in data { 247 + let writer = writer.as_mut(); 248 + writer 249 + .write(&[ 250 + &at_uri, 251 + &record.list, 252 + &record.subject, 253 + &record.created_at.naive_utc(), 254 + ]) 255 + .await?; 256 + } 257 + 258 + writer.finish().await?; 259 + 260 + conn.execute("INSERT INTO list_items (SELECT * FROM list_items_tmp)", &[]) 261 + .await 262 + } 263 + 264 + pub async fn copy_follows( 265 + conn: &mut Transaction<'_>, 266 + did: &str, 267 + data: Vec<SubjectRefRow>, 268 + ) -> PgExecResult { 269 + if data.is_empty() { 270 + return Ok(0); 271 + } 272 + 273 + conn.execute( 274 + "CREATE TEMP TABLE follows_tmp (LIKE follows INCLUDING DEFAULTS) ON COMMIT DROP", 275 + &[], 276 + ) 277 + .await?; 278 + 279 + let writer = conn 280 + .copy_in("COPY follows_tmp (at_uri, did, subject, created_at) FROM STDIN (FORMAT binary)") 281 + .await?; 282 + let writer = BinaryCopyInWriter::new(writer, SUBJECT_TYPES); 283 + 284 + pin_mut!(writer); 285 + 286 + for row in data { 287 + let writer = writer.as_mut(); 288 + writer 289 + .write(&[&row.0, &did, &row.1, &row.2.naive_utc()]) 290 + .await?; 291 + } 292 + 293 + writer.finish().await?; 294 + 295 + conn.execute("INSERT INTO follows (SELECT * FROM follows_tmp)", &[]) 296 + .await 297 + } 298 + 299 + const VERIFICATION_TYPES: &[Type] = &[ 300 + Type::TEXT, 301 + Type::TEXT, 302 + Type::TEXT, 303 + Type::TEXT, 304 + Type::TEXT, 305 + Type::TEXT, 306 + Type::TIMESTAMP, 307 + ]; 308 + pub async fn copy_verification( 309 + conn: &mut Transaction<'_>, 310 + did: &str, 311 + data: Vec<(String, Cid, 
records::AppBskyGraphVerification)>, 312 + ) -> PgExecResult {
313 + if data.is_empty() {
314 + return Ok(0);
315 + }
316 +
317 + conn.execute(
318 + "CREATE TEMP TABLE verification_tmp (LIKE verification INCLUDING DEFAULTS) ON COMMIT DROP",
319 + &[],
320 + )
321 + .await?;
322 +
323 + let writer = conn
324 + .copy_in("COPY verification_tmp (at_uri, cid, verifier, subject, handle, display_name, created_at) FROM STDIN (FORMAT binary)")
325 + .await?;
326 + let writer = BinaryCopyInWriter::new(writer, VERIFICATION_TYPES);
327 +
328 + pin_mut!(writer);
329 +
330 + for (at_uri, cid, record) in data {
331 + let writer = writer.as_mut();
332 + writer
333 + .write(&[
334 + &at_uri,
335 + &cid.to_string(),
336 + &did,
337 + &record.subject,
338 + &record.handle,
339 + &record.display_name,
340 + &record.created_at.naive_utc(),
341 + ])
342 + .await?;
343 + }
344 +
345 + writer.finish().await?;
346 +
347 + conn.execute(
348 + "INSERT INTO verification (SELECT * FROM verification_tmp)",
349 + &[],
350 + )
351 + .await
352 + }
353 +
354 + pub async fn copy_records(
355 + conn: &mut Transaction<'_>,
356 + did: &str,
357 + data: Vec<(String, Cid)>,
358 + ) -> PgExecResult {
359 + if data.is_empty() {
360 + return Ok(0);
361 + }
362 +
363 + conn.execute(
364 + "CREATE TEMP TABLE records_tmp (LIKE records INCLUDING DEFAULTS) ON COMMIT DROP",
365 + &[],
366 + )
367 + .await?;
368 +
369 + let writer = conn
370 + .copy_in("COPY records_tmp (at_uri, cid, did) FROM STDIN (FORMAT binary)")
371 + .await?;
372 + let writer = BinaryCopyInWriter::new(writer, &[Type::TEXT, Type::TEXT, Type::TEXT]);
373 +
374 + pin_mut!(writer);
375 +
376 + for (at_uri, cid) in data {
377 + let writer = writer.as_mut();
378 + writer.write(&[&at_uri, &cid.to_string(), &did]).await?;
379 + }
380 +
381 + writer.finish().await?;
382 +
383 + conn.execute("INSERT INTO records (SELECT * FROM records_tmp)", &[])
384 + .await
385 + }
+79
consumer/src/db/labels.rs
··· 1 + use super::PgExecResult; 2 + use crate::indexer::records::AppBskyLabelerService; 3 + use deadpool_postgres::GenericClient; 4 + use ipld_core::cid::Cid; 5 + use lexica::com_atproto::label::{LabelValueDefinition, SelfLabels}; 6 + use std::collections::HashMap; 7 + 8 + pub async fn maintain_label_defs<C: GenericClient>( 9 + conn: &mut C, 10 + repo: &str, 11 + rec: &AppBskyLabelerService, 12 + ) -> PgExecResult { 13 + // drop any label defs not currently in the list 14 + conn.execute( 15 + "DELETE FROM labeler_defs WHERE labeler=$1 AND NOT label_identifier = any($2)", 16 + &[&repo, &rec.policies.label_values], 17 + ) 18 + .await?; 19 + 20 + let definitions = rec 21 + .policies 22 + .label_value_definitions 23 + .iter() 24 + .map(|def| (def.identifier.clone(), def)) 25 + .collect::<HashMap<String, &LabelValueDefinition>>(); 26 + 27 + for label in &rec.policies.label_values { 28 + let definition = definitions.get(label); 29 + 30 + let severity = definition.map(|v| v.severity.to_string()); 31 + let blurs = definition.map(|v| v.blurs.to_string()); 32 + let default_setting = definition 33 + .and_then(|v| v.default_setting) 34 + .map(|v| v.to_string()); 35 + let adult_only = definition.and_then(|v| v.adult_only); 36 + let locales = definition.and_then(|v| serde_json::to_value(&v.locales).ok()); 37 + 38 + conn.execute( 39 + include_str!("sql/label_defs_upsert.sql"), 40 + &[ 41 + &repo, 42 + &label, 43 + &severity, 44 + &blurs, 45 + &default_setting, 46 + &adult_only, 47 + &locales, 48 + ], 49 + ) 50 + .await?; 51 + } 52 + 53 + Ok(0) 54 + } 55 + 56 + pub async fn maintain_self_labels<C: GenericClient>( 57 + conn: &mut C, 58 + repo: &str, 59 + cid: Option<Cid>, 60 + at_uri: &str, 61 + self_labels: SelfLabels, 62 + ) -> PgExecResult { 63 + conn.execute( 64 + "DELETE FROM labels WHERE self_label=TRUE AND uri=$1", 65 + &[&at_uri], 66 + ) 67 + .await?; 68 + 69 + let cid = cid.map(|cid| cid.to_string()); 70 + 71 + let stmt = conn.prepare_cached("INSERT INTO labels (labeler, 
label, uri, self_label, cid, created_at) VALUES ($1, $2, $3, TRUE, $4, NOW())").await?; 72 +
73 + for label in self_labels.values {
74 + conn.execute(&stmt, &[&repo, &label.val, &at_uri, &cid])
75 + .await?;
76 + }
77 +
78 + Ok(0)
79 + }
+16
consumer/src/db/mod.rs
··· 1 + use tokio_postgres::Error as PgError; 2 + 3 + type PgResult<T> = Result<T, PgError>; 4 + type PgExecResult = PgResult<u64>; 5 + type PgOptResult<T> = PgResult<Option<T>>; 6 + 7 + mod actor; 8 + mod backfill; 9 + pub mod copy; 10 + mod labels; 11 + mod record; 12 + 13 + pub use actor::*; 14 + pub use backfill::*; 15 + pub use labels::*; 16 + pub use record::*;
+651
consumer/src/db/record.rs
··· 1 + use super::{PgExecResult, PgOptResult};
2 + use crate::indexer::records::*;
3 + use crate::utils::{blob_ref, strongref_to_parts};
4 + use chrono::prelude::*;
5 + use deadpool_postgres::GenericClient;
6 + use ipld_core::cid::Cid;
7 +
8 + pub async fn record_upsert<C: GenericClient>(
9 + conn: &mut C,
10 + at_uri: &str,
11 + repo: &str,
12 + cid: Cid,
13 + ) -> PgExecResult {
14 + conn.execute(
15 + "INSERT INTO records (at_uri, did, cid) VALUES ($1, $2, $3) ON CONFLICT (at_uri) DO UPDATE SET cid=EXCLUDED.cid",
16 + &[&at_uri, &repo, &cid.to_string()],
17 + ).await
18 + }
19 +
20 + pub async fn record_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
21 + conn.execute("DELETE FROM records WHERE at_uri=$1", &[&at_uri])
22 + .await
23 + }
24 +
25 + pub async fn block_insert<C: GenericClient>(
26 + conn: &mut C,
27 + at_uri: &str,
28 + repo: &str,
29 + rec: AppBskyGraphBlock,
30 + ) -> PgExecResult {
31 + conn.execute(
32 + "INSERT INTO blocks (at_uri, did, subject, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
33 + &[&at_uri, &repo, &rec.subject, &rec.created_at],
34 + ).await
35 + }
36 +
37 + pub async fn block_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
38 + conn.execute("DELETE FROM blocks WHERE at_uri=$1", &[&at_uri])
39 + .await
40 + }
41 +
42 + pub async fn chat_decl_upsert<C: GenericClient>(
43 + conn: &mut C,
44 + repo: &str,
45 + rec: ChatBskyActorDeclaration,
46 + ) -> PgExecResult {
47 + conn.execute(
48 + "INSERT INTO chat_decls (did, allow_incoming) VALUES ($1, $2) ON CONFLICT (did) DO UPDATE SET allow_incoming=EXCLUDED.allow_incoming",
49 + &[&repo, &rec.allow_incoming.to_string()]
50 + ).await
51 + }
52 +
53 + pub async fn chat_decl_delete<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
54 + conn.execute("DELETE FROM chat_decls WHERE did=$1", &[&repo])
55 + .await
56 + }
57 +
58 + pub async fn feedgen_upsert<C: GenericClient>(
59 + conn: &mut C,
60 + at_uri: &str,
61 +
repo: &str, 62 + cid: Cid, 63 + rec: AppBskyFeedGenerator, 64 + ) -> PgExecResult { 65 + let cid = cid.to_string(); 66 + let description_facets = rec 67 + .description_facets 68 + .and_then(|v| serde_json::to_value(v).ok()); 69 + let avatar = blob_ref(rec.avatar); 70 + 71 + conn.execute( 72 + include_str!("sql/feedgen_upsert.sql"), 73 + &[ 74 + &at_uri, 75 + &repo, 76 + &cid, 77 + &rec.did, 78 + &rec.content_mode, 79 + &rec.display_name, 80 + &rec.description, 81 + &description_facets, 82 + &avatar, 83 + &rec.created_at, 84 + ], 85 + ) 86 + .await 87 + } 88 + 89 + pub async fn feedgen_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult { 90 + conn.execute("DELETE FROM feedgens WHERE at_uri=$1", &[&at_uri]) 91 + .await 92 + } 93 + 94 + pub async fn follow_insert<C: GenericClient>( 95 + conn: &mut C, 96 + at_uri: &str, 97 + repo: &str, 98 + rec: AppBskyGraphFollow, 99 + ) -> PgExecResult { 100 + conn.execute( 101 + "INSERT INTO follows (at_uri, did, subject, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING", 102 + &[&at_uri, &repo, &rec.subject, &rec.created_at], 103 + ).await 104 + } 105 + 106 + pub async fn follow_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgOptResult<String> { 107 + let res = conn 108 + .query_opt( 109 + "DELETE FROM follows WHERE at_uri=$1 RETURNING subject", 110 + &[&at_uri], 111 + ) 112 + .await?; 113 + 114 + Ok(res.map(|v| v.get(0))) 115 + } 116 + 117 + pub async fn labeler_upsert<C: GenericClient>( 118 + conn: &mut C, 119 + repo: &str, 120 + cid: Cid, 121 + rec: AppBskyLabelerService, 122 + ) -> PgExecResult { 123 + let cid = cid.to_string(); 124 + let reasons = rec 125 + .reason_types 126 + .as_ref() 127 + .map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<_>>()); 128 + let subject_types = rec 129 + .subject_types 130 + .as_ref() 131 + .map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<_>>()); 132 + 133 + conn.execute( 134 + include_str!("sql/label_service_upsert.sql"), 135 + &[ 136 + &repo, 
// --- tail of the labeler upsert whose head precedes this chunk: finish the
// --- parameter list, run the statement, then sync the label definitions.
            &cid,
            &reasons,
            &subject_types,
            &rec.subject_collections,
        ],
    )
    .await?;

    // Keep the per-labeler label definitions in step with the service record.
    super::maintain_label_defs(conn, repo, &rec).await
}

/// Deletes the labeler service row for `repo`.
pub async fn labeler_delete<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
    conn.execute("DELETE FROM labelers WHERE did=$1", &[&repo])
        .await
}

/// Records a like by `repo` on the record referenced by `rec.subject`.
pub async fn like_insert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    rec: AppBskyFeedLike,
) -> PgExecResult {
    conn.execute(
        "INSERT INTO likes (at_uri, did, subject, subject_cid, created_at) VALUES ($1, $2, $3, $4, $5)",
        &[&at_uri, &repo, &rec.subject.uri, &rec.subject.cid.to_string(), &rec.created_at]
    ).await
}

/// Deletes a like. Returns the liked subject's URI when a row was removed so
/// callers can maintain aggregates; `None` when no such like existed.
pub async fn like_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgOptResult<String> {
    let res = conn
        .query_opt(
            "DELETE FROM likes WHERE at_uri=$1 RETURNING subject",
            &[&at_uri],
        )
        .await?;

    Ok(res.map(|v| v.get(0)))
}

/// Inserts or updates a list record (see sql/list_upsert.sql for the
/// ON CONFLICT column set).
pub async fn list_upsert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    cid: Cid,
    rec: AppBskyGraphList,
) -> PgExecResult {
    let cid = cid.to_string();
    // Facets are stored as JSONB; a serialization failure degrades to NULL.
    let description_facets = rec
        .description_facets
        .and_then(|v| serde_json::to_value(v).ok());
    let avatar = blob_ref(rec.avatar);

    conn.execute(
        include_str!("sql/list_upsert.sql"),
        &[
            &at_uri,
            &repo,
            &cid,
            &rec.purpose,
            &rec.name,
            &rec.description,
            &description_facets,
            &avatar,
            &rec.created_at,
        ],
    )
    .await
}

/// Deletes a list record.
pub async fn list_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM lists WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Records a block of an entire list by `repo`; idempotent via ON CONFLICT.
pub async fn list_block_insert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    rec: AppBskyGraphListBlock,
) -> PgExecResult {
    conn.execute(
        "INSERT INTO list_blocks (at_uri, did, list_uri, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
        &[&at_uri, &repo, &rec.subject, &rec.created_at],
    ).await
}

/// Deletes a list-block record.
pub async fn list_block_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM list_blocks WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Records a list membership; idempotent via ON CONFLICT.
pub async fn list_item_insert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    rec: AppBskyGraphListItem,
) -> PgExecResult {
    conn.execute(
        "INSERT INTO list_items (at_uri, list_uri, subject, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
        &[&at_uri, &rec.list, &rec.subject, &rec.created_at],
    ).await
}

/// Deletes a list-item record.
pub async fn list_item_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM list_items WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Inserts a post row plus any embed side-tables.
///
/// Returns the number of post rows written (the SQL is ON CONFLICT DO
/// NOTHING, so 0 means the post already existed).
pub async fn post_insert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    cid: Cid,
    rec: AppBskyFeedPost,
) -> PgExecResult {
    let cid = cid.to_string();
    // A record we just decoded always round-trips to a JSON value.
    let record = serde_json::to_value(&rec).expect("decoded post record must serialize");
    let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok());
    let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent));
    let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root));
    let embed = rec.embed.as_ref().map(|v| v.as_str());
    let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype());

    let count = conn
        .execute(
            include_str!("sql/post_insert.sql"),
            &[
                &at_uri,
                &repo,
                &cid,
                &record,
                &rec.text,
                &facets,
                &rec.langs.unwrap_or_default(),
                &rec.tags.unwrap_or_default(),
                &parent_uri,
                &parent_cid,
                &root_uri,
                &root_cid,
                &embed,
                &embed_subtype,
                &rec.created_at,
            ],
        )
        .await?;

    // Only app.bsky.* embeds get side-table rows; others are kept in `record`.
    if let Some(embed) = rec.embed.and_then(|embed| embed.into_bsky()) {
        post_embed_insert(conn, at_uri, embed, rec.created_at).await?;
    }

    Ok(count)
}

/// Deletes a post (embed side-tables are expected to follow the post row —
/// NOTE(review): presumably via FK cascade; confirm against the migrations).
pub async fn post_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM posts WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Fetches (parent_uri, embedded-record uri) for a post about to be deleted,
/// so reply/quote aggregates can be decremented.
pub async fn post_get_info_for_delete<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
) -> PgOptResult<(Option<String>, Option<String>)> {
    let res = conn
        .query_opt(
            "SELECT parent_uri, per.uri FROM posts LEFT JOIN post_embed_record per on at_uri = per.post_uri WHERE at_uri = $1",
            &[&at_uri],
        )
        .await?;

    Ok(res.map(|row| (row.get(0), row.get(1))))
}

/// Dispatches an embed to its side-table insert. A RecordWithMedia embed
/// writes both the record part and the media part.
pub async fn post_embed_insert<C: GenericClient>(
    conn: &mut C,
    post: &str,
    embed: AppBskyEmbed,
    created_at: DateTime<Utc>,
) -> PgExecResult {
    match embed {
        AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await,
        AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await,
        AppBskyEmbed::External(embed) => post_embed_external_insert(conn, post, embed).await,
        AppBskyEmbed::Record(embed) => {
            post_embed_record_insert(conn, post, embed, created_at).await
        }
        AppBskyEmbed::RecordWithMedia(embed) => {
            post_embed_record_insert(conn, post, embed.record, created_at).await?;
            match *embed.media {
                AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await,
                AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await,
                AppBskyEmbed::External(embed) => {
                    post_embed_external_insert(conn, post, embed).await
                }
                // NOTE(review): the media half of RecordWithMedia is assumed to
                // never itself be a record embed — confirm against the lexicon.
                _ => unreachable!(),
            }
        }
    }
}

/// Inserts one row per image. Returns the number of image rows written.
async fn post_embed_image_insert<C: GenericClient>(
    conn: &mut C,
    post: &str,
    embed: AppBskyEmbedImages,
) -> PgExecResult {
    // prepare_cached so the statement is reused across posts, matching the
    // captions insert in post_embed_video_insert.
    let stmt = conn.prepare_cached("INSERT INTO post_embed_images (post_uri, seq, cid, mime_type, alt, width, height) VALUES ($1, $2, $3, $4, $5, $6, $7)").await?;

    let mut count = 0;
    for (idx, image) in embed.images.iter().enumerate() {
        let cid = image.image.r#ref.to_string();
        let width = image.aspect_ratio.as_ref().map(|v| v.width);
        let height = image.aspect_ratio.as_ref().map(|v| v.height);

        count += conn
            .execute(
                &stmt,
                &[
                    &post,
                    &(idx as i16),
                    &cid,
                    &image.image.mime_type,
                    &image.alt,
                    &width,
                    &height,
                ],
            )
            .await?;
    }

    Ok(count)
}

/// Inserts the video embed row plus one row per caption track.
async fn post_embed_video_insert<C: GenericClient>(
    conn: &mut C,
    post: &str,
    embed: AppBskyEmbedVideo,
) -> PgExecResult {
    let cid = embed.video.r#ref.to_string();
    let width = embed.aspect_ratio.as_ref().map(|v| v.width);
    let height = embed.aspect_ratio.as_ref().map(|v| v.height);

    let count = conn.execute(
        "INSERT INTO post_embed_video (post_uri, cid, mime_type, alt, width, height) VALUES ($1, $2, $3, $4, $5, $6)",
        &[&post, &cid, &embed.video.mime_type, &embed.alt, &width, &height],
    ).await?;

    if let Some(captions) = embed.captions {
        let stmt = conn.prepare_cached("INSERT INTO post_embed_video_captions (post_uri, cid, mime_type, language) VALUES ($1, $2, $3, $4)").await?;

        for caption in captions {
            let cid = caption.file.r#ref.to_string();
            conn.execute(
                &stmt,
                &[&post, &cid, &caption.file.mime_type, &caption.lang],
            )
            .await?;
        }
    }

    Ok(count)
}

/// Inserts an external-link embed row (thumb fields are optional).
async fn post_embed_external_insert<C: GenericClient>(
    conn: &mut C,
    post: &str,
    embed: AppBskyEmbedExternal,
) -> PgExecResult {
    let thumb_mime = embed.external.thumb.as_ref().map(|v| v.mime_type.clone());
    let thumb_cid = embed.external.thumb.as_ref().map(|v| v.r#ref.to_string());

    conn.execute(
        "INSERT INTO post_embed_ext (post_uri, uri, title, description, thumb_mime_type, thumb_cid) VALUES ($1, $2, $3, $4, $5, $6)",
        &[&post, &embed.external.uri, &embed.external.title, &embed.external.description, &thumb_mime, &thumb_cid],
    ).await
}

/// Inserts a record (quote) embed row, computing whether the quote should be
/// marked detached based on an existing postgate.
async fn post_embed_record_insert<C: GenericClient>(
    conn: &mut C,
    post: &str,
    embed: AppBskyEmbedRecord,
    post_created_at: DateTime<Utc>,
) -> PgExecResult {
    // at://<did>/<collection>/<rkey> — strip the scheme, then take the
    // collection segment. strip_prefix + get-style iteration avoid the panics
    // a fixed `[5..]` byte slice and `parts[1]` index would hit on a
    // malformed or truncated URI.
    let uri = embed
        .record
        .uri
        .strip_prefix("at://")
        .unwrap_or(&embed.record.uri);
    let record_type = uri.split('/').nth(1).unwrap_or_default();

    let detached = if record_type == "app.bsky.feed.post" {
        // NOTE(review): this looks up a postgate keyed on the *embedding*
        // post's URI, mirroring the previous diesel implementation — confirm
        // it should not instead key on the quoted record's URI.
        let postgate_effective: Option<DateTime<Utc>> = conn
            .query_opt(
                "SELECT created_at FROM postgates WHERE post_uri=$1",
                &[&post],
            )
            .await?
            .map(|v| v.get(0));

        // Detached when the postgate predates the earlier of "now" and the
        // post's own creation time; no postgate means not detached.
        postgate_effective
            .map(|v| Utc::now().min(post_created_at) > v)
            .unwrap_or_default()
    } else {
        false
    };

    conn.execute(
        "INSERT INTO post_embed_record (post_uri, record_type, uri, cid, detached) VALUES ($1, $2, $3, $4, $5)",
        &[&post, &record_type, &embed.record.uri, &embed.record.cid.to_string(), &detached],
    ).await
}

/// Inserts or updates a postgate (see sql/postgate_upsert.sql).
pub async fn postgate_upsert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    cid: Cid,
    rec: &AppBskyFeedPostgate,
) -> PgExecResult {
    // Embedding rules are stored as their canonical string identifiers.
    let rules = rec
        .embedding_rules
        .iter()
        .map(|v| v.as_str().to_string())
        .collect::<Vec<_>>();

    conn.execute(
        include_str!("sql/postgate_upsert.sql"),
        &[
            &at_uri,
            &cid.to_string(),
            &rec.post,
            &rec.detached_embedding_uris,
            &rules,
            &rec.created_at,
        ],
    )
    .await
}

/// Deletes a postgate record.
pub async fn postgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM postgates WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Applies a postgate's detach list via the maintain_postgates() SQL function.
pub async fn postgate_maintain_detaches<C: GenericClient>(
    conn: &mut C,
    post: &str,
    detached: &[String],
    disable_effective: Option<NaiveDateTime>,
) -> PgExecResult {
    conn.execute(
        "SELECT maintain_postgates($1, $2, $3)",
        &[&post, &detached, &disable_effective],
    )
    .await
}

/// Inserts or updates an actor profile (see sql/profile_upsert.sql).
pub async fn profile_upsert<C: GenericClient>(
    conn: &mut C,
    repo: &str,
    cid: Cid,
    rec: AppBskyActorProfile,
) -> PgExecResult {
    let cid = cid.to_string();
    let avatar = blob_ref(rec.avatar);
    let banner = blob_ref(rec.banner);
    let (pinned_uri, pinned_cid) = strongref_to_parts(rec.pinned_post.as_ref());
    let (joined_sp_uri, joined_sp_cid) = strongref_to_parts(rec.joined_via_starter_pack.as_ref());

    conn.execute(
        include_str!("sql/profile_upsert.sql"),
        &[
            &repo,
            &cid,
            &avatar,
            &banner,
            &rec.display_name,
            &rec.description,
            &pinned_uri,
            &pinned_cid,
            &joined_sp_uri,
            &joined_sp_cid,
            // unwrap_or_else: only ask for the current time when created_at
            // is actually absent (profiles may omit it).
            &rec.created_at.unwrap_or_else(Utc::now).naive_utc(),
        ],
    )
    .await
}

/// Deletes an actor profile.
pub async fn profile_delete<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
    conn.execute("DELETE FROM profiles WHERE did=$1", &[&repo])
        .await
}

/// Records a repost by `repo` of the referenced post.
pub async fn repost_insert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    rec: AppBskyFeedRepost,
) -> PgExecResult {
    conn.execute(
        "INSERT INTO reposts (at_uri, did, post, post_cid, created_at) VALUES ($1, $2, $3, $4, $5)",
        &[
            &at_uri,
            &repo,
            &rec.subject.uri,
            &rec.subject.cid.to_string(),
            &rec.created_at,
        ],
    )
    .await
}

/// Deletes a repost. Returns the reposted post's URI when a row was removed
/// so callers can maintain aggregates.
pub async fn repost_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgOptResult<String> {
    let res = conn
        .query_opt(
            "DELETE FROM reposts WHERE at_uri=$1 RETURNING post",
            &[&at_uri],
        )
        .await?;

    Ok(res.map(|v| v.get(0)))
}

/// Inserts or updates a starter pack (see sql/starterpack_upsert.sql).
pub async fn starter_pack_upsert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    cid: Cid,
    rec: AppBskyGraphStarterPack,
) -> PgExecResult {
    let cid = cid.to_string();
    // A record we just decoded always round-trips to a JSON value.
    let record = serde_json::to_value(&rec).expect("decoded starter pack must serialize");
    let description_facets = rec
        .description_facets
        .and_then(|v| serde_json::to_value(v).ok());
    // Only the feed URIs are persisted; the rest of the feed refs live in
    // `record`.
    let feeds = rec
        .feeds
        .map(|v| v.into_iter().map(|item| item.uri).collect::<Vec<_>>());

    conn.execute(
        include_str!("sql/starterpack_upsert.sql"),
        &[
            &at_uri,
            &repo,
            &cid,
            &record,
            &rec.name,
            &rec.description,
            &description_facets,
            &rec.list,
            &feeds,
            &rec.created_at,
        ],
    )
    .await
}

/// Deletes a starter pack record.
pub async fn starter_pack_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM starterpacks WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Inserts or updates a threadgate (see sql/threadgate_upsert.sql).
pub async fn threadgate_upsert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    cid: Cid,
    rec: AppBskyFeedThreadgate,
) -> PgExecResult {
    let record = serde_json::to_value(&rec).expect("decoded threadgate must serialize");

    // List-based allow rules are additionally denormalized into their own
    // column for cheap membership checks.
    let allowed_lists = rec
        .allow
        .iter()
        .filter_map(|rule| match rule {
            ThreadgateRule::List { list } => Some(list.clone()),
            _ => None,
        })
        .collect::<Vec<_>>();

    let allow = rec
        .allow
        .into_iter()
        .map(|v| v.as_str().to_string())
        .collect::<Vec<_>>();

    conn.execute(
        include_str!("sql/threadgate_upsert.sql"),
        &[
            &at_uri,
            &cid.to_string(),
            &rec.post,
            &rec.hidden_replies,
            &allow,
            &allowed_lists,
            &record,
            &rec.created_at,
        ],
    )
    .await
}

/// Deletes a threadgate record.
pub async fn threadgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM threadgates WHERE at_uri=$1", &[&at_uri])
        .await
}

/// Records a verification issued by `repo` for `rec.subject`; idempotent via
/// ON CONFLICT.
pub async fn verification_insert<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    cid: Cid,
    rec: AppBskyGraphVerification,
) -> PgExecResult {
    let cid = cid.to_string();

    conn.execute(
        "INSERT INTO verification (at_uri, verifier, cid, subject, handle, display_name, created_at) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT DO NOTHING",
        &[&at_uri, &repo, &cid, &rec.subject, &rec.handle, &rec.display_name, &rec.created_at],
    ).await
}

/// Deletes a verification record.
pub async fn verification_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    conn.execute("DELETE FROM verification WHERE at_uri=$1", &[&at_uri])
        .await
}
+11
consumer/src/db/sql/feedgen_upsert.sql
-- Insert-or-update a feed generator record; on conflict refresh every mutable
-- column and bump indexed_at (created_at is preserved from the first insert).
INSERT INTO feedgens (at_uri, owner, cid, service_did, content_mode, name,
                      description, description_facets, avatar_cid, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (at_uri) DO UPDATE
    SET cid                = EXCLUDED.cid,
        service_did        = EXCLUDED.service_did,
        content_mode       = EXCLUDED.content_mode,
        name               = EXCLUDED.name,
        description        = EXCLUDED.description,
        description_facets = EXCLUDED.description_facets,
        avatar_cid         = EXCLUDED.avatar_cid,
        indexed_at         = NOW()
+9
consumer/src/db/sql/label_defs_upsert.sql
-- Insert-or-update one label definition for a labeler; on conflict refresh all
-- mutable columns and bump indexed_at.
INSERT INTO labeler_defs (labeler, label_identifier, severity, blurs, default_setting, adult_only, locales)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (labeler, label_identifier) DO UPDATE
    SET severity        = EXCLUDED.severity,
        blurs           = EXCLUDED.blurs,
        default_setting = EXCLUDED.default_setting,
        adult_only      = EXCLUDED.adult_only,
        locales         = EXCLUDED.locales,
        indexed_at      = NOW()
+7
consumer/src/db/sql/label_service_upsert.sql
-- Insert-or-update a labeler service record keyed by DID; on conflict refresh
-- all mutable columns and bump indexed_at.
INSERT INTO labelers (did, cid, reasons, subject_types, subject_collections)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (did) DO UPDATE
    SET cid                 = EXCLUDED.cid,
        reasons             = EXCLUDED.reasons,
        subject_types       = EXCLUDED.subject_types,
        subject_collections = EXCLUDED.subject_collections,
        indexed_at          = NOW()
+9
consumer/src/db/sql/list_upsert.sql
-- Insert-or-update a list record; on conflict refresh every mutable column
-- and bump indexed_at (created_at is preserved from the first insert).
INSERT INTO lists (at_uri, owner, cid, list_type, name, description, description_facets, avatar_cid, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
ON CONFLICT (at_uri) DO UPDATE
    SET cid                = EXCLUDED.cid,
        list_type          = EXCLUDED.list_type,
        name               = EXCLUDED.name,
        description        = EXCLUDED.description,
        description_facets = EXCLUDED.description_facets,
        avatar_cid         = EXCLUDED.avatar_cid,
        indexed_at         = NOW()
+4
consumer/src/db/sql/post_insert.sql
-- Insert a post row; duplicates (replayed firehose events) are silently
-- ignored so the caller can treat re-delivery as a no-op.
INSERT INTO posts (at_uri, did, cid, record, content, facets, languages, tags,
                   parent_uri, parent_cid, root_uri, root_cid, embed,
                   embed_subtype, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
ON CONFLICT DO NOTHING
+7
consumer/src/db/sql/postgate_upsert.sql
-- Insert-or-update a postgate record; on conflict refresh every mutable
-- column and bump indexed_at.
INSERT INTO postgates (at_uri, cid, post_uri, detached, rules, created_at)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (at_uri) DO UPDATE
    SET cid        = EXCLUDED.cid,
        post_uri   = EXCLUDED.post_uri,
        detached   = EXCLUDED.detached,
        rules      = EXCLUDED.rules,
        indexed_at = NOW()
+13
consumer/src/db/sql/profile_upsert.sql
-- Insert-or-update an actor profile keyed by DID; on conflict refresh every
-- mutable column and bump indexed_at (created_at is preserved).
INSERT INTO profiles (did, cid, avatar_cid, banner_cid, display_name, description,
                      pinned_uri, pinned_cid, joined_sp_uri, joined_sp_cid, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (did) DO UPDATE
    SET cid           = EXCLUDED.cid,
        avatar_cid    = EXCLUDED.avatar_cid,
        banner_cid    = EXCLUDED.banner_cid,
        display_name  = EXCLUDED.display_name,
        description   = EXCLUDED.description,
        pinned_uri    = EXCLUDED.pinned_uri,
        pinned_cid    = EXCLUDED.pinned_cid,
        joined_sp_uri = EXCLUDED.joined_sp_uri,
        joined_sp_cid = EXCLUDED.joined_sp_cid,
        indexed_at    = NOW()
+10
consumer/src/db/sql/starterpack_upsert.sql
-- Insert-or-update a starter pack; on conflict refresh every mutable column
-- and bump indexed_at (created_at is preserved from the first insert).
INSERT INTO starterpacks (at_uri, owner, cid, record, name, description,
                          description_facets, list, feeds, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT (at_uri) DO UPDATE
    SET cid                = EXCLUDED.cid,
        record             = EXCLUDED.record,
        name               = EXCLUDED.name,
        description        = EXCLUDED.description,
        description_facets = EXCLUDED.description_facets,
        list               = EXCLUDED.list,
        feeds              = EXCLUDED.feeds,
        indexed_at         = NOW()
+8
consumer/src/db/sql/threadgate_upsert.sql
-- Insert-or-update a threadgate; on conflict refresh every mutable column
-- (post_uri is part of the record and not updated) and bump indexed_at.
INSERT INTO threadgates (at_uri, cid, post_uri, hidden_replies, allow, allowed_lists, record, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (at_uri) DO UPDATE
    SET cid            = EXCLUDED.cid,
        hidden_replies = EXCLUDED.hidden_replies,
        allow          = EXCLUDED.allow,
        allowed_lists  = EXCLUDED.allowed_lists,
        record         = EXCLUDED.record,
        indexed_at     = NOW()
+2 -2
consumer/src/firehose/error.rs
··· 1 - use thiserror::Error; 2 1 use std::io::Error as IoError; 2 + use thiserror::Error; 3 3 4 4 #[derive(Debug, Error)] 5 5 pub enum FirehoseError { ··· 9 9 IpldCbor(#[from] serde_ipld_dagcbor::error::DecodeError<IoError>), 10 10 #[error("{0}")] 11 11 Websocket(#[from] tokio_tungstenite::tungstenite::error::Error), 12 - } 12 + }
+4 -1
consumer/src/firehose/mod.rs
··· 140 140 match err { 141 141 WsError::Protocol(ProtocolError::ResetWithoutClosingHandshake) 142 142 | WsError::ConnectionClosed => true, 143 - WsError::Io(ioerr) => matches!(ioerr.kind(), ErrorKind::BrokenPipe | ErrorKind::ConnectionReset), 143 + WsError::Io(ioerr) => matches!( 144 + ioerr.kind(), 145 + ErrorKind::BrokenPipe | ErrorKind::ConnectionReset 146 + ), 144 147 _ => false, 145 148 } 146 149 }
-971
consumer/src/indexer/db.rs
··· 1 - use super::records::{self, AppBskyEmbed}; 2 - use crate::utils::{blob_ref, empty_str_as_none, strongref_to_parts}; 3 - use chrono::prelude::*; 4 - use diesel::prelude::*; 5 - use diesel::sql_types::{Array, Nullable, Text, Timestamp}; 6 - use diesel_async::{AsyncPgConnection, RunQueryDsl}; 7 - use ipld_core::cid::Cid; 8 - use lexica::com_atproto::label::{LabelValueDefinition, SelfLabels}; 9 - use parakeet_db::{models, schema, types}; 10 - use std::collections::HashMap; 11 - 12 - pub async fn write_record( 13 - conn: &mut AsyncPgConnection, 14 - at_uri: &str, 15 - repo: &str, 16 - cid: Cid, 17 - ) -> QueryResult<usize> { 18 - let cid = cid.to_string(); 19 - 20 - diesel::insert_into(schema::records::table) 21 - .values(( 22 - schema::records::at_uri.eq(at_uri), 23 - schema::records::did.eq(repo), 24 - schema::records::cid.eq(&cid), 25 - )) 26 - .on_conflict(schema::records::at_uri) 27 - .do_update() 28 - .set(schema::records::cid.eq(&cid)) 29 - .execute(conn) 30 - .await 31 - } 32 - 33 - pub async fn delete_record(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 34 - diesel::delete(schema::records::table) 35 - .filter(schema::records::at_uri.eq(at_uri)) 36 - .execute(conn) 37 - .await 38 - } 39 - 40 - pub async fn write_backfill_row( 41 - conn: &mut AsyncPgConnection, 42 - repo: &str, 43 - rev: &str, 44 - cid: Cid, 45 - data: serde_json::Value, 46 - ) -> QueryResult<usize> { 47 - diesel::insert_into(schema::backfill::table) 48 - .values(models::NewBackfillRow { 49 - repo, 50 - repo_ver: rev, 51 - cid: cid.to_string(), 52 - data, 53 - }) 54 - .execute(conn) 55 - .await 56 - } 57 - 58 - pub async fn get_repo_info( 59 - conn: &mut AsyncPgConnection, 60 - repo: &str, 61 - ) -> QueryResult<Option<(Option<String>, types::ActorSyncState)>> { 62 - schema::actors::table 63 - .select((schema::actors::repo_rev, schema::actors::sync_state)) 64 - .find(repo) 65 - .get_result(conn) 66 - .await 67 - .optional() 68 - } 69 - 70 - pub async fn upsert_actor( 
71 - conn: &mut AsyncPgConnection, 72 - did: &str, 73 - handle: Option<Option<String>>, 74 - status: Option<types::ActorStatus>, 75 - sync_state: Option<types::ActorSyncState>, 76 - time: DateTime<Utc>, 77 - ) -> QueryResult<usize> { 78 - let data = models::NewActor { 79 - did, 80 - handle, 81 - status, 82 - sync_state, 83 - last_indexed: Some(time.naive_utc()), 84 - }; 85 - 86 - diesel::insert_into(schema::actors::table) 87 - .values(&data) 88 - .on_conflict(schema::actors::did) 89 - .do_update() 90 - .set(&data) 91 - .execute(conn) 92 - .await 93 - } 94 - 95 - pub async fn account_status_and_rev( 96 - conn: &mut AsyncPgConnection, 97 - did: &str, 98 - ) -> QueryResult<Option<(types::ActorStatus, Option<String>)>> { 99 - schema::actors::table 100 - .select((schema::actors::status, schema::actors::repo_rev)) 101 - .for_update() 102 - .find(did) 103 - .get_result(conn) 104 - .await 105 - .optional() 106 - } 107 - 108 - /// Attempts to update a repo to the given version. 109 - /// returns false if the repo doesn't exist or is too new, or true if the update succeeded. 
110 - pub async fn update_repo_version( 111 - conn: &mut AsyncPgConnection, 112 - repo: &str, 113 - rev: &str, 114 - cid: Cid, 115 - ) -> QueryResult<usize> { 116 - diesel::update(schema::actors::table) 117 - .set(( 118 - schema::actors::repo_rev.eq(rev), 119 - schema::actors::repo_cid.eq(cid.to_string()), 120 - )) 121 - .filter(schema::actors::did.eq(repo)) 122 - .execute(conn) 123 - .await 124 - } 125 - 126 - pub async fn insert_block( 127 - conn: &mut AsyncPgConnection, 128 - repo: &str, 129 - at_uri: &str, 130 - rec: records::AppBskyGraphBlock, 131 - ) -> QueryResult<usize> { 132 - diesel::insert_into(schema::blocks::table) 133 - .values(&models::NewBlock { 134 - at_uri, 135 - did: repo, 136 - subject: &rec.subject, 137 - created_at: rec.created_at.naive_utc(), 138 - }) 139 - .on_conflict_do_nothing() 140 - .execute(conn) 141 - .await 142 - } 143 - 144 - pub async fn delete_block(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 145 - diesel::delete(schema::blocks::table) 146 - .filter(schema::blocks::at_uri.eq(at_uri)) 147 - .execute(conn) 148 - .await 149 - } 150 - 151 - pub async fn insert_follow( 152 - conn: &mut AsyncPgConnection, 153 - repo: &str, 154 - at_uri: &str, 155 - rec: records::AppBskyGraphFollow, 156 - ) -> QueryResult<usize> { 157 - diesel::insert_into(schema::follows::table) 158 - .values(&models::NewFollow { 159 - at_uri, 160 - did: repo, 161 - subject: &rec.subject, 162 - created_at: rec.created_at.naive_utc(), 163 - }) 164 - .on_conflict_do_nothing() 165 - .execute(conn) 166 - .await 167 - } 168 - 169 - pub async fn delete_follow( 170 - conn: &mut AsyncPgConnection, 171 - at_uri: &str, 172 - ) -> QueryResult<Option<String>> { 173 - diesel::delete(schema::follows::table) 174 - .filter(schema::follows::at_uri.eq(at_uri)) 175 - .returning(schema::follows::subject) 176 - .get_result(conn) 177 - .await 178 - .optional() 179 - } 180 - 181 - pub async fn upsert_profile( 182 - conn: &mut AsyncPgConnection, 183 - repo: &str, 184 - 
cid: Cid, 185 - rec: records::AppBskyActorProfile, 186 - ) -> QueryResult<usize> { 187 - let (pinned_uri, pinned_cid) = strongref_to_parts(rec.pinned_post.as_ref()); 188 - let (joined_sp_uri, joined_sp_cid) = strongref_to_parts(rec.joined_via_starter_pack.as_ref()); 189 - 190 - let data = models::UpsertProfile { 191 - did: repo, 192 - cid: cid.to_string(), 193 - avatar_cid: blob_ref(rec.avatar), 194 - banner_cid: blob_ref(rec.banner), 195 - display_name: rec.display_name, 196 - description: rec.description, 197 - pinned_uri, 198 - pinned_cid, 199 - joined_sp_uri, 200 - joined_sp_cid, 201 - created_at: rec.created_at.map(|val| val.naive_utc()), 202 - indexed_at: Utc::now().naive_utc(), 203 - }; 204 - 205 - diesel::insert_into(schema::profiles::table) 206 - .values(&data) 207 - .on_conflict(schema::profiles::did) 208 - .do_update() 209 - .set(&data) 210 - .execute(conn) 211 - .await 212 - } 213 - 214 - pub async fn delete_profile(conn: &mut AsyncPgConnection, repo: &str) -> QueryResult<usize> { 215 - diesel::delete(schema::profiles::table) 216 - .filter(schema::profiles::did.eq(repo)) 217 - .execute(conn) 218 - .await 219 - } 220 - 221 - pub async fn upsert_list( 222 - conn: &mut AsyncPgConnection, 223 - repo: &str, 224 - at_uri: &str, 225 - cid: Cid, 226 - rec: records::AppBskyGraphList, 227 - ) -> QueryResult<usize> { 228 - let description_facets = rec 229 - .description_facets 230 - .and_then(|v| serde_json::to_value(v).ok()); 231 - 232 - let data = models::UpsertList { 233 - at_uri, 234 - owner: repo, 235 - cid: cid.to_string(), 236 - list_type: &rec.purpose, 237 - name: &rec.name, 238 - description: rec.description, 239 - description_facets, 240 - avatar_cid: blob_ref(rec.avatar), 241 - created_at: rec.created_at.naive_utc(), 242 - indexed_at: Utc::now().naive_utc(), 243 - }; 244 - 245 - diesel::insert_into(schema::lists::table) 246 - .values(&data) 247 - .on_conflict(schema::lists::at_uri) 248 - .do_update() 249 - .set(&data) 250 - .execute(conn) 251 - .await 
252 - } 253 - 254 - pub async fn delete_list(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 255 - diesel::delete(schema::lists::table) 256 - .filter(schema::lists::at_uri.eq(at_uri)) 257 - .execute(conn) 258 - .await 259 - } 260 - 261 - pub async fn insert_list_block( 262 - conn: &mut AsyncPgConnection, 263 - repo: &str, 264 - at_uri: &str, 265 - rec: records::AppBskyGraphListBlock, 266 - ) -> QueryResult<usize> { 267 - let data = models::NewListBlock { 268 - at_uri, 269 - did: repo, 270 - list_uri: &rec.subject, 271 - created_at: rec.created_at.naive_utc(), 272 - indexed_at: Utc::now().naive_utc(), 273 - }; 274 - 275 - diesel::insert_into(schema::list_blocks::table) 276 - .values(&data) 277 - .execute(conn) 278 - .await 279 - } 280 - 281 - pub async fn delete_list_block(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 282 - diesel::delete(schema::list_blocks::table) 283 - .filter(schema::list_blocks::at_uri.eq(at_uri)) 284 - .execute(conn) 285 - .await 286 - } 287 - 288 - pub async fn insert_list_item( 289 - conn: &mut AsyncPgConnection, 290 - at_uri: &str, 291 - rec: records::AppBskyGraphListItem, 292 - ) -> QueryResult<usize> { 293 - let data = models::NewListItem { 294 - at_uri, 295 - list_uri: &rec.list, 296 - subject: &rec.subject, 297 - created_at: rec.created_at.naive_utc(), 298 - indexed_at: Utc::now().naive_utc(), 299 - }; 300 - 301 - diesel::insert_into(schema::list_items::table) 302 - .values(&data) 303 - .execute(conn) 304 - .await 305 - } 306 - 307 - pub async fn delete_list_item(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 308 - diesel::delete(schema::list_items::table) 309 - .filter(schema::list_items::at_uri.eq(at_uri)) 310 - .execute(conn) 311 - .await 312 - } 313 - 314 - pub async fn upsert_feedgen( 315 - conn: &mut AsyncPgConnection, 316 - repo: &str, 317 - cid: Cid, 318 - at_uri: &str, 319 - rec: records::AppBskyFeedGenerator, 320 - ) -> QueryResult<usize> { 321 - let 
description_facets = rec 322 - .description_facets 323 - .and_then(|v| serde_json::to_value(v).ok()); 324 - 325 - let data = models::UpsertFeedGen { 326 - at_uri, 327 - cid: &cid.to_string(), 328 - owner: repo, 329 - service_did: &rec.did, 330 - content_mode: rec.content_mode, 331 - name: &rec.display_name, 332 - description: rec.description, 333 - description_facets, 334 - avatar_cid: blob_ref(rec.avatar), 335 - accepts_interactions: rec.accepts_interactions, 336 - created_at: rec.created_at.naive_utc(), 337 - indexed_at: Utc::now().naive_utc(), 338 - }; 339 - 340 - diesel::insert_into(schema::feedgens::table) 341 - .values(&data) 342 - .on_conflict(schema::feedgens::at_uri) 343 - .do_update() 344 - .set(&data) 345 - .execute(conn) 346 - .await 347 - } 348 - 349 - pub async fn delete_feedgen(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 350 - diesel::delete(schema::feedgens::table) 351 - .filter(schema::feedgens::at_uri.eq(at_uri)) 352 - .execute(conn) 353 - .await 354 - } 355 - 356 - pub async fn insert_post( 357 - conn: &mut AsyncPgConnection, 358 - did: &str, 359 - cid: Cid, 360 - at_uri: &str, 361 - rec: records::AppBskyFeedPost, 362 - ) -> QueryResult<usize> { 363 - let record = serde_json::to_value(&rec).unwrap(); 364 - let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok()); 365 - 366 - let embed = rec.embed.as_ref().map(|v| v.as_str()); 367 - let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype()); 368 - 369 - let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent)); 370 - let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root)); 371 - 372 - let res = diesel::insert_into(schema::posts::table) 373 - .values(models::NewPost { 374 - at_uri, 375 - cid: cid.to_string(), 376 - did, 377 - record, 378 - content: &rec.text, 379 - facets, 380 - languages: rec.langs.unwrap_or_default(), 381 - tags: rec.tags.unwrap_or_default(), 382 - parent_uri, 383 - parent_cid, 384 - 
root_uri, 385 - root_cid, 386 - embed, 387 - embed_subtype, 388 - created_at: rec.created_at.naive_utc(), 389 - }) 390 - .execute(conn) 391 - .await?; 392 - 393 - match rec.embed.and_then(|v| v.into_bsky()) { 394 - Some(AppBskyEmbed::Images(embed)) => insert_post_embed_images(conn, at_uri, embed).await, 395 - Some(AppBskyEmbed::Video(embed)) => insert_post_embed_video(conn, at_uri, embed).await, 396 - Some(AppBskyEmbed::External(embed)) => insert_post_embed_ext(conn, at_uri, embed).await, 397 - Some(AppBskyEmbed::Record(embed)) => { 398 - insert_post_embed_record(conn, at_uri, embed, rec.created_at).await 399 - } 400 - Some(AppBskyEmbed::RecordWithMedia(embed)) => { 401 - insert_post_embed_record(conn, at_uri, embed.record, rec.created_at).await?; 402 - match *embed.media { 403 - AppBskyEmbed::Images(embed) => insert_post_embed_images(conn, at_uri, embed).await, 404 - AppBskyEmbed::Video(embed) => insert_post_embed_video(conn, at_uri, embed).await, 405 - AppBskyEmbed::External(embed) => insert_post_embed_ext(conn, at_uri, embed).await, 406 - _ => unreachable!(), 407 - } 408 - } 409 - _ => Ok(res), 410 - } 411 - } 412 - 413 - async fn insert_post_embed_images( 414 - conn: &mut AsyncPgConnection, 415 - at_uri: &str, 416 - rec: records::AppBskyEmbedImages, 417 - ) -> QueryResult<usize> { 418 - let images = rec 419 - .images 420 - .into_iter() 421 - .enumerate() 422 - .map(|(idx, img)| models::NewPostEmbedImage { 423 - post_uri: at_uri, 424 - seq: idx as i16, 425 - mime_type: img.image.mime_type, 426 - cid: img.image.r#ref.to_string(), 427 - alt: empty_str_as_none(img.alt), 428 - width: img.aspect_ratio.as_ref().map(|v| v.width), 429 - height: img.aspect_ratio.map(|v| v.height), 430 - }) 431 - .collect::<Vec<_>>(); 432 - 433 - diesel::insert_into(schema::post_embed_images::table) 434 - .values(images) 435 - .execute(conn) 436 - .await 437 - } 438 - 439 - async fn insert_post_embed_video( 440 - conn: &mut AsyncPgConnection, 441 - at_uri: &str, 442 - rec: 
records::AppBskyEmbedVideo, 443 - ) -> QueryResult<usize> { 444 - let res = diesel::insert_into(schema::post_embed_video::table) 445 - .values(models::NewPostEmbedVideo { 446 - post_uri: at_uri, 447 - mime_type: &rec.video.mime_type, 448 - cid: rec.video.r#ref.to_string(), 449 - alt: rec.alt, 450 - width: rec.aspect_ratio.as_ref().map(|v| v.width), 451 - height: rec.aspect_ratio.map(|v| v.height), 452 - }) 453 - .execute(conn) 454 - .await?; 455 - 456 - match rec.captions { 457 - Some(captions) => insert_post_embed_video_captions(conn, at_uri, &captions).await, 458 - None => Ok(res), 459 - } 460 - } 461 - 462 - async fn insert_post_embed_video_captions( 463 - conn: &mut AsyncPgConnection, 464 - at_uri: &str, 465 - captions: &[records::EmbedVideoCaptions], 466 - ) -> QueryResult<usize> { 467 - let captions = captions 468 - .iter() 469 - .map(|caption| models::NewPostEmbedVideoCaption { 470 - post_uri: at_uri, 471 - language: caption.lang.clone(), 472 - mime_type: caption.file.mime_type.clone(), 473 - cid: caption.file.r#ref.to_string(), 474 - }) 475 - .collect::<Vec<_>>(); 476 - 477 - diesel::insert_into(schema::post_embed_video_captions::table) 478 - .values(captions) 479 - .execute(conn) 480 - .await 481 - } 482 - 483 - async fn insert_post_embed_ext( 484 - conn: &mut AsyncPgConnection, 485 - at_uri: &str, 486 - rec: records::AppBskyEmbedExternal, 487 - ) -> QueryResult<usize> { 488 - diesel::insert_into(schema::post_embed_ext::table) 489 - .values(models::NewPostEmbedExt { 490 - post_uri: at_uri, 491 - uri: &rec.external.uri, 492 - title: &rec.external.title, 493 - description: &rec.external.description, 494 - thumb_mime_type: rec.external.thumb.as_ref().map(|v| v.mime_type.clone()), 495 - thumb_cid: rec.external.thumb.as_ref().map(|v| v.r#ref.to_string()), 496 - }) 497 - .execute(conn) 498 - .await 499 - } 500 - 501 - async fn insert_post_embed_record( 502 - conn: &mut AsyncPgConnection, 503 - at_uri: &str, 504 - rec: records::AppBskyEmbedRecord, 505 - 
post_created_at: DateTime<Utc>, 506 - ) -> QueryResult<usize> { 507 - // strip "at://" then break into parts by '/' 508 - let parts = rec.record.uri[5..].split('/').collect::<Vec<_>>(); 509 - 510 - let detached = if parts[1] == "app.bsky.feed.post" { 511 - // do a lookup on if we have a postgate for this record 512 - let postgate_effective = schema::postgates::table 513 - .select(schema::postgates::created_at) 514 - .filter(schema::postgates::post_uri.eq(at_uri)) 515 - .get_result::<DateTime<Utc>>(conn) 516 - .await 517 - .optional()?; 518 - 519 - postgate_effective.map(|v| Utc::now().min(post_created_at) > v) 520 - } else { 521 - None 522 - }; 523 - 524 - diesel::insert_into(schema::post_embed_record::table) 525 - .values(models::NewPostEmbedRecord { 526 - post_uri: at_uri, 527 - record_type: parts[1], 528 - uri: &rec.record.uri, 529 - cid: rec.record.cid.to_string(), 530 - detached, 531 - }) 532 - .execute(conn) 533 - .await 534 - } 535 - 536 - pub async fn delete_post(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 537 - diesel::delete(schema::posts::table) 538 - .filter(schema::posts::at_uri.eq(at_uri)) 539 - .execute(conn) 540 - .await 541 - } 542 - 543 - pub async fn get_post_info_for_delete( 544 - conn: &mut AsyncPgConnection, 545 - at_uri: &str, 546 - ) -> QueryResult<Option<(Option<String>, Option<String>)>> { 547 - schema::posts::table 548 - .left_join( 549 - schema::post_embed_record::table 550 - .on(schema::posts::at_uri.eq(schema::post_embed_record::post_uri)), 551 - ) 552 - .select(( 553 - schema::posts::parent_uri, 554 - schema::post_embed_record::uri.nullable(), 555 - )) 556 - .filter(schema::posts::at_uri.eq(at_uri)) 557 - .get_result(conn) 558 - .await 559 - .optional() 560 - } 561 - 562 - pub async fn upsert_postgate( 563 - conn: &mut AsyncPgConnection, 564 - at_uri: &str, 565 - cid: Cid, 566 - rec: &records::AppBskyFeedPostgate, 567 - ) -> QueryResult<usize> { 568 - let rules = rec 569 - .embedding_rules 570 - .iter() 571 - 
.map(|v| v.as_str().to_string()) 572 - .collect(); 573 - 574 - let data = models::UpsertPostgate { 575 - at_uri, 576 - cid: cid.to_string(), 577 - post_uri: &rec.post, 578 - detached: &rec.detached_embedding_uris, 579 - rules, 580 - created_at: rec.created_at.naive_utc(), 581 - }; 582 - 583 - diesel::insert_into(schema::postgates::table) 584 - .values(&data) 585 - .on_conflict(schema::postgates::at_uri) 586 - .do_update() 587 - .set(&data) 588 - .execute(conn) 589 - .await 590 - } 591 - 592 - pub async fn delete_postgate(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 593 - diesel::delete(schema::postgates::table) 594 - .filter(schema::postgates::at_uri.eq(at_uri)) 595 - .execute(conn) 596 - .await 597 - } 598 - 599 - define_sql_function! {fn maintain_postgates(post: Text, detached: Array<Text>, effective: Nullable<Timestamp>)} 600 - 601 - pub async fn postgate_maintain_detaches( 602 - conn: &mut AsyncPgConnection, 603 - post: &str, 604 - detached: &[String], 605 - disable_effective: Option<NaiveDateTime>, 606 - ) -> QueryResult<usize> { 607 - diesel::select(maintain_postgates(post, detached, disable_effective)) 608 - .execute(conn) 609 - .await 610 - } 611 - 612 - pub async fn upsert_threadgate( 613 - conn: &mut AsyncPgConnection, 614 - at_uri: &str, 615 - cid: Cid, 616 - rec: records::AppBskyFeedThreadgate, 617 - ) -> QueryResult<usize> { 618 - let record = serde_json::to_value(&rec).unwrap(); 619 - 620 - let allowed_lists = rec 621 - .allow 622 - .iter() 623 - .filter_map(|rule| match rule { 624 - records::ThreadgateRule::List { list } => Some(list.clone()), 625 - _ => None, 626 - }) 627 - .collect(); 628 - 629 - let allow = rec 630 - .allow 631 - .into_iter() 632 - .map(|v| v.as_str().to_string()) 633 - .collect(); 634 - 635 - let data = models::UpsertThreadgate { 636 - at_uri, 637 - cid: cid.to_string(), 638 - post_uri: &rec.post, 639 - hidden_replies: rec.hidden_replies, 640 - allow, 641 - allowed_lists, 642 - record, 643 - created_at: 
rec.created_at.naive_utc(), 644 - }; 645 - 646 - diesel::insert_into(schema::threadgates::table) 647 - .values(&data) 648 - .on_conflict(schema::threadgates::at_uri) 649 - .do_update() 650 - .set(&data) 651 - .execute(conn) 652 - .await 653 - } 654 - 655 - pub async fn delete_threadgate(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 656 - diesel::delete(schema::threadgates::table) 657 - .filter(schema::threadgates::at_uri.eq(at_uri)) 658 - .execute(conn) 659 - .await 660 - } 661 - 662 - pub async fn insert_like( 663 - conn: &mut AsyncPgConnection, 664 - did: &str, 665 - at_uri: &str, 666 - rec: records::AppBskyFeedLike, 667 - ) -> QueryResult<usize> { 668 - let data = models::NewLike { 669 - at_uri, 670 - did, 671 - subject: &rec.subject.uri, 672 - subject_cid: rec.subject.cid.to_string(), 673 - created_at: rec.created_at.naive_utc(), 674 - }; 675 - 676 - diesel::insert_into(schema::likes::table) 677 - .values(&data) 678 - .execute(conn) 679 - .await 680 - } 681 - 682 - pub async fn delete_like( 683 - conn: &mut AsyncPgConnection, 684 - at_uri: &str, 685 - ) -> QueryResult<Option<String>> { 686 - diesel::delete(schema::likes::table) 687 - .filter(schema::likes::at_uri.eq(at_uri)) 688 - .returning(schema::likes::subject) 689 - .get_result(conn) 690 - .await 691 - .optional() 692 - } 693 - 694 - pub async fn insert_repost( 695 - conn: &mut AsyncPgConnection, 696 - did: &str, 697 - at_uri: &str, 698 - rec: records::AppBskyFeedRepost, 699 - ) -> QueryResult<usize> { 700 - let data = models::NewRepost { 701 - at_uri, 702 - did, 703 - post: &rec.subject.uri, 704 - post_cid: rec.subject.cid.to_string(), 705 - created_at: rec.created_at.naive_utc(), 706 - }; 707 - 708 - diesel::insert_into(schema::reposts::table) 709 - .values(&data) 710 - .execute(conn) 711 - .await 712 - } 713 - 714 - pub async fn delete_repost( 715 - conn: &mut AsyncPgConnection, 716 - at_uri: &str, 717 - ) -> QueryResult<Option<String>> { 718 - 
diesel::delete(schema::reposts::table) 719 - .filter(schema::reposts::at_uri.eq(at_uri)) 720 - .returning(schema::reposts::post) 721 - .get_result(conn) 722 - .await 723 - .optional() 724 - } 725 - 726 - pub async fn upsert_chat_decl( 727 - conn: &mut AsyncPgConnection, 728 - did: &str, 729 - rec: records::ChatBskyActorDeclaration, 730 - ) -> QueryResult<usize> { 731 - let data = models::NewChatDecl { 732 - did, 733 - allow_incoming: rec.allow_incoming.to_string(), 734 - }; 735 - 736 - diesel::insert_into(schema::chat_decls::table) 737 - .values(&data) 738 - .on_conflict(schema::chat_decls::did) 739 - .do_update() 740 - .set(&data) 741 - .execute(conn) 742 - .await 743 - } 744 - 745 - pub async fn delete_chat_decl(conn: &mut AsyncPgConnection, did: &str) -> QueryResult<usize> { 746 - diesel::delete(schema::chat_decls::table) 747 - .filter(schema::chat_decls::did.eq(did)) 748 - .execute(conn) 749 - .await 750 - } 751 - 752 - pub async fn upsert_starterpack( 753 - conn: &mut AsyncPgConnection, 754 - did: &str, 755 - cid: Cid, 756 - at_uri: &str, 757 - rec: records::AppBskyGraphStarterPack, 758 - ) -> QueryResult<usize> { 759 - let record = serde_json::to_value(&rec).unwrap(); 760 - 761 - let feeds = rec 762 - .feeds 763 - .map(|v| v.into_iter().map(|item| item.uri).collect()); 764 - 765 - let description_facets = rec 766 - .description_facets 767 - .and_then(|v| serde_json::to_value(v).ok()); 768 - 769 - let data = models::NewStarterPack { 770 - at_uri, 771 - cid: cid.to_string(), 772 - owner: did, 773 - record, 774 - name: &rec.name, 775 - description: rec.description, 776 - description_facets, 777 - list: &rec.list, 778 - feeds, 779 - created_at: rec.created_at.naive_utc(), 780 - indexed_at: Utc::now().naive_utc(), 781 - }; 782 - 783 - diesel::insert_into(schema::starterpacks::table) 784 - .values(&data) 785 - .on_conflict(schema::starterpacks::at_uri) 786 - .do_update() 787 - .set(&data) 788 - .execute(conn) 789 - .await 790 - } 791 - 792 - pub async fn 
delete_starterpack(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 793 - diesel::delete(schema::starterpacks::table) 794 - .filter(schema::starterpacks::at_uri.eq(at_uri)) 795 - .execute(conn) 796 - .await 797 - } 798 - 799 - pub async fn upsert_label_service( 800 - conn: &mut AsyncPgConnection, 801 - repo: &str, 802 - cid: Cid, 803 - rec: records::AppBskyLabelerService, 804 - ) -> QueryResult<usize> { 805 - let reasons = rec 806 - .reason_types 807 - .as_ref() 808 - .map(|v| v.iter().map(|v| v.to_string()).collect()); 809 - let subject_types = rec 810 - .subject_types 811 - .as_ref() 812 - .map(|v| v.iter().map(|v| v.to_string()).collect()); 813 - 814 - let data = models::UpsertLabelerService { 815 - did: repo, 816 - cid: cid.to_string(), 817 - reasons, 818 - subject_types, 819 - subject_collections: rec.subject_collections.as_ref(), 820 - indexed_at: Utc::now().naive_utc(), 821 - }; 822 - 823 - let res = diesel::insert_into(schema::labelers::table) 824 - .values(&data) 825 - .on_conflict(schema::labelers::did) 826 - .do_update() 827 - .set(&data) 828 - .execute(conn) 829 - .await?; 830 - 831 - maintain_label_defs(conn, repo, &rec).await?; 832 - 833 - Ok(res) 834 - } 835 - 836 - pub async fn delete_label_service(conn: &mut AsyncPgConnection, repo: &str) -> QueryResult<usize> { 837 - diesel::delete(schema::labelers::table) 838 - .filter(schema::labelers::did.eq(repo)) 839 - .execute(conn) 840 - .await 841 - } 842 - 843 - pub async fn maintain_label_defs( 844 - conn: &mut AsyncPgConnection, 845 - repo: &str, 846 - rec: &records::AppBskyLabelerService, 847 - ) -> QueryResult<()> { 848 - // drop any label defs not currently in the list 849 - diesel::delete(schema::labeler_defs::table) 850 - .filter( 851 - schema::labeler_defs::labeler 852 - .eq(repo) 853 - .and(schema::labeler_defs::label_identifier.ne_all(&rec.policies.label_values)), 854 - ) 855 - .execute(conn) 856 - .await?; 857 - 858 - let definitions = rec 859 - .policies 860 - 
.label_value_definitions 861 - .iter() 862 - .map(|def| (def.identifier.clone(), def)) 863 - .collect::<HashMap<String, &LabelValueDefinition>>(); 864 - 865 - for label in &rec.policies.label_values { 866 - let definition = definitions.get(label); 867 - 868 - let locales = definition.and_then(|v| serde_json::to_value(&v.locales).ok()); 869 - 870 - let data = models::UpsertLabelDefinition { 871 - labeler: repo, 872 - label_identifier: label, 873 - severity: definition.map(|v| v.severity.to_string()), 874 - blurs: definition.map(|v| v.blurs.to_string()), 875 - default_setting: definition 876 - .and_then(|v| v.default_setting) 877 - .map(|v| v.to_string()), 878 - adult_only: definition.and_then(|v| v.adult_only), 879 - locales, 880 - indexed_at: Utc::now().naive_utc(), 881 - }; 882 - 883 - diesel::insert_into(schema::labeler_defs::table) 884 - .values(&data) 885 - .on_conflict(( 886 - schema::labeler_defs::labeler, 887 - schema::labeler_defs::label_identifier, 888 - )) 889 - .do_update() 890 - .set(&data) 891 - .execute(conn) 892 - .await?; 893 - } 894 - 895 - Ok(()) 896 - } 897 - 898 - pub async fn maintain_self_labels( 899 - conn: &mut AsyncPgConnection, 900 - repo: &str, 901 - cid: Option<Cid>, 902 - at_uri: &str, 903 - self_labels: SelfLabels, 904 - ) -> QueryResult<usize> { 905 - // purge any existing self-labels 906 - diesel::delete(schema::labels::table) 907 - .filter( 908 - schema::labels::self_label 909 - .eq(true) 910 - .and(schema::labels::uri.eq(at_uri)), 911 - ) 912 - .execute(conn) 913 - .await?; 914 - 915 - let cid = cid.map(|cid| cid.to_string()); 916 - let now = Utc::now().naive_utc(); 917 - 918 - let labels = self_labels 919 - .values 920 - .iter() 921 - .map(|v| models::NewLabel { 922 - labeler: repo, 923 - label: &v.val, 924 - uri: at_uri, 925 - self_label: true, 926 - cid: cid.clone(), 927 - expires: None, 928 - sig: None, 929 - created_at: now, 930 - }) 931 - .collect::<Vec<_>>(); 932 - 933 - diesel::insert_into(schema::labels::table) 934 - 
.values(&labels) 935 - .execute(conn) 936 - .await 937 - } 938 - 939 - pub async fn upsert_verification( 940 - conn: &mut AsyncPgConnection, 941 - did: &str, 942 - cid: Cid, 943 - at_uri: &str, 944 - rec: records::AppBskyGraphVerification, 945 - ) -> QueryResult<usize> { 946 - let data = models::NewVerificationEntry { 947 - at_uri, 948 - cid: cid.to_string(), 949 - verifier: did, 950 - subject: &rec.subject, 951 - handle: &rec.handle, 952 - display_name: &rec.display_name, 953 - created_at: rec.created_at.naive_utc(), 954 - indexed_at: None, 955 - }; 956 - 957 - diesel::insert_into(schema::verification::table) 958 - .values(&data) 959 - .on_conflict(schema::verification::at_uri) 960 - .do_update() 961 - .set(&data) 962 - .execute(conn) 963 - .await 964 - } 965 - 966 - pub async fn delete_verification(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> { 967 - diesel::delete(schema::verification::table) 968 - .filter(schema::verification::at_uri.eq(at_uri)) 969 - .execute(conn) 970 - .await 971 - }
+88 -101
consumer/src/indexer/mod.rs
··· 1 1 use crate::config::HistoryMode; 2 + use crate::db; 2 3 use crate::firehose::{ 3 4 AtpAccountEvent, AtpCommitEvent, AtpIdentityEvent, CommitOp, FirehoseConsumer, FirehoseEvent, 4 5 FirehoseOutput, ··· 6 7 use crate::indexer::types::{ 7 8 AggregateDeltaStore, BackfillItem, BackfillItemInner, CollectionType, RecordTypes, 8 9 }; 10 + use deadpool_postgres::{Object, Pool, Transaction}; 9 11 use did_resolver::Resolver; 10 - use diesel_async::pooled_connection::deadpool::Pool; 11 - use diesel_async::{AsyncConnection, AsyncPgConnection}; 12 12 use foldhash::quality::RandomState; 13 13 use futures::StreamExt; 14 14 use ipld_core::cid::Cid; ··· 23 23 use tokio::sync::mpsc::{channel, Sender}; 24 24 use tracing::instrument; 25 25 26 - pub mod db; 27 26 pub mod records; 28 27 pub mod types; 29 28 ··· 41 40 } 42 41 43 42 pub struct RelayIndexer { 44 - pool: Pool<AsyncPgConnection>, 43 + pool: Pool, 45 44 redis: MultiplexedConnection, 46 45 state: RelayIndexerState, 47 46 firehose: FirehoseConsumer, ··· 51 50 52 51 impl RelayIndexer { 53 52 pub async fn new( 54 - pool: Pool<AsyncPgConnection>, 53 + pool: Pool, 55 54 redis: MultiplexedConnection, 56 55 idxc_tx: Sender<parakeet_index::AggregateDeltaReq>, 57 56 resolver: Arc<Resolver>, ··· 197 196 #[instrument(skip_all, fields(seq = identity.seq, repo = identity.did))] 198 197 async fn index_identity( 199 198 state: &RelayIndexerState, 200 - conn: &mut AsyncPgConnection, 199 + conn: &mut Object, 201 200 identity: AtpIdentityEvent, 202 201 ) -> eyre::Result<()> { 203 202 let new_handle = match state.do_handle_res { 204 203 true => resolve_handle(state, &identity.did, identity.handle).await?, 205 - false => Some(identity.handle), 204 + false => identity.handle, 206 205 }; 207 206 208 - let sync_state = (!state.do_backfill).then_some(ActorSyncState::Synced); 207 + let sync_state = match state.do_backfill { 208 + true => ActorSyncState::Dirty, 209 + false => ActorSyncState::Synced, 210 + }; 209 211 210 - db::upsert_actor( 211 - 
conn, 212 - &identity.did, 213 - new_handle, 214 - None, 215 - sync_state, 216 - identity.time, 217 - ) 218 - .await?; 212 + db::actor_upsert_handle(conn, &identity.did, sync_state, new_handle, identity.time).await?; 219 213 220 214 Ok(()) 221 215 } ··· 224 218 state: &RelayIndexerState, 225 219 did: &str, 226 220 expected_handle: Option<String>, 227 - ) -> eyre::Result<Option<Option<String>>> { 221 + ) -> eyre::Result<Option<String>> { 228 222 // Resolve the did doc 229 223 let Some(did_doc) = state.resolver.resolve_did(did).await? else { 230 224 eyre::bail!("missing did doc"); ··· 232 226 233 227 // if there's no handles in aka or the expected is none, set to none in DB. 234 228 if did_doc.also_known_as.as_ref().is_none_or(|v| v.is_empty()) || expected_handle.is_none() { 235 - return Ok(Some(None)); 229 + return Ok(None); 236 230 } 237 231 238 232 let expected = expected_handle.unwrap(); ··· 241 235 let expected_in_doc = did_doc.also_known_as.is_some_and(|v| { 242 236 v.iter() 243 237 .filter_map(|v| v.strip_prefix("at://")) 244 - .any(|v| v == &expected) 238 + .any(|v| v == expected) 245 239 }); 246 240 247 241 // if it isn't, set to invalid. 248 242 if !expected_in_doc { 249 243 tracing::warn!("Handle not in DID doc"); 250 - return Ok(Some(None)); 244 + return Ok(None); 251 245 } 252 246 253 247 // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems 254 248 // like a way to end up with really sus handles. 255 249 let Some(handle_did) = state.resolver.resolve_handle(&expected).await? else { 256 - return Ok(Some(None)); 250 + return Ok(None); 257 251 }; 258 252 259 253 // finally, check if the event did matches the handle, if not, set invalid, otherwise set the handle. 
260 254 if handle_did != did { 261 - Ok(Some(None)) 255 + Ok(None) 262 256 } else { 263 - Ok(Some(Some(expected))) 257 + Ok(Some(expected)) 264 258 } 265 259 } 266 260 267 261 #[instrument(skip_all, fields(seq = account.seq, repo = account.did))] 268 262 async fn index_account( 269 263 state: &RelayIndexerState, 270 - conn: &mut AsyncPgConnection, 264 + conn: &mut Object, 271 265 rc: &mut MultiplexedConnection, 272 266 account: AtpAccountEvent, 273 267 ) -> eyre::Result<()> { ··· 279 273 let trigger_bf = if state.do_backfill && status == ActorStatus::Active { 280 274 // check old status - if they exist (Some(*)), AND were previously != Active but not Deleted, 281 275 // AND have a rev == null, then trigger backfill. 282 - db::account_status_and_rev(conn, &account.did) 276 + db::actor_get_status_and_rev(conn, &account.did) 283 277 .await? 284 278 .is_some_and(|(old_status, old_rev)| { 285 279 old_rev.is_none() ··· 290 284 false 291 285 }; 292 286 293 - let sync_state = (!state.do_backfill).then_some(ActorSyncState::Synced); 287 + let sync_state = match state.do_backfill { 288 + true => ActorSyncState::Dirty, 289 + false => ActorSyncState::Synced, 290 + }; 294 291 295 - db::upsert_actor( 296 - conn, 297 - &account.did, 298 - None, 299 - Some(status), 300 - sync_state, 301 - account.time, 302 - ) 303 - .await?; 292 + db::actor_upsert(conn, &account.did, status, sync_state, account.time).await?; 304 293 305 294 if trigger_bf { 306 295 tracing::debug!("triggering backfill due to account coming out of inactive state"); ··· 313 302 #[instrument(skip_all, fields(seq = commit.seq, repo = commit.repo, rev = commit.rev))] 314 303 async fn index_commit( 315 304 state: &mut RelayIndexerState, 316 - conn: &mut AsyncPgConnection, 305 + conn: &mut Object, 317 306 rc: &mut MultiplexedConnection, 318 307 commit: AtpCommitEvent, 319 308 ) -> eyre::Result<()> { 320 - let (current_rev, sync_status) = db::get_repo_info(conn, &commit.repo).await?.unzip(); 309 + let (sync_status, 
current_rev) = db::actor_get_repo_status(conn, &commit.repo).await?.unzip(); 321 310 322 311 // what's the backfill status of this account? this respects locks held by the backfiller. 323 312 // we should drop events for 'dirty' and queue 'processing' ··· 338 327 } 339 328 340 329 // this is the first commit in an actor's repo - set them to Synced. 341 - db::upsert_actor( 342 - conn, 343 - &commit.repo, 344 - None, 345 - None, 346 - Some(ActorSyncState::Synced), 347 - commit.time, 348 - ) 349 - .await?; 330 + db::actor_set_sync_status(conn, &commit.repo, ActorSyncState::Synced, commit.time) 331 + .await?; 350 332 351 333 true 352 334 } ··· 354 336 tracing::debug!("found new repo from commit"); 355 337 let trigger_backfill = state.do_backfill && commit.since.is_some(); 356 338 357 - let sync_state = (!trigger_backfill).then_some(ActorSyncState::Synced); 339 + let sync_state = match trigger_backfill { 340 + true => ActorSyncState::Dirty, 341 + false => ActorSyncState::Synced, 342 + }; 358 343 359 - db::upsert_actor(conn, &commit.repo, None, None, sync_state, commit.time).await?; 344 + db::actor_upsert( 345 + conn, 346 + &commit.repo, 347 + ActorStatus::Active, 348 + sync_state, 349 + commit.time, 350 + ) 351 + .await?; 360 352 361 353 if trigger_backfill { 362 354 rc.rpush::<_, _, i32>("backfill_queue", commit.repo).await?; ··· 383 375 .await; 384 376 385 377 if is_active { 386 - conn.transaction::<_, diesel::result::Error, _>(|t| { 387 - Box::pin(async move { 388 - db::update_repo_version(t, &commit.repo, &commit.rev, commit.commit).await?; 378 + let mut t = conn.transaction().await?; 379 + db::actor_set_repo_state(&mut t, &commit.repo, &commit.rev, commit.commit).await?; 389 380 390 - for op in &commit.ops { 391 - process_op(t, &mut state.idxc_tx, &commit.repo, op, &blocks).await?; 392 - } 393 - Ok(true) 394 - }) 395 - }) 396 - .await?; 381 + for op in &commit.ops { 382 + process_op(&mut t, &mut state.idxc_tx, &commit.repo, op, &blocks).await?; 383 + } 397 384 } 
else { 398 385 let items = commit 399 386 .ops ··· 402 389 .collect::<Vec<_>>(); 403 390 let items = serde_json::to_value(items).unwrap_or_default(); 404 391 405 - db::write_backfill_row(conn, &commit.repo, &commit.rev, commit.commit, items).await?; 392 + db::backfill_write_row(conn, &commit.repo, &commit.rev, commit.commit, items).await?; 406 393 } 407 394 408 395 Ok(()) ··· 456 443 457 444 #[inline(always)] 458 445 async fn process_op( 459 - conn: &mut AsyncPgConnection, 446 + conn: &mut Transaction<'_>, 460 447 deltas: &mut impl AggregateDeltaStore, 461 448 repo: &str, 462 449 op: &CommitOp, 463 450 blocks: &HashMap<Cid, Vec<u8>>, 464 - ) -> diesel::QueryResult<()> { 451 + ) -> Result<(), tokio_postgres::Error> { 465 452 let Some((collection_raw, rkey)) = op.path.split_once("/") else { 466 453 tracing::warn!("op contained invalid path {}", op.path); 467 454 return Ok(()); ··· 512 499 } 513 500 514 501 pub async fn index_op( 515 - conn: &mut AsyncPgConnection, 502 + conn: &mut Transaction<'_>, 516 503 deltas: &mut impl AggregateDeltaStore, 517 504 repo: &str, 518 505 cid: Cid, 519 506 record: RecordTypes, 520 507 at_uri: &str, 521 508 rkey: &str, 522 - ) -> diesel::QueryResult<()> { 509 + ) -> Result<(), tokio_postgres::Error> { 523 510 match record { 524 511 RecordTypes::AppBskyActorProfile(record) => { 525 512 if rkey == "self" { 526 513 let labels = record.labels.clone(); 527 - db::upsert_profile(conn, repo, cid, record).await?; 514 + db::profile_upsert(conn, repo, cid, record).await?; 528 515 529 516 if let Some(labels) = labels { 530 517 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; ··· 533 520 } 534 521 RecordTypes::AppBskyFeedGenerator(record) => { 535 522 let labels = record.labels.clone(); 536 - let count = db::upsert_feedgen(conn, repo, cid, at_uri, record).await?; 523 + let count = db::feedgen_upsert(conn, at_uri, repo, cid, record).await?; 537 524 538 525 if let Some(labels) = labels { 539 526 db::maintain_self_labels(conn, 
repo, Some(cid), at_uri, labels).await?; ··· 545 532 } 546 533 RecordTypes::AppBskyFeedLike(record) => { 547 534 let subject = record.subject.uri.clone(); 548 - let count = db::insert_like(conn, repo, at_uri, record).await?; 535 + let count = db::like_insert(conn, at_uri, repo, record).await?; 549 536 550 537 deltas 551 538 .add_delta(&subject, AggregateType::Like, count as i32) ··· 575 562 }); 576 563 577 564 let labels = record.labels.clone(); 578 - db::insert_post(conn, repo, cid, at_uri, record).await?; 565 + db::post_insert(conn, at_uri, repo, cid, record).await?; 579 566 if let Some(labels) = labels { 580 567 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; 581 568 } ··· 600 587 .contains(&records::PostgateEmbeddingRules::Disable); 601 588 let disable_effective = has_disable_rule.then_some(record.created_at.naive_utc()); 602 589 603 - db::upsert_postgate(conn, at_uri, cid, &record).await?; 590 + db::postgate_upsert(conn, at_uri, cid, &record).await?; 604 591 605 592 db::postgate_maintain_detaches( 606 593 conn, ··· 614 601 deltas 615 602 .incr(&record.subject.uri, AggregateType::Repost) 616 603 .await; 617 - db::insert_repost(conn, repo, at_uri, record).await?; 604 + db::repost_insert(conn, at_uri, repo, record).await?; 618 605 } 619 606 RecordTypes::AppBskyFeedThreadgate(record) => { 620 607 let split_aturi = record.post.rsplitn(4, '/').collect::<Vec<_>>(); ··· 623 610 return Ok(()); 624 611 } 625 612 626 - db::upsert_threadgate(conn, at_uri, cid, record).await?; 613 + db::threadgate_upsert(conn, at_uri, cid, record).await?; 627 614 } 628 615 RecordTypes::AppBskyGraphBlock(record) => { 629 - db::insert_block(conn, repo, at_uri, record).await?; 616 + db::block_insert(conn, at_uri, repo, record).await?; 630 617 } 631 618 RecordTypes::AppBskyGraphFollow(record) => { 632 619 let subject = record.subject.clone(); 633 - let count = db::insert_follow(conn, repo, at_uri, record).await?; 620 + let count = db::follow_insert(conn, at_uri, repo, 
record).await?; 634 621 635 622 deltas 636 623 .add_delta(repo, AggregateType::Follow, count as i32) ··· 641 628 } 642 629 RecordTypes::AppBskyGraphList(record) => { 643 630 let labels = record.labels.clone(); 644 - let count = db::upsert_list(conn, repo, at_uri, cid, record).await?; 631 + let count = db::list_upsert(conn, at_uri, repo, cid, record).await?; 645 632 646 633 if let Some(labels) = labels { 647 634 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; ··· 652 639 .await; 653 640 } 654 641 RecordTypes::AppBskyGraphListBlock(record) => { 655 - db::insert_list_block(conn, repo, at_uri, record).await?; 642 + db::list_block_insert(conn, at_uri, repo, record).await?; 656 643 } 657 644 RecordTypes::AppBskyGraphListItem(record) => { 658 645 let split_aturi = record.list.rsplitn(4, '/').collect::<Vec<_>>(); ··· 662 649 return Ok(()); 663 650 } 664 651 665 - db::insert_list_item(conn, at_uri, record).await?; 652 + db::list_item_insert(conn, at_uri, record).await?; 666 653 } 667 654 RecordTypes::AppBskyGraphStarterPack(record) => { 668 - let count = db::upsert_starterpack(conn, repo, cid, at_uri, record).await?; 655 + let count = db::starter_pack_upsert(conn, at_uri, repo, cid, record).await?; 669 656 deltas 670 657 .add_delta(repo, AggregateType::ProfileStarterpack, count as i32) 671 658 .await; 672 659 } 673 660 RecordTypes::AppBskyGraphVerification(record) => { 674 - db::upsert_verification(conn, repo, cid, at_uri, record).await?; 661 + db::verification_insert(conn, at_uri, repo, cid, record).await?; 675 662 } 676 663 RecordTypes::AppBskyLabelerService(record) => { 677 664 if rkey == "self" { 678 665 let labels = record.labels.clone(); 679 - db::upsert_label_service(conn, repo, cid, record).await?; 666 + db::labeler_upsert(conn, repo, cid, record).await?; 680 667 681 668 if let Some(labels) = labels { 682 669 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; ··· 685 672 } 686 673 
RecordTypes::ChatBskyActorDeclaration(record) => { 687 674 if rkey == "self" { 688 - db::upsert_chat_decl(conn, repo, record).await?; 675 + db::chat_decl_upsert(conn, repo, record).await?; 689 676 } 690 677 } 691 678 } 692 679 693 - db::write_record(conn, at_uri, repo, cid).await?; 680 + db::record_upsert(conn, at_uri, repo, cid).await?; 694 681 695 682 Ok(()) 696 683 } 697 684 698 685 pub async fn index_op_delete( 699 - conn: &mut AsyncPgConnection, 686 + conn: &mut Transaction<'_>, 700 687 deltas: &mut impl AggregateDeltaStore, 701 688 repo: &str, 702 689 collection: CollectionType, 703 690 at_uri: &str, 704 - ) -> diesel::QueryResult<()> { 691 + ) -> Result<(), tokio_postgres::Error> { 705 692 match collection { 706 - CollectionType::BskyProfile => db::delete_profile(conn, repo).await?, 707 - CollectionType::BskyBlock => db::delete_block(conn, at_uri).await?, 693 + CollectionType::BskyProfile => db::profile_delete(conn, repo).await?, 694 + CollectionType::BskyBlock => db::block_delete(conn, at_uri).await?, 708 695 CollectionType::BskyFeedGen => { 709 - let count = db::delete_feedgen(conn, at_uri).await?; 696 + let count = db::feedgen_delete(conn, at_uri).await?; 710 697 deltas 711 698 .add_delta(repo, AggregateType::ProfileFeed, -(count as i32)) 712 699 .await; 713 700 count 714 701 } 715 702 CollectionType::BskyFeedLike => { 716 - if let Some(subject) = db::delete_like(conn, at_uri).await? { 703 + if let Some(subject) = db::like_delete(conn, at_uri).await? 
{ 717 704 deltas.decr(&subject, AggregateType::Like).await; 718 705 } 719 706 0 720 707 } 721 708 CollectionType::BskyFeedPost => { 722 - let post_info = db::get_post_info_for_delete(conn, at_uri).await?; 709 + let post_info = db::post_get_info_for_delete(conn, at_uri).await?; 723 710 724 - db::delete_post(conn, at_uri).await?; 711 + db::post_delete(conn, at_uri).await?; 725 712 726 713 if let Some((reply_to, embed)) = post_info { 727 714 deltas.decr(repo, AggregateType::ProfilePost).await; ··· 735 722 736 723 0 737 724 } 738 - CollectionType::BskyFeedPostgate => db::delete_postgate(conn, at_uri).await?, 725 + CollectionType::BskyFeedPostgate => db::postgate_delete(conn, at_uri).await?, 739 726 CollectionType::BskyFeedRepost => { 740 - if let Some(subject) = db::delete_repost(conn, at_uri).await? { 727 + if let Some(subject) = db::repost_delete(conn, at_uri).await? { 741 728 deltas.decr(&subject, AggregateType::Repost).await; 742 729 } 743 730 0 744 731 } 745 - CollectionType::BskyFeedThreadgate => db::delete_threadgate(conn, at_uri).await?, 732 + CollectionType::BskyFeedThreadgate => db::threadgate_delete(conn, at_uri).await?, 746 733 CollectionType::BskyFollow => { 747 - if let Some(followee) = db::delete_follow(conn, at_uri).await? { 734 + if let Some(followee) = db::follow_delete(conn, at_uri).await? 
{ 748 735 deltas.decr(&followee, AggregateType::Follower).await; 749 736 deltas.decr(repo, AggregateType::Follow).await; 750 737 } 751 738 0 752 739 } 753 740 CollectionType::BskyList => { 754 - let count = db::delete_list(conn, at_uri).await?; 741 + let count = db::list_delete(conn, at_uri).await?; 755 742 deltas 756 743 .add_delta(repo, AggregateType::ProfileList, -(count as i32)) 757 744 .await; 758 745 count 759 746 } 760 - CollectionType::BskyListBlock => db::delete_list_block(conn, at_uri).await?, 761 - CollectionType::BskyListItem => db::delete_list_item(conn, at_uri).await?, 747 + CollectionType::BskyListBlock => db::list_block_delete(conn, at_uri).await?, 748 + CollectionType::BskyListItem => db::list_item_delete(conn, at_uri).await?, 762 749 CollectionType::BskyStarterPack => { 763 - let count = db::delete_starterpack(conn, at_uri).await?; 750 + let count = db::starter_pack_delete(conn, at_uri).await?; 764 751 deltas 765 752 .add_delta(repo, AggregateType::ProfileStarterpack, -(count as i32)) 766 753 .await; 767 754 count 768 755 } 769 - CollectionType::BskyVerification => db::delete_verification(conn, at_uri).await?, 770 - CollectionType::BskyLabelerService => db::delete_label_service(conn, at_uri).await?, 771 - CollectionType::ChatActorDecl => db::delete_chat_decl(conn, at_uri).await?, 756 + CollectionType::BskyVerification => db::verification_delete(conn, at_uri).await?, 757 + CollectionType::BskyLabelerService => db::labeler_delete(conn, at_uri).await?, 758 + CollectionType::ChatActorDecl => db::chat_decl_delete(conn, at_uri).await?, 772 759 _ => unreachable!(), 773 760 }; 774 761 775 - db::delete_record(conn, at_uri).await?; 762 + db::record_delete(conn, at_uri).await?; 776 763 777 764 Ok(()) 778 765 }
+12 -12
consumer/src/indexer/records.rs
··· 9 9 use lexica::com_atproto::moderation::{ReasonType, SubjectType}; 10 10 use serde::{Deserialize, Serialize}; 11 11 12 - #[derive(Debug, Deserialize, Serialize)] 12 + #[derive(Clone, Debug, Deserialize, Serialize)] 13 13 pub struct StrongRef { 14 14 #[serde( 15 15 deserialize_with = "utils::cid_from_string", ··· 19 19 pub uri: String, 20 20 } 21 21 22 - #[derive(Debug, Deserialize, Serialize)] 22 + #[derive(Clone, Debug, Deserialize, Serialize)] 23 23 #[serde(tag = "$type")] 24 24 #[serde(rename = "blob")] 25 25 #[serde(rename_all = "camelCase")] ··· 43 43 pub created_at: Option<DateTime<Utc>>, 44 44 } 45 45 46 - #[derive(Debug, Deserialize, Serialize)] 46 + #[derive(Clone, Debug, Deserialize, Serialize)] 47 47 #[serde(untagged)] 48 48 pub enum EmbedOuter { 49 49 Bsky(AppBskyEmbed), ··· 83 83 } 84 84 } 85 85 86 - #[derive(Debug, Deserialize, Serialize)] 86 + #[derive(Clone, Debug, Deserialize, Serialize)] 87 87 #[serde(tag = "$type")] 88 88 pub enum AppBskyEmbed { 89 89 #[serde(rename = "app.bsky.embed.images")] ··· 117 117 } 118 118 } 119 119 120 - #[derive(Debug, Deserialize, Serialize)] 120 + #[derive(Clone, Debug, Deserialize, Serialize)] 121 121 #[serde(rename_all = "camelCase")] 122 122 pub struct AppBskyEmbedImages { 123 123 pub images: Vec<EmbedImage>, 124 124 } 125 125 126 - #[derive(Debug, Deserialize, Serialize)] 126 + #[derive(Clone, Debug, Deserialize, Serialize)] 127 127 #[serde(rename_all = "camelCase")] 128 128 pub struct EmbedImage { 129 129 pub image: Blob, ··· 132 132 pub aspect_ratio: Option<AspectRatio>, 133 133 } 134 134 135 - #[derive(Debug, Deserialize, Serialize)] 135 + #[derive(Clone, Debug, Deserialize, Serialize)] 136 136 #[serde(rename_all = "camelCase")] 137 137 pub struct AppBskyEmbedVideo { 138 138 pub video: Blob, ··· 144 144 pub aspect_ratio: Option<AspectRatio>, 145 145 } 146 146 147 - #[derive(Debug, Deserialize, Serialize)] 147 + #[derive(Clone, Debug, Deserialize, Serialize)] 148 148 pub struct EmbedVideoCaptions { 149 149 
pub lang: String, 150 150 pub file: Blob, 151 151 } 152 152 153 - #[derive(Debug, Deserialize, Serialize)] 153 + #[derive(Clone, Debug, Deserialize, Serialize)] 154 154 pub struct AppBskyEmbedExternal { 155 155 pub external: EmbedExternal, 156 156 } 157 157 158 - #[derive(Debug, Deserialize, Serialize)] 158 + #[derive(Clone, Debug, Deserialize, Serialize)] 159 159 pub struct EmbedExternal { 160 160 pub uri: String, 161 161 pub title: String, ··· 164 164 pub thumb: Option<Blob>, 165 165 } 166 166 167 - #[derive(Debug, Deserialize, Serialize)] 167 + #[derive(Clone, Debug, Deserialize, Serialize)] 168 168 pub struct AppBskyEmbedRecord { 169 169 pub record: StrongRef, 170 170 } 171 171 172 - #[derive(Debug, Deserialize, Serialize)] 172 + #[derive(Clone, Debug, Deserialize, Serialize)] 173 173 pub struct AppBskyEmbedRecordWithMedia { 174 174 pub record: AppBskyEmbedRecord, 175 175 pub media: Box<AppBskyEmbed>,
+12 -16
consumer/src/label_indexer/mod.rs
··· 6 6 use std::sync::Arc; 7 7 use std::time::Duration; 8 8 use tokio::sync::mpsc::{channel, Receiver, Sender}; 9 + use tokio::sync::watch::Receiver as WatchReceiver; 9 10 use tokio::task::JoinHandle; 10 11 use tokio::time::Instant; 11 12 use tokio_postgres::binary_copy::BinaryCopyInWriter; 12 13 use tokio_postgres::types::Type; 13 - use tokio_postgres::NoTls; 14 14 use tracing::instrument; 15 - use tokio::sync::watch::Receiver as WatchReceiver; 16 15 17 16 const LABELER_SERVICE_ID: &str = "#atproto_labeler"; 18 17 19 18 pub struct LabelServiceManager { 20 - client: tokio_postgres::Client, 19 + conn: deadpool_postgres::Object, 21 20 rx: Receiver<String>, 22 21 resolver: Arc<Resolver>, 23 22 services: HashMap<String, JoinHandle<()>>, ··· 27 26 28 27 impl LabelServiceManager { 29 28 pub async fn new( 30 - pg_url: &str, 29 + pool: deadpool_postgres::Pool, 31 30 resolver: Arc<Resolver>, 32 31 resume: sled::Db, 33 32 user_agent: String, 34 33 ) -> eyre::Result<(Self, Sender<String>)> { 35 - let (client, connection) = tokio_postgres::connect(pg_url, NoTls).await?; 36 - 37 - tokio::spawn(async move { 38 - if let Err(e) = connection.await { 39 - tracing::error!("connection error: {}", e); 40 - } 41 - }); 42 - 34 + let conn = pool.get().await?; 43 35 let (tx, rx) = channel(8); 44 36 45 37 let lsm = LabelServiceManager { 46 - client, 38 + conn, 47 39 rx, 48 40 resolver, 49 41 resume, ··· 112 104 continue; 113 105 } 114 106 tracing::debug!("got {} labels", buf.len()); 115 - store_labels(&mut self.client, &buf).await 107 + store_labels(&mut self.conn, &buf).await 116 108 } 117 109 }; 118 110 ··· 169 161 170 162 let count = binary_writer.finish().await?; 171 163 172 - t.execute(include_str!("../sql/label_copy_upsert.sql"), &[]) 164 + t.execute(include_str!("../db/sql/label_copy_upsert.sql"), &[]) 173 165 .await?; 174 166 175 167 t.commit().await?; ··· 186 178 user_agent: String, 187 179 db_tx: Sender<AtpLabel>, 188 180 ) { 189 - let start_seq = 
resume.get(&service_did).ok().flatten().and_then(crate::utils::u64_from_ivec); 181 + let start_seq = resume 182 + .get(&service_did) 183 + .ok() 184 + .flatten() 185 + .and_then(crate::utils::u64_from_ivec); 190 186 191 187 if let Some(start_seq) = start_seq { 192 188 tracing::info!("starting {service_did} label consumer from {start_seq}");
+11 -8
consumer/src/main.rs
··· 1 + use deadpool_postgres::Runtime; 1 2 use did_resolver::{Resolver, ResolverOpts}; 2 - use diesel_async::pooled_connection::deadpool::Pool; 3 - use diesel_async::pooled_connection::AsyncDieselConnectionManager; 4 - use diesel_async::AsyncPgConnection; 5 3 use eyre::OptionExt; 6 4 use metrics_exporter_prometheus::PrometheusBuilder; 7 5 use std::sync::Arc; 6 + use tokio_postgres::NoTls; 8 7 9 8 mod backfill; 10 9 mod cmd; 11 10 mod config; 11 + mod db; 12 12 mod firehose; 13 13 mod indexer; 14 14 mod label_indexer; ··· 24 24 25 25 let user_agent = build_ua(&conf.ua_contact); 26 26 27 - let db_mgr = AsyncDieselConnectionManager::<AsyncPgConnection>::new(&conf.database_url); 28 - let pool = Pool::builder(db_mgr).build()?; 27 + let pool = conf.database.create_pool(Some(Runtime::Tokio1), NoTls)?; 29 28 30 29 let (redis_conn, redis_fut) = redis::Client::open(conf.redis_uri)? 31 30 .create_multiplexed_tokio_connection() ··· 61 60 let resume = resume.clone().unwrap(); 62 61 63 62 let (label_mgr, _label_svc_tx) = label_indexer::LabelServiceManager::new( 64 - &conf.database_url, 63 + pool.clone(), 65 64 resolver.clone(), 66 65 resume, 67 66 user_agent.clone(), ··· 72 71 } 73 72 74 73 if cli.backfill { 74 + let bf_cfg = conf 75 + .backfill 76 + .ok_or_eyre("Config item [backfill] must be specified when using --backfill")?; 77 + 75 78 let backfiller = backfill::BackfillManager::new( 76 79 pool.clone(), 77 80 redis_conn.clone(), 78 81 resolver.clone(), 79 - index_client.clone(), 80 - conf.backfill_workers, 82 + (!bf_cfg.skip_aggregation).then_some(index_client.clone()), 83 + bf_cfg, 81 84 ) 82 85 .await?; 83 86
consumer/src/sql/label_copy_upsert.sql consumer/src/db/sql/label_copy_upsert.sql
+4 -4
migrations/2025-01-29-213341_follows_and_blocks/up.sql
··· 6 6 created_at timestamptz not null 7 7 ); 8 8 9 - create index blocks_did_index on blocks using hash (did); 10 - create index blocks_subject_index on blocks using hash (subject); 9 + create index blocks_did_index on blocks (did); 10 + create index blocks_subject_index on blocks (subject); 11 11 12 12 create table follows 13 13 ( ··· 17 17 created_at timestamptz not null 18 18 ); 19 19 20 - create index follow_did_index on follows using hash (did); 21 - create index follow_subject_index on follows using hash (subject); 20 + create index follow_did_index on follows (did); 21 + create index follow_subject_index on follows (subject);
+2 -2
migrations/2025-02-07-203450_lists/up.sql
··· 25 25 indexed_at timestamp not null default now() 26 26 ); 27 27 28 - create index listitems_list_index on list_items using hash (list_uri); 29 - create index listitems_subject_index on list_items using hash (subject); 28 + create index listitems_list_index on list_items (list_uri); 29 + create index listitems_subject_index on list_items (subject); 30 30 31 31 create table list_blocks 32 32 (
+10 -10
migrations/2025-02-16-142357_posts/up.sql
··· 22 22 indexed_at timestamp not null default now() 23 23 ); 24 24 25 - create index posts_did_index on posts using hash (did); 26 - create index posts_parent_index on posts using hash (parent_uri); 27 - create index posts_root_index on posts using hash (root_uri); 25 + create index posts_did_index on posts (did); 26 + create index posts_parent_index on posts (parent_uri); 27 + create index posts_root_index on posts (root_uri); 28 28 create index posts_lang_index on posts using gin (languages); 29 29 create index posts_tags_index on posts using gin (tags); 30 30 31 31 create table post_embed_images 32 32 ( 33 - post_uri text not null references posts (at_uri) on delete cascade, 33 + post_uri text not null references posts (at_uri) on delete cascade deferrable, 34 34 seq smallint not null, 35 35 36 36 mime_type text not null, ··· 47 47 48 48 create table post_embed_video 49 49 ( 50 - post_uri text primary key references posts (at_uri) on delete cascade, 50 + post_uri text primary key references posts (at_uri) on delete cascade deferrable, 51 51 52 52 mime_type text not null, 53 53 cid text not null, ··· 61 61 62 62 create table post_embed_video_captions 63 63 ( 64 - post_uri text not null references posts (at_uri) on delete cascade, 64 + post_uri text not null references posts (at_uri) on delete cascade deferrable, 65 65 language text not null, 66 66 67 67 mime_type text not null, ··· 74 74 75 75 create table post_embed_ext 76 76 ( 77 - post_uri text primary key references posts (at_uri) on delete cascade, 77 + post_uri text primary key references posts (at_uri) on delete cascade deferrable, 78 78 79 79 uri text not null, 80 80 title text not null, ··· 87 87 88 88 create table post_embed_record 89 89 ( 90 - post_uri text primary key references posts (at_uri) on delete cascade, 90 + post_uri text primary key references posts (at_uri) on delete cascade deferrable, 91 91 92 92 record_type text not null, 93 93 uri text not null, ··· 101 101 ( 102 102 at_uri text 
primary key, 103 103 cid text not null, 104 - post_uri text not null references posts (at_uri) on delete cascade, 104 + post_uri text not null references posts (at_uri) on delete cascade deferrable, 105 105 106 106 detached text[] not null, 107 107 rules text[] not null, ··· 118 118 ( 119 119 at_uri text primary key, 120 120 cid text not null, 121 - post_uri text not null references posts (at_uri) on delete cascade, 121 + post_uri text not null references posts (at_uri) on delete cascade deferrable, 122 122 123 123 hidden_replies text[] not null, 124 124 allow text[] not null,
+4 -4
migrations/2025-04-05-114428_likes_and_reposts/up.sql
··· 8 8 indexed_at timestamp not null default now() 9 9 ); 10 10 11 - create index likes_did_index on likes using hash (did); 12 - create index likes_subject_index on likes using hash (subject); 11 + create index likes_did_index on likes (did); 12 + create index likes_subject_index on likes (subject); 13 13 14 14 create table reposts 15 15 ( ··· 21 21 indexed_at timestamp not null default now() 22 22 ); 23 23 24 - create index reposts_did_index on reposts using hash (did); 25 - create index reposts_post_index on reposts using hash (post); 24 + create index reposts_did_index on reposts (did); 25 + create index reposts_post_index on reposts (post);
+2 -2
migrations/2025-04-18-185717_verification/up.sql
··· 12 12 indexed_at timestamp not null default now() 13 13 ); 14 14 15 - create index verification_verifier_index on verification using hash (verifier); 16 - create index verification_subject_index on verification using hash (subject); 15 + create index verification_verifier_index on verification (verifier); 16 + create index verification_subject_index on verification (subject);
+8 -2
parakeet-db/Cargo.toml
··· 5 5 6 6 [dependencies] 7 7 chrono = { version = "0.4.39", features = ["serde"] } 8 - diesel = { version = "2.2.6", features = ["chrono", "serde_json"] } 9 - serde_json = "1.0.134" 8 + diesel = { version = "2.2.6", features = ["chrono", "serde_json"], optional = true } 9 + postgres-types = { version = "0.2.9", optional = true } 10 + serde_json = "1.0.134" 11 + 12 + [features] 13 + default = ["diesel"] 14 + diesel = ["dep:diesel"] 15 + postgres = ["dep:postgres-types"]
+2
parakeet-db/src/lib.rs
// Diesel models/schema are only compiled when the `diesel` feature is on;
// consumers that talk to Postgres directly (via `postgres-types`) can opt out.
#[cfg(feature = "diesel")]
pub mod models;
#[cfg(feature = "diesel")]
pub mod schema;
// Text-backed enum types; usable under both the `diesel` and `postgres` features.
pub mod types;
+100 -69
parakeet-db/src/types.rs
/// Generates a `pub enum` whose variants map to/from fixed lowercase text values.
///
/// For each `Variant = "value"` pair the macro emits:
/// * `Display` (variant -> text) and `FromStr` (text -> variant, `Err(String)`
///   for unrecognized input),
/// * `postgres_types::{FromSql, ToSql}` impls behind the `postgres` feature,
///   storing the value as Postgres `TEXT`,
/// * Diesel `FromSql`/`ToSql` impls for `sql_types::Text` behind the `diesel`
///   feature.
macro_rules! text_enum {
    (enum $name:ident {$($variant:ident = $value:expr,)*}) => {
        #[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Eq)]
        #[cfg_attr(feature = "diesel", derive(diesel::AsExpression, diesel::FromSqlRow))]
        #[cfg_attr(feature = "diesel", diesel(sql_type = diesel::sql_types::Text))]
        pub enum $name {
            $($variant,)*
        }

        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    // `$value` is a string literal, so it doubles as the format string.
                    $(Self::$variant => write!(f, $value),)*
                }
            }
        }

        impl std::str::FromStr for $name {
            type Err = String;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                match s {
                    $($value => Ok(Self::$variant),)*
                    // `format!` already yields the `String` error type directly.
                    x => Err(format!("Unrecognized variant {}", x)),
                }
            }
        }

        #[cfg(feature = "postgres")]
        impl postgres_types::FromSql<'_> for $name {
            fn from_sql(
                ty: &postgres_types::Type,
                raw: &[u8],
            ) -> Result<Self, Box<dyn std::error::Error + Sync + Send>> {
                // Decode the raw bytes as TEXT, then reuse the `FromStr` mapping.
                Ok(String::from_sql(ty, raw)?.parse()?)
            }

            fn accepts(ty: &postgres_types::Type) -> bool {
                // NOTE(review): only `text` columns are accepted; a column
                // declared `varchar` would be rejected — confirm the schema
                // uses `text` everywhere these enums are stored.
                ty == &postgres_types::Type::TEXT
            }
        }

        #[cfg(feature = "postgres")]
        impl postgres_types::ToSql for $name {
            fn to_sql(
                &self,
                ty: &postgres_types::Type,
                out: &mut postgres_types::private::BytesMut,
            ) -> Result<postgres_types::IsNull, Box<dyn std::error::Error + Sync + Send>>
            where
                Self: Sized,
            {
                // Encode via the `Display` mapping, delegating to String's encoder.
                self.to_string().to_sql(ty, out)
            }

            fn accepts(ty: &postgres_types::Type) -> bool
            where
                Self: Sized,
            {
                ty == &postgres_types::Type::TEXT
            }

            // Adds the length/type-checked `to_sql_checked` wrapper required
            // by tokio-postgres.
            postgres_types::to_sql_checked!();
        }

        #[cfg(feature = "diesel")]
        impl<DB> diesel::deserialize::FromSql<diesel::sql_types::Text, DB> for $name
        where
            DB: diesel::backend::Backend,
            String: diesel::deserialize::FromSql<diesel::sql_types::Text, DB>,
        {
            fn from_sql(bytes: DB::RawValue<'_>) -> diesel::deserialize::Result<Self> {
                use std::str::FromStr;

                // Read the column as a String, then reuse the `FromStr` mapping;
                // the `String` error converts into diesel's boxed error via `?`.
                let st = <String as diesel::deserialize::FromSql<diesel::sql_types::Text, DB>>::from_sql(bytes)?;
                let out = Self::from_str(&st)?;
                Ok(out)
            }
        }

        #[cfg(feature = "diesel")]
        impl diesel::serialize::ToSql<diesel::sql_types::Text, diesel::pg::Pg> for $name {
            fn to_sql<'b>(
                &'b self,
                out: &mut diesel::serialize::Output<'b, '_, diesel::pg::Pg>,
            ) -> diesel::serialize::Result {
                use std::io::Write;

                // `write_all` (not `write`): `write` may perform a partial
                // write and its byte count was being discarded, which could
                // silently truncate the serialized value.
                out.write_all(self.to_string().as_bytes())?;
                Ok(diesel::serialize::IsNull::No)
            }
        }
    };
}

// Account-level lifecycle/moderation state, stored as lowercase text.
text_enum!(
    enum ActorStatus {
        Active = "active",
        Takendown = "takendown",
        Suspended = "suspended",
        Deleted = "deleted",
        Deactivated = "deactivated",
    }
);

// Sync/backfill bookkeeping state for an actor's repo.
text_enum!(
    enum ActorSyncState {
        Synced = "synced",
        Dirty = "dirty",
        Processing = "processing",
    }
);