+186 -59  Cargo.lock
···
 ]
 
 [[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
+[[package]]
 name = "android-tzdata"
 version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
  "event-listener-strategy",
  "futures-core",
  "pin-project-lite",
+]
+
+[[package]]
+name = "async-compression"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59a194f9d963d8099596278594b3107448656ba73831c9d8c783e613ce86da64"
+dependencies = [
+ "brotli",
+ "futures-core",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
 ]
 
 [[package]]
···
 ]
 
 [[package]]
+name = "brotli"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "4.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
+[[package]]
 name = "bumpalo"
 version = "3.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
  "chrono",
  "ciborium",
  "clap",
+ "deadpool-postgres",
  "did-resolver",
- "diesel",
- "diesel-async",
  "eyre",
  "figment",
- "flume",
  "foldhash",
  "futures",
  "ipld-core",
···
  "tokio-postgres",
  "tokio-stream",
  "tokio-tungstenite",
+ "tokio-util",
  "tracing",
  "tracing-subscriber",
 ]
···
 checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
 dependencies = [
  "generic-array",
- "rand_core",
+ "rand_core 0.6.4",
  "subtle",
  "zeroize",
 ]
···
 dependencies = [
  "deadpool-runtime",
  "num_cpus",
+ "serde",
  "tokio",
 ]
 
 [[package]]
+name = "deadpool-postgres"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d697d376cbfa018c23eb4caab1fd1883dd9c906a8c034e8d9a3cb06a7e0bef9"
+dependencies = [
+ "async-trait",
+ "deadpool",
+ "getrandom 0.2.15",
+ "serde",
+ "tokio",
+ "tokio-postgres",
+ "tracing",
+]
+
+[[package]]
 name = "deadpool-runtime"
 version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
+dependencies = [
+ "tokio",
+]
 
 [[package]]
 name = "der"
···
  "hkdf",
  "pem-rfc7468",
  "pkcs8",
- "rand_core",
+ "rand_core 0.6.4",
  "sec1",
  "subtle",
  "zeroize",
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
 dependencies = [
- "rand_core",
+ "rand_core 0.6.4",
  "subtle",
 ]
 
···
 version = "0.5.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
-
-[[package]]
-name = "flume"
-version = "0.11.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
-dependencies = [
- "futures-core",
- "futures-sink",
- "nanorand",
- "spin",
-]
 
 [[package]]
 name = "fnv"
···
  "cfg-if",
  "js-sys",
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
  "wasm-bindgen",
 ]
 
 [[package]]
+name = "getrandom"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasi 0.14.2+wasi-0.2.4",
+]
+
+[[package]]
 name = "gimli"
 version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
 dependencies = [
  "ff",
- "rand_core",
+ "rand_core 0.6.4",
  "subtle",
 ]
 
···
  "idna",
  "ipnet",
  "once_cell",
- "rand",
+ "rand 0.8.5",
  "thiserror 1.0.69",
  "tinyvec",
  "tokio",
···
  "lru-cache",
  "once_cell",
  "parking_lot 0.12.3",
- "rand",
+ "rand 0.8.5",
  "resolv-conf",
  "smallvec",
  "thiserror 1.0.69",
···
 dependencies = [
  "base64 0.22.1",
  "ed25519-dalek",
- "getrandom",
+ "getrandom 0.2.15",
  "hmac",
  "js-sys",
  "k256",
  "p256",
  "p384",
  "pem",
- "rand",
- "rand_core",
+ "rand 0.8.5",
+ "rand_core 0.6.4",
  "rsa",
  "serde",
  "serde_json",
···
  "hashbrown",
  "metrics",
  "quanta",
- "rand",
+ "rand 0.8.5",
  "rand_xoshiro",
  "sketches-ddsketch",
 ]
···
 checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
 dependencies = [
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
  "windows-sys 0.52.0",
 ]
 
···
 checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
 
 [[package]]
-name = "nanorand"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3"
-dependencies = [
- "getrandom",
-]
-
-[[package]]
 name = "native-tls"
 version = "0.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
  "num-integer",
  "num-iter",
  "num-traits",
- "rand",
+ "rand 0.8.5",
  "smallvec",
  "zeroize",
 ]
···
 dependencies = [
  "chrono",
  "diesel",
+ "postgres-types",
  "serde_json",
 ]
 
···
 
 [[package]]
 name = "postgres-protocol"
-version = "0.6.7"
+version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23"
+checksum = "76ff0abab4a9b844b93ef7b81f1efc0a366062aaef2cd702c76256b5dc075c54"
 dependencies = [
  "base64 0.22.1",
  "byteorder",
···
  "hmac",
  "md-5",
  "memchr",
- "rand",
+ "rand 0.9.1",
  "sha2",
  "stringprep",
 ]
 
 [[package]]
 name = "postgres-types"
-version = "0.2.8"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f"
+checksum = "613283563cd90e1dfc3518d548caee47e0e725455ed619881f5cf21f36de4b48"
 dependencies = [
  "bytes",
  "chrono",
  "fallible-iterator",
  "postgres-protocol",
+ "serde",
+ "serde_json",
 ]
 
 [[package]]
···
  "libc",
  "once_cell",
  "raw-cpuid",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
  "web-sys",
  "winapi",
 ]
···
 ]
 
 [[package]]
+name = "r-efi"
+version = "5.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
+
+[[package]]
 name = "rand"
 version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
  "libc",
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.3",
 ]
 
 [[package]]
···
 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
 dependencies = [
  "ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.3",
 ]
 
 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+dependencies = [
+ "getrandom 0.3.3",
 ]
 
 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
 dependencies = [
- "rand_core",
+ "rand_core 0.6.4",
 ]
 
 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da"
 dependencies = [
+ "async-compression",
  "base64 0.22.1",
  "bytes",
  "encoding_rs",
···
  "system-configuration",
  "tokio",
  "tokio-native-tls",
+ "tokio-util",
  "tower",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
+ "wasm-streams",
  "web-sys",
  "windows-registry",
 ]
···
 dependencies = [
  "cc",
  "cfg-if",
- "getrandom",
+ "getrandom 0.2.15",
  "libc",
  "spin",
  "untrusted",
···
  "num-traits",
  "pkcs1",
  "pkcs8",
- "rand_core",
+ "rand_core 0.6.4",
  "signature",
  "spki",
  "subtle",
···
 checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
 dependencies = [
  "digest",
- "rand_core",
+ "rand_core 0.6.4",
 ]
 
 [[package]]
···
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]
 
 [[package]]
 name = "spki"
···
 dependencies = [
  "cfg-if",
  "fastrand",
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "rustix",
  "windows-sys 0.59.0",
···
  "pin-project-lite",
  "postgres-protocol",
  "postgres-types",
- "rand",
+ "rand 0.8.5",
  "socket2",
  "tokio",
  "tokio-util",
···
 
 [[package]]
 name = "tokio-util"
-version = "0.7.13"
+version = "0.7.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078"
+checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df"
 dependencies = [
  "bytes",
  "futures-core",
···
  "httparse",
  "log",
  "native-tls",
- "rand",
+ "rand 0.8.5",
  "sha1",
  "thiserror 2.0.12",
  "utf-8",
···
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
+name = "wasi"
+version = "0.14.2+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
+[[package]]
 name = "wasite"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
 dependencies = [
  "unicode-ident",
+]
+
+[[package]]
+name = "wasm-streams"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
 ]
 
 [[package]]
···
 dependencies = [
  "cfg-if",
  "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
+dependencies = [
+ "bitflags 2.8.0",
 ]
 
 [[package]]
+5 -6  consumer/Cargo.toml
···
 chrono = { version = "0.4.39", features = ["serde"] }
 ciborium = "0.2.2"
 clap = { version = "4.5.34", features = ["derive"] }
+deadpool-postgres = { version = "0.14.1", features = ["serde"] }
 did-resolver = { path = "../did-resolver" }
-diesel = { version = "2.2.6", features = ["chrono", "serde_json"] }
-diesel-async = { version = "0.5.2", features = ["deadpool", "postgres"] }
 eyre = "0.6.12"
 figment = { version = "0.10.19", features = ["env", "toml"] }
-flume = { version = "0.11.1", features = ["async"] }
 foldhash = "0.1.4"
 futures = "0.3.31"
 ipld-core = "0.4.1"
···
 lexica = { path = "../lexica" }
 metrics = "0.24.1"
 metrics-exporter-prometheus = "0.16.2"
-parakeet-db = { path = "../parakeet-db" }
+parakeet-db = { path = "../parakeet-db", default-features = false, features = ["postgres"] }
 parakeet-index = { path = "../parakeet-index" }
 redis = { version = "0.31", features = ["tokio-native-tls-comp"] }
-reqwest = { version = "0.12.12", features = ["native-tls"] }
+reqwest = { version = "0.12.12", features = ["native-tls", "brotli", "stream"] }
 serde = { version = "1.0.217", features = ["derive"] }
 serde_bytes = "0.11"
 serde_ipld_dagcbor = "0.6.1"
···
 sled = "0.34.7"
 thiserror = "2"
 tokio = { version = "1.42.0", features = ["full"] }
-tokio-postgres = { version = "0.7.12", features = ["with-chrono-0_4"] }
+tokio-postgres = { version = "0.7.12", features = ["with-chrono-0_4", "with-serde_json-1"] }
 tokio-stream = "0.1.17"
 tokio-tungstenite = { version = "0.26.1", features = ["native-tls"] }
+tokio-util = { version = "0.7.14", features = ["io"] }
 tracing = "0.1.40"
 tracing-subscriber = "0.3.18"
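
The manifest change above swaps the diesel / diesel-async stack for deadpool-postgres over plain tokio-postgres, drops flume, and enables reqwest's brotli and stream features for the backfill client. A minimal sketch of how a pool can be built from the new structured database config (see consumer/src/config.rs below); the NoTls and Tokio-runtime choices here are assumptions for illustration, not something this diff shows:

use deadpool_postgres::{Pool, Runtime};
use tokio_postgres::NoTls;

// Hypothetical helper: builds a deadpool-postgres Pool from the `database`
// table that consumer/src/config.rs now deserializes into deadpool_postgres::Config.
fn build_pool(cfg: deadpool_postgres::Config) -> eyre::Result<Pool> {
    // create_pool opens no connections up front; they are created lazily on get().
    Ok(cfg.create_pool(Some(Runtime::Tokio1), NoTls)?)
}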
-99  consumer/src/backfill/db.rs
···
-use diesel::prelude::*;
-use diesel_async::{AsyncPgConnection, RunQueryDsl};
-use parakeet_db::{models, schema, types};
-
-pub async fn write_backfill_job(
-    conn: &mut AsyncPgConnection,
-    repo: &str,
-    status: &str,
-) -> QueryResult<usize> {
-    diesel::insert_into(schema::backfill_jobs::table)
-        .values((
-            schema::backfill_jobs::did.eq(repo),
-            schema::backfill_jobs::status.eq(status),
-        ))
-        .on_conflict_do_nothing()
-        .execute(conn)
-        .await
-}
-
-pub async fn get_actor_status(
-    conn: &mut AsyncPgConnection,
-    did: &str,
-) -> QueryResult<(types::ActorStatus, types::ActorSyncState)> {
-    schema::actors::table
-        .select((schema::actors::status, schema::actors::sync_state))
-        .find(&did)
-        .get_result(conn)
-        .await
-}
-
-pub async fn update_repo_sync_state(
-    conn: &mut AsyncPgConnection,
-    did: &str,
-    sync_state: types::ActorSyncState,
-) -> QueryResult<usize> {
-    diesel::update(schema::actors::table)
-        .set(schema::actors::sync_state.eq(sync_state))
-        .filter(schema::actors::did.eq(did))
-        .execute(conn)
-        .await
-}
-
-pub async fn update_handle(
-    conn: &mut AsyncPgConnection,
-    did: &str,
-    handle: Option<String>,
-) -> QueryResult<usize> {
-    diesel::update(schema::actors::table)
-        .set(schema::actors::handle.eq(handle))
-        .filter(schema::actors::did.eq(did))
-        .execute(conn)
-        .await
-}
-
-pub async fn update_actor_status(
-    conn: &mut AsyncPgConnection,
-    did: &str,
-    status: types::ActorStatus,
-    sync_state: types::ActorSyncState,
-) -> QueryResult<usize> {
-    diesel::update(schema::actors::table)
-        .set((
-            schema::actors::status.eq(status),
-            schema::actors::sync_state.eq(sync_state),
-        ))
-        .filter(schema::actors::did.eq(did))
-        .execute(conn)
-        .await
-}
-
-pub async fn defer(conn: &mut AsyncPgConnection) -> QueryResult<usize> {
-    diesel::sql_query("SET CONSTRAINTS ALL DEFERRED")
-        .execute(conn)
-        .await
-}
-
-pub async fn pull_backfill_rows(
-    conn: &mut AsyncPgConnection,
-    repo: &str,
-    rev: &str,
-) -> QueryResult<Vec<models::BackfillRow>> {
-    schema::backfill::table
-        .select(models::BackfillRow::as_select())
-        .filter(
-            schema::backfill::repo
-                .eq(repo)
-                .and(schema::backfill::repo_ver.gt(rev)),
-        )
-        .order(schema::backfill::repo_ver)
-        .load(conn)
-        .await
-}
-
-pub async fn clear_backfill_rows(conn: &mut AsyncPgConnection, repo: &str) -> QueryResult<usize> {
-    diesel::delete(schema::backfill::table)
-        .filter(schema::backfill::repo.eq(repo))
-        .execute(conn)
-        .await
-}
+138 -85  consumer/src/backfill/mod.rs
···
+use crate::config::BackfillConfig;
+use crate::db;
 use crate::indexer::types::{AggregateDeltaStore, BackfillItem, BackfillItemInner};
-use crate::indexer::{self, db as indexer_db};
+use crate::indexer::{self, records};
+use chrono::prelude::*;
+use deadpool_postgres::{Object, Pool, Transaction};
 use did_resolver::Resolver;
-use diesel_async::pooled_connection::deadpool::Pool;
-use diesel_async::AsyncPgConnection;
 use ipld_core::cid::Cid;
 use metrics::counter;
 use parakeet_db::types::{ActorStatus, ActorSyncState};
 use redis::aio::MultiplexedConnection;
 use redis::{AsyncCommands, Direction};
 use reqwest::{Client, StatusCode};
-use std::collections::HashMap;
 use std::str::FromStr;
 use std::sync::Arc;
 use tokio::sync::Semaphore;
 use tracing::instrument;
 
-mod db;
 mod repo;
 mod types;
 
···
 pub struct BackfillManagerInner {
     resolver: Arc<Resolver>,
     client: Client,
-    index_client: parakeet_index::Client,
+    index_client: Option<parakeet_index::Client>,
+    opts: BackfillConfig,
 }
 
 pub struct BackfillManager {
-    pool: Pool<AsyncPgConnection>,
+    pool: Pool,
     redis: MultiplexedConnection,
     semaphore: Arc<Semaphore>,
     inner: BackfillManagerInner,
···
 
 impl BackfillManager {
     pub async fn new(
-        pool: Pool<AsyncPgConnection>,
+        pool: Pool,
         redis: MultiplexedConnection,
         resolver: Arc<Resolver>,
-        index_client: parakeet_index::Client,
-        threads: u8,
+        index_client: Option<parakeet_index::Client>,
+        opts: BackfillConfig,
     ) -> eyre::Result<Self> {
-        let client = Client::new();
-        let semaphore = Arc::new(Semaphore::new(threads as usize));
+        let client = Client::builder().brotli(true).build()?;
+        let semaphore = Arc::new(Semaphore::new(opts.backfill_workers as usize));
 
         Ok(BackfillManager {
             pool,
···
                 resolver,
                 client,
                 index_client,
+                opts,
             },
         })
     }
···
             tracing::error!(did = &job, "backfill failed: {e}");
             counter!("backfill_failure").increment(1);
 
-            db::write_backfill_job(&mut conn, &job, "failed")
+            db::backfill_job_write(&mut conn, &job, "failed")
                 .await
                 .unwrap();
         } else {
             counter!("backfill_success").increment(1);
 
-            db::write_backfill_job(&mut conn, &job, "successful")
+            db::backfill_job_write(&mut conn, &job, "successful")
                 .await
                 .unwrap();
         }
···
 
 #[instrument(skip(conn, inner))]
 async fn backfill_actor(
-    conn: &mut AsyncPgConnection,
+    conn: &mut Object,
     inner: &mut BackfillManagerInner,
     did: &str,
 ) -> eyre::Result<()> {
-    let (status, sync_state) = db::get_actor_status(conn, did).await?;
+    let Some((status, sync_state)) = db::actor_get_statuses(conn, did).await? else {
+        tracing::error!("skipping backfill on unknown repo");
+        return Ok(());
+    };
 
     if sync_state != ActorSyncState::Dirty || status != ActorStatus::Active {
         tracing::debug!("skipping non-dirty or inactive repo");
···
         eyre::bail!("missing did doc");
     };
 
-    let Some(handle) = did_doc
-        .also_known_as
-        .and_then(|aka| aka.first().cloned())
-        .and_then(|handle| handle.strip_prefix("at://").map(String::from))
-    else {
-        eyre::bail!("DID doc contained no handle");
-    };
     let Some(service) = did_doc
         .service
         .and_then(|services| services.into_iter().find(|svc| svc.id == PDS_SERVICE_ID))
···
     let Some(repo_status) = check_pds_repo_status(&inner.client, &pds_url, did).await? else {
         // this repo can't be found - set dirty and assume deleted.
         tracing::debug!("repo was deleted");
-        db::update_actor_status(conn, did, ActorStatus::Deleted, ActorSyncState::Dirty).await?;
+        db::actor_upsert(
+            conn,
+            did,
+            ActorStatus::Deleted,
+            ActorSyncState::Dirty,
+            Utc::now(),
+        )
+        .await?;
         return Ok(());
     };
 
···
         let status = repo_status
             .status
             .unwrap_or(crate::firehose::AtpAccountStatus::Deleted);
-        db::update_actor_status(conn, did, status.into(), ActorSyncState::Dirty).await?;
+        db::actor_upsert(
+            conn,
+            did,
+            status.into(),
+            ActorSyncState::Dirty,
+            Utc::now(),
+        )
+        .await?;
         return Ok(());
     }
 
-    // at this point, the account will be active and we can attempt to resolve the handle.
+    if !inner.opts.skip_handle_validation {
+        // at this point, the account will be active and we can attempt to resolve the handle.
+        let Some(handle) = did_doc
+            .also_known_as
+            .and_then(|aka| aka.first().cloned())
+            .and_then(|handle| handle.strip_prefix("at://").map(String::from))
+        else {
+            eyre::bail!("DID doc contained no handle");
+        };
 
-    // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems
-    // like a way to end up with really sus handles.
-    let Some(handle_did) = inner.resolver.resolve_handle(&handle).await? else {
-        eyre::bail!("Failed to resolve did for handle {handle}");
-    };
+        // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems
+        // like a way to end up with really sus handles.
+        let Some(handle_did) = inner.resolver.resolve_handle(&handle).await? else {
+            eyre::bail!("Failed to resolve did for handle {handle}");
+        };
 
-    if handle_did != did {
-        eyre::bail!("requested DID doesn't match handle");
-    }
+        if handle_did != did {
+            eyre::bail!("requested DID doesn't match handle");
+        }
 
-    // set the handle from above
-    db::update_handle(conn, did, Some(handle)).await?;
+        // set the handle from above
+        db::actor_upsert_handle(
+            conn,
+            did,
+            ActorSyncState::Processing,
+            Some(handle),
+            Utc::now(),
+        )
+        .await?;
+    }
 
     // now we can start actually backfilling
-    db::update_repo_sync_state(conn, did, ActorSyncState::Processing).await?;
+    db::actor_set_sync_status(conn, did, ActorSyncState::Processing, Utc::now()).await?;
+
+    let mut t = conn.transaction().await?;
+    t.execute("SET CONSTRAINTS ALL DEFERRED", &[]).await?;
 
     tracing::trace!("pulling repo");
 
-    let (rev, cid, records) = repo::pull_repo(&inner.client, did, &pds_url).await?;
+    let (commit, mut deltas, copies) =
+        repo::stream_and_insert_repo(&mut t, &inner.client, did, &pds_url).await?;
 
-    tracing::trace!("repo pulled - inserting");
+    db::actor_set_repo_state(&mut t, did, &commit.rev, commit.data).await?;
 
-    let mut delta_store = HashMap::new();
+    copies.submit(&mut t, did).await?;
 
-    db::defer(conn).await?;
-
-    indexer_db::update_repo_version(conn, did, &rev, cid).await?;
-
-    for (path, (cid, record)) in records {
-        let Some((collection, rkey)) = path.split_once("/") else {
-            tracing::warn!("record contained invalid path {}", path);
-            return Err(diesel::result::Error::RollbackTransaction.into());
-        };
-
-        counter!("backfilled_commits", "collection" => collection.to_string()).increment(1);
-
-        let full_path = format!("at://{did}/{path}");
-
-        indexer::index_op(conn, &mut delta_store, did, cid, record, &full_path, rkey).await?
-    }
-
-    db::update_repo_sync_state(conn, did, ActorSyncState::Synced).await?;
+    t.execute(
+        "UPDATE actors SET sync_state=$2, last_indexed=$3 WHERE did=$1",
+        &[&did, &ActorSyncState::Synced, &Utc::now().naive_utc()],
+    )
+    .await?;
 
-    handle_backfill_rows(conn, &mut delta_store, did, &rev).await?;
+    handle_backfill_rows(&mut t, &mut deltas, did, &commit.rev).await?;
 
     tracing::trace!("insertion finished");
 
-    // submit the deltas
-    let delta_store = delta_store
-        .into_iter()
-        .map(|((uri, typ), delta)| parakeet_index::AggregateDeltaReq {
-            typ,
-            uri: uri.to_string(),
-            delta,
-        })
-        .collect::<Vec<_>>();
+    if let Some(index_client) = &mut inner.index_client {
+        // submit the deltas
+        let delta_store = deltas
+            .into_iter()
+            .map(|((uri, typ), delta)| parakeet_index::AggregateDeltaReq {
+                typ,
+                uri: uri.to_string(),
+                delta,
+            })
+            .collect::<Vec<_>>();
 
-    let mut read = 0;
+        let mut read = 0;
 
-    while read < delta_store.len() {
-        let rem = delta_store.len() - read;
-        let take = DELTA_BATCH_SIZE.min(rem);
+        while read < delta_store.len() {
+            let rem = delta_store.len() - read;
+            let take = DELTA_BATCH_SIZE.min(rem);
 
-        tracing::debug!("reading & submitting {take} deltas");
+            tracing::debug!("reading & submitting {take} deltas");
 
-        let deltas = delta_store[read..read + take].to_vec();
-        inner
-            .index_client
-            .submit_aggregate_delta_batch(parakeet_index::AggregateDeltaBatchReq { deltas })
-            .await?;
+            let deltas = delta_store[read..read + take].to_vec();
+            index_client
+                .submit_aggregate_delta_batch(parakeet_index::AggregateDeltaBatchReq { deltas })
+                .await?;
 
-        read += take;
-        tracing::debug!("read {read} of {} deltas", delta_store.len());
+            read += take;
+            tracing::debug!("read {read} of {} deltas", delta_store.len());
+        }
     }
+
+    t.commit().await?;
 
     Ok(())
 }
 
 async fn handle_backfill_rows(
-    conn: &mut AsyncPgConnection,
+    conn: &mut Transaction<'_>,
     deltas: &mut impl AggregateDeltaStore,
     repo: &str,
     rev: &str,
-) -> diesel::QueryResult<()> {
+) -> Result<(), tokio_postgres::Error> {
     // `pull_backfill_rows` filters out anything before the last commit we pulled
-    let backfill_rows = db::pull_backfill_rows(conn, repo, rev).await?;
+    let backfill_rows = db::backfill_rows_get(conn, repo, rev).await?;
 
     for row in backfill_rows {
         // blindly unwrap-ing this CID as we've already parsed it and re-serialized it
         let repo_cid = Cid::from_str(&row.cid).unwrap();
-        indexer_db::update_repo_version(conn, repo, &row.repo_ver, repo_cid).await?;
+        db::actor_set_repo_state(conn, repo, &row.repo_ver, repo_cid).await?;
 
         // again, we've serialized this.
         let items: Vec<BackfillItem> = serde_json::from_value(row.data).unwrap();
···
     }
 
     // finally, clear the backfill table entries for this actor
-    db::clear_backfill_rows(conn, repo).await?;
+    db::backfill_delete_rows(conn, repo).await?;
 
     Ok(())
 }
···
 
     Ok(res.json().await?)
 }
+
+#[derive(Debug, Default)]
+struct CopyStore {
+    likes: Vec<(String, records::StrongRef, DateTime<Utc>)>,
+    posts: Vec<(String, Cid, records::AppBskyFeedPost)>,
+    reposts: Vec<(String, records::StrongRef, DateTime<Utc>)>,
+    blocks: Vec<(String, String, DateTime<Utc>)>,
+    follows: Vec<(String, String, DateTime<Utc>)>,
+    list_items: Vec<(String, records::AppBskyGraphListItem)>,
+    verifications: Vec<(String, Cid, records::AppBskyGraphVerification)>,
+    records: Vec<(String, Cid)>,
+}
+
+impl CopyStore {
+    async fn submit(self, t: &mut Transaction<'_>, did: &str) -> Result<(), tokio_postgres::Error> {
+        db::copy::copy_likes(t, did, self.likes).await?;
+        db::copy::copy_posts(t, did, self.posts).await?;
+        db::copy::copy_reposts(t, did, self.reposts).await?;
+        db::copy::copy_blocks(t, did, self.blocks).await?;
+        db::copy::copy_follows(t, did, self.follows).await?;
+        db::copy::copy_list_items(t, self.list_items).await?;
+        db::copy::copy_verification(t, did, self.verifications).await?;
+        db::copy::copy_records(t, did, self.records).await?;
+
+        Ok(())
+    }
+
+    fn push_record(&mut self, at_uri: &str, cid: Cid) {
+        self.records.push((at_uri.to_string(), cid))
+    }
+}
+158 -44  consumer/src/backfill/repo.rs
···
-use super::types::{CarCommitEntry, CarEntry};
-use crate::indexer::types::RecordTypes;
-use futures::{StreamExt, TryStreamExt};
+use super::{
+    types::{CarCommitEntry, CarEntry},
+    CopyStore,
+};
+use crate::indexer::records;
+use crate::indexer::types::{AggregateDeltaStore, RecordTypes};
+use crate::{db, indexer};
+use deadpool_postgres::Transaction;
+use futures::TryStreamExt;
 use ipld_core::cid::Cid;
 use iroh_car::CarReader;
+use metrics::counter;
+use parakeet_index::AggregateType;
 use reqwest::Client;
 use std::collections::HashMap;
+use std::io::ErrorKind;
+use tokio::io::BufReader;
+use tokio_util::io::StreamReader;
 
-pub async fn pull_repo<'a>(
+type BackfillDeltaStore = HashMap<(String, i32), i32>;
+
+pub async fn stream_and_insert_repo(
+    t: &mut Transaction<'_>,
     client: &Client,
     repo: &str,
     pds: &str,
-) -> eyre::Result<(String, Cid, HashMap<String, (Cid, RecordTypes)>)> {
+) -> eyre::Result<(CarCommitEntry, BackfillDeltaStore, CopyStore)> {
     let res = client
         .get(format!("{pds}/xrpc/com.atproto.sync.getRepo?did={repo}"))
         .send()
         .await?
         .error_for_status()?;
 
-    let body = res.bytes().await?;
+    let strm = res
+        .bytes_stream()
+        .map_err(|err| std::io::Error::new(ErrorKind::Other, err));
+    let reader = StreamReader::new(strm);
+    let mut car_stream = CarReader::new(BufReader::new(reader)).await?;
 
-    let (commit, records) = read_car(&body).await?;
+    // the root should be the commit block
+    let root = car_stream.header().roots().first().cloned().unwrap();
 
-    Ok((commit.rev, commit.data, records))
-}
+    let mut commit = None;
+    let mut mst_nodes: HashMap<Cid, String> = HashMap::new();
+    let mut records: HashMap<Cid, RecordTypes> = HashMap::new();
+    let mut deltas = HashMap::new();
+    let mut copies = CopyStore::default();
 
-// beware: this is probably: 1. insecure, 2. slow, 3. a/n other crimes
-async fn read_car(
-    data: &[u8],
-) -> eyre::Result<(CarCommitEntry, HashMap<String, (Cid, RecordTypes)>)> {
-    let car = CarReader::new(data).await?;
+    while let Some((cid, block)) = car_stream.next_block().await? {
+        let Ok(block) = serde_ipld_dagcbor::from_slice::<CarEntry>(&block) else {
+            tracing::warn!("failed to parse block {cid}");
+            continue;
+        };
 
-    let entries = car
-        .stream()
-        .map_ok(
-            |(cid, block)| match serde_ipld_dagcbor::from_slice::<CarEntry>(&block) {
-                Ok(decoded) => Some((cid, decoded)),
-                Err(_) => None,
-            },
-        )
-        .filter_map(|v| async move { v.ok().flatten() })
-        .collect::<HashMap<_, _>>()
-        .await;
+        if root == cid {
+            if let CarEntry::Commit(commit_entry) = block {
+                commit = Some(commit_entry);
+            } else {
+                tracing::warn!("root did not point to a commit entry");
+            }
+            continue;
+        }
 
-    let mut commit = None;
-    let mut mst_nodes = Vec::new();
-    let mut records = HashMap::new();
-
-    for (cid, entry) in entries {
-        match entry {
-            CarEntry::Record(rec) => {
-                records.insert(cid, rec);
+        match block {
+            CarEntry::Commit(_) => {
+                tracing::warn!("got commit entry that was not in root")
+            }
+            CarEntry::Record(record) => {
+                if let Some(path) = mst_nodes.remove(&cid) {
+                    record_index(t, &mut copies, &mut deltas, repo, &path, cid, record).await?;
+                } else {
+                    records.insert(cid, record);
+                }
             }
             CarEntry::Mst(mst) => {
                 let mut out = Vec::with_capacity(mst.e.len());
···
                     let key = if node.p == 0 {
                         ks.to_string()
                     } else {
-                        let (prev, _): &(String, Cid) = out.last().unwrap();
+                        let (_, prev): &(Cid, String) = out.last().unwrap();
                         let prefix = &prev[..node.p as usize];
 
                         format!("{prefix}{ks}")
                     };
 
-                    out.push((key, node.v));
+                    out.push((node.v, key.to_string()));
                 }
 
                 mst_nodes.extend(out);
             }
-            CarEntry::Commit(car_commit) => {
-                commit = Some(car_commit);
-            }
         }
     }
 
-    let records_out = mst_nodes
-        .into_iter()
-        .filter_map(|(key, cid)| records.remove(&cid).map(|v| (key, (cid, v))))
-        .collect::<HashMap<_, _>>();
+    for (cid, record) in records {
+        if let Some(path) = mst_nodes.remove(&cid) {
+            record_index(t, &mut copies, &mut deltas, repo, &path, cid, record).await?;
+        } else {
+            tracing::warn!("couldn't find MST node for record {cid}")
+        }
+    }
 
-    let commit = commit.ok_or(eyre::eyre!("no commit found"))?;
+    let commit = commit.unwrap();
+
+    Ok((commit, deltas, copies))
+}
+
+async fn record_index(
+    t: &mut Transaction<'_>,
+    copies: &mut CopyStore,
+    deltas: &mut BackfillDeltaStore,
+    did: &str,
+    path: &str,
+    cid: Cid,
+    record: RecordTypes,
+) -> eyre::Result<()> {
+    let Some((collection_raw, rkey)) = path.split_once("/") else {
+        tracing::warn!("op contained invalid path {path}");
+        return Ok(());
+    };
+
+    counter!("backfilled_commits", "collection" => collection_raw.to_string()).increment(1);
+
+    let at_uri = format!("at://{did}/{path}");
+
+    match record {
+        RecordTypes::AppBskyFeedLike(rec) => {
+            deltas.incr(&rec.subject.uri, AggregateType::Like).await;
+
+            copies.push_record(&at_uri, cid);
+            copies.likes.push((at_uri, rec.subject, rec.created_at));
+        }
+        RecordTypes::AppBskyFeedPost(rec) => {
+            let maybe_reply = rec.reply.as_ref().map(|v| v.parent.uri.clone());
+            let maybe_embed = rec
+                .embed
+                .as_ref()
+                .and_then(|v| v.as_bsky())
+                .and_then(|v| match v {
+                    records::AppBskyEmbed::Record(r) => Some(r.record.uri.clone()),
+                    records::AppBskyEmbed::RecordWithMedia(r) => Some(r.record.record.uri.clone()),
+                    _ => None,
+                });
+
+            if let Some(labels) = rec.labels.clone() {
+                db::maintain_self_labels(t, did, Some(cid), &at_uri, labels).await?;
+            }
+            if let Some(embed) = rec.embed.clone().and_then(|embed| embed.into_bsky()) {
+                db::post_embed_insert(t, &at_uri, embed, rec.created_at).await?;
+            }
+
+            deltas.incr(did, AggregateType::ProfilePost).await;
+            if let Some(reply) = maybe_reply {
+                deltas.incr(&reply, AggregateType::Reply).await;
+            }
+            if let Some(embed) = maybe_embed {
+                deltas.incr(&embed, AggregateType::Embed).await;
+            }
+
+            copies.push_record(&at_uri, cid);
+            copies.posts.push((at_uri, cid, rec));
+        }
+        RecordTypes::AppBskyFeedRepost(rec) => {
+            deltas.incr(&rec.subject.uri, AggregateType::Repost).await;
 
-    Ok((commit, records_out))
+            copies.push_record(&at_uri, cid);
+            copies.reposts.push((at_uri, rec.subject, rec.created_at));
+        }
+        RecordTypes::AppBskyGraphBlock(rec) => {
+            copies.push_record(&at_uri, cid);
+            copies.blocks.push((at_uri, rec.subject, rec.created_at));
+        }
+        RecordTypes::AppBskyGraphFollow(rec) => {
+            deltas.incr(did, AggregateType::Follow).await;
+            deltas.incr(&rec.subject, AggregateType::Follower).await;
+
+            copies.push_record(&at_uri, cid);
+            copies.follows.push((at_uri, rec.subject, rec.created_at));
+        }
+        RecordTypes::AppBskyGraphListItem(rec) => {
+            let split_aturi = rec.list.rsplitn(4, '/').collect::<Vec<_>>();
+            if did != split_aturi[2] {
+                // it's also probably a bad idea to log *all* the attempts to do this...
+                tracing::warn!("tried to create a listitem on a list we don't control!");
+                return Ok(());
+            }
+
+            copies.push_record(&at_uri, cid);
+            copies.list_items.push((at_uri, rec));
+        }
+        RecordTypes::AppBskyGraphVerification(rec) => {
+            copies.push_record(&at_uri, cid);
+            copies.verifications.push((at_uri, cid, rec));
+        }
+        _ => indexer::index_op(t, deltas, did, cid, record, &at_uri, rkey).await?,
+    }
+
+    Ok(())
 }
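
The big shift in repo.rs is that getRepo responses are no longer buffered with res.bytes(); the CAR file is decoded block-by-block while it downloads. The adapter that makes this work, shown in isolation below, is a sketch under the same assumptions as the code above (reqwest's "stream" feature and tokio-util's "io" feature from Cargo.toml); the URL is a placeholder:

use futures::TryStreamExt;
use tokio::io::AsyncReadExt;
use tokio_util::io::StreamReader;

async fn response_as_reader(client: &reqwest::Client) -> eyre::Result<()> {
    let res = client.get("https://example.com/repo.car").send().await?;

    // bytes_stream() yields Result<Bytes, reqwest::Error>; StreamReader wants
    // io::Error items, so the error type is mapped before wrapping.
    let stream = res
        .bytes_stream()
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e));
    let mut reader = StreamReader::new(stream);

    // Any AsyncRead consumer (CarReader in the real code) can now pull bytes
    // incrementally instead of holding the whole body in memory.
    let mut chunk = [0u8; 4096];
    let n = reader.read(&mut chunk).await?;
    println!("read {n} bytes");
    Ok(())
}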
+13 -3  consumer/src/config.rs
···
 #[derive(Debug, Deserialize)]
 pub struct Config {
     pub index_uri: String,
-    pub database_url: String,
+    pub database: deadpool_postgres::Config,
     pub redis_uri: String,
     pub plc_directory: Option<String>,
     /// Adds contact details (email / bluesky handle / website) to the UA header.
     pub ua_contact: Option<String>,
-    #[serde(default = "default_backfill_workers")]
-    pub backfill_workers: u8,
     /// DIDs of label services to force subscription to.
     #[serde(default)]
     pub initial_label_services: Vec<String>,
···
 
     /// Configuration items specific to indexer
     pub indexer: Option<IndexerConfig>,
+    /// Configuration items specific to backfill
+    pub backfill: Option<BackfillConfig>,
 }
 
 #[derive(Debug, Deserialize)]
···
     BackfillHistory,
     /// Discover new accounts as they come and do not import history
     Realtime,
+}
+
+#[derive(Clone, Debug, Deserialize)]
+pub struct BackfillConfig {
+    #[serde(default = "default_backfill_workers")]
+    pub backfill_workers: u8,
+    #[serde(default)]
+    pub skip_aggregation: bool,
+    #[serde(default)]
+    pub skip_handle_validation: bool,
 }
 
 fn default_backfill_workers() -> u8 {
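
With database_url replaced by a structured database section and the backfill knobs moved into their own optional table, a figment-based loader (figment is already a dependency) still works unchanged. A sketch of loading the new shape; the file name and env prefix are illustrative, not taken from this diff:

use figment::providers::{Env, Format, Toml};
use figment::Figment;

// Hypothetical loader: `Config` is the struct above. The `database` field
// deserializes straight into deadpool_postgres::Config, and `[backfill]` into
// BackfillConfig, whose fields all have serde defaults.
fn load() -> eyre::Result<crate::config::Config> {
    Ok(Figment::new()
        .merge(Toml::file("consumer.toml"))
        .merge(Env::prefixed("CONSUMER_"))
        .extract()?)
}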
+101  consumer/src/db/actor.rs
···
+use super::{PgExecResult, PgOptResult};
+use chrono::{DateTime, Utc};
+use deadpool_postgres::GenericClient;
+use ipld_core::cid::Cid;
+use parakeet_db::types::{ActorStatus, ActorSyncState};
+
+pub async fn actor_upsert<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+    status: ActorStatus,
+    sync_state: ActorSyncState,
+    time: DateTime<Utc>,
+) -> PgExecResult {
+    conn.execute(
+        r#"INSERT INTO actors (did, status, sync_state, last_indexed) VALUES ($1, $2, $3, $4)
+        ON CONFLICT (did) DO UPDATE SET status=EXCLUDED.status, last_indexed=EXCLUDED.last_indexed"#,
+        &[&did, &status, &sync_state, &time.naive_utc()],
+    ).await
+}
+
+pub async fn actor_upsert_handle<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+    sync_state: ActorSyncState,
+    handle: Option<String>,
+    time: DateTime<Utc>,
+) -> PgExecResult {
+    conn.execute(
+        r#"INSERT INTO actors (did, handle, sync_state, last_indexed) VALUES ($1, $2, $3, $4)
+        ON CONFLICT (did) DO UPDATE SET handle=EXCLUDED.handle, last_indexed=EXCLUDED.last_indexed"#,
+        &[&did, &handle, &sync_state, &time.naive_utc()]
+    ).await
+}
+
+pub async fn actor_set_sync_status<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+    sync_state: ActorSyncState,
+    time: DateTime<Utc>,
+) -> PgExecResult {
+    conn.execute(
+        "UPDATE actors SET sync_state=$2, last_indexed=$3 WHERE did=$1",
+        &[&did, &sync_state, &time.naive_utc()],
+    )
+    .await
+}
+
+pub async fn actor_set_repo_state<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+    rev: &str,
+    cid: Cid,
+) -> PgExecResult {
+    conn.execute(
+        "UPDATE actors SET repo_rev=$2, repo_cid=$3 WHERE did=$1",
+        &[&did, &rev, &cid.to_string()],
+    )
+    .await
+}
+
+pub async fn actor_get_status_and_rev<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+) -> PgOptResult<(ActorStatus, Option<String>)> {
+    let res = conn
+        .query_opt(
+            "SELECT status, repo_rev FROM actors WHERE did=$1 LIMIT 1",
+            &[&did],
+        )
+        .await?;
+
+    Ok(res.map(|v| (v.get(0), v.get(1))))
+}
+
+pub async fn actor_get_repo_status<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+) -> PgOptResult<(ActorSyncState, Option<String>)> {
+    let res = conn
+        .query_opt(
+            "SELECT sync_state, repo_rev FROM actors WHERE did=$1 LIMIT 1",
+            &[&did],
+        )
+        .await?;
+
+    Ok(res.map(|v| (v.get(0), v.get(1))))
+}
+
+pub async fn actor_get_statuses<C: GenericClient>(
+    conn: &mut C,
+    did: &str,
+) -> PgOptResult<(ActorStatus, ActorSyncState)> {
+    let res = conn
+        .query_opt(
+            "SELECT status, sync_state FROM actors WHERE did=$1 LIMIT 1",
+            &[&did],
+        )
+        .await?;
+
+    Ok(res.map(|v| (v.get(0), v.get(1))))
+}
+65  consumer/src/db/backfill.rs
···
+use super::{PgExecResult, PgResult};
+use chrono::NaiveDateTime;
+use deadpool_postgres::GenericClient;
+use ipld_core::cid::Cid;
+
+pub struct BackfillRow {
+    pub repo: String,
+    pub repo_ver: String,
+    pub cid: String,
+
+    pub data: serde_json::Value,
+
+    pub indexed_at: NaiveDateTime,
+}
+
+pub async fn backfill_job_write<C: GenericClient>(conn: &mut C, did: &str, status: &str) -> PgExecResult {
+    conn.execute(
+        "INSERT INTO backfill_jobs (did, status) VALUES ($1, $2)",
+        &[&did, &status],
+    )
+    .await
+}
+
+pub async fn backfill_write_row<C: GenericClient>(
+    conn: &mut C,
+    repo: &str,
+    rev: &str,
+    cid: Cid,
+    data: serde_json::Value,
+) -> PgExecResult {
+    conn.execute(
+        "INSERT INTO backfill (repo, repo_ver, cid, data) VALUES ($1, $2, $3, $4)",
+        &[&repo, &rev, &cid.to_string(), &data],
+    )
+    .await
+}
+
+pub async fn backfill_rows_get<C: GenericClient>(
+    conn: &mut C,
+    repo: &str,
+    rev: &str,
+) -> PgResult<Vec<BackfillRow>> {
+    let res = conn
+        .query(
+            "SELECT * FROM backfill WHERE repo=$1 AND repo_ver > $2 ORDER BY repo_ver",
+            &[&repo, &rev],
+        )
+        .await?;
+
+    Ok(res
+        .into_iter()
+        .map(|row| BackfillRow {
+            repo: row.get(0),
+            repo_ver: row.get(1),
+            cid: row.get(2),
+            data: row.get(3),
+            indexed_at: row.get(4),
+        })
+        .collect())
+}
+
+pub async fn backfill_delete_rows<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
+    conn.execute("DELETE FROM backfill WHERE repo=$1", &[&repo])
+        .await
+}
+385  consumer/src/db/copy.rs
···
+use super::PgExecResult;
+use crate::indexer::records;
+use crate::utils::strongref_to_parts;
+use chrono::prelude::*;
+use deadpool_postgres::Transaction;
+use futures::pin_mut;
+use ipld_core::cid::Cid;
+use tokio_postgres::binary_copy::BinaryCopyInWriter;
+use tokio_postgres::types::Type;
+
+// StrongRefs are used in both likes and reposts
+const STRONGREF_TYPES: &[Type] = &[
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TIMESTAMP,
+];
+type StrongRefRow = (String, records::StrongRef, DateTime<Utc>);
+
+// SubjectRefs are used in both blocks and follows
+const SUBJECT_TYPES: &[Type] = &[Type::TEXT, Type::TEXT, Type::TEXT, Type::TIMESTAMP];
+type SubjectRefRow = (String, String, DateTime<Utc>);
+
+pub async fn copy_likes(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<StrongRefRow>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE likes_tmp (LIKE likes INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in(
+            "COPY likes_tmp (at_uri, did, subject, subject_cid, created_at) FROM STDIN (FORMAT binary)",
+        )
+        .await?;
+    let writer = BinaryCopyInWriter::new(writer, STRONGREF_TYPES);
+
+    pin_mut!(writer);
+
+    for row in data {
+        let writer = writer.as_mut();
+        writer
+            .write(&[
+                &row.0,
+                &did,
+                &row.1.uri,
+                &row.1.cid.to_string(),
+                &row.2.naive_utc(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO likes (SELECT * FROM likes_tmp)", &[])
+        .await
+}
+
+pub async fn copy_reposts(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<StrongRefRow>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE reposts_tmp (LIKE reposts INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in(
+            "COPY reposts_tmp (at_uri, did, post, post_cid, created_at) FROM STDIN (FORMAT binary)",
+        )
+        .await?;
+    let writer = BinaryCopyInWriter::new(writer, STRONGREF_TYPES);
+
+    pin_mut!(writer);
+
+    for row in data {
+        let writer = writer.as_mut();
+        writer
+            .write(&[
+                &row.0,
+                &did,
+                &row.1.uri,
+                &row.1.cid.to_string(),
+                &row.2.naive_utc(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO reposts (SELECT * FROM reposts_tmp)", &[])
+        .await
+}
+
+const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, created_at) FROM STDIN (FORMAT binary)";
+const POST_TYPES: &[Type] = &[
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::JSONB,
+    Type::TEXT,
+    Type::JSONB,
+    Type::TEXT_ARRAY,
+    Type::TEXT_ARRAY,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TIMESTAMP,
+];
+pub async fn copy_posts(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<(String, Cid, records::AppBskyFeedPost)>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE posts_tmp (LIKE posts INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn.copy_in(POST_STMT).await?;
+    let writer = BinaryCopyInWriter::new(writer, POST_TYPES);
+
+    pin_mut!(writer);
+
+    for (at_uri, cid, post) in data {
+        let record = serde_json::to_value(&post).unwrap();
+        let facets = post.facets.and_then(|v| serde_json::to_value(v).ok());
+        let embed = post.embed.as_ref().map(|v| v.as_str());
+        let embed_subtype = post.embed.as_ref().and_then(|v| v.subtype());
+        let (parent_uri, parent_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.parent));
+        let (root_uri, root_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.root));
+
+        let writer = writer.as_mut();
+        writer
+            .write(&[
+                &at_uri,
+                &cid.to_string(),
+                &did,
+                &record,
+                &post.text,
+                &facets,
+                &post.langs.unwrap_or_default(),
+                &post.tags.unwrap_or_default(),
+                &parent_uri,
+                &parent_cid,
+                &root_uri,
+                &root_cid,
+                &embed,
+                &embed_subtype,
+                &post.created_at.naive_utc(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO posts (SELECT * FROM posts_tmp)", &[])
+        .await
+}
+
+pub async fn copy_blocks(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<SubjectRefRow>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE blocks_tmp (LIKE blocks INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in("COPY blocks_tmp (at_uri, did, subject, created_at) FROM STDIN (FORMAT binary)")
+        .await?;
+    let writer = BinaryCopyInWriter::new(writer, SUBJECT_TYPES);
+
+    pin_mut!(writer);
+
+    for row in data {
+        let writer = writer.as_mut();
+        writer
+            .write(&[&row.0, &did, &row.1, &row.2.naive_utc()])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO blocks (SELECT * FROM blocks_tmp)", &[])
+        .await
+}
+
+pub async fn copy_list_items(
+    conn: &mut Transaction<'_>,
+    data: Vec<(String, records::AppBskyGraphListItem)>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE list_items_tmp (LIKE list_items INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in(
+            "COPY list_items_tmp (at_uri, list_uri, subject, created_at) FROM STDIN (FORMAT binary)",
+        )
+        .await?;
+    let writer = BinaryCopyInWriter::new(
+        writer,
+        &[Type::TEXT, Type::TEXT, Type::TEXT, Type::TIMESTAMP],
+    );
+
+    pin_mut!(writer);
+
+    for (at_uri, record) in data {
+        let writer = writer.as_mut();
+        writer
+            .write(&[
+                &at_uri,
+                &record.list,
+                &record.subject,
+                &record.created_at.naive_utc(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO list_items (SELECT * FROM list_items_tmp)", &[])
+        .await
+}
+
+pub async fn copy_follows(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<SubjectRefRow>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE follows_tmp (LIKE follows INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in("COPY follows_tmp (at_uri, did, subject, created_at) FROM STDIN (FORMAT binary)")
+        .await?;
+    let writer = BinaryCopyInWriter::new(writer, SUBJECT_TYPES);
+
+    pin_mut!(writer);
+
+    for row in data {
+        let writer = writer.as_mut();
+        writer
+            .write(&[&row.0, &did, &row.1, &row.2.naive_utc()])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO follows (SELECT * FROM follows_tmp)", &[])
+        .await
+}
+
+const VERIFICATION_TYPES: &[Type] = &[
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TEXT,
+    Type::TIMESTAMP,
+];
+pub async fn copy_verification(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<(String, Cid, records::AppBskyGraphVerification)>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE verification_tmp (LIKE verification INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in("COPY verification_tmp (at_uri, cid, verifier, subject, handle, display_name, created_at) FROM STDIN (FORMAT binary)")
+        .await?;
+    let writer = BinaryCopyInWriter::new(writer, VERIFICATION_TYPES);
+
+    pin_mut!(writer);
+
+    for (at_uri, cid, record) in data {
+        let writer = writer.as_mut();
+        writer
+            .write(&[
+                &at_uri,
+                &cid.to_string(),
+                &did,
+                &record.subject,
+                &record.handle,
+                &record.display_name,
+                &record.created_at.naive_utc(),
+            ])
+            .await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute(
+        "INSERT INTO verification (SELECT * FROM verification_tmp)",
+        &[],
+    )
+    .await
+}
+
+pub async fn copy_records(
+    conn: &mut Transaction<'_>,
+    did: &str,
+    data: Vec<(String, Cid)>,
+) -> PgExecResult {
+    if data.is_empty() {
+        return Ok(0);
+    }
+
+    conn.execute(
+        "CREATE TEMP TABLE records_tmp (LIKE records INCLUDING DEFAULTS) ON COMMIT DROP",
+        &[],
+    )
+    .await?;
+
+    let writer = conn
+        .copy_in("COPY records_tmp (at_uri, cid, did) FROM STDIN (FORMAT binary)")
+        .await?;
+    let writer = BinaryCopyInWriter::new(writer, &[Type::TEXT, Type::TEXT, Type::TEXT]);
+
+    pin_mut!(writer);
+
+    for (at_uri, cid) in data {
+        let writer = writer.as_mut();
+        // value order matches the COPY column list (at_uri, cid, did)
+        writer.as_ref().get_ref();
+        writer.write(&[&at_uri, &cid.to_string(), &did]).await?;
+    }
+
+    writer.finish().await?;
+
+    conn.execute("INSERT INTO records (SELECT * FROM records_tmp)", &[])
+        .await
+}
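
Every function in this module follows the same bulk-load shape: stage rows into a session-local temp table with a binary COPY, then move them into the real table inside the same deferred-constraint transaction opened in backfill/mod.rs. A reduced sketch of that pattern against a hypothetical pairs(name text, value int4) table:

use futures::pin_mut;
use tokio_postgres::binary_copy::BinaryCopyInWriter;
use tokio_postgres::types::Type;

async fn copy_pairs(
    t: &mut deadpool_postgres::Transaction<'_>,
    rows: Vec<(String, i32)>,
) -> Result<u64, tokio_postgres::Error> {
    t.execute(
        "CREATE TEMP TABLE pairs_tmp (LIKE pairs INCLUDING DEFAULTS) ON COMMIT DROP",
        &[],
    )
    .await?;

    // The writer needs the Postgres type of every column up front and must be
    // pinned before rows can be written.
    let sink = t
        .copy_in("COPY pairs_tmp (name, value) FROM STDIN (FORMAT binary)")
        .await?;
    let writer = BinaryCopyInWriter::new(sink, &[Type::TEXT, Type::INT4]);
    pin_mut!(writer);

    for (name, value) in rows {
        writer.as_mut().write(&[&name, &value]).await?;
    }

    // finish() flushes the COPY; the staged rows then land in the real table.
    writer.finish().await?;

    t.execute("INSERT INTO pairs (SELECT * FROM pairs_tmp)", &[]).await
}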
+79
consumer/src/db/labels.rs
+79
consumer/src/db/labels.rs
···
1
+
use super::PgExecResult;
2
+
use crate::indexer::records::AppBskyLabelerService;
3
+
use deadpool_postgres::GenericClient;
4
+
use ipld_core::cid::Cid;
5
+
use lexica::com_atproto::label::{LabelValueDefinition, SelfLabels};
6
+
use std::collections::HashMap;
7
+
8
+
pub async fn maintain_label_defs<C: GenericClient>(
9
+
conn: &mut C,
10
+
repo: &str,
11
+
rec: &AppBskyLabelerService,
12
+
) -> PgExecResult {
13
+
// drop any label defs not currently in the list
14
+
conn.execute(
15
+
"DELETE FROM labeler_defs WHERE labeler=$1 AND NOT label_identifier = any($2)",
16
+
&[&repo, &rec.policies.label_values],
17
+
)
18
+
.await?;
19
+
20
+
let definitions = rec
21
+
.policies
22
+
.label_value_definitions
23
+
.iter()
24
+
.map(|def| (def.identifier.clone(), def))
25
+
.collect::<HashMap<String, &LabelValueDefinition>>();
26
+
27
+
for label in &rec.policies.label_values {
28
+
let definition = definitions.get(label);
29
+
30
+
let severity = definition.map(|v| v.severity.to_string());
31
+
let blurs = definition.map(|v| v.blurs.to_string());
32
+
let default_setting = definition
33
+
.and_then(|v| v.default_setting)
34
+
.map(|v| v.to_string());
35
+
let adult_only = definition.and_then(|v| v.adult_only);
36
+
let locales = definition.and_then(|v| serde_json::to_value(&v.locales).ok());
37
+
38
+
conn.execute(
39
+
include_str!("sql/label_defs_upsert.sql"),
40
+
&[
41
+
&repo,
42
+
&label,
43
+
&severity,
44
+
&blurs,
45
+
&default_setting,
46
+
&adult_only,
47
+
&locales,
48
+
],
49
+
)
50
+
.await?;
51
+
}
52
+
53
+
Ok(0)
54
+
}
55
+
56
+
pub async fn maintain_self_labels<C: GenericClient>(
57
+
conn: &mut C,
58
+
repo: &str,
59
+
cid: Option<Cid>,
60
+
at_uri: &str,
61
+
self_labels: SelfLabels,
62
+
) -> PgExecResult {
63
+
conn.execute(
64
+
"DELETE FROM labels WHERE self_label=TRUE AND uri=$1",
65
+
&[&at_uri],
66
+
)
67
+
.await?;
68
+
69
+
let cid = cid.map(|cid| cid.to_string());
70
+
71
+
let stmt = conn.prepare_cached("INSERT INTO labels (labeler, label, uri, self_label, cid, created_at) VALUES ($1, $2, $3, TRUE, $4, NOW())").await?;
72
+
73
+
for label in self_labels.values {
74
+
conn.execute(&stmt, &[&repo, &label.val, &at_uri, &cid.clone()])
75
+
.await?;
76
+
}
77
+
78
+
Ok(0)
79
+
}
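maintain_self_labels is written to be re-run on every write: it purges the old self-labels for the URI, then reinserts the current set through a cached statement so the per-label loop avoids re-parsing. A sketch of calling it after indexing a record, assuming the record exposes its labels as an Option<SelfLabels> (field name hypothetical):

if let Some(self_labels) = rec.labels {
    // replaces whatever self-labels were previously stored for this URI
    maintain_self_labels(conn, repo, Some(cid), at_uri, self_labels).await?;
}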
+16
consumer/src/db/mod.rs
···
1
+
use tokio_postgres::Error as PgError;
2
+
3
+
type PgResult<T> = Result<T, PgError>;
4
+
type PgExecResult = PgResult<u64>;
5
+
type PgOptResult<T> = PgResult<Option<T>>;
6
+
7
+
mod actor;
8
+
mod backfill;
9
+
pub mod copy;
10
+
mod labels;
11
+
mod record;
12
+
13
+
pub use actor::*;
14
+
pub use backfill::*;
15
+
pub use labels::*;
16
+
pub use record::*;
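The aliases follow tokio_postgres conventions: execute() resolves to the affected-row count, so a PgExecResult carries a u64. A trivial illustration at a call site (essentially what record_delete in record.rs does):

async fn purge_record<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
    // u64 = number of rows deleted
    conn.execute("DELETE FROM records WHERE at_uri=$1", &[&at_uri]).await
}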
+651
consumer/src/db/record.rs
···
1
+
use super::{PgExecResult, PgOptResult};
2
+
use crate::indexer::records::*;
3
+
use crate::utils::{blob_ref, strongref_to_parts};
4
+
use chrono::prelude::*;
5
+
use deadpool_postgres::GenericClient;
6
+
use ipld_core::cid::Cid;
7
+
8
+
pub async fn record_upsert<C: GenericClient>(
9
+
conn: &mut C,
10
+
at_uri: &str,
11
+
repo: &str,
12
+
cid: Cid,
13
+
) -> PgExecResult {
14
+
conn.execute(
15
+
"INSERT INTO records (at_uri, did, cid) VALUES ($1, $2, $3) ON CONFLICT (at_uri) DO UPDATE SET cid=EXCLUDED.cid",
16
+
&[&repo, &at_uri, &cid.to_string()],
17
+
).await
18
+
}
19
+
20
+
pub async fn record_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
21
+
conn.execute("DELETE FROM records WHERE at_uri=$1", &[&at_uri])
22
+
.await
23
+
}
24
+
25
+
pub async fn block_insert<C: GenericClient>(
26
+
conn: &mut C,
27
+
at_uri: &str,
28
+
repo: &str,
29
+
rec: AppBskyGraphBlock,
30
+
) -> PgExecResult {
31
+
conn.execute(
32
+
"INSERT INTO blocks (at_uri, did, subject, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
33
+
&[&at_uri, &repo, &rec.subject, &rec.created_at],
34
+
).await
35
+
}
36
+
37
+
pub async fn block_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
38
+
conn.execute("DELETE FROM blocks WHERE at_uri=$1", &[&at_uri])
39
+
.await
40
+
}
41
+
42
+
pub async fn chat_decl_upsert<C: GenericClient>(
43
+
conn: &mut C,
44
+
repo: &str,
45
+
rec: ChatBskyActorDeclaration,
46
+
) -> PgExecResult {
47
+
conn.execute(
48
+
"INSERT INTO chat_decls (did, allow_incoming) VALUES ($1, $2) ON CONFLICT (did) DO UPDATE SET allow_incoming=EXCLUDED.allow_incoming",
49
+
&[&repo, &rec.allow_incoming.to_string()]
50
+
).await
51
+
}
52
+
53
+
pub async fn chat_decl_delete<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
54
+
conn.execute("DELETE FROM chat_decls WHERE did=$1", &[&repo])
55
+
.await
56
+
}
57
+
58
+
pub async fn feedgen_upsert<C: GenericClient>(
59
+
conn: &mut C,
60
+
at_uri: &str,
61
+
repo: &str,
62
+
cid: Cid,
63
+
rec: AppBskyFeedGenerator,
64
+
) -> PgExecResult {
65
+
let cid = cid.to_string();
66
+
let description_facets = rec
67
+
.description_facets
68
+
.and_then(|v| serde_json::to_value(v).ok());
69
+
let avatar = blob_ref(rec.avatar);
70
+
71
+
conn.execute(
72
+
include_str!("sql/feedgen_upsert.sql"),
73
+
&[
74
+
&at_uri,
75
+
&repo,
76
+
&cid,
77
+
&rec.did,
78
+
&rec.content_mode,
79
+
&rec.display_name,
80
+
&rec.description,
81
+
&description_facets,
82
+
&avatar,
83
+
&rec.created_at,
84
+
],
85
+
)
86
+
.await
87
+
}
88
+
89
+
pub async fn feedgen_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
90
+
conn.execute("DELETE FROM feedgens WHERE at_uri=$1", &[&at_uri])
91
+
.await
92
+
}
93
+
94
+
pub async fn follow_insert<C: GenericClient>(
95
+
conn: &mut C,
96
+
at_uri: &str,
97
+
repo: &str,
98
+
rec: AppBskyGraphFollow,
99
+
) -> PgExecResult {
100
+
conn.execute(
101
+
"INSERT INTO follows (at_uri, did, subject, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
102
+
&[&at_uri, &repo, &rec.subject, &rec.created_at],
103
+
).await
104
+
}
105
+
106
+
pub async fn follow_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgOptResult<String> {
107
+
let res = conn
108
+
.query_opt(
109
+
"DELETE FROM follows WHERE at_uri=$1 RETURNING subject",
110
+
&[&at_uri],
111
+
)
112
+
.await?;
113
+
114
+
Ok(res.map(|v| v.get(0)))
115
+
}
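The DELETE ... RETURNING shape lets the caller adjust aggregate counts without a second lookup; the like and repost deletes below follow the same pattern. A sketch, where the delta-store call is a hypothetical stand-in:

if let Some(subject) = follow_delete(conn, at_uri).await? {
    // hypothetical aggregate update: one fewer follower for `subject`
    deltas.follower_delta(&subject, -1);
}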
116
+
117
+
pub async fn labeler_upsert<C: GenericClient>(
118
+
conn: &mut C,
119
+
repo: &str,
120
+
cid: Cid,
121
+
rec: AppBskyLabelerService,
122
+
) -> PgExecResult {
123
+
let cid = cid.to_string();
124
+
let reasons = rec
125
+
.reason_types
126
+
.as_ref()
127
+
.map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<_>>());
128
+
let subject_types = rec
129
+
.subject_types
130
+
.as_ref()
131
+
.map(|v| v.iter().map(|v| v.to_string()).collect::<Vec<_>>());
132
+
133
+
conn.execute(
134
+
include_str!("sql/label_service_upsert.sql"),
135
+
&[
136
+
&repo,
137
+
&cid,
138
+
&reasons,
139
+
&subject_types,
140
+
&rec.subject_collections,
141
+
],
142
+
)
143
+
.await?;
144
+
145
+
super::maintain_label_defs(conn, repo, &rec).await
146
+
}
147
+
148
+
pub async fn labeler_delete<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
149
+
conn.execute("DELETE FROM labelers WHERE did=$1", &[&repo])
150
+
.await
151
+
}
152
+
153
+
pub async fn like_insert<C: GenericClient>(
154
+
conn: &mut C,
155
+
at_uri: &str,
156
+
repo: &str,
157
+
rec: AppBskyFeedLike,
158
+
) -> PgExecResult {
159
+
conn.execute(
160
+
"INSERT INTO likes (at_uri, did, subject, subject_cid, created_at) VALUES ($1, $2, $3, $4, $5)",
161
+
&[&at_uri, &repo, &rec.subject.uri, &rec.subject.cid.to_string(), &rec.created_at]
162
+
).await
163
+
}
164
+
165
+
pub async fn like_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgOptResult<String> {
166
+
let res = conn
167
+
.query_opt(
168
+
"DELETE FROM likes WHERE at_uri=$1 RETURNING subject",
169
+
&[&at_uri],
170
+
)
171
+
.await?;
172
+
173
+
Ok(res.map(|v| v.get(0)))
174
+
}
175
+
176
+
pub async fn list_upsert<C: GenericClient>(
177
+
conn: &mut C,
178
+
at_uri: &str,
179
+
repo: &str,
180
+
cid: Cid,
181
+
rec: AppBskyGraphList,
182
+
) -> PgExecResult {
183
+
let cid = cid.to_string();
184
+
let description_facets = rec
185
+
.description_facets
186
+
.and_then(|v| serde_json::to_value(v).ok());
187
+
let avatar = blob_ref(rec.avatar);
188
+
189
+
conn.execute(
190
+
include_str!("sql/list_upsert.sql"),
191
+
&[
192
+
&at_uri,
193
+
&repo,
194
+
&cid,
195
+
&rec.purpose,
196
+
&rec.name,
197
+
&rec.description,
198
+
&description_facets,
199
+
&avatar,
200
+
&rec.created_at,
201
+
],
202
+
)
203
+
.await
204
+
}
205
+
206
+
pub async fn list_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
207
+
conn.execute("DELETE FROM lists WHERE at_uri=$1", &[&at_uri])
208
+
.await
209
+
}
210
+
211
+
pub async fn list_block_insert<C: GenericClient>(
212
+
conn: &mut C,
213
+
at_uri: &str,
214
+
repo: &str,
215
+
rec: AppBskyGraphListBlock,
216
+
) -> PgExecResult {
217
+
conn.execute(
218
+
"INSERT INTO list_blocks (at_uri, did, list_uri, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
219
+
&[&at_uri, &repo, &rec.subject, &rec.created_at],
220
+
).await
221
+
}
222
+
223
+
pub async fn list_block_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
224
+
conn.execute("DELETE FROM list_blocks WHERE at_uri=$1", &[&at_uri])
225
+
.await
226
+
}
227
+
228
+
pub async fn list_item_insert<C: GenericClient>(
229
+
conn: &mut C,
230
+
at_uri: &str,
231
+
rec: AppBskyGraphListItem,
232
+
) -> PgExecResult {
233
+
conn.execute(
234
+
"INSERT INTO list_items (at_uri, list_uri, subject, created_at) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING",
235
+
&[&at_uri, &rec.list, &rec.subject, &rec.created_at],
236
+
).await
237
+
}
238
+
239
+
pub async fn list_item_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
240
+
conn.execute("DELETE FROM list_items WHERE at_uri=$1", &[&at_uri])
241
+
.await
242
+
}
243
+
244
+
pub async fn post_insert<C: GenericClient>(
245
+
conn: &mut C,
246
+
at_uri: &str,
247
+
repo: &str,
248
+
cid: Cid,
249
+
rec: AppBskyFeedPost,
250
+
) -> PgExecResult {
251
+
let cid = cid.to_string();
252
+
let record = serde_json::to_value(&rec).unwrap();
253
+
let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok());
254
+
let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent));
255
+
let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root));
256
+
let embed = rec.embed.as_ref().map(|v| v.as_str());
257
+
let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype());
258
+
259
+
let count = conn
260
+
.execute(
261
+
include_str!("sql/post_insert.sql"),
262
+
&[
263
+
&at_uri,
264
+
&repo,
265
+
&cid,
266
+
&record,
267
+
&rec.text,
268
+
&facets,
269
+
&rec.langs.unwrap_or_default(),
270
+
&rec.tags.unwrap_or_default(),
271
+
&parent_uri,
272
+
&parent_cid,
273
+
&root_uri,
274
+
&root_cid,
275
+
&embed,
276
+
&embed_subtype,
277
+
&rec.created_at,
278
+
],
279
+
)
280
+
.await?;
281
+
282
+
if let Some(embed) = rec.embed.and_then(|embed| embed.into_bsky()) {
283
+
post_embed_insert(conn, at_uri, embed, rec.created_at).await?;
284
+
}
285
+
286
+
Ok(count)
287
+
}
288
+
289
+
pub async fn post_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
290
+
conn.execute("DELETE FROM posts WHERE at_uri=$1", &[&at_uri])
291
+
.await
292
+
}
293
+
294
+
pub async fn post_get_info_for_delete<C: GenericClient>(
295
+
conn: &mut C,
296
+
at_uri: &str,
297
+
) -> PgOptResult<(Option<String>, Option<String>)> {
298
+
let res = conn
299
+
.query_opt(
300
+
"SELECT parent_uri, per.uri FROM posts LEFT JOIN post_embed_record per on at_uri = per.post_uri WHERE at_uri = $1",
301
+
&[&at_uri],
302
+
)
303
+
.await?;
304
+
305
+
Ok(res.map(|row| (row.get(0), row.get(1))))
306
+
}
307
+
308
+
pub async fn post_embed_insert<C: GenericClient>(
309
+
conn: &mut C,
310
+
post: &str,
311
+
embed: AppBskyEmbed,
312
+
created_at: DateTime<Utc>,
313
+
) -> PgExecResult {
314
+
match embed {
315
+
AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await,
316
+
AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await,
317
+
AppBskyEmbed::External(embed) => post_embed_external_insert(conn, post, embed).await,
318
+
AppBskyEmbed::Record(embed) => {
319
+
post_embed_record_insert(conn, post, embed, created_at).await
320
+
}
321
+
AppBskyEmbed::RecordWithMedia(embed) => {
322
+
post_embed_record_insert(conn, post, embed.record, created_at).await?;
323
+
match *embed.media {
324
+
AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await,
325
+
AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await,
326
+
AppBskyEmbed::External(embed) => {
327
+
post_embed_external_insert(conn, post, embed).await
328
+
}
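// per the app.bsky.embed.recordWithMedia lexicon, the media half can only
// be images, video, or external, so any other variant is unreachable here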
329
+
_ => unreachable!(),
330
+
}
331
+
}
332
+
}
333
+
}
334
+
335
+
async fn post_embed_image_insert<C: GenericClient>(
336
+
conn: &mut C,
337
+
post: &str,
338
+
embed: AppBskyEmbedImages,
339
+
) -> PgExecResult {
340
+
let stmt = conn.prepare("INSERT INTO post_embed_images (post_uri, seq, cid, mime_type, alt, width, height) VALUES ($1, $2, $3, $4, $5, $6, $7)").await?;
341
+
342
+
for (idx, image) in embed.images.iter().enumerate() {
343
+
let cid = image.image.r#ref.to_string();
344
+
let width = image.aspect_ratio.as_ref().map(|v| v.width);
345
+
let height = image.aspect_ratio.as_ref().map(|v| v.height);
346
+
347
+
conn.execute(
348
+
&stmt,
349
+
&[
350
+
&post,
351
+
&(idx as i16),
352
+
&cid,
353
+
&image.image.mime_type,
354
+
&image.alt,
355
+
&width,
356
+
&height,
357
+
],
358
+
)
359
+
.await?;
360
+
}
361
+
362
+
Ok(0)
363
+
}
364
+
365
+
async fn post_embed_video_insert<C: GenericClient>(
366
+
conn: &mut C,
367
+
post: &str,
368
+
embed: AppBskyEmbedVideo,
369
+
) -> PgExecResult {
370
+
let cid = embed.video.r#ref.to_string();
371
+
let width = embed.aspect_ratio.as_ref().map(|v| v.width);
372
+
let height = embed.aspect_ratio.as_ref().map(|v| v.height);
373
+
374
+
let count = conn.execute(
375
+
"INSERT INTO post_embed_video (post_uri, cid, mime_type, alt, width, height) VALUES ($1, $2, $3, $4, $5, $6)",
376
+
&[&post, &cid, &embed.video.mime_type, &embed.alt, &width, &height],
377
+
).await?;
378
+
379
+
if let Some(captions) = embed.captions {
380
+
let stmt = conn.prepare_cached("INSERT INTO post_embed_video_captions (post_uri, cid, mime_type, language) VALUES ($1, $2, $3, $4)").await?;
381
+
382
+
for caption in captions {
383
+
let cid = caption.file.r#ref.to_string();
384
+
conn.execute(
385
+
&stmt,
386
+
&[&post, &cid, &caption.file.mime_type, &caption.lang],
387
+
)
388
+
.await?;
389
+
}
390
+
}
391
+
392
+
Ok(count)
393
+
}
394
+
395
+
async fn post_embed_external_insert<C: GenericClient>(
396
+
conn: &mut C,
397
+
post: &str,
398
+
embed: AppBskyEmbedExternal,
399
+
) -> PgExecResult {
400
+
let thumb_mime = embed.external.thumb.as_ref().map(|v| v.mime_type.clone());
401
+
let thumb_cid = embed.external.thumb.as_ref().map(|v| v.r#ref.to_string());
402
+
403
+
conn.execute(
404
+
"INSERT INTO post_embed_ext (post_uri, uri, title, description, thumb_mime_type, thumb_cid) VALUES ($1, $2, $3, $4, $5, $6)",
405
+
&[&post, &embed.external.uri, &embed.external.title, &embed.external.description, &thumb_mime, &thumb_cid],
406
+
).await
407
+
}
408
+
409
+
async fn post_embed_record_insert<C: GenericClient>(
410
+
conn: &mut C,
411
+
post: &str,
412
+
embed: AppBskyEmbedRecord,
413
+
post_created_at: DateTime<Utc>,
414
+
) -> PgExecResult {
415
+
// strip "at://" then break into parts by '/'
416
+
let parts = embed.record.uri[5..].split('/').collect::<Vec<_>>();
417
+
418
+
let detached = if parts[1] == "app.bsky.feed.post" {
419
+
let postgate_effective: Option<DateTime<Utc>> = conn
420
+
.query_opt(
421
+
"SELECT created_at FROM postgates WHERE post_uri=$1",
422
+
&[&post],
423
+
)
424
+
.await?
425
+
.map(|v| v.get(0));
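// the embed counts as detached when the postgate took effect before the
// post was created, using the earlier of now and the claimed created_at so
// a forged future timestamp cannot sidestep an existing postgate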
426
+
427
+
postgate_effective
428
+
.map(|v| Utc::now().min(post_created_at) > v)
429
+
.unwrap_or_default()
430
+
} else {
431
+
false
432
+
};
433
+
434
+
conn.execute(
435
+
"INSERT INTO post_embed_record (post_uri, record_type, uri, cid, detached) VALUES ($1, $2, $3, $4, $5)",
436
+
&[&post, &parts[1], &embed.record.uri, &embed.record.cid.to_string(), &detached],
437
+
).await
438
+
}
439
+
440
+
pub async fn postgate_upsert<C: GenericClient>(
441
+
conn: &mut C,
442
+
at_uri: &str,
443
+
cid: Cid,
444
+
rec: &AppBskyFeedPostgate,
445
+
) -> PgExecResult {
446
+
let rules = rec
447
+
.embedding_rules
448
+
.iter()
449
+
.map(|v| v.as_str().to_string())
450
+
.collect::<Vec<_>>();
451
+
452
+
conn.execute(
453
+
include_str!("sql/postgate_upsert.sql"),
454
+
&[
455
+
&at_uri,
456
+
&cid.to_string(),
457
+
&rec.post,
458
+
&rec.detached_embedding_uris,
459
+
&rules,
460
+
&rec.created_at,
461
+
],
462
+
)
463
+
.await
464
+
}
465
+
466
+
pub async fn postgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
467
+
conn.execute("DELETE FROM postgates WHERE at_uri=$1", &[&at_uri])
468
+
.await
469
+
}
470
+
471
+
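// maintain_postgates() is a database-side SQL function (the removed diesel
// module declared it via define_sql_function!); its detach logic lives in SQL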
pub async fn postgate_maintain_detaches<C: GenericClient>(
472
+
conn: &mut C,
473
+
post: &str,
474
+
detached: &[String],
475
+
disable_effective: Option<NaiveDateTime>,
476
+
) -> PgExecResult {
477
+
conn.execute(
478
+
"SELECT maintain_postgates($1, $2, $3)",
479
+
&[&post, &detached, &disable_effective],
480
+
)
481
+
.await
482
+
}
483
+
484
+
pub async fn profile_upsert<C: GenericClient>(
485
+
conn: &mut C,
486
+
repo: &str,
487
+
cid: Cid,
488
+
rec: AppBskyActorProfile,
489
+
) -> PgExecResult {
490
+
let cid = cid.to_string();
491
+
let avatar = blob_ref(rec.avatar);
492
+
let banner = blob_ref(rec.banner);
493
+
let (pinned_uri, pinned_cid) = strongref_to_parts(rec.pinned_post.as_ref());
494
+
let (joined_sp_uri, joined_sp_cid) = strongref_to_parts(rec.joined_via_starter_pack.as_ref());
495
+
496
+
conn.execute(
497
+
include_str!("sql/profile_upsert.sql"),
498
+
&[
499
+
&repo,
500
+
&cid,
501
+
&avatar,
502
+
&banner,
503
+
&rec.display_name,
504
+
&rec.description,
505
+
&pinned_uri,
506
+
&pinned_cid,
507
+
&joined_sp_uri,
508
+
&joined_sp_cid,
509
+
&rec.created_at.unwrap_or(Utc::now()).naive_utc(),
510
+
],
511
+
)
512
+
.await
513
+
}
514
+
515
+
pub async fn profile_delete<C: GenericClient>(conn: &mut C, repo: &str) -> PgExecResult {
516
+
conn.execute("DELETE FROM profiles WHERE did=$1", &[&repo])
517
+
.await
518
+
}
519
+
520
+
pub async fn repost_insert<C: GenericClient>(
521
+
conn: &mut C,
522
+
at_uri: &str,
523
+
repo: &str,
524
+
rec: AppBskyFeedRepost,
525
+
) -> PgExecResult {
526
+
conn.execute(
527
+
"INSERT INTO reposts (at_uri, did, post, post_cid, created_at) VALUES ($1, $2, $3, $4, $5)",
528
+
&[
529
+
&at_uri,
530
+
&repo,
531
+
&rec.subject.uri,
532
+
&rec.subject.cid.to_string(),
533
+
&rec.created_at,
534
+
],
535
+
)
536
+
.await
537
+
}
538
+
539
+
pub async fn repost_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgOptResult<String> {
540
+
let res = conn
541
+
.query_opt(
542
+
"DELETE FROM reposts WHERE at_uri=$1 RETURNING post",
543
+
&[&at_uri],
544
+
)
545
+
.await?;
546
+
547
+
Ok(res.map(|v| v.get(0)))
548
+
}
549
+
550
+
pub async fn starter_pack_upsert<C: GenericClient>(
551
+
conn: &mut C,
552
+
at_uri: &str,
553
+
repo: &str,
554
+
cid: Cid,
555
+
rec: AppBskyGraphStarterPack,
556
+
) -> PgExecResult {
557
+
let cid = cid.to_string();
558
+
let record = serde_json::to_value(&rec).unwrap();
559
+
let description_facets = rec
560
+
.description_facets
561
+
.and_then(|v| serde_json::to_value(v).ok());
562
+
let feeds = rec
563
+
.feeds
564
+
.map(|v| v.into_iter().map(|item| item.uri).collect::<Vec<_>>());
565
+
566
+
conn.execute(
567
+
include_str!("sql/starterpack_upsert.sql"),
568
+
&[
569
+
&at_uri,
570
+
&repo,
571
+
&cid,
572
+
&record,
573
+
&rec.name,
574
+
&rec.description,
575
+
&description_facets,
576
+
&rec.list,
577
+
&feeds,
578
+
&rec.created_at,
579
+
],
580
+
)
581
+
.await
582
+
}
583
+
584
+
pub async fn starter_pack_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
585
+
conn.execute("DELETE FROM starterpacks WHERE at_uri=$1", &[&at_uri])
586
+
.await
587
+
}
588
+
589
+
pub async fn threadgate_upsert<C: GenericClient>(
590
+
conn: &mut C,
591
+
at_uri: &str,
592
+
cid: Cid,
593
+
rec: AppBskyFeedThreadgate,
594
+
) -> PgExecResult {
595
+
let record = serde_json::to_value(&rec).unwrap();
596
+
597
+
let allowed_lists = rec
598
+
.allow
599
+
.iter()
600
+
.filter_map(|rule| match rule {
601
+
ThreadgateRule::List { list } => Some(list.clone()),
602
+
_ => None,
603
+
})
604
+
.collect::<Vec<_>>();
605
+
606
+
let allow = rec
607
+
.allow
608
+
.into_iter()
609
+
.map(|v| v.as_str().to_string())
610
+
.collect::<Vec<_>>();
611
+
612
+
conn.execute(
613
+
include_str!("sql/threadgate_upsert.sql"),
614
+
&[
615
+
&at_uri,
616
+
&cid.to_string(),
617
+
&rec.post,
618
+
&rec.hidden_replies,
619
+
&allow,
620
+
&allowed_lists,
621
+
&record,
622
+
&rec.created_at,
623
+
],
624
+
)
625
+
.await
626
+
}
627
+
628
+
pub async fn threadgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
629
+
conn.execute("DELETE FROM threadgates WHERE at_uri=$1", &[&at_uri])
630
+
.await
631
+
}
632
+
633
+
pub async fn verification_insert<C: GenericClient>(
634
+
conn: &mut C,
635
+
at_uri: &str,
636
+
repo: &str,
637
+
cid: Cid,
638
+
rec: AppBskyGraphVerification,
639
+
) -> PgExecResult {
640
+
let cid = cid.to_string();
641
+
642
+
conn.execute(
643
+
"INSERT INTO verification (at_uri, verifier, cid, subject, handle, display_name, created_at) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT DO NOTHING",
644
+
&[&at_uri, &repo, &cid, &rec.subject, &rec.handle, &rec.display_name, &rec.created_at],
645
+
).await
646
+
}
647
+
648
+
pub async fn verification_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
649
+
conn.execute("DELETE FROM verification WHERE at_uri=$1", &[&at_uri])
650
+
.await
651
+
}
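Together these helpers form the per-collection CRUD surface for the indexer. A sketch of how a create op might fan out to them; RecordTypes comes from the indexer module, and the variant names here are assumed:

async fn apply_create<C: GenericClient>(
    conn: &mut C,
    at_uri: &str,
    repo: &str,
    cid: Cid,
    record: RecordTypes,
) -> PgExecResult {
    // every record lands in the generic records table first
    record_upsert(conn, at_uri, repo, cid).await?;
    match record {
        RecordTypes::Follow(rec) => follow_insert(conn, at_uri, repo, rec).await,
        RecordTypes::Post(rec) => post_insert(conn, at_uri, repo, cid, rec).await,
        RecordTypes::Block(rec) => block_insert(conn, at_uri, repo, rec).await,
        _ => Ok(0), // remaining collections elided
    }
}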
+11
consumer/src/db/sql/feedgen_upsert.sql
···
1
+
INSERT INTO feedgens (at_uri, owner, cid, service_did, content_mode, name, description, description_facets, avatar_cid,
2
+
created_at)
3
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
4
+
ON CONFLICT (at_uri) DO UPDATE SET cid=EXCLUDED.cid,
5
+
service_did=EXCLUDED.service_did,
6
+
content_mode=EXCLUDED.content_mode,
7
+
name=EXCLUDED.name,
8
+
description=EXCLUDED.description,
9
+
description_facets=EXCLUDED.description_facets,
10
+
avatar_cid=EXCLUDED.avatar_cid,
11
+
indexed_at=NOW()
+9
consumer/src/db/sql/label_defs_upsert.sql
···
1
+
INSERT INTO labeler_defs (labeler, label_identifier, severity, blurs, default_setting, adult_only, locales)
2
+
VALUES ($1, $2, $3, $4, $5, $6, $7)
3
+
ON CONFLICT (labeler, label_identifier) DO UPDATE
4
+
SET severity=EXCLUDED.severity,
5
+
blurs=EXCLUDED.blurs,
6
+
default_setting=EXCLUDED.default_setting,
7
+
adult_only=EXCLUDED.adult_only,
8
+
locales=EXCLUDED.locales,
9
+
indexed_at=NOW()
+7
consumer/src/db/sql/label_service_upsert.sql
···
1
+
INSERT INTO labelers (did, cid, reasons, subject_types, subject_collections)
2
+
VALUES ($1, $2, $3, $4, $5)
3
+
ON CONFLICT (did) DO UPDATE SET cid=EXCLUDED.cid,
4
+
reasons=EXCLUDED.reasons,
5
+
subject_types=EXCLUDED.subject_types,
6
+
subject_collections=EXCLUDED.subject_collections,
7
+
indexed_at=NOW()
+9
consumer/src/db/sql/list_upsert.sql
···
1
+
INSERT INTO lists (at_uri, owner, cid, list_type, name, description, description_facets, avatar_cid, created_at)
2
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
3
+
ON CONFLICT (at_uri) DO UPDATE SET cid=EXCLUDED.cid,
4
+
list_type=EXCLUDED.list_type,
5
+
name=EXCLUDED.name,
6
+
description=EXCLUDED.description,
7
+
description_facets=EXCLUDED.description_facets,
8
+
avatar_cid=EXCLUDED.avatar_cid,
9
+
indexed_at=NOW()
+4
consumer/src/db/sql/post_insert.sql
+7
consumer/src/db/sql/postgate_upsert.sql
+13
consumer/src/db/sql/profile_upsert.sql
···
1
+
INSERT INTO profiles (did, cid, avatar_cid, banner_cid, display_name, description, pinned_uri, pinned_cid,
2
+
joined_sp_uri, joined_sp_cid, created_at)
3
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
4
+
ON CONFLICT (did) DO UPDATE SET cid=EXCLUDED.cid,
5
+
avatar_cid=EXCLUDED.avatar_cid,
6
+
banner_cid=EXCLUDED.banner_cid,
7
+
display_name=EXCLUDED.display_name,
8
+
description=EXCLUDED.description,
9
+
pinned_uri=EXCLUDED.pinned_uri,
10
+
pinned_cid=EXCLUDED.pinned_cid,
11
+
joined_sp_uri=EXCLUDED.joined_sp_uri,
12
+
joined_sp_cid=EXCLUDED.joined_sp_cid,
13
+
indexed_at=NOW()
+10
consumer/src/db/sql/starterpack_upsert.sql
···
1
+
INSERT INTO starterpacks (at_uri, owner, cid, record, name, description, description_facets, list, feeds, created_at)
2
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
3
+
ON CONFLICT (at_uri) DO UPDATE SET cid=EXCLUDED.cid,
4
+
record=EXCLUDED.record,
5
+
name=EXCLUDED.name,
6
+
description=EXCLUDED.description,
7
+
description_facets=EXCLUDED.description_facets,
8
+
list=EXCLUDED.list,
9
+
feeds=EXCLUDED.feeds,
10
+
indexed_at=NOW()
+8
consumer/src/db/sql/threadgate_upsert.sql
···
1
+
INSERT INTO threadgates (at_uri, cid, post_uri, hidden_replies, allow, allowed_lists, record, created_at)
2
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
3
+
ON CONFLICT (at_uri) DO UPDATE SET cid=EXCLUDED.cid,
4
+
hidden_replies=EXCLUDED.hidden_replies,
5
+
allow=EXCLUDED.allow,
6
+
allowed_lists=EXCLUDED.allowed_lists,
7
+
record=EXCLUDED.record,
8
+
indexed_at=NOW()
+2
-2
consumer/src/firehose/error.rs
···
1
-
use thiserror::Error;
2
1
use std::io::Error as IoError;
2
+
use thiserror::Error;
3
3
4
4
#[derive(Debug, Error)]
5
5
pub enum FirehoseError {
···
9
9
IpldCbor(#[from] serde_ipld_dagcbor::error::DecodeError<IoError>),
10
10
#[error("{0}")]
11
11
Websocket(#[from] tokio_tungstenite::tungstenite::error::Error),
12
-
}
12
+
}
+4
-1
consumer/src/firehose/mod.rs
···
140
140
match err {
141
141
WsError::Protocol(ProtocolError::ResetWithoutClosingHandshake)
142
142
| WsError::ConnectionClosed => true,
143
-
WsError::Io(ioerr) => matches!(ioerr.kind(), ErrorKind::BrokenPipe | ErrorKind::ConnectionReset),
143
+
WsError::Io(ioerr) => matches!(
144
+
ioerr.kind(),
145
+
ErrorKind::BrokenPipe | ErrorKind::ConnectionReset
146
+
),
144
147
_ => false,
145
148
}
146
149
}
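This predicate presumably feeds a reconnect loop; a sketch of that shape, with the consumer methods and predicate name assumed for illustration:

loop {
    match consumer.next_event().await {
        Ok(event) => handle(event).await?,
        Err(err) if is_recoverable(&err) => {
            // transient network failure: back off briefly, then reconnect
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
            consumer.reconnect().await?;
        }
        Err(err) => return Err(err.into()),
    }
}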
-971
consumer/src/indexer/db.rs
···
1
-
use super::records::{self, AppBskyEmbed};
2
-
use crate::utils::{blob_ref, empty_str_as_none, strongref_to_parts};
3
-
use chrono::prelude::*;
4
-
use diesel::prelude::*;
5
-
use diesel::sql_types::{Array, Nullable, Text, Timestamp};
6
-
use diesel_async::{AsyncPgConnection, RunQueryDsl};
7
-
use ipld_core::cid::Cid;
8
-
use lexica::com_atproto::label::{LabelValueDefinition, SelfLabels};
9
-
use parakeet_db::{models, schema, types};
10
-
use std::collections::HashMap;
11
-
12
-
pub async fn write_record(
13
-
conn: &mut AsyncPgConnection,
14
-
at_uri: &str,
15
-
repo: &str,
16
-
cid: Cid,
17
-
) -> QueryResult<usize> {
18
-
let cid = cid.to_string();
19
-
20
-
diesel::insert_into(schema::records::table)
21
-
.values((
22
-
schema::records::at_uri.eq(at_uri),
23
-
schema::records::did.eq(repo),
24
-
schema::records::cid.eq(&cid),
25
-
))
26
-
.on_conflict(schema::records::at_uri)
27
-
.do_update()
28
-
.set(schema::records::cid.eq(&cid))
29
-
.execute(conn)
30
-
.await
31
-
}
32
-
33
-
pub async fn delete_record(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
34
-
diesel::delete(schema::records::table)
35
-
.filter(schema::records::at_uri.eq(at_uri))
36
-
.execute(conn)
37
-
.await
38
-
}
39
-
40
-
pub async fn write_backfill_row(
41
-
conn: &mut AsyncPgConnection,
42
-
repo: &str,
43
-
rev: &str,
44
-
cid: Cid,
45
-
data: serde_json::Value,
46
-
) -> QueryResult<usize> {
47
-
diesel::insert_into(schema::backfill::table)
48
-
.values(models::NewBackfillRow {
49
-
repo,
50
-
repo_ver: rev,
51
-
cid: cid.to_string(),
52
-
data,
53
-
})
54
-
.execute(conn)
55
-
.await
56
-
}
57
-
58
-
pub async fn get_repo_info(
59
-
conn: &mut AsyncPgConnection,
60
-
repo: &str,
61
-
) -> QueryResult<Option<(Option<String>, types::ActorSyncState)>> {
62
-
schema::actors::table
63
-
.select((schema::actors::repo_rev, schema::actors::sync_state))
64
-
.find(repo)
65
-
.get_result(conn)
66
-
.await
67
-
.optional()
68
-
}
69
-
70
-
pub async fn upsert_actor(
71
-
conn: &mut AsyncPgConnection,
72
-
did: &str,
73
-
handle: Option<Option<String>>,
74
-
status: Option<types::ActorStatus>,
75
-
sync_state: Option<types::ActorSyncState>,
76
-
time: DateTime<Utc>,
77
-
) -> QueryResult<usize> {
78
-
let data = models::NewActor {
79
-
did,
80
-
handle,
81
-
status,
82
-
sync_state,
83
-
last_indexed: Some(time.naive_utc()),
84
-
};
85
-
86
-
diesel::insert_into(schema::actors::table)
87
-
.values(&data)
88
-
.on_conflict(schema::actors::did)
89
-
.do_update()
90
-
.set(&data)
91
-
.execute(conn)
92
-
.await
93
-
}
94
-
95
-
pub async fn account_status_and_rev(
96
-
conn: &mut AsyncPgConnection,
97
-
did: &str,
98
-
) -> QueryResult<Option<(types::ActorStatus, Option<String>)>> {
99
-
schema::actors::table
100
-
.select((schema::actors::status, schema::actors::repo_rev))
101
-
.for_update()
102
-
.find(did)
103
-
.get_result(conn)
104
-
.await
105
-
.optional()
106
-
}
107
-
108
-
/// Attempts to update a repo to the given version.
109
-
/// returns false if the repo doesn't exist or is too new, or true if the update succeeded.
110
-
pub async fn update_repo_version(
111
-
conn: &mut AsyncPgConnection,
112
-
repo: &str,
113
-
rev: &str,
114
-
cid: Cid,
115
-
) -> QueryResult<usize> {
116
-
diesel::update(schema::actors::table)
117
-
.set((
118
-
schema::actors::repo_rev.eq(rev),
119
-
schema::actors::repo_cid.eq(cid.to_string()),
120
-
))
121
-
.filter(schema::actors::did.eq(repo))
122
-
.execute(conn)
123
-
.await
124
-
}
125
-
126
-
pub async fn insert_block(
127
-
conn: &mut AsyncPgConnection,
128
-
repo: &str,
129
-
at_uri: &str,
130
-
rec: records::AppBskyGraphBlock,
131
-
) -> QueryResult<usize> {
132
-
diesel::insert_into(schema::blocks::table)
133
-
.values(&models::NewBlock {
134
-
at_uri,
135
-
did: repo,
136
-
subject: &rec.subject,
137
-
created_at: rec.created_at.naive_utc(),
138
-
})
139
-
.on_conflict_do_nothing()
140
-
.execute(conn)
141
-
.await
142
-
}
143
-
144
-
pub async fn delete_block(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
145
-
diesel::delete(schema::blocks::table)
146
-
.filter(schema::blocks::at_uri.eq(at_uri))
147
-
.execute(conn)
148
-
.await
149
-
}
150
-
151
-
pub async fn insert_follow(
152
-
conn: &mut AsyncPgConnection,
153
-
repo: &str,
154
-
at_uri: &str,
155
-
rec: records::AppBskyGraphFollow,
156
-
) -> QueryResult<usize> {
157
-
diesel::insert_into(schema::follows::table)
158
-
.values(&models::NewFollow {
159
-
at_uri,
160
-
did: repo,
161
-
subject: &rec.subject,
162
-
created_at: rec.created_at.naive_utc(),
163
-
})
164
-
.on_conflict_do_nothing()
165
-
.execute(conn)
166
-
.await
167
-
}
168
-
169
-
pub async fn delete_follow(
170
-
conn: &mut AsyncPgConnection,
171
-
at_uri: &str,
172
-
) -> QueryResult<Option<String>> {
173
-
diesel::delete(schema::follows::table)
174
-
.filter(schema::follows::at_uri.eq(at_uri))
175
-
.returning(schema::follows::subject)
176
-
.get_result(conn)
177
-
.await
178
-
.optional()
179
-
}
180
-
181
-
pub async fn upsert_profile(
182
-
conn: &mut AsyncPgConnection,
183
-
repo: &str,
184
-
cid: Cid,
185
-
rec: records::AppBskyActorProfile,
186
-
) -> QueryResult<usize> {
187
-
let (pinned_uri, pinned_cid) = strongref_to_parts(rec.pinned_post.as_ref());
188
-
let (joined_sp_uri, joined_sp_cid) = strongref_to_parts(rec.joined_via_starter_pack.as_ref());
189
-
190
-
let data = models::UpsertProfile {
191
-
did: repo,
192
-
cid: cid.to_string(),
193
-
avatar_cid: blob_ref(rec.avatar),
194
-
banner_cid: blob_ref(rec.banner),
195
-
display_name: rec.display_name,
196
-
description: rec.description,
197
-
pinned_uri,
198
-
pinned_cid,
199
-
joined_sp_uri,
200
-
joined_sp_cid,
201
-
created_at: rec.created_at.map(|val| val.naive_utc()),
202
-
indexed_at: Utc::now().naive_utc(),
203
-
};
204
-
205
-
diesel::insert_into(schema::profiles::table)
206
-
.values(&data)
207
-
.on_conflict(schema::profiles::did)
208
-
.do_update()
209
-
.set(&data)
210
-
.execute(conn)
211
-
.await
212
-
}
213
-
214
-
pub async fn delete_profile(conn: &mut AsyncPgConnection, repo: &str) -> QueryResult<usize> {
215
-
diesel::delete(schema::profiles::table)
216
-
.filter(schema::profiles::did.eq(repo))
217
-
.execute(conn)
218
-
.await
219
-
}
220
-
221
-
pub async fn upsert_list(
222
-
conn: &mut AsyncPgConnection,
223
-
repo: &str,
224
-
at_uri: &str,
225
-
cid: Cid,
226
-
rec: records::AppBskyGraphList,
227
-
) -> QueryResult<usize> {
228
-
let description_facets = rec
229
-
.description_facets
230
-
.and_then(|v| serde_json::to_value(v).ok());
231
-
232
-
let data = models::UpsertList {
233
-
at_uri,
234
-
owner: repo,
235
-
cid: cid.to_string(),
236
-
list_type: &rec.purpose,
237
-
name: &rec.name,
238
-
description: rec.description,
239
-
description_facets,
240
-
avatar_cid: blob_ref(rec.avatar),
241
-
created_at: rec.created_at.naive_utc(),
242
-
indexed_at: Utc::now().naive_utc(),
243
-
};
244
-
245
-
diesel::insert_into(schema::lists::table)
246
-
.values(&data)
247
-
.on_conflict(schema::lists::at_uri)
248
-
.do_update()
249
-
.set(&data)
250
-
.execute(conn)
251
-
.await
252
-
}
253
-
254
-
pub async fn delete_list(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
255
-
diesel::delete(schema::lists::table)
256
-
.filter(schema::lists::at_uri.eq(at_uri))
257
-
.execute(conn)
258
-
.await
259
-
}
260
-
261
-
pub async fn insert_list_block(
262
-
conn: &mut AsyncPgConnection,
263
-
repo: &str,
264
-
at_uri: &str,
265
-
rec: records::AppBskyGraphListBlock,
266
-
) -> QueryResult<usize> {
267
-
let data = models::NewListBlock {
268
-
at_uri,
269
-
did: repo,
270
-
list_uri: &rec.subject,
271
-
created_at: rec.created_at.naive_utc(),
272
-
indexed_at: Utc::now().naive_utc(),
273
-
};
274
-
275
-
diesel::insert_into(schema::list_blocks::table)
276
-
.values(&data)
277
-
.execute(conn)
278
-
.await
279
-
}
280
-
281
-
pub async fn delete_list_block(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
282
-
diesel::delete(schema::list_blocks::table)
283
-
.filter(schema::list_blocks::at_uri.eq(at_uri))
284
-
.execute(conn)
285
-
.await
286
-
}
287
-
288
-
pub async fn insert_list_item(
289
-
conn: &mut AsyncPgConnection,
290
-
at_uri: &str,
291
-
rec: records::AppBskyGraphListItem,
292
-
) -> QueryResult<usize> {
293
-
let data = models::NewListItem {
294
-
at_uri,
295
-
list_uri: &rec.list,
296
-
subject: &rec.subject,
297
-
created_at: rec.created_at.naive_utc(),
298
-
indexed_at: Utc::now().naive_utc(),
299
-
};
300
-
301
-
diesel::insert_into(schema::list_items::table)
302
-
.values(&data)
303
-
.execute(conn)
304
-
.await
305
-
}
306
-
307
-
pub async fn delete_list_item(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
308
-
diesel::delete(schema::list_items::table)
309
-
.filter(schema::list_items::at_uri.eq(at_uri))
310
-
.execute(conn)
311
-
.await
312
-
}
313
-
314
-
pub async fn upsert_feedgen(
315
-
conn: &mut AsyncPgConnection,
316
-
repo: &str,
317
-
cid: Cid,
318
-
at_uri: &str,
319
-
rec: records::AppBskyFeedGenerator,
320
-
) -> QueryResult<usize> {
321
-
let description_facets = rec
322
-
.description_facets
323
-
.and_then(|v| serde_json::to_value(v).ok());
324
-
325
-
let data = models::UpsertFeedGen {
326
-
at_uri,
327
-
cid: &cid.to_string(),
328
-
owner: repo,
329
-
service_did: &rec.did,
330
-
content_mode: rec.content_mode,
331
-
name: &rec.display_name,
332
-
description: rec.description,
333
-
description_facets,
334
-
avatar_cid: blob_ref(rec.avatar),
335
-
accepts_interactions: rec.accepts_interactions,
336
-
created_at: rec.created_at.naive_utc(),
337
-
indexed_at: Utc::now().naive_utc(),
338
-
};
339
-
340
-
diesel::insert_into(schema::feedgens::table)
341
-
.values(&data)
342
-
.on_conflict(schema::feedgens::at_uri)
343
-
.do_update()
344
-
.set(&data)
345
-
.execute(conn)
346
-
.await
347
-
}
348
-
349
-
pub async fn delete_feedgen(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
350
-
diesel::delete(schema::feedgens::table)
351
-
.filter(schema::feedgens::at_uri.eq(at_uri))
352
-
.execute(conn)
353
-
.await
354
-
}
355
-
356
-
pub async fn insert_post(
357
-
conn: &mut AsyncPgConnection,
358
-
did: &str,
359
-
cid: Cid,
360
-
at_uri: &str,
361
-
rec: records::AppBskyFeedPost,
362
-
) -> QueryResult<usize> {
363
-
let record = serde_json::to_value(&rec).unwrap();
364
-
let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok());
365
-
366
-
let embed = rec.embed.as_ref().map(|v| v.as_str());
367
-
let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype());
368
-
369
-
let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent));
370
-
let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root));
371
-
372
-
let res = diesel::insert_into(schema::posts::table)
373
-
.values(models::NewPost {
374
-
at_uri,
375
-
cid: cid.to_string(),
376
-
did,
377
-
record,
378
-
content: &rec.text,
379
-
facets,
380
-
languages: rec.langs.unwrap_or_default(),
381
-
tags: rec.tags.unwrap_or_default(),
382
-
parent_uri,
383
-
parent_cid,
384
-
root_uri,
385
-
root_cid,
386
-
embed,
387
-
embed_subtype,
388
-
created_at: rec.created_at.naive_utc(),
389
-
})
390
-
.execute(conn)
391
-
.await?;
392
-
393
-
match rec.embed.and_then(|v| v.into_bsky()) {
394
-
Some(AppBskyEmbed::Images(embed)) => insert_post_embed_images(conn, at_uri, embed).await,
395
-
Some(AppBskyEmbed::Video(embed)) => insert_post_embed_video(conn, at_uri, embed).await,
396
-
Some(AppBskyEmbed::External(embed)) => insert_post_embed_ext(conn, at_uri, embed).await,
397
-
Some(AppBskyEmbed::Record(embed)) => {
398
-
insert_post_embed_record(conn, at_uri, embed, rec.created_at).await
399
-
}
400
-
Some(AppBskyEmbed::RecordWithMedia(embed)) => {
401
-
insert_post_embed_record(conn, at_uri, embed.record, rec.created_at).await?;
402
-
match *embed.media {
403
-
AppBskyEmbed::Images(embed) => insert_post_embed_images(conn, at_uri, embed).await,
404
-
AppBskyEmbed::Video(embed) => insert_post_embed_video(conn, at_uri, embed).await,
405
-
AppBskyEmbed::External(embed) => insert_post_embed_ext(conn, at_uri, embed).await,
406
-
_ => unreachable!(),
407
-
}
408
-
}
409
-
_ => Ok(res),
410
-
}
411
-
}
412
-
413
-
async fn insert_post_embed_images(
414
-
conn: &mut AsyncPgConnection,
415
-
at_uri: &str,
416
-
rec: records::AppBskyEmbedImages,
417
-
) -> QueryResult<usize> {
418
-
let images = rec
419
-
.images
420
-
.into_iter()
421
-
.enumerate()
422
-
.map(|(idx, img)| models::NewPostEmbedImage {
423
-
post_uri: at_uri,
424
-
seq: idx as i16,
425
-
mime_type: img.image.mime_type,
426
-
cid: img.image.r#ref.to_string(),
427
-
alt: empty_str_as_none(img.alt),
428
-
width: img.aspect_ratio.as_ref().map(|v| v.width),
429
-
height: img.aspect_ratio.map(|v| v.height),
430
-
})
431
-
.collect::<Vec<_>>();
432
-
433
-
diesel::insert_into(schema::post_embed_images::table)
434
-
.values(images)
435
-
.execute(conn)
436
-
.await
437
-
}
438
-
439
-
async fn insert_post_embed_video(
440
-
conn: &mut AsyncPgConnection,
441
-
at_uri: &str,
442
-
rec: records::AppBskyEmbedVideo,
443
-
) -> QueryResult<usize> {
444
-
let res = diesel::insert_into(schema::post_embed_video::table)
445
-
.values(models::NewPostEmbedVideo {
446
-
post_uri: at_uri,
447
-
mime_type: &rec.video.mime_type,
448
-
cid: rec.video.r#ref.to_string(),
449
-
alt: rec.alt,
450
-
width: rec.aspect_ratio.as_ref().map(|v| v.width),
451
-
height: rec.aspect_ratio.map(|v| v.height),
452
-
})
453
-
.execute(conn)
454
-
.await?;
455
-
456
-
match rec.captions {
457
-
Some(captions) => insert_post_embed_video_captions(conn, at_uri, &captions).await,
458
-
None => Ok(res),
459
-
}
460
-
}
461
-
462
-
async fn insert_post_embed_video_captions(
463
-
conn: &mut AsyncPgConnection,
464
-
at_uri: &str,
465
-
captions: &[records::EmbedVideoCaptions],
466
-
) -> QueryResult<usize> {
467
-
let captions = captions
468
-
.iter()
469
-
.map(|caption| models::NewPostEmbedVideoCaption {
470
-
post_uri: at_uri,
471
-
language: caption.lang.clone(),
472
-
mime_type: caption.file.mime_type.clone(),
473
-
cid: caption.file.r#ref.to_string(),
474
-
})
475
-
.collect::<Vec<_>>();
476
-
477
-
diesel::insert_into(schema::post_embed_video_captions::table)
478
-
.values(captions)
479
-
.execute(conn)
480
-
.await
481
-
}
482
-
483
-
async fn insert_post_embed_ext(
484
-
conn: &mut AsyncPgConnection,
485
-
at_uri: &str,
486
-
rec: records::AppBskyEmbedExternal,
487
-
) -> QueryResult<usize> {
488
-
diesel::insert_into(schema::post_embed_ext::table)
489
-
.values(models::NewPostEmbedExt {
490
-
post_uri: at_uri,
491
-
uri: &rec.external.uri,
492
-
title: &rec.external.title,
493
-
description: &rec.external.description,
494
-
thumb_mime_type: rec.external.thumb.as_ref().map(|v| v.mime_type.clone()),
495
-
thumb_cid: rec.external.thumb.as_ref().map(|v| v.r#ref.to_string()),
496
-
})
497
-
.execute(conn)
498
-
.await
499
-
}
500
-
501
-
async fn insert_post_embed_record(
502
-
conn: &mut AsyncPgConnection,
503
-
at_uri: &str,
504
-
rec: records::AppBskyEmbedRecord,
505
-
post_created_at: DateTime<Utc>,
506
-
) -> QueryResult<usize> {
507
-
// strip "at://" then break into parts by '/'
508
-
let parts = rec.record.uri[5..].split('/').collect::<Vec<_>>();
509
-
510
-
let detached = if parts[1] == "app.bsky.feed.post" {
511
-
// do a lookup on if we have a postgate for this record
512
-
let postgate_effective = schema::postgates::table
513
-
.select(schema::postgates::created_at)
514
-
.filter(schema::postgates::post_uri.eq(at_uri))
515
-
.get_result::<DateTime<Utc>>(conn)
516
-
.await
517
-
.optional()?;
518
-
519
-
postgate_effective.map(|v| Utc::now().min(post_created_at) > v)
520
-
} else {
521
-
None
522
-
};
523
-
524
-
diesel::insert_into(schema::post_embed_record::table)
525
-
.values(models::NewPostEmbedRecord {
526
-
post_uri: at_uri,
527
-
record_type: parts[1],
528
-
uri: &rec.record.uri,
529
-
cid: rec.record.cid.to_string(),
530
-
detached,
531
-
})
532
-
.execute(conn)
533
-
.await
534
-
}
535
-
536
-
pub async fn delete_post(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
537
-
diesel::delete(schema::posts::table)
538
-
.filter(schema::posts::at_uri.eq(at_uri))
539
-
.execute(conn)
540
-
.await
541
-
}
542
-
543
-
pub async fn get_post_info_for_delete(
544
-
conn: &mut AsyncPgConnection,
545
-
at_uri: &str,
546
-
) -> QueryResult<Option<(Option<String>, Option<String>)>> {
547
-
schema::posts::table
548
-
.left_join(
549
-
schema::post_embed_record::table
550
-
.on(schema::posts::at_uri.eq(schema::post_embed_record::post_uri)),
551
-
)
552
-
.select((
553
-
schema::posts::parent_uri,
554
-
schema::post_embed_record::uri.nullable(),
555
-
))
556
-
.filter(schema::posts::at_uri.eq(at_uri))
557
-
.get_result(conn)
558
-
.await
559
-
.optional()
560
-
}
561
-
562
-
pub async fn upsert_postgate(
563
-
conn: &mut AsyncPgConnection,
564
-
at_uri: &str,
565
-
cid: Cid,
566
-
rec: &records::AppBskyFeedPostgate,
567
-
) -> QueryResult<usize> {
568
-
let rules = rec
569
-
.embedding_rules
570
-
.iter()
571
-
.map(|v| v.as_str().to_string())
572
-
.collect();
573
-
574
-
let data = models::UpsertPostgate {
575
-
at_uri,
576
-
cid: cid.to_string(),
577
-
post_uri: &rec.post,
578
-
detached: &rec.detached_embedding_uris,
579
-
rules,
580
-
created_at: rec.created_at.naive_utc(),
581
-
};
582
-
583
-
diesel::insert_into(schema::postgates::table)
584
-
.values(&data)
585
-
.on_conflict(schema::postgates::at_uri)
586
-
.do_update()
587
-
.set(&data)
588
-
.execute(conn)
589
-
.await
590
-
}
591
-
592
-
pub async fn delete_postgate(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
593
-
diesel::delete(schema::postgates::table)
594
-
.filter(schema::postgates::at_uri.eq(at_uri))
595
-
.execute(conn)
596
-
.await
597
-
}
598
-
599
-
define_sql_function! {fn maintain_postgates(post: Text, detached: Array<Text>, effective: Nullable<Timestamp>)}
600
-
601
-
pub async fn postgate_maintain_detaches(
602
-
conn: &mut AsyncPgConnection,
603
-
post: &str,
604
-
detached: &[String],
605
-
disable_effective: Option<NaiveDateTime>,
606
-
) -> QueryResult<usize> {
607
-
diesel::select(maintain_postgates(post, detached, disable_effective))
608
-
.execute(conn)
609
-
.await
610
-
}
611
-
612
-
pub async fn upsert_threadgate(
613
-
conn: &mut AsyncPgConnection,
614
-
at_uri: &str,
615
-
cid: Cid,
616
-
rec: records::AppBskyFeedThreadgate,
617
-
) -> QueryResult<usize> {
618
-
let record = serde_json::to_value(&rec).unwrap();
619
-
620
-
let allowed_lists = rec
621
-
.allow
622
-
.iter()
623
-
.filter_map(|rule| match rule {
624
-
records::ThreadgateRule::List { list } => Some(list.clone()),
625
-
_ => None,
626
-
})
627
-
.collect();
628
-
629
-
let allow = rec
630
-
.allow
631
-
.into_iter()
632
-
.map(|v| v.as_str().to_string())
633
-
.collect();
634
-
635
-
let data = models::UpsertThreadgate {
636
-
at_uri,
637
-
cid: cid.to_string(),
638
-
post_uri: &rec.post,
639
-
hidden_replies: rec.hidden_replies,
640
-
allow,
641
-
allowed_lists,
642
-
record,
643
-
created_at: rec.created_at.naive_utc(),
644
-
};
645
-
646
-
diesel::insert_into(schema::threadgates::table)
647
-
.values(&data)
648
-
.on_conflict(schema::threadgates::at_uri)
649
-
.do_update()
650
-
.set(&data)
651
-
.execute(conn)
652
-
.await
653
-
}
654
-
655
-
pub async fn delete_threadgate(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
656
-
diesel::delete(schema::threadgates::table)
657
-
.filter(schema::threadgates::at_uri.eq(at_uri))
658
-
.execute(conn)
659
-
.await
660
-
}
661
-
662
-
pub async fn insert_like(
663
-
conn: &mut AsyncPgConnection,
664
-
did: &str,
665
-
at_uri: &str,
666
-
rec: records::AppBskyFeedLike,
667
-
) -> QueryResult<usize> {
668
-
let data = models::NewLike {
669
-
at_uri,
670
-
did,
671
-
subject: &rec.subject.uri,
672
-
subject_cid: rec.subject.cid.to_string(),
673
-
created_at: rec.created_at.naive_utc(),
674
-
};
675
-
676
-
diesel::insert_into(schema::likes::table)
677
-
.values(&data)
678
-
.execute(conn)
679
-
.await
680
-
}
681
-
682
-
pub async fn delete_like(
683
-
conn: &mut AsyncPgConnection,
684
-
at_uri: &str,
685
-
) -> QueryResult<Option<String>> {
686
-
diesel::delete(schema::likes::table)
687
-
.filter(schema::likes::at_uri.eq(at_uri))
688
-
.returning(schema::likes::subject)
689
-
.get_result(conn)
690
-
.await
691
-
.optional()
692
-
}
693
-
694
-
pub async fn insert_repost(
695
-
conn: &mut AsyncPgConnection,
696
-
did: &str,
697
-
at_uri: &str,
698
-
rec: records::AppBskyFeedRepost,
699
-
) -> QueryResult<usize> {
700
-
let data = models::NewRepost {
701
-
at_uri,
702
-
did,
703
-
post: &rec.subject.uri,
704
-
post_cid: rec.subject.cid.to_string(),
705
-
created_at: rec.created_at.naive_utc(),
706
-
};
707
-
708
-
diesel::insert_into(schema::reposts::table)
709
-
.values(&data)
710
-
.execute(conn)
711
-
.await
712
-
}
713
-
714
-
pub async fn delete_repost(
715
-
conn: &mut AsyncPgConnection,
716
-
at_uri: &str,
717
-
) -> QueryResult<Option<String>> {
718
-
diesel::delete(schema::reposts::table)
719
-
.filter(schema::reposts::at_uri.eq(at_uri))
720
-
.returning(schema::reposts::post)
721
-
.get_result(conn)
722
-
.await
723
-
.optional()
724
-
}
725
-
726
-
pub async fn upsert_chat_decl(
727
-
conn: &mut AsyncPgConnection,
728
-
did: &str,
729
-
rec: records::ChatBskyActorDeclaration,
730
-
) -> QueryResult<usize> {
731
-
let data = models::NewChatDecl {
732
-
did,
733
-
allow_incoming: rec.allow_incoming.to_string(),
734
-
};
735
-
736
-
diesel::insert_into(schema::chat_decls::table)
737
-
.values(&data)
738
-
.on_conflict(schema::chat_decls::did)
739
-
.do_update()
740
-
.set(&data)
741
-
.execute(conn)
742
-
.await
743
-
}
744
-
745
-
pub async fn delete_chat_decl(conn: &mut AsyncPgConnection, did: &str) -> QueryResult<usize> {
746
-
diesel::delete(schema::chat_decls::table)
747
-
.filter(schema::chat_decls::did.eq(did))
748
-
.execute(conn)
749
-
.await
750
-
}
751
-
752
-
pub async fn upsert_starterpack(
753
-
conn: &mut AsyncPgConnection,
754
-
did: &str,
755
-
cid: Cid,
756
-
at_uri: &str,
757
-
rec: records::AppBskyGraphStarterPack,
758
-
) -> QueryResult<usize> {
759
-
let record = serde_json::to_value(&rec).unwrap();
760
-
761
-
let feeds = rec
762
-
.feeds
763
-
.map(|v| v.into_iter().map(|item| item.uri).collect());
764
-
765
-
let description_facets = rec
766
-
.description_facets
767
-
.and_then(|v| serde_json::to_value(v).ok());
768
-
769
-
let data = models::NewStarterPack {
770
-
at_uri,
771
-
cid: cid.to_string(),
772
-
owner: did,
773
-
record,
774
-
name: &rec.name,
775
-
description: rec.description,
776
-
description_facets,
777
-
list: &rec.list,
778
-
feeds,
779
-
created_at: rec.created_at.naive_utc(),
780
-
indexed_at: Utc::now().naive_utc(),
781
-
};
782
-
783
-
diesel::insert_into(schema::starterpacks::table)
784
-
.values(&data)
785
-
.on_conflict(schema::starterpacks::at_uri)
786
-
.do_update()
787
-
.set(&data)
788
-
.execute(conn)
789
-
.await
790
-
}
791
-
792
-
pub async fn delete_starterpack(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
793
-
diesel::delete(schema::starterpacks::table)
794
-
.filter(schema::starterpacks::at_uri.eq(at_uri))
795
-
.execute(conn)
796
-
.await
797
-
}
798
-
799
-
pub async fn upsert_label_service(
800
-
conn: &mut AsyncPgConnection,
801
-
repo: &str,
802
-
cid: Cid,
803
-
rec: records::AppBskyLabelerService,
804
-
) -> QueryResult<usize> {
805
-
let reasons = rec
806
-
.reason_types
807
-
.as_ref()
808
-
.map(|v| v.iter().map(|v| v.to_string()).collect());
809
-
let subject_types = rec
810
-
.subject_types
811
-
.as_ref()
812
-
.map(|v| v.iter().map(|v| v.to_string()).collect());
813
-
814
-
let data = models::UpsertLabelerService {
815
-
did: repo,
816
-
cid: cid.to_string(),
817
-
reasons,
818
-
subject_types,
819
-
subject_collections: rec.subject_collections.as_ref(),
820
-
indexed_at: Utc::now().naive_utc(),
821
-
};
822
-
823
-
let res = diesel::insert_into(schema::labelers::table)
824
-
.values(&data)
825
-
.on_conflict(schema::labelers::did)
826
-
.do_update()
827
-
.set(&data)
828
-
.execute(conn)
829
-
.await?;
830
-
831
-
maintain_label_defs(conn, repo, &rec).await?;
832
-
833
-
Ok(res)
834
-
}
835
-
836
-
pub async fn delete_label_service(conn: &mut AsyncPgConnection, repo: &str) -> QueryResult<usize> {
837
-
diesel::delete(schema::labelers::table)
838
-
.filter(schema::labelers::did.eq(repo))
839
-
.execute(conn)
840
-
.await
841
-
}
842
-
843
-
pub async fn maintain_label_defs(
844
-
conn: &mut AsyncPgConnection,
845
-
repo: &str,
846
-
rec: &records::AppBskyLabelerService,
847
-
) -> QueryResult<()> {
848
-
// drop any label defs not currently in the list
849
-
diesel::delete(schema::labeler_defs::table)
850
-
.filter(
851
-
schema::labeler_defs::labeler
852
-
.eq(repo)
853
-
.and(schema::labeler_defs::label_identifier.ne_all(&rec.policies.label_values)),
854
-
)
855
-
.execute(conn)
856
-
.await?;
857
-
858
-
let definitions = rec
859
-
.policies
860
-
.label_value_definitions
861
-
.iter()
862
-
.map(|def| (def.identifier.clone(), def))
863
-
.collect::<HashMap<String, &LabelValueDefinition>>();
864
-
865
-
for label in &rec.policies.label_values {
866
-
let definition = definitions.get(label);
867
-
868
-
let locales = definition.and_then(|v| serde_json::to_value(&v.locales).ok());
869
-
870
-
let data = models::UpsertLabelDefinition {
871
-
labeler: repo,
872
-
label_identifier: label,
873
-
severity: definition.map(|v| v.severity.to_string()),
874
-
blurs: definition.map(|v| v.blurs.to_string()),
875
-
default_setting: definition
876
-
.and_then(|v| v.default_setting)
877
-
.map(|v| v.to_string()),
878
-
adult_only: definition.and_then(|v| v.adult_only),
879
-
locales,
880
-
indexed_at: Utc::now().naive_utc(),
881
-
};
882
-
883
-
diesel::insert_into(schema::labeler_defs::table)
884
-
.values(&data)
885
-
.on_conflict((
886
-
schema::labeler_defs::labeler,
887
-
schema::labeler_defs::label_identifier,
888
-
))
889
-
.do_update()
890
-
.set(&data)
891
-
.execute(conn)
892
-
.await?;
893
-
}
894
-
895
-
Ok(())
896
-
}
897
-
898
-
pub async fn maintain_self_labels(
899
-
conn: &mut AsyncPgConnection,
900
-
repo: &str,
901
-
cid: Option<Cid>,
902
-
at_uri: &str,
903
-
self_labels: SelfLabels,
904
-
) -> QueryResult<usize> {
905
-
// purge any existing self-labels
906
-
diesel::delete(schema::labels::table)
907
-
.filter(
908
-
schema::labels::self_label
909
-
.eq(true)
910
-
.and(schema::labels::uri.eq(at_uri)),
911
-
)
912
-
.execute(conn)
913
-
.await?;
914
-
915
-
let cid = cid.map(|cid| cid.to_string());
916
-
let now = Utc::now().naive_utc();
917
-
918
-
let labels = self_labels
919
-
.values
920
-
.iter()
921
-
.map(|v| models::NewLabel {
922
-
labeler: repo,
923
-
label: &v.val,
924
-
uri: at_uri,
925
-
self_label: true,
926
-
cid: cid.clone(),
927
-
expires: None,
928
-
sig: None,
929
-
created_at: now,
930
-
})
931
-
.collect::<Vec<_>>();
932
-
933
-
diesel::insert_into(schema::labels::table)
934
-
.values(&labels)
935
-
.execute(conn)
936
-
.await
937
-
}
938
-
939
-
pub async fn upsert_verification(
940
-
conn: &mut AsyncPgConnection,
941
-
did: &str,
942
-
cid: Cid,
943
-
at_uri: &str,
944
-
rec: records::AppBskyGraphVerification,
945
-
) -> QueryResult<usize> {
946
-
let data = models::NewVerificationEntry {
947
-
at_uri,
948
-
cid: cid.to_string(),
949
-
verifier: did,
950
-
subject: &rec.subject,
951
-
handle: &rec.handle,
952
-
display_name: &rec.display_name,
953
-
created_at: rec.created_at.naive_utc(),
954
-
indexed_at: None,
955
-
};
956
-
957
-
diesel::insert_into(schema::verification::table)
958
-
.values(&data)
959
-
.on_conflict(schema::verification::at_uri)
960
-
.do_update()
961
-
.set(&data)
962
-
.execute(conn)
963
-
.await
964
-
}
965
-
966
-
pub async fn delete_verification(conn: &mut AsyncPgConnection, at_uri: &str) -> QueryResult<usize> {
967
-
diesel::delete(schema::verification::table)
968
-
.filter(schema::verification::at_uri.eq(at_uri))
969
-
.execute(conn)
970
-
.await
971
-
}
+88
-101
consumer/src/indexer/mod.rs
···
1 1    use crate::config::HistoryMode;
2 +    use crate::db;
2 3    use crate::firehose::{
3 4        AtpAccountEvent, AtpCommitEvent, AtpIdentityEvent, CommitOp, FirehoseConsumer, FirehoseEvent,
4 5        FirehoseOutput,
···
6 7    use crate::indexer::types::{
7 8        AggregateDeltaStore, BackfillItem, BackfillItemInner, CollectionType, RecordTypes,
8 9    };
10 +   use deadpool_postgres::{Object, Pool, Transaction};
9 11   use did_resolver::Resolver;
10 -   use diesel_async::pooled_connection::deadpool::Pool;
11 -   use diesel_async::{AsyncConnection, AsyncPgConnection};
12 12  use foldhash::quality::RandomState;
13 13  use futures::StreamExt;
14 14  use ipld_core::cid::Cid;
···
23 23  use tokio::sync::mpsc::{channel, Sender};
24 24  use tracing::instrument;
25 25
26 -   pub mod db;
27 26  pub mod records;
28 27  pub mod types;
29 28
···
41 40  }
42 41
43 42  pub struct RelayIndexer {
44 -       pool: Pool<AsyncPgConnection>,
43 +       pool: Pool,
45 44      redis: MultiplexedConnection,
46 45      state: RelayIndexerState,
47 46      firehose: FirehoseConsumer,
···
51 50
52 51  impl RelayIndexer {
53 52      pub async fn new(
54 -           pool: Pool<AsyncPgConnection>,
53 +           pool: Pool,
55 54          redis: MultiplexedConnection,
56 55          idxc_tx: Sender<parakeet_index::AggregateDeltaReq>,
57 56          resolver: Arc<Resolver>,
···
197 196  #[instrument(skip_all, fields(seq = identity.seq, repo = identity.did))]
198 197  async fn index_identity(
199 198      state: &RelayIndexerState,
200 -        conn: &mut AsyncPgConnection,
199 +        conn: &mut Object,
201 200      identity: AtpIdentityEvent,
202 201  ) -> eyre::Result<()> {
203 202      let new_handle = match state.do_handle_res {
204 203          true => resolve_handle(state, &identity.did, identity.handle).await?,
205 -            false => Some(identity.handle),
204 +            false => identity.handle,
206 205      };
207 206
208 -        let sync_state = (!state.do_backfill).then_some(ActorSyncState::Synced);
207 +        let sync_state = match state.do_backfill {
208 +            true => ActorSyncState::Dirty,
209 +            false => ActorSyncState::Synced,
210 +        };
209 211
210 -        db::upsert_actor(
211 -            conn,
212 -            &identity.did,
213 -            new_handle,
214 -            None,
215 -            sync_state,
216 -            identity.time,
217 -        )
218 -        .await?;
212 +        db::actor_upsert_handle(conn, &identity.did, sync_state, new_handle, identity.time).await?;
219 213
220 214      Ok(())
221 215  }
···
224 218      state: &RelayIndexerState,
225 219      did: &str,
226 220      expected_handle: Option<String>,
227 -    ) -> eyre::Result<Option<Option<String>>> {
221 +    ) -> eyre::Result<Option<String>> {
228 222      // Resolve the did doc
229 223      let Some(did_doc) = state.resolver.resolve_did(did).await? else {
230 224          eyre::bail!("missing did doc");
···
232 226
233 227      // if there's no handles in aka or the expected is none, set to none in DB.
234 228      if did_doc.also_known_as.as_ref().is_none_or(|v| v.is_empty()) || expected_handle.is_none() {
235 -            return Ok(Some(None));
229 +            return Ok(None);
236 230      }
237 231
238 232      let expected = expected_handle.unwrap();
···
241 235      let expected_in_doc = did_doc.also_known_as.is_some_and(|v| {
242 236          v.iter()
243 237              .filter_map(|v| v.strip_prefix("at://"))
244 -                .any(|v| v == &expected)
238 +                .any(|v| v == expected)
245 239      });
246 240
247 241      // if it isn't, set to invalid.
248 242      if !expected_in_doc {
249 243          tracing::warn!("Handle not in DID doc");
250 -            return Ok(Some(None));
244 +            return Ok(None);
251 245      }
252 246
253 247      // in theory, we can use com.atproto.identity.resolveHandle against a PDS, but that seems
254 248      // like a way to end up with really sus handles.
255 249      let Some(handle_did) = state.resolver.resolve_handle(&expected).await? else {
256 -            return Ok(Some(None));
250 +            return Ok(None);
257 251      };
258 252
259 253      // finally, check if the event did matches the handle, if not, set invalid, otherwise set the handle.
260 254      if handle_did != did {
261 -            Ok(Some(None))
255 +            Ok(None)
262 256      } else {
263 -            Ok(Some(Some(expected)))
257 +            Ok(Some(expected))
264 258      }
265 259  }
266 260
267 261  #[instrument(skip_all, fields(seq = account.seq, repo = account.did))]
268 262  async fn index_account(
269 263      state: &RelayIndexerState,
270 -        conn: &mut AsyncPgConnection,
264 +        conn: &mut Object,
271 265      rc: &mut MultiplexedConnection,
272 266      account: AtpAccountEvent,
273 267  ) -> eyre::Result<()> {
···
279 273      let trigger_bf = if state.do_backfill && status == ActorStatus::Active {
280 274          // check old status - if they exist (Some(*)), AND were previously != Active but not Deleted,
281 275          // AND have a rev == null, then trigger backfill.
282 -            db::account_status_and_rev(conn, &account.did)
276 +            db::actor_get_status_and_rev(conn, &account.did)
283 277              .await?
284 278              .is_some_and(|(old_status, old_rev)| {
285 279                  old_rev.is_none()
···
290 284          false
291 285      };
292 286
293 -        let sync_state = (!state.do_backfill).then_some(ActorSyncState::Synced);
287 +        let sync_state = match state.do_backfill {
288 +            true => ActorSyncState::Dirty,
289 +            false => ActorSyncState::Synced,
290 +        };
294 291
295 -        db::upsert_actor(
296 -            conn,
297 -            &account.did,
298 -            None,
299 -            Some(status),
300 -            sync_state,
301 -            account.time,
302 -        )
303 -        .await?;
292 +        db::actor_upsert(conn, &account.did, status, sync_state, account.time).await?;
304 293
305 294      if trigger_bf {
306 295          tracing::debug!("triggering backfill due to account coming out of inactive state");
···
313 302  #[instrument(skip_all, fields(seq = commit.seq, repo = commit.repo, rev = commit.rev))]
314 303  async fn index_commit(
315 304      state: &mut RelayIndexerState,
316 -        conn: &mut AsyncPgConnection,
305 +        conn: &mut Object,
317 306      rc: &mut MultiplexedConnection,
318 307      commit: AtpCommitEvent,
319 308  ) -> eyre::Result<()> {
320 -        let (current_rev, sync_status) = db::get_repo_info(conn, &commit.repo).await?.unzip();
309 +        let (sync_status, current_rev) = db::actor_get_repo_status(conn, &commit.repo).await?.unzip();
321 310
322 311      // what's the backfill status of this account? this respects locks held by the backfiller.
323 312      // we should drop events for 'dirty' and queue 'processing'
···
338 327          }
339 328
340 329          // this is the first commit in an actor's repo - set them to Synced.
341 -            db::upsert_actor(
342 -                conn,
343 -                &commit.repo,
344 -                None,
345 -                None,
346 -                Some(ActorSyncState::Synced),
347 -                commit.time,
348 -            )
349 -            .await?;
330 +            db::actor_set_sync_status(conn, &commit.repo, ActorSyncState::Synced, commit.time)
331 +                .await?;
350 332
351 333          true
352 334      }
···
354 336          tracing::debug!("found new repo from commit");
355 337          let trigger_backfill = state.do_backfill && commit.since.is_some();
356 338
357 -            let sync_state = (!trigger_backfill).then_some(ActorSyncState::Synced);
339 +            let sync_state = match trigger_backfill {
340 +                true => ActorSyncState::Dirty,
341 +                false => ActorSyncState::Synced,
342 +            };
358 343
359 -            db::upsert_actor(conn, &commit.repo, None, None, sync_state, commit.time).await?;
344 +            db::actor_upsert(
345 +                conn,
346 +                &commit.repo,
347 +                ActorStatus::Active,
348 +                sync_state,
349 +                commit.time,
350 +            )
351 +            .await?;
360 352
361 353          if trigger_backfill {
362 354              rc.rpush::<_, _, i32>("backfill_queue", commit.repo).await?;
···
383 375          .await;
384 376
385 377      if is_active {
386 -            conn.transaction::<_, diesel::result::Error, _>(|t| {
387 -                Box::pin(async move {
388 -                    db::update_repo_version(t, &commit.repo, &commit.rev, commit.commit).await?;
378 +            let mut t = conn.transaction().await?;
379 +            db::actor_set_repo_state(&mut t, &commit.repo, &commit.rev, commit.commit).await?;
389 380
390 -                    for op in &commit.ops {
391 -                        process_op(t, &mut state.idxc_tx, &commit.repo, op, &blocks).await?;
392 -                    }
393 -                    Ok(true)
394 -                })
395 -            })
396 -            .await?;
381 +            for op in &commit.ops {
382 +                process_op(&mut t, &mut state.idxc_tx, &commit.repo, op, &blocks).await?;
383 +            }
397 384      } else {
398 385          let items = commit
399 386              .ops
···
402 389              .collect::<Vec<_>>();
403 390          let items = serde_json::to_value(items).unwrap_or_default();
404 391
405 -            db::write_backfill_row(conn, &commit.repo, &commit.rev, commit.commit, items).await?;
392 +            db::backfill_write_row(conn, &commit.repo, &commit.rev, commit.commit, items).await?;
406 393      }
407 394
408 395      Ok(())
···
456 443
457 444  #[inline(always)]
458 445  async fn process_op(
459 -        conn: &mut AsyncPgConnection,
446 +        conn: &mut Transaction<'_>,
460 447      deltas: &mut impl AggregateDeltaStore,
461 448      repo: &str,
462 449      op: &CommitOp,
463 450      blocks: &HashMap<Cid, Vec<u8>>,
464 -    ) -> diesel::QueryResult<()> {
451 +    ) -> Result<(), tokio_postgres::Error> {
465 452      let Some((collection_raw, rkey)) = op.path.split_once("/") else {
466 453          tracing::warn!("op contained invalid path {}", op.path);
467 454          return Ok(());
···
512 499  }
513 500
514 501  pub async fn index_op(
515 -        conn: &mut AsyncPgConnection,
502 +        conn: &mut Transaction<'_>,
516 503      deltas: &mut impl AggregateDeltaStore,
517 504      repo: &str,
518 505      cid: Cid,
519 506      record: RecordTypes,
520 507      at_uri: &str,
521 508      rkey: &str,
522 -    ) -> diesel::QueryResult<()> {
509 +    ) -> Result<(), tokio_postgres::Error> {
523 510      match record {
524 511          RecordTypes::AppBskyActorProfile(record) => {
525 512              if rkey == "self" {
526 513                  let labels = record.labels.clone();
527 -                    db::upsert_profile(conn, repo, cid, record).await?;
514 +                    db::profile_upsert(conn, repo, cid, record).await?;
528 515
529 516                  if let Some(labels) = labels {
530 517                      db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?;
···
533 520          }
534 521          RecordTypes::AppBskyFeedGenerator(record) => {
535 522              let labels = record.labels.clone();
536 -                let count = db::upsert_feedgen(conn, repo, cid, at_uri, record).await?;
523 +                let count = db::feedgen_upsert(conn, at_uri, repo, cid, record).await?;
537 524
538 525              if let Some(labels) = labels {
539 526                  db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?;
···
545 532          }
546 533          RecordTypes::AppBskyFeedLike(record) => {
547 534              let subject = record.subject.uri.clone();
548 -                let count = db::insert_like(conn, repo, at_uri, record).await?;
535 +                let count = db::like_insert(conn, at_uri, repo, record).await?;
549 536
550 537              deltas
551 538                  .add_delta(&subject, AggregateType::Like, count as i32)
···
575 562              });
576 563
577 564              let labels = record.labels.clone();
578 -                db::insert_post(conn, repo, cid, at_uri, record).await?;
565 +                db::post_insert(conn, at_uri, repo, cid, record).await?;
579 566              if let Some(labels) = labels {
580 567                  db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?;
581 568              }
···
600 587                  .contains(&records::PostgateEmbeddingRules::Disable);
601 588              let disable_effective = has_disable_rule.then_some(record.created_at.naive_utc());
602 589
603 -                db::upsert_postgate(conn, at_uri, cid, &record).await?;
590 +                db::postgate_upsert(conn, at_uri, cid, &record).await?;
604 591
605 592              db::postgate_maintain_detaches(
606 593                  conn,
···
614 601              deltas
615 602                  .incr(&record.subject.uri, AggregateType::Repost)
616 603                  .await;
617 -                db::insert_repost(conn, repo, at_uri, record).await?;
604 +                db::repost_insert(conn, at_uri, repo, record).await?;
618 605          }
619 606          RecordTypes::AppBskyFeedThreadgate(record) => {
620 607              let split_aturi = record.post.rsplitn(4, '/').collect::<Vec<_>>();
···
623 610                  return Ok(());
624 611              }
625 612
626 -                db::upsert_threadgate(conn, at_uri, cid, record).await?;
613 +                db::threadgate_upsert(conn, at_uri, cid, record).await?;
627 614          }
628 615          RecordTypes::AppBskyGraphBlock(record) => {
629 -                db::insert_block(conn, repo, at_uri, record).await?;
616 +                db::block_insert(conn, at_uri, repo, record).await?;
630 617          }
631 618          RecordTypes::AppBskyGraphFollow(record) => {
632 619              let subject = record.subject.clone();
633 -                let count = db::insert_follow(conn, repo, at_uri, record).await?;
620 +                let count = db::follow_insert(conn, at_uri, repo, record).await?;
634 621
635 622              deltas
636 623                  .add_delta(repo, AggregateType::Follow, count as i32)
···
641 628          }
642 629          RecordTypes::AppBskyGraphList(record) => {
643 630              let labels = record.labels.clone();
644 -                let count = db::upsert_list(conn, repo, at_uri, cid, record).await?;
631 +                let count = db::list_upsert(conn, at_uri, repo, cid, record).await?;
645 632
646 633              if let Some(labels) = labels {
647 634                  db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?;
···
652 639                  .await;
653 640          }
654 641          RecordTypes::AppBskyGraphListBlock(record) => {
655 -                db::insert_list_block(conn, repo, at_uri, record).await?;
642 +                db::list_block_insert(conn, at_uri, repo, record).await?;
656 643          }
657 644          RecordTypes::AppBskyGraphListItem(record) => {
658 645              let split_aturi = record.list.rsplitn(4, '/').collect::<Vec<_>>();
···
662 649                  return Ok(());
663 650              }
664 651
665 -                db::insert_list_item(conn, at_uri, record).await?;
652 +                db::list_item_insert(conn, at_uri, record).await?;
666 653          }
667 654          RecordTypes::AppBskyGraphStarterPack(record) => {
668 -                let count = db::upsert_starterpack(conn, repo, cid, at_uri, record).await?;
655 +                let count = db::starter_pack_upsert(conn, at_uri, repo, cid, record).await?;
669 656              deltas
670 657                  .add_delta(repo, AggregateType::ProfileStarterpack, count as i32)
671 658                  .await;
672 659          }
673 660          RecordTypes::AppBskyGraphVerification(record) => {
674 -                db::upsert_verification(conn, repo, cid, at_uri, record).await?;
661 +                db::verification_insert(conn, at_uri, repo, cid, record).await?;
675 662          }
676 663          RecordTypes::AppBskyLabelerService(record) => {
677 664              if rkey == "self" {
678 665                  let labels = record.labels.clone();
679 -                    db::upsert_label_service(conn, repo, cid, record).await?;
666 +                    db::labeler_upsert(conn, repo, cid, record).await?;
680 667
681 668                  if let Some(labels) = labels {
682 669                      db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?;
···
685 672          }
686 673          RecordTypes::ChatBskyActorDeclaration(record) => {
687 674              if rkey == "self" {
688 -                    db::upsert_chat_decl(conn, repo, record).await?;
675 +                    db::chat_decl_upsert(conn, repo, record).await?;
689 676              }
690 677          }
691 678      }
692 679
693 -        db::write_record(conn, at_uri, repo, cid).await?;
680 +        db::record_upsert(conn, at_uri, repo, cid).await?;
694 681
695 682      Ok(())
696 683  }
697 684
698 685  pub async fn index_op_delete(
699 -        conn: &mut AsyncPgConnection,
686 +        conn: &mut Transaction<'_>,
700 687      deltas: &mut impl AggregateDeltaStore,
701 688      repo: &str,
702 689      collection: CollectionType,
703 690      at_uri: &str,
704 -    ) -> diesel::QueryResult<()> {
691 +    ) -> Result<(), tokio_postgres::Error> {
705 692      match collection {
706 -            CollectionType::BskyProfile => db::delete_profile(conn, repo).await?,
707 -            CollectionType::BskyBlock => db::delete_block(conn, at_uri).await?,
693 +            CollectionType::BskyProfile => db::profile_delete(conn, repo).await?,
694 +            CollectionType::BskyBlock => db::block_delete(conn, at_uri).await?,
708 695          CollectionType::BskyFeedGen => {
709 -                let count = db::delete_feedgen(conn, at_uri).await?;
696 +                let count = db::feedgen_delete(conn, at_uri).await?;
710 697              deltas
711 698                  .add_delta(repo, AggregateType::ProfileFeed, -(count as i32))
712 699                  .await;
713 700              count
714 701          }
715 702          CollectionType::BskyFeedLike => {
716 -                if let Some(subject) = db::delete_like(conn, at_uri).await? {
703 +                if let Some(subject) = db::like_delete(conn, at_uri).await? {
717 704                  deltas.decr(&subject, AggregateType::Like).await;
718 705              }
719 706              0
720 707          }
721 708          CollectionType::BskyFeedPost => {
722 -                let post_info = db::get_post_info_for_delete(conn, at_uri).await?;
709 +                let post_info = db::post_get_info_for_delete(conn, at_uri).await?;
723 710
724 -                db::delete_post(conn, at_uri).await?;
711 +                db::post_delete(conn, at_uri).await?;
725 712
726 713              if let Some((reply_to, embed)) = post_info {
727 714                  deltas.decr(repo, AggregateType::ProfilePost).await;
···
735 722
736 723              0
737 724          }
738 -            CollectionType::BskyFeedPostgate => db::delete_postgate(conn, at_uri).await?,
725 +            CollectionType::BskyFeedPostgate => db::postgate_delete(conn, at_uri).await?,
739 726          CollectionType::BskyFeedRepost => {
740 -                if let Some(subject) = db::delete_repost(conn, at_uri).await? {
727 +                if let Some(subject) = db::repost_delete(conn, at_uri).await? {
741 728                  deltas.decr(&subject, AggregateType::Repost).await;
742 729              }
743 730              0
744 731          }
745 -            CollectionType::BskyFeedThreadgate => db::delete_threadgate(conn, at_uri).await?,
732 +            CollectionType::BskyFeedThreadgate => db::threadgate_delete(conn, at_uri).await?,
746 733          CollectionType::BskyFollow => {
747 -                if let Some(followee) = db::delete_follow(conn, at_uri).await? {
734 +                if let Some(followee) = db::follow_delete(conn, at_uri).await? {
748 735                  deltas.decr(&followee, AggregateType::Follower).await;
749 736                  deltas.decr(repo, AggregateType::Follow).await;
750 737              }
751 738              0
752 739          }
753 740          CollectionType::BskyList => {
754 -                let count = db::delete_list(conn, at_uri).await?;
741 +                let count = db::list_delete(conn, at_uri).await?;
755 742              deltas
756 743                  .add_delta(repo, AggregateType::ProfileList, -(count as i32))
757 744                  .await;
758 745              count
759 746          }
760 -            CollectionType::BskyListBlock => db::delete_list_block(conn, at_uri).await?,
761 -            CollectionType::BskyListItem => db::delete_list_item(conn, at_uri).await?,
747 +            CollectionType::BskyListBlock => db::list_block_delete(conn, at_uri).await?,
748 +            CollectionType::BskyListItem => db::list_item_delete(conn, at_uri).await?,
762 749          CollectionType::BskyStarterPack => {
763 -                let count = db::delete_starterpack(conn, at_uri).await?;
750 +                let count = db::starter_pack_delete(conn, at_uri).await?;
764 751              deltas
765 752                  .add_delta(repo, AggregateType::ProfileStarterpack, -(count as i32))
766 753                  .await;
767 754              count
768 755          }
769 -            CollectionType::BskyVerification => db::delete_verification(conn, at_uri).await?,
770 -            CollectionType::BskyLabelerService => db::delete_label_service(conn, at_uri).await?,
771 -            CollectionType::ChatActorDecl => db::delete_chat_decl(conn, at_uri).await?,
756 +            CollectionType::BskyVerification => db::verification_delete(conn, at_uri).await?,
757 +            CollectionType::BskyLabelerService => db::labeler_delete(conn, at_uri).await?,
758 +            CollectionType::ChatActorDecl => db::chat_decl_delete(conn, at_uri).await?,
772 759          _ => unreachable!(),
773 760      };
774 761
775 -        db::delete_record(conn, at_uri).await?;
762 +        db::record_delete(conn, at_uri).await?;
776 763
777 764      Ok(())
778 765  }
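Throughout the rewritten indexer, connection handling follows the deadpool-postgres model: a Pool hands out Objects (pooled tokio_postgres clients), and per-commit work runs on a Transaction that rolls back if dropped uncommitted. A minimal sketch of that flow, with a placeholder query:

    use deadpool_postgres::{Object, Pool};

    async fn with_transaction(pool: &Pool) -> eyre::Result<()> {
        // Object derefs to tokio_postgres::Client, so the usual client API applies.
        let mut conn: Object = pool.get().await?;

        // Like tokio_postgres::Transaction, this rolls back on drop unless
        // commit() is called.
        let t = conn.transaction().await?;
        t.execute("select 1", &[]).await?;
        t.commit().await?;
        Ok(())
    }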
+12 -12  consumer/src/indexer/records.rs
···
9 9    use lexica::com_atproto::moderation::{ReasonType, SubjectType};
10 10   use serde::{Deserialize, Serialize};
11 11
12 -    #[derive(Debug, Deserialize, Serialize)]
12 +    #[derive(Clone, Debug, Deserialize, Serialize)]
13 13   pub struct StrongRef {
14 14       #[serde(
15 15           deserialize_with = "utils::cid_from_string",
···
19 19       pub uri: String,
20 20   }
21 21
22 -    #[derive(Debug, Deserialize, Serialize)]
22 +    #[derive(Clone, Debug, Deserialize, Serialize)]
23 23   #[serde(tag = "$type")]
24 24   #[serde(rename = "blob")]
25 25   #[serde(rename_all = "camelCase")]
···
43 43       pub created_at: Option<DateTime<Utc>>,
44 44   }
45 45
46 -    #[derive(Debug, Deserialize, Serialize)]
46 +    #[derive(Clone, Debug, Deserialize, Serialize)]
47 47   #[serde(untagged)]
48 48   pub enum EmbedOuter {
49 49       Bsky(AppBskyEmbed),
···
83 83       }
84 84   }
85 85
86 -    #[derive(Debug, Deserialize, Serialize)]
86 +    #[derive(Clone, Debug, Deserialize, Serialize)]
87 87   #[serde(tag = "$type")]
88 88   pub enum AppBskyEmbed {
89 89       #[serde(rename = "app.bsky.embed.images")]
···
117 117      }
118 118  }
119 119
120 -    #[derive(Debug, Deserialize, Serialize)]
120 +    #[derive(Clone, Debug, Deserialize, Serialize)]
121 121  #[serde(rename_all = "camelCase")]
122 122  pub struct AppBskyEmbedImages {
123 123      pub images: Vec<EmbedImage>,
124 124  }
125 125
126 -    #[derive(Debug, Deserialize, Serialize)]
126 +    #[derive(Clone, Debug, Deserialize, Serialize)]
127 127  #[serde(rename_all = "camelCase")]
128 128  pub struct EmbedImage {
129 129      pub image: Blob,
···
132 132      pub aspect_ratio: Option<AspectRatio>,
133 133  }
134 134
135 -    #[derive(Debug, Deserialize, Serialize)]
135 +    #[derive(Clone, Debug, Deserialize, Serialize)]
136 136  #[serde(rename_all = "camelCase")]
137 137  pub struct AppBskyEmbedVideo {
138 138      pub video: Blob,
···
144 144      pub aspect_ratio: Option<AspectRatio>,
145 145  }
146 146
147 -    #[derive(Debug, Deserialize, Serialize)]
147 +    #[derive(Clone, Debug, Deserialize, Serialize)]
148 148  pub struct EmbedVideoCaptions {
149 149      pub lang: String,
150 150      pub file: Blob,
151 151  }
152 152
153 -    #[derive(Debug, Deserialize, Serialize)]
153 +    #[derive(Clone, Debug, Deserialize, Serialize)]
154 154  pub struct AppBskyEmbedExternal {
155 155      pub external: EmbedExternal,
156 156  }
157 157
158 -    #[derive(Debug, Deserialize, Serialize)]
158 +    #[derive(Clone, Debug, Deserialize, Serialize)]
159 159  pub struct EmbedExternal {
160 160      pub uri: String,
161 161      pub title: String,
···
164 164      pub thumb: Option<Blob>,
165 165  }
166 166
167 -    #[derive(Debug, Deserialize, Serialize)]
167 +    #[derive(Clone, Debug, Deserialize, Serialize)]
168 168  pub struct AppBskyEmbedRecord {
169 169      pub record: StrongRef,
170 170  }
171 171
172 -    #[derive(Debug, Deserialize, Serialize)]
172 +    #[derive(Clone, Debug, Deserialize, Serialize)]
173 173  pub struct AppBskyEmbedRecordWithMedia {
174 174      pub record: AppBskyEmbedRecord,
175 175      pub media: Box<AppBskyEmbed>,
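The new Clone derives exist so the indexer can keep a copy of a nested field (e.g. let labels = record.labels.clone() in mod.rs above) while the record itself is moved by value into a db helper. A standalone illustration of the pattern, with stand-in types rather than the crate's own:

    #[derive(Clone, Debug)]
    struct Labels(Vec<String>);

    #[derive(Clone, Debug)]
    struct Record {
        labels: Option<Labels>,
    }

    fn insert(record: Record) {
        // takes ownership, like db::post_insert(conn, at_uri, repo, cid, record)
        let _ = record;
    }

    fn main() {
        let record = Record { labels: Some(Labels(vec!["!warn".into()])) };
        let labels = record.labels.clone(); // possible only because Labels: Clone
        insert(record);                     // record moved here
        println!("{labels:?}");             // the clone remains usable
    }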
+12 -16  consumer/src/label_indexer/mod.rs
···
6 6    use std::sync::Arc;
7 7    use std::time::Duration;
8 8    use tokio::sync::mpsc::{channel, Receiver, Sender};
9 +    use tokio::sync::watch::Receiver as WatchReceiver;
9 10   use tokio::task::JoinHandle;
10 11  use tokio::time::Instant;
11 12  use tokio_postgres::binary_copy::BinaryCopyInWriter;
12 13  use tokio_postgres::types::Type;
13 -   use tokio_postgres::NoTls;
14 14  use tracing::instrument;
15 -   use tokio::sync::watch::Receiver as WatchReceiver;
16 15
17 16  const LABELER_SERVICE_ID: &str = "#atproto_labeler";
18 17
19 18  pub struct LabelServiceManager {
20 -       client: tokio_postgres::Client,
19 +       conn: deadpool_postgres::Object,
21 20      rx: Receiver<String>,
22 21      resolver: Arc<Resolver>,
23 22      services: HashMap<String, JoinHandle<()>>,
···
27 26
28 27  impl LabelServiceManager {
29 28      pub async fn new(
30 -           pg_url: &str,
29 +           pool: deadpool_postgres::Pool,
31 30          resolver: Arc<Resolver>,
32 31          resume: sled::Db,
33 32          user_agent: String,
34 33      ) -> eyre::Result<(Self, Sender<String>)> {
35 -           let (client, connection) = tokio_postgres::connect(pg_url, NoTls).await?;
36 -
37 -           tokio::spawn(async move {
38 -               if let Err(e) = connection.await {
39 -                   tracing::error!("connection error: {}", e);
40 -               }
41 -           });
42 -
34 +           let conn = pool.get().await?;
43 35          let (tx, rx) = channel(8);
44 36
45 37          let lsm = LabelServiceManager {
46 -               client,
38 +               conn,
47 39              rx,
48 40              resolver,
49 41              resume,
···
112 104                 continue;
113 105             }
114 106             tracing::debug!("got {} labels", buf.len());
115 -               store_labels(&mut self.client, &buf).await
107 +               store_labels(&mut self.conn, &buf).await
116 108         }
117 109     };
118 110
···
169 161
170 162     let count = binary_writer.finish().await?;
171 163
172 -       t.execute(include_str!("../sql/label_copy_upsert.sql"), &[])
164 +       t.execute(include_str!("../db/sql/label_copy_upsert.sql"), &[])
173 165         .await?;
174 166
175 167     t.commit().await?;
···
186 178     user_agent: String,
187 179     db_tx: Sender<AtpLabel>,
188 180  ) {
189 -       let start_seq = resume.get(&service_did).ok().flatten().and_then(crate::utils::u64_from_ivec);
181 +       let start_seq = resume
182 +           .get(&service_did)
183 +           .ok()
184 +           .flatten()
185 +           .and_then(crate::utils::u64_from_ivec);
190 186
191 187     if let Some(start_seq) = start_seq {
192 188         tracing::info!("starting {service_did} label consumer from {start_seq}");
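store_labels (partially visible above) pairs a binary COPY with an upsert statement now read from the relocated consumer/src/db/sql/label_copy_upsert.sql. A sketch of that shape under stated assumptions: the staging-table name and its two text columns are invented here, since the real column list and SQL are not shown in this diff.

    use futures::pin_mut;
    use tokio_postgres::binary_copy::BinaryCopyInWriter;
    use tokio_postgres::types::Type;

    async fn store_labels_sketch(
        conn: &mut deadpool_postgres::Object,
        labels: &[(String, String)], // (src, uri) stand-in columns
    ) -> eyre::Result<()> {
        let t = conn.transaction().await?;

        // Stream rows into a staging table with the binary COPY protocol.
        let sink = t
            .copy_in("copy labels_staging (src, uri) from stdin (format binary)")
            .await?;
        let writer = BinaryCopyInWriter::new(sink, &[Type::TEXT, Type::TEXT]);
        pin_mut!(writer);

        for (src, uri) in labels {
            writer.as_mut().write(&[src, uri]).await?;
        }
        writer.finish().await?;

        // Stand-in for t.execute(include_str!("../db/sql/label_copy_upsert.sql"), &[]).
        t.execute(
            "insert into labels select * from labels_staging on conflict do nothing",
            &[],
        )
        .await?;

        t.commit().await?;
        Ok(())
    }

COPY is substantially faster than row-by-row inserts for bulk loads, and routing it through a staging table keeps the final upsert a single set-based statement.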
+11 -8  consumer/src/main.rs
···
1 +    use deadpool_postgres::Runtime;
1 2    use did_resolver::{Resolver, ResolverOpts};
2 -    use diesel_async::pooled_connection::deadpool::Pool;
3 -    use diesel_async::pooled_connection::AsyncDieselConnectionManager;
4 -    use diesel_async::AsyncPgConnection;
5 3    use eyre::OptionExt;
6 4    use metrics_exporter_prometheus::PrometheusBuilder;
7 5    use std::sync::Arc;
6 +    use tokio_postgres::NoTls;
8 7
9 8    mod backfill;
10 9   mod cmd;
11 10  mod config;
11 +   mod db;
12 12  mod firehose;
13 13  mod indexer;
14 14  mod label_indexer;
···
24 24
25 25      let user_agent = build_ua(&conf.ua_contact);
26 26
27 -       let db_mgr = AsyncDieselConnectionManager::<AsyncPgConnection>::new(&conf.database_url);
28 -       let pool = Pool::builder(db_mgr).build()?;
27 +       let pool = conf.database.create_pool(Some(Runtime::Tokio1), NoTls)?;
29 28
30 29      let (redis_conn, redis_fut) = redis::Client::open(conf.redis_uri)?
31 30          .create_multiplexed_tokio_connection()
···
61 60          let resume = resume.clone().unwrap();
62 61
63 62          let (label_mgr, _label_svc_tx) = label_indexer::LabelServiceManager::new(
64 -               &conf.database_url,
63 +               pool.clone(),
65 64              resolver.clone(),
66 65              resume,
67 66              user_agent.clone(),
···
72 71      }
73 72
74 73      if cli.backfill {
74 +           let bf_cfg = conf
75 +               .backfill
76 +               .ok_or_eyre("Config item [backfill] must be specified when using --backfill")?;
77 +
75 78          let backfiller = backfill::BackfillManager::new(
76 79              pool.clone(),
77 80              redis_conn.clone(),
78 81              resolver.clone(),
79 -               index_client.clone(),
80 -               conf.backfill_workers,
82 +               (!bf_cfg.skip_aggregation).then_some(index_client.clone()),
83 +               bf_cfg,
81 84          )
82 85          .await?;
83 86
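The old setup built a diesel-async pool from a single database_url string; the new code expects a deadpool_postgres::Config embedded in the consumer's figment config (conf.database). A sketch of the equivalent programmatic construction; the field values are placeholders, and the consumer's actual config struct is outside this diff:

    use deadpool_postgres::{Config, Runtime};
    use tokio_postgres::NoTls;

    fn make_pool() -> Result<deadpool_postgres::Pool, deadpool_postgres::CreatePoolError> {
        // With deadpool's `serde` feature (present in Cargo.lock), this struct can
        // also be deserialized straight from a [database] config section, which is
        // what `conf.database.create_pool(Some(Runtime::Tokio1), NoTls)` implies.
        let mut cfg = Config::new();
        cfg.host = Some("localhost".into());
        cfg.dbname = Some("parakeet".into());
        cfg.user = Some("parakeet".into());

        // Runtime::Tokio1 enables pool timeouts; NoTls matches the call in main().
        cfg.create_pool(Some(Runtime::Tokio1), NoTls)
    }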
consumer/src/sql/label_copy_upsert.sql -> consumer/src/db/sql/label_copy_upsert.sql (renamed)
+4 -4  migrations/2025-01-29-213341_follows_and_blocks/up.sql
···
6 6        created_at timestamptz not null
7 7    );
8 8
9 -    create index blocks_did_index on blocks using hash (did);
10 -    create index blocks_subject_index on blocks using hash (subject);
9 +    create index blocks_did_index on blocks (did);
10 +    create index blocks_subject_index on blocks (subject);
11 11
12 12   create table follows
13 13   (
···
17 17       created_at timestamptz not null
18 18   );
19 19
20 -    create index follow_did_index on follows using hash (did);
21 -    create index follow_subject_index on follows using hash (subject);
20 +    create index follow_did_index on follows (did);
21 +    create index follow_subject_index on follows (subject);
+2 -2  migrations/2025-02-07-203450_lists/up.sql
···
25 25      indexed_at timestamp not null default now()
26 26   );
27 27
28 -    create index listitems_list_index on list_items using hash (list_uri);
29 -    create index listitems_subject_index on list_items using hash (subject);
28 +    create index listitems_list_index on list_items (list_uri);
29 +    create index listitems_subject_index on list_items (subject);
30 30
31 31   create table list_blocks
32 32   (
+10 -10  migrations/2025-02-16-142357_posts/up.sql
···
22 22      indexed_at timestamp not null default now()
23 23   );
24 24
25 -    create index posts_did_index on posts using hash (did);
26 -    create index posts_parent_index on posts using hash (parent_uri);
27 -    create index posts_root_index on posts using hash (root_uri);
25 +    create index posts_did_index on posts (did);
26 +    create index posts_parent_index on posts (parent_uri);
27 +    create index posts_root_index on posts (root_uri);
28 28   create index posts_lang_index on posts using gin (languages);
29 29   create index posts_tags_index on posts using gin (tags);
30 30
31 31   create table post_embed_images
32 32   (
33 -        post_uri text not null references posts (at_uri) on delete cascade,
33 +        post_uri text not null references posts (at_uri) on delete cascade deferrable,
34 34       seq smallint not null,
35 35
36 36       mime_type text not null,
···
47 47
48 48   create table post_embed_video
49 49   (
50 -        post_uri text primary key references posts (at_uri) on delete cascade,
50 +        post_uri text primary key references posts (at_uri) on delete cascade deferrable,
51 51
52 52       mime_type text not null,
53 53       cid text not null,
···
61 61
62 62   create table post_embed_video_captions
63 63   (
64 -        post_uri text not null references posts (at_uri) on delete cascade,
64 +        post_uri text not null references posts (at_uri) on delete cascade deferrable,
65 65       language text not null,
66 66
67 67       mime_type text not null,
···
74 74
75 75   create table post_embed_ext
76 76   (
77 -        post_uri text primary key references posts (at_uri) on delete cascade,
77 +        post_uri text primary key references posts (at_uri) on delete cascade deferrable,
78 78
79 79       uri text not null,
80 80       title text not null,
···
87 87
88 88   create table post_embed_record
89 89   (
90 -        post_uri text primary key references posts (at_uri) on delete cascade,
90 +        post_uri text primary key references posts (at_uri) on delete cascade deferrable,
91 91
92 92       record_type text not null,
93 93       uri text not null,
···
101 101  (
102 102      at_uri text primary key,
103 103      cid text not null,
104 -        post_uri text not null references posts (at_uri) on delete cascade,
104 +        post_uri text not null references posts (at_uri) on delete cascade deferrable,
105 105
106 106      detached text[] not null,
107 107      rules text[] not null,
···
118 118  (
119 119      at_uri text primary key,
120 120      cid text not null,
121 -        post_uri text not null references posts (at_uri) on delete cascade,
121 +        post_uri text not null references posts (at_uri) on delete cascade deferrable,
122 122
123 123      hidden_replies text[] not null,
124 124      allow text[] not null,
+4 -4  migrations/2025-04-05-114428_likes_and_reposts/up.sql
···
8 8        indexed_at timestamp not null default now()
9 9    );
10 10
11 -    create index likes_did_index on likes using hash (did);
12 -    create index likes_subject_index on likes using hash (subject);
11 +    create index likes_did_index on likes (did);
12 +    create index likes_subject_index on likes (subject);
13 13
14 14   create table reposts
15 15   (
···
21 21       indexed_at timestamp not null default now()
22 22   );
23 23
24 -    create index reposts_did_index on reposts using hash (did);
25 -    create index reposts_post_index on reposts using hash (post);
24 +    create index reposts_did_index on reposts (did);
25 +    create index reposts_post_index on reposts (post);
+2 -2  migrations/2025-04-18-185717_verification/up.sql
···
12 12      indexed_at timestamp not null default now()
13 13   );
14 14
15 -    create index verification_verifier_index on verification using hash (verifier);
16 -    create index verification_subject_index on verification using hash (subject);
15 +    create index verification_verifier_index on verification (verifier);
16 +    create index verification_subject_index on verification (subject);
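Two changes recur across these migrations: the hash indexes become plain btree indexes (the default; btree supports ordering and index-only scans, whereas hash covers equality lookups only), and the post_embed_* foreign keys gain deferrable. A deferrable constraint may be checked at commit instead of per statement, which lets a bulk writer insert rows in any order within one transaction. A hedged sketch of what that enables, with abbreviated, illustrative column lists:

    // The FKs above are DEFERRABLE (initially immediate), so a transaction can
    // opt in to deferred checking explicitly.
    async fn out_of_order(
        t: &deadpool_postgres::Transaction<'_>,
    ) -> Result<(), tokio_postgres::Error> {
        t.execute("set constraints all deferred", &[]).await?;

        // Child row first ...
        t.execute(
            "insert into post_embed_record (post_uri, record_type, uri) values ($1, $2, $3)",
            &[
                &"at://did:plc:example/app.bsky.feed.post/3k2",
                &"post",
                &"at://did:plc:subject/app.bsky.feed.post/1",
            ],
        )
        .await?;

        // ... parent row afterwards; the FK is validated at COMMIT.
        t.execute(
            "insert into posts (at_uri /* , ... */) values ($1 /* , ... */)",
            &[&"at://did:plc:example/app.bsky.feed.post/3k2"],
        )
        .await?;

        Ok(())
    }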
+8 -2  parakeet-db/Cargo.toml
···
5 5
6 6    [dependencies]
7 7    chrono = { version = "0.4.39", features = ["serde"] }
8 -    diesel = { version = "2.2.6", features = ["chrono", "serde_json"] }
9 -    serde_json = "1.0.134"
8 +    diesel = { version = "2.2.6", features = ["chrono", "serde_json"], optional = true }
9 +    postgres-types = { version = "0.2.9", optional = true }
10 +   serde_json = "1.0.134"
11 +
12 +   [features]
13 +   default = ["diesel"]
14 +   diesel = ["dep:diesel"]
15 +   postgres = ["dep:postgres-types"]
+2  parakeet-db/src/lib.rs
+100 -69  parakeet-db/src/types.rs
···
1 -    use diesel::backend::Backend;
2 -    use diesel::deserialize::FromSql;
3 -    use diesel::pg::Pg;
4 -    use diesel::serialize::{Output, ToSql};
5 -    use diesel::{AsExpression, FromSqlRow};
1 +    macro_rules! text_enum {
2 +        (enum $name:ident {$($variant:ident = $value:expr,)*}) => {
3 +            #[derive(Debug, PartialOrd, PartialEq)]
4 +            #[cfg_attr(feature = "diesel", derive(diesel::AsExpression, diesel::FromSqlRow))]
5 +            #[cfg_attr(feature = "diesel", diesel(sql_type = diesel::sql_types::Text))]
6 +            pub enum $name {
7 +                $($variant,)*
8 +            }
9 +
10 +           impl std::fmt::Display for $name {
11 +               fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
12 +                   match self {
13 +                       $(Self::$variant => write!(f, $value),)*
14 +                   }
15 +               }
16 +           }
17 +
18 +           impl std::str::FromStr for $name {
19 +               type Err = String;
20 +
21 +               fn from_str(s: &str) -> Result<Self, Self::Err> {
22 +                   match s {
23 +                       $($value => Ok(Self::$variant),)*
24 +                       x => Err(format!("Unrecognized variant {}", x).into()),
25 +                   }
26 +               }
27 +           }
28 +
29 +           #[cfg(feature = "postgres")]
30 +           impl postgres_types::FromSql<'_> for $name {
31 +               fn from_sql(
32 +                   ty: &postgres_types::Type,
33 +                   raw: &[u8]
34 +               ) -> Result<Self, Box<dyn std::error::Error + Sync + Send>> {
35 +                   Ok(String::from_sql(ty, raw)?.parse()?)
36 +               }
37 +
38 +               fn accepts(ty: &postgres_types::Type) -> bool {
39 +                   ty == &postgres_types::Type::TEXT
40 +               }
41 +           }
42 +
43 +           #[cfg(feature = "postgres")]
44 +           impl postgres_types::ToSql for $name {
45 +               fn to_sql(
46 +                   &self,
47 +                   ty: &postgres_types::Type,
48 +                   out: &mut postgres_types::private::BytesMut
49 +               ) -> Result<postgres_types::IsNull, Box<dyn std::error::Error + Sync + Send>>
50 +               where
51 +                   Self: Sized,
52 +               {
53 +                   self.to_string().to_sql(ty, out)
54 +               }
6 55
7 -    #[derive(Debug, PartialOrd, PartialEq, AsExpression, FromSqlRow)]
8 -    #[diesel(sql_type = diesel::sql_types::Text)]
9 -    pub enum ActorStatus {
10 -        Active,
11 -        Takendown,
12 -        Suspended,
13 -        Deleted,
14 -        Deactivated,
15 -    }
56 +               fn accepts(ty: &postgres_types::Type) -> bool
57 +               where
58 +                   Self: Sized,
59 +               {
60 +                   ty == &postgres_types::Type::TEXT
61 +               }
16 62
17 -    impl<DB> FromSql<diesel::sql_types::Text, DB> for ActorStatus
18 -    where
19 -        DB: Backend,
20 -        String: FromSql<diesel::sql_types::Text, DB>,
21 -    {
22 -        fn from_sql(bytes: DB::RawValue<'_>) -> diesel::deserialize::Result<Self> {
23 -            match String::from_sql(bytes)?.as_str() {
24 -                "active" => Ok(ActorStatus::Active),
25 -                "takendown" => Ok(ActorStatus::Takendown),
26 -                "suspended" => Ok(ActorStatus::Suspended),
27 -                "deleted" => Ok(ActorStatus::Deleted),
28 -                "deactivated" => Ok(ActorStatus::Deactivated),
29 -                x => Err(format!("Unrecognized variant {}", x).into()),
63 +               postgres_types::to_sql_checked!();
30 64            }
31 -        }
32 -    }
33 65
34 -    impl ToSql<diesel::sql_types::Text, Pg> for ActorStatus {
35 -        fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result {
36 -            let val = match self {
37 -                ActorStatus::Active => "active",
38 -                ActorStatus::Takendown => "takendown",
39 -                ActorStatus::Suspended => "suspended",
40 -                ActorStatus::Deleted => "deleted",
41 -                ActorStatus::Deactivated => "deactivated",
42 -            };
66 +           #[cfg(feature = "diesel")]
67 +           impl<DB> diesel::deserialize::FromSql<diesel::sql_types::Text, DB> for $name
68 +           where
69 +               DB: diesel::backend::Backend,
70 +               String: diesel::deserialize::FromSql<diesel::sql_types::Text, DB>,
71 +           {
72 +               fn from_sql(bytes: DB::RawValue<'_>) -> diesel::deserialize::Result<Self> {
73 +                   use std::str::FromStr;
43 74
44 -            <str as ToSql<diesel::sql_types::Text, Pg>>::to_sql(val, out)
45 -        }
46 -    }
75 +                   let st = <String as diesel::deserialize::FromSql<diesel::sql_types::Text, DB>>::from_sql(bytes)?;
47 76
48 -    #[derive(Debug, PartialOrd, PartialEq, AsExpression, FromSqlRow)]
49 -    #[diesel(sql_type = diesel::sql_types::Text)]
50 -    pub enum ActorSyncState {
51 -        Synced,
52 -        Dirty,
53 -        Processing,
54 -    }
77 +                   let out = Self::from_str(&st)?;
78 +                   Ok(out)
79 +               }
80 +           }
55 81
56 -    impl<DB> FromSql<diesel::sql_types::Text, DB> for ActorSyncState
57 -    where
58 -        DB: Backend,
59 -        String: FromSql<diesel::sql_types::Text, DB>,
60 -    {
61 -        fn from_sql(bytes: DB::RawValue<'_>) -> diesel::deserialize::Result<Self> {
62 -            match String::from_sql(bytes)?.as_str() {
63 -                "synced" => Ok(ActorSyncState::Synced),
64 -                "dirty" => Ok(ActorSyncState::Dirty),
65 -                "processing" => Ok(ActorSyncState::Processing),
66 -                x => Err(format!("Unrecognized variant {}", x).into()),
82 +           #[cfg(feature = "diesel")]
83 +           impl diesel::serialize::ToSql<diesel::sql_types::Text, diesel::pg::Pg> for $name {
84 +               fn to_sql<'b>(&'b self, out: &mut diesel::serialize::Output<'b, '_, diesel::pg::Pg>) -> diesel::serialize::Result {
85 +                   use std::io::Write;
86 +                   let val = self.to_string();
87 +
88 +                   out.write(val.as_bytes())?;
89 +                   Ok(diesel::serialize::IsNull::No)
90 +               }
67 91            }
68 -        }
92 +
93 +       };
69 94   }
70 95
71 -    impl ToSql<diesel::sql_types::Text, Pg> for ActorSyncState {
72 -        fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result {
73 -            let val = match self {
74 -                ActorSyncState::Synced => "synced",
75 -                ActorSyncState::Dirty => "dirty",
76 -                ActorSyncState::Processing => "processing",
77 -            };
96 +   text_enum!(
97 +       enum ActorStatus {
98 +           Active = "active",
99 +           Takendown = "takendown",
100 +          Suspended = "suspended",
101 +          Deleted = "deleted",
102 +          Deactivated = "deactivated",
103 +      }
104 +  );
78 105
79 -            <str as ToSql<diesel::sql_types::Text, Pg>>::to_sql(val, out)
106 +  text_enum!(
107 +      enum ActorSyncState {
108 +          Synced = "synced",
109 +          Dirty = "dirty",
110 +          Processing = "processing",
80 111      }
81 -    }
112 +  );
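The net effect of the macro: each enum round-trips through its declared text values via Display/FromStr, and the cfg-gated impls let the same type serve as a diesel Text column or (with the new postgres feature) bind directly as a tokio-postgres TEXT parameter. A usage sketch; the actors table and its status column are assumptions, not shown in this diff:

    use parakeet_db::types::{ActorStatus, ActorSyncState};
    use std::str::FromStr;

    fn round_trip() {
        assert_eq!(ActorStatus::Active.to_string(), "active");
        assert_eq!(ActorSyncState::from_str("dirty").ok(), Some(ActorSyncState::Dirty));
    }

    // With parakeet-db built as { default-features = false, features = ["postgres"] },
    // the enum binds as a TEXT parameter through the generated ToSql impl.
    async fn set_status(
        conn: &deadpool_postgres::Object,
        did: &str,
        status: ActorStatus,
    ) -> Result<u64, tokio_postgres::Error> {
        conn.execute("update actors set status = $1 where did = $2", &[&status, &did])
            .await
    }

Centralizing the string values in one macro invocation removes the risk the old hand-written code carried, where the FromSql and ToSql match arms for each enum could silently drift apart.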