+1
-2
bundle/operations.go
+1
-2
bundle/operations.go
···
67
67
}
68
68
69
69
var operation plc.PLCOperation
70
-
// Use sonic instead of json.Unmarshal
71
-
if err := json.Unmarshal(line, &operation); err != nil {
70
+
if err := json.UnmarshalNoEscape(line, &operation); err != nil {
72
71
return nil, fmt.Errorf("failed to parse line: %w", err)
73
72
}
74
73
+8
-1
cmd/plcbundle/detector.go
+8
-1
cmd/plcbundle/detector.go
···
545
545
546
546
// detectOperation runs all detectors on an operation and returns labels + confidence
547
547
func detectOperation(ctx context.Context, detectors []detector.Detector, op plc.PLCOperation, minConfidence float64) ([]string, float64) {
548
+
// Parse Operation ONCE before running detectors
549
+
opData, err := op.GetOperationData()
550
+
if err != nil {
551
+
return nil, 0
552
+
}
553
+
op.ParsedOperation = opData // Set for detectors to use
554
+
548
555
var matchedLabels []string
549
556
var maxConfidence float64
550
557
551
558
for _, det := range detectors {
552
-
match, err := det.Detect(ctx, op)
559
+
match, err := det.Detect(ctx, op) // ← op now has ParsedOperation set
553
560
if err != nil || match == nil || match.Confidence < minConfidence {
554
561
continue
555
562
}
+25
-10
plc/types.go
+25
-10
plc/types.go
···
8
8
9
9
// PLCOperation represents a single operation from the PLC directory.
//
// The nested "operation" payload is kept as raw JSON when the record is
// decoded; GetOperationData decodes it into a map on first use and stores the
// result in ParsedOperation.
type PLCOperation struct {
	DID       string          `json:"did"`
	Operation json.RawMessage `json:"operation"` // raw payload; decoded lazily by GetOperationData
	CID       string          `json:"cid"`
	Nullified interface{}     `json:"nullified,omitempty"`
	CreatedAt time.Time       `json:"createdAt"`

	RawJSON         []byte                 `json:"-"` // original JSON bytes, kept for exact reproduction; never (un)marshalled
	ParsedOperation map[string]interface{} `json:"-"` // cache of the decoded Operation; never (un)marshalled
}
21
20
22
21
// IsNullified checks if this operation has been nullified
···
92
91
Endpoint string
93
92
}
94
93
95
-
// GetOperationMap parses Operation RawMessage into a map
96
-
func (op *PLCOperation) GetOperationMap() (map[string]interface{}, error) {
94
+
// GetOperationData parses Operation into map (with caching)
95
+
func (op *PLCOperation) GetOperationData() (map[string]interface{}, error) {
96
+
// Return cached if already parsed
97
+
if op.ParsedOperation != nil {
98
+
return op.ParsedOperation, nil
99
+
}
100
+
101
+
// Parse on first call
97
102
if len(op.Operation) == 0 {
98
103
return nil, nil
99
104
}
100
-
var result map[string]interface{}
101
-
if err := json.Unmarshal(op.Operation, &result); err != nil {
105
+
106
+
var data map[string]interface{}
107
+
if err := json.UnmarshalNoEscape(op.Operation, &data); err != nil {
102
108
return nil, err
103
109
}
104
-
return result, nil
110
+
111
+
// Cache it
112
+
op.ParsedOperation = data
113
+
114
+
return data, nil
115
+
}
116
+
117
+
// GetOperationMap is kept as a compatibility alias for existing callers. It
// forwards to GetOperationData and returns exactly what that method returns.
func (op *PLCOperation) GetOperationMap() (map[string]interface{}, error) {
	return op.GetOperationData()
}
+10
-5
scripts/benchmark-detector.go
+10
-5
scripts/benchmark-detector.go
···
16
16
17
17
// Minimal operation struct
18
18
type Operation struct {
19
-
DID string `json:"did"`
20
-
//Operation map[string]interface{} `json:"operation"`
21
-
CID string `json:"cid"`
22
-
Nullified interface{} `json:"nullified,omitempty"`
23
-
CreatedAt time.Time `json:"createdAt"`
19
+
DID string `json:"did"`
20
+
Operation map[string]interface{} `json:"operation"`
21
+
CID string `json:"cid"`
22
+
Nullified interface{} `json:"nullified,omitempty"`
23
+
CreatedAt time.Time `json:"createdAt"`
24
24
25
25
// RawJSON stores the original JSON bytes for exact reproduction
26
26
RawJSON []byte `json:"-"`
···
32
32
33
33
if strings.HasPrefix(op.DID, "did:plc:aa") {
34
34
labels = append(labels, "test")
35
+
}
36
+
37
+
// Log operation.sig (like console.log in JavaScript)
38
+
if sig, ok := op.Operation["sig"]; ok {
39
+
fmt.Fprintf(os.Stderr, "%v\n", sig)
35
40
}
36
41
37
42
return labels
+12
-8
scripts/benchmark-detector.js
+12
-8
scripts/benchmark-detector.js
···
4
4
/**
 * Runs the benchmark detector on one operation record.
 *
 * @param {{ op: { did: string, operation?: object } }} input - wrapper holding the parsed record.
 * @returns {string[]} the labels that matched (currently only 'test').
 */
function detect({ op }) {
  const labels = [];

  if (op.did.startsWith('did:plc:aa')) {
    labels.push('test');
  }

  // Mirror the Go and Rust ports of this benchmark: emit the signature only
  // when the record actually carries one, and write it to stderr. Optional
  // chaining keeps a record without an `operation` object from throwing.
  const sig = op.operation?.sig;
  if (sig !== undefined) {
    console.error(sig);
  }

  return labels;
}
13
15
14
16
// ==========================================
15
-
// Pure Bun bundle processor
17
+
// Pure Bun bundle processor with native zstd
16
18
// ==========================================
17
-
18
-
import { spawn } from 'bun';
19
-
import { readdir } from 'fs/promises';
20
19
21
20
const BUNDLE_DIR = process.argv[2] || './';
22
21
const START_BUNDLE = parseInt(process.argv[3]) || 1;
···
39
38
const bundleFile = `${BUNDLE_DIR}/${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;
40
39
41
40
try {
42
-
// Decompress bundle using zstd command
43
-
const proc = spawn(['zstd', '-d', '-c', bundleFile]);
44
-
const text = await new Response(proc.stdout).text();
41
+
// Read compressed bundle
42
+
const compressed = await Bun.file(bundleFile).arrayBuffer();
43
+
44
+
// Decompress using native Bun zstd (FAST!)
45
+
const decompressed = Bun.zstdDecompressSync(compressed);
46
+
47
+
// Convert to text
48
+
const text = new TextDecoder().decode(decompressed);
45
49
46
50
const lines = text.split('\n').filter(line => line.trim());
47
51
+1
scripts/benchmark_detector_rs/.gitignore
+1
scripts/benchmark_detector_rs/.gitignore
···
1
+
target
+554
scripts/benchmark_detector_rs/Cargo.lock
+554
scripts/benchmark_detector_rs/Cargo.lock
···
1
+
# This file is automatically @generated by Cargo.
2
+
# It is not intended for manual editing.
3
+
version = 4
4
+
5
+
[[package]]
6
+
name = "ahash"
7
+
version = "0.8.12"
8
+
source = "registry+https://github.com/rust-lang/crates.io-index"
9
+
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
10
+
dependencies = [
11
+
"cfg-if",
12
+
"getrandom",
13
+
"once_cell",
14
+
"version_check",
15
+
"zerocopy",
16
+
]
17
+
18
+
[[package]]
19
+
name = "benchmark_detector"
20
+
version = "0.1.0"
21
+
dependencies = [
22
+
"serde",
23
+
"sonic-rs",
24
+
"zstd",
25
+
]
26
+
27
+
[[package]]
28
+
name = "bumpalo"
29
+
version = "3.19.0"
30
+
source = "registry+https://github.com/rust-lang/crates.io-index"
31
+
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
32
+
33
+
[[package]]
34
+
name = "bytes"
35
+
version = "1.10.1"
36
+
source = "registry+https://github.com/rust-lang/crates.io-index"
37
+
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
38
+
39
+
[[package]]
40
+
name = "cc"
41
+
version = "1.2.43"
42
+
source = "registry+https://github.com/rust-lang/crates.io-index"
43
+
checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
44
+
dependencies = [
45
+
"find-msvc-tools",
46
+
"jobserver",
47
+
"libc",
48
+
"shlex",
49
+
]
50
+
51
+
[[package]]
52
+
name = "cfg-if"
53
+
version = "1.0.4"
54
+
source = "registry+https://github.com/rust-lang/crates.io-index"
55
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
56
+
57
+
[[package]]
58
+
name = "equivalent"
59
+
version = "1.0.2"
60
+
source = "registry+https://github.com/rust-lang/crates.io-index"
61
+
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
62
+
63
+
[[package]]
64
+
name = "faststr"
65
+
version = "0.2.32"
66
+
source = "registry+https://github.com/rust-lang/crates.io-index"
67
+
checksum = "baec6a0289d7f1fe5665586ef7340af82e3037207bef60f5785e57569776f0c8"
68
+
dependencies = [
69
+
"bytes",
70
+
"rkyv",
71
+
"serde",
72
+
"simdutf8",
73
+
]
74
+
75
+
[[package]]
76
+
name = "find-msvc-tools"
77
+
version = "0.1.4"
78
+
source = "registry+https://github.com/rust-lang/crates.io-index"
79
+
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
80
+
81
+
[[package]]
82
+
name = "getrandom"
83
+
version = "0.3.4"
84
+
source = "registry+https://github.com/rust-lang/crates.io-index"
85
+
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
86
+
dependencies = [
87
+
"cfg-if",
88
+
"libc",
89
+
"r-efi",
90
+
"wasip2",
91
+
]
92
+
93
+
[[package]]
94
+
name = "hashbrown"
95
+
version = "0.15.5"
96
+
source = "registry+https://github.com/rust-lang/crates.io-index"
97
+
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
98
+
99
+
[[package]]
100
+
name = "hashbrown"
101
+
version = "0.16.0"
102
+
source = "registry+https://github.com/rust-lang/crates.io-index"
103
+
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
104
+
105
+
[[package]]
106
+
name = "indexmap"
107
+
version = "2.12.0"
108
+
source = "registry+https://github.com/rust-lang/crates.io-index"
109
+
checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
110
+
dependencies = [
111
+
"equivalent",
112
+
"hashbrown 0.16.0",
113
+
]
114
+
115
+
[[package]]
116
+
name = "itoa"
117
+
version = "1.0.15"
118
+
source = "registry+https://github.com/rust-lang/crates.io-index"
119
+
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
120
+
121
+
[[package]]
122
+
name = "jobserver"
123
+
version = "0.1.34"
124
+
source = "registry+https://github.com/rust-lang/crates.io-index"
125
+
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
126
+
dependencies = [
127
+
"getrandom",
128
+
"libc",
129
+
]
130
+
131
+
[[package]]
132
+
name = "js-sys"
133
+
version = "0.3.82"
134
+
source = "registry+https://github.com/rust-lang/crates.io-index"
135
+
checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65"
136
+
dependencies = [
137
+
"once_cell",
138
+
"wasm-bindgen",
139
+
]
140
+
141
+
[[package]]
142
+
name = "libc"
143
+
version = "0.2.177"
144
+
source = "registry+https://github.com/rust-lang/crates.io-index"
145
+
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
146
+
147
+
[[package]]
148
+
name = "munge"
149
+
version = "0.4.7"
150
+
source = "registry+https://github.com/rust-lang/crates.io-index"
151
+
checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c"
152
+
dependencies = [
153
+
"munge_macro",
154
+
]
155
+
156
+
[[package]]
157
+
name = "munge_macro"
158
+
version = "0.4.7"
159
+
source = "registry+https://github.com/rust-lang/crates.io-index"
160
+
checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931"
161
+
dependencies = [
162
+
"proc-macro2",
163
+
"quote",
164
+
"syn",
165
+
]
166
+
167
+
[[package]]
168
+
name = "once_cell"
169
+
version = "1.21.3"
170
+
source = "registry+https://github.com/rust-lang/crates.io-index"
171
+
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
172
+
173
+
[[package]]
174
+
name = "pkg-config"
175
+
version = "0.3.32"
176
+
source = "registry+https://github.com/rust-lang/crates.io-index"
177
+
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
178
+
179
+
[[package]]
180
+
name = "proc-macro2"
181
+
version = "1.0.103"
182
+
source = "registry+https://github.com/rust-lang/crates.io-index"
183
+
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
184
+
dependencies = [
185
+
"unicode-ident",
186
+
]
187
+
188
+
[[package]]
189
+
name = "ptr_meta"
190
+
version = "0.3.1"
191
+
source = "registry+https://github.com/rust-lang/crates.io-index"
192
+
checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79"
193
+
dependencies = [
194
+
"ptr_meta_derive",
195
+
]
196
+
197
+
[[package]]
198
+
name = "ptr_meta_derive"
199
+
version = "0.3.1"
200
+
source = "registry+https://github.com/rust-lang/crates.io-index"
201
+
checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1"
202
+
dependencies = [
203
+
"proc-macro2",
204
+
"quote",
205
+
"syn",
206
+
]
207
+
208
+
[[package]]
209
+
name = "quote"
210
+
version = "1.0.41"
211
+
source = "registry+https://github.com/rust-lang/crates.io-index"
212
+
checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
213
+
dependencies = [
214
+
"proc-macro2",
215
+
]
216
+
217
+
[[package]]
218
+
name = "r-efi"
219
+
version = "5.3.0"
220
+
source = "registry+https://github.com/rust-lang/crates.io-index"
221
+
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
222
+
223
+
[[package]]
224
+
name = "rancor"
225
+
version = "0.1.1"
226
+
source = "registry+https://github.com/rust-lang/crates.io-index"
227
+
checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee"
228
+
dependencies = [
229
+
"ptr_meta",
230
+
]
231
+
232
+
[[package]]
233
+
name = "ref-cast"
234
+
version = "1.0.25"
235
+
source = "registry+https://github.com/rust-lang/crates.io-index"
236
+
checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
237
+
dependencies = [
238
+
"ref-cast-impl",
239
+
]
240
+
241
+
[[package]]
242
+
name = "ref-cast-impl"
243
+
version = "1.0.25"
244
+
source = "registry+https://github.com/rust-lang/crates.io-index"
245
+
checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
246
+
dependencies = [
247
+
"proc-macro2",
248
+
"quote",
249
+
"syn",
250
+
]
251
+
252
+
[[package]]
253
+
name = "rend"
254
+
version = "0.5.3"
255
+
source = "registry+https://github.com/rust-lang/crates.io-index"
256
+
checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6"
257
+
258
+
[[package]]
259
+
name = "rkyv"
260
+
version = "0.8.12"
261
+
source = "registry+https://github.com/rust-lang/crates.io-index"
262
+
checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4"
263
+
dependencies = [
264
+
"bytes",
265
+
"hashbrown 0.15.5",
266
+
"indexmap",
267
+
"munge",
268
+
"ptr_meta",
269
+
"rancor",
270
+
"rend",
271
+
"rkyv_derive",
272
+
"tinyvec",
273
+
"uuid",
274
+
]
275
+
276
+
[[package]]
277
+
name = "rkyv_derive"
278
+
version = "0.8.12"
279
+
source = "registry+https://github.com/rust-lang/crates.io-index"
280
+
checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482"
281
+
dependencies = [
282
+
"proc-macro2",
283
+
"quote",
284
+
"syn",
285
+
]
286
+
287
+
[[package]]
288
+
name = "rustversion"
289
+
version = "1.0.22"
290
+
source = "registry+https://github.com/rust-lang/crates.io-index"
291
+
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
292
+
293
+
[[package]]
294
+
name = "ryu"
295
+
version = "1.0.20"
296
+
source = "registry+https://github.com/rust-lang/crates.io-index"
297
+
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
298
+
299
+
[[package]]
300
+
name = "serde"
301
+
version = "1.0.228"
302
+
source = "registry+https://github.com/rust-lang/crates.io-index"
303
+
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
304
+
dependencies = [
305
+
"serde_core",
306
+
"serde_derive",
307
+
]
308
+
309
+
[[package]]
310
+
name = "serde_core"
311
+
version = "1.0.228"
312
+
source = "registry+https://github.com/rust-lang/crates.io-index"
313
+
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
314
+
dependencies = [
315
+
"serde_derive",
316
+
]
317
+
318
+
[[package]]
319
+
name = "serde_derive"
320
+
version = "1.0.228"
321
+
source = "registry+https://github.com/rust-lang/crates.io-index"
322
+
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
323
+
dependencies = [
324
+
"proc-macro2",
325
+
"quote",
326
+
"syn",
327
+
]
328
+
329
+
[[package]]
330
+
name = "shlex"
331
+
version = "1.3.0"
332
+
source = "registry+https://github.com/rust-lang/crates.io-index"
333
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
334
+
335
+
[[package]]
336
+
name = "simdutf8"
337
+
version = "0.1.5"
338
+
source = "registry+https://github.com/rust-lang/crates.io-index"
339
+
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
340
+
341
+
[[package]]
342
+
name = "sonic-number"
343
+
version = "0.1.0"
344
+
source = "registry+https://github.com/rust-lang/crates.io-index"
345
+
checksum = "a8a74044c092f4f43ca7a6cfd62854cf9fb5ac8502b131347c990bf22bef1dfe"
346
+
dependencies = [
347
+
"cfg-if",
348
+
]
349
+
350
+
[[package]]
351
+
name = "sonic-rs"
352
+
version = "0.5.5"
353
+
source = "registry+https://github.com/rust-lang/crates.io-index"
354
+
checksum = "22540d56ba14521e4878ad436d498518c59698c39a89d5905c694932f0bf7134"
355
+
dependencies = [
356
+
"ahash",
357
+
"bumpalo",
358
+
"bytes",
359
+
"cfg-if",
360
+
"faststr",
361
+
"itoa",
362
+
"ref-cast",
363
+
"ryu",
364
+
"serde",
365
+
"simdutf8",
366
+
"sonic-number",
367
+
"sonic-simd",
368
+
"thiserror",
369
+
]
370
+
371
+
[[package]]
372
+
name = "sonic-simd"
373
+
version = "0.1.1"
374
+
source = "registry+https://github.com/rust-lang/crates.io-index"
375
+
checksum = "b421f7b6aa4a5de8f685aaf398dfaa828346ee639d2b1c1061ab43d40baa6223"
376
+
dependencies = [
377
+
"cfg-if",
378
+
]
379
+
380
+
[[package]]
381
+
name = "syn"
382
+
version = "2.0.108"
383
+
source = "registry+https://github.com/rust-lang/crates.io-index"
384
+
checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
385
+
dependencies = [
386
+
"proc-macro2",
387
+
"quote",
388
+
"unicode-ident",
389
+
]
390
+
391
+
[[package]]
392
+
name = "thiserror"
393
+
version = "2.0.17"
394
+
source = "registry+https://github.com/rust-lang/crates.io-index"
395
+
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
396
+
dependencies = [
397
+
"thiserror-impl",
398
+
]
399
+
400
+
[[package]]
401
+
name = "thiserror-impl"
402
+
version = "2.0.17"
403
+
source = "registry+https://github.com/rust-lang/crates.io-index"
404
+
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
405
+
dependencies = [
406
+
"proc-macro2",
407
+
"quote",
408
+
"syn",
409
+
]
410
+
411
+
[[package]]
412
+
name = "tinyvec"
413
+
version = "1.10.0"
414
+
source = "registry+https://github.com/rust-lang/crates.io-index"
415
+
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
416
+
dependencies = [
417
+
"tinyvec_macros",
418
+
]
419
+
420
+
[[package]]
421
+
name = "tinyvec_macros"
422
+
version = "0.1.1"
423
+
source = "registry+https://github.com/rust-lang/crates.io-index"
424
+
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
425
+
426
+
[[package]]
427
+
name = "unicode-ident"
428
+
version = "1.0.22"
429
+
source = "registry+https://github.com/rust-lang/crates.io-index"
430
+
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
431
+
432
+
[[package]]
433
+
name = "uuid"
434
+
version = "1.18.1"
435
+
source = "registry+https://github.com/rust-lang/crates.io-index"
436
+
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
437
+
dependencies = [
438
+
"js-sys",
439
+
"wasm-bindgen",
440
+
]
441
+
442
+
[[package]]
443
+
name = "version_check"
444
+
version = "0.9.5"
445
+
source = "registry+https://github.com/rust-lang/crates.io-index"
446
+
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
447
+
448
+
[[package]]
449
+
name = "wasip2"
450
+
version = "1.0.1+wasi-0.2.4"
451
+
source = "registry+https://github.com/rust-lang/crates.io-index"
452
+
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
453
+
dependencies = [
454
+
"wit-bindgen",
455
+
]
456
+
457
+
[[package]]
458
+
name = "wasm-bindgen"
459
+
version = "0.2.105"
460
+
source = "registry+https://github.com/rust-lang/crates.io-index"
461
+
checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60"
462
+
dependencies = [
463
+
"cfg-if",
464
+
"once_cell",
465
+
"rustversion",
466
+
"wasm-bindgen-macro",
467
+
"wasm-bindgen-shared",
468
+
]
469
+
470
+
[[package]]
471
+
name = "wasm-bindgen-macro"
472
+
version = "0.2.105"
473
+
source = "registry+https://github.com/rust-lang/crates.io-index"
474
+
checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2"
475
+
dependencies = [
476
+
"quote",
477
+
"wasm-bindgen-macro-support",
478
+
]
479
+
480
+
[[package]]
481
+
name = "wasm-bindgen-macro-support"
482
+
version = "0.2.105"
483
+
source = "registry+https://github.com/rust-lang/crates.io-index"
484
+
checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc"
485
+
dependencies = [
486
+
"bumpalo",
487
+
"proc-macro2",
488
+
"quote",
489
+
"syn",
490
+
"wasm-bindgen-shared",
491
+
]
492
+
493
+
[[package]]
494
+
name = "wasm-bindgen-shared"
495
+
version = "0.2.105"
496
+
source = "registry+https://github.com/rust-lang/crates.io-index"
497
+
checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76"
498
+
dependencies = [
499
+
"unicode-ident",
500
+
]
501
+
502
+
[[package]]
503
+
name = "wit-bindgen"
504
+
version = "0.46.0"
505
+
source = "registry+https://github.com/rust-lang/crates.io-index"
506
+
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
507
+
508
+
[[package]]
509
+
name = "zerocopy"
510
+
version = "0.8.27"
511
+
source = "registry+https://github.com/rust-lang/crates.io-index"
512
+
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
513
+
dependencies = [
514
+
"zerocopy-derive",
515
+
]
516
+
517
+
[[package]]
518
+
name = "zerocopy-derive"
519
+
version = "0.8.27"
520
+
source = "registry+https://github.com/rust-lang/crates.io-index"
521
+
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
522
+
dependencies = [
523
+
"proc-macro2",
524
+
"quote",
525
+
"syn",
526
+
]
527
+
528
+
[[package]]
529
+
name = "zstd"
530
+
version = "0.13.3"
531
+
source = "registry+https://github.com/rust-lang/crates.io-index"
532
+
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
533
+
dependencies = [
534
+
"zstd-safe",
535
+
]
536
+
537
+
[[package]]
538
+
name = "zstd-safe"
539
+
version = "7.2.4"
540
+
source = "registry+https://github.com/rust-lang/crates.io-index"
541
+
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
542
+
dependencies = [
543
+
"zstd-sys",
544
+
]
545
+
546
+
[[package]]
547
+
name = "zstd-sys"
548
+
version = "2.0.16+zstd.1.5.7"
549
+
source = "registry+https://github.com/rust-lang/crates.io-index"
550
+
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
551
+
dependencies = [
552
+
"cc",
553
+
"pkg-config",
554
+
]
+17
scripts/benchmark_detector_rs/Cargo.toml
+17
scripts/benchmark_detector_rs/Cargo.toml
···
1
+
[package]
name = "benchmark_detector"
version = "0.1.0"
edition = "2021"

[dependencies]
sonic-rs = "0.5"
serde = { version = "1.0", features = ["derive"] }
zstd = "0.13"

[profile.release]
lto = true
codegen-units = 1
opt-level = 3

# NOTE: `build.rustflags` is a Cargo *configuration* key, not a manifest key,
# so a `[build]` table in Cargo.toml is not applied to the build.
# To compile with `-C target-cpu=native`, either create `.cargo/config.toml`
# next to this file containing
#
#   [build]
#   rustflags = ["-C", "target-cpu=native"]
#
# or build with `RUSTFLAGS="-C target-cpu=native" cargo build --release`.
+123
scripts/benchmark_detector_rs/src/main.rs
+123
scripts/benchmark_detector_rs/src/main.rs
···
1
+
use std::env;
2
+
use std::fs::File;
3
+
use std::io::{self, BufRead, BufReader, Write};
4
+
use std::time::Instant;
5
+
use sonic_rs::{Deserialize, from_str, JsonValueTrait, Value};
6
+
7
+
/// The subset of one bundle line that this benchmark reads.
#[derive(Deserialize)]
struct Operation {
    /// DID of the record; `detect` checks its prefix.
    did: String,
    /// CID of the record; `main` prints a short suffix of it in the CSV output.
    cid: String,
    /// Nested operation payload, kept as a dynamic JSON value so `detect`
    /// can look up individual keys such as `sig`.
    operation: Value,
}
13
+
14
+
fn detect(op: &Operation) -> Vec<String> {
15
+
let mut labels = Vec::new();
16
+
17
+
if op.did.starts_with("did:plc:aa") {
18
+
labels.push("test".to_string());
19
+
}
20
+
21
+
if let Some(sig) = op.operation.get("sig") {
22
+
eprintln!("{}", sig);
23
+
}
24
+
25
+
labels
26
+
}
27
+
28
+
fn main() -> io::Result<()> {
29
+
let args: Vec<String> = env::args().collect();
30
+
let bundle_dir = args.get(1).map(|s| s.as_str()).unwrap_or("./");
31
+
let start_bundle: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(1);
32
+
let end_bundle: usize = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(100);
33
+
34
+
eprintln!("Processing bundles {}-{} from {}", start_bundle, end_bundle, bundle_dir);
35
+
eprintln!();
36
+
37
+
let stdout = io::stdout();
38
+
let mut writer = io::BufWriter::with_capacity(512 * 1024, stdout.lock());
39
+
writeln!(writer, "bundle,position,cid,size,confidence,labels")?;
40
+
41
+
let mut total_ops = 0;
42
+
let mut match_count = 0;
43
+
let mut total_bytes: u64 = 0;
44
+
let mut matched_bytes: u64 = 0;
45
+
let start_time = Instant::now();
46
+
47
+
for bundle_num in start_bundle..=end_bundle {
48
+
let bundle_file = format!("{}/{:06}.jsonl.zst", bundle_dir, bundle_num);
49
+
50
+
let file = match File::open(&bundle_file) {
51
+
Ok(f) => f,
52
+
Err(_) => continue,
53
+
};
54
+
55
+
let decoder = match zstd::Decoder::new(file) {
56
+
Ok(d) => d,
57
+
Err(_) => continue,
58
+
};
59
+
60
+
let reader = BufReader::new(decoder);
61
+
62
+
for (position, line) in reader.lines().enumerate() {
63
+
let line = match line {
64
+
Ok(l) => l,
65
+
Err(_) => continue,
66
+
};
67
+
68
+
if line.is_empty() {
69
+
continue;
70
+
}
71
+
72
+
total_ops += 1;
73
+
let op_size = line.len();
74
+
total_bytes += op_size as u64;
75
+
76
+
let op: Operation = match from_str(&line) {
77
+
Ok(o) => o,
78
+
Err(_) => continue,
79
+
};
80
+
81
+
let labels = detect(&op);
82
+
83
+
if !labels.is_empty() {
84
+
match_count += 1;
85
+
matched_bytes += op_size as u64;
86
+
87
+
let cid_short = if op.cid.len() > 4 {
88
+
&op.cid[op.cid.len()-4..]
89
+
} else {
90
+
&op.cid
91
+
};
92
+
93
+
writeln!(writer, "{},{},{},{},0.95,{}",
94
+
bundle_num, position, cid_short, op_size, labels.join(";"))?;
95
+
}
96
+
}
97
+
98
+
if bundle_num % 10 == 0 {
99
+
let elapsed = start_time.elapsed().as_secs_f64();
100
+
let ops_per_sec = total_ops as f64 / elapsed;
101
+
eprint!("Processed {}/{} bundles | {} ops | {:.0} ops/sec\r",
102
+
bundle_num, end_bundle, total_ops, ops_per_sec);
103
+
}
104
+
}
105
+
106
+
let elapsed = start_time.elapsed().as_secs_f64();
107
+
writer.flush()?;
108
+
109
+
eprintln!("\n\n✓ Detection complete");
110
+
eprintln!(" Total operations: {}", total_ops);
111
+
eprintln!(" Matches found: {} ({:.2}%)", match_count,
112
+
match_count as f64 / total_ops as f64 * 100.0);
113
+
eprintln!(" Total size: {:.1} MB", total_bytes as f64 / 1e6);
114
+
eprintln!(" Matched size: {:.1} MB ({:.2}%)",
115
+
matched_bytes as f64 / 1e6,
116
+
matched_bytes as f64 / total_bytes as f64 * 100.0);
117
+
eprintln!();
118
+
eprintln!(" Time elapsed: {:.2}s", elapsed);
119
+
eprintln!(" Throughput: {:.0} ops/sec", total_ops as f64 / elapsed);
120
+
eprintln!(" Speed: {:.1} MB/sec", total_bytes as f64 / elapsed / 1e6);
121
+
122
+
Ok(())
123
+
}