[DEPRECATED] Go implementation of plcbundle

update

Changed files
+753 -26
bundle
cmd
plcbundle
plc
scripts
+1 -2
bundle/operations.go
··· 67 67 } 68 68 69 69 var operation plc.PLCOperation 70 - // Use sonic instead of json.Unmarshal 71 - if err := json.Unmarshal(line, &operation); err != nil { 70 + if err := json.UnmarshalNoEscape(line, &operation); err != nil { 72 71 return nil, fmt.Errorf("failed to parse line: %w", err) 73 72 } 74 73
+8 -1
cmd/plcbundle/detector.go
··· 545 545 546 546 // detectOperation runs all detectors on an operation and returns labels + confidence 547 547 func detectOperation(ctx context.Context, detectors []detector.Detector, op plc.PLCOperation, minConfidence float64) ([]string, float64) { 548 + // Parse Operation ONCE before running detectors 549 + opData, err := op.GetOperationData() 550 + if err != nil { 551 + return nil, 0 552 + } 553 + op.ParsedOperation = opData // Set for detectors to use 554 + 548 555 var matchedLabels []string 549 556 var maxConfidence float64 550 557 551 558 for _, det := range detectors { 552 - match, err := det.Detect(ctx, op) 559 + match, err := det.Detect(ctx, op) // ← op now has ParsedOperation set 553 560 if err != nil || match == nil || match.Confidence < minConfidence { 554 561 continue 555 562 }
+25 -10
plc/types.go
··· 8 8 9 9 // PLCOperation represents a single operation from the PLC directory 10 10 type PLCOperation struct { 11 - DID string `json:"did"` 12 - //Operation map[string]interface{} `json:"operation"` 13 - Operation json.RawMessage `json:"operation"` 11 + DID string `json:"did"` 12 + Operation json.RawMessage `json:"operation"` // Lazy 14 13 CID string `json:"cid"` 15 14 Nullified interface{} `json:"nullified,omitempty"` 16 15 CreatedAt time.Time `json:"createdAt"` 17 16 18 - // RawJSON stores the original JSON bytes for exact reproduction 19 - RawJSON []byte `json:"-"` 17 + RawJSON []byte `json:"-"` 18 + ParsedOperation map[string]interface{} `json:"-"` // Pre-parsed cache 20 19 } 21 20 22 21 // IsNullified checks if this operation has been nullified ··· 92 91 Endpoint string 93 92 } 94 93 95 - // GetOperationMap parses Operation RawMessage into a map 96 - func (op *PLCOperation) GetOperationMap() (map[string]interface{}, error) { 94 + // GetOperationData parses Operation into map (with caching) 95 + func (op *PLCOperation) GetOperationData() (map[string]interface{}, error) { 96 + // Return cached if already parsed 97 + if op.ParsedOperation != nil { 98 + return op.ParsedOperation, nil 99 + } 100 + 101 + // Parse on first call 97 102 if len(op.Operation) == 0 { 98 103 return nil, nil 99 104 } 100 - var result map[string]interface{} 101 - if err := json.Unmarshal(op.Operation, &result); err != nil { 105 + 106 + var data map[string]interface{} 107 + if err := json.UnmarshalNoEscape(op.Operation, &data); err != nil { 102 108 return nil, err 103 109 } 104 - return result, nil 110 + 111 + // Cache it 112 + op.ParsedOperation = data 113 + 114 + return data, nil 115 + } 116 + 117 + // GetOperationMap is an alias for compatibility 118 + func (op *PLCOperation) GetOperationMap() (map[string]interface{}, error) { 119 + return op.GetOperationData() 105 120 }
+10 -5
scripts/benchmark-detector.go
··· 16 16 17 17 // Minimal operation struct 18 18 type Operation struct { 19 - DID string `json:"did"` 20 - //Operation map[string]interface{} `json:"operation"` 21 - CID string `json:"cid"` 22 - Nullified interface{} `json:"nullified,omitempty"` 23 - CreatedAt time.Time `json:"createdAt"` 19 + DID string `json:"did"` 20 + Operation map[string]interface{} `json:"operation"` 21 + CID string `json:"cid"` 22 + Nullified interface{} `json:"nullified,omitempty"` 23 + CreatedAt time.Time `json:"createdAt"` 24 24 25 25 // RawJSON stores the original JSON bytes for exact reproduction 26 26 RawJSON []byte `json:"-"` ··· 32 32 33 33 if strings.HasPrefix(op.DID, "did:plc:aa") { 34 34 labels = append(labels, "test") 35 + } 36 + 37 + // Log operation.sig (like console.log in JavaScript) 38 + if sig, ok := op.Operation["sig"]; ok { 39 + fmt.Fprintf(os.Stderr, "%v\n", sig) 35 40 } 36 41 37 42 return labels
+12 -8
scripts/benchmark-detector.js
··· 4 4 function detect({ op }) { 5 5 const labels = []; 6 6 7 - if (op.did.match(/^did:plc:aa/)) { 7 + if (op.did.startsWith('did:plc:aa')) { 8 8 labels.push('test') 9 9 } 10 + 11 + console.log(op.operation.sig) 10 12 11 13 return labels; 12 14 } 13 15 14 16 // ========================================== 15 - // Pure Bun bundle processor 17 + // Pure Bun bundle processor with native zstd 16 18 // ========================================== 17 - 18 - import { spawn } from 'bun'; 19 - import { readdir } from 'fs/promises'; 20 19 21 20 const BUNDLE_DIR = process.argv[2] || './'; 22 21 const START_BUNDLE = parseInt(process.argv[3]) || 1; ··· 39 38 const bundleFile = `${BUNDLE_DIR}/${bundleNum.toString().padStart(6, '0')}.jsonl.zst`; 40 39 41 40 try { 42 - // Decompress bundle using zstd command 43 - const proc = spawn(['zstd', '-d', '-c', bundleFile]); 44 - const text = await new Response(proc.stdout).text(); 41 + // Read compressed bundle 42 + const compressed = await Bun.file(bundleFile).arrayBuffer(); 43 + 44 + // Decompress using native Bun zstd (FAST!) 45 + const decompressed = Bun.zstdDecompressSync(compressed); 46 + 47 + // Convert to text 48 + const text = new TextDecoder().decode(decompressed); 45 49 46 50 const lines = text.split('\n').filter(line => line.trim()); 47 51
+1
scripts/benchmark_detector_rs/.gitignore
··· 1 + target
+554
scripts/benchmark_detector_rs/Cargo.lock
··· 1 + # This file is automatically @generated by Cargo. 2 + # It is not intended for manual editing. 3 + version = 4 4 + 5 + [[package]] 6 + name = "ahash" 7 + version = "0.8.12" 8 + source = "registry+https://github.com/rust-lang/crates.io-index" 9 + checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" 10 + dependencies = [ 11 + "cfg-if", 12 + "getrandom", 13 + "once_cell", 14 + "version_check", 15 + "zerocopy", 16 + ] 17 + 18 + [[package]] 19 + name = "benchmark_detector" 20 + version = "0.1.0" 21 + dependencies = [ 22 + "serde", 23 + "sonic-rs", 24 + "zstd", 25 + ] 26 + 27 + [[package]] 28 + name = "bumpalo" 29 + version = "3.19.0" 30 + source = "registry+https://github.com/rust-lang/crates.io-index" 31 + checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 32 + 33 + [[package]] 34 + name = "bytes" 35 + version = "1.10.1" 36 + source = "registry+https://github.com/rust-lang/crates.io-index" 37 + checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 38 + 39 + [[package]] 40 + name = "cc" 41 + version = "1.2.43" 42 + source = "registry+https://github.com/rust-lang/crates.io-index" 43 + checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" 44 + dependencies = [ 45 + "find-msvc-tools", 46 + "jobserver", 47 + "libc", 48 + "shlex", 49 + ] 50 + 51 + [[package]] 52 + name = "cfg-if" 53 + version = "1.0.4" 54 + source = "registry+https://github.com/rust-lang/crates.io-index" 55 + checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" 56 + 57 + [[package]] 58 + name = "equivalent" 59 + version = "1.0.2" 60 + source = "registry+https://github.com/rust-lang/crates.io-index" 61 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 62 + 63 + [[package]] 64 + name = "faststr" 65 + version = "0.2.32" 66 + source = "registry+https://github.com/rust-lang/crates.io-index" 67 + checksum = 
"baec6a0289d7f1fe5665586ef7340af82e3037207bef60f5785e57569776f0c8" 68 + dependencies = [ 69 + "bytes", 70 + "rkyv", 71 + "serde", 72 + "simdutf8", 73 + ] 74 + 75 + [[package]] 76 + name = "find-msvc-tools" 77 + version = "0.1.4" 78 + source = "registry+https://github.com/rust-lang/crates.io-index" 79 + checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" 80 + 81 + [[package]] 82 + name = "getrandom" 83 + version = "0.3.4" 84 + source = "registry+https://github.com/rust-lang/crates.io-index" 85 + checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" 86 + dependencies = [ 87 + "cfg-if", 88 + "libc", 89 + "r-efi", 90 + "wasip2", 91 + ] 92 + 93 + [[package]] 94 + name = "hashbrown" 95 + version = "0.15.5" 96 + source = "registry+https://github.com/rust-lang/crates.io-index" 97 + checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 98 + 99 + [[package]] 100 + name = "hashbrown" 101 + version = "0.16.0" 102 + source = "registry+https://github.com/rust-lang/crates.io-index" 103 + checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" 104 + 105 + [[package]] 106 + name = "indexmap" 107 + version = "2.12.0" 108 + source = "registry+https://github.com/rust-lang/crates.io-index" 109 + checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" 110 + dependencies = [ 111 + "equivalent", 112 + "hashbrown 0.16.0", 113 + ] 114 + 115 + [[package]] 116 + name = "itoa" 117 + version = "1.0.15" 118 + source = "registry+https://github.com/rust-lang/crates.io-index" 119 + checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 120 + 121 + [[package]] 122 + name = "jobserver" 123 + version = "0.1.34" 124 + source = "registry+https://github.com/rust-lang/crates.io-index" 125 + checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" 126 + dependencies = [ 127 + "getrandom", 128 + "libc", 129 + ] 130 + 131 + [[package]] 132 + name = 
"js-sys" 133 + version = "0.3.82" 134 + source = "registry+https://github.com/rust-lang/crates.io-index" 135 + checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" 136 + dependencies = [ 137 + "once_cell", 138 + "wasm-bindgen", 139 + ] 140 + 141 + [[package]] 142 + name = "libc" 143 + version = "0.2.177" 144 + source = "registry+https://github.com/rust-lang/crates.io-index" 145 + checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" 146 + 147 + [[package]] 148 + name = "munge" 149 + version = "0.4.7" 150 + source = "registry+https://github.com/rust-lang/crates.io-index" 151 + checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" 152 + dependencies = [ 153 + "munge_macro", 154 + ] 155 + 156 + [[package]] 157 + name = "munge_macro" 158 + version = "0.4.7" 159 + source = "registry+https://github.com/rust-lang/crates.io-index" 160 + checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" 161 + dependencies = [ 162 + "proc-macro2", 163 + "quote", 164 + "syn", 165 + ] 166 + 167 + [[package]] 168 + name = "once_cell" 169 + version = "1.21.3" 170 + source = "registry+https://github.com/rust-lang/crates.io-index" 171 + checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 172 + 173 + [[package]] 174 + name = "pkg-config" 175 + version = "0.3.32" 176 + source = "registry+https://github.com/rust-lang/crates.io-index" 177 + checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 178 + 179 + [[package]] 180 + name = "proc-macro2" 181 + version = "1.0.103" 182 + source = "registry+https://github.com/rust-lang/crates.io-index" 183 + checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" 184 + dependencies = [ 185 + "unicode-ident", 186 + ] 187 + 188 + [[package]] 189 + name = "ptr_meta" 190 + version = "0.3.1" 191 + source = "registry+https://github.com/rust-lang/crates.io-index" 192 + checksum = 
"0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" 193 + dependencies = [ 194 + "ptr_meta_derive", 195 + ] 196 + 197 + [[package]] 198 + name = "ptr_meta_derive" 199 + version = "0.3.1" 200 + source = "registry+https://github.com/rust-lang/crates.io-index" 201 + checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" 202 + dependencies = [ 203 + "proc-macro2", 204 + "quote", 205 + "syn", 206 + ] 207 + 208 + [[package]] 209 + name = "quote" 210 + version = "1.0.41" 211 + source = "registry+https://github.com/rust-lang/crates.io-index" 212 + checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" 213 + dependencies = [ 214 + "proc-macro2", 215 + ] 216 + 217 + [[package]] 218 + name = "r-efi" 219 + version = "5.3.0" 220 + source = "registry+https://github.com/rust-lang/crates.io-index" 221 + checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 222 + 223 + [[package]] 224 + name = "rancor" 225 + version = "0.1.1" 226 + source = "registry+https://github.com/rust-lang/crates.io-index" 227 + checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" 228 + dependencies = [ 229 + "ptr_meta", 230 + ] 231 + 232 + [[package]] 233 + name = "ref-cast" 234 + version = "1.0.25" 235 + source = "registry+https://github.com/rust-lang/crates.io-index" 236 + checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" 237 + dependencies = [ 238 + "ref-cast-impl", 239 + ] 240 + 241 + [[package]] 242 + name = "ref-cast-impl" 243 + version = "1.0.25" 244 + source = "registry+https://github.com/rust-lang/crates.io-index" 245 + checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" 246 + dependencies = [ 247 + "proc-macro2", 248 + "quote", 249 + "syn", 250 + ] 251 + 252 + [[package]] 253 + name = "rend" 254 + version = "0.5.3" 255 + source = "registry+https://github.com/rust-lang/crates.io-index" 256 + checksum = 
"cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" 257 + 258 + [[package]] 259 + name = "rkyv" 260 + version = "0.8.12" 261 + source = "registry+https://github.com/rust-lang/crates.io-index" 262 + checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" 263 + dependencies = [ 264 + "bytes", 265 + "hashbrown 0.15.5", 266 + "indexmap", 267 + "munge", 268 + "ptr_meta", 269 + "rancor", 270 + "rend", 271 + "rkyv_derive", 272 + "tinyvec", 273 + "uuid", 274 + ] 275 + 276 + [[package]] 277 + name = "rkyv_derive" 278 + version = "0.8.12" 279 + source = "registry+https://github.com/rust-lang/crates.io-index" 280 + checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" 281 + dependencies = [ 282 + "proc-macro2", 283 + "quote", 284 + "syn", 285 + ] 286 + 287 + [[package]] 288 + name = "rustversion" 289 + version = "1.0.22" 290 + source = "registry+https://github.com/rust-lang/crates.io-index" 291 + checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 292 + 293 + [[package]] 294 + name = "ryu" 295 + version = "1.0.20" 296 + source = "registry+https://github.com/rust-lang/crates.io-index" 297 + checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 298 + 299 + [[package]] 300 + name = "serde" 301 + version = "1.0.228" 302 + source = "registry+https://github.com/rust-lang/crates.io-index" 303 + checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 304 + dependencies = [ 305 + "serde_core", 306 + "serde_derive", 307 + ] 308 + 309 + [[package]] 310 + name = "serde_core" 311 + version = "1.0.228" 312 + source = "registry+https://github.com/rust-lang/crates.io-index" 313 + checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 314 + dependencies = [ 315 + "serde_derive", 316 + ] 317 + 318 + [[package]] 319 + name = "serde_derive" 320 + version = "1.0.228" 321 + source = "registry+https://github.com/rust-lang/crates.io-index" 322 + 
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 323 + dependencies = [ 324 + "proc-macro2", 325 + "quote", 326 + "syn", 327 + ] 328 + 329 + [[package]] 330 + name = "shlex" 331 + version = "1.3.0" 332 + source = "registry+https://github.com/rust-lang/crates.io-index" 333 + checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 334 + 335 + [[package]] 336 + name = "simdutf8" 337 + version = "0.1.5" 338 + source = "registry+https://github.com/rust-lang/crates.io-index" 339 + checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" 340 + 341 + [[package]] 342 + name = "sonic-number" 343 + version = "0.1.0" 344 + source = "registry+https://github.com/rust-lang/crates.io-index" 345 + checksum = "a8a74044c092f4f43ca7a6cfd62854cf9fb5ac8502b131347c990bf22bef1dfe" 346 + dependencies = [ 347 + "cfg-if", 348 + ] 349 + 350 + [[package]] 351 + name = "sonic-rs" 352 + version = "0.5.5" 353 + source = "registry+https://github.com/rust-lang/crates.io-index" 354 + checksum = "22540d56ba14521e4878ad436d498518c59698c39a89d5905c694932f0bf7134" 355 + dependencies = [ 356 + "ahash", 357 + "bumpalo", 358 + "bytes", 359 + "cfg-if", 360 + "faststr", 361 + "itoa", 362 + "ref-cast", 363 + "ryu", 364 + "serde", 365 + "simdutf8", 366 + "sonic-number", 367 + "sonic-simd", 368 + "thiserror", 369 + ] 370 + 371 + [[package]] 372 + name = "sonic-simd" 373 + version = "0.1.1" 374 + source = "registry+https://github.com/rust-lang/crates.io-index" 375 + checksum = "b421f7b6aa4a5de8f685aaf398dfaa828346ee639d2b1c1061ab43d40baa6223" 376 + dependencies = [ 377 + "cfg-if", 378 + ] 379 + 380 + [[package]] 381 + name = "syn" 382 + version = "2.0.108" 383 + source = "registry+https://github.com/rust-lang/crates.io-index" 384 + checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" 385 + dependencies = [ 386 + "proc-macro2", 387 + "quote", 388 + "unicode-ident", 389 + ] 390 + 391 + [[package]] 392 + name = 
"thiserror" 393 + version = "2.0.17" 394 + source = "registry+https://github.com/rust-lang/crates.io-index" 395 + checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 396 + dependencies = [ 397 + "thiserror-impl", 398 + ] 399 + 400 + [[package]] 401 + name = "thiserror-impl" 402 + version = "2.0.17" 403 + source = "registry+https://github.com/rust-lang/crates.io-index" 404 + checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 405 + dependencies = [ 406 + "proc-macro2", 407 + "quote", 408 + "syn", 409 + ] 410 + 411 + [[package]] 412 + name = "tinyvec" 413 + version = "1.10.0" 414 + source = "registry+https://github.com/rust-lang/crates.io-index" 415 + checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" 416 + dependencies = [ 417 + "tinyvec_macros", 418 + ] 419 + 420 + [[package]] 421 + name = "tinyvec_macros" 422 + version = "0.1.1" 423 + source = "registry+https://github.com/rust-lang/crates.io-index" 424 + checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 425 + 426 + [[package]] 427 + name = "unicode-ident" 428 + version = "1.0.22" 429 + source = "registry+https://github.com/rust-lang/crates.io-index" 430 + checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" 431 + 432 + [[package]] 433 + name = "uuid" 434 + version = "1.18.1" 435 + source = "registry+https://github.com/rust-lang/crates.io-index" 436 + checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" 437 + dependencies = [ 438 + "js-sys", 439 + "wasm-bindgen", 440 + ] 441 + 442 + [[package]] 443 + name = "version_check" 444 + version = "0.9.5" 445 + source = "registry+https://github.com/rust-lang/crates.io-index" 446 + checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 447 + 448 + [[package]] 449 + name = "wasip2" 450 + version = "1.0.1+wasi-0.2.4" 451 + source = "registry+https://github.com/rust-lang/crates.io-index" 452 + 
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" 453 + dependencies = [ 454 + "wit-bindgen", 455 + ] 456 + 457 + [[package]] 458 + name = "wasm-bindgen" 459 + version = "0.2.105" 460 + source = "registry+https://github.com/rust-lang/crates.io-index" 461 + checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" 462 + dependencies = [ 463 + "cfg-if", 464 + "once_cell", 465 + "rustversion", 466 + "wasm-bindgen-macro", 467 + "wasm-bindgen-shared", 468 + ] 469 + 470 + [[package]] 471 + name = "wasm-bindgen-macro" 472 + version = "0.2.105" 473 + source = "registry+https://github.com/rust-lang/crates.io-index" 474 + checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" 475 + dependencies = [ 476 + "quote", 477 + "wasm-bindgen-macro-support", 478 + ] 479 + 480 + [[package]] 481 + name = "wasm-bindgen-macro-support" 482 + version = "0.2.105" 483 + source = "registry+https://github.com/rust-lang/crates.io-index" 484 + checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" 485 + dependencies = [ 486 + "bumpalo", 487 + "proc-macro2", 488 + "quote", 489 + "syn", 490 + "wasm-bindgen-shared", 491 + ] 492 + 493 + [[package]] 494 + name = "wasm-bindgen-shared" 495 + version = "0.2.105" 496 + source = "registry+https://github.com/rust-lang/crates.io-index" 497 + checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" 498 + dependencies = [ 499 + "unicode-ident", 500 + ] 501 + 502 + [[package]] 503 + name = "wit-bindgen" 504 + version = "0.46.0" 505 + source = "registry+https://github.com/rust-lang/crates.io-index" 506 + checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 507 + 508 + [[package]] 509 + name = "zerocopy" 510 + version = "0.8.27" 511 + source = "registry+https://github.com/rust-lang/crates.io-index" 512 + checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 513 + dependencies = [ 514 + 
"zerocopy-derive", 515 + ] 516 + 517 + [[package]] 518 + name = "zerocopy-derive" 519 + version = "0.8.27" 520 + source = "registry+https://github.com/rust-lang/crates.io-index" 521 + checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 522 + dependencies = [ 523 + "proc-macro2", 524 + "quote", 525 + "syn", 526 + ] 527 + 528 + [[package]] 529 + name = "zstd" 530 + version = "0.13.3" 531 + source = "registry+https://github.com/rust-lang/crates.io-index" 532 + checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" 533 + dependencies = [ 534 + "zstd-safe", 535 + ] 536 + 537 + [[package]] 538 + name = "zstd-safe" 539 + version = "7.2.4" 540 + source = "registry+https://github.com/rust-lang/crates.io-index" 541 + checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" 542 + dependencies = [ 543 + "zstd-sys", 544 + ] 545 + 546 + [[package]] 547 + name = "zstd-sys" 548 + version = "2.0.16+zstd.1.5.7" 549 + source = "registry+https://github.com/rust-lang/crates.io-index" 550 + checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" 551 + dependencies = [ 552 + "cc", 553 + "pkg-config", 554 + ]
+17
scripts/benchmark_detector_rs/Cargo.toml
# Manifest for the standalone Rust detector benchmark.
[package]
name = "benchmark_detector"
version = "0.1.0"
edition = "2021"

[dependencies]
sonic-rs = "0.5"
serde = { version = "1.0", features = ["derive"] }
zstd = "0.13"

# Favor runtime speed over compile time: whole-program LTO with a single
# codegen unit at the highest optimization level.
[profile.release]
lto = true
codegen-units = 1
opt-level = 3

# NOTE: `build.rustflags` is a Cargo *configuration* key, not a manifest key.
# Written as a `[build]` table in Cargo.toml it is reported as an unused
# manifest key and the flags are never passed to rustc. To compile for the
# host CPU, create `.cargo/config.toml` next to this file containing:
#
#   [build]
#   rustflags = ["-C", "target-cpu=native"]
#
# or build with: RUSTFLAGS="-C target-cpu=native" cargo build --release
+123
scripts/benchmark_detector_rs/src/main.rs
··· 1 + use std::env; 2 + use std::fs::File; 3 + use std::io::{self, BufRead, BufReader, Write}; 4 + use std::time::Instant; 5 + use sonic_rs::{Deserialize, from_str, JsonValueTrait, Value}; 6 + 7 + #[derive(Deserialize)] 8 + struct Operation { 9 + did: String, 10 + cid: String, 11 + operation: Value, 12 + } 13 + 14 + fn detect(op: &Operation) -> Vec<String> { 15 + let mut labels = Vec::new(); 16 + 17 + if op.did.starts_with("did:plc:aa") { 18 + labels.push("test".to_string()); 19 + } 20 + 21 + if let Some(sig) = op.operation.get("sig") { 22 + eprintln!("{}", sig); 23 + } 24 + 25 + labels 26 + } 27 + 28 + fn main() -> io::Result<()> { 29 + let args: Vec<String> = env::args().collect(); 30 + let bundle_dir = args.get(1).map(|s| s.as_str()).unwrap_or("./"); 31 + let start_bundle: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(1); 32 + let end_bundle: usize = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(100); 33 + 34 + eprintln!("Processing bundles {}-{} from {}", start_bundle, end_bundle, bundle_dir); 35 + eprintln!(); 36 + 37 + let stdout = io::stdout(); 38 + let mut writer = io::BufWriter::with_capacity(512 * 1024, stdout.lock()); 39 + writeln!(writer, "bundle,position,cid,size,confidence,labels")?; 40 + 41 + let mut total_ops = 0; 42 + let mut match_count = 0; 43 + let mut total_bytes: u64 = 0; 44 + let mut matched_bytes: u64 = 0; 45 + let start_time = Instant::now(); 46 + 47 + for bundle_num in start_bundle..=end_bundle { 48 + let bundle_file = format!("{}/{:06}.jsonl.zst", bundle_dir, bundle_num); 49 + 50 + let file = match File::open(&bundle_file) { 51 + Ok(f) => f, 52 + Err(_) => continue, 53 + }; 54 + 55 + let decoder = match zstd::Decoder::new(file) { 56 + Ok(d) => d, 57 + Err(_) => continue, 58 + }; 59 + 60 + let reader = BufReader::new(decoder); 61 + 62 + for (position, line) in reader.lines().enumerate() { 63 + let line = match line { 64 + Ok(l) => l, 65 + Err(_) => continue, 66 + }; 67 + 68 + if line.is_empty() { 69 + continue; 70 + } 
71 + 72 + total_ops += 1; 73 + let op_size = line.len(); 74 + total_bytes += op_size as u64; 75 + 76 + let op: Operation = match from_str(&line) { 77 + Ok(o) => o, 78 + Err(_) => continue, 79 + }; 80 + 81 + let labels = detect(&op); 82 + 83 + if !labels.is_empty() { 84 + match_count += 1; 85 + matched_bytes += op_size as u64; 86 + 87 + let cid_short = if op.cid.len() > 4 { 88 + &op.cid[op.cid.len()-4..] 89 + } else { 90 + &op.cid 91 + }; 92 + 93 + writeln!(writer, "{},{},{},{},0.95,{}", 94 + bundle_num, position, cid_short, op_size, labels.join(";"))?; 95 + } 96 + } 97 + 98 + if bundle_num % 10 == 0 { 99 + let elapsed = start_time.elapsed().as_secs_f64(); 100 + let ops_per_sec = total_ops as f64 / elapsed; 101 + eprint!("Processed {}/{} bundles | {} ops | {:.0} ops/sec\r", 102 + bundle_num, end_bundle, total_ops, ops_per_sec); 103 + } 104 + } 105 + 106 + let elapsed = start_time.elapsed().as_secs_f64(); 107 + writer.flush()?; 108 + 109 + eprintln!("\n\n✓ Detection complete"); 110 + eprintln!(" Total operations: {}", total_ops); 111 + eprintln!(" Matches found: {} ({:.2}%)", match_count, 112 + match_count as f64 / total_ops as f64 * 100.0); 113 + eprintln!(" Total size: {:.1} MB", total_bytes as f64 / 1e6); 114 + eprintln!(" Matched size: {:.1} MB ({:.2}%)", 115 + matched_bytes as f64 / 1e6, 116 + matched_bytes as f64 / total_bytes as f64 * 100.0); 117 + eprintln!(); 118 + eprintln!(" Time elapsed: {:.2}s", elapsed); 119 + eprintln!(" Throughput: {:.0} ops/sec", total_ops as f64 / elapsed); 120 + eprintln!(" Speed: {:.1} MB/sec", total_bytes as f64 / elapsed / 1e6); 121 + 122 + Ok(()) 123 + }
+2
scripts/detector-template.js
··· 10 10 // Return array of label strings 11 11 // Return empty array [] for no match 12 12 13 + //console.log(JSON.stringify(op)) 14 + 13 15 if (op.did.match(/^did:plc:aaa/)) { 14 16 labels.push('test') 15 17 }