+3
-3
Dockerfile
+3
-3
Dockerfile
···
18
18
RUN CGO_ENABLED=1 go build \
19
19
-ldflags="-w -s -X 'main.version=${VERSION}' -X 'main.gitCommit=${GIT_COMMIT}' -X 'main.buildDate=${BUILD_DATE}'" \
20
20
-trimpath \
21
-
-o plcbundle \
21
+
-o plcbundle-go \
22
22
./cmd/plcbundle
23
23
24
24
FROM alpine:3.19
25
25
26
26
RUN apk add --no-cache ca-certificates zstd-libs
27
27
28
-
COPY --from=builder /build/plcbundle /usr/local/bin/plcbundle
28
+
COPY --from=builder /build/plcbundle-go /usr/local/bin/plcbundle-go
29
29
30
30
WORKDIR /data
31
31
32
-
ENTRYPOINT ["plcbundle"]
32
+
ENTRYPOINT ["plcbundle-go"]
+8
-8
Makefile
+8
-8
Makefile
···
2
2
.PHONY: docker-build docker-buildx docker-push docker-run docker-clean docker-shell compose-up compose-down compose-logs
3
3
4
4
# Binary name
5
-
BINARY_NAME=plcbundle
5
+
BINARY_NAME=plcbundle-go
6
6
INSTALL_PATH=$(GOPATH)/bin
7
7
8
8
# Docker configuration
9
-
DOCKER_IMAGE=plcbundle
9
+
DOCKER_IMAGE=plcbundle-go
10
10
DOCKER_TAG=$(VERSION)
11
11
DOCKER_REGISTRY?=atscan
12
12
DOCKER_FULL_IMAGE=$(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY)/,)$(DOCKER_IMAGE):$(DOCKER_TAG)
···
45
45
# Install the CLI tool globally
46
46
install:
47
47
@echo "Installing $(BINARY_NAME) ..."
48
-
$(GOINSTALL) $(LDFLAGS) ./cmd/plcbundle
48
+
$(GOBUILD) $(LDFLAGS) -o $(INSTALL_PATH)/$(BINARY_NAME) ./cmd/plcbundle
49
49
50
50
# Run tests
51
51
test:
···
157
157
158
158
# Run Docker container as CLI
159
159
docker-run:
160
-
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle $(CMD)
160
+
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle-go $(CMD)
161
161
162
162
# Shortcuts
163
163
docker-info:
164
-
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle info
164
+
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle-go info
165
165
166
166
docker-fetch:
167
-
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle fetch
167
+
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle-go fetch
168
168
169
169
docker-verify:
170
-
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle verify
170
+
@docker run --rm -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle-go verify
171
171
172
172
# Run as server
173
173
docker-serve:
174
-
docker run --rm -it -p 8080:8080 -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle serve --host 0.0.0.0
174
+
docker run --rm -it -p 8080:8080 -v $(PWD)/data:/data $(DOCKER_FULL_IMAGE) plcbundle-go serve --host 0.0.0.0
175
175
176
176
# Open shell
177
177
docker-shell:
+9
-9
README.md
+9
-9
README.md
···
70
70
71
71
```bash
72
72
# CLI tool
73
-
go install tangled.org/atscan.net/plcbundle/cmd/plcbundle@latest
73
+
go install tangled.org/atscan.net/plcbundle-go/cmd/plcbundle@latest
74
74
75
75
# Library
76
-
go get tangled.org/atscan.net/plcbundle
76
+
go get tangled.org/atscan.net/plcbundle-go
77
77
```
78
78
79
79
### Docker
···
87
87
docker pull atscan/plcbundle:latest
88
88
89
89
# Run CLI
90
-
docker run --rm -v $(pwd)/data:/data atscan/plcbundle info
90
+
docker run --rm -v $(pwd)/data:/data atscan/plcbundle-go info
91
91
92
92
# Run as server
93
-
docker run -d -p 8080:8080 -v $(pwd)/data:/data atscan/plcbundle serve --host 0.0.0.0
93
+
docker run -d -p 8080:8080 -v $(pwd)/data:/data atscan/plcbundle-go serve --host 0.0.0.0
94
94
95
95
# Or use docker compose
96
96
curl -O https://tangled.org/@atscan.net/plcbundle/raw/main/docker-compose.yaml
···
111
111
### As a Library
112
112
113
113
```go
114
-
import plcbundle "tangled.org/atscan.net/plcbundle"
114
+
import plcbundle "tangled.org/atscan.net/plcbundle-go"
115
115
116
116
mgr, _ := plcbundle.New("./plc_data", "https://plc.directory")
117
117
defer mgr.Close()
···
126
126
127
127
```bash
128
128
# Fetch bundles from plc.directory
129
-
plcbundle fetch
129
+
plcbundle-go sync
130
130
131
131
# Clone from remote
132
-
plcbundle clone https://plc.example.com
132
+
plcbundle-go clone https://plc.example.com
133
133
134
134
# Verify integrity
135
-
plcbundle verify
135
+
plcbundle-go verify
136
136
```
137
137
138
138
[See full CLI reference โ](./docs/cli.md)
···
141
141
142
142
```bash
143
143
# CLI usage
144
-
docker run --rm -v $(pwd)/data:/data plcbundle info
144
+
docker run --rm -v $(pwd)/data:/data plcbundle-go info
145
145
146
146
# Server mode
147
147
docker-compose up -d
+20
-8
bundle/bundle_test.go
+20
-8
bundle/bundle_test.go
···
5
5
"testing"
6
6
"time"
7
7
8
-
"tangled.org/atscan.net/plcbundle/bundle"
9
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
10
-
"tangled.org/atscan.net/plcbundle/internal/mempool"
11
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
12
-
"tangled.org/atscan.net/plcbundle/internal/storage"
13
-
"tangled.org/atscan.net/plcbundle/internal/types"
8
+
"tangled.org/atscan.net/plcbundle-go/bundle"
9
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
10
+
"tangled.org/atscan.net/plcbundle-go/internal/mempool"
11
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
14
+
)
15
+
16
+
var (
17
+
bundleInfo = &storage.BundleInfo{
18
+
BundleNumber: 1,
19
+
Origin: "test-origin",
20
+
ParentHash: "",
21
+
Cursor: "",
22
+
CreatedBy: "test",
23
+
Hostname: "test-host",
24
+
}
14
25
)
15
26
16
27
// TestIndex tests index operations
17
28
func TestIndex(t *testing.T) {
29
+
18
30
t.Run("CreateNewIndex", func(t *testing.T) {
19
31
idx := bundleindex.NewIndex("test-origin")
20
32
if idx == nil {
···
315
327
tmpDir := t.TempDir()
316
328
logger := &testLogger{t: t}
317
329
318
-
ops, err := storage.NewOperations(logger)
330
+
ops, err := storage.NewOperations(logger, false)
319
331
if err != nil {
320
332
t.Fatalf("NewOperations failed: %v", err)
321
333
}
···
348
360
path := filepath.Join(tmpDir, "test_bundle.jsonl.zst")
349
361
350
362
// Save
351
-
uncompHash, compHash, uncompSize, compSize, err := ops.SaveBundle(path, operations)
363
+
uncompHash, compHash, uncompSize, compSize, err := ops.SaveBundle(path, operations, bundleInfo)
352
364
if err != nil {
353
365
t.Fatalf("SaveBundle failed: %v", err)
354
366
}
+43
bundle/helpers.go
+43
bundle/helpers.go
···
1
+
package bundle
2
+
3
+
import (
4
+
"fmt"
5
+
"time"
6
+
)
7
+
8
+
// formatTimeDistance formats a duration as "X ago" or "live"
9
+
func formatTimeDistance(d time.Duration) string {
10
+
if d < 10*time.Second {
11
+
return "live"
12
+
}
13
+
if d < time.Minute {
14
+
return fmt.Sprintf("%ds ago", int(d.Seconds()))
15
+
}
16
+
if d < time.Hour {
17
+
return fmt.Sprintf("%dm ago", int(d.Minutes()))
18
+
}
19
+
if d < 24*time.Hour {
20
+
hours := int(d.Hours())
21
+
mins := int(d.Minutes()) % 60
22
+
if mins > 0 {
23
+
return fmt.Sprintf("%dh%dm ago", hours, mins)
24
+
}
25
+
return fmt.Sprintf("%dh ago", hours)
26
+
}
27
+
days := int(d.Hours() / 24)
28
+
if days == 1 {
29
+
return "1 day ago"
30
+
}
31
+
if days < 7 {
32
+
return fmt.Sprintf("%d days ago", days)
33
+
}
34
+
weeks := days / 7
35
+
if weeks < 4 {
36
+
return fmt.Sprintf("%d weeks ago", weeks)
37
+
}
38
+
months := days / 30
39
+
if months < 12 {
40
+
return fmt.Sprintf("%d months ago", months)
41
+
}
42
+
return fmt.Sprintf("%.1f years ago", float64(days)/365)
43
+
}
+457
-162
bundle/manager.go
+457
-162
bundle/manager.go
···
8
8
"os"
9
9
"path/filepath"
10
10
"runtime"
11
+
"runtime/debug"
11
12
"sort"
12
13
"strings"
13
14
"sync"
15
+
"sync/atomic"
14
16
"time"
15
17
16
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
17
-
"tangled.org/atscan.net/plcbundle/internal/didindex"
18
-
"tangled.org/atscan.net/plcbundle/internal/handleresolver"
19
-
"tangled.org/atscan.net/plcbundle/internal/mempool"
20
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
21
-
"tangled.org/atscan.net/plcbundle/internal/storage"
22
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
23
-
"tangled.org/atscan.net/plcbundle/internal/types"
18
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
19
+
"tangled.org/atscan.net/plcbundle-go/internal/didindex"
20
+
"tangled.org/atscan.net/plcbundle-go/internal/handleresolver"
21
+
"tangled.org/atscan.net/plcbundle-go/internal/mempool"
22
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
23
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
24
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
25
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
24
26
)
25
27
26
28
// defaultLogger is a simple logger implementation
···
53
55
bundleCache map[int]*Bundle
54
56
cacheMu sync.RWMutex
55
57
maxCacheSize int
58
+
59
+
// Resolver performance tracking
60
+
resolverStats struct {
61
+
sync.Mutex
62
+
totalResolutions int64
63
+
mempoolHits int64
64
+
bundleHits int64
65
+
errors int64
66
+
67
+
// Timing (in microseconds)
68
+
totalTime int64
69
+
totalMempoolTime int64
70
+
totalIndexTime int64
71
+
totalLoadOpTime int64
72
+
73
+
// Recent timings (circular buffer)
74
+
recentTimes []resolverTiming
75
+
recentIdx int
76
+
recentSize int
77
+
}
56
78
}
57
79
58
80
// NewManager creates a new bundle manager
···
84
106
}
85
107
86
108
// Initialize operations handler
87
-
ops, err := storage.NewOperations(config.Logger)
109
+
ops, err := storage.NewOperations(config.Logger, config.Verbose)
88
110
if err != nil {
89
111
return nil, fmt.Errorf("failed to initialize operations: %w", err)
90
112
}
···
303
325
handleResolver = handleresolver.NewClient(config.HandleResolverURL)
304
326
}
305
327
306
-
return &Manager{
328
+
m := &Manager{
307
329
config: config,
308
330
operations: ops,
309
331
index: index,
···
317
339
cloner: cloner,
318
340
plcClient: plcClient,
319
341
handleResolver: handleResolver,
320
-
}, nil
342
+
}
343
+
// Initialize resolver stats
344
+
m.resolverStats.recentSize = 1000
345
+
m.resolverStats.recentTimes = make([]resolverTiming, 1000)
346
+
347
+
return m, nil
321
348
}
322
349
323
350
// Close cleans up resources
···
434
461
}
435
462
436
463
// SaveBundle saves a bundle to disk and updates the index
437
-
// Returns the DID index update duration
438
-
func (m *Manager) SaveBundle(ctx context.Context, bundle *Bundle, quiet bool) (time.Duration, error) {
464
+
func (m *Manager) SaveBundle(ctx context.Context, bundle *Bundle, verbose bool, quiet bool, stats types.BundleProductionStats, skipDIDIndex bool) (time.Duration, error) {
465
+
466
+
totalStart := time.Now()
439
467
if err := bundle.ValidateForSave(); err != nil {
440
468
return 0, fmt.Errorf("bundle validation failed: %w", err)
441
469
}
442
470
443
471
path := filepath.Join(m.config.BundleDir, fmt.Sprintf("%06d.jsonl.zst", bundle.BundleNumber))
444
472
445
-
// Save to disk
446
-
uncompressedHash, compressedHash, uncompressedSize, compressedSize, err := m.operations.SaveBundle(path, bundle.Operations)
447
-
if err != nil {
448
-
return 0, fmt.Errorf("failed to save bundle: %w", err)
449
-
}
450
-
451
-
bundle.ContentHash = uncompressedHash
452
-
bundle.CompressedHash = compressedHash
453
-
bundle.UncompressedSize = uncompressedSize
454
-
bundle.CompressedSize = compressedSize
455
-
bundle.CreatedAt = time.Now().UTC()
456
-
457
473
// Get parent
458
474
var parent string
459
475
if bundle.BundleNumber > 1 {
···
466
482
}
467
483
}
468
484
}
485
+
bundle.Parent = parent
469
486
470
-
bundle.Parent = parent
487
+
// Get origin
488
+
origin := m.index.Origin
489
+
if m.plcClient != nil {
490
+
origin = m.plcClient.GetBaseURL()
491
+
}
492
+
493
+
// Get version
494
+
version := "dev"
495
+
if info, ok := debug.ReadBuildInfo(); ok && info.Main.Version != "" && info.Main.Version != "(devel)" {
496
+
version = info.Main.Version
497
+
}
498
+
499
+
// Get hostname
500
+
hostname, _ := os.Hostname()
501
+
502
+
// Create BundleInfo
503
+
bundleInfo := &storage.BundleInfo{
504
+
BundleNumber: bundle.BundleNumber,
505
+
Origin: origin,
506
+
ParentHash: parent,
507
+
Cursor: bundle.Cursor,
508
+
CreatedBy: fmt.Sprintf("plcbundle/%s", version),
509
+
Hostname: hostname,
510
+
}
511
+
512
+
if m.config.Verbose {
513
+
m.logger.Printf("DEBUG: Calling operations.SaveBundle with bundle=%d", bundleInfo.BundleNumber)
514
+
}
515
+
516
+
// Save to disk with 3 parameters
517
+
uncompressedHash, compressedHash, uncompressedSize, compressedSize, err := m.operations.SaveBundle(path, bundle.Operations, bundleInfo)
518
+
if err != nil {
519
+
m.logger.Printf("DEBUG: SaveBundle FAILED: %v", err)
520
+
return 0, fmt.Errorf("failed to save bundle: %w", err)
521
+
}
522
+
523
+
if m.config.Verbose {
524
+
m.logger.Printf("DEBUG: SaveBundle SUCCESS, setting bundle fields")
525
+
}
526
+
527
+
bundle.Hash = m.operations.CalculateChainHash(parent, bundle.ContentHash)
528
+
bundle.ContentHash = uncompressedHash
529
+
bundle.CompressedHash = compressedHash
530
+
bundle.UncompressedSize = uncompressedSize
531
+
bundle.CompressedSize = compressedSize
532
+
bundle.CreatedAt = time.Now().UTC()
471
533
bundle.Hash = m.operations.CalculateChainHash(parent, bundle.ContentHash)
472
534
535
+
if m.config.Verbose {
536
+
m.logger.Printf("DEBUG: Adding bundle %d to index", bundle.BundleNumber)
537
+
}
538
+
473
539
// Add to index
474
540
m.index.AddBundle(bundle.ToMetadata())
541
+
542
+
if m.config.Verbose {
543
+
m.logger.Printf("DEBUG: Index now has %d bundles", m.index.Count())
544
+
}
475
545
476
546
// Save index
477
547
if err := m.SaveIndex(); err != nil {
548
+
m.logger.Printf("DEBUG: SaveIndex FAILED: %v", err)
478
549
return 0, fmt.Errorf("failed to save index: %w", err)
479
550
}
480
551
552
+
if m.config.Verbose {
553
+
m.logger.Printf("DEBUG: Index saved, last bundle = %d", m.index.GetLastBundle().BundleNumber)
554
+
}
555
+
556
+
saveDuration := time.Since(totalStart)
557
+
481
558
// Clean up old mempool
482
559
oldMempoolFile := m.mempool.GetFilename()
483
560
if err := m.mempool.Delete(); err != nil && !quiet {
···
493
570
return 0, fmt.Errorf("failed to create new mempool: %w", err)
494
571
}
495
572
496
-
oldMempool := m.mempool
497
573
m.mempool = newMempool
498
574
499
-
oldMempool.Clear()
500
-
501
-
// โจ Update DID index if enabled and track timing
575
+
// DID index update (if enabled)
502
576
var indexUpdateDuration time.Duration
503
-
if m.didIndex != nil && m.didIndex.Exists() {
577
+
if !skipDIDIndex && m.didIndex != nil && m.didIndex.Exists() {
504
578
indexUpdateStart := time.Now()
505
-
506
579
if err := m.updateDIDIndexForBundle(ctx, bundle); err != nil {
507
580
m.logger.Printf("Warning: failed to update DID index: %v", err)
508
581
} else {
509
582
indexUpdateDuration = time.Since(indexUpdateStart)
510
-
511
-
if !quiet {
583
+
if !quiet && m.config.Verbose {
512
584
m.logger.Printf(" [DID Index] Updated in %s", indexUpdateDuration)
513
585
}
514
586
}
515
587
}
516
588
589
+
if !quiet {
590
+
msg := fmt.Sprintf("โ Bundle %06d | %s | fetch: %s (%d reqs)",
591
+
bundle.BundleNumber,
592
+
bundle.Hash[0:7],
593
+
stats.TotalDuration.Round(time.Millisecond),
594
+
stats.TotalFetches,
595
+
)
596
+
if indexUpdateDuration > 0 {
597
+
msg += fmt.Sprintf(" | index: %s", indexUpdateDuration.Round(time.Millisecond))
598
+
}
599
+
msg += fmt.Sprintf(" | %s", formatTimeDistance(time.Since(bundle.EndTime)))
600
+
m.logger.Println(msg)
601
+
}
602
+
603
+
if m.config.Verbose {
604
+
m.logger.Printf("DEBUG: Bundle done = %d, finish duration = %s",
605
+
m.index.GetLastBundle().BundleNumber,
606
+
saveDuration.Round(time.Millisecond))
607
+
}
608
+
517
609
return indexUpdateDuration, nil
518
610
}
519
611
···
985
1077
}
986
1078
987
1079
// GetDIDOperations retrieves all operations for a DID (bundles + mempool combined)
988
-
func (m *Manager) GetDIDOperations(ctx context.Context, did string, verbose bool) ([]plcclient.PLCOperation, error) {
1080
+
// Returns: operations only, operations with locations, error
1081
+
func (m *Manager) GetDIDOperations(ctx context.Context, did string, verbose bool) ([]plcclient.PLCOperation, []PLCOperationWithLocation, error) {
989
1082
if err := plcclient.ValidateDIDFormat(did); err != nil {
990
-
return nil, err
1083
+
return nil, nil, err
991
1084
}
992
1085
993
1086
// Set verbose mode
···
995
1088
m.didIndex.SetVerbose(verbose)
996
1089
}
997
1090
998
-
// Get bundled operations from DID index
999
-
bundledOps, err := m.didIndex.GetDIDOperations(ctx, did, m)
1091
+
// Get bundled operations from DID index (includes nullified)
1092
+
bundledOpsWithLoc, err := m.didIndex.GetDIDOperations(ctx, did, m)
1000
1093
if err != nil {
1001
-
return nil, err
1094
+
return nil, nil, err
1095
+
}
1096
+
1097
+
// Convert to bundle types
1098
+
opsWithLoc := make([]PLCOperationWithLocation, len(bundledOpsWithLoc))
1099
+
bundledOps := make([]plcclient.PLCOperation, len(bundledOpsWithLoc))
1100
+
for i, r := range bundledOpsWithLoc {
1101
+
opsWithLoc[i] = PLCOperationWithLocation{
1102
+
Operation: r.Operation,
1103
+
Bundle: r.Bundle,
1104
+
Position: r.Position,
1105
+
}
1106
+
bundledOps[i] = r.Operation
1002
1107
}
1003
1108
1004
1109
// Get mempool operations
1005
1110
mempoolOps, err := m.GetDIDOperationsFromMempool(did)
1006
1111
if err != nil {
1007
-
return nil, err
1112
+
return nil, nil, err
1008
1113
}
1009
1114
1010
1115
if len(mempoolOps) > 0 && verbose {
1011
1116
m.logger.Printf("DEBUG: Found %d operations in mempool", len(mempoolOps))
1012
1117
}
1013
1118
1014
-
// Combine and sort
1119
+
// Combine operations (for the slice return)
1015
1120
allOps := append(bundledOps, mempoolOps...)
1016
1121
1017
1122
sort.Slice(allOps, func(i, j int) bool {
1018
1123
return allOps[i].CreatedAt.Before(allOps[j].CreatedAt)
1019
1124
})
1020
1125
1021
-
return allOps, nil
1126
+
return allOps, opsWithLoc, nil
1022
1127
}
1023
1128
1024
1129
// GetDIDOperationsFromMempool retrieves operations for a DID from mempool only
···
1051
1156
return m.didIndex.GetLatestDIDOperation(ctx, did, m)
1052
1157
}
1053
1158
1054
-
// GetDIDOperationsWithLocations returns operations along with their bundle/position info
1055
-
func (m *Manager) GetDIDOperationsWithLocations(ctx context.Context, did string, verbose bool) ([]PLCOperationWithLocation, error) {
1056
-
if err := plcclient.ValidateDIDFormat(did); err != nil {
1057
-
return nil, err
1058
-
}
1059
-
1060
-
// Set verbose mode
1061
-
if m.didIndex != nil {
1062
-
m.didIndex.SetVerbose(verbose)
1063
-
}
1064
-
1065
-
// Delegate to DID index
1066
-
results, err := m.didIndex.GetDIDOperationsWithLocations(ctx, did, m)
1067
-
if err != nil {
1068
-
return nil, err
1069
-
}
1070
-
1071
-
// Convert to bundle's type
1072
-
bundleResults := make([]PLCOperationWithLocation, len(results))
1073
-
for i, r := range results {
1074
-
bundleResults[i] = PLCOperationWithLocation{
1075
-
Operation: r.Operation,
1076
-
Bundle: r.Bundle,
1077
-
Position: r.Position,
1078
-
}
1079
-
}
1080
-
1081
-
return bundleResults, nil
1082
-
}
1083
-
1084
1159
// VerifyChain verifies the entire bundle chain
1085
1160
func (m *Manager) VerifyChain(ctx context.Context) (*ChainVerificationResult, error) {
1086
1161
result := &ChainVerificationResult{
···
1132
1207
}
1133
1208
1134
1209
// FetchNextBundle fetches operations and creates a bundle, looping until caught up
1135
-
func (m *Manager) FetchNextBundle(ctx context.Context, quiet bool) (*Bundle, error) {
1210
+
func (m *Manager) FetchNextBundle(ctx context.Context, verbose bool, quiet bool) (*Bundle, types.BundleProductionStats, error) {
1136
1211
if m.plcClient == nil {
1137
-
return nil, fmt.Errorf("PLC client not configured")
1212
+
return nil, types.BundleProductionStats{}, fmt.Errorf("PLC client not configured")
1138
1213
}
1139
1214
1140
1215
lastBundle := m.index.GetLastBundle()
···
1152
1227
prevBundle, err := m.LoadBundle(ctx, lastBundle.BundleNumber)
1153
1228
if err == nil {
1154
1229
_, prevBoundaryCIDs = m.operations.GetBoundaryCIDs(prevBundle.Operations)
1155
-
if !quiet {
1230
+
if verbose {
1156
1231
m.logger.Printf("Loaded %d boundary CIDs from bundle %06d (at %s)",
1157
1232
len(prevBoundaryCIDs), lastBundle.BundleNumber,
1158
1233
lastBundle.EndTime.Format(time.RFC3339)[:19])
···
1165
1240
if m.mempool.Count() > 0 {
1166
1241
mempoolLastTime := m.mempool.GetLastTime()
1167
1242
if mempoolLastTime != "" {
1168
-
if !quiet {
1169
-
m.logger.Printf("Mempool has %d ops, resuming from %s",
1243
+
if verbose {
1244
+
m.logger.Printf("[DEBUG] Mempool has %d ops, resuming from %s",
1170
1245
m.mempool.Count(), mempoolLastTime[:19])
1171
1246
}
1172
1247
afterTime = mempoolLastTime
···
1176
1251
if len(mempoolOps) > 0 {
1177
1252
_, mempoolBoundaries := m.operations.GetBoundaryCIDs(mempoolOps)
1178
1253
prevBoundaryCIDs = mempoolBoundaries
1179
-
if !quiet {
1254
+
if verbose {
1180
1255
m.logger.Printf("Using %d boundary CIDs from mempool", len(prevBoundaryCIDs))
1181
1256
}
1182
1257
}
1183
1258
}
1184
1259
}
1185
1260
1186
-
if !quiet {
1187
-
m.logger.Printf("Preparing bundle %06d (mempool: %d ops)...", nextBundleNum, m.mempool.Count())
1188
-
m.logger.Printf("Starting cursor: %s", afterTime)
1261
+
if verbose {
1262
+
m.logger.Printf("[DEBUG] Preparing bundle %06d (mempool: %d ops)...", nextBundleNum, m.mempool.Count())
1263
+
m.logger.Printf("[DEBUG] Starting cursor: %s", afterTime)
1189
1264
}
1190
1265
1191
1266
totalFetches := 0
···
1208
1283
afterTime,
1209
1284
prevBoundaryCIDs,
1210
1285
needed,
1211
-
quiet,
1286
+
!verbose,
1212
1287
m.mempool,
1213
1288
totalFetches,
1214
1289
)
···
1226
1301
// Stop if caught up or error
1227
1302
if err != nil || len(newOps) == 0 || gotIncompleteBatch {
1228
1303
caughtUp = true
1229
-
if !quiet && totalFetches > 0 {
1230
-
m.logger.Printf(" Caught up to latest PLC data")
1304
+
if verbose && totalFetches > 0 {
1305
+
m.logger.Printf("DEBUG: Caught up to latest PLC data")
1231
1306
}
1232
1307
break
1233
1308
}
···
1236
1311
break
1237
1312
}
1238
1313
}
1239
-
1240
-
// โจ REMOVED: m.mempool.Save() - now handled by FetchToMempool
1241
1314
1242
1315
totalDuration := time.Since(attemptStart)
1243
1316
1244
1317
if m.mempool.Count() < types.BUNDLE_SIZE {
1245
1318
if caughtUp {
1246
-
return nil, fmt.Errorf("insufficient operations: have %d, need %d (caught up to latest PLC data)",
1319
+
return nil, types.BundleProductionStats{}, fmt.Errorf("insufficient operations: have %d, need %d (caught up to latest PLC data)",
1247
1320
m.mempool.Count(), types.BUNDLE_SIZE)
1248
1321
} else {
1249
-
return nil, fmt.Errorf("insufficient operations: have %d, need %d (max attempts reached)",
1322
+
return nil, types.BundleProductionStats{}, fmt.Errorf("insufficient operations: have %d, need %d (max attempts reached)",
1250
1323
m.mempool.Count(), types.BUNDLE_SIZE)
1251
1324
}
1252
1325
}
···
1254
1327
// Create bundle
1255
1328
operations, err := m.mempool.Take(types.BUNDLE_SIZE)
1256
1329
if err != nil {
1257
-
return nil, err
1330
+
return nil, types.BundleProductionStats{}, err
1258
1331
}
1259
1332
1260
1333
syncBundle := internalsync.CreateBundle(nextBundleNum, operations, afterTime, prevBundleHash, m.operations)
···
1272
1345
CreatedAt: syncBundle.CreatedAt,
1273
1346
}
1274
1347
1275
-
if !quiet {
1276
-
avgPerFetch := float64(types.BUNDLE_SIZE) / float64(totalFetches)
1277
-
throughput := float64(types.BUNDLE_SIZE) / totalDuration.Seconds()
1278
-
m.logger.Printf("โ Bundle %06d ready (%d ops, %d DIDs) - %d fetches in %s (avg %.0f/fetch, %.0f ops/sec)",
1279
-
bundle.BundleNumber, len(bundle.Operations), bundle.DIDCount,
1280
-
totalFetches, totalDuration.Round(time.Millisecond), avgPerFetch, throughput)
1348
+
stats := types.BundleProductionStats{
1349
+
TotalFetches: totalFetches,
1350
+
TotalDuration: totalDuration,
1351
+
AvgPerFetch: float64(types.BUNDLE_SIZE) / float64(totalFetches),
1352
+
Throughput: float64(types.BUNDLE_SIZE) / totalDuration.Seconds(),
1281
1353
}
1282
1354
1283
-
return bundle, nil
1355
+
return bundle, stats, nil
1284
1356
}
1285
1357
1286
1358
// CloneFromRemote clones bundles from a remote endpoint
···
1313
1385
// ResolveDID resolves a DID to its current document with detailed timing metrics
1314
1386
func (m *Manager) ResolveDID(ctx context.Context, did string) (*ResolveDIDResult, error) {
1315
1387
if err := plcclient.ValidateDIDFormat(did); err != nil {
1388
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1316
1389
return nil, err
1317
1390
}
1318
1391
1319
1392
result := &ResolveDIDResult{}
1320
1393
totalStart := time.Now()
1321
1394
1322
-
// STEP 1: Check mempool first (most recent data) - OPTIMIZED
1395
+
// STEP 1: Check mempool first
1323
1396
mempoolStart := time.Now()
1324
-
1325
1397
var latestMempoolOp *plcclient.PLCOperation
1326
1398
if m.mempool != nil {
1327
-
// Fast backwards search with early exit
1328
1399
latestMempoolOp = m.mempool.FindLatestDIDOperation(did)
1329
1400
}
1330
1401
result.MempoolTime = time.Since(mempoolStart)
···
1333
1404
if latestMempoolOp != nil {
1334
1405
doc, err := plcclient.ResolveDIDDocument(did, []plcclient.PLCOperation{*latestMempoolOp})
1335
1406
if err != nil {
1407
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1336
1408
return nil, fmt.Errorf("resolution failed: %w", err)
1337
1409
}
1338
1410
1339
1411
result.Document = doc
1412
+
result.LatestOperation = latestMempoolOp
1340
1413
result.Source = "mempool"
1341
1414
result.TotalTime = time.Since(totalStart)
1415
+
1416
+
m.recordResolverTiming(result, nil)
1342
1417
return result, nil
1343
1418
}
1344
1419
1345
1420
// STEP 2: Index lookup
1346
1421
if m.didIndex == nil || !m.didIndex.Exists() {
1422
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1347
1423
return nil, fmt.Errorf("DID index not available - run 'plcbundle index build' to enable DID resolution")
1348
1424
}
1349
1425
···
1352
1428
result.IndexTime = time.Since(indexStart)
1353
1429
1354
1430
if err != nil {
1431
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1355
1432
return nil, err
1356
1433
}
1357
1434
1358
1435
if len(locations) == 0 {
1436
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1359
1437
return nil, fmt.Errorf("DID not found")
1360
1438
}
1361
1439
···
1371
1449
}
1372
1450
1373
1451
if latestLoc == nil {
1452
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1374
1453
return nil, fmt.Errorf("no valid operations (all nullified)")
1375
1454
}
1376
1455
···
1380
1459
result.LoadOpTime = time.Since(opStart)
1381
1460
1382
1461
if err != nil {
1462
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1383
1463
return nil, fmt.Errorf("failed to load operation: %w", err)
1384
1464
}
1385
1465
···
1389
1469
// STEP 4: Resolve document
1390
1470
doc, err := plcclient.ResolveDIDDocument(did, []plcclient.PLCOperation{*op})
1391
1471
if err != nil {
1472
+
atomic.AddInt64(&m.resolverStats.errors, 1)
1392
1473
return nil, fmt.Errorf("resolution failed: %w", err)
1393
1474
}
1394
1475
1395
1476
result.Document = doc
1477
+
result.LatestOperation = op
1396
1478
result.Source = "bundle"
1397
1479
result.TotalTime = time.Since(totalStart)
1398
1480
1481
+
m.recordResolverTiming(result, nil)
1399
1482
return result, nil
1400
1483
}
1401
1484
···
1413
1496
return m.mempool.Count()
1414
1497
}
1415
1498
1416
-
// FetchAndSaveNextBundle fetches and saves next bundle, returns bundle number and index time
1417
-
func (m *Manager) FetchAndSaveNextBundle(ctx context.Context, quiet bool) (int, time.Duration, error) {
1418
-
bundle, err := m.FetchNextBundle(ctx, quiet)
1499
+
func (m *Manager) FetchAndSaveNextBundle(ctx context.Context, verbose bool, quiet bool, skipDIDIndex bool) (int, *types.BundleProductionStats, error) {
1500
+
bundle, stats, err := m.FetchNextBundle(ctx, verbose, quiet)
1419
1501
if err != nil {
1420
-
return 0, 0, err
1502
+
return 0, nil, err
1421
1503
}
1422
1504
1423
-
indexTime, err := m.SaveBundle(ctx, bundle, quiet)
1505
+
indexTime, err := m.SaveBundle(ctx, bundle, verbose, quiet, stats, skipDIDIndex)
1424
1506
if err != nil {
1425
-
return 0, 0, err
1507
+
return 0, nil, err
1426
1508
}
1509
+
stats.IndexTime = indexTime
1427
1510
1428
-
return bundle.BundleNumber, indexTime, nil
1511
+
return bundle.BundleNumber, &types.BundleProductionStats{}, nil
1429
1512
}
1430
1513
1431
1514
// RunSyncLoop runs continuous sync loop (delegates to internal/sync)
···
1435
1518
}
1436
1519
1437
1520
// RunSyncOnce performs a single sync cycle
1438
-
func (m *Manager) RunSyncOnce(ctx context.Context, config *internalsync.SyncLoopConfig, verbose bool) (int, error) {
1521
+
func (m *Manager) RunSyncOnce(ctx context.Context, config *internalsync.SyncLoopConfig) (int, error) {
1439
1522
// Manager itself implements the SyncManager interface
1440
-
return internalsync.SyncOnce(ctx, m, config, verbose)
1523
+
return internalsync.SyncOnce(ctx, m, config)
1441
1524
}
1442
1525
1443
1526
// EnsureDIDIndex ensures DID index is built and up-to-date
1444
1527
// Returns true if index was built/rebuilt, false if already up-to-date
1445
1528
func (m *Manager) EnsureDIDIndex(ctx context.Context, progressCallback func(current, total int)) (bool, error) {
1446
-
bundleCount := m.index.Count()
1447
-
didStats := m.GetDIDIndexStats()
1448
-
1449
-
if bundleCount == 0 {
1450
-
return false, nil
1451
-
}
1452
-
1453
-
needsBuild := false
1454
-
reason := ""
1455
-
1456
-
if !didStats["exists"].(bool) {
1457
-
needsBuild = true
1458
-
reason = "index does not exist"
1459
-
} else {
1460
-
// Check version
1461
-
if m.didIndex != nil {
1462
-
config := m.didIndex.GetConfig()
1463
-
if config.Version != didindex.DIDINDEX_VERSION {
1464
-
needsBuild = true
1465
-
reason = fmt.Sprintf("index version outdated (v%d, need v%d)",
1466
-
config.Version, didindex.DIDINDEX_VERSION)
1467
-
} else {
1468
-
// Check if index is behind bundles
1469
-
lastBundle := m.index.GetLastBundle()
1470
-
if lastBundle != nil && config.LastBundle < lastBundle.BundleNumber {
1471
-
needsBuild = true
1472
-
reason = fmt.Sprintf("index is behind (bundle %d, need %d)",
1473
-
config.LastBundle, lastBundle.BundleNumber)
1474
-
}
1475
-
}
1476
-
}
1477
-
}
1478
-
1479
-
if !needsBuild {
1480
-
return false, nil
1481
-
}
1482
-
1483
1529
// Build index
1484
-
m.logger.Printf("Building DID index (%s)", reason)
1485
-
m.logger.Printf("This may take several minutes...")
1486
-
1487
-
if err := m.BuildDIDIndex(ctx, progressCallback); err != nil {
1488
-
return false, fmt.Errorf("failed to build DID index: %w", err)
1489
-
}
1490
-
1491
-
// Verify index consistency
1492
-
m.logger.Printf("Verifying index consistency...")
1493
-
if err := m.didIndex.VerifyAndRepairIndex(ctx, m); err != nil {
1494
-
return false, fmt.Errorf("index verification/repair failed: %w", err)
1495
-
}
1496
-
1530
+
m.UpdateDIDIndexSmart(ctx, progressCallback)
1497
1531
return true, nil
1498
1532
}
1499
1533
···
1529
1563
if err := plcclient.ValidateDIDFormat(input); err != nil {
1530
1564
return "", 0, err
1531
1565
}
1532
-
return input, 0, nil // โ
No resolution needed
1566
+
return input, 0, nil // No resolution needed
1533
1567
}
1534
1568
1535
1569
// Support did:web too
···
1547
1581
input, input)
1548
1582
}
1549
1583
1550
-
// โจ TIME THE RESOLUTION
1551
1584
resolveStart := time.Now()
1552
-
m.logger.Printf("Resolving handle: %s", input)
1585
+
if !m.config.Quiet {
1586
+
m.logger.Printf("Resolving handle: %s", input)
1587
+
}
1553
1588
did, err := m.handleResolver.ResolveHandle(ctx, input)
1554
1589
resolveTime := time.Since(resolveStart)
1555
1590
···
1557
1592
return "", resolveTime, fmt.Errorf("failed to resolve handle '%s': %w", input, err)
1558
1593
}
1559
1594
1560
-
m.logger.Printf("Resolved: %s โ %s (in %s)", input, did, resolveTime)
1595
+
if !m.config.Quiet {
1596
+
m.logger.Printf("Resolved: %s โ %s (in %s)", input, did, resolveTime)
1597
+
}
1561
1598
return did, resolveTime, nil
1562
1599
}
1563
1600
···
1565
1602
func (m *Manager) GetHandleResolver() *handleresolver.Client {
1566
1603
return m.handleResolver
1567
1604
}
1605
+
1606
+
// recordResolverTiming records resolver performance metrics
1607
+
func (m *Manager) recordResolverTiming(result *ResolveDIDResult, _ error) {
1608
+
m.resolverStats.Lock()
1609
+
defer m.resolverStats.Unlock()
1610
+
1611
+
// Increment counters
1612
+
atomic.AddInt64(&m.resolverStats.totalResolutions, 1)
1613
+
1614
+
switch result.Source {
1615
+
case "mempool":
1616
+
atomic.AddInt64(&m.resolverStats.mempoolHits, 1)
1617
+
case "bundle":
1618
+
atomic.AddInt64(&m.resolverStats.bundleHits, 1)
1619
+
}
1620
+
1621
+
// Record timings
1622
+
timing := resolverTiming{
1623
+
totalTime: result.TotalTime.Microseconds(),
1624
+
mempoolTime: result.MempoolTime.Microseconds(),
1625
+
indexTime: result.IndexTime.Microseconds(),
1626
+
loadOpTime: result.LoadOpTime.Microseconds(),
1627
+
source: result.Source,
1628
+
}
1629
+
1630
+
atomic.AddInt64(&m.resolverStats.totalTime, timing.totalTime)
1631
+
atomic.AddInt64(&m.resolverStats.totalMempoolTime, timing.mempoolTime)
1632
+
atomic.AddInt64(&m.resolverStats.totalIndexTime, timing.indexTime)
1633
+
atomic.AddInt64(&m.resolverStats.totalLoadOpTime, timing.loadOpTime)
1634
+
1635
+
// Add to circular buffer
1636
+
m.resolverStats.recentTimes[m.resolverStats.recentIdx] = timing
1637
+
m.resolverStats.recentIdx = (m.resolverStats.recentIdx + 1) % m.resolverStats.recentSize
1638
+
}
1639
+
1640
+
// GetResolverStats returns resolver performance statistics
1641
+
func (m *Manager) GetResolverStats() map[string]interface{} {
1642
+
totalResolutions := atomic.LoadInt64(&m.resolverStats.totalResolutions)
1643
+
1644
+
if totalResolutions == 0 {
1645
+
return map[string]interface{}{
1646
+
"total_resolutions": 0,
1647
+
}
1648
+
}
1649
+
1650
+
mempoolHits := atomic.LoadInt64(&m.resolverStats.mempoolHits)
1651
+
bundleHits := atomic.LoadInt64(&m.resolverStats.bundleHits)
1652
+
errors := atomic.LoadInt64(&m.resolverStats.errors)
1653
+
1654
+
totalTime := atomic.LoadInt64(&m.resolverStats.totalTime)
1655
+
totalMempoolTime := atomic.LoadInt64(&m.resolverStats.totalMempoolTime)
1656
+
totalIndexTime := atomic.LoadInt64(&m.resolverStats.totalIndexTime)
1657
+
totalLoadOpTime := atomic.LoadInt64(&m.resolverStats.totalLoadOpTime)
1658
+
1659
+
// Calculate overall averages
1660
+
avgTotalMs := float64(totalTime) / float64(totalResolutions) / 1000.0
1661
+
avgMempoolMs := float64(totalMempoolTime) / float64(totalResolutions) / 1000.0
1662
+
1663
+
stats := map[string]interface{}{
1664
+
"total_resolutions": totalResolutions,
1665
+
"mempool_hits": mempoolHits,
1666
+
"bundle_hits": bundleHits,
1667
+
"errors": errors,
1668
+
"success_rate": float64(totalResolutions-errors) / float64(totalResolutions),
1669
+
"mempool_hit_rate": float64(mempoolHits) / float64(totalResolutions),
1670
+
1671
+
// Overall averages
1672
+
"avg_total_time_ms": avgTotalMs,
1673
+
"avg_mempool_time_ms": avgMempoolMs,
1674
+
}
1675
+
1676
+
// Only include bundle-specific stats if we have bundle hits
1677
+
if bundleHits > 0 {
1678
+
avgIndexMs := float64(totalIndexTime) / float64(bundleHits) / 1000.0
1679
+
avgLoadMs := float64(totalLoadOpTime) / float64(bundleHits) / 1000.0
1680
+
1681
+
stats["avg_index_time_ms"] = avgIndexMs
1682
+
stats["avg_load_op_time_ms"] = avgLoadMs
1683
+
}
1684
+
1685
+
// Recent statistics
1686
+
m.resolverStats.Lock()
1687
+
recentCopy := make([]resolverTiming, m.resolverStats.recentSize)
1688
+
copy(recentCopy, m.resolverStats.recentTimes)
1689
+
m.resolverStats.Unlock()
1690
+
1691
+
// Filter valid entries
1692
+
validRecent := make([]resolverTiming, 0)
1693
+
for _, t := range recentCopy {
1694
+
if t.totalTime > 0 {
1695
+
validRecent = append(validRecent, t)
1696
+
}
1697
+
}
1698
+
1699
+
if len(validRecent) > 0 {
1700
+
// Extract total times for percentiles
1701
+
totalTimes := make([]int64, len(validRecent))
1702
+
for i, t := range validRecent {
1703
+
totalTimes[i] = t.totalTime
1704
+
}
1705
+
sort.Slice(totalTimes, func(i, j int) bool {
1706
+
return totalTimes[i] < totalTimes[j]
1707
+
})
1708
+
1709
+
// Calculate recent average
1710
+
var recentSum int64
1711
+
var recentMempoolSum int64
1712
+
var recentIndexSum int64
1713
+
var recentLoadSum int64
1714
+
recentBundleCount := 0
1715
+
1716
+
for _, t := range validRecent {
1717
+
recentSum += t.totalTime
1718
+
recentMempoolSum += t.mempoolTime
1719
+
if t.source == "bundle" {
1720
+
recentIndexSum += t.indexTime
1721
+
recentLoadSum += t.loadOpTime
1722
+
recentBundleCount++
1723
+
}
1724
+
}
1725
+
1726
+
stats["recent_avg_total_time_ms"] = float64(recentSum) / float64(len(validRecent)) / 1000.0
1727
+
stats["recent_avg_mempool_time_ms"] = float64(recentMempoolSum) / float64(len(validRecent)) / 1000.0
1728
+
1729
+
if recentBundleCount > 0 {
1730
+
stats["recent_avg_index_time_ms"] = float64(recentIndexSum) / float64(recentBundleCount) / 1000.0
1731
+
stats["recent_avg_load_time_ms"] = float64(recentLoadSum) / float64(recentBundleCount) / 1000.0
1732
+
}
1733
+
1734
+
stats["recent_sample_size"] = len(validRecent)
1735
+
1736
+
// Percentiles
1737
+
p50idx := len(totalTimes) * 50 / 100
1738
+
p95idx := len(totalTimes) * 95 / 100
1739
+
p99idx := len(totalTimes) * 99 / 100
1740
+
1741
+
stats["min_total_time_ms"] = float64(totalTimes[0]) / 1000.0
1742
+
stats["max_total_time_ms"] = float64(totalTimes[len(totalTimes)-1]) / 1000.0
1743
+
1744
+
if p50idx < len(totalTimes) {
1745
+
stats["p50_total_time_ms"] = float64(totalTimes[p50idx]) / 1000.0
1746
+
}
1747
+
if p95idx < len(totalTimes) {
1748
+
stats["p95_total_time_ms"] = float64(totalTimes[p95idx]) / 1000.0
1749
+
}
1750
+
if p99idx < len(totalTimes) {
1751
+
stats["p99_total_time_ms"] = float64(totalTimes[p99idx]) / 1000.0
1752
+
}
1753
+
}
1754
+
1755
+
return stats
1756
+
}
1757
+
1758
+
// ResetResolverStats resets resolver performance statistics
1759
+
func (m *Manager) ResetResolverStats() {
1760
+
m.resolverStats.Lock()
1761
+
defer m.resolverStats.Unlock()
1762
+
1763
+
atomic.StoreInt64(&m.resolverStats.totalResolutions, 0)
1764
+
atomic.StoreInt64(&m.resolverStats.mempoolHits, 0)
1765
+
atomic.StoreInt64(&m.resolverStats.bundleHits, 0)
1766
+
atomic.StoreInt64(&m.resolverStats.errors, 0)
1767
+
atomic.StoreInt64(&m.resolverStats.totalTime, 0)
1768
+
atomic.StoreInt64(&m.resolverStats.totalMempoolTime, 0)
1769
+
atomic.StoreInt64(&m.resolverStats.totalIndexTime, 0)
1770
+
atomic.StoreInt64(&m.resolverStats.totalLoadOpTime, 0)
1771
+
1772
+
m.resolverStats.recentTimes = make([]resolverTiming, m.resolverStats.recentSize)
1773
+
m.resolverStats.recentIdx = 0
1774
+
}
1775
+
1776
+
func (m *Manager) SetQuiet(quiet bool) {
1777
+
m.config.Quiet = quiet
1778
+
}
1779
+
1780
+
// ShouldRebuildDIDIndex checks if DID index needs rebuilding
1781
+
// Returns: (needsRebuild bool, reason string, canUpdateIncrementally bool)
1782
+
func (m *Manager) ShouldRebuildDIDIndex() (bool, string, bool) {
1783
+
if m.didIndex == nil {
1784
+
return false, "DID index disabled", false
1785
+
}
1786
+
1787
+
needsRebuild, reason := m.didIndex.NeedsRebuild(m.GetBundleIndex())
1788
+
1789
+
if needsRebuild {
1790
+
return true, reason, false
1791
+
}
1792
+
1793
+
// Check if incremental update is better
1794
+
canIncremental, behindBy := m.didIndex.ShouldUpdateIncrementally(m.GetBundleIndex())
1795
+
if canIncremental {
1796
+
return false, fmt.Sprintf("can update incrementally (%d bundles)", behindBy), true
1797
+
}
1798
+
1799
+
return false, "index is up to date", false
1800
+
}
1801
+
1802
+
// UpdateDIDIndexSmart updates DID index intelligently (rebuild vs incremental)
1803
+
func (m *Manager) UpdateDIDIndexSmart(ctx context.Context, progressCallback func(current, total int)) error {
1804
+
needsRebuild, reason, canIncremental := m.ShouldRebuildDIDIndex()
1805
+
1806
+
if !needsRebuild && !canIncremental {
1807
+
if m.config.Verbose {
1808
+
m.logger.Printf("DID index is up to date")
1809
+
}
1810
+
return nil
1811
+
}
1812
+
1813
+
if needsRebuild {
1814
+
m.logger.Printf("Rebuilding DID index: %s", reason)
1815
+
return m.BuildDIDIndex(ctx, progressCallback)
1816
+
}
1817
+
1818
+
if canIncremental {
1819
+
m.logger.Printf("Updating DID index incrementally: %s", reason)
1820
+
return m.updateDIDIndexIncremental(ctx, progressCallback)
1821
+
}
1822
+
1823
+
return nil
1824
+
}
1825
+
1826
+
// updateDIDIndexIncremental updates index for missing bundles only
1827
+
func (m *Manager) updateDIDIndexIncremental(ctx context.Context, progressCallback func(current, total int)) error {
1828
+
config := m.didIndex.GetConfig()
1829
+
lastBundle := m.index.GetLastBundle()
1830
+
1831
+
if lastBundle == nil || config.LastBundle >= lastBundle.BundleNumber {
1832
+
return nil
1833
+
}
1834
+
1835
+
start := config.LastBundle + 1
1836
+
end := lastBundle.BundleNumber
1837
+
total := end - start + 1
1838
+
1839
+
m.logger.Printf("Updating DID index for bundles %d-%d (%d bundles)", start, end, total)
1840
+
1841
+
for bundleNum := start; bundleNum <= end; bundleNum++ {
1842
+
bundle, err := m.LoadBundle(ctx, bundleNum)
1843
+
if err != nil {
1844
+
return fmt.Errorf("failed to load bundle %d: %w", bundleNum, err)
1845
+
}
1846
+
1847
+
bundleData := &didindex.BundleData{
1848
+
BundleNumber: bundle.BundleNumber,
1849
+
Operations: bundle.Operations,
1850
+
}
1851
+
1852
+
if err := m.didIndex.UpdateIndexForBundle(ctx, bundleData); err != nil {
1853
+
return fmt.Errorf("failed to update bundle %d: %w", bundleNum, err)
1854
+
}
1855
+
1856
+
if progressCallback != nil {
1857
+
progressCallback(bundleNum-start+1, total)
1858
+
}
1859
+
}
1860
+
1861
+
return nil
1862
+
}
+90
-2
bundle/metadata.go
+90
-2
bundle/metadata.go
···
1
1
package bundle
2
2
3
3
import (
4
+
"bufio"
5
+
"encoding/json"
4
6
"fmt"
5
7
"os"
6
8
"time"
7
9
8
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
9
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
10
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
11
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
10
13
)
11
14
12
15
// CalculateBundleMetadata calculates complete metadata for a bundle
···
88
91
CreatedAt: time.Now().UTC(),
89
92
}, nil
90
93
}
94
+
95
+
// CalculateMetadataStreaming calculates metadata by streaming (NO full load)
96
+
func (m *Manager) CalculateMetadataStreaming(bundleNumber int, path string) (*bundleindex.BundleMetadata, error) {
97
+
// STEP 1: Stream to get times + counts (minimal memory)
98
+
opCount, didCount, startTime, endTime, err := m.streamBundleInfo(path)
99
+
if err != nil {
100
+
return nil, err
101
+
}
102
+
103
+
// STEP 2: Calculate hashes from file
104
+
compressedHash, compressedSize, contentHash, contentSize, err := m.operations.CalculateFileHashes(path)
105
+
if err != nil {
106
+
return nil, err
107
+
}
108
+
109
+
return &bundleindex.BundleMetadata{
110
+
BundleNumber: bundleNumber,
111
+
StartTime: startTime,
112
+
EndTime: endTime,
113
+
OperationCount: opCount,
114
+
DIDCount: didCount,
115
+
Hash: "", // Calculated later in sequential phase
116
+
ContentHash: contentHash,
117
+
Parent: "", // Calculated later
118
+
CompressedHash: compressedHash,
119
+
CompressedSize: compressedSize,
120
+
UncompressedSize: contentSize,
121
+
Cursor: "",
122
+
CreatedAt: time.Now().UTC(),
123
+
}, nil
124
+
}
125
+
126
+
// streamBundleInfo extracts metadata by streaming (minimal memory)
127
+
func (m *Manager) streamBundleInfo(path string) (opCount, didCount int, startTime, endTime time.Time, err error) {
128
+
file, err := os.Open(path)
129
+
if err != nil {
130
+
return 0, 0, time.Time{}, time.Time{}, err
131
+
}
132
+
defer file.Close()
133
+
134
+
// Use abstracted reader from storage package
135
+
reader, err := storage.NewStreamingReader(file)
136
+
if err != nil {
137
+
return 0, 0, time.Time{}, time.Time{}, fmt.Errorf("failed to create reader: %w", err)
138
+
}
139
+
defer reader.Release()
140
+
141
+
scanner := bufio.NewScanner(reader)
142
+
buf := make([]byte, 64*1024)
143
+
scanner.Buffer(buf, 1024*1024)
144
+
145
+
didSet := make(map[string]bool)
146
+
lineNum := 0
147
+
148
+
for scanner.Scan() {
149
+
line := scanner.Bytes()
150
+
if len(line) == 0 {
151
+
continue
152
+
}
153
+
154
+
// Only parse minimal fields (DID + time)
155
+
var op struct {
156
+
DID string `json:"did"`
157
+
CreatedAt time.Time `json:"createdAt"`
158
+
}
159
+
160
+
if err := json.Unmarshal(line, &op); err != nil {
161
+
continue
162
+
}
163
+
164
+
if lineNum == 0 {
165
+
startTime = op.CreatedAt
166
+
}
167
+
endTime = op.CreatedAt
168
+
169
+
didSet[op.DID] = true
170
+
lineNum++
171
+
}
172
+
173
+
if err := scanner.Err(); err != nil {
174
+
return 0, 0, time.Time{}, time.Time{}, err
175
+
}
176
+
177
+
return lineNum, len(didSet), startTime, endTime, nil
178
+
}
+3
-10
bundle/scanner.go
+3
-10
bundle/scanner.go
···
9
9
"sync"
10
10
"time"
11
11
12
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
13
13
)
14
14
15
15
// ScanDirectory scans the bundle directory and rebuilds the index
···
205
205
for num := range jobs {
206
206
path := filepath.Join(m.config.BundleDir, fmt.Sprintf("%06d.jsonl.zst", num))
207
207
208
-
// Load and process bundle
209
-
ops, err := m.operations.LoadBundle(path)
210
-
if err != nil {
211
-
results <- bundleResult{index: num, err: err}
212
-
continue
213
-
}
214
-
215
-
// Use the FAST method (cursor will be set later in sequential phase)
216
-
meta, err := m.CalculateBundleMetadataFast(num, path, ops, "")
208
+
// Stream metadata WITHOUT loading all operations
209
+
meta, err := m.CalculateMetadataStreaming(num, path)
217
210
if err != nil {
218
211
results <- bundleResult{index: num, err: err}
219
212
continue
+24
-13
bundle/types.go
+24
-13
bundle/types.go
···
5
5
"path/filepath"
6
6
"time"
7
7
8
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
9
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
10
-
"tangled.org/atscan.net/plcbundle/internal/types"
8
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
9
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
10
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
11
11
)
12
12
13
13
// Bundle represents a PLC bundle
···
113
113
LastBundle int
114
114
MissingGaps []int
115
115
TotalSize int64 // Compressed size
116
-
TotalUncompressed int64 // Uncompressed size (NEW)
116
+
TotalUncompressed int64
117
117
IndexUpdated bool
118
118
}
119
119
···
128
128
RebuildProgress func(current, total int) // Progress callback for rebuild
129
129
Logger types.Logger
130
130
Verbose bool
131
+
Quiet bool
131
132
}
132
133
133
134
// DefaultConfig returns default configuration
···
142
143
RebuildProgress: nil, // No progress callback by default
143
144
Logger: nil,
144
145
Verbose: false,
146
+
Quiet: false,
145
147
}
146
148
}
147
149
···
195
197
196
198
// ResolveDIDResult contains DID resolution with timing metrics
197
199
type ResolveDIDResult struct {
198
-
Document *plcclient.DIDDocument
199
-
MempoolTime time.Duration
200
-
IndexTime time.Duration
201
-
LoadOpTime time.Duration
202
-
TotalTime time.Duration
203
-
ResolvedHandle string
204
-
Source string // "mempool" or "bundle"
205
-
BundleNumber int // if from bundle
206
-
Position int // if from bundle
200
+
Document *plcclient.DIDDocument
201
+
LatestOperation *plcclient.PLCOperation
202
+
MempoolTime time.Duration
203
+
IndexTime time.Duration
204
+
LoadOpTime time.Duration
205
+
TotalTime time.Duration
206
+
ResolvedHandle string
207
+
Source string // "mempool" or "bundle"
208
+
BundleNumber int // if from bundle
209
+
Position int // if from bundle
210
+
}
211
+
212
+
type resolverTiming struct {
213
+
totalTime int64
214
+
mempoolTime int64
215
+
indexTime int64
216
+
loadOpTime int64
217
+
source string // "mempool" or "bundle"
207
218
}
+2
-2
bundle.go
+2
-2
bundle.go
···
4
4
"context"
5
5
"io"
6
6
7
-
"tangled.org/atscan.net/plcbundle/bundle"
7
+
"tangled.org/atscan.net/plcbundle-go/bundle"
8
8
)
9
9
10
10
// Manager is the main entry point for plcbundle operations
···
49
49
50
50
// FetchNextBundle fetches the next bundle from PLC directory
51
51
func (m *Manager) FetchNextBundle(ctx context.Context) (*Bundle, error) {
52
-
b, err := m.internal.FetchNextBundle(ctx, false)
52
+
b, _, err := m.internal.FetchNextBundle(ctx, false, false)
53
53
if err != nil {
54
54
return nil, err
55
55
}
+10
-11
cmd/plcbundle/commands/clone.go
+10
-11
cmd/plcbundle/commands/clone.go
···
12
12
"time"
13
13
14
14
"github.com/spf13/cobra"
15
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/ui"
16
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
15
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
16
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
17
17
)
18
18
19
19
func NewCloneCommand() *cobra.Command {
···
40
40
4. Updates local index
41
41
5. Can be interrupted and resumed safely`,
42
42
43
-
Args: cobra.RangeArgs(1, 2), // โจ 1 or 2 arguments
43
+
Args: cobra.RangeArgs(1, 2),
44
44
45
45
Example: ` # Clone into default 'bundles' directory
46
-
plcbundle clone https://plc.example.com
46
+
plcbundle-go clone https://plc.example.com
47
47
48
48
# Clone into specific directory
49
-
plcbundle clone https://plc.example.com my-plc-data
49
+
plcbundle-go clone https://plc.example.com my-plc-data
50
50
51
51
# Clone with more parallel workers (faster)
52
-
plcbundle clone https://plc.example.com --workers 8
52
+
plcbundle-go clone https://plc.example.com --workers 8
53
53
54
54
# Resume interrupted clone
55
-
plcbundle clone https://plc.example.com --resume
55
+
plcbundle-go clone https://plc.example.com --resume
56
56
57
57
# Verbose output (shows each bundle)
58
-
plcbundle clone https://plc.example.com my-bundles -v`,
58
+
plcbundle-go clone https://plc.example.com my-bundles -v`,
59
59
60
60
RunE: func(cmd *cobra.Command, args []string) error {
61
61
remoteURL := strings.TrimSuffix(args[0], "/")
62
62
63
-
// โจ Optional directory argument (default: "bundles")
64
63
targetDir := "bundles"
65
64
if len(args) > 1 {
66
65
targetDir = args[1]
···
93
92
}
94
93
95
94
func runClone(remoteURL string, targetDir string, opts cloneOptions) error {
96
-
// โจ Create target directory if it doesn't exist
95
+
// Create target directory if it doesn't exist
97
96
absDir, err := filepath.Abs(targetDir)
98
97
if err != nil {
99
98
return fmt.Errorf("invalid directory path: %w", err)
100
99
}
101
100
102
-
// โจ Clone creates new repository in specific directory
101
+
// Clone creates new repository in specific directory
103
102
mgr, dir, err := getManager(&ManagerOptions{
104
103
Dir: absDir,
105
104
PLCURL: "https://plc.directory",
+27
-17
cmd/plcbundle/commands/common.go
+27
-17
cmd/plcbundle/commands/common.go
···
9
9
"time"
10
10
11
11
"github.com/spf13/cobra"
12
-
"tangled.org/atscan.net/plcbundle/bundle"
13
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
14
-
"tangled.org/atscan.net/plcbundle/internal/didindex"
15
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
16
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
12
+
"tangled.org/atscan.net/plcbundle-go/bundle"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/didindex"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
16
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
17
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
17
18
)
18
19
19
20
// BundleManager interface (for testing/mocking)
···
29
30
ValidateMempool() error
30
31
RefreshMempool() error
31
32
ClearMempool() error
32
-
FetchNextBundle(ctx context.Context, quiet bool) (*bundle.Bundle, error)
33
-
SaveBundle(ctx context.Context, b *bundle.Bundle, quiet bool) (time.Duration, error)
33
+
FetchNextBundle(ctx context.Context, verbose bool, quiet bool) (*bundle.Bundle, types.BundleProductionStats, error)
34
+
SaveBundle(ctx context.Context, bundle *bundle.Bundle, verbose bool, quiet bool, stats types.BundleProductionStats, skipDIDIndex bool) (time.Duration, error)
34
35
SaveIndex() error
35
36
GetDIDIndexStats() map[string]interface{}
36
37
GetDIDIndex() *didindex.Manager
37
38
BuildDIDIndex(ctx context.Context, progress func(int, int)) error
38
-
GetDIDOperations(ctx context.Context, did string, verbose bool) ([]plcclient.PLCOperation, error)
39
-
GetDIDOperationsWithLocations(ctx context.Context, did string, verbose bool) ([]bundle.PLCOperationWithLocation, error)
39
+
GetDIDOperations(ctx context.Context, did string, verbose bool) ([]plcclient.PLCOperation, []PLCOperationWithLocation, error)
40
40
GetDIDOperationsFromMempool(did string) ([]plcclient.PLCOperation, error)
41
41
GetLatestDIDOperation(ctx context.Context, did string) (*plcclient.PLCOperation, error)
42
42
LoadOperation(ctx context.Context, bundleNum, position int) (*plcclient.PLCOperation, error)
43
43
LoadOperations(ctx context.Context, bundleNumber int, positions []int) (map[int]*plcclient.PLCOperation, error)
44
44
CloneFromRemote(ctx context.Context, opts internalsync.CloneOptions) (*internalsync.CloneResult, error)
45
45
ResolveDID(ctx context.Context, did string) (*bundle.ResolveDIDResult, error)
46
-
RunSyncOnce(ctx context.Context, config *internalsync.SyncLoopConfig, verbose bool) (int, error)
46
+
RunSyncOnce(ctx context.Context, config *internalsync.SyncLoopConfig) (int, error)
47
47
RunSyncLoop(ctx context.Context, config *internalsync.SyncLoopConfig) error
48
48
GetBundleIndex() didindex.BundleIndexProvider
49
49
ScanDirectoryParallel(workers int, progressCallback func(current, total int, bytesProcessed int64)) (*bundle.DirectoryScanResult, error)
50
50
LoadBundleForDIDIndex(ctx context.Context, bundleNumber int) (*didindex.BundleData, error)
51
51
ResolveHandleOrDID(ctx context.Context, input string) (string, time.Duration, error)
52
+
SetQuiet(quiet bool)
52
53
}
53
54
54
55
// PLCOperationWithLocation wraps operation with location info
55
56
type PLCOperationWithLocation = bundle.PLCOperationWithLocation
56
57
57
58
// ============================================================================
58
-
// โจ MANAGER OPTIONS STRUCT
59
+
// MANAGER OPTIONS STRUCT
59
60
// ============================================================================
60
61
61
62
// ManagerOptions configures manager creation
···
68
69
}
69
70
70
71
// ============================================================================
71
-
// โจ SINGLE UNIFIED getManager METHOD
72
+
// SINGLE UNIFIED getManager METHOD
72
73
// ============================================================================
73
74
74
75
// getManager creates or opens a bundle manager
···
123
124
config := bundle.DefaultConfig(absDir)
124
125
config.AutoInit = opts.AutoInit
125
126
126
-
// Set verbose from command if available
127
+
// Check BOTH global AND local verbose flags
127
128
if opts.Cmd != nil {
128
-
if verbose, err := opts.Cmd.Root().PersistentFlags().GetBool("verbose"); err == nil {
129
-
config.Verbose = verbose
130
-
}
129
+
globalVerbose, _ := opts.Cmd.Root().PersistentFlags().GetBool("verbose")
130
+
localVerbose, _ := opts.Cmd.Flags().GetBool("verbose")
131
+
globalQuiet, _ := opts.Cmd.Root().PersistentFlags().GetBool("quiet")
132
+
localQuiet, _ := opts.Cmd.Flags().GetBool("quiet")
133
+
134
+
// Use OR logic: verbose if EITHER flag is set
135
+
config.Verbose = globalVerbose || localVerbose
136
+
config.Quiet = globalQuiet || localQuiet
131
137
}
132
138
133
139
// Create PLC client if URL provided
···
146
152
// Set handle resolver URL from flag or option
147
153
handleResolverURL := opts.HandleResolverURL
148
154
if handleResolverURL == "" && opts.Cmd != nil {
149
-
handleResolverURL, _ = opts.Cmd.Root().PersistentFlags().GetString("handle-resolver") // โ
Fixed flag name
155
+
handleResolverURL, _ = opts.Cmd.Root().PersistentFlags().GetString("handle-resolver")
150
156
}
151
157
// Only override default if explicitly provided
152
158
if handleResolverURL != "" {
···
198
204
// Formatting helpers
199
205
200
206
func formatBytes(bytes int64) string {
207
+
if bytes < 0 {
208
+
return fmt.Sprintf("-%s", formatBytes(-bytes))
209
+
}
210
+
201
211
const unit = 1000
202
212
if bytes < unit {
203
213
return fmt.Sprintf("%d B", bytes)
+36
-36
cmd/plcbundle/commands/detector.go
+36
-36
cmd/plcbundle/commands/detector.go
···
15
15
16
16
"github.com/goccy/go-json"
17
17
"github.com/spf13/cobra"
18
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/ui"
19
-
"tangled.org/atscan.net/plcbundle/detector"
20
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
18
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
19
+
"tangled.org/atscan.net/plcbundle-go/detector"
20
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
21
21
)
22
22
23
23
func NewDetectorCommand() *cobra.Command {
···
41
41
Load JavaScript detectors from .js files with a detect() function.`,
42
42
43
43
Example: ` # List available detectors
44
-
plcbundle detector list
44
+
plcbundle-go detector list
45
45
46
46
# Run detector on bundles
47
-
plcbundle detector run invalid_handle --bundles 1-100
47
+
plcbundle-go detector run invalid_handle --bundles 1-100
48
48
49
49
# Run with parallel processing
50
-
plcbundle detector run invalid_handle --bundles 1-100 --workers 8
50
+
plcbundle-go detector run invalid_handle --bundles 1-100 --workers 8
51
51
52
52
# Run custom detector script
53
-
plcbundle detector run ./my_detector.js --bundles 1-100
53
+
plcbundle-go detector run ./my_detector.js --bundles 1-100
54
54
55
55
# Run multiple detectors
56
-
plcbundle detector run invalid_handle aka_spam --bundles 1-100
56
+
plcbundle-go detector run invalid_handle aka_spam --bundles 1-100
57
57
58
58
# Run all detectors
59
-
plcbundle detector run all --bundles 1-100
59
+
plcbundle-go detector run all --bundles 1-100
60
60
61
61
# Filter JSONL from stdin
62
-
cat ops.jsonl | plcbundle detector filter invalid_handle > clean.jsonl
62
+
cat ops.jsonl | plcbundle-go detector filter invalid_handle > clean.jsonl
63
63
64
64
# Get detector info
65
-
plcbundle detector info invalid_handle`,
65
+
plcbundle-go detector info invalid_handle`,
66
66
}
67
67
68
68
// Add subcommands
···
86
86
Long: `List all available built-in and loaded detectors`,
87
87
88
88
Example: ` # List all detectors
89
-
plcbundle detector list`,
89
+
plcbundle-go detector list`,
90
90
91
91
RunE: func(cmd *cobra.Command, args []string) error {
92
92
registry := detector.DefaultRegistry()
···
100
100
for _, d := range detectors {
101
101
fmt.Printf(" %-20s %s (v%s)\n", d.Name(), d.Description(), d.Version())
102
102
}
103
-
fmt.Printf("\nUse 'plcbundle detector info <name>' for details\n")
103
+
fmt.Printf("\nUse 'plcbundle-go detector info <name>' for details\n")
104
104
105
105
return nil
106
106
},
···
124
124
Long: `Test a detector on a specific bundle and show results`,
125
125
126
126
Example: ` # Test on bundle 42
127
-
plcbundle detector test invalid_handle --bundle 42
127
+
plcbundle-go detector test invalid_handle --bundle 42
128
128
129
129
# Verbose output with samples
130
-
plcbundle detector test aka_spam --bundle 100 -v
130
+
plcbundle-go detector test aka_spam --bundle 100 -v
131
131
132
132
# Custom confidence threshold
133
-
plcbundle detector test spam_pds --bundle 50 --confidence 0.85`,
133
+
plcbundle-go detector test spam_pds --bundle 50 --confidence 0.85`,
134
134
135
135
Args: cobra.ExactArgs(1),
136
136
···
234
234
โข Special keyword 'all' to run all built-in detectors`,
235
235
236
236
Example: ` # Run single detector
237
-
plcbundle detector run invalid_handle --bundles 1-100
237
+
plcbundle-go detector run invalid_handle --bundles 1-100
238
238
239
239
# Run with 8 parallel workers (faster)
240
-
plcbundle detector run invalid_handle --bundles 1-1000 --workers 8
240
+
plcbundle-go detector run invalid_handle --bundles 1-1000 --workers 8
241
241
242
242
# Run multiple detectors in parallel
243
-
plcbundle detector run invalid_handle aka_spam --bundles 1-100 -w 4
243
+
plcbundle-go detector run invalid_handle aka_spam --bundles 1-100 -w 4
244
244
245
245
# Run custom script
246
-
plcbundle detector run ./my_detector.js --bundles 1-100
246
+
plcbundle-go detector run ./my_detector.js --bundles 1-100
247
247
248
248
# Run all built-in detectors
249
-
plcbundle detector run all --bundles 1-100 --workers 8
249
+
plcbundle-go detector run all --bundles 1-100 --workers 8
250
250
251
251
# Save results to file
252
-
plcbundle detector run all --bundles 1-100 -w 8 > results.csv
252
+
plcbundle-go detector run all --bundles 1-100 -w 8 > results.csv
253
253
254
254
# Disable progress bar (for scripting)
255
-
plcbundle detector run spam --bundles 1-100 --no-progress
255
+
plcbundle-go detector run spam --bundles 1-100 --no-progress
256
256
257
257
# Enable profiling
258
-
plcbundle detector run all --bundles 1-100 --pprof :6060`,
258
+
plcbundle-go detector run all --bundles 1-100 --pprof :6060`,
259
259
260
260
Args: cobra.MinimumNArgs(1),
261
261
···
344
344
Perfect for cleaning datasets or pre-processing.`,
345
345
346
346
Example: ` # Filter with built-in detector
347
-
cat ops.jsonl | plcbundle detector filter invalid_handle > clean.jsonl
347
+
cat ops.jsonl | plcbundle-go detector filter invalid_handle > clean.jsonl
348
348
349
349
# Filter with custom script
350
-
plcbundle export --all | plcbundle detector filter ./spam.js > clean.jsonl
350
+
plcbundle export --all | plcbundle-go detector filter ./spam.js > clean.jsonl
351
351
352
352
# Chain multiple detectors
353
-
cat ops.jsonl | plcbundle detector filter invalid_handle aka_spam > clean.jsonl
353
+
cat ops.jsonl | plcbundle-go detector filter invalid_handle aka_spam > clean.jsonl
354
354
355
355
# Custom confidence
356
-
cat ops.jsonl | plcbundle detector filter spam_pds --confidence 0.95 > clean.jsonl`,
356
+
cat ops.jsonl | plcbundle-go detector filter spam_pds --confidence 0.95 > clean.jsonl`,
357
357
358
358
Args: cobra.MinimumNArgs(1),
359
359
···
388
388
Long: `Show detailed information about a specific detector`,
389
389
390
390
Example: ` # Show detector info
391
-
plcbundle detector info invalid_handle
392
-
plcbundle detector info aka_spam`,
391
+
plcbundle-go detector info invalid_handle
392
+
plcbundle-go detector info aka_spam`,
393
393
394
394
Args: cobra.ExactArgs(1),
395
395
···
408
408
409
409
fmt.Printf("Usage examples:\n")
410
410
fmt.Printf(" # Test on single bundle\n")
411
-
fmt.Printf(" plcbundle detector test %s --bundle 42\n\n", d.Name())
411
+
fmt.Printf(" plcbundle-go detector test %s --bundle 42\n\n", d.Name())
412
412
fmt.Printf(" # Run on range and save\n")
413
-
fmt.Printf(" plcbundle detector run %s --bundles 1-100 > results.csv\n\n", d.Name())
413
+
fmt.Printf(" plcbundle-go detector run %s --bundles 1-100 > results.csv\n\n", d.Name())
414
414
fmt.Printf(" # Filter JSONL stream\n")
415
-
fmt.Printf(" cat ops.jsonl | plcbundle detector filter %s > clean.jsonl\n\n", d.Name())
415
+
fmt.Printf(" cat ops.jsonl | plcbundle-go detector filter %s > clean.jsonl\n\n", d.Name())
416
416
417
417
return nil
418
418
},
···
444
444
func runDetectionParallel(ctx context.Context, mgr BundleManager, setup *detectorSetup, start, end int, workers int, showProgress bool) error {
445
445
totalBundles := end - start + 1
446
446
447
-
// โจ FIX: Don't create more workers than bundles
447
+
// Don't create more workers than bundles
448
448
if workers > totalBundles {
449
449
workers = totalBundles
450
450
}
451
451
452
-
// โจ FIX: Use unbuffered channels to avoid blocking issues
452
+
// Use unbuffered channels to avoid blocking issues
453
453
jobs := make(chan int, workers*2) // Small buffer for job numbers only
454
454
results := make(chan detectionResult, workers*2)
455
455
···
538
538
// Collect matches
539
539
allMatches = append(allMatches, res.matches...)
540
540
541
-
// โจ FIX: Output immediately (don't buffer too much)
541
+
// Output immediately (don't buffer too much)
542
542
if len(allMatches) >= 500 {
543
543
for _, match := range allMatches {
544
544
fmt.Printf("%d,%s,%d,%.2f,%s\n",
+54
-84
cmd/plcbundle/commands/did.go
+54
-84
cmd/plcbundle/commands/did.go
···
11
11
12
12
"github.com/goccy/go-json"
13
13
"github.com/spf13/cobra"
14
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/ui"
15
-
"tangled.org/atscan.net/plcbundle/internal/didindex"
16
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
14
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/didindex"
16
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
17
17
)
18
18
19
19
func NewDIDCommand() *cobra.Command {
···
27
27
require a DID index to be built for optimal performance.`,
28
28
29
29
Example: ` # Lookup all operations for a DID
30
-
plcbundle did lookup did:plc:524tuhdhh3m7li5gycdn6boe
30
+
plcbundle-go did lookup did:plc:524tuhdhh3m7li5gycdn6boe
31
31
32
32
# Resolve to current DID document
33
-
plcbundle did resolve did:plc:524tuhdhh3m7li5gycdn6boe
33
+
plcbundle-go did resolve did:plc:524tuhdhh3m7li5gycdn6boe
34
34
35
35
# Show complete audit log
36
-
plcbundle did history did:plc:524tuhdhh3m7li5gycdn6boe
36
+
plcbundle-go did history did:plc:524tuhdhh3m7li5gycdn6boe
37
37
38
38
# Show DID statistics
39
-
plcbundle did stats did:plc:524tuhdhh3m7li5gycdn6boe
39
+
plcbundle-go did stats did:plc:524tuhdhh3m7li5gycdn6boe
40
40
41
41
# Batch process from file
42
-
plcbundle did batch dids.txt`,
42
+
plcbundle-go did batch dids.txt`,
43
43
}
44
44
45
45
// Add subcommands
···
73
73
โข DID: did:plc:524tuhdhh3m7li5gycdn6boe
74
74
โข Handle: tree.fail (resolves via configured resolver)
75
75
76
-
Requires DID index to be built. If not available, will fall back to
77
-
full scan (slow).`,
76
+
Requires DID index to be built.`,
78
77
79
78
Example: ` # Lookup by DID
80
-
plcbundle did lookup did:plc:524tuhdhh3m7li5gycdn6boe
79
+
plcbundle-go did lookup did:plc:524tuhdhh3m7li5gycdn6boe
81
80
82
-
# Lookup by handle (requires --resolver-url)
83
-
plcbundle did lookup tree.fail
84
-
plcbundle did lookup ngerakines.me
81
+
# Lookup by handle
82
+
plcbundle-go did lookup tree.fail
83
+
plcbundle-go did lookup ngerakines.me
85
84
86
85
# With non-default handle resolver configured
87
86
plcbundle --handle-resolver https://quickdid.smokesignal.tools did lookup tree.fail`,
···
97
96
}
98
97
defer mgr.Close()
99
98
100
-
// โจ Resolve handle to DID with timing
99
+
// Resolve handle to DID with timing
101
100
ctx := context.Background()
102
-
did, handleResolveTime, err := mgr.ResolveHandleOrDID(ctx, input)
101
+
did, _, err := mgr.ResolveHandleOrDID(ctx, input)
103
102
if err != nil {
104
103
return err
105
104
}
106
105
107
-
// Show what we resolved to (if it was a handle)
108
-
if input != did && !showJSON {
109
-
fmt.Fprintf(os.Stderr, "Resolved handle '%s' โ %s (in %s)\n\n",
110
-
input, did, handleResolveTime)
111
-
}
112
-
113
106
stats := mgr.GetDIDIndexStats()
114
107
if !stats["exists"].(bool) {
115
-
fmt.Fprintf(os.Stderr, "โ ๏ธ DID index not found. Run: plcbundle index build\n")
108
+
fmt.Fprintf(os.Stderr, "โ ๏ธ DID index not found. Run: plcbundle-go index build\n")
116
109
fmt.Fprintf(os.Stderr, " Falling back to full scan (slow)...\n\n")
117
110
}
118
111
···
120
113
121
114
// Lookup operations
122
115
lookupStart := time.Now()
123
-
opsWithLoc, err := mgr.GetDIDOperationsWithLocations(ctx, did, verbose)
116
+
_, opsWithLoc, err := mgr.GetDIDOperations(ctx, did, verbose)
124
117
if err != nil {
125
118
return err
126
119
}
···
165
158
166
159
func newDIDResolveCommand() *cobra.Command {
167
160
var (
168
-
verbose bool
169
-
showTiming bool
170
-
raw bool
161
+
verbose bool
162
+
raw bool
171
163
)
172
164
173
165
cmd := &cobra.Command{
···
183
175
O(1) lookup of latest operation.`,
184
176
185
177
Example: ` # Resolve DID
186
-
plcbundle did resolve did:plc:524tuhdhh3m7li5gycdn6boe
178
+
plcbundle-go did resolve did:plc:524tuhdhh3m7li5gycdn6boe
187
179
188
-
# Show timing breakdown
189
-
plcbundle did resolve did:plc:524tuhdhh3m7li5gycdn6boe --timing
180
+
# Show timings and other details
181
+
plcbundle-go did resolve did:plc:524tuhdhh3m7li5gycdn6boe --verbose
190
182
191
183
# Get raw PLC state (not W3C format)
192
-
plcbundle did resolve did:plc:524tuhdhh3m7li5gycdn6boe --raw
184
+
plcbundle-go did resolve did:plc:524tuhdhh3m7li5gycdn6boe --raw
193
185
194
186
# Pipe to jq
195
-
plcbundle did resolve did:plc:524tuhdhh3m7li5gycdn6boe | jq .service
187
+
plcbundle-go did resolve did:plc:524tuhdhh3m7li5gycdn6boe | jq .service
196
188
197
189
# Resolve by handle
198
-
plcbundle did resolve tree.fail`,
190
+
plcbundle-go did resolve tree.fail`,
199
191
200
192
Args: cobra.ExactArgs(1),
201
193
···
210
202
211
203
ctx := context.Background()
212
204
213
-
// โจ Resolve handle to DID with timing
205
+
// Resolve handle to DID with timing
214
206
did, handleResolveTime, err := mgr.ResolveHandleOrDID(ctx, input)
215
207
if err != nil {
216
208
return err
217
209
}
218
210
219
-
// Show resolution timing if it was a handle
220
-
if input != did {
221
-
if showTiming {
222
-
fmt.Fprintf(os.Stderr, "Handle resolution: %s โ %s (%s)\n",
223
-
input, did, handleResolveTime)
224
-
} else {
225
-
fmt.Fprintf(os.Stderr, "Resolved handle '%s' โ %s\n", input, did)
226
-
}
227
-
}
228
-
229
-
if showTiming {
230
-
fmt.Fprintf(os.Stderr, "Resolving DID: %s\n", did)
231
-
}
232
-
233
211
if verbose {
212
+
fmt.Fprintf(os.Stderr, "Resolving DID: %s\n", did)
234
213
mgr.GetDIDIndex().SetVerbose(true)
235
214
}
236
215
···
240
219
}
241
220
242
221
// Display timing if requested
243
-
if showTiming {
222
+
if verbose {
244
223
if handleResolveTime > 0 {
245
224
fmt.Fprintf(os.Stderr, "Handle: %s | ", handleResolveTime)
246
225
}
···
264
243
}
265
244
266
245
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Verbose debug output")
267
-
cmd.Flags().BoolVar(&showTiming, "timing", false, "Show timing breakdown")
268
246
cmd.Flags().BoolVar(&raw, "raw", false, "Output raw PLC state (not W3C document)")
269
247
270
248
return cmd
···
294
272
This provides a full audit trail of all changes to the DID.`,
295
273
296
274
Example: ` # Show full history
297
-
plcbundle did history did:plc:524tuhdhh3m7li5gycdn6boe
275
+
plcbundle-go did history did:plc:524tuhdhh3m7li5gycdn6boe
298
276
299
277
# Include nullified operations
300
-
plcbundle did history did:plc:524tuhdhh3m7li5gycdn6boe --include-nullified
278
+
plcbundle-go did history did:plc:524tuhdhh3m7li5gycdn6boe --include-nullified
301
279
302
280
# Compact one-line format
303
-
plcbundle did history did:plc:524tuhdhh3m7li5gycdn6boe --compact
281
+
plcbundle-go did history did:plc:524tuhdhh3m7li5gycdn6boe --compact
304
282
305
283
# JSON output
306
-
plcbundle did history did:plc:524tuhdhh3m7li5gycdn6boe --json`,
284
+
plcbundle-go did history did:plc:524tuhdhh3m7li5gycdn6boe --json`,
307
285
308
286
Args: cobra.ExactArgs(1),
309
287
···
319
297
ctx := context.Background()
320
298
321
299
// Get all operations with locations
322
-
opsWithLoc, err := mgr.GetDIDOperationsWithLocations(ctx, did, verbose)
300
+
_, opsWithLoc, err := mgr.GetDIDOperations(ctx, did, verbose)
323
301
if err != nil {
324
302
return err
325
303
}
···
382
360
- Omit file + use --stdin: reads from stdin`,
383
361
384
362
Example: ` # Batch lookup from file
385
-
plcbundle did batch dids.txt --action lookup
363
+
plcbundle-go did batch dids.txt --action lookup
386
364
387
365
# Read from stdin
388
-
cat dids.txt | plcbundle did batch --stdin --action lookup
389
-
cat dids.txt | plcbundle did batch - --action resolve
366
+
cat dids.txt | plcbundle-go did batch --stdin --action lookup
367
+
cat dids.txt | plcbundle-go did batch - --action resolve
390
368
391
369
# Export operations for DIDs from stdin
392
-
echo "did:plc:524tuhdhh3m7li5gycdn6boe" | plcbundle did batch - --action export
370
+
echo "did:plc:524tuhdhh3m7li5gycdn6boe" | plcbundle-go did batch - --action export
393
371
394
372
# Pipe results
395
-
plcbundle did batch dids.txt --action resolve -o resolved.jsonl
373
+
plcbundle-go did batch dids.txt --action resolve -o resolved.jsonl
396
374
397
375
# Parallel processing
398
-
cat dids.txt | plcbundle did batch --stdin --action lookup --workers 8
376
+
cat dids.txt | plcbundle-go did batch --stdin --action lookup --workers 8
399
377
400
378
# Chain commands
401
-
grep "did:plc:" some_file.txt | plcbundle did batch - --action export > ops.jsonl`,
379
+
grep "did:plc:" some_file.txt | plcbundle-go did batch - --action export > ops.jsonl`,
402
380
403
381
Args: cobra.MaximumNArgs(1),
404
382
···
414
392
} else if !fromStdin {
415
393
return fmt.Errorf("either provide filename or use --stdin flag\n" +
416
394
"Examples:\n" +
417
-
" plcbundle did batch dids.txt\n" +
418
-
" plcbundle did batch --stdin\n" +
419
-
" cat dids.txt | plcbundle did batch -")
395
+
" plcbundle-go did batch dids.txt\n" +
396
+
" plcbundle-go did batch --stdin\n" +
397
+
" cat dids.txt | plcbundle-go did batch -")
420
398
}
421
399
422
400
mgr, _, err := getManager(&ManagerOptions{Cmd: cmd})
···
463
441
Without DID: shows global index statistics`,
464
442
465
443
Example: ` # Stats for specific DID
466
-
plcbundle did stats did:plc:524tuhdhh3m7li5gycdn6boe
444
+
plcbundle-go did stats did:plc:524tuhdhh3m7li5gycdn6boe
467
445
468
446
# Global index stats
469
-
plcbundle did stats --global
470
-
plcbundle did stats
447
+
plcbundle-go did stats --global
448
+
plcbundle-go did stats
471
449
472
450
# JSON output
473
-
plcbundle did stats did:plc:524tuhdhh3m7li5gycdn6boe --json`,
451
+
plcbundle-go did stats did:plc:524tuhdhh3m7li5gycdn6boe --json`,
474
452
475
453
Args: cobra.MaximumNArgs(1),
476
454
···
595
573
596
574
if !stats["exists"].(bool) {
597
575
fmt.Printf("DID index does not exist\n")
598
-
fmt.Printf("Run: plcbundle index build\n")
576
+
fmt.Printf("Run: plcbundle-go index build\n")
599
577
return nil
600
578
}
601
579
···
650
628
ctx := context.Background()
651
629
652
630
// Get operations
653
-
opsWithLoc, err := mgr.GetDIDOperationsWithLocations(ctx, did, false)
631
+
_, opsWithLoc, err := mgr.GetDIDOperations(ctx, did, false)
654
632
if err != nil {
655
633
return err
656
634
}
···
843
821
errorCount := 0
844
822
845
823
for i, did := range dids {
846
-
opsWithLoc, err := mgr.GetDIDOperationsWithLocations(ctx, did, false)
824
+
_, opsWithLoc, err := mgr.GetDIDOperations(ctx, did, false)
847
825
if err != nil {
848
826
errorCount++
849
827
fmt.Fprintf(output, "%s,error,0,0,0,0\n", did)
···
1035
1013
writeResult(nil, fmt.Errorf("not found"))
1036
1014
}
1037
1015
1038
-
// โจ Process bundles in parallel - LoadOperations once per bundle
1016
+
// Process bundles in parallel - LoadOperations once per bundle
1039
1017
bundleJobs := make(chan bundleGroup, len(bundles))
1040
1018
var wg sync.WaitGroup
1041
1019
···
1051
1029
positions[i] = locations[didIdx].position
1052
1030
}
1053
1031
1054
-
// โจ Load operations once for this bundle
1032
+
// Load operations once for this bundle
1055
1033
ops, err := mgr.LoadOperations(ctx, job.bundleNum, positions)
1056
1034
1057
1035
if err != nil {
···
1131
1109
defer writer.Flush()
1132
1110
1133
1111
for i, did := range dids {
1134
-
opsWithLoc, err := mgr.GetDIDOperationsWithLocations(ctx, did, false)
1112
+
_, opsWithLoc, err := mgr.GetDIDOperations(ctx, did, false)
1135
1113
if err != nil {
1136
1114
errorCount++
1137
1115
if i < 10 { // Only log first few errors
···
1238
1216
return nil
1239
1217
}
1240
1218
1241
-
func displayLookupResults(did string, opsWithLoc []PLCOperationWithLocation, mempoolOps []plcclient.PLCOperation, totalElapsed, lookupElapsed, mempoolElapsed time.Duration, verbose bool, stats map[string]interface{}) error {
1219
+
func displayLookupResults(did string, opsWithLoc []PLCOperationWithLocation, mempoolOps []plcclient.PLCOperation, totalElapsed, lookupElapsed, mempoolElapsed time.Duration, verbose bool, _ map[string]interface{}) error {
1242
1220
nullifiedCount := 0
1243
1221
for _, owl := range opsWithLoc {
1244
1222
if owl.Operation.IsNullified() {
···
1298
1276
}
1299
1277
}
1300
1278
1301
-
fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n")
1302
1279
fmt.Printf("โ Lookup complete in %s\n", totalElapsed)
1303
-
if stats["exists"].(bool) {
1304
-
fmt.Printf(" Method: DID index (fast)\n")
1305
-
} else {
1306
-
fmt.Printf(" Method: Full scan (slow)\n")
1307
-
}
1308
-
fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n")
1309
-
1310
1280
return nil
1311
1281
}
1312
1282
+50
-29
cmd/plcbundle/commands/diff.go
+50
-29
cmd/plcbundle/commands/diff.go
···
11
11
12
12
"github.com/goccy/go-json"
13
13
"github.com/spf13/cobra"
14
-
"tangled.org/atscan.net/plcbundle/bundle"
15
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
16
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
14
+
"tangled.org/atscan.net/plcbundle-go/bundle"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
16
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
17
17
)
18
18
19
19
func NewDiffCommand() *cobra.Command {
···
45
45
โข Local file path (e.g., /path/to/plc_bundles.json)`,
46
46
47
47
Example: ` # High-level comparison
48
-
plcbundle diff https://plc.example.com
48
+
plcbundle-go diff https://plc.example.com
49
49
50
50
# Show all differences (verbose)
51
-
plcbundle diff https://plc.example.com -v
51
+
plcbundle-go diff https://plc.example.com -v
52
52
53
53
# Deep dive into specific bundle
54
-
plcbundle diff https://plc.example.com --bundle 23
54
+
plcbundle-go diff https://plc.example.com --bundle 23
55
55
56
56
# Compare bundle with operation samples
57
-
plcbundle diff https://plc.example.com --bundle 23 --show-operations
57
+
plcbundle-go diff https://plc.example.com --bundle 23 --show-operations
58
58
59
59
# Show first 50 operations
60
-
plcbundle diff https://plc.example.com --bundle 23 --sample 50
60
+
plcbundle-go diff https://plc.example.com --bundle 23 --sample 50
61
61
62
62
# Using alias
63
-
plcbundle compare https://plc.example.com`,
63
+
plcbundle-go compare https://plc.example.com`,
64
64
65
65
Args: cobra.ExactArgs(1),
66
66
···
116
116
// If there are hash mismatches, suggest deep dive
117
117
if len(comparison.HashMismatches) > 0 {
118
118
fmt.Printf("\n๐ก Tip: Use --bundle flag to investigate specific mismatches:\n")
119
-
fmt.Printf(" plcbundle diff %s --bundle %d --show-operations\n",
119
+
fmt.Printf(" plcbundle-go diff %s --bundle %d --show-operations\n",
120
120
target, comparison.HashMismatches[0].BundleNumber)
121
121
}
122
122
···
288
288
remoteCIDs[op.CID] = i
289
289
}
290
290
291
-
// Find differences
292
-
var missingInLocal []string
293
-
var missingInRemote []string
294
-
var positionMismatches []string
291
+
// Find differences - store as position+CID pairs
292
+
type cidWithPos struct {
293
+
cid string
294
+
pos int
295
+
}
296
+
297
+
var missingInLocal []cidWithPos
298
+
var missingInRemote []cidWithPos
299
+
var positionMismatches []cidWithPos
295
300
296
301
for cid, remotePos := range remoteCIDs {
297
302
if localPos, exists := localCIDs[cid]; !exists {
298
-
missingInLocal = append(missingInLocal, cid)
303
+
missingInLocal = append(missingInLocal, cidWithPos{cid, remotePos})
299
304
} else if localPos != remotePos {
300
-
positionMismatches = append(positionMismatches, cid)
305
+
positionMismatches = append(positionMismatches, cidWithPos{cid, localPos})
301
306
}
302
307
}
303
308
304
-
for cid := range localCIDs {
309
+
for cid, localPos := range localCIDs {
305
310
if _, exists := remoteCIDs[cid]; !exists {
306
-
missingInRemote = append(missingInRemote, cid)
311
+
missingInRemote = append(missingInRemote, cidWithPos{cid, localPos})
307
312
}
308
313
}
309
314
315
+
// Sort by position
316
+
sort.Slice(missingInLocal, func(i, j int) bool {
317
+
return missingInLocal[i].pos < missingInLocal[j].pos
318
+
})
319
+
sort.Slice(missingInRemote, func(i, j int) bool {
320
+
return missingInRemote[i].pos < missingInRemote[j].pos
321
+
})
322
+
sort.Slice(positionMismatches, func(i, j int) bool {
323
+
return positionMismatches[i].pos < positionMismatches[j].pos
324
+
})
325
+
310
326
// Display differences
311
327
if len(missingInLocal) > 0 {
312
328
fmt.Printf(" Missing in Local (%d operations):\n", len(missingInLocal))
313
329
displaySample := min(sampleSize, len(missingInLocal))
314
330
for i := 0; i < displaySample; i++ {
315
-
cid := missingInLocal[i]
316
-
pos := remoteCIDs[cid]
317
-
fmt.Printf(" - [%04d] %s\n", pos, cid)
331
+
item := missingInLocal[i]
332
+
fmt.Printf(" - [%04d] %s\n", item.pos, item.cid)
318
333
}
319
334
if len(missingInLocal) > displaySample {
320
335
fmt.Printf(" ... and %d more\n", len(missingInLocal)-displaySample)
···
326
341
fmt.Printf(" Missing in Remote (%d operations):\n", len(missingInRemote))
327
342
displaySample := min(sampleSize, len(missingInRemote))
328
343
for i := 0; i < displaySample; i++ {
329
-
cid := missingInRemote[i]
330
-
pos := localCIDs[cid]
331
-
fmt.Printf(" + [%04d] %s\n", pos, cid)
344
+
item := missingInRemote[i]
345
+
fmt.Printf(" + [%04d] %s\n", item.pos, item.cid)
332
346
}
333
347
if len(missingInRemote) > displaySample {
334
348
fmt.Printf(" ... and %d more\n", len(missingInRemote)-displaySample)
···
340
354
fmt.Printf(" Position Mismatches (%d operations):\n", len(positionMismatches))
341
355
displaySample := min(sampleSize, len(positionMismatches))
342
356
for i := 0; i < displaySample; i++ {
343
-
cid := positionMismatches[i]
344
-
localPos := localCIDs[cid]
345
-
remotePos := remoteCIDs[cid]
346
-
fmt.Printf(" ~ %s\n", cid)
347
-
fmt.Printf(" Local: position %04d\n", localPos)
357
+
item := positionMismatches[i]
358
+
remotePos := remoteCIDs[item.cid]
359
+
fmt.Printf(" ~ %s\n", item.cid)
360
+
fmt.Printf(" Local: position %04d\n", item.pos)
348
361
fmt.Printf(" Remote: position %04d\n", remotePos)
349
362
}
350
363
if len(positionMismatches) > displaySample {
···
541
554
}
542
555
}
543
556
}
557
+
558
+
// ADD THIS: Sort mismatches by bundle number
559
+
sort.Slice(comparison.HashMismatches, func(i, j int) bool {
560
+
return comparison.HashMismatches[i].BundleNumber < comparison.HashMismatches[j].BundleNumber
561
+
})
562
+
sort.Slice(comparison.ContentMismatches, func(i, j int) bool {
563
+
return comparison.ContentMismatches[i].BundleNumber < comparison.ContentMismatches[j].BundleNumber
564
+
})
544
565
545
566
return comparison
546
567
}
+14
-14
cmd/plcbundle/commands/export.go
+14
-14
cmd/plcbundle/commands/export.go
···
8
8
9
9
"github.com/goccy/go-json"
10
10
"github.com/spf13/cobra"
11
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
11
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
12
12
)
13
13
14
14
func NewExportCommand() *cobra.Command {
···
36
36
Supports filtering by count and timestamp for selective exports.`,
37
37
38
38
Example: ` # Export all existing bundles
39
-
plcbundle export --all
39
+
plcbundle-go export --all
40
40
41
41
# Export and sync new bundles
42
-
plcbundle export --all --sync
42
+
plcbundle-go export --all --sync
43
43
44
44
# Export specific range (existing only)
45
-
plcbundle export --range 1-100
45
+
plcbundle-go export --range 1-100
46
46
47
47
# Export with limit
48
-
plcbundle export --all --count 50000
48
+
plcbundle-go export --all --count 50000
49
49
50
50
# Export after timestamp
51
-
plcbundle export --all --after 2024-01-01T00:00:00Z
51
+
plcbundle-go export --all --after 2024-01-01T00:00:00Z
52
52
53
53
# Combine filters
54
-
plcbundle export --range 1-100 --count 10000 --after 2024-01-01T00:00:00Z
54
+
plcbundle-go export --range 1-100 --count 10000 --after 2024-01-01T00:00:00Z
55
55
56
56
# Export from specific PLC directory
57
-
plcbundle export --all --sync --plc https://plc.directory
57
+
plcbundle-go export --all --sync --plc https://plc.directory
58
58
59
59
# Pipe to file
60
-
plcbundle export --all > operations.jsonl
60
+
plcbundle-go export --all > operations.jsonl
61
61
62
62
# Process with jq
63
-
plcbundle export --all | jq -r .did | sort | uniq -c
63
+
plcbundle-go export --all | jq -r .did | sort | uniq -c
64
64
65
65
# Sync and filter with detector
66
-
plcbundle export --all --sync | plcbundle detector filter spam
66
+
plcbundle-go export --all --sync | plcbundle-go detector filter spam
67
67
68
68
# Using aliases (backwards compatible)
69
-
plcbundle stream --all --sync
70
-
plcbundle backfill --all`,
69
+
plcbundle-go stream --all --sync
70
+
plcbundle-go backfill --all`,
71
71
72
72
RunE: func(cmd *cobra.Command, args []string) error {
73
73
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
···
238
238
}
239
239
240
240
var err error
241
-
fetchedCount, err = mgr.RunSyncOnce(ctx, config, opts.verbose)
241
+
fetchedCount, err = mgr.RunSyncOnce(ctx, config)
242
242
if err != nil {
243
243
return err
244
244
}
+16
-16
cmd/plcbundle/commands/index.go
+16
-16
cmd/plcbundle/commands/index.go
···
7
7
8
8
"github.com/goccy/go-json"
9
9
"github.com/spf13/cobra"
10
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/ui"
10
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
11
11
)
12
12
13
13
func NewIndexCommand() *cobra.Command {
···
21
21
resolution and query operations.`,
22
22
23
23
Example: ` # Build DID position index
24
-
plcbundle index build
24
+
plcbundle-go index build
25
25
26
26
# Repair DID index (rebuild from bundles)
27
-
plcbundle index repair
27
+
plcbundle-go index repair
28
28
29
29
# Show DID index statistics
30
-
plcbundle index stats
30
+
plcbundle-go index stats
31
31
32
32
# Verify DID index integrity
33
-
plcbundle index verify`,
33
+
plcbundle-go index verify`,
34
34
}
35
35
36
36
cmd.AddCommand(newIndexBuildCommand())
···
60
60
are added. Use --force to rebuild from scratch.`,
61
61
62
62
Example: ` # Build index
63
-
plcbundle index build
63
+
plcbundle-go index build
64
64
65
65
# Force rebuild from scratch
66
-
plcbundle index build --force`,
66
+
plcbundle-go index build --force`,
67
67
68
68
RunE: func(cmd *cobra.Command, args []string) error {
69
69
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
···
141
141
โข Upgrade to new index version`,
142
142
143
143
Example: ` # Repair DID index
144
-
plcbundle index repair
144
+
plcbundle-go index repair
145
145
146
146
# Verbose output
147
-
plcbundle index repair -v`,
147
+
plcbundle-go index repair -v`,
148
148
149
149
RunE: func(cmd *cobra.Command, args []string) error {
150
150
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
···
158
158
stats := mgr.GetDIDIndexStats()
159
159
if !stats["exists"].(bool) {
160
160
fmt.Printf("DID index does not exist\n")
161
-
fmt.Printf("Use: plcbundle index build\n")
161
+
fmt.Printf("Use: plcbundle-go index build\n")
162
162
return nil
163
163
}
164
164
···
237
237
shard distribution, cache statistics, and coverage.`,
238
238
239
239
Example: ` # Show statistics
240
-
plcbundle index stats
240
+
plcbundle-go index stats
241
241
242
242
# JSON output
243
-
plcbundle index stats --json`,
243
+
plcbundle-go index stats --json`,
244
244
245
245
RunE: func(cmd *cobra.Command, args []string) error {
246
246
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
···
259
259
260
260
if !stats["exists"].(bool) {
261
261
fmt.Printf("DID index does not exist\n")
262
-
fmt.Printf("Run: plcbundle index build\n")
262
+
fmt.Printf("Run: plcbundle-go index build\n")
263
263
return nil
264
264
}
265
265
···
332
332
Automatically repairs minor issues.`,
333
333
334
334
Example: ` # Verify DID index
335
-
plcbundle index verify
335
+
plcbundle-go index verify
336
336
337
337
# Verbose output
338
-
plcbundle index verify -v`,
338
+
plcbundle-go index verify -v`,
339
339
340
340
RunE: func(cmd *cobra.Command, args []string) error {
341
341
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
···
348
348
349
349
if !stats["exists"].(bool) {
350
350
fmt.Printf("DID index does not exist\n")
351
-
fmt.Printf("Run: plcbundle index build\n")
351
+
fmt.Printf("Run: plcbundle-go index build\n")
352
352
return nil
353
353
}
354
354
+982
cmd/plcbundle/commands/inspect.go
+982
cmd/plcbundle/commands/inspect.go
···
1
+
package commands
2
+
3
+
import (
4
+
"context"
5
+
"fmt"
6
+
"os"
7
+
"path/filepath"
8
+
"sort"
9
+
"strings"
10
+
"time"
11
+
12
+
"github.com/goccy/go-json"
13
+
"github.com/spf13/cobra"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
15
+
)
16
+
17
+
// ============================================================================
18
+
// TYPES (defined at package level to avoid conflicts)
19
+
// ============================================================================
20
+
21
+
type DIDActivity struct {
22
+
DID string `json:"did"`
23
+
Count int `json:"count"`
24
+
}
25
+
26
+
type DomainCount struct {
27
+
Domain string `json:"domain"`
28
+
Count int `json:"count"`
29
+
}
30
+
31
+
type EndpointCount struct {
32
+
Endpoint string `json:"endpoint"`
33
+
Count int `json:"count"`
34
+
}
35
+
36
+
type TimeSlot struct {
37
+
Time time.Time `json:"time"`
38
+
Count int `json:"count"`
39
+
}
40
+
41
+
type inspectOptions struct {
42
+
showJSON bool
43
+
verify bool
44
+
showSamples bool
45
+
sampleCount int
46
+
skipMetadata bool
47
+
skipPatterns bool
48
+
skipCrypto bool
49
+
verbose bool
50
+
}
51
+
52
+
type inspectResult struct {
53
+
// Metadata
54
+
Metadata *storage.BundleMetadata `json:"metadata,omitempty"`
55
+
56
+
// Basic stats
57
+
FilePath string `json:"file_path"`
58
+
FileSize int64 `json:"file_size"`
59
+
HasMetadataFrame bool `json:"has_metadata_frame"`
60
+
HasFrameIndex bool `json:"has_frame_index"`
61
+
62
+
// Operation analysis
63
+
TotalOps int `json:"total_ops"`
64
+
NullifiedOps int `json:"nullified_ops"`
65
+
ActiveOps int `json:"active_ops"`
66
+
UniqueDIDs int `json:"unique_dids"`
67
+
OperationTypes map[string]int `json:"operation_types"`
68
+
69
+
// DID patterns
70
+
TopDIDs []DIDActivity `json:"top_dids"`
71
+
SingleOpDIDs int `json:"single_op_dids"`
72
+
MultiOpDIDs int `json:"multi_op_dids"`
73
+
74
+
// Handle patterns
75
+
TotalHandles int `json:"total_handles"`
76
+
TopDomains []DomainCount `json:"top_domains"`
77
+
InvalidHandles int `json:"invalid_handles"`
78
+
79
+
// Service patterns
80
+
TotalServices int `json:"total_services"`
81
+
UniqueEndpoints int `json:"unique_endpoints"`
82
+
TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"`
83
+
84
+
// Temporal
85
+
TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"`
86
+
AvgOpsPerMinute float64 `json:"avg_ops_per_minute"`
87
+
88
+
// Size analysis
89
+
AvgOpSize int `json:"avg_op_size"`
90
+
MinOpSize int `json:"min_op_size"`
91
+
MaxOpSize int `json:"max_op_size"`
92
+
TotalOpSize int64 `json:"total_op_size"`
93
+
94
+
// Crypto verification
95
+
ContentHashValid bool `json:"content_hash_valid"`
96
+
CompressedHashValid bool `json:"compressed_hash_valid"`
97
+
MetadataValid bool `json:"metadata_valid"`
98
+
99
+
// Timing
100
+
LoadTime time.Duration `json:"load_time"`
101
+
AnalyzeTime time.Duration `json:"analyze_time"`
102
+
VerifyTime time.Duration `json:"verify_time"`
103
+
TotalTime time.Duration `json:"total_time"`
104
+
}
105
+
106
+
type bundleAnalysis struct {
107
+
TotalOps int `json:"total_ops"`
108
+
NullifiedOps int `json:"nullified_ops"`
109
+
ActiveOps int `json:"active_ops"`
110
+
UniqueDIDs int `json:"unique_dids"`
111
+
OperationTypes map[string]int `json:"operation_types"`
112
+
SingleOpDIDs int `json:"single_op_dids"`
113
+
MultiOpDIDs int `json:"multi_op_dids"`
114
+
TotalHandles int `json:"total_handles"`
115
+
InvalidHandles int `json:"invalid_handles"`
116
+
TotalServices int `json:"total_services"`
117
+
UniqueEndpoints int `json:"unique_endpoints"`
118
+
AvgOpsPerMinute float64 `json:"avg_ops_per_minute"`
119
+
AvgOpSize int `json:"avg_op_size"`
120
+
MinOpSize int `json:"min_op_size"`
121
+
MaxOpSize int `json:"max_op_size"`
122
+
TotalOpSize int64 `json:"total_op_size"`
123
+
124
+
// For top-N calculations (unexported, won't appear in JSON)
125
+
didActivity map[string]int
126
+
domainCounts map[string]int
127
+
endpointCounts map[string]int
128
+
129
+
// For time calculations
130
+
timeSlots map[int64]int
131
+
132
+
// Results
133
+
TopDIDs []DIDActivity `json:"top_dids"`
134
+
TopDomains []DomainCount `json:"top_domains"`
135
+
TopPDSEndpoints []EndpointCount `json:"top_pds_endpoints"`
136
+
TimeDistribution *TimeDistributionSummary `json:"time_distribution,omitempty"`
137
+
}
138
+
139
+
type TimeDistributionSummary struct {
140
+
EarliestOp time.Time `json:"earliest_op"`
141
+
LatestOp time.Time `json:"latest_op"`
142
+
TimeSpan string `json:"time_span"`
143
+
PeakHour time.Time `json:"peak_hour"`
144
+
PeakHourOps int `json:"peak_hour_ops"`
145
+
TotalHours int `json:"total_hours"`
146
+
}
147
+
148
+
// ============================================================================
149
+
// COMMAND DEFINITION
150
+
// ============================================================================
151
+
152
+
func NewInspectCommand() *cobra.Command {
153
+
var (
154
+
showJSON bool
155
+
verify bool
156
+
showSamples bool
157
+
sampleCount int
158
+
skipMetadata bool
159
+
skipPatterns bool
160
+
skipCrypto bool
161
+
)
162
+
163
+
cmd := &cobra.Command{
164
+
Use: "inspect <bundle-number|bundle-file>",
165
+
Short: "Deep analysis of bundle contents",
166
+
Long: `Deep analysis of bundle contents
167
+
168
+
Performs comprehensive analysis of a bundle including:
169
+
โข Embedded metadata (from skippable frame)
170
+
โข Operation type breakdown
171
+
โข DID activity patterns
172
+
โข Handle and domain statistics
173
+
โข Service endpoint analysis
174
+
โข Temporal distribution
175
+
โข Cryptographic verification
176
+
โข Size analysis
177
+
178
+
Can inspect either by bundle number (from repository) or direct file path.`,
179
+
180
+
Example: ` # Inspect from repository
181
+
plcbundle-go inspect 42
182
+
183
+
# Inspect specific file
184
+
plcbundle-go inspect /path/to/000042.jsonl.zst
185
+
plcbundle-go inspect 000042.jsonl.zst
186
+
187
+
# Skip certain analysis sections
188
+
plcbundle-go inspect 42 --skip-patterns --skip-crypto
189
+
190
+
# Show sample operations
191
+
plcbundle-go inspect 42 --samples --sample-count 20
192
+
193
+
# Verify all hashes
194
+
plcbundle-go inspect 42 --verify
195
+
196
+
# JSON output (for scripting)
197
+
plcbundle-go inspect 42 --json`,
198
+
199
+
Args: cobra.ExactArgs(1),
200
+
201
+
RunE: func(cmd *cobra.Command, args []string) error {
202
+
input := args[0]
203
+
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
204
+
205
+
return runInspect(cmd, input, inspectOptions{
206
+
showJSON: showJSON,
207
+
verify: verify,
208
+
showSamples: showSamples,
209
+
sampleCount: sampleCount,
210
+
skipMetadata: skipMetadata,
211
+
skipPatterns: skipPatterns,
212
+
skipCrypto: skipCrypto,
213
+
verbose: verbose,
214
+
})
215
+
},
216
+
}
217
+
218
+
cmd.Flags().BoolVar(&showJSON, "json", false, "Output as JSON")
219
+
cmd.Flags().BoolVar(&verify, "verify", false, "Verify cryptographic hashes")
220
+
cmd.Flags().BoolVar(&showSamples, "samples", false, "Show sample operations")
221
+
cmd.Flags().IntVar(&sampleCount, "sample-count", 10, "Number of samples to show")
222
+
cmd.Flags().BoolVar(&skipMetadata, "skip-metadata", false, "Skip embedded metadata section")
223
+
cmd.Flags().BoolVar(&skipPatterns, "skip-patterns", false, "Skip pattern analysis")
224
+
cmd.Flags().BoolVar(&skipCrypto, "skip-crypto", false, "Skip cryptographic verification")
225
+
226
+
return cmd
227
+
}
228
+
229
+
// ============================================================================
230
+
// MAIN LOGIC
231
+
// ============================================================================
232
+
233
+
// runInspect drives the whole inspection flow for one bundle:
//  1. resolve the input (bundle number or file path) to a concrete file,
//  2. gather file-level facts (size, presence of frame index),
//  3. optionally extract the embedded metadata frame,
//  4. load and analyze every operation,
//  5. optionally verify cryptographic hashes,
//  6. render the result as JSON or human-readable text.
//
// All progress/status lines go to stderr so stdout stays clean for --json.
func runInspect(cmd *cobra.Command, input string, opts inspectOptions) error {
	totalStart := time.Now()

	// Determine if input is bundle number or file path
	bundlePath, bundleNum, err := resolveBundlePath(cmd, input)
	if err != nil {
		return err
	}

	result := &inspectResult{
		FilePath:        bundlePath,
		OperationTypes:  make(map[string]int),
		TopDIDs:         make([]DIDActivity, 0),
		TopDomains:      make([]DomainCount, 0),
		TopPDSEndpoints: make([]EndpointCount, 0),
	}

	// Check file exists
	info, err := os.Stat(bundlePath)
	if err != nil {
		return fmt.Errorf("bundle file not found: %w", err)
	}
	result.FileSize = info.Size()

	// Check for frame index.
	// NOTE(review): the NewOperations error is deliberately ignored here and
	// below — presumably it cannot fail with a nil config; confirm.
	ops, _ := storage.NewOperations(nil, opts.verbose)

	// Successful metadata extraction implies an embedded frame index.
	if _, err := ops.ExtractBundleMetadata(bundlePath); err == nil {
		result.HasFrameIndex = true // Has embedded index
	} else {
		// Check for external .idx file (legacy)
		indexPath := bundlePath + ".idx"
		if _, err := os.Stat(indexPath); err == nil {
			result.HasFrameIndex = true
		}
	}

	fmt.Fprintf(os.Stderr, "Inspecting: %s\n", filepath.Base(bundlePath))
	fmt.Fprintf(os.Stderr, "File size: %s\n\n", formatBytes(result.FileSize))

	// SECTION 1: Extract embedded metadata (fast!)
	if !opts.skipMetadata {
		fmt.Fprintf(os.Stderr, "Reading embedded metadata...\n")
		metaStart := time.Now()

		// Fresh Operations handle for the metadata pass (shadows the outer one).
		ops, _ := storage.NewOperations(nil, opts.verbose)

		meta, err := ops.ExtractBundleMetadata(bundlePath)
		if err != nil {
			if opts.verbose {
				fmt.Fprintf(os.Stderr, " No embedded metadata: %v\n", err)
			}
			result.HasMetadataFrame = false
		} else {
			result.HasMetadataFrame = true
			result.Metadata = meta
			if opts.verbose {
				fmt.Fprintf(os.Stderr, " โ Extracted in %s\n", time.Since(metaStart))
			}
		}
		fmt.Fprintf(os.Stderr, "\n")
	}

	// SECTION 2: Load and analyze operations
	fmt.Fprintf(os.Stderr, "Loading and analyzing operations...\n")
	loadStart := time.Now()

	analysis, err := analyzeBundle(bundlePath, opts)
	if err != nil {
		return fmt.Errorf("analysis failed: %w", err)
	}

	// Copy the aggregate statistics onto the result that gets rendered.
	result.LoadTime = time.Since(loadStart)
	result.TotalOps = analysis.TotalOps
	result.NullifiedOps = analysis.NullifiedOps
	result.ActiveOps = analysis.ActiveOps
	result.UniqueDIDs = analysis.UniqueDIDs
	result.OperationTypes = analysis.OperationTypes
	result.TopDIDs = analysis.TopDIDs
	result.SingleOpDIDs = analysis.SingleOpDIDs
	result.MultiOpDIDs = analysis.MultiOpDIDs
	result.TotalHandles = analysis.TotalHandles
	result.TopDomains = analysis.TopDomains
	result.InvalidHandles = analysis.InvalidHandles
	result.TotalServices = analysis.TotalServices
	result.UniqueEndpoints = analysis.UniqueEndpoints
	result.TopPDSEndpoints = analysis.TopPDSEndpoints
	result.TimeDistribution = analysis.TimeDistribution
	result.AvgOpsPerMinute = analysis.AvgOpsPerMinute
	result.AvgOpSize = analysis.AvgOpSize
	result.MinOpSize = analysis.MinOpSize
	result.MaxOpSize = analysis.MaxOpSize
	result.TotalOpSize = analysis.TotalOpSize

	fmt.Fprintf(os.Stderr, " โ Analyzed in %s\n\n", result.LoadTime)

	// SECTION 3: Cryptographic verification
	if opts.verify && !opts.skipCrypto {
		fmt.Fprintf(os.Stderr, "Verifying cryptographic hashes...\n")
		verifyStart := time.Now()

		// Pass cmd parameter
		result.ContentHashValid, result.CompressedHashValid, result.MetadataValid =
			verifyCrypto(cmd, bundlePath, result.Metadata, bundleNum, opts.verbose)

		result.VerifyTime = time.Since(verifyStart)
		fmt.Fprintf(os.Stderr, " โ Verified in %s\n\n", result.VerifyTime)
	}

	result.TotalTime = time.Since(totalStart)

	// Display results
	if opts.showJSON {
		return displayInspectJSON(result)
	}

	return displayInspectHuman(result, analysis, opts)
}
351
+
352
+
// ============================================================================
353
+
// ANALYSIS FUNCTIONS
354
+
// ============================================================================
355
+
356
+
// analyzeBundle loads every operation from the bundle at path and computes
// aggregate statistics: op counts (total/active/nullified), per-DID activity,
// operation-type histogram, size stats, and — unless opts.skipPatterns is
// set — handle/domain and service/endpoint breakdowns, plus a per-minute
// time distribution. Returns the populated analysis or the load error.
func analyzeBundle(path string, opts inspectOptions) (*bundleAnalysis, error) {
	ops, _ := storage.NewOperations(nil, opts.verbose)
	operations, err := ops.LoadBundle(path)
	if err != nil {
		return nil, err
	}

	analysis := &bundleAnalysis{
		TotalOps:       len(operations),
		OperationTypes: make(map[string]int),
		didActivity:    make(map[string]int),
		domainCounts:   make(map[string]int),
		endpointCounts: make(map[string]int),
		timeSlots:      make(map[int64]int),
	}

	// Analyze each operation
	for _, op := range operations {
		// Nullification
		if op.IsNullified() {
			analysis.NullifiedOps++
		} else {
			analysis.ActiveOps++
		}

		// DID activity
		analysis.didActivity[op.DID]++

		// Size stats: prefer the raw JSON length; fall back to re-marshaling
		// when RawJSON is empty (the marshal error is ignored — size 0 then).
		opSize := len(op.RawJSON)
		if opSize == 0 {
			data, _ := json.Marshal(op)
			opSize = len(data)
		}

		analysis.TotalOpSize += int64(opSize)
		// MinOpSize == 0 doubles as "unset" sentinel for the first op.
		if analysis.MinOpSize == 0 || opSize < analysis.MinOpSize {
			analysis.MinOpSize = opSize
		}
		if opSize > analysis.MaxOpSize {
			analysis.MaxOpSize = opSize
		}

		// Parse operation for detailed analysis; unparseable ops are skipped.
		opData, err := op.GetOperationData()
		if err != nil || opData == nil {
			continue
		}

		// Operation type
		if opType, ok := opData["type"].(string); ok {
			analysis.OperationTypes[opType]++
		}

		// Handle analysis
		if !opts.skipPatterns {
			analyzeHandles(opData, analysis)
			analyzeServices(opData, analysis)
		}

		// Time distribution (group by minute).
		// NOTE: keys of timeSlots are minute indices (unix seconds / 60),
		// not unix seconds — consumers must convert accordingly.
		timeSlot := op.CreatedAt.Unix() / 60
		analysis.timeSlots[timeSlot]++
	}

	// Calculate derived stats
	analysis.UniqueDIDs = len(analysis.didActivity)
	if analysis.TotalOps > 0 {
		analysis.AvgOpSize = int(analysis.TotalOpSize / int64(analysis.TotalOps))
	}

	// Count single vs multi-op DIDs
	for _, count := range analysis.didActivity {
		if count == 1 {
			analysis.SingleOpDIDs++
		} else {
			analysis.MultiOpDIDs++
		}
	}

	// Top DIDs
	analysis.TopDIDs = getTopDIDs(analysis.didActivity, 10)

	// Top domains
	analysis.TopDomains = getTopDomains(analysis.domainCounts, 10)

	// Top endpoints
	analysis.TopPDSEndpoints = getTopEndpoints(analysis.endpointCounts, 10)

	// Unique endpoints
	analysis.UniqueEndpoints = len(analysis.endpointCounts)

	// Time distribution
	analysis.TimeDistribution = calculateTimeDistributionSummary(analysis.timeSlots)

	// Calculate ops per minute — assumes operations are chronologically
	// ordered (first = earliest, last = latest); TODO confirm LoadBundle
	// guarantees ordering.
	if len(operations) > 1 {
		duration := operations[len(operations)-1].CreatedAt.Sub(operations[0].CreatedAt)
		if duration.Minutes() > 0 {
			analysis.AvgOpsPerMinute = float64(len(operations)) / duration.Minutes()
		}
	}

	return analysis, nil
}
461
+
462
+
func analyzeHandles(opData map[string]interface{}, analysis *bundleAnalysis) {
463
+
if aka, ok := opData["alsoKnownAs"].([]interface{}); ok {
464
+
for _, a := range aka {
465
+
if akaStr, ok := a.(string); ok {
466
+
if strings.HasPrefix(akaStr, "at://") {
467
+
analysis.TotalHandles++
468
+
469
+
// Extract domain
470
+
handle := strings.TrimPrefix(akaStr, "at://")
471
+
if idx := strings.Index(handle, "/"); idx > 0 {
472
+
handle = handle[:idx]
473
+
}
474
+
475
+
// Count domain (TLD)
476
+
parts := strings.Split(handle, ".")
477
+
if len(parts) >= 2 {
478
+
domain := parts[len(parts)-1]
479
+
if len(parts) >= 2 {
480
+
domain = parts[len(parts)-2] + "." + domain
481
+
}
482
+
analysis.domainCounts[domain]++
483
+
}
484
+
485
+
// Check for invalid patterns
486
+
if strings.Contains(handle, "_") {
487
+
analysis.InvalidHandles++
488
+
}
489
+
}
490
+
}
491
+
}
492
+
}
493
+
}
494
+
495
+
func analyzeServices(opData map[string]interface{}, analysis *bundleAnalysis) {
496
+
if services, ok := opData["services"].(map[string]interface{}); ok {
497
+
analysis.TotalServices += len(services)
498
+
499
+
// Extract PDS endpoints
500
+
if pds, ok := services["atproto_pds"].(map[string]interface{}); ok {
501
+
if endpoint, ok := pds["endpoint"].(string); ok {
502
+
// Normalize endpoint
503
+
endpoint = strings.TrimPrefix(endpoint, "https://")
504
+
endpoint = strings.TrimPrefix(endpoint, "http://")
505
+
if idx := strings.Index(endpoint, "/"); idx > 0 {
506
+
endpoint = endpoint[:idx]
507
+
}
508
+
analysis.endpointCounts[endpoint]++
509
+
}
510
+
}
511
+
}
512
+
}
513
+
514
+
func getTopDIDs(didActivity map[string]int, limit int) []DIDActivity {
515
+
var results []DIDActivity
516
+
for did, count := range didActivity {
517
+
results = append(results, DIDActivity{DID: did, Count: count})
518
+
}
519
+
520
+
sort.Slice(results, func(i, j int) bool {
521
+
return results[i].Count > results[j].Count
522
+
})
523
+
524
+
if len(results) > limit {
525
+
results = results[:limit]
526
+
}
527
+
528
+
return results
529
+
}
530
+
531
+
func getTopDomains(domainCounts map[string]int, limit int) []DomainCount {
532
+
var results []DomainCount
533
+
for domain, count := range domainCounts {
534
+
results = append(results, DomainCount{Domain: domain, Count: count})
535
+
}
536
+
537
+
sort.Slice(results, func(i, j int) bool {
538
+
return results[i].Count > results[j].Count
539
+
})
540
+
541
+
if len(results) > limit {
542
+
results = results[:limit]
543
+
}
544
+
545
+
return results
546
+
}
547
+
548
+
func getTopEndpoints(endpointCounts map[string]int, limit int) []EndpointCount {
549
+
var results []EndpointCount
550
+
for endpoint, count := range endpointCounts {
551
+
results = append(results, EndpointCount{Endpoint: endpoint, Count: count})
552
+
}
553
+
554
+
sort.Slice(results, func(i, j int) bool {
555
+
return results[i].Count > results[j].Count
556
+
})
557
+
558
+
if len(results) > limit {
559
+
results = results[:limit]
560
+
}
561
+
562
+
return results
563
+
}
564
+
565
+
func calculateTimeDistributionSummary(timeSlots map[int64]int) *TimeDistributionSummary {
566
+
if len(timeSlots) == 0 {
567
+
return nil
568
+
}
569
+
570
+
var earliest, latest int64
571
+
var peakHour int64
572
+
var peakCount int
573
+
574
+
// Group by hour and find stats
575
+
hourlySlots := make(map[int64]int)
576
+
577
+
for ts, count := range timeSlots {
578
+
// Track earliest/latest
579
+
if earliest == 0 || ts < earliest {
580
+
earliest = ts
581
+
}
582
+
if ts > latest {
583
+
latest = ts
584
+
}
585
+
586
+
// Group by hour
587
+
hour := (ts / 3600) * 3600 // Truncate to hour
588
+
hourlySlots[hour] += count
589
+
}
590
+
591
+
// Find peak hour
592
+
for hour, count := range hourlySlots {
593
+
if count > peakCount {
594
+
peakCount = count
595
+
peakHour = hour
596
+
}
597
+
}
598
+
599
+
// Calculate time span
600
+
duration := time.Unix(latest, 0).Sub(time.Unix(earliest, 0))
601
+
timeSpan := formatDuration(duration)
602
+
603
+
// Calculate total hours covered
604
+
totalHours := len(hourlySlots)
605
+
606
+
return &TimeDistributionSummary{
607
+
EarliestOp: time.Unix(earliest, 0).UTC(),
608
+
LatestOp: time.Unix(latest, 0).UTC(),
609
+
TimeSpan: timeSpan,
610
+
PeakHour: time.Unix(peakHour, 0).UTC(),
611
+
PeakHourOps: peakCount,
612
+
TotalHours: totalHours,
613
+
}
614
+
}
615
+
616
+
// ============================================================================
617
+
// DISPLAY FUNCTIONS
618
+
// ============================================================================
619
+
620
+
// displayInspectHuman renders the inspection result as formatted,
// sectioned text on stdout: file info, embedded metadata, operation
// breakdown, DID/handle/service patterns, time distribution, size stats,
// optional crypto verification, and a performance summary.
// The analysis parameter is currently unused (kept for signature
// stability).
//
// NOTE(review): the percentage computations divide by TotalOps,
// UniqueDIDs, or TotalHandles without a zero guard; an empty bundle would
// print "NaN%" (Go float division by zero yields NaN, not a panic) —
// consider guarding.
func displayInspectHuman(result *inspectResult, _ *bundleAnalysis, opts inspectOptions) error {
	fmt.Printf("\n")
	fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n")
	fmt.Printf(" Bundle Deep Inspection\n")
	fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n\n")

	// File info
	fmt.Printf("๐ File Information\n")
	fmt.Printf("โโโโโโโโโโโโโโโโโโโ\n")
	fmt.Printf(" Path: %s\n", filepath.Base(result.FilePath))
	fmt.Printf(" Size: %s\n", formatBytes(result.FileSize))
	fmt.Printf(" Has metadata frame: %v\n", result.HasMetadataFrame)
	fmt.Printf(" Has frame index: %v\n\n", result.HasFrameIndex)

	// Embedded metadata
	if result.HasMetadataFrame && result.Metadata != nil && !opts.skipMetadata {
		meta := result.Metadata
		fmt.Printf("๐ Embedded Metadata (Skippable Frame)\n")
		fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n")
		fmt.Printf(" Format: %s\n", meta.Format)
		fmt.Printf(" Origin: %s\n", meta.Origin)
		fmt.Printf(" Bundle Number: %06d\n", meta.BundleNumber)
		// Optional provenance fields — only printed when present.
		if meta.CreatedBy != "" {
			fmt.Printf(" Created by: %s\n", meta.CreatedBy)
		}
		if meta.CreatedByHost != "" {
			fmt.Printf(" Created on: %s\n", meta.CreatedByHost)
		}
		fmt.Printf(" Created at: %s\n", meta.CreatedAt.Format("2006-01-02 15:04:05 MST"))

		fmt.Printf("\n Content:\n")
		fmt.Printf(" Operations: %s\n", formatNumber(meta.OperationCount))
		fmt.Printf(" Unique DIDs: %s\n", formatNumber(meta.DIDCount))
		fmt.Printf(" Frames: %d ร %d ops\n", meta.FrameCount, meta.FrameSize)
		fmt.Printf(" Timespan: %s โ %s\n",
			meta.StartTime.Format("2006-01-02 15:04:05"),
			meta.EndTime.Format("2006-01-02 15:04:05"))
		fmt.Printf(" Duration: %s\n",
			formatDuration(meta.EndTime.Sub(meta.StartTime)))

		fmt.Printf("\n Integrity:\n")
		fmt.Printf(" Content hash: %s\n", meta.ContentHash)
		if meta.ParentHash != "" {
			fmt.Printf(" Parent hash: %s\n", meta.ParentHash)
		}

		if len(meta.FrameOffsets) > 0 {
			// Calculate metadata size (size of the metadata frame itself)
			metadataSize := int64(0)
			if result.HasMetadataFrame {
				// Metadata is at the end of file, after all data frames
				// Size = file size - last frame offset
				if len(meta.FrameOffsets) > 0 {
					lastFrameOffset := meta.FrameOffsets[len(meta.FrameOffsets)-1]
					metadataSize = result.FileSize - lastFrameOffset
				}
			}

			// Print with fixes
			fmt.Printf(" Ops Frame Index: %d offsets (embedded)\n", len(meta.FrameOffsets))
			fmt.Printf(" Metadata size: %s\n", formatBytes(metadataSize))
			fmt.Printf(" Frame offsets: %v\n", formatOffsetArray(meta.FrameOffsets, 5)) // Show first 5
		}
		fmt.Printf("\n")
	}

	// Operations breakdown
	fmt.Printf("๐ Operations Analysis\n")
	fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโ\n")
	fmt.Printf(" Total operations: %s\n", formatNumber(result.TotalOps))
	fmt.Printf(" Active: %s (%.1f%%)\n",
		formatNumber(result.ActiveOps),
		float64(result.ActiveOps)/float64(result.TotalOps)*100)
	if result.NullifiedOps > 0 {
		fmt.Printf(" Nullified: %s (%.1f%%)\n",
			formatNumber(result.NullifiedOps),
			float64(result.NullifiedOps)/float64(result.TotalOps)*100)
	}

	if len(result.OperationTypes) > 0 {
		fmt.Printf("\n Operation Types:\n")

		// Sort by count
		var types []struct {
			name  string
			count int
		}
		for name, count := range result.OperationTypes {
			types = append(types, struct {
				name  string
				count int
			}{name, count})
		}
		sort.Slice(types, func(i, j int) bool {
			return types[i].count > types[j].count
		})

		for _, t := range types {
			pct := float64(t.count) / float64(result.TotalOps) * 100
			fmt.Printf(" %-25s %s (%.1f%%)\n", t.name, formatNumber(t.count), pct)
		}
	}
	fmt.Printf("\n")

	// DID patterns
	fmt.Printf("๐ค DID Activity Patterns\n")
	fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโ\n")
	fmt.Printf(" Unique DIDs: %s\n", formatNumber(result.UniqueDIDs))
	fmt.Printf(" Single-op DIDs: %s (%.1f%%)\n",
		formatNumber(result.SingleOpDIDs),
		float64(result.SingleOpDIDs)/float64(result.UniqueDIDs)*100)
	fmt.Printf(" Multi-op DIDs: %s (%.1f%%)\n",
		formatNumber(result.MultiOpDIDs),
		float64(result.MultiOpDIDs)/float64(result.UniqueDIDs)*100)

	// Only the top 5 most-active DIDs are shown even though TopDIDs may
	// hold up to 10.
	if len(result.TopDIDs) > 0 {
		fmt.Printf("\n Most Active DIDs:\n")
		for i, da := range result.TopDIDs {
			if i >= 5 {
				break
			}
			fmt.Printf(" %d. %s (%d ops)\n", i+1, da.DID, da.Count)
		}
	}
	fmt.Printf("\n")

	// Handle patterns
	if !opts.skipPatterns && result.TotalHandles > 0 {
		fmt.Printf("๐ท๏ธ Handle Statistics\n")
		fmt.Printf("โโโโโโโโโโโโโโโโโโโโ\n")
		fmt.Printf(" Total handles: %s\n", formatNumber(result.TotalHandles))
		if result.InvalidHandles > 0 {
			fmt.Printf(" Invalid patterns: %s (%.1f%%)\n",
				formatNumber(result.InvalidHandles),
				float64(result.InvalidHandles)/float64(result.TotalHandles)*100)
		}

		if len(result.TopDomains) > 0 {
			fmt.Printf("\n Top Domains:\n")
			for i, dc := range result.TopDomains {
				if i >= 10 {
					break
				}
				pct := float64(dc.Count) / float64(result.TotalHandles) * 100
				fmt.Printf(" %-25s %s (%.1f%%)\n", dc.Domain, formatNumber(dc.Count), pct)
			}
		}
		fmt.Printf("\n")
	}

	// Service patterns
	if !opts.skipPatterns && result.TotalServices > 0 {
		fmt.Printf("๐ Service Endpoints\n")
		fmt.Printf("โโโโโโโโโโโโโโโโโโโโ\n")
		fmt.Printf(" Total services: %s\n", formatNumber(result.TotalServices))
		fmt.Printf(" Unique endpoints: %s\n", formatNumber(result.UniqueEndpoints))

		if len(result.TopPDSEndpoints) > 0 {
			fmt.Printf("\n Top PDS Endpoints:\n")
			for i, ec := range result.TopPDSEndpoints {
				if i >= 10 {
					break
				}
				fmt.Printf(" %-40s %s ops\n", ec.Endpoint, formatNumber(ec.Count))
			}
		}
		fmt.Printf("\n")
	}

	// Temporal analysis
	fmt.Printf("โฑ๏ธ Time Distribution\n")
	fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโ\n")
	if result.TimeDistribution != nil {
		td := result.TimeDistribution
		fmt.Printf(" Earliest operation: %s\n", td.EarliestOp.Format(time.RFC3339))
		fmt.Printf(" Latest operation: %s\n", td.LatestOp.Format(time.RFC3339))
		fmt.Printf(" Time span: %s\n", td.TimeSpan)
		fmt.Printf(" Peak hour: %s (%d ops)\n",
			td.PeakHour.Format("2006-01-02 15:04"), td.PeakHourOps)
		fmt.Printf(" Total active hours: %d\n", td.TotalHours)
		fmt.Printf(" Avg ops/minute: %.1f\n", result.AvgOpsPerMinute)
	}
	fmt.Printf("\n")

	// Size analysis
	fmt.Printf("๐ Size Analysis\n")
	fmt.Printf("โโโโโโโโโโโโโโโโ\n")
	fmt.Printf(" Total data: %s\n", formatBytes(result.TotalOpSize))
	fmt.Printf(" Average per op: %s\n", formatBytes(int64(result.AvgOpSize)))
	fmt.Printf(" Min operation: %s\n", formatBytes(int64(result.MinOpSize)))
	fmt.Printf(" Max operation: %s\n\n", formatBytes(int64(result.MaxOpSize)))

	// Cryptographic verification
	if opts.verify && !opts.skipCrypto {
		fmt.Printf("๐ Cryptographic Verification\n")
		fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n")

		// status maps a validity flag to a display string.
		status := func(valid bool) string {
			if valid {
				return "โ Valid"
			}
			return "โ Invalid"
		}

		fmt.Printf(" Content hash: %s\n", status(result.ContentHashValid))
		fmt.Printf(" Compressed hash: %s\n", status(result.CompressedHashValid))
		if result.HasMetadataFrame {
			fmt.Printf(" Metadata integrity: %s\n", status(result.MetadataValid))
		}
		fmt.Printf("\n")
	}

	// Performance summary
	fmt.Printf("โก Performance\n")
	fmt.Printf("โโโโโโโโโโโโโโ\n")
	fmt.Printf(" Load time: %s\n", result.LoadTime)
	if opts.verify {
		fmt.Printf(" Verify time: %s\n", result.VerifyTime)
	}
	fmt.Printf(" Total time: %s\n", result.TotalTime)
	if result.LoadTime.Seconds() > 0 {
		opsPerSec := float64(result.TotalOps) / result.LoadTime.Seconds()
		mbPerSec := float64(result.TotalOpSize) / result.LoadTime.Seconds() / (1024 * 1024)
		fmt.Printf(" Throughput: %.0f ops/sec, %.2f MB/s\n", opsPerSec, mbPerSec)
	}
	fmt.Printf("\n")

	return nil
}
849
+
850
+
func displayInspectJSON(result *inspectResult) error {
851
+
data, _ := json.MarshalIndent(result, "", " ")
852
+
fmt.Println(string(data))
853
+
return nil
854
+
}
855
+
856
+
func verifyCrypto(cmd *cobra.Command, path string, meta *storage.BundleMetadata, bundleNum int, verbose bool) (contentValid, compressedValid, metadataValid bool) {
857
+
ops, _ := storage.NewOperations(nil, verbose)
858
+
859
+
// Calculate actual hashes from file
860
+
compHash, compSize, contentHash, contentSize, err := ops.CalculateFileHashes(path)
861
+
if err != nil {
862
+
if verbose {
863
+
fmt.Fprintf(os.Stderr, " Hash calculation failed: %v\n", err)
864
+
}
865
+
return false, false, false
866
+
}
867
+
868
+
contentValid = true
869
+
compressedValid = true
870
+
metadataValid = true
871
+
872
+
// Verify against embedded metadata if available
873
+
if meta != nil {
874
+
// Check content hash (this is in the metadata)
875
+
if meta.ContentHash != "" && meta.ContentHash != contentHash {
876
+
contentValid = false
877
+
if verbose {
878
+
fmt.Fprintf(os.Stderr, " โ Content hash mismatch!\n")
879
+
fmt.Fprintf(os.Stderr, " Expected: %s\n", meta.ContentHash)
880
+
fmt.Fprintf(os.Stderr, " Actual: %s\n", contentHash)
881
+
}
882
+
}
883
+
884
+
if meta.OperationCount > 0 {
885
+
// We can't verify this without loading, so skip
886
+
metadataValid = true
887
+
}
888
+
889
+
// Note: We don't check compressed hash/size because they're not in metadata
890
+
// (The file IS the compressed data, so it's redundant)
891
+
892
+
if verbose {
893
+
fmt.Fprintf(os.Stderr, " Embedded metadata:\n")
894
+
fmt.Fprintf(os.Stderr, " Content hash: %s\n", meta.ContentHash[:16]+"...")
895
+
fmt.Fprintf(os.Stderr, " Operations: %d\n", meta.OperationCount)
896
+
fmt.Fprintf(os.Stderr, " DIDs: %d\n", meta.DIDCount)
897
+
}
898
+
}
899
+
900
+
// Also verify against repository index if bundle number is known
901
+
if bundleNum > 0 {
902
+
mgr, _, err := getManager(&ManagerOptions{Cmd: cmd})
903
+
if err == nil {
904
+
defer mgr.Close()
905
+
906
+
ctx := context.Background()
907
+
vr, err := mgr.VerifyBundle(ctx, bundleNum)
908
+
if err == nil && vr != nil {
909
+
// Index verification
910
+
indexContentValid := vr.Valid
911
+
indexHashMatch := vr.HashMatch
912
+
913
+
if verbose {
914
+
fmt.Fprintf(os.Stderr, " Repository index:\n")
915
+
fmt.Fprintf(os.Stderr, " Content valid: %v\n", indexContentValid)
916
+
fmt.Fprintf(os.Stderr, " Hash match: %v\n", indexHashMatch)
917
+
}
918
+
919
+
contentValid = contentValid && indexContentValid
920
+
compressedValid = compressedValid && indexHashMatch
921
+
}
922
+
}
923
+
}
924
+
925
+
if verbose {
926
+
fmt.Fprintf(os.Stderr, " Calculated hashes:\n")
927
+
fmt.Fprintf(os.Stderr, " Content: %s (%s)\n", contentHash[:16]+"...", formatBytes(contentSize))
928
+
fmt.Fprintf(os.Stderr, " Compressed: %s (%s)\n", compHash[:16]+"...", formatBytes(compSize))
929
+
}
930
+
931
+
return contentValid, compressedValid, metadataValid
932
+
}
933
+
934
+
// resolveBundlePath turns the user-supplied input into an absolute bundle
// file path plus (when derivable) its bundle number.
//
// Input is treated as a file path when it ends in ".zst" or contains a
// path separator; otherwise it is parsed as a numeric bundle number and
// looked up in the repository directory as %06d.jsonl.zst.
// bundleNum is 0 when it cannot be determined from a file path's name.
func resolveBundlePath(cmd *cobra.Command, input string) (path string, bundleNum int, err error) {
	// Check if it's a file path
	if strings.HasSuffix(input, ".zst") || strings.Contains(input, "/") || strings.Contains(input, "\\") {
		absPath, err := filepath.Abs(input)
		if err != nil {
			return "", 0, err
		}

		// Try to extract bundle number from filename.
		// Sscanf "%d" reads the leading digits and ignores the rest, so
		// "000042.jsonl.zst" yields 42; a non-numeric name leaves
		// bundleNum at 0 (error deliberately ignored — best effort).
		base := filepath.Base(absPath)
		fmt.Sscanf(base, "%d", &bundleNum)

		return absPath, bundleNum, nil
	}

	// Try to parse as bundle number.
	// NOTE(review): Sscanf "%d" also accepts inputs with trailing junk
	// (e.g. "42abc" parses as 42) — confirm whether stricter parsing
	// (strconv.Atoi) is wanted.
	if _, err := fmt.Sscanf(input, "%d", &bundleNum); err == nil {
		// Load from repository
		mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
		if err != nil {
			return "", 0, err
		}
		defer mgr.Close()

		path := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum))
		if _, err := os.Stat(path); err != nil {
			return "", 0, fmt.Errorf("bundle %d not found in repository", bundleNum)
		}

		return path, bundleNum, nil
	}

	return "", 0, fmt.Errorf("invalid input: must be bundle number or file path")
}
968
+
969
+
// formatOffsetArray renders a slice of frame offsets for display.
// Short slices (len <= maxShow) are printed in full with %v; longer ones
// show the first maxShow elements followed by "... (N more)".
// An empty slice renders as "[]".
func formatOffsetArray(offsets []int64, maxShow int) string {
	switch {
	case len(offsets) == 0:
		return "[]"
	case len(offsets) <= maxShow:
		return fmt.Sprintf("%v", offsets)
	}

	// Copy the visible prefix so the formatted slice has its own backing.
	head := append([]int64(nil), offsets[:maxShow]...)
	return fmt.Sprintf("%v ... (%d more)", head, len(offsets)-maxShow)
}
+12
-12
cmd/plcbundle/commands/log.go
+12
-12
cmd/plcbundle/commands/log.go
···
10
10
11
11
"github.com/spf13/cobra"
12
12
"golang.org/x/term"
13
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
14
14
)
15
15
16
16
func NewLogCommand() *cobra.Command {
···
43
43
is a terminal, just like 'git log'. Use --no-pager to disable.`,
44
44
45
45
Example: ` # Show all bundles (newest first, auto-paged)
46
-
plcbundle log
46
+
plcbundle-go log
47
47
48
48
# Show last 10 bundles
49
-
plcbundle log --last 10
50
-
plcbundle log -n 10
49
+
plcbundle-go log --last 10
50
+
plcbundle-go log -n 10
51
51
52
52
# One-line format
53
-
plcbundle log --oneline
53
+
plcbundle-go log --oneline
54
54
55
55
# Hide hashes
56
-
plcbundle log --no-hashes
56
+
plcbundle-go log --no-hashes
57
57
58
58
# Oldest first (ascending order)
59
-
plcbundle log --reverse
59
+
plcbundle-go log --reverse
60
60
61
61
# Disable pager (direct output)
62
-
plcbundle log --no-pager
62
+
plcbundle-go log --no-pager
63
63
64
64
# Combination
65
-
plcbundle log -n 20 --oneline
65
+
plcbundle-go log -n 20 --oneline
66
66
67
67
# Using alias
68
-
plcbundle history -n 5`,
68
+
plcbundle-go history -n 5`,
69
69
70
70
RunE: func(cmd *cobra.Command, args []string) error {
71
71
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
···
110
110
fmt.Printf("No bundles in repository\n")
111
111
fmt.Printf("Directory: %s\n\n", dir)
112
112
fmt.Printf("Get started:\n")
113
-
fmt.Printf(" plcbundle clone <url> Clone from remote\n")
114
-
fmt.Printf(" plcbundle sync Fetch from PLC directory\n")
113
+
fmt.Printf(" plcbundle-go clone <url> Clone from remote\n")
114
+
fmt.Printf(" plcbundle-go sync Fetch from PLC directory\n")
115
115
return nil
116
116
}
117
117
+11
-11
cmd/plcbundle/commands/ls.go
+11
-11
cmd/plcbundle/commands/ls.go
···
6
6
"time"
7
7
8
8
"github.com/spf13/cobra"
9
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
9
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
10
10
)
11
11
12
12
func NewLsCommand() *cobra.Command {
···
30
30
shell scripts and automation.`,
31
31
32
32
Example: ` # List all bundles
33
-
plcbundle ls
33
+
plcbundle-go ls
34
34
35
35
# Last 10 bundles
36
-
plcbundle ls -n 10
36
+
plcbundle-go ls -n 10
37
37
38
38
# Oldest first
39
-
plcbundle ls --reverse
39
+
plcbundle-go ls --reverse
40
40
41
41
# Custom format
42
-
plcbundle ls --format "bundle,hash,date,size"
42
+
plcbundle-go ls --format "bundle,hash,date,size"
43
43
44
44
# CSV format
45
-
plcbundle ls --separator ","
45
+
plcbundle-go ls --separator ","
46
46
47
47
# Scripting examples
48
-
plcbundle ls | awk '{print $1}' # Just bundle numbers
49
-
plcbundle ls | grep 000150 # Find specific bundle
50
-
plcbundle ls -n 5 | cut -f1,4 # First and 4th columns
51
-
plcbundle ls --format bundle,hash # Custom columns
52
-
plcbundle ls --separator "," > bundles.csv # Export to CSV`,
48
+
plcbundle-go ls | awk '{print $1}' # Just bundle numbers
49
+
plcbundle-go ls | grep 000150 # Find specific bundle
50
+
plcbundle-go ls -n 5 | cut -f1,4 # First and 4th columns
51
+
plcbundle-go ls --format bundle,hash # Custom columns
52
+
plcbundle-go ls --separator "," > bundles.csv # Export to CSV`,
53
53
54
54
RunE: func(cmd *cobra.Command, args []string) error {
55
55
mgr, _, err := getManager(&ManagerOptions{Cmd: cmd})
+19
-19
cmd/plcbundle/commands/mempool.go
+19
-19
cmd/plcbundle/commands/mempool.go
···
9
9
10
10
"github.com/goccy/go-json"
11
11
"github.com/spf13/cobra"
12
-
"tangled.org/atscan.net/plcbundle/internal/types"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
13
13
)
14
14
15
15
func NewMempoolCommand() *cobra.Command {
···
23
23
strict chronological order and automatically validates consistency.`,
24
24
25
25
Example: ` # Show mempool status
26
-
plcbundle mempool
27
-
plcbundle mempool status
26
+
plcbundle-go mempool
27
+
plcbundle-go mempool status
28
28
29
29
# Clear all operations
30
-
plcbundle mempool clear
30
+
plcbundle-go mempool clear
31
31
32
32
# Export operations as JSONL
33
-
plcbundle mempool dump
34
-
plcbundle mempool dump > operations.jsonl
33
+
plcbundle-go mempool dump
34
+
plcbundle-go mempool dump > operations.jsonl
35
35
36
36
# Using alias
37
-
plcbundle mp status`,
37
+
plcbundle-go mp status`,
38
38
39
39
RunE: func(cmd *cobra.Command, args []string) error {
40
40
// Default to status subcommand
···
67
67
next bundle, validation status, and memory usage.`,
68
68
69
69
Example: ` # Show status
70
-
plcbundle mempool status
71
-
plcbundle mempool
70
+
plcbundle-go mempool status
71
+
plcbundle-go mempool
72
72
73
73
# Verbose output with samples
74
-
plcbundle mempool status -v`,
74
+
plcbundle-go mempool status -v`,
75
75
76
76
RunE: func(cmd *cobra.Command, args []string) error {
77
77
return mempoolStatus(cmd, args)
···
123
123
โข Force fresh start`,
124
124
125
125
Example: ` # Clear with confirmation
126
-
plcbundle mempool clear
126
+
plcbundle-go mempool clear
127
127
128
128
# Force clear without confirmation
129
-
plcbundle mempool clear --force
130
-
plcbundle mempool clear -f`,
129
+
plcbundle-go mempool clear --force
130
+
plcbundle-go mempool clear -f`,
131
131
132
132
RunE: func(cmd *cobra.Command, args []string) error {
133
133
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
···
188
188
Perfect for backup, analysis, or piping to other tools.`,
189
189
190
190
Example: ` # Dump to stdout
191
-
plcbundle mempool dump
191
+
plcbundle-go mempool dump
192
192
193
193
# Save to file
194
-
plcbundle mempool dump > mempool.jsonl
195
-
plcbundle mempool dump -o mempool.jsonl
194
+
plcbundle-go mempool dump > mempool.jsonl
195
+
plcbundle-go mempool dump -o mempool.jsonl
196
196
197
197
# Pipe to jq
198
-
plcbundle mempool dump | jq -r .did
198
+
plcbundle-go mempool dump | jq -r .did
199
199
200
200
# Count operations
201
-
plcbundle mempool dump | wc -l
201
+
plcbundle-go mempool dump | wc -l
202
202
203
203
# Using alias
204
-
plcbundle mempool export`,
204
+
plcbundle-go mempool export`,
205
205
206
206
RunE: func(cmd *cobra.Command, args []string) error {
207
207
mgr, _, err := getManager(&ManagerOptions{Cmd: cmd})
+448
cmd/plcbundle/commands/migrate.go
+448
cmd/plcbundle/commands/migrate.go
···
1
+
package commands
2
+
3
+
import (
4
+
"encoding/binary"
5
+
"fmt"
6
+
"os"
7
+
"path/filepath"
8
+
"time"
9
+
10
+
"github.com/spf13/cobra"
11
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
14
+
)
15
+
16
+
func NewMigrateCommand() *cobra.Command {
17
+
var (
18
+
dryRun bool
19
+
force bool
20
+
workers int
21
+
)
22
+
23
+
cmd := &cobra.Command{
24
+
Use: "migrate [flags]",
25
+
Short: "Migrate bundles to new zstd frame format",
26
+
Long: `Migrate old single-frame zstd bundles to new multi-frame format
27
+
28
+
This command converts bundles from the legacy single-frame zstd format
29
+
to the new multi-frame format with .idx index files. This enables:
30
+
โข Faster random access to individual operations
31
+
โข Reduced memory usage when loading specific positions
32
+
โข Better performance for DID lookups
33
+
34
+
The migration:
35
+
1. Scans for bundles missing .idx files (legacy format)
36
+
2. Re-compresses them using multi-frame format (100 ops/frame)
37
+
3. Generates .idx frame offset index files
38
+
4. Preserves all hashes and metadata
39
+
5. Verifies content integrity
40
+
41
+
Original files are replaced atomically. Use --dry-run to preview.`,
42
+
43
+
Example: ` # Preview migration (recommended first)
44
+
plcbundle-go migrate --dry-run
45
+
46
+
# Migrate all legacy bundles
47
+
plcbundle-go migrate
48
+
49
+
# Force migration even if .idx files exist
50
+
plcbundle-go migrate --force
51
+
52
+
# Parallel migration (faster)
53
+
plcbundle-go migrate --workers 8
54
+
55
+
# Verbose output
56
+
plcbundle-go migrate -v`,
57
+
58
+
RunE: func(cmd *cobra.Command, args []string) error {
59
+
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
60
+
61
+
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
62
+
if err != nil {
63
+
return err
64
+
}
65
+
defer mgr.Close()
66
+
67
+
return runMigration(mgr, dir, migrationOptions{
68
+
dryRun: dryRun,
69
+
force: force,
70
+
workers: workers,
71
+
verbose: verbose,
72
+
})
73
+
},
74
+
}
75
+
76
+
cmd.Flags().BoolVarP(&dryRun, "dry-run", "n", false, "Show what would be migrated without migrating")
77
+
cmd.Flags().BoolVarP(&force, "force", "f", false, "Re-migrate bundles that already have .idx files")
78
+
cmd.Flags().IntVarP(&workers, "workers", "w", 4, "Number of parallel workers")
79
+
80
+
return cmd
81
+
}
82
+
83
+
type migrationOptions struct {
84
+
dryRun bool
85
+
force bool
86
+
workers int
87
+
verbose bool
88
+
}
89
+
90
+
type bundleMigrationInfo struct {
91
+
bundleNumber int
92
+
oldSize int64
93
+
uncompressedSize int64
94
+
oldFormat string
95
+
oldCompressionRatio float64
96
+
}
97
+
98
+
func runMigration(mgr BundleManager, dir string, opts migrationOptions) error {
99
+
fmt.Printf("Scanning for legacy bundles in: %s\n\n", dir)
100
+
101
+
index := mgr.GetIndex()
102
+
bundles := index.GetBundles()
103
+
104
+
if len(bundles) == 0 {
105
+
fmt.Println("No bundles to migrate")
106
+
return nil
107
+
}
108
+
109
+
version := GetVersion()
110
+
ops := &storage.Operations{}
111
+
112
+
var needsMigration []bundleMigrationInfo
113
+
var totalSize int64
114
+
115
+
for _, meta := range bundles {
116
+
bundlePath := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", meta.BundleNumber))
117
+
embeddedMeta, err := ops.ExtractBundleMetadata(bundlePath)
118
+
119
+
info := bundleMigrationInfo{
120
+
bundleNumber: meta.BundleNumber,
121
+
oldSize: meta.CompressedSize,
122
+
uncompressedSize: meta.UncompressedSize,
123
+
}
124
+
125
+
if meta.CompressedSize > 0 {
126
+
info.oldCompressionRatio = float64(meta.UncompressedSize) / float64(meta.CompressedSize)
127
+
}
128
+
129
+
if err != nil {
130
+
info.oldFormat = "v0 (single-frame)"
131
+
} else {
132
+
info.oldFormat = embeddedMeta.Format
133
+
}
134
+
135
+
if err != nil || opts.force {
136
+
needsMigration = append(needsMigration, info)
137
+
totalSize += meta.CompressedSize
138
+
}
139
+
}
140
+
141
+
if len(needsMigration) == 0 {
142
+
fmt.Println("โ All bundles already migrated")
143
+
fmt.Println("\nUse --force to re-migrate")
144
+
return nil
145
+
}
146
+
147
+
// COMPACT PLAN
148
+
fmt.Printf("Migration Plan\n")
149
+
fmt.Printf("โโโโโโโโโโโโโโ\n\n")
150
+
151
+
formatCounts := make(map[string]int)
152
+
var totalUncompressed int64
153
+
for _, info := range needsMigration {
154
+
formatCounts[info.oldFormat]++
155
+
totalUncompressed += info.uncompressedSize
156
+
}
157
+
158
+
fmt.Printf(" Format: ")
159
+
first := true
160
+
for format, count := range formatCounts {
161
+
if !first {
162
+
fmt.Printf(" + ")
163
+
}
164
+
fmt.Printf("%s (%d)", format, count)
165
+
first = false
166
+
}
167
+
fmt.Printf(" โ v%d\n", storage.MetadataFormatVersion)
168
+
169
+
fmt.Printf(" Bundles: %d\n", len(needsMigration))
170
+
fmt.Printf(" Size: %s (%.3fx compression)\n",
171
+
formatBytes(totalSize),
172
+
float64(totalUncompressed)/float64(totalSize))
173
+
fmt.Printf(" Workers: %d, Compression Level: %d\n\n", opts.workers, storage.CompressionLevel)
174
+
175
+
if opts.dryRun {
176
+
fmt.Printf("๐ก Dry-run mode\n")
177
+
return nil
178
+
}
179
+
180
+
// Execute migration
181
+
fmt.Printf("Migrating...\n\n")
182
+
183
+
start := time.Now()
184
+
progress := ui.NewProgressBar(len(needsMigration))
185
+
186
+
success := 0
187
+
failed := 0
188
+
var firstError error
189
+
hashChanges := make([]int, 0)
190
+
191
+
var totalOldSize int64
192
+
var totalNewSize int64
193
+
var totalOldUncompressed int64
194
+
var totalNewUncompressed int64
195
+
196
+
for i, info := range needsMigration {
197
+
totalOldSize += info.oldSize
198
+
totalOldUncompressed += info.uncompressedSize
199
+
200
+
sizeDiff, newUncompressedSize, err := migrateBundle(dir, info.bundleNumber, index, version, opts.verbose)
201
+
if err != nil {
202
+
failed++
203
+
if firstError == nil {
204
+
firstError = err
205
+
}
206
+
if opts.verbose {
207
+
fmt.Fprintf(os.Stderr, "\nโ Bundle %06d failed: %v\n", info.bundleNumber, err)
208
+
}
209
+
} else {
210
+
success++
211
+
hashChanges = append(hashChanges, info.bundleNumber)
212
+
213
+
newSize := info.oldSize + sizeDiff
214
+
totalNewSize += newSize
215
+
totalNewUncompressed += newUncompressedSize
216
+
217
+
if opts.verbose {
218
+
oldRatio := float64(info.uncompressedSize) / float64(info.oldSize)
219
+
newRatio := float64(newUncompressedSize) / float64(newSize)
220
+
221
+
fmt.Fprintf(os.Stderr, "โ %06d: %.3fxโ%.3fx %+s\n",
222
+
info.bundleNumber, oldRatio, newRatio, formatBytes(sizeDiff))
223
+
}
224
+
}
225
+
226
+
progress.Set(i + 1)
227
+
}
228
+
229
+
progress.Finish()
230
+
elapsed := time.Since(start)
231
+
232
+
// Update index
233
+
if len(hashChanges) > 0 {
234
+
fmt.Printf("\nUpdating index...\n")
235
+
updateStart := time.Now()
236
+
237
+
updated := 0
238
+
for _, bundleNum := range hashChanges {
239
+
bundlePath := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum))
240
+
compHash, compSize, _, contentSize, err := ops.CalculateFileHashes(bundlePath)
241
+
if err != nil {
242
+
fmt.Fprintf(os.Stderr, " โ ๏ธ Failed to hash %06d: %v\n", bundleNum, err)
243
+
continue
244
+
}
245
+
246
+
bundleMeta, err := index.GetBundle(bundleNum)
247
+
if err != nil {
248
+
continue
249
+
}
250
+
251
+
bundleMeta.CompressedHash = compHash
252
+
bundleMeta.CompressedSize = compSize
253
+
bundleMeta.UncompressedSize = contentSize
254
+
255
+
index.AddBundle(bundleMeta)
256
+
updated++
257
+
}
258
+
259
+
if err := mgr.SaveIndex(); err != nil {
260
+
fmt.Fprintf(os.Stderr, " โ ๏ธ Failed to save index: %v\n", err)
261
+
} else {
262
+
fmt.Printf(" โ %d entries in %s\n", updated, time.Since(updateStart).Round(time.Millisecond))
263
+
}
264
+
}
265
+
266
+
// COMPACT SUMMARY
267
+
fmt.Printf("\n")
268
+
if failed == 0 {
269
+
fmt.Printf("โ Complete: %d bundles in %s\n\n", success, elapsed.Round(time.Millisecond))
270
+
271
+
if totalOldSize > 0 && success > 0 {
272
+
sizeDiff := totalNewSize - totalOldSize
273
+
oldRatio := float64(totalOldUncompressed) / float64(totalOldSize)
274
+
newRatio := float64(totalNewUncompressed) / float64(totalNewSize)
275
+
ratioDiff := newRatio - oldRatio
276
+
277
+
// MEASURE ACTUAL METADATA SIZE (not estimated)
278
+
var totalActualMetadata int64
279
+
for _, bundleNum := range hashChanges {
280
+
bundlePath := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum))
281
+
metaSize, _ := measureMetadataSize(bundlePath)
282
+
totalActualMetadata += metaSize
283
+
}
284
+
285
+
// FIXED ALIGNMENT
286
+
fmt.Printf(" Old New Change\n")
287
+
fmt.Printf(" โโโโโโโโ โโโโโโโโ โโโโโโโโโ\n")
288
+
fmt.Printf("Size %-13s %-13s %+s (%.1f%%)\n",
289
+
formatBytes(totalOldSize),
290
+
formatBytes(totalNewSize),
291
+
formatBytes(sizeDiff),
292
+
float64(sizeDiff)/float64(totalOldSize)*100)
293
+
fmt.Printf("Ratio %-13s %-13s %+s\n",
294
+
fmt.Sprintf("%.3fx", oldRatio), fmt.Sprintf("%.3fx", newRatio), fmt.Sprintf("%+.3fx", ratioDiff))
295
+
fmt.Printf("Avg/bundle %-13s %-13s %+s\n\n",
296
+
formatBytes(totalOldSize/int64(success)),
297
+
formatBytes(totalNewSize/int64(success)),
298
+
formatBytes(sizeDiff/int64(success)))
299
+
300
+
// FIXED BREAKDOWN - use actual metadata size
301
+
if totalActualMetadata > 0 {
302
+
compressionEfficiency := sizeDiff - totalActualMetadata
303
+
304
+
fmt.Printf("Breakdown:\n")
305
+
fmt.Printf(" Metadata: %+s (~%s/bundle, structural)\n",
306
+
formatBytes(totalActualMetadata),
307
+
formatBytes(totalActualMetadata/int64(success)))
308
+
309
+
// FIX: Use absolute threshold based on old size, not metadata size
310
+
threshold := totalOldSize / 1000 // 0.1% of old size
311
+
312
+
if abs(compressionEfficiency) > threshold {
313
+
if compressionEfficiency > 0 {
314
+
// Compression got worse
315
+
pctWorse := float64(compressionEfficiency) / float64(totalOldSize) * 100
316
+
fmt.Printf(" Compression: %+s (%.2f%% worse)\n",
317
+
formatBytes(compressionEfficiency), pctWorse)
318
+
} else if compressionEfficiency < 0 {
319
+
// Compression improved
320
+
pctBetter := float64(-compressionEfficiency) / float64(totalOldSize) * 100
321
+
fmt.Printf(" Compression: %s (%.2f%% better)\n",
322
+
formatBytes(compressionEfficiency), pctBetter)
323
+
}
324
+
} else {
325
+
// Truly negligible
326
+
fmt.Printf(" Compression: unchanged\n")
327
+
}
328
+
}
329
+
330
+
fmt.Printf("\n")
331
+
}
332
+
} else {
333
+
fmt.Printf("โ ๏ธ Failed: %d bundles\n", failed)
334
+
if firstError != nil {
335
+
fmt.Printf(" Error: %v\n", firstError)
336
+
}
337
+
return fmt.Errorf("migration failed for %d bundles", failed)
338
+
}
339
+
340
+
return nil
341
+
}
342
+
343
+
func migrateBundle(dir string, bundleNum int, index *bundleindex.Index, version string, verbose bool) (sizeDiff int64, newUncompressedSize int64, err error) {
344
+
bundlePath := filepath.Join(dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum))
345
+
backupPath := bundlePath + ".bak"
346
+
347
+
meta, err := index.GetBundle(bundleNum)
348
+
if err != nil {
349
+
return 0, 0, fmt.Errorf("bundle not in index: %w", err)
350
+
}
351
+
352
+
oldSize := meta.CompressedSize
353
+
354
+
ops := &storage.Operations{}
355
+
operations, err := ops.LoadBundle(bundlePath)
356
+
if err != nil {
357
+
return 0, 0, fmt.Errorf("failed to load: %w", err)
358
+
}
359
+
360
+
if err := os.Rename(bundlePath, backupPath); err != nil {
361
+
return 0, 0, fmt.Errorf("failed to backup: %w", err)
362
+
}
363
+
364
+
hostname, _ := os.Hostname()
365
+
366
+
bundleInfo := &storage.BundleInfo{
367
+
BundleNumber: meta.BundleNumber,
368
+
Origin: index.Origin,
369
+
ParentHash: meta.Parent,
370
+
Cursor: meta.Cursor,
371
+
CreatedBy: fmt.Sprintf("plcbundle/%s", version),
372
+
Hostname: hostname,
373
+
}
374
+
375
+
contentHash, _, contentSize, compSize, err := ops.SaveBundle(bundlePath, operations, bundleInfo)
376
+
if err != nil {
377
+
os.Rename(backupPath, bundlePath)
378
+
return 0, 0, fmt.Errorf("failed to save: %w", err)
379
+
}
380
+
381
+
embeddedMeta, err := ops.ExtractBundleMetadata(bundlePath)
382
+
if err != nil {
383
+
os.Remove(bundlePath)
384
+
os.Rename(backupPath, bundlePath)
385
+
return 0, 0, fmt.Errorf("embedded metadata not created: %w", err)
386
+
}
387
+
388
+
if len(embeddedMeta.FrameOffsets) == 0 {
389
+
os.Remove(bundlePath)
390
+
os.Rename(backupPath, bundlePath)
391
+
return 0, 0, fmt.Errorf("frame offsets missing in metadata")
392
+
}
393
+
394
+
if contentHash != meta.ContentHash {
395
+
fmt.Fprintf(os.Stderr, " โ ๏ธ Content hash changed: %s โ %s\n",
396
+
meta.ContentHash[:12], contentHash[:12])
397
+
}
398
+
399
+
os.Remove(backupPath)
400
+
401
+
// Calculate changes
402
+
newSize := compSize
403
+
sizeDiff = newSize - oldSize
404
+
newUncompressedSize = contentSize
405
+
406
+
if verbose {
407
+
oldRatio := float64(meta.UncompressedSize) / float64(oldSize)
408
+
newRatio := float64(contentSize) / float64(newSize)
409
+
410
+
fmt.Fprintf(os.Stderr, " Frames: %d, Ratio: %.3fxโ%.3fx, Size: %+s\n",
411
+
len(embeddedMeta.FrameOffsets)-1, oldRatio, newRatio, formatBytes(sizeDiff))
412
+
}
413
+
414
+
return sizeDiff, newUncompressedSize, nil
415
+
}
416
+
417
+
func measureMetadataSize(bundlePath string) (int64, error) {
418
+
file, err := os.Open(bundlePath)
419
+
if err != nil {
420
+
return 0, err
421
+
}
422
+
defer file.Close()
423
+
424
+
// Read magic (4 bytes) + size (4 bytes)
425
+
header := make([]byte, 8)
426
+
if _, err := file.Read(header); err != nil {
427
+
return 0, err
428
+
}
429
+
430
+
// Check if it's a skippable frame
431
+
magic := binary.LittleEndian.Uint32(header[0:4])
432
+
if magic < 0x184D2A50 || magic > 0x184D2A5F {
433
+
return 0, nil // No metadata frame
434
+
}
435
+
436
+
// Get frame data size
437
+
frameSize := binary.LittleEndian.Uint32(header[4:8])
438
+
439
+
// Total metadata size = 4 (magic) + 4 (size) + frameSize (data)
440
+
return int64(8 + frameSize), nil
441
+
}
442
+
443
+
func abs(n int64) int64 {
444
+
if n < 0 {
445
+
return -n
446
+
}
447
+
return n
448
+
}
+167
-107
cmd/plcbundle/commands/op.go
+167
-107
cmd/plcbundle/commands/op.go
···
10
10
11
11
"github.com/goccy/go-json"
12
12
"github.com/spf13/cobra"
13
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
14
-
"tangled.org/atscan.net/plcbundle/internal/types"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
15
15
)
16
16
17
17
func NewOpCommand() *cobra.Command {
···
29
29
Example: 88410345 = bundle 8841, position 345`,
30
30
31
31
Example: ` # Get operation as JSON
32
-
plcbundle op get 42 1337
33
-
plcbundle op get 420000
32
+
plcbundle-go op get 42 1337
33
+
plcbundle-go op get 420000
34
34
35
35
# Show operation (formatted)
36
-
plcbundle op show 42 1337
37
-
plcbundle op show 88410345
36
+
plcbundle-go op show 42 1337
37
+
plcbundle-go op show 88410345
38
38
39
39
# Find by CID
40
-
plcbundle op find bafyreig3...`,
40
+
plcbundle-go op find bafyreig3...`,
41
41
}
42
42
43
43
// Add subcommands
···
53
53
// ============================================================================
54
54
55
55
func newOpGetCommand() *cobra.Command {
56
+
var verbose bool
57
+
56
58
cmd := &cobra.Command{
57
59
Use: "get <bundle> <position> | <globalPosition>",
58
60
Short: "Get operation as JSON",
···
62
64
1. Bundle number + position: get 42 1337
63
65
2. Global position: get 420000
64
66
65
-
Global position = (bundleNumber ร 10,000) + position`,
67
+
Global position = (bundleNumber ร 10,000) + position
68
+
69
+
Use -v/--verbose to see detailed timing breakdown.`,
66
70
67
71
Example: ` # By bundle + position
68
-
plcbundle op get 42 1337
72
+
plcbundle-go op get 42 1337
69
73
70
74
# By global position
71
-
plcbundle op get 88410345
75
+
plcbundle-go op get 88410345
76
+
77
+
# With timing metrics
78
+
plcbundle-go op get 42 1337 -v
79
+
plcbundle-go op get 88410345 --verbose
72
80
73
81
# Pipe to jq
74
-
plcbundle op get 42 1337 | jq .did`,
82
+
plcbundle-go op get 42 1337 | jq .did`,
75
83
76
84
Args: cobra.RangeArgs(1, 2),
77
85
···
88
96
defer mgr.Close()
89
97
90
98
ctx := context.Background()
99
+
100
+
// Time the operation load
101
+
totalStart := time.Now()
91
102
op, err := mgr.LoadOperation(ctx, bundleNum, position)
103
+
totalDuration := time.Since(totalStart)
104
+
92
105
if err != nil {
93
106
return err
94
107
}
95
108
96
-
// Output raw JSON
109
+
if verbose {
110
+
globalPos := (bundleNum * 10000) + position
111
+
112
+
// Log-style output (compact, single-line friendly)
113
+
fmt.Fprintf(os.Stderr, "[Load] Bundle %06d:%04d (pos=%d) in %s",
114
+
bundleNum, position, globalPos, totalDuration)
115
+
fmt.Fprintf(os.Stderr, " | %d bytes", len(op.RawJSON))
116
+
fmt.Fprintf(os.Stderr, "\n")
117
+
}
118
+
119
+
// Output raw JSON to stdout
97
120
if len(op.RawJSON) > 0 {
98
121
fmt.Println(string(op.RawJSON))
99
122
} else {
···
105
128
},
106
129
}
107
130
131
+
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show timing metrics")
132
+
108
133
return cmd
109
134
}
110
135
111
-
// ============================================================================
136
+
// // ============================================================================
112
137
// OP SHOW - Show operation (formatted)
113
138
// ============================================================================
114
139
···
125
150
โข DID and CID
126
151
โข Timestamp and age
127
152
โข Nullification status
128
-
โข Parsed operation details`,
153
+
โข Parsed operation details
154
+
โข Performance metrics (with -v)`,
129
155
130
156
Example: ` # By bundle + position
131
-
plcbundle op show 42 1337
157
+
plcbundle-go op show 42 1337
132
158
133
159
# By global position
134
-
plcbundle op show 88410345
160
+
plcbundle-go op show 88410345
135
161
136
-
# Verbose (show full operation JSON)
137
-
plcbundle op show 42 1337 -v`,
162
+
# Verbose with timing and full JSON
163
+
plcbundle-go op show 42 1337 -v`,
138
164
139
165
Args: cobra.RangeArgs(1, 2),
140
166
···
151
177
defer mgr.Close()
152
178
153
179
ctx := context.Background()
180
+
181
+
// Time the operation
182
+
loadStart := time.Now()
154
183
op, err := mgr.LoadOperation(ctx, bundleNum, position)
184
+
loadDuration := time.Since(loadStart)
185
+
155
186
if err != nil {
156
187
return err
157
188
}
158
189
159
-
return displayOperation(bundleNum, position, op, verbose)
190
+
// Time the parsing
191
+
parseStart := time.Now()
192
+
opData, parseErr := op.GetOperationData()
193
+
parseDuration := time.Since(parseStart)
194
+
195
+
return displayOperationWithTiming(bundleNum, position, op, opData, parseErr,
196
+
loadDuration, parseDuration, verbose)
160
197
},
161
198
}
162
199
163
-
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show full operation JSON")
200
+
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show timing metrics and full JSON")
164
201
165
202
return cmd
166
203
}
···
181
218
Note: This performs a full scan and can be slow on large repositories.`,
182
219
183
220
Example: ` # Find by CID
184
-
plcbundle op find bafyreig3tg4k...
221
+
plcbundle-go op find bafyreig3tg4k...
185
222
186
223
# Pipe to op get
187
-
plcbundle op find bafyreig3... | awk '{print $3, $5}' | xargs plcbundle op get`,
224
+
plcbundle-go op find bafyreig3... | awk '{print $3, $5}' | xargs plcbundle-go op get`,
188
225
189
226
Args: cobra.ExactArgs(1),
190
227
···
250
287
return 0, 0, fmt.Errorf("usage: op <command> <bundle> <position> OR op <command> <globalPosition>")
251
288
}
252
289
253
-
// displayOperation shows formatted operation details
254
-
func displayOperation(bundleNum, position int, op *plcclient.PLCOperation, verbose bool) error {
290
+
// findOperationByCID searches for an operation by CID
291
+
func findOperationByCID(mgr BundleManager, cid string) error {
292
+
ctx := context.Background()
293
+
294
+
// CHECK MEMPOOL FIRST (most recent data)
295
+
fmt.Fprintf(os.Stderr, "Checking mempool...\n")
296
+
mempoolOps, err := mgr.GetMempoolOperations()
297
+
if err == nil && len(mempoolOps) > 0 {
298
+
for pos, op := range mempoolOps {
299
+
if op.CID == cid {
300
+
fmt.Printf("Found in mempool: position %d\n\n", pos)
301
+
fmt.Printf(" DID: %s\n", op.DID)
302
+
fmt.Printf(" Created: %s\n", op.CreatedAt.Format("2006-01-02 15:04:05"))
303
+
304
+
if op.IsNullified() {
305
+
fmt.Printf(" Status: โ Nullified")
306
+
if nullCID := op.GetNullifyingCID(); nullCID != "" {
307
+
fmt.Printf(" by %s", nullCID)
308
+
}
309
+
fmt.Printf("\n")
310
+
} else {
311
+
fmt.Printf(" Status: โ Active\n")
312
+
}
313
+
314
+
return nil
315
+
}
316
+
}
317
+
}
318
+
319
+
// Search bundles
320
+
index := mgr.GetIndex()
321
+
bundles := index.GetBundles()
322
+
323
+
if len(bundles) == 0 {
324
+
fmt.Fprintf(os.Stderr, "No bundles to search\n")
325
+
return nil
326
+
}
327
+
328
+
fmt.Fprintf(os.Stderr, "Searching %d bundles for CID: %s\n\n", len(bundles), cid)
329
+
330
+
for _, meta := range bundles {
331
+
bundle, err := mgr.LoadBundle(ctx, meta.BundleNumber)
332
+
if err != nil {
333
+
continue
334
+
}
335
+
336
+
for pos, op := range bundle.Operations {
337
+
if op.CID == cid {
338
+
globalPos := (meta.BundleNumber * types.BUNDLE_SIZE) + pos
339
+
340
+
fmt.Printf("Found: bundle %06d, position %d\n", meta.BundleNumber, pos)
341
+
fmt.Printf("Global position: %d\n\n", globalPos)
342
+
343
+
fmt.Printf(" DID: %s\n", op.DID)
344
+
fmt.Printf(" Created: %s\n", op.CreatedAt.Format("2006-01-02 15:04:05"))
345
+
346
+
if op.IsNullified() {
347
+
fmt.Printf(" Status: โ Nullified")
348
+
if nullCID := op.GetNullifyingCID(); nullCID != "" {
349
+
fmt.Printf(" by %s", nullCID)
350
+
}
351
+
fmt.Printf("\n")
352
+
} else {
353
+
fmt.Printf(" Status: โ Active\n")
354
+
}
355
+
356
+
return nil
357
+
}
358
+
}
359
+
360
+
// Progress indicator
361
+
if meta.BundleNumber%100 == 0 {
362
+
fmt.Fprintf(os.Stderr, "Searched through bundle %06d...\r", meta.BundleNumber)
363
+
}
364
+
}
365
+
366
+
fmt.Fprintf(os.Stderr, "\nCID not found: %s\n", cid)
367
+
fmt.Fprintf(os.Stderr, "(Searched %d bundles + mempool)\n", len(bundles))
368
+
return fmt.Errorf("CID not found")
369
+
}
370
+
371
+
// displayOperationWithTiming shows formatted operation details with timing
372
+
func displayOperationWithTiming(bundleNum, position int, op *plcclient.PLCOperation,
373
+
opData map[string]interface{}, _ error,
374
+
loadDuration, parseDuration time.Duration, verbose bool) error {
375
+
255
376
globalPos := (bundleNum * types.BUNDLE_SIZE) + position
256
377
257
-
fmt.Printf("Operation %d\n", globalPos)
378
+
fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n")
379
+
fmt.Printf(" Operation %d\n", globalPos)
258
380
fmt.Printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n\n")
259
381
260
382
fmt.Printf("Location\n")
···
285
407
fmt.Printf("โโโโโโ\n")
286
408
fmt.Printf(" %s\n\n", status)
287
409
410
+
// Performance metrics (always shown if verbose)
411
+
if verbose {
412
+
totalTime := loadDuration + parseDuration
413
+
414
+
fmt.Printf("Performance\n")
415
+
fmt.Printf("โโโโโโโโโโโ\n")
416
+
fmt.Printf(" Load time: %s\n", loadDuration)
417
+
fmt.Printf(" Parse time: %s\n", parseDuration)
418
+
fmt.Printf(" Total time: %s\n", totalTime)
419
+
420
+
if len(op.RawJSON) > 0 {
421
+
fmt.Printf(" Data size: %d bytes\n", len(op.RawJSON))
422
+
mbPerSec := float64(len(op.RawJSON)) / loadDuration.Seconds() / (1024 * 1024)
423
+
fmt.Printf(" Load speed: %.2f MB/s\n", mbPerSec)
424
+
}
425
+
426
+
fmt.Printf("\n")
427
+
}
428
+
288
429
// Parse operation details
289
-
if opData, err := op.GetOperationData(); err == nil && opData != nil && !op.IsNullified() {
430
+
if opData != nil && !op.IsNullified() {
290
431
fmt.Printf("Details\n")
291
432
fmt.Printf("โโโโโโโ\n")
292
433
···
339
480
340
481
return nil
341
482
}
342
-
343
-
// findOperationByCID searches for an operation by CID
344
-
func findOperationByCID(mgr BundleManager, cid string) error {
345
-
ctx := context.Background()
346
-
347
-
// โจ CHECK MEMPOOL FIRST (most recent data)
348
-
fmt.Fprintf(os.Stderr, "Checking mempool...\n")
349
-
mempoolOps, err := mgr.GetMempoolOperations()
350
-
if err == nil && len(mempoolOps) > 0 {
351
-
for pos, op := range mempoolOps {
352
-
if op.CID == cid {
353
-
fmt.Printf("Found in mempool: position %d\n\n", pos)
354
-
fmt.Printf(" DID: %s\n", op.DID)
355
-
fmt.Printf(" Created: %s\n", op.CreatedAt.Format("2006-01-02 15:04:05"))
356
-
357
-
if op.IsNullified() {
358
-
fmt.Printf(" Status: โ Nullified")
359
-
if nullCID := op.GetNullifyingCID(); nullCID != "" {
360
-
fmt.Printf(" by %s", nullCID)
361
-
}
362
-
fmt.Printf("\n")
363
-
} else {
364
-
fmt.Printf(" Status: โ Active\n")
365
-
}
366
-
367
-
return nil
368
-
}
369
-
}
370
-
}
371
-
372
-
// Search bundles
373
-
index := mgr.GetIndex()
374
-
bundles := index.GetBundles()
375
-
376
-
if len(bundles) == 0 {
377
-
fmt.Fprintf(os.Stderr, "No bundles to search\n")
378
-
return nil
379
-
}
380
-
381
-
fmt.Fprintf(os.Stderr, "Searching %d bundles for CID: %s\n\n", len(bundles), cid)
382
-
383
-
for _, meta := range bundles {
384
-
bundle, err := mgr.LoadBundle(ctx, meta.BundleNumber)
385
-
if err != nil {
386
-
continue
387
-
}
388
-
389
-
for pos, op := range bundle.Operations {
390
-
if op.CID == cid {
391
-
globalPos := (meta.BundleNumber * types.BUNDLE_SIZE) + pos
392
-
393
-
fmt.Printf("Found: bundle %06d, position %d\n", meta.BundleNumber, pos)
394
-
fmt.Printf("Global position: %d\n\n", globalPos)
395
-
396
-
fmt.Printf(" DID: %s\n", op.DID)
397
-
fmt.Printf(" Created: %s\n", op.CreatedAt.Format("2006-01-02 15:04:05"))
398
-
399
-
if op.IsNullified() {
400
-
fmt.Printf(" Status: โ Nullified")
401
-
if nullCID := op.GetNullifyingCID(); nullCID != "" {
402
-
fmt.Printf(" by %s", nullCID)
403
-
}
404
-
fmt.Printf("\n")
405
-
} else {
406
-
fmt.Printf(" Status: โ Active\n")
407
-
}
408
-
409
-
return nil
410
-
}
411
-
}
412
-
413
-
// Progress indicator
414
-
if meta.BundleNumber%100 == 0 {
415
-
fmt.Fprintf(os.Stderr, "Searched through bundle %06d...\r", meta.BundleNumber)
416
-
}
417
-
}
418
-
419
-
fmt.Fprintf(os.Stderr, "\nCID not found: %s\n", cid)
420
-
fmt.Fprintf(os.Stderr, "(Searched %d bundles + mempool)\n", len(bundles))
421
-
return fmt.Errorf("CID not found")
422
-
}
+56
cmd/plcbundle/commands/progress_helper.go
+56
cmd/plcbundle/commands/progress_helper.go
···
1
+
package commands
2
+
3
+
import (
4
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
5
+
)
6
+
7
+
// NewBundleProgressBar creates a progress bar with auto-calculated bytes from bundles
8
+
func NewBundleProgressBar(mgr BundleManager, start, end int) *ui.ProgressBar {
9
+
total := end - start + 1
10
+
11
+
// Calculate total bytes from bundle metadata
12
+
index := mgr.GetIndex()
13
+
totalBytes := int64(0)
14
+
15
+
for bundleNum := start; bundleNum <= end; bundleNum++ {
16
+
if meta, err := index.GetBundle(bundleNum); err == nil {
17
+
totalBytes += meta.UncompressedSize
18
+
}
19
+
}
20
+
21
+
if totalBytes > 0 {
22
+
return ui.NewProgressBarWithBytes(total, totalBytes)
23
+
}
24
+
25
+
// Fallback: estimate based on average
26
+
stats := index.GetStats()
27
+
if avgBytes := estimateAvgBundleSize(stats); avgBytes > 0 {
28
+
return ui.NewProgressBarWithBytesAuto(total, avgBytes)
29
+
}
30
+
31
+
return ui.NewProgressBar(total)
32
+
}
33
+
34
+
// estimateAvgBundleSize estimates average uncompressed bundle size
35
+
func estimateAvgBundleSize(stats map[string]interface{}) int64 {
36
+
if totalUncompressed, ok := stats["total_uncompressed_size"].(int64); ok {
37
+
if bundleCount, ok := stats["bundle_count"].(int); ok && bundleCount > 0 {
38
+
return totalUncompressed / int64(bundleCount)
39
+
}
40
+
}
41
+
return 0
42
+
}
43
+
44
+
// UpdateBundleProgress updates progress with bundle's actual size
45
+
func UpdateBundleProgress(pb *ui.ProgressBar, current int, bundle interface{}) {
46
+
// Try to extract size from bundle if available
47
+
type sizer interface {
48
+
GetUncompressedSize() int64
49
+
}
50
+
51
+
if b, ok := bundle.(sizer); ok {
52
+
pb.SetWithBytes(current, b.GetUncompressedSize())
53
+
} else {
54
+
pb.Set(current)
55
+
}
56
+
}
+610
cmd/plcbundle/commands/query.go
+610
cmd/plcbundle/commands/query.go
···
1
+
// cmd/plcbundle/commands/query.go
2
+
package commands
3
+
4
+
import (
5
+
"bytes"
6
+
"context"
7
+
"fmt"
8
+
"os"
9
+
"runtime"
10
+
"strconv"
11
+
"strings"
12
+
"sync"
13
+
"sync/atomic"
14
+
15
+
"github.com/goccy/go-json"
16
+
"github.com/jmespath-community/go-jmespath" // Correct import
17
+
"github.com/spf13/cobra"
18
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
19
+
)
20
+
21
+
func NewQueryCommand() *cobra.Command {
22
+
var (
23
+
bundleRange string
24
+
threads int
25
+
format string
26
+
limit int
27
+
noProgress bool
28
+
simple bool
29
+
)
30
+
31
+
cmd := &cobra.Command{
32
+
Use: "query <expression> [flags]",
33
+
Aliases: []string{"q"},
34
+
Short: "Query ops using JMESPath or simple dot notation",
35
+
Long: `Query operations using JMESPath expressions or simple dot notation
36
+
37
+
Stream through operations in bundles and evaluate expressions.
38
+
Supports parallel processing for better performance.
39
+
40
+
Simple Mode (--simple):
41
+
Fast field extraction using dot notation (no JMESPath parsing):
42
+
did Extract top-level field
43
+
operation.handle Nested object access
44
+
operation.services.atproto_pds.endpoint
45
+
alsoKnownAs[0] Array indexing
46
+
47
+
Performance: should be faster than JMESPath mode
48
+
Limitations: No filters, functions, or projections
49
+
50
+
JMESPath Mode (default):
51
+
Full JMESPath query language with filters, projections, functions.`,
52
+
53
+
Example: ` # Simple mode (faster)
54
+
plcbundle query did --bundles 1-100 --simple
55
+
plcbundle query operation.handle --bundles 1-100 --simple
56
+
plcbundle query operation.services.atproto_pds.endpoint --simple --bundles 1-100
57
+
58
+
# JMESPath mode (powerful)
59
+
plcbundle query 'operation.services.*.endpoint' --bundles 1-100
60
+
plcbundle query 'operation | {did: did, handle: handle}' --bundles 1-10`,
61
+
62
+
Args: cobra.ExactArgs(1),
63
+
64
+
RunE: func(cmd *cobra.Command, args []string) error {
65
+
expression := args[0]
66
+
67
+
if threads <= 0 {
68
+
threads = runtime.NumCPU()
69
+
if threads < 1 {
70
+
threads = 1
71
+
}
72
+
}
73
+
74
+
mgr, _, err := getManager(&ManagerOptions{Cmd: cmd})
75
+
if err != nil {
76
+
return err
77
+
}
78
+
defer mgr.Close()
79
+
80
+
var start, end int
81
+
if bundleRange == "" {
82
+
index := mgr.GetIndex()
83
+
bundles := index.GetBundles()
84
+
if len(bundles) == 0 {
85
+
return fmt.Errorf("no bundles available")
86
+
}
87
+
start = bundles[0].BundleNumber
88
+
end = bundles[len(bundles)-1].BundleNumber
89
+
} else {
90
+
start, end, err = parseBundleRange(bundleRange)
91
+
if err != nil {
92
+
return err
93
+
}
94
+
}
95
+
96
+
return runQuery(cmd.Context(), mgr, queryOptions{
97
+
expression: expression,
98
+
start: start,
99
+
end: end,
100
+
threads: threads,
101
+
format: format,
102
+
limit: limit,
103
+
noProgress: noProgress,
104
+
simple: simple,
105
+
})
106
+
},
107
+
}
108
+
109
+
cmd.Flags().StringVar(&bundleRange, "bundles", "", "Bundle selection: number (42) or range (1-50)")
110
+
cmd.Flags().IntVar(&threads, "threads", 0, "Number of worker threads (0 = auto-detect CPU cores)")
111
+
cmd.Flags().StringVar(&format, "format", "jsonl", "Output format: jsonl|count")
112
+
cmd.Flags().IntVar(&limit, "limit", 0, "Limit number of results (0 = unlimited)")
113
+
cmd.Flags().BoolVar(&noProgress, "no-progress", false, "Disable progress output")
114
+
cmd.Flags().BoolVar(&simple, "simple", false, "Use fast dot notation instead of JMESPath")
115
+
116
+
return cmd
117
+
}
118
+
119
+
type queryOptions struct {
120
+
expression string
121
+
start int
122
+
end int
123
+
threads int
124
+
format string
125
+
limit int
126
+
noProgress bool
127
+
simple bool
128
+
}
129
+
130
+
func runQuery(ctx context.Context, mgr BundleManager, opts queryOptions) error {
131
+
totalBundles := opts.end - opts.start + 1
132
+
133
+
if opts.threads > totalBundles {
134
+
opts.threads = totalBundles
135
+
}
136
+
137
+
fmt.Fprintf(os.Stderr, "Query: %s\n", opts.expression)
138
+
if opts.simple {
139
+
fmt.Fprintf(os.Stderr, "Mode: simple (fast dot notation)\n")
140
+
} else {
141
+
fmt.Fprintf(os.Stderr, "Mode: JMESPath\n")
142
+
}
143
+
fmt.Fprintf(os.Stderr, "Bundles: %d-%d (%d total)\n", opts.start, opts.end, totalBundles)
144
+
fmt.Fprintf(os.Stderr, "Threads: %d\n", opts.threads)
145
+
fmt.Fprintf(os.Stderr, "Format: %s\n", opts.format)
146
+
if opts.limit > 0 {
147
+
fmt.Fprintf(os.Stderr, "Limit: %d\n", opts.limit)
148
+
}
149
+
fmt.Fprintf(os.Stderr, "\n")
150
+
151
+
// FIXED: Use interface type, not pointer
152
+
var compiled jmespath.JMESPath // NOT *jmespath.JMESPath
153
+
var simpleQuery *simpleFieldExtractor
154
+
155
+
if opts.simple {
156
+
simpleQuery = parseSimplePath(opts.expression)
157
+
} else {
158
+
var err error
159
+
compiled, err = jmespath.Compile(opts.expression)
160
+
if err != nil {
161
+
return fmt.Errorf("invalid JMESPath expression: %w", err)
162
+
}
163
+
}
164
+
165
+
// Shared counters
166
+
var (
167
+
totalOps int64
168
+
matchCount int64
169
+
bytesProcessed int64
170
+
)
171
+
172
+
var progress *ui.ProgressBar
173
+
if !opts.noProgress {
174
+
progress = NewBundleProgressBar(mgr, opts.start, opts.end)
175
+
}
176
+
177
+
jobs := make(chan int, opts.threads*2)
178
+
results := make(chan queryResult, opts.threads*2)
179
+
180
+
var wg sync.WaitGroup
181
+
for w := 0; w < opts.threads; w++ {
182
+
wg.Add(1)
183
+
go func() {
184
+
defer wg.Done()
185
+
for bundleNum := range jobs {
186
+
select {
187
+
case <-ctx.Done():
188
+
return
189
+
default:
190
+
}
191
+
192
+
var res queryResult
193
+
if opts.simple {
194
+
res = processBundleQuerySimple(ctx, mgr, bundleNum, simpleQuery, opts.limit > 0, &matchCount, int64(opts.limit))
195
+
} else {
196
+
res = processBundleQuery(ctx, mgr, bundleNum, compiled, opts.limit > 0, &matchCount, int64(opts.limit))
197
+
}
198
+
results <- res
199
+
}
200
+
}()
201
+
}
202
+
203
+
go func() {
204
+
wg.Wait()
205
+
close(results)
206
+
}()
207
+
208
+
go func() {
209
+
defer close(jobs)
210
+
for bundleNum := opts.start; bundleNum <= opts.end; bundleNum++ {
211
+
select {
212
+
case jobs <- bundleNum:
213
+
case <-ctx.Done():
214
+
return
215
+
}
216
+
}
217
+
}()
218
+
219
+
processed := 0
220
+
for res := range results {
221
+
processed++
222
+
223
+
if res.err != nil {
224
+
fmt.Fprintf(os.Stderr, "\nWarning: bundle %06d failed: %v\n", res.bundleNum, res.err)
225
+
} else {
226
+
atomic.AddInt64(&totalOps, int64(res.opsProcessed))
227
+
atomic.AddInt64(&bytesProcessed, res.bytesProcessed)
228
+
229
+
if opts.format != "count" {
230
+
for _, match := range res.matches {
231
+
if opts.limit > 0 && atomic.LoadInt64(&matchCount) >= int64(opts.limit) {
232
+
break
233
+
}
234
+
fmt.Println(match)
235
+
}
236
+
}
237
+
}
238
+
239
+
if progress != nil {
240
+
progress.SetWithBytes(processed, atomic.LoadInt64(&bytesProcessed))
241
+
}
242
+
243
+
if opts.limit > 0 && atomic.LoadInt64(&matchCount) >= int64(opts.limit) {
244
+
break
245
+
}
246
+
}
247
+
248
+
if progress != nil {
249
+
progress.Finish()
250
+
}
251
+
252
+
finalMatchCount := atomic.LoadInt64(&matchCount)
253
+
finalTotalOps := atomic.LoadInt64(&totalOps)
254
+
finalBytes := atomic.LoadInt64(&bytesProcessed)
255
+
256
+
fmt.Fprintf(os.Stderr, "\n")
257
+
if opts.format == "count" {
258
+
fmt.Println(finalMatchCount)
259
+
}
260
+
261
+
fmt.Fprintf(os.Stderr, "โ Query complete\n")
262
+
fmt.Fprintf(os.Stderr, " Total operations: %s\n", formatNumber(int(finalTotalOps)))
263
+
fmt.Fprintf(os.Stderr, " Matches: %s", formatNumber(int(finalMatchCount)))
264
+
if finalTotalOps > 0 {
265
+
fmt.Fprintf(os.Stderr, " (%.2f%%)", float64(finalMatchCount)/float64(finalTotalOps)*100)
266
+
}
267
+
fmt.Fprintf(os.Stderr, "\n")
268
+
if finalBytes > 0 {
269
+
fmt.Fprintf(os.Stderr, " Data processed: %s\n", formatBytes(finalBytes))
270
+
}
271
+
272
+
return nil
273
+
}
274
+
275
+
type queryResult struct {
276
+
bundleNum int
277
+
matches []string
278
+
opsProcessed int
279
+
bytesProcessed int64
280
+
err error
281
+
}
282
+
283
+
func processBundleQuery(
284
+
ctx context.Context,
285
+
mgr BundleManager,
286
+
bundleNum int,
287
+
compiled jmespath.JMESPath, // FIXED: Interface, not pointer
288
+
checkLimit bool,
289
+
matchCount *int64,
290
+
limit int64,
291
+
) queryResult {
292
+
res := queryResult{bundleNum: bundleNum}
293
+
294
+
bundle, err := mgr.LoadBundle(ctx, bundleNum)
295
+
if err != nil {
296
+
res.err = err
297
+
return res
298
+
}
299
+
300
+
res.opsProcessed = len(bundle.Operations)
301
+
matches := make([]string, 0)
302
+
303
+
for _, op := range bundle.Operations {
304
+
if checkLimit && atomic.LoadInt64(matchCount) >= limit {
305
+
break
306
+
}
307
+
308
+
opSize := int64(len(op.RawJSON))
309
+
if opSize == 0 {
310
+
data, _ := json.Marshal(op)
311
+
opSize = int64(len(data))
312
+
}
313
+
res.bytesProcessed += opSize
314
+
315
+
var opData map[string]interface{}
316
+
if len(op.RawJSON) > 0 {
317
+
if err := json.Unmarshal(op.RawJSON, &opData); err != nil {
318
+
continue
319
+
}
320
+
} else {
321
+
data, _ := json.Marshal(op)
322
+
json.Unmarshal(data, &opData)
323
+
}
324
+
325
+
// Call Search on the interface
326
+
result, err := compiled.Search(opData)
327
+
if err != nil {
328
+
continue
329
+
}
330
+
331
+
if result == nil {
332
+
continue
333
+
}
334
+
335
+
atomic.AddInt64(matchCount, 1)
336
+
337
+
resultJSON, err := json.Marshal(result)
338
+
if err != nil {
339
+
continue
340
+
}
341
+
342
+
matches = append(matches, string(resultJSON))
343
+
}
344
+
345
+
res.matches = matches
346
+
return res
347
+
}
348
+
349
+
// ============================================================================
350
+
// SIMPLE DOT NOTATION QUERY (FAST PATH)
351
+
// ============================================================================
352
+
353
+
type simpleFieldExtractor struct {
354
+
path []pathSegment
355
+
}
356
+
357
+
type pathSegment struct {
358
+
field string
359
+
arrayIndex int // -1 if not array access
360
+
isArray bool
361
+
}
362
+
363
+
func parseSimplePath(path string) *simpleFieldExtractor {
364
+
segments := make([]pathSegment, 0)
365
+
current := ""
366
+
367
+
for i := 0; i < len(path); i++ {
368
+
ch := path[i]
369
+
370
+
switch ch {
371
+
case '.':
372
+
if current != "" {
373
+
segments = append(segments, pathSegment{field: current, arrayIndex: -1})
374
+
current = ""
375
+
}
376
+
377
+
case '[':
378
+
if current != "" {
379
+
end := i + 1
380
+
for end < len(path) && path[end] != ']' {
381
+
end++
382
+
}
383
+
if end < len(path) {
384
+
indexStr := path[i+1 : end]
385
+
index := 0
386
+
fmt.Sscanf(indexStr, "%d", &index)
387
+
388
+
segments = append(segments, pathSegment{
389
+
field: current,
390
+
arrayIndex: index,
391
+
isArray: true,
392
+
})
393
+
current = ""
394
+
i = end
395
+
}
396
+
}
397
+
398
+
default:
399
+
current += string(ch)
400
+
}
401
+
}
402
+
403
+
if current != "" {
404
+
segments = append(segments, pathSegment{field: current, arrayIndex: -1})
405
+
}
406
+
407
+
return &simpleFieldExtractor{path: segments}
408
+
}
409
+
410
+
func (sfe *simpleFieldExtractor) extract(rawJSON []byte) (interface{}, bool) {
411
+
if len(sfe.path) == 0 {
412
+
return nil, false
413
+
}
414
+
415
+
// ULTRA-FAST PATH: Single top-level field (no JSON parsing!)
416
+
if len(sfe.path) == 1 && !sfe.path[0].isArray {
417
+
field := sfe.path[0].field
418
+
return extractTopLevelField(rawJSON, field)
419
+
}
420
+
421
+
// Nested paths: minimal parsing required
422
+
var data map[string]interface{}
423
+
if err := json.Unmarshal(rawJSON, &data); err != nil {
424
+
return nil, false
425
+
}
426
+
427
+
return sfe.extractFromData(data, 0)
428
+
}
429
+
430
+
// extractTopLevelField - NO JSON PARSING for simple fields (50-100x faster!)
431
+
func extractTopLevelField(rawJSON []byte, field string) (interface{}, bool) {
432
+
searchPattern := []byte(fmt.Sprintf(`"%s":`, field))
433
+
434
+
idx := bytes.Index(rawJSON, searchPattern)
435
+
if idx == -1 {
436
+
return nil, false
437
+
}
438
+
439
+
valueStart := idx + len(searchPattern)
440
+
for valueStart < len(rawJSON) && (rawJSON[valueStart] == ' ' || rawJSON[valueStart] == '\t') {
441
+
valueStart++
442
+
}
443
+
444
+
if valueStart >= len(rawJSON) {
445
+
return nil, false
446
+
}
447
+
448
+
switch rawJSON[valueStart] {
449
+
case '"':
450
+
// String: find closing quote
451
+
end := valueStart + 1
452
+
for end < len(rawJSON) {
453
+
if rawJSON[end] == '"' {
454
+
if end > valueStart+1 && rawJSON[end-1] == '\\' {
455
+
end++
456
+
continue
457
+
}
458
+
return string(rawJSON[valueStart+1 : end]), true
459
+
}
460
+
end++
461
+
}
462
+
return nil, false
463
+
464
+
case '{', '[':
465
+
// Complex type: need parsing
466
+
var temp map[string]interface{}
467
+
if err := json.Unmarshal(rawJSON, &temp); err != nil {
468
+
return nil, false
469
+
}
470
+
if val, ok := temp[field]; ok {
471
+
return val, true
472
+
}
473
+
return nil, false
474
+
475
+
default:
476
+
// Primitives: number, boolean, null
477
+
end := valueStart
478
+
for end < len(rawJSON) {
479
+
ch := rawJSON[end]
480
+
if ch == ',' || ch == '}' || ch == ']' || ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t' {
481
+
break
482
+
}
483
+
end++
484
+
}
485
+
486
+
valueStr := strings.TrimSpace(string(rawJSON[valueStart:end]))
487
+
488
+
if valueStr == "null" {
489
+
return nil, false
490
+
}
491
+
if valueStr == "true" {
492
+
return true, true
493
+
}
494
+
if valueStr == "false" {
495
+
return false, true
496
+
}
497
+
498
+
if num, err := strconv.ParseFloat(valueStr, 64); err == nil {
499
+
return num, true
500
+
}
501
+
502
+
return valueStr, true
503
+
}
504
+
}
505
+
506
+
func (sfe *simpleFieldExtractor) extractFromData(data interface{}, segmentIdx int) (interface{}, bool) {
507
+
if segmentIdx >= len(sfe.path) {
508
+
return data, true
509
+
}
510
+
511
+
segment := sfe.path[segmentIdx]
512
+
513
+
if m, ok := data.(map[string]interface{}); ok {
514
+
val, exists := m[segment.field]
515
+
if !exists {
516
+
return nil, false
517
+
}
518
+
519
+
if segment.isArray {
520
+
if arr, ok := val.([]interface{}); ok {
521
+
if segment.arrayIndex >= 0 && segment.arrayIndex < len(arr) {
522
+
val = arr[segment.arrayIndex]
523
+
} else {
524
+
return nil, false
525
+
}
526
+
} else {
527
+
return nil, false
528
+
}
529
+
}
530
+
531
+
if segmentIdx == len(sfe.path)-1 {
532
+
return val, true
533
+
}
534
+
return sfe.extractFromData(val, segmentIdx+1)
535
+
}
536
+
537
+
if arr, ok := data.([]interface{}); ok {
538
+
if segment.isArray && segment.arrayIndex >= 0 && segment.arrayIndex < len(arr) {
539
+
val := arr[segment.arrayIndex]
540
+
if segmentIdx == len(sfe.path)-1 {
541
+
return val, true
542
+
}
543
+
return sfe.extractFromData(val, segmentIdx+1)
544
+
}
545
+
}
546
+
547
+
return nil, false
548
+
}
549
+
550
+
func processBundleQuerySimple(
551
+
ctx context.Context,
552
+
mgr BundleManager,
553
+
bundleNum int,
554
+
extractor *simpleFieldExtractor,
555
+
checkLimit bool,
556
+
matchCount *int64,
557
+
limit int64,
558
+
) queryResult {
559
+
res := queryResult{bundleNum: bundleNum}
560
+
561
+
bundle, err := mgr.LoadBundle(ctx, bundleNum)
562
+
if err != nil {
563
+
res.err = err
564
+
return res
565
+
}
566
+
567
+
res.opsProcessed = len(bundle.Operations)
568
+
matches := make([]string, 0)
569
+
570
+
for _, op := range bundle.Operations {
571
+
if checkLimit && atomic.LoadInt64(matchCount) >= limit {
572
+
break
573
+
}
574
+
575
+
opSize := int64(len(op.RawJSON))
576
+
if opSize == 0 {
577
+
data, _ := json.Marshal(op)
578
+
opSize = int64(len(data))
579
+
}
580
+
res.bytesProcessed += opSize
581
+
582
+
var result interface{}
583
+
var found bool
584
+
585
+
if len(op.RawJSON) > 0 {
586
+
result, found = extractor.extract(op.RawJSON)
587
+
} else {
588
+
data, _ := json.Marshal(op)
589
+
result, found = extractor.extract(data)
590
+
}
591
+
592
+
if !found || result == nil {
593
+
continue
594
+
}
595
+
596
+
atomic.AddInt64(matchCount, 1)
597
+
598
+
var resultJSON []byte
599
+
if str, ok := result.(string); ok {
600
+
resultJSON = []byte(fmt.Sprintf(`"%s"`, str))
601
+
} else {
602
+
resultJSON, _ = json.Marshal(result)
603
+
}
604
+
605
+
matches = append(matches, string(resultJSON))
606
+
}
607
+
608
+
res.matches = matches
609
+
return res
610
+
}
+9
-9
cmd/plcbundle/commands/rollback.go
+9
-9
cmd/plcbundle/commands/rollback.go
···
8
8
"strings"
9
9
10
10
"github.com/spf13/cobra"
11
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/ui"
12
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
11
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
13
13
)
14
14
15
15
func NewRollbackCommand() *cobra.Command {
···
46
46
7. Optionally rebuilds DID index`,
47
47
48
48
Example: ` # Rollback TO bundle 100 (keeps 1-100, removes 101+)
49
-
plcbundle rollback --to 100
49
+
plcbundle-go rollback --to 100
50
50
51
51
# Remove last 5 bundles
52
-
plcbundle rollback --last 5
52
+
plcbundle-go rollback --last 5
53
53
54
54
# Rollback without confirmation
55
-
plcbundle rollback --to 50 --force
55
+
plcbundle-go rollback --to 50 --force
56
56
57
57
# Rollback and rebuild DID index
58
-
plcbundle rollback --to 100 --rebuild-did-index
58
+
plcbundle-go rollback --to 100 --rebuild-did-index
59
59
60
60
# Rollback but keep bundle files (index-only)
61
-
plcbundle rollback --to 100 --keep-files`,
61
+
plcbundle-go rollback --to 100 --keep-files`,
62
62
63
63
RunE: func(cmd *cobra.Command, args []string) error {
64
64
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
···
464
464
fmt.Printf(" Index has bundle %06d, but repository now at %06d\n",
465
465
config.LastBundle, plan.targetBundle)
466
466
}
467
-
fmt.Printf(" Run: plcbundle index build\n")
467
+
fmt.Printf(" Run: plcbundle-go index build\n")
468
468
}
469
469
470
470
return nil
···
524
524
if !opts.rebuildDIDIndex && plan.hasDIDIndex {
525
525
fmt.Printf("๐ก Next Steps\n")
526
526
fmt.Printf(" DID index is out of date. Rebuild with:\n")
527
-
fmt.Printf(" plcbundle index build\n\n")
527
+
fmt.Printf(" plcbundle-go index build\n\n")
528
528
}
529
529
}
+16
-15
cmd/plcbundle/commands/server.go
+16
-15
cmd/plcbundle/commands/server.go
···
9
9
"time"
10
10
11
11
"github.com/spf13/cobra"
12
-
"tangled.org/atscan.net/plcbundle/bundle"
13
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
14
-
"tangled.org/atscan.net/plcbundle/server"
12
+
"tangled.org/atscan.net/plcbundle-go/bundle"
13
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
14
+
"tangled.org/atscan.net/plcbundle-go/server"
15
15
)
16
16
17
17
func NewServerCommand() *cobra.Command {
···
43
43
- Live mempool (in sync mode)
44
44
45
45
Sync mode (--sync) runs as a daemon, continuously fetching new bundles.
46
-
For one-time sync, use 'plcbundle sync' command instead.`,
46
+
For one-time sync, use 'plcbundle-go sync' command instead.`,
47
47
48
48
Example: ` # Basic server (read-only, current directory)
49
-
plcbundle server
49
+
plcbundle-go server
50
50
51
51
# Server with specific directory
52
-
plcbundle server --dir ./my-bundles
52
+
plcbundle-go server --dir ./my-bundles
53
53
54
54
# Live syncing server (daemon mode)
55
-
plcbundle server --sync
56
-
plcbundle server -s
55
+
plcbundle-go server --sync
56
+
plcbundle-go server -s
57
57
58
58
# Using alias
59
-
plcbundle serve -s
59
+
plcbundle-go serve -s
60
60
61
61
# Custom port and host
62
-
plcbundle server --port 3000 --host 0.0.0.0
62
+
plcbundle-go server --port 3000 --host 0.0.0.0
63
63
64
64
# Full featured server
65
-
plcbundle server -s --websocket --resolver
65
+
plcbundle-go server -s --websocket --resolver
66
66
67
67
# Fast sync interval
68
-
plcbundle server -s --interval 30s
68
+
plcbundle-go server -s --interval 30s
69
69
70
70
# Sync with limit (stop after 1000 bundles)
71
-
plcbundle server -s --max-bundles 1000
71
+
plcbundle-go server -s --max-bundles 1000
72
72
73
73
# Public server with all features
74
-
plcbundle serve -s --websocket --resolver --host 0.0.0.0 --port 80`,
74
+
plcbundle-go serve -s --websocket --resolver --host 0.0.0.0 --port 80`,
75
75
76
76
RunE: func(cmd *cobra.Command, args []string) error {
77
77
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
78
78
79
-
// โจ Server in sync mode can create repo, read-only mode cannot
79
+
// Server in sync mode can create repo, read-only mode cannot
80
80
mgr, dir, err := getManager(&ManagerOptions{
81
81
Cmd: cmd,
82
82
PLCURL: plcURL,
···
131
131
if syncMode {
132
132
go runServerSyncLoop(ctx, mgr, syncInterval, maxBundles, verbose)
133
133
}
134
+
mgr.SetQuiet(true)
134
135
135
136
// Create and start HTTP server
136
137
serverConfig := &server.Config{
+6
-6
cmd/plcbundle/commands/status.go
+6
-6
cmd/plcbundle/commands/status.go
···
6
6
"time"
7
7
8
8
"github.com/spf13/cobra"
9
-
"tangled.org/atscan.net/plcbundle/bundle"
10
-
"tangled.org/atscan.net/plcbundle/internal/types"
9
+
"tangled.org/atscan.net/plcbundle-go/bundle"
10
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
11
11
)
12
12
13
13
func NewStatusCommand() *cobra.Command {
···
21
21
storage, timeline, mempool, and DID index status.`,
22
22
23
23
Example: ` # Show status
24
-
plcbundle status
24
+
plcbundle-go status
25
25
26
26
# Using alias
27
-
plcbundle info`,
27
+
plcbundle-go info`,
28
28
29
29
RunE: func(cmd *cobra.Command, args []string) error {
30
30
mgr, dir, err := getManager(&ManagerOptions{Cmd: cmd})
···
61
61
if bundleCount == 0 {
62
62
fmt.Printf("โ ๏ธ Empty repository (no bundles)\n\n")
63
63
fmt.Printf("Get started:\n")
64
-
fmt.Printf(" plcbundle clone <url> Clone from remote\n")
65
-
fmt.Printf(" plcbundle sync Fetch from PLC directory\n\n")
64
+
fmt.Printf(" plcbundle-go clone <url> Clone from remote\n")
65
+
fmt.Printf(" plcbundle-go sync Fetch from PLC directory\n\n")
66
66
return nil
67
67
}
68
68
+11
-11
cmd/plcbundle/commands/sync.go
+11
-11
cmd/plcbundle/commands/sync.go
···
9
9
"time"
10
10
11
11
"github.com/spf13/cobra"
12
-
"tangled.org/atscan.net/plcbundle/bundle"
13
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
12
+
"tangled.org/atscan.net/plcbundle-go/bundle"
13
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
14
14
)
15
15
16
16
func NewSyncCommand() *cobra.Command {
···
34
34
that continuously syncs at regular intervals.`,
35
35
36
36
Example: ` # Fetch new bundles once
37
-
plcbundle sync
37
+
plcbundle-go sync
38
38
39
39
# Fetch from specific directory
40
-
plcbundle sync --dir ./my-bundles
40
+
plcbundle-go sync --dir ./my-bundles
41
41
42
42
# Run continuously (daemon mode)
43
-
plcbundle sync --continuous
43
+
plcbundle-go sync --continuous
44
44
45
45
# Custom sync interval
46
-
plcbundle sync --continuous --interval 30s
46
+
plcbundle-go sync --continuous --interval 30s
47
47
48
48
# Fetch maximum 10 bundles then stop
49
-
plcbundle sync --max-bundles 10
49
+
plcbundle-go sync --max-bundles 10
50
50
51
51
# Continuous with limit
52
-
plcbundle sync --continuous --max-bundles 100 --interval 1m
52
+
plcbundle-go sync --continuous --max-bundles 100 --interval 1m
53
53
54
54
# Verbose output
55
-
plcbundle sync --continuous -v`,
55
+
plcbundle-go sync --continuous -v`,
56
56
57
57
RunE: func(cmd *cobra.Command, args []string) error {
58
58
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
59
59
quiet, _ := cmd.Root().PersistentFlags().GetBool("quiet")
60
60
61
-
// โจ Sync creates repository if missing
61
+
// Sync creates repository if missing
62
62
mgr, dir, err := getManager(&ManagerOptions{
63
63
Cmd: cmd,
64
64
PLCURL: plcURL,
···
112
112
}
113
113
114
114
// Call manager method (not internal directly)
115
-
synced, err := mgr.RunSyncOnce(ctx, config, verbose)
115
+
synced, err := mgr.RunSyncOnce(ctx, config)
116
116
if err != nil {
117
117
return err
118
118
}
+8
-8
cmd/plcbundle/commands/verify.go
+8
-8
cmd/plcbundle/commands/verify.go
···
7
7
"time"
8
8
9
9
"github.com/spf13/cobra"
10
-
"tangled.org/atscan.net/plcbundle/bundle"
11
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/ui"
10
+
"tangled.org/atscan.net/plcbundle-go/bundle"
11
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/ui"
12
12
)
13
13
14
14
func NewVerifyCommand() *cobra.Command {
···
37
37
โข Chain hash calculations are valid`,
38
38
39
39
Example: ` # Verify entire chain
40
-
plcbundle verify
41
-
plcbundle verify --chain
40
+
plcbundle-go verify
41
+
plcbundle-go verify --chain
42
42
43
43
# Verify specific bundle
44
-
plcbundle verify --bundle 42
44
+
plcbundle-go verify --bundle 42
45
45
46
46
# Verify range of bundles
47
-
plcbundle verify --range 1-100
47
+
plcbundle-go verify --range 1-100
48
48
49
49
# Verbose output
50
-
plcbundle verify --chain -v
50
+
plcbundle-go verify --chain -v
51
51
52
52
# Parallel verification (faster for ranges)
53
-
plcbundle verify --range 1-1000 --parallel --workers 8`,
53
+
plcbundle-go verify --range 1-1000 --parallel --workers 8`,
54
54
55
55
RunE: func(cmd *cobra.Command, args []string) error {
56
56
verbose, _ := cmd.Root().PersistentFlags().GetBool("verbose")
+19
-17
cmd/plcbundle/main.go
+19
-17
cmd/plcbundle/main.go
···
5
5
"os"
6
6
7
7
"github.com/spf13/cobra"
8
-
"tangled.org/atscan.net/plcbundle/cmd/plcbundle/commands"
8
+
"tangled.org/atscan.net/plcbundle-go/cmd/plcbundle/commands"
9
9
)
10
10
11
11
func main() {
···
20
20
21
21
func newRootCommand() *cobra.Command {
22
22
cmd := &cobra.Command{
23
-
Use: "plcbundle",
23
+
Use: "plcbundle-go",
24
24
Short: "DID PLC Bundle Management Tool",
25
-
Long: `plcbundle - DID PLC Bundle Management Tool
25
+
Long: `plcbundle-go - DID PLC Bundle Management Tool
26
26
27
27
Tool for archiving AT Protocol's DID PLC Directory operations
28
28
into immutable, cryptographically-chained bundles of 10,000
···
58
58
//cmd.AddCommand(commands.NewGapsCommand())
59
59
cmd.AddCommand(commands.NewVerifyCommand())
60
60
cmd.AddCommand(commands.NewDiffCommand())
61
-
/*cmd.AddCommand(commands.NewStatsCommand())
62
-
cmd.AddCommand(commands.NewInspectCommand())*/
61
+
//cmd.AddCommand(commands.NewStatsCommand())
62
+
cmd.AddCommand(commands.NewInspectCommand())
63
+
cmd.AddCommand(commands.NewQueryCommand())
63
64
64
65
// Namespaced commands
65
66
cmd.AddCommand(commands.NewDIDCommand())
···
71
72
/*cmd.AddCommand(commands.NewWatchCommand())
72
73
cmd.AddCommand(commands.NewHealCommand())*/
73
74
cmd.AddCommand(commands.NewCleanCommand())
75
+
cmd.AddCommand(commands.NewMigrateCommand())
74
76
75
77
// Server
76
78
cmd.AddCommand(commands.NewServerCommand())
···
87
89
Use: "version",
88
90
Short: "Show version information",
89
91
Run: func(cmd *cobra.Command, args []string) {
90
-
cmd.Printf("plcbundle version %s\n", GetVersion())
92
+
cmd.Printf("plcbundle-go version %s\n", GetVersion())
91
93
cmd.Printf(" commit: %s\n", getGitCommit())
92
94
cmd.Printf(" built: %s\n", getBuildDate())
93
95
},
···
103
105
To load completions:
104
106
105
107
Bash:
106
-
$ source <(plcbundle completion bash)
108
+
$ source <(plcbundle-go completion bash)
107
109
108
110
# To load automatically:
109
-
$ plcbundle completion bash > /etc/bash_completion.d/plcbundle
111
+
$ plcbundle-go completion bash > /etc/bash_completion.d/plcbundle-go
110
112
111
113
Zsh:
112
-
$ plcbundle completion zsh > ~/.zsh/completion/_plcbundle
114
+
$ plcbundle-go completion zsh > ~/.zsh/completion/_plcbundle-go
113
115
114
116
# Add to ~/.zshrc:
115
117
fpath=(~/.zsh/completion $fpath)
116
118
117
119
Fish:
118
-
$ plcbundle completion fish > ~/.config/fish/completions/plcbundle.fish`,
120
+
$ plcbundle-go completion fish > ~/.config/fish/completions/plcbundle-go.fish`,
119
121
120
122
Args: cobra.ExactArgs(1),
121
123
ValidArgs: []string{"bash", "zsh", "fish", "powershell"},
···
138
140
}
139
141
140
142
func printRootHelp() {
141
-
fmt.Print(`plcbundle ` + GetVersion() + ` - DID PLC Bundle Management
143
+
fmt.Print(`plcbundle-go ` + GetVersion() + ` - DID PLC Bundle Management
142
144
143
-
Usage: plcbundle <command> [options]
145
+
Usage: plcbundle-go <command> [options]
144
146
145
147
Main Commands:
146
148
sync Fetch new bundles from PLC
···
158
160
Tools: watch, heal, clean, mempool, detector
159
161
160
162
Getting Started:
161
-
plcbundle clone https://plc.example.com
162
-
plcbundle sync
163
-
plcbundle status
163
+
plcbundle-go clone https://plc.example.com
164
+
plcbundle-go sync
165
+
plcbundle-go status
164
166
165
-
Run 'plcbundle help' for full documentation
166
-
Run 'plcbundle <command> --help' for command help
167
+
Run 'plcbundle-go help' for full documentation
168
+
Run 'plcbundle-go <command> --help' for command help
167
169
`)
168
170
}
+41
-9
cmd/plcbundle/ui/progress.go
+41
-9
cmd/plcbundle/ui/progress.go
···
19
19
width int
20
20
lastPrint time.Time
21
21
showBytes bool
22
+
autoBytes bool // Auto-calculate bytes from items
23
+
bytesPerItem int64
22
24
}
23
25
24
-
// NewProgressBar creates a new progress bar
26
+
// NewProgressBar creates a simple progress bar
25
27
func NewProgressBar(total int) *ProgressBar {
26
28
return &ProgressBar{
27
29
total: total,
···
32
34
}
33
35
}
34
36
35
-
// NewProgressBarWithBytes creates a new progress bar that tracks bytes
37
+
// NewProgressBarWithBytes creates a progress bar that tracks bytes
36
38
func NewProgressBarWithBytes(total int, totalBytes int64) *ProgressBar {
37
39
return &ProgressBar{
38
40
total: total,
···
44
46
}
45
47
}
46
48
47
-
// Set sets the current progress
49
+
// NewProgressBarWithBytesAuto creates a progress bar that auto-estimates bytes
50
+
// avgBytesPerItem is the estimated bytes per item (e.g., avg bundle size)
51
+
func NewProgressBarWithBytesAuto(total int, avgBytesPerItem int64) *ProgressBar {
52
+
return &ProgressBar{
53
+
total: total,
54
+
totalBytes: int64(total) * avgBytesPerItem,
55
+
startTime: time.Now(),
56
+
width: 40,
57
+
lastPrint: time.Now(),
58
+
showBytes: true,
59
+
autoBytes: true,
60
+
bytesPerItem: avgBytesPerItem,
61
+
}
62
+
}
63
+
64
+
// Set sets the current progress (auto-estimates bytes if enabled)
48
65
func (pb *ProgressBar) Set(current int) {
49
66
pb.mu.Lock()
50
67
defer pb.mu.Unlock()
51
68
pb.current = current
69
+
70
+
// Auto-calculate bytes if enabled
71
+
if pb.autoBytes && pb.bytesPerItem > 0 {
72
+
pb.currentBytes = int64(current) * pb.bytesPerItem
73
+
}
74
+
52
75
pb.print()
53
76
}
54
77
55
-
// SetWithBytes sets progress with byte tracking
78
+
// SetWithBytes sets progress with exact byte tracking
56
79
func (pb *ProgressBar) SetWithBytes(current int, bytesProcessed int64) {
57
80
pb.mu.Lock()
58
81
defer pb.mu.Unlock()
···
62
85
pb.print()
63
86
}
64
87
88
+
// AddBytes increments current progress and adds bytes
89
+
func (pb *ProgressBar) AddBytes(increment int, bytes int64) {
90
+
pb.mu.Lock()
91
+
defer pb.mu.Unlock()
92
+
pb.current += increment
93
+
pb.currentBytes += bytes
94
+
pb.showBytes = true
95
+
pb.print()
96
+
}
97
+
65
98
// Finish completes the progress bar
66
99
func (pb *ProgressBar) Finish() {
67
100
pb.mu.Lock()
···
106
139
eta = time.Duration(float64(remaining)/speed) * time.Second
107
140
}
108
141
109
-
// โจ FIX: Check if complete
110
142
isComplete := pb.current >= pb.total
111
143
112
144
if pb.showBytes && pb.currentBytes > 0 {
113
145
mbProcessed := float64(pb.currentBytes) / (1000 * 1000)
114
-
mbPerSec := mbProcessed / elapsed.Seconds()
146
+
mbPerSec := 0.0
147
+
if elapsed.Seconds() > 0 {
148
+
mbPerSec = mbProcessed / elapsed.Seconds()
149
+
}
115
150
116
151
if isComplete {
117
-
// โจ Don't show ETA when done
118
152
fmt.Fprintf(os.Stderr, "\r [%s] %6.2f%% | %d/%d | %.1f/s | %.1f MB/s | Done ",
119
153
bar, percent, pb.current, pb.total, speed, mbPerSec)
120
154
} else {
···
123
157
}
124
158
} else {
125
159
if isComplete {
126
-
// โจ Don't show ETA when done
127
160
fmt.Fprintf(os.Stderr, "\r [%s] %6.2f%% | %d/%d | %.1f/s | Done ",
128
161
bar, percent, pb.current, pb.total, speed)
129
162
} else {
···
134
167
}
135
168
136
169
func formatETA(d time.Duration) string {
137
-
// โจ This should never be called with 0 now, but keep as fallback
138
170
if d == 0 {
139
171
return "0s"
140
172
}
+1
-1
detector/builtin.go
+1
-1
detector/builtin.go
+1
-1
detector/detector.go
+1
-1
detector/detector.go
+2
-2
detector/runner.go
+2
-2
detector/runner.go
···
7
7
"sync"
8
8
"time"
9
9
10
-
"tangled.org/atscan.net/plcbundle/bundle"
11
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
10
+
"tangled.org/atscan.net/plcbundle-go/bundle"
11
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
12
12
)
13
13
14
14
// Runner executes detectors against operations
+2
-2
detector/script.go
+2
-2
detector/script.go
···
14
14
"time"
15
15
16
16
"github.com/goccy/go-json"
17
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
17
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
18
18
)
19
19
20
20
// ScriptDetector runs a JavaScript detector via Unix socket
···
146
146
return nil, fmt.Errorf("not connected to server")
147
147
}
148
148
149
-
// โจ LOCK for entire socket communication
149
+
// LOCK for entire socket communication
150
150
d.mu.Lock()
151
151
defer d.mu.Unlock()
152
152
+1
-331
docs/cli.md
+1
-331
docs/cli.md
···
1
1
# CLI Guide
2
2
3
-
A concise guide to using the `plcbundle` command-line tool.
4
-
5
-
## Installation
6
-
7
-
```bash
8
-
go install tangled.org/atscan.net/plcbundle/cmd/plcbundle@latest
9
-
plcbundle version # Verify installation
10
-
```
11
-
12
-
## Quick Start
13
-
14
-
```bash
15
-
mkdir plc_archive && cd plc_archive
16
-
plcbundle fetch -count 1 # Fetch one bundle
17
-
plcbundle info # Check what you have
18
-
```
19
-
20
-
---
21
-
22
-
## Commands
23
-
24
-
### `fetch` - Download from PLC Directory
25
-
26
-
Fetches operations from PLC directory and creates bundles.
27
-
28
-
```bash
29
-
plcbundle fetch -count 1 # Fetch exactly 1 bundle
30
-
plcbundle fetch -count 10 # Fetch 10 bundles
31
-
plcbundle fetch # Fetch continuously until caught up
32
-
```
33
-
34
-
**Important:** Without `-count`, fetch runs indefinitely. Always use `-count N` for controlled fetching.
35
-
36
-
**Options:**
37
-
- `-count N` - Number of bundles to fetch (0 = all available)
38
-
- `-plc URL` - Custom PLC directory URL (default: `https://plc.directory`)
39
-
40
-
---
41
-
42
-
### `clone` - Download from Remote Server
43
-
44
-
Downloads pre-made bundles from another plcbundle server (much faster than fetch).
45
-
46
-
```bash
47
-
plcbundle clone https://plc.example.com
48
-
plcbundle clone https://plc.example.com -workers 16 # Faster with more workers
49
-
```
50
-
51
-
**Resumable:** Press Ctrl+C to stop, run again to resume.
52
-
53
-
**Options:**
54
-
- `-workers N` - Concurrent downloads (default: 4)
55
-
- `-v` - Verbose output
56
-
- `-skip-existing` - Skip existing bundles (default: true)
57
-
58
-
---
59
-
60
-
### `info` - View Archive Status
61
-
62
-
Shows bundle count, storage size, time ranges, and chain hashes.
63
-
64
-
```bash
65
-
plcbundle info # General overview
66
-
plcbundle info -bundle 42 # Specific bundle details
67
-
plcbundle info --bundles # List all bundles
68
-
plcbundle info --verify # Info + chain verification
69
-
```
70
-
71
-
---
72
-
73
-
### `verify` - Check Integrity
74
-
75
-
Verifies file hashes and chain links.
76
-
77
-
```bash
78
-
plcbundle verify # Verify entire chain
79
-
plcbundle verify -bundle 42 # Verify one bundle
80
-
plcbundle verify -v # Verbose output
81
-
```
82
-
83
-
---
84
-
85
-
### `rebuild` - Recreate Index
86
-
87
-
Scans bundle files and rebuilds `index.json`.
88
-
89
-
```bash
90
-
plcbundle rebuild # Auto-detect CPU cores
91
-
plcbundle rebuild -workers 8 # Use 8 workers
92
-
```
93
-
94
-
**When to use:**
95
-
- Lost/corrupted `index.json`
96
-
- Added bundle files manually
97
-
- Moved files from another location
98
-
99
-
---
100
-
101
-
### `export` - Extract Operations
102
-
103
-
Exports operations as JSONL to stdout.
104
-
105
-
```bash
106
-
plcbundle export -count 1000 > ops.jsonl
107
-
plcbundle export -after "2024-01-01T00:00:00Z" -count 5000 > jan.jsonl
108
-
```
109
-
110
-
---
111
-
112
-
### `backfill` - Stream All Operations
113
-
114
-
Streams operations from all bundles, fetching missing ones on-demand.
115
-
116
-
```bash
117
-
plcbundle backfill > all.jsonl
118
-
plcbundle backfill -start 100 -end 200 > range.jsonl
119
-
```
120
-
121
-
---
122
-
123
-
### `mempool` - Inspect Staging Area
124
-
125
-
Shows operations waiting to form a bundle (need 10,000 to create bundle).
126
-
127
-
```bash
128
-
plcbundle mempool # Show status
129
-
plcbundle mempool -export > mem.jsonl # Export mempool ops
130
-
plcbundle mempool -validate # Verify chronological order
131
-
plcbundle mempool -clear # Clear (destructive)
132
-
```
133
-
134
-
---
135
-
136
-
### `serve` - Run HTTP Server
137
-
138
-
Starts an HTTP server to share bundles with others.
139
-
140
-
```bash
141
-
plcbundle serve # Start on :8080
142
-
plcbundle serve -port 9000 -host 0.0.0.0 # Custom port/host
143
-
plcbundle serve -sync -sync-interval 5m # Auto-fetch new bundles
144
-
plcbundle serve -websocket # Enable WebSocket streaming
145
-
```
146
-
147
-
**Endpoints:**
148
-
- `GET /` - Info page
149
-
- `GET /index.json` - Bundle index
150
-
- `GET /data/:number` - Download bundle
151
-
- `WS /ws` - WebSocket stream (if enabled)
152
-
153
-
---
154
-
155
-
### `compare` - Compare with Remote
156
-
157
-
Shows differences between local and remote archives.
158
-
159
-
```bash
160
-
plcbundle compare https://plc.example.com
161
-
plcbundle compare https://plc.example.com --fetch-missing # Auto-fix
162
-
```
163
-
164
-
---
165
-
166
-
### `version` - Show Version
167
-
168
-
```bash
169
-
plcbundle version
170
-
```
171
-
172
-
---
173
-
174
-
## Important Concepts
175
-
176
-
### Working Directory
177
-
178
-
plcbundle operates in your **current directory**. Always `cd` to your archive first:
179
-
180
-
```bash
181
-
cd /path/to/plc_archive
182
-
plcbundle info
183
-
```
184
-
185
-
### Files Created
186
-
187
-
```
188
-
plc_archive/
189
-
โโโ 000001.jsonl.zst # Bundle files (10k ops each)
190
-
โโโ 000002.jsonl.zst
191
-
โโโ index.json # Index (metadata + hashes)
192
-
โโโ plc_mempool_*.jsonl # Mempool (auto-managed, temporary)
193
-
```
194
-
195
-
### Fetch vs Clone
196
-
197
-
**Use `fetch`** when:
198
-
- No mirror available
199
-
- Want data directly from PLC
200
-
- Building from scratch
201
-
202
-
**Use `clone`** when:
203
-
- A mirror exists
204
-
- Want faster setup
205
-
- Syncing with known good source
206
-
207
-
---
208
-
209
-
## Common Tasks
210
-
211
-
**Initial setup from mirror:**
212
-
```bash
213
-
mkdir plc_archive && cd plc_archive
214
-
plcbundle clone https://plc.example.com -workers 16
215
-
plcbundle verify
216
-
```
217
-
218
-
**Initial setup from PLC:**
219
-
```bash
220
-
mkdir plc_archive && cd plc_archive
221
-
plcbundle fetch -count 0 # Fetch all (can take hours)
222
-
```
223
-
224
-
**Daily sync (cron):**
225
-
```bash
226
-
#!/bin/bash
227
-
cd /path/to/plc_archive
228
-
plcbundle fetch -count 5 # Fetch up to 5 new bundles
229
-
```
230
-
231
-
**Share your archive:**
232
-
```bash
233
-
plcbundle serve -host 0.0.0.0 -sync
234
-
```
235
-
236
-
**Export recent data:**
237
-
```bash
238
-
plcbundle export -count 10000 > recent.jsonl
239
-
cat recent.jsonl | jq . # Process with jq
240
-
```
241
-
242
-
**Fix corrupted index:**
243
-
```bash
244
-
plcbundle rebuild
245
-
plcbundle verify
246
-
```
247
-
248
-
---
249
-
250
-
## Troubleshooting
251
-
252
-
**Command not found:**
253
-
```bash
254
-
export PATH=$PATH:$(go env GOPATH)/bin
255
-
```
256
-
257
-
**Wrong directory:**
258
-
```bash
259
-
pwd # Check where you are
260
-
cd /path/to/plc_archive
261
-
```
262
-
263
-
**Fetch doesn't create bundle:**
264
-
```bash
265
-
plcbundle mempool # Check if waiting for more ops
266
-
# Need 10,000 operations to create a bundle
267
-
```
268
-
269
-
**Port already in use:**
270
-
```bash
271
-
plcbundle serve -port 9000
272
-
```
273
-
274
-
**Hash verification failed:**
275
-
```bash
276
-
rm 000042.jsonl.zst # Delete corrupted bundle
277
-
plcbundle rebuild # Mark as missing
278
-
plcbundle fetch -count 1 # Re-fetch
279
-
```
280
-
281
-
**Out of disk space:**
282
-
```bash
283
-
df -h . # Check space
284
-
# Move to larger disk or delete old bundles
285
-
```
286
-
287
-
---
288
-
289
-
## Quick Reference
290
-
291
-
```bash
292
-
# Fetch
293
-
plcbundle fetch -count 1 # One bundle
294
-
plcbundle fetch # All available
295
-
296
-
# Clone
297
-
plcbundle clone <url> # From mirror
298
-
plcbundle clone <url> -workers 16 # Faster
299
-
300
-
# Info
301
-
plcbundle info # Overview
302
-
plcbundle info -bundle 42 # Specific bundle
303
-
304
-
# Verify
305
-
plcbundle verify # Check chain
306
-
plcbundle verify -bundle 42 # Check one
307
-
308
-
# Rebuild
309
-
plcbundle rebuild # Recreate index
310
-
311
-
# Export
312
-
plcbundle export -count 1000 > ops.jsonl
313
-
314
-
# Serve
315
-
plcbundle serve # Share bundles
316
-
plcbundle serve -sync -websocket # Full-featured
317
-
318
-
# Utilities
319
-
plcbundle mempool # Check staging
320
-
plcbundle compare <url> # Compare with remote
321
-
plcbundle backfill > all.jsonl # Export all
322
-
```
323
-
324
-
---
325
-
326
-
## Getting Help
327
-
328
-
```bash
329
-
plcbundle <command> -h # Command-specific help
330
-
```
331
-
332
-
**Report issues:** https://tangled.org/@atscan.net/plcbundle/issues
333
-
3
+
TODO
+1
-1666
docs/library.md
+1
-1666
docs/library.md
···
14
14
15
15
---
16
16
17
-
## Getting Started
18
-
19
-
### Installation
20
-
21
-
```bash
22
-
go get tangled.org/atscan.net/plcbundle
23
-
```
24
-
25
-
### Your First Program
26
-
27
-
Create a simple program to fetch and display bundle information:
28
-
29
-
```go
30
-
package main
31
-
32
-
import (
33
-
"context"
34
-
"log"
35
-
36
-
plcbundle "tangled.org/atscan.net/plcbundle"
37
-
)
38
-
39
-
func main() {
40
-
// Create a manager
41
-
mgr, err := plcbundle.New("./plc_data", "https://plc.directory")
42
-
if err != nil {
43
-
log.Fatal(err)
44
-
}
45
-
defer mgr.Close()
46
-
47
-
// Get repository info
48
-
info := mgr.GetInfo()
49
-
log.Printf("Bundle directory: %s", info["bundle_dir"])
50
-
51
-
// Get index stats
52
-
index := mgr.GetIndex()
53
-
stats := index.GetStats()
54
-
log.Printf("Total bundles: %d", stats["bundle_count"])
55
-
}
56
-
```
57
-
58
-
Run it:
59
-
```bash
60
-
go run main.go
61
-
# 2025/01/15 10:30:00 Bundle directory: ./plc_data
62
-
# 2025/01/15 10:30:00 Total bundles: 0
63
-
```
64
-
65
-
### Fetching Your First Bundle
66
-
67
-
Let's fetch a bundle from the PLC directory:
68
-
69
-
```go
70
-
package main
71
-
72
-
import (
73
-
"context"
74
-
"log"
75
-
76
-
plcbundle "tangled.org/atscan.net/plcbundle"
77
-
)
78
-
79
-
func main() {
80
-
mgr, err := plcbundle.New("./plc_data", "https://plc.directory")
81
-
if err != nil {
82
-
log.Fatal(err)
83
-
}
84
-
defer mgr.Close()
85
-
86
-
ctx := context.Background()
87
-
88
-
// Fetch next bundle
89
-
log.Println("Fetching bundle...")
90
-
bundle, err := mgr.FetchNext(ctx)
91
-
if err != nil {
92
-
log.Fatal(err)
93
-
}
94
-
95
-
log.Printf("โ Fetched bundle %d", bundle.BundleNumber)
96
-
log.Printf(" Operations: %d", len(bundle.Operations))
97
-
log.Printf(" Unique DIDs: %d", bundle.DIDCount)
98
-
log.Printf(" Time range: %s to %s",
99
-
bundle.StartTime.Format("2006-01-02"),
100
-
bundle.EndTime.Format("2006-01-02"))
101
-
}
102
-
```
103
-
104
-
**What's happening here?**
105
-
106
-
1. `plcbundle.New()` creates a manager that handles all bundle operations
107
-
2. `FetchNext()` automatically:
108
-
- Fetches operations from PLC directory
109
-
- Creates a bundle when 10,000 operations are collected
110
-
- Saves the bundle to disk
111
-
- Updates the index
112
-
- Returns the bundle object
113
-
114
-
### Reading Bundles
115
-
116
-
Once you have bundles, you can load and read them:
117
-
118
-
```go
119
-
package main
120
-
121
-
import (
122
-
"context"
123
-
"log"
124
-
125
-
plcbundle "tangled.org/atscan.net/plcbundle"
126
-
)
127
-
128
-
func main() {
129
-
mgr, err := plcbundle.New("./plc_data", "")
130
-
if err != nil {
131
-
log.Fatal(err)
132
-
}
133
-
defer mgr.Close()
134
-
135
-
ctx := context.Background()
136
-
137
-
// Load bundle 1
138
-
bundle, err := mgr.Load(ctx, 1)
139
-
if err != nil {
140
-
log.Fatal(err)
141
-
}
142
-
143
-
log.Printf("Bundle %d loaded", bundle.BundleNumber)
144
-
145
-
// Iterate through operations
146
-
for i, op := range bundle.Operations {
147
-
if i >= 5 {
148
-
break // Just show first 5
149
-
}
150
-
log.Printf("%d. DID: %s, CID: %s", i+1, op.DID, op.CID)
151
-
}
152
-
}
153
-
```
154
-
155
-
---
156
-
157
-
## Core Concepts
158
-
159
-
### The Manager
160
-
161
-
The `Manager` is your main entry point. It handles:
162
-
- Bundle storage and retrieval
163
-
- Index management
164
-
- PLC directory synchronization
165
-
- Verification
166
-
- Mempool management
167
-
168
-
**Creating a manager:**
169
-
170
-
```go
171
-
// Simple creation
172
-
mgr, err := plcbundle.New("./bundles", "https://plc.directory")
173
-
174
-
// Custom configuration
175
-
config := plcbundle.DefaultConfig("./bundles")
176
-
config.VerifyOnLoad = true
177
-
config.AutoRebuild = true
178
-
179
-
plcClient := plcbundle.NewPLCClient("https://plc.directory")
180
-
mgr, err := plcbundle.NewManager(config, plcClient)
181
-
```
182
-
183
-
### Bundles
184
-
185
-
A bundle contains exactly 10,000 operations:
186
-
187
-
```go
188
-
type Bundle struct {
189
-
BundleNumber int // Sequential number (1, 2, 3...)
190
-
StartTime time.Time // First operation timestamp
191
-
EndTime time.Time // Last operation timestamp
192
-
Operations []plcclient.PLCOperation // The 10,000 operations
193
-
DIDCount int // Unique DIDs in bundle
194
-
Hash string // Chain hash (includes history)
195
-
ContentHash string // This bundle's content hash
196
-
Parent string // Previous bundle's chain hash
197
-
CompressedSize int64 // File size on disk
198
-
UncompressedSize int64 // Original JSONL size
199
-
}
200
-
```
201
-
202
-
### The Index
203
-
204
-
The index tracks all bundles and their metadata:
205
-
206
-
```go
207
-
index := mgr.GetIndex()
208
-
209
-
// Get all bundles
210
-
bundles := index.GetBundles()
211
-
for _, meta := range bundles {
212
-
log.Printf("Bundle %d: %s to %s",
213
-
meta.BundleNumber,
214
-
meta.StartTime.Format("2006-01-02"),
215
-
meta.EndTime.Format("2006-01-02"))
216
-
}
217
-
218
-
// Get specific bundle metadata
219
-
meta, err := index.GetBundle(42)
220
-
221
-
// Get last bundle
222
-
lastBundle := index.GetLastBundle()
223
-
```
224
-
225
-
### Operations
226
-
227
-
Each operation represents a DID PLC directory event:
228
-
229
-
```go
230
-
type PLCOperation struct {
231
-
DID string // The DID (did:plc:...)
232
-
Operation json.RawMessage // Raw JSON bytes (use GetOperationMap() to parse)
233
-
CID string // Content identifier
234
-
Nullified interface{} // nil, false, or CID string
235
-
CreatedAt time.Time // When it was created
236
-
237
-
// Internal fields (populated automatically)
238
-
RawJSON []byte // Original JSON line
239
-
ParsedOperation map[string]interface{} // Cached parsed data
240
-
}
241
-
242
-
// Accessing operation data:
243
-
operation, err := op.GetOperationMap() // Parses Operation field (cached)
244
-
if err != nil || operation == nil {
245
-
return
246
-
}
247
-
248
-
// Now you can access fields
249
-
services := operation["services"].(map[string]interface{})
250
-
251
-
// Check if operation was nullified
252
-
if op.IsNullified() {
253
-
log.Printf("Operation %s was nullified by %s", op.CID, op.GetNullifyingCID())
254
-
}
255
-
```
256
-
257
-
### Accessing Operation Data
258
-
259
-
The `Operation` field uses lazy parsing for performance. Always parse it before accessing:
260
-
261
-
```go
262
-
// โ Wrong - won't compile
263
-
services := op.Operation["services"]
264
-
265
-
// โ
Correct
266
-
operation, err := op.GetOperationMap()
267
-
if err != nil || operation == nil {
268
-
return
269
-
}
270
-
services, ok := operation["services"].(map[string]interface{})
271
-
```
272
-
273
-
The parsed data is cached, so repeated calls are fast:
274
-
// First call: parses JSON
275
-
data1, _ := op.GetOperationMap()
276
-
277
-
// Second call: returns cached data (fast)
278
-
data2, _ := op.GetOperationMap()
279
-
280
-
---
281
-
282
-
## Common Patterns
283
-
284
-
### Pattern 1: Transparent Sync Service
285
-
286
-
**Goal:** Keep a local PLC mirror continuously synchronized.
287
-
288
-
This is the most common use case - maintaining an up-to-date copy of the PLC directory.
289
-
290
-
```go
291
-
package main
292
-
293
-
import (
294
-
"context"
295
-
"log"
296
-
"os"
297
-
"os/signal"
298
-
"syscall"
299
-
"time"
300
-
301
-
plcbundle "tangled.org/atscan.net/plcbundle"
302
-
)
303
-
304
-
type SyncService struct {
305
-
mgr *plcbundle.Manager
306
-
interval time.Duration
307
-
stop chan struct{}
308
-
}
309
-
310
-
func NewSyncService(bundleDir string, interval time.Duration) (*SyncService, error) {
311
-
mgr, err := plcbundle.New(bundleDir, "https://plc.directory")
312
-
if err != nil {
313
-
return nil, err
314
-
}
315
-
316
-
return &SyncService{
317
-
mgr: mgr,
318
-
interval: interval,
319
-
stop: make(chan struct{}),
320
-
}, nil
321
-
}
322
-
323
-
func (s *SyncService) Start() {
324
-
log.Println("Starting sync service...")
325
-
326
-
// Initial sync
327
-
s.sync()
328
-
329
-
// Periodic sync
330
-
ticker := time.NewTicker(s.interval)
331
-
defer ticker.Stop()
332
-
333
-
for {
334
-
select {
335
-
case <-ticker.C:
336
-
s.sync()
337
-
case <-s.stop:
338
-
log.Println("Sync service stopped")
339
-
return
340
-
}
341
-
}
342
-
}
343
-
344
-
func (s *SyncService) sync() {
345
-
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
346
-
defer cancel()
347
-
348
-
log.Println("Checking for new bundles...")
349
-
350
-
fetched := 0
351
-
for {
352
-
bundle, err := s.mgr.FetchNext(ctx)
353
-
if err != nil {
354
-
if isInsufficientOps(err) {
355
-
if fetched > 0 {
356
-
log.Printf("โ Synced %d new bundles", fetched)
357
-
} else {
358
-
log.Println("โ Up to date")
359
-
}
360
-
return
361
-
}
362
-
log.Printf("Error: %v", err)
363
-
return
364
-
}
365
-
366
-
fetched++
367
-
log.Printf("โ Fetched bundle %d (%d ops, %d DIDs)",
368
-
bundle.BundleNumber, len(bundle.Operations), bundle.DIDCount)
369
-
}
370
-
}
371
-
372
-
func (s *SyncService) Stop() {
373
-
close(s.stop)
374
-
s.mgr.Close()
375
-
}
376
-
377
-
func isInsufficientOps(err error) bool {
378
-
return err != nil &&
379
-
(strings.Contains(err.Error(), "insufficient operations") ||
380
-
strings.Contains(err.Error(), "no more available"))
381
-
}
382
-
383
-
func main() {
384
-
service, err := NewSyncService("./plc_data", 5*time.Minute)
385
-
if err != nil {
386
-
log.Fatal(err)
387
-
}
388
-
389
-
// Start service in background
390
-
go service.Start()
391
-
392
-
// Wait for interrupt
393
-
sigChan := make(chan os.Signal, 1)
394
-
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
395
-
<-sigChan
396
-
397
-
log.Println("Shutting down...")
398
-
service.Stop()
399
-
}
400
-
```
401
-
402
-
**Usage:**
403
-
```bash
404
-
go run main.go
405
-
# Starting sync service...
406
-
# Checking for new bundles...
407
-
# โ Fetched bundle 8548 (10000 ops, 8234 DIDs)
408
-
# โ Fetched bundle 8549 (10000 ops, 8156 DIDs)
409
-
# โ Up to date
410
-
# ... (repeats every 5 minutes)
411
-
```
412
-
413
-
### Pattern 2: Reading and Processing Operations
414
-
415
-
**Goal:** Process all historical operations for analysis.
416
-
417
-
```go
418
-
package main
419
-
420
-
import (
421
-
"context"
422
-
"log"
423
-
424
-
plcbundle "tangled.org/atscan.net/plcbundle"
425
-
)
426
-
427
-
type OperationProcessor struct {
428
-
mgr *plcbundle.Manager
429
-
}
430
-
431
-
func NewOperationProcessor(bundleDir string) (*OperationProcessor, error) {
432
-
mgr, err := plcbundle.New(bundleDir, "")
433
-
if err != nil {
434
-
return nil, err
435
-
}
436
-
437
-
return &OperationProcessor{mgr: mgr}, nil
438
-
}
439
-
440
-
func (p *OperationProcessor) ProcessAll() error {
441
-
ctx := context.Background()
442
-
443
-
index := p.mgr.GetIndex()
444
-
bundles := index.GetBundles()
445
-
446
-
log.Printf("Processing %d bundles...", len(bundles))
447
-
448
-
totalOps := 0
449
-
uniqueDIDs := make(map[string]bool)
450
-
451
-
for _, meta := range bundles {
452
-
// Load bundle
453
-
bundle, err := p.mgr.Load(ctx, meta.BundleNumber)
454
-
if err != nil {
455
-
return err
456
-
}
457
-
458
-
// Process operations
459
-
for _, op := range bundle.Operations {
460
-
totalOps++
461
-
uniqueDIDs[op.DID] = true
462
-
463
-
// Your processing logic here
464
-
p.processOperation(op)
465
-
}
466
-
467
-
if meta.BundleNumber % 100 == 0 {
468
-
log.Printf("Processed bundle %d...", meta.BundleNumber)
469
-
}
470
-
}
471
-
472
-
log.Printf("โ Processed %d operations from %d unique DIDs",
473
-
totalOps, len(uniqueDIDs))
474
-
475
-
return nil
476
-
}
477
-
478
-
func (p *OperationProcessor) processOperation(op plcbundle.PLCOperation) {
479
-
// Parse Operation field on-demand
480
-
operation, err := op.GetOperationMap()
481
-
if err != nil || operation == nil {
482
-
return
483
-
}
484
-
485
-
// Example: Extract PDS endpoints
486
-
if services, ok := operation["services"].(map[string]interface{}); ok {
487
-
if pds, ok := services["atproto_pds"].(map[string]interface{}); ok {
488
-
if endpoint, ok := pds["endpoint"].(string); ok {
489
-
log.Printf("DID %s uses PDS: %s", op.DID, endpoint)
490
-
}
491
-
}
492
-
}
493
-
}
494
-
495
-
496
-
func main() {
497
-
processor, err := NewOperationProcessor("./plc_data")
498
-
if err != nil {
499
-
log.Fatal(err)
500
-
}
501
-
502
-
if err := processor.ProcessAll(); err != nil {
503
-
log.Fatal(err)
504
-
}
505
-
}
506
-
```
507
-
508
-
### Pattern 3: Time-Based Queries
509
-
510
-
**Goal:** Export operations from a specific time period.
511
-
512
-
```go
513
-
package main
514
-
515
-
import (
516
-
"context"
517
-
"encoding/json"
518
-
"log"
519
-
"os"
520
-
"time"
521
-
522
-
plcbundle "tangled.org/atscan.net/plcbundle"
523
-
)
524
-
525
-
func exportOperationsSince(bundleDir string, since time.Time, limit int) error {
526
-
mgr, err := plcbundle.New(bundleDir, "")
527
-
if err != nil {
528
-
return err
529
-
}
530
-
defer mgr.Close()
531
-
532
-
ctx := context.Background()
533
-
534
-
// Export operations after timestamp
535
-
ops, err := mgr.Export(ctx, since, limit)
536
-
if err != nil {
537
-
return err
538
-
}
539
-
540
-
log.Printf("Exporting %d operations...", len(ops))
541
-
542
-
// Write as JSONL to stdout
543
-
encoder := json.NewEncoder(os.Stdout)
544
-
for _, op := range ops {
545
-
if err := encoder.Encode(op); err != nil {
546
-
return err
547
-
}
548
-
}
549
-
550
-
return nil
551
-
}
552
-
553
-
func main() {
554
-
// Export operations from the last 7 days
555
-
since := time.Now().AddDate(0, 0, -7)
556
-
557
-
if err := exportOperationsSince("./plc_data", since, 50000); err != nil {
558
-
log.Fatal(err)
559
-
}
560
-
}
561
-
```
562
-
563
-
**Output to file:**
564
-
```bash
565
-
go run main.go > last_7_days.jsonl
566
-
```
567
-
568
-
### Pattern 4: Verification Service
569
-
570
-
**Goal:** Periodically verify bundle integrity.
571
-
572
-
```go
573
-
package main
574
-
575
-
import (
576
-
"context"
577
-
"log"
578
-
"time"
579
-
580
-
plcbundle "tangled.org/atscan.net/plcbundle"
581
-
)
582
-
583
-
type VerificationService struct {
584
-
mgr *plcbundle.Manager
585
-
interval time.Duration
586
-
}
587
-
588
-
func NewVerificationService(bundleDir string, interval time.Duration) (*VerificationService, error) {
589
-
mgr, err := plcbundle.New(bundleDir, "")
590
-
if err != nil {
591
-
return nil, err
592
-
}
593
-
594
-
return &VerificationService{
595
-
mgr: mgr,
596
-
interval: interval,
597
-
}, nil
598
-
}
599
-
600
-
func (v *VerificationService) Start() {
601
-
ticker := time.NewTicker(v.interval)
602
-
defer ticker.Stop()
603
-
604
-
// Verify immediately on start
605
-
v.verify()
606
-
607
-
for range ticker.C {
608
-
v.verify()
609
-
}
610
-
}
611
-
612
-
func (v *VerificationService) verify() {
613
-
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
614
-
defer cancel()
615
-
616
-
log.Println("Starting chain verification...")
617
-
start := time.Now()
618
-
619
-
result, err := v.mgr.VerifyChain(ctx)
620
-
if err != nil {
621
-
log.Printf("โ Verification error: %v", err)
622
-
return
623
-
}
624
-
625
-
elapsed := time.Since(start)
626
-
627
-
if result.Valid {
628
-
log.Printf("โ
Chain verified: %d bundles, took %s",
629
-
result.ChainLength, elapsed.Round(time.Second))
630
-
631
-
// Get head hash
632
-
index := v.mgr.GetIndex()
633
-
if last := index.GetLastBundle(); last != nil {
634
-
log.Printf(" Head hash: %s...", last.Hash[:16])
635
-
}
636
-
} else {
637
-
log.Printf("โ Chain broken at bundle %d: %s",
638
-
result.BrokenAt, result.Error)
639
-
640
-
// Alert or take action
641
-
v.handleBrokenChain(result)
642
-
}
643
-
}
644
-
645
-
func (v *VerificationService) handleBrokenChain(result *plcbundle.ChainVerificationResult) {
646
-
// Send alert, trigger re-sync, etc.
647
-
log.Printf("โ ๏ธ ALERT: Chain integrity compromised!")
648
-
// TODO: Implement your alerting logic
649
-
}
650
-
651
-
func main() {
652
-
service, err := NewVerificationService("./plc_data", 24*time.Hour)
653
-
if err != nil {
654
-
log.Fatal(err)
655
-
}
656
-
657
-
log.Println("Verification service started (daily checks)")
658
-
service.Start()
659
-
}
660
-
```
661
-
662
-
### Pattern 5: Custom HTTP API
663
-
664
-
**Goal:** Build a custom API on top of your bundle archive.
665
-
666
-
```go
667
-
package main
668
-
669
-
import (
670
-
"encoding/json"
671
-
"log"
672
-
"net/http"
673
-
"strconv"
674
-
675
-
plcbundle "tangled.org/atscan.net/plcbundle"
676
-
)
677
-
678
-
type API struct {
679
-
mgr *plcbundle.Manager
680
-
}
681
-
682
-
func NewAPI(bundleDir string) (*API, error) {
683
-
mgr, err := plcbundle.New(bundleDir, "")
684
-
if err != nil {
685
-
return nil, err
686
-
}
687
-
688
-
return &API{mgr: mgr}, nil
689
-
}
690
-
691
-
func (api *API) handleStats(w http.ResponseWriter, r *http.Request) {
692
-
index := api.mgr.GetIndex()
693
-
stats := index.GetStats()
694
-
695
-
response := map[string]interface{}{
696
-
"bundles": stats["bundle_count"],
697
-
"first": stats["first_bundle"],
698
-
"last": stats["last_bundle"],
699
-
"total_size": stats["total_size"],
700
-
"start_time": stats["start_time"],
701
-
"end_time": stats["end_time"],
702
-
"updated_at": stats["updated_at"],
703
-
}
704
-
705
-
w.Header().Set("Content-Type", "application/json")
706
-
json.NewEncoder(w).Encode(response)
707
-
}
708
-
709
-
func (api *API) handleOperations(w http.ResponseWriter, r *http.Request) {
710
-
bundleNumStr := r.URL.Query().Get("bundle")
711
-
if bundleNumStr == "" {
712
-
http.Error(w, "bundle parameter required", http.StatusBadRequest)
713
-
return
714
-
}
715
-
716
-
bundleNum, err := strconv.Atoi(bundleNumStr)
717
-
if err != nil {
718
-
http.Error(w, "invalid bundle number", http.StatusBadRequest)
719
-
return
720
-
}
721
-
722
-
ctx := r.Context()
723
-
bundle, err := api.mgr.Load(ctx, bundleNum)
724
-
if err != nil {
725
-
http.Error(w, err.Error(), http.StatusNotFound)
726
-
return
727
-
}
728
-
729
-
w.Header().Set("Content-Type", "application/x-ndjson")
730
-
encoder := json.NewEncoder(w)
731
-
for _, op := range bundle.Operations {
732
-
encoder.Encode(op)
733
-
}
734
-
}
735
-
736
-
func (api *API) handleDID(w http.ResponseWriter, r *http.Request) {
737
-
did := r.URL.Query().Get("did")
738
-
if did == "" {
739
-
http.Error(w, "did parameter required", http.StatusBadRequest)
740
-
return
741
-
}
742
-
743
-
ctx := r.Context()
744
-
745
-
// Search through bundles for this DID
746
-
var operations []plcbundle.PLCOperation
747
-
748
-
index := api.mgr.GetIndex()
749
-
bundles := index.GetBundles()
750
-
751
-
for _, meta := range bundles {
752
-
bundle, err := api.mgr.Load(ctx, meta.BundleNumber)
753
-
if err != nil {
754
-
continue
755
-
}
756
-
757
-
for _, op := range bundle.Operations {
758
-
if op.DID == did {
759
-
operations = append(operations, op)
760
-
}
761
-
}
762
-
}
763
-
764
-
w.Header().Set("Content-Type", "application/json")
765
-
json.NewEncoder(w).Encode(map[string]interface{}{
766
-
"did": did,
767
-
"operations": operations,
768
-
"count": len(operations),
769
-
})
770
-
}
771
-
772
-
func main() {
773
-
api, err := NewAPI("./plc_data")
774
-
if err != nil {
775
-
log.Fatal(err)
776
-
}
777
-
778
-
http.HandleFunc("/stats", api.handleStats)
779
-
http.HandleFunc("/operations", api.handleOperations)
780
-
http.HandleFunc("/did", api.handleDID)
781
-
782
-
log.Println("API listening on :8080")
783
-
log.Fatal(http.ListenAndServe(":8080", nil))
784
-
}
785
-
```
786
-
787
-
**Usage:**
788
-
```bash
789
-
# Get stats
790
-
curl http://localhost:8080/stats
791
-
792
-
# Get operations from bundle 1
793
-
curl http://localhost:8080/operations?bundle=1
794
-
795
-
# Get all operations for a DID
796
-
curl http://localhost:8080/did?did=did:plc:example123
797
-
```
798
-
799
-
---
800
-
801
-
## Building Applications
802
-
803
-
### Application 1: PDS Discovery Tool
804
-
805
-
Find all PDS endpoints in the network:
806
-
807
-
```go
808
-
package main
809
-
810
-
import (
811
-
"context"
812
-
"fmt"
813
-
"log"
814
-
815
-
plcbundle "tangled.org/atscan.net/plcbundle"
816
-
)
817
-
818
-
type PDSTracker struct {
819
-
mgr *plcbundle.Manager
820
-
endpoints map[string]int // endpoint -> count
821
-
}
822
-
823
-
func NewPDSTracker(bundleDir string) (*PDSTracker, error) {
824
-
mgr, err := plcbundle.New(bundleDir, "")
825
-
if err != nil {
826
-
return nil, err
827
-
}
828
-
829
-
return &PDSTracker{
830
-
mgr: mgr,
831
-
endpoints: make(map[string]int),
832
-
}, nil
833
-
}
834
-
835
-
func (pt *PDSTracker) Scan() error {
836
-
ctx := context.Background()
837
-
838
-
index := pt.mgr.GetIndex()
839
-
bundles := index.GetBundles()
840
-
841
-
log.Printf("Scanning %d bundles for PDS endpoints...", len(bundles))
842
-
843
-
for _, meta := range bundles {
844
-
bundle, err := pt.mgr.Load(ctx, meta.BundleNumber)
845
-
if err != nil {
846
-
return err
847
-
}
848
-
849
-
for _, op := range bundle.Operations {
850
-
if endpoint := pt.extractPDS(op); endpoint != "" {
851
-
pt.endpoints[endpoint]++
852
-
}
853
-
}
854
-
}
855
-
856
-
return nil
857
-
}
858
-
859
-
func (pt *PDSTracker) extractPDS(op plcbundle.PLCOperation) string {
860
-
// Parse Operation field on-demand
861
-
operation, err := op.GetOperationMap()
862
-
if err != nil || operation == nil {
863
-
return ""
864
-
}
865
-
866
-
services, ok := operation["services"].(map[string]interface{})
867
-
if !ok {
868
-
return ""
869
-
}
870
-
871
-
pds, ok := services["atproto_pds"].(map[string]interface{})
872
-
if !ok {
873
-
return ""
874
-
}
875
-
876
-
endpoint, ok := pds["endpoint"].(string)
877
-
if !ok {
878
-
return ""
879
-
}
880
-
881
-
return endpoint
882
-
}
883
-
884
-
885
-
func (pt *PDSTracker) PrintResults() {
886
-
log.Printf("\nFound %d unique PDS endpoints:\n", len(pt.endpoints))
887
-
888
-
// Sort by count
889
-
type endpointCount struct {
890
-
endpoint string
891
-
count int
892
-
}
893
-
894
-
var sorted []endpointCount
895
-
for endpoint, count := range pt.endpoints {
896
-
sorted = append(sorted, endpointCount{endpoint, count})
897
-
}
898
-
899
-
sort.Slice(sorted, func(i, j int) bool {
900
-
return sorted[i].count > sorted[j].count
901
-
})
902
-
903
-
// Print top 20
904
-
for i, ec := range sorted {
905
-
if i >= 20 {
906
-
break
907
-
}
908
-
fmt.Printf("%3d. %s (%d DIDs)\n", i+1, ec.endpoint, ec.count)
909
-
}
910
-
}
911
-
912
-
func main() {
913
-
tracker, err := NewPDSTracker("./plc_data")
914
-
if err != nil {
915
-
log.Fatal(err)
916
-
}
917
-
918
-
if err := tracker.Scan(); err != nil {
919
-
log.Fatal(err)
920
-
}
921
-
922
-
tracker.PrintResults()
923
-
}
924
-
```
925
-
926
-
### Application 2: DID History Viewer
927
-
928
-
View the complete history of a DID:
929
-
930
-
```go
931
-
package main
932
-
933
-
import (
934
-
"context"
935
-
"encoding/json"
936
-
"fmt"
937
-
"log"
938
-
"os"
939
-
940
-
plcbundle "tangled.org/atscan.net/plcbundle"
941
-
)
942
-
943
-
type DIDHistory struct {
944
-
DID string `json:"did"`
945
-
Operations []plcbundle.PLCOperation `json:"operations"`
946
-
FirstSeen time.Time `json:"first_seen"`
947
-
LastSeen time.Time `json:"last_seen"`
948
-
OpCount int `json:"operation_count"`
949
-
}
950
-
951
-
func getDIDHistory(bundleDir, did string) (*DIDHistory, error) {
952
-
mgr, err := plcbundle.New(bundleDir, "")
953
-
if err != nil {
954
-
return nil, err
955
-
}
956
-
defer mgr.Close()
957
-
958
-
ctx := context.Background()
959
-
960
-
history := &DIDHistory{
961
-
DID: did,
962
-
Operations: make([]plcbundle.PLCOperation, 0),
963
-
}
964
-
965
-
index := mgr.GetIndex()
966
-
bundles := index.GetBundles()
967
-
968
-
log.Printf("Searching for DID %s...", did)
969
-
970
-
for _, meta := range bundles {
971
-
bundle, err := mgr.Load(ctx, meta.BundleNumber)
972
-
if err != nil {
973
-
continue
974
-
}
975
-
976
-
for _, op := range bundle.Operations {
977
-
if op.DID == did {
978
-
history.Operations = append(history.Operations, op)
979
-
}
980
-
}
981
-
}
982
-
983
-
if len(history.Operations) == 0 {
984
-
return nil, fmt.Errorf("DID not found")
985
-
}
986
-
987
-
// Set timestamps
988
-
history.FirstSeen = history.Operations[0].CreatedAt
989
-
history.LastSeen = history.Operations[len(history.Operations)-1].CreatedAt
990
-
history.OpCount = len(history.Operations)
991
-
992
-
return history, nil
993
-
}
994
-
995
-
func main() {
996
-
if len(os.Args) < 2 {
997
-
log.Fatal("Usage: did-history <did>")
998
-
}
999
-
1000
-
did := os.Args[1]
1001
-
1002
-
history, err := getDIDHistory("./plc_data", did)
1003
-
if err != nil {
1004
-
log.Fatal(err)
1005
-
}
1006
-
1007
-
// Print as JSON
1008
-
encoder := json.NewEncoder(os.Stdout)
1009
-
encoder.SetIndent("", " ")
1010
-
encoder.Encode(history)
1011
-
}
1012
-
```
1013
-
1014
-
### Application 3: Real-time Monitor
1015
-
1016
-
Monitor new operations as they arrive:
1017
-
1018
-
```go
1019
-
package main
1020
-
1021
-
import (
1022
-
"context"
1023
-
"log"
1024
-
"time"
1025
-
1026
-
plcbundle "tangled.org/atscan.net/plcbundle"
1027
-
)
1028
-
1029
-
type Monitor struct {
1030
-
mgr *plcbundle.Manager
1031
-
lastSeen int // Last bundle number processed
1032
-
pollInterval time.Duration
1033
-
}
1034
-
1035
-
func NewMonitor(bundleDir string, pollInterval time.Duration) (*Monitor, error) {
1036
-
mgr, err := plcbundle.New(bundleDir, "https://plc.directory")
1037
-
if err != nil {
1038
-
return nil, err
1039
-
}
1040
-
1041
-
// Get current position
1042
-
index := mgr.GetIndex()
1043
-
lastBundle := index.GetLastBundle()
1044
-
lastSeen := 0
1045
-
if lastBundle != nil {
1046
-
lastSeen = lastBundle.BundleNumber
1047
-
}
1048
-
1049
-
return &Monitor{
1050
-
mgr: mgr,
1051
-
lastSeen: lastSeen,
1052
-
pollInterval: pollInterval,
1053
-
}, nil
1054
-
}
1055
-
1056
-
func (m *Monitor) Start() {
1057
-
log.Println("Monitor started, watching for new bundles...")
1058
-
1059
-
ticker := time.NewTicker(m.pollInterval)
1060
-
defer ticker.Stop()
1061
-
1062
-
for range ticker.C {
1063
-
m.check()
1064
-
}
1065
-
}
1066
-
1067
-
func (m *Monitor) check() {
1068
-
ctx := context.Background()
1069
-
1070
-
// Try to fetch next bundle
1071
-
bundle, err := m.mgr.FetchNext(ctx)
1072
-
if err != nil {
1073
-
// Not an error if no new bundle available
1074
-
return
1075
-
}
1076
-
1077
-
// New bundle!
1078
-
log.Printf("๐ New bundle: %d", bundle.BundleNumber)
1079
-
log.Printf(" Operations: %d", len(bundle.Operations))
1080
-
log.Printf(" DIDs: %d", bundle.DIDCount)
1081
-
log.Printf(" Time: %s", bundle.EndTime.Format("2006-01-02 15:04:05"))
1082
-
1083
-
// Process new operations
1084
-
m.processNewOperations(bundle)
1085
-
1086
-
m.lastSeen = bundle.BundleNumber
1087
-
}
1088
-
1089
-
func (m *Monitor) processNewOperations(bundle *plcbundle.Bundle) {
1090
-
for _, op := range bundle.Operations {
1091
-
// Check for interesting operations
1092
-
if op.IsNullified() {
1093
-
log.Printf(" โ ๏ธ Nullified: %s", op.DID)
1094
-
}
1095
-
1096
-
// Check for new DIDs (operation type "create")
1097
-
operation, err := op.GetOperationMap()
1098
-
if err == nil && operation != nil {
1099
-
if opType, ok := operation["type"].(string); ok && opType == "create" {
1100
-
log.Printf(" โ New DID: %s", op.DID)
1101
-
}
1102
-
}
1103
-
}
1104
-
}
1105
-
1106
-
func main() {
1107
-
monitor, err := NewMonitor("./plc_data", 30*time.Second)
1108
-
if err != nil {
1109
-
log.Fatal(err)
1110
-
}
1111
-
1112
-
monitor.Start()
1113
-
}
1114
-
```
1115
-
1116
-
---
1117
-
1118
-
## Advanced Usage
1119
-
1120
-
### Custom Configuration
1121
-
1122
-
Full control over bundle manager behavior:
1123
-
1124
-
```go
1125
-
package main
1126
-
1127
-
import (
1128
-
"log"
1129
-
"runtime"
1130
-
"time"
1131
-
1132
-
"tangled.org/atscan.net/plcbundle/bundle"
1133
-
"tangled.org/atscan.net/plcbundle/plcclient"
1134
-
plcbundle "tangled.org/atscan.net/plcbundle"
1135
-
)
1136
-
1137
-
func main() {
1138
-
// Custom configuration
1139
-
config := &bundle.Config{
1140
-
BundleDir: "./my_bundles",
1141
-
VerifyOnLoad: true, // Verify hashes when loading
1142
-
AutoRebuild: true, // Auto-rebuild index if needed
1143
-
RebuildWorkers: runtime.NumCPU(), // Parallel workers for rebuild
1144
-
Logger: &MyCustomLogger{}, // Custom logger
1145
-
1146
-
// Progress callback for rebuild
1147
-
RebuildProgress: func(current, total int) {
1148
-
if current%100 == 0 {
1149
-
log.Printf("Rebuild: %d/%d (%.1f%%)",
1150
-
current, total, float64(current)/float64(total)*100)
1151
-
}
1152
-
},
1153
-
}
1154
-
1155
-
// Custom PLC client with rate limiting
1156
-
plcClient := plcclient.NewClient("https://plc.directory",
1157
-
plcclient.WithRateLimit(60, time.Minute), // 60 req/min
1158
-
plcclient.WithTimeout(30*time.Second), // 30s timeout
1159
-
plcclient.WithLogger(&MyCustomLogger{}), // Custom logger
1160
-
)
1161
-
1162
-
// Create manager
1163
-
mgr, err := bundle.NewManager(config, plcClient)
1164
-
if err != nil {
1165
-
log.Fatal(err)
1166
-
}
1167
-
defer mgr.Close()
1168
-
1169
-
log.Println("Manager created with custom configuration")
1170
-
}
1171
-
1172
-
// Custom logger implementation
1173
-
type MyCustomLogger struct{}
1174
-
1175
-
func (l *MyCustomLogger) Printf(format string, v ...interface{}) {
1176
-
// Add custom formatting, filtering, etc.
1177
-
log.Printf("[PLCBUNDLE] "+format, v...)
1178
-
}
1179
-
1180
-
func (l *MyCustomLogger) Println(v ...interface{}) {
1181
-
log.Println(append([]interface{}{"[PLCBUNDLE]"}, v...)...)
1182
-
}
1183
-
```
1184
-
1185
-
### Streaming Data
1186
-
1187
-
Stream bundle data without loading everything into memory:
1188
-
1189
-
```go
1190
-
package main
1191
-
1192
-
import (
1193
-
"bufio"
1194
-
"context"
1195
-
"encoding/json"
1196
-
"io"
1197
-
"log"
1198
-
1199
-
plcbundle "tangled.org/atscan.net/plcbundle"
1200
-
)
1201
-
1202
-
func streamBundle(mgr *plcbundle.Manager, bundleNumber int) error {
1203
-
ctx := context.Background()
1204
-
1205
-
// Get decompressed stream
1206
-
reader, err := mgr.StreamDecompressed(ctx, bundleNumber)
1207
-
if err != nil {
1208
-
return err
1209
-
}
1210
-
defer reader.Close()
1211
-
1212
-
// Read line by line (JSONL)
1213
-
scanner := bufio.NewScanner(reader)
1214
-
1215
-
// Set buffer size for large lines
1216
-
buf := make([]byte, 0, 64*1024)
1217
-
scanner.Buffer(buf, 1024*1024)
1218
-
1219
-
lineNum := 0
1220
-
for scanner.Scan() {
1221
-
lineNum++
1222
-
1223
-
var op plcbundle.PLCOperation
1224
-
if err := json.Unmarshal(scanner.Bytes(), &op); err != nil {
1225
-
log.Printf("Warning: failed to parse line %d: %v", lineNum, err)
1226
-
continue
1227
-
}
1228
-
1229
-
// Process operation without storing all in memory
1230
-
processOperation(op)
1231
-
}
1232
-
1233
-
return scanner.Err()
1234
-
}
1235
-
1236
-
func processOperation(op plcbundle.PLCOperation) {
1237
-
// Your processing logic
1238
-
log.Printf("Processing: %s", op.DID)
1239
-
}
1240
-
1241
-
func main() {
1242
-
mgr, err := plcbundle.New("./plc_data", "")
1243
-
if err != nil {
1244
-
log.Fatal(err)
1245
-
}
1246
-
defer mgr.Close()
1247
-
1248
-
// Stream bundle 1
1249
-
if err := streamBundle(mgr, 1); err != nil {
1250
-
log.Fatal(err)
1251
-
}
1252
-
}
1253
-
```
1254
-
1255
-
### Parallel Processing
1256
-
1257
-
Process multiple bundles concurrently:
1258
-
1259
-
```go
1260
-
package main
1261
-
1262
-
import (
1263
-
"context"
1264
-
"log"
1265
-
"sync"
1266
-
1267
-
plcbundle "tangled.org/atscan.net/plcbundle"
1268
-
)
1269
-
1270
-
func processParallel(mgr *plcbundle.Manager, workers int) error {
1271
-
ctx := context.Background()
1272
-
1273
-
index := mgr.GetIndex()
1274
-
bundles := index.GetBundles()
1275
-
1276
-
// Create job channel
1277
-
jobs := make(chan int, len(bundles))
1278
-
results := make(chan error, len(bundles))
1279
-
1280
-
// Start workers
1281
-
var wg sync.WaitGroup
1282
-
for w := 0; w < workers; w++ {
1283
-
wg.Add(1)
1284
-
go func() {
1285
-
defer wg.Done()
1286
-
for bundleNum := range jobs {
1287
-
if err := processBundle(ctx, mgr, bundleNum); err != nil {
1288
-
results <- err
1289
-
} else {
1290
-
results <- nil
1291
-
}
1292
-
}
1293
-
}()
1294
-
}
1295
-
1296
-
// Send jobs
1297
-
for _, meta := range bundles {
1298
-
jobs <- meta.BundleNumber
1299
-
}
1300
-
close(jobs)
1301
-
1302
-
// Wait for completion
1303
-
go func() {
1304
-
wg.Wait()
1305
-
close(results)
1306
-
}()
1307
-
1308
-
// Collect results
1309
-
errors := 0
1310
-
for err := range results {
1311
-
if err != nil {
1312
-
log.Printf("Error: %v", err)
1313
-
errors++
1314
-
}
1315
-
}
1316
-
1317
-
if errors > 0 {
1318
-
return fmt.Errorf("%d bundles failed processing", errors)
1319
-
}
1320
-
1321
-
return nil
1322
-
}
1323
-
1324
-
func processBundle(ctx context.Context, mgr *plcbundle.Manager, bundleNum int) error {
1325
-
bundle, err := mgr.Load(ctx, bundleNum)
1326
-
if err != nil {
1327
-
return err
1328
-
}
1329
-
1330
-
// Process operations
1331
-
for _, op := range bundle.Operations {
1332
-
// Your logic here
1333
-
_ = op
1334
-
}
1335
-
1336
-
log.Printf("Processed bundle %d", bundleNum)
1337
-
return nil
1338
-
}
1339
-
1340
-
func main() {
1341
-
mgr, err := plcbundle.New("./plc_data", "")
1342
-
if err != nil {
1343
-
log.Fatal(err)
1344
-
}
1345
-
defer mgr.Close()
1346
-
1347
-
// Process with 8 workers
1348
-
if err := processParallel(mgr, 8); err != nil {
1349
-
log.Fatal(err)
1350
-
}
1351
-
}
1352
-
```
1353
-
1354
-
### Working with Mempool
1355
-
1356
-
Access operations before they're bundled:
1357
-
1358
-
```go
1359
-
package main
1360
-
1361
-
import (
1362
-
"log"
1363
-
1364
-
plcbundle "tangled.org/atscan.net/plcbundle"
1365
-
)
1366
-
1367
-
func main() {
1368
-
mgr, err := plcbundle.New("./plc_data", "https://plc.directory")
1369
-
if err != nil {
1370
-
log.Fatal(err)
1371
-
}
1372
-
defer mgr.Close()
1373
-
1374
-
// Get mempool stats
1375
-
stats := mgr.GetMempoolStats()
1376
-
1377
-
count := stats["count"].(int)
1378
-
targetBundle := stats["target_bundle"].(int)
1379
-
canCreate := stats["can_create_bundle"].(bool)
1380
-
1381
-
log.Printf("Mempool status:")
1382
-
log.Printf(" Target bundle: %d", targetBundle)
1383
-
log.Printf(" Operations: %d/%d", count, plcbundle.BUNDLE_SIZE)
1384
-
log.Printf(" Ready: %v", canCreate)
1385
-
1386
-
if count > 0 {
1387
-
// Get mempool operations
1388
-
ops, err := mgr.GetMempoolOperations()
1389
-
if err != nil {
1390
-
log.Fatal(err)
1391
-
}
1392
-
1393
-
log.Printf("Latest unbundled operations:")
1394
-
for i, op := range ops {
1395
-
if i >= 5 {
1396
-
break
1397
-
}
1398
-
log.Printf(" %d. %s (%s)", i+1, op.DID, op.CreatedAt.Format("15:04:05"))
1399
-
}
1400
-
}
1401
-
1402
-
// Validate chronological order
1403
-
if err := mgr.ValidateMempool(); err != nil {
1404
-
log.Printf("โ ๏ธ Mempool validation failed: %v", err)
1405
-
} else {
1406
-
log.Println("โ Mempool validated")
1407
-
}
1408
-
}
1409
-
```
1410
-
1411
-
---
1412
-
1413
-
## Best Practices
1414
-
1415
-
### 1. Always Close the Manager
1416
-
1417
-
Use `defer` to ensure cleanup:
1418
-
1419
-
```go
1420
-
mgr, err := plcbundle.New("./plc_data", "https://plc.directory")
1421
-
if err != nil {
1422
-
return err
1423
-
}
1424
-
defer mgr.Close() // Always close!
1425
-
```
1426
-
1427
-
### 2. Handle Context Cancellation
1428
-
1429
-
Support graceful shutdown:
1430
-
1431
-
```go
1432
-
ctx, cancel := context.WithCancel(context.Background())
1433
-
defer cancel()
1434
-
1435
-
// Listen for interrupt
1436
-
sigChan := make(chan os.Signal, 1)
1437
-
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
1438
-
1439
-
go func() {
1440
-
<-sigChan
1441
-
log.Println("Interrupt received, stopping...")
1442
-
cancel()
1443
-
}()
1444
-
1445
-
// Use context in operations
1446
-
bundle, err := mgr.FetchNext(ctx)
1447
-
if err == context.Canceled {
1448
-
log.Println("Operation cancelled gracefully")
1449
-
return nil
1450
-
}
1451
-
```
1452
-
1453
-
### 3. Check Errors Properly
1454
-
1455
-
Distinguish between different error types:
1456
-
1457
-
```go
1458
-
bundle, err := mgr.FetchNext(ctx)
1459
-
if err != nil {
1460
-
// Check if it's just "caught up"
1461
-
if strings.Contains(err.Error(), "insufficient operations") {
1462
-
log.Println("No new bundles available (caught up)")
1463
-
return nil
1464
-
}
1465
-
1466
-
// Real error
1467
-
return fmt.Errorf("fetch failed: %w", err)
1468
-
}
1469
-
```
1470
-
1471
-
### 4. Use Streaming for Large Datasets
1472
-
1473
-
Don't load everything into memory:
1474
-
1475
-
```go
1476
-
// โ Bad: Loads all operations into memory
1477
-
index := mgr.GetIndex()
1478
-
var allOps []plcbundle.PLCOperation
1479
-
for _, meta := range index.GetBundles() {
1480
-
bundle, _ := mgr.Load(ctx, meta.BundleNumber)
1481
-
allOps = append(allOps, bundle.Operations...)
1482
-
}
1483
-
1484
-
// โ
Good: Process one bundle at a time
1485
-
for _, meta := range index.GetBundles() {
1486
-
bundle, _ := mgr.Load(ctx, meta.BundleNumber)
1487
-
for _, op := range bundle.Operations {
1488
-
processOperation(op)
1489
-
}
1490
-
}
1491
-
```
1492
-
1493
-
### 5. Enable Verification in Production
1494
-
1495
-
```go
1496
-
config := plcbundle.DefaultConfig("./plc_data")
1497
-
config.VerifyOnLoad = true // Verify hashes when loading
1498
-
1499
-
mgr, err := plcbundle.NewManager(config, plcClient)
1500
-
```
1501
-
1502
-
### 6. Log Appropriately
1503
-
1504
-
Implement custom logger for production:
1505
-
1506
-
```go
1507
-
type ProductionLogger struct {
1508
-
logger *zap.Logger
1509
-
}
1510
-
1511
-
func (l *ProductionLogger) Printf(format string, v ...interface{}) {
1512
-
l.logger.Sugar().Infof(format, v...)
1513
-
}
1514
-
1515
-
func (l *ProductionLogger) Println(v ...interface{}) {
1516
-
l.logger.Sugar().Info(v...)
1517
-
}
1518
-
```
1519
-
1520
-
### 7. Handle Rate Limits
1521
-
1522
-
Configure PLC client appropriately:
1523
-
1524
-
```go
1525
-
// Production: Be conservative
1526
-
plcClient := plcclient.NewClient("https://plc.directory",
1527
-
plcclient.WithRateLimit(60, time.Minute), // 60 req/min max
1528
-
plcclient.WithTimeout(60*time.Second),
1529
-
)
1530
-
1531
-
// Development: Can be more aggressive (but respectful)
1532
-
plcClient := plcclient.NewClient("https://plc.directory",
1533
-
plcclient.WithRateLimit(90, time.Minute),
1534
-
plcclient.WithTimeout(30*time.Second),
1535
-
)
1536
-
```
1537
-
1538
-
---
1539
-
1540
-
## API Reference
1541
-
1542
-
### Manager Methods
1543
-
1544
-
```go
1545
-
// Creation
1546
-
New(bundleDir, plcURL string) (*Manager, error)
1547
-
NewManager(config *Config, plcClient *PLCClient) (*Manager, error)
1548
-
1549
-
// Lifecycle
1550
-
Close()
1551
-
1552
-
// Fetching
1553
-
FetchNext(ctx) (*Bundle, error)
1554
-
1555
-
// Loading
1556
-
Load(ctx, bundleNumber int) (*Bundle, error)
1557
-
1558
-
// Verification
1559
-
Verify(ctx, bundleNumber int) (*VerificationResult, error)
1560
-
VerifyChain(ctx) (*ChainVerificationResult, error)
1561
-
1562
-
// Exporting
1563
-
Export(ctx, afterTime time.Time, count int) ([]PLCOperation, error)
1564
-
1565
-
// Streaming
1566
-
StreamRaw(ctx, bundleNumber int) (io.ReadCloser, error)
1567
-
StreamDecompressed(ctx, bundleNumber int) (io.ReadCloser, error)
1568
-
1569
-
// Index
1570
-
GetIndex() *Index
1571
-
ScanBundle(path string, bundleNumber int) (*BundleMetadata, error)
1572
-
Scan() (*DirectoryScanResult, error)
1573
-
1574
-
// Mempool
1575
-
GetMempoolStats() map[string]interface{}
1576
-
GetMempoolOperations() ([]PLCOperation, error)
1577
-
ValidateMempool() error
1578
-
ClearMempool() error
1579
-
1580
-
// Info
1581
-
GetInfo() map[string]interface{}
1582
-
IsBundleIndexed(bundleNumber int) bool
1583
-
```
1584
-
1585
-
### Index Methods
1586
-
1587
-
```go
1588
-
// Creation
1589
-
NewIndex() *Index
1590
-
LoadIndex(path string) (*Index, error)
1591
-
1592
-
// Persistence
1593
-
Save(path string) error
1594
-
1595
-
// Queries
1596
-
GetBundle(bundleNumber int) (*BundleMetadata, error)
1597
-
GetLastBundle() *BundleMetadata
1598
-
GetBundles() []*BundleMetadata
1599
-
GetBundleRange(start, end int) []*BundleMetadata
1600
-
1601
-
// Stats
1602
-
Count() int
1603
-
FindGaps() []int
1604
-
GetStats() map[string]interface{}
1605
-
```
1606
-
1607
-
### Configuration Types
1608
-
1609
-
```go
1610
-
type Config struct {
1611
-
BundleDir string
1612
-
VerifyOnLoad bool
1613
-
AutoRebuild bool
1614
-
RebuildWorkers int
1615
-
RebuildProgress func(current, total int)
1616
-
Logger Logger
1617
-
}
1618
-
1619
-
type Logger interface {
1620
-
Printf(format string, v ...interface{})
1621
-
Println(v ...interface{})
1622
-
}
1623
-
```
1624
-
1625
-
---
1626
-
1627
-
## Troubleshooting
1628
-
1629
-
### Bundle Not Found Error
1630
-
1631
-
```go
1632
-
bundle, err := mgr.Load(ctx, 999)
1633
-
if err != nil {
1634
-
if strings.Contains(err.Error(), "not in index") {
1635
-
// Bundle doesn't exist
1636
-
log.Printf("Bundle 999 hasn't been fetched yet")
1637
-
}
1638
-
}
1639
-
```
1640
-
1641
-
### Insufficient Operations Error
1642
-
1643
-
```go
1644
-
bundle, err := mgr.FetchNext(ctx)
1645
-
if err != nil {
1646
-
if strings.Contains(err.Error(), "insufficient operations") {
1647
-
// Not enough operations for a complete bundle
1648
-
// Check mempool
1649
-
stats := mgr.GetMempoolStats()
1650
-
count := stats["count"].(int)
1651
-
log.Printf("Only %d operations available (need %d)", count, plcbundle.BUNDLE_SIZE)
1652
-
}
1653
-
}
1654
-
```
1655
-
1656
-
### Memory Usage
1657
-
1658
-
If processing large numbers of bundles:
1659
-
1660
-
```go
1661
-
// Force garbage collection between bundles
1662
-
for _, meta := range index.GetBundles() {
1663
-
bundle, _ := mgr.Load(ctx, meta.BundleNumber)
1664
-
processBundle(bundle)
1665
-
1666
-
runtime.GC() // Help garbage collector
1667
-
}
1668
-
```
1669
-
1670
-
---
1671
-
1672
-
## Examples Repository
1673
-
1674
-
Find complete, runnable examples at:
1675
-
- https://github.com/plcbundle/examples
1676
-
1677
-
Including:
1678
-
- Complete sync service
1679
-
- API server
1680
-
- Analysis tools
1681
-
- Monitoring services
1682
-
17
+
TODO
+5
-6
go.mod
+5
-6
go.mod
···
1
-
module tangled.org/atscan.net/plcbundle
1
+
module tangled.org/atscan.net/plcbundle-go
2
2
3
3
go 1.25
4
4
5
5
require (
6
-
github.com/DataDog/zstd v1.5.7
7
6
github.com/goccy/go-json v0.10.5
8
7
github.com/gorilla/websocket v1.5.3
9
-
golang.org/x/sys v0.37.0
10
-
)
11
-
12
-
require (
8
+
github.com/jmespath-community/go-jmespath v1.1.1
13
9
github.com/spf13/cobra v1.10.1
10
+
github.com/valyala/gozstd v1.23.2
11
+
golang.org/x/sys v0.38.0
14
12
golang.org/x/term v0.36.0
15
13
)
16
14
17
15
require (
18
16
github.com/inconshreveable/mousetrap v1.1.0 // indirect
19
17
github.com/spf13/pflag v1.0.9 // indirect
18
+
golang.org/x/exp v0.0.0-20230314191032-db074128a8ec // indirect
20
19
)
+15
-4
go.sum
+15
-4
go.sum
···
1
-
github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE=
2
-
github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
3
1
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
2
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
3
+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4
4
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
5
5
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
6
6
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
7
7
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
8
8
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
9
9
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
10
+
github.com/jmespath-community/go-jmespath v1.1.1 h1:bFikPhsi/FdmlZhVgSCd2jj1e7G/rw+zyQfyg5UF+L4=
11
+
github.com/jmespath-community/go-jmespath v1.1.1/go.mod h1:4gOyFJsR/Gk+05RgTKYrifT7tBPWD8Lubtb5jRrfy9I=
12
+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
13
+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
10
14
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
11
15
github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
12
16
github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
13
17
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
14
18
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
15
-
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
16
-
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
19
+
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
20
+
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
21
+
github.com/valyala/gozstd v1.23.2 h1:S3rRsskaDvBCM2XJzQFYIDAO6txxmvTc1arA/9Wgi9o=
22
+
github.com/valyala/gozstd v1.23.2/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
23
+
golang.org/x/exp v0.0.0-20230314191032-db074128a8ec h1:pAv+d8BM2JNnNctsLJ6nnZ6NqXT8N4+eauvZSb3P0I0=
24
+
golang.org/x/exp v0.0.0-20230314191032-db074128a8ec/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
25
+
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
26
+
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
17
27
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
18
28
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
19
29
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
30
+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
20
31
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+2
-2
internal/bundleindex/index_test.go
+2
-2
internal/bundleindex/index_test.go
···
8
8
"testing"
9
9
"time"
10
10
11
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
12
-
"tangled.org/atscan.net/plcbundle/internal/types"
11
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
13
13
)
14
14
15
15
type testLogger struct {
+22
-10
internal/didindex/builder.go
+22
-10
internal/didindex/builder.go
···
50
50
return fmt.Errorf("no bundles to index")
51
51
}
52
52
53
+
if err := os.MkdirAll(dim.shardDir, 0755); err != nil {
54
+
return fmt.Errorf("failed to create shard directory: %w", err)
55
+
}
56
+
53
57
// Create temporary shard files
54
58
tempShards := make([]*os.File, DID_SHARD_COUNT)
55
59
for i := 0; i < DID_SHARD_COUNT; i++ {
···
172
176
173
177
type tempEntry struct {
174
178
identifier string
175
-
location OpLocation // โ Single packed value
179
+
location OpLocation
176
180
}
177
181
178
182
entries := make([]tempEntry, entryCount)
179
183
for i := 0; i < entryCount; i++ {
180
-
offset := i * 28 // โ 28 bytes
184
+
offset := i * 28
181
185
entries[i] = tempEntry{
182
186
identifier: string(data[offset : offset+24]),
183
187
location: OpLocation(binary.LittleEndian.Uint32(data[offset+24 : offset+28])),
···
240
244
}
241
245
242
246
groupDuration := time.Since(groupStart)
243
-
dim.logger.Printf(" [DID Index] Grouped operations into %d shards in %s",
244
-
len(shardOps), groupDuration)
247
+
if dim.verbose {
248
+
dim.logger.Printf(" [DID Index] Grouped operations into %d shards in %s",
249
+
len(shardOps), groupDuration)
250
+
}
245
251
246
252
// STEP 2: Write ALL shards to .tmp files FIRST (PARALLEL)
247
253
writeStart := time.Now()
···
266
272
semaphore := make(chan struct{}, workers)
267
273
var wg sync.WaitGroup
268
274
269
-
dim.logger.Printf(" [DID Index] Updating %d shards in parallel (%d workers)...",
270
-
len(shardOps), workers)
275
+
if dim.verbose {
276
+
dim.logger.Printf(" [DID Index] Updating %d shards in parallel (%d workers)...",
277
+
len(shardOps), workers)
278
+
}
271
279
272
280
// Process each shard in parallel
273
281
for shardNum, newOps := range shardOps {
···
312
320
close(errChan)
313
321
314
322
writeDuration := time.Since(writeStart)
315
-
dim.logger.Printf(" [DID Index] Wrote %d temp files in %s (%.1f shards/sec)",
316
-
len(tmpShards), writeDuration, float64(len(tmpShards))/writeDuration.Seconds())
323
+
if dim.verbose {
324
+
dim.logger.Printf(" [DID Index] Wrote %d temp files in %s (%.1f shards/sec)",
325
+
len(tmpShards), writeDuration, float64(len(tmpShards))/writeDuration.Seconds())
326
+
}
317
327
318
328
// Check for errors
319
329
if err := <-errChan; err != nil {
···
350
360
totalDuration := time.Since(totalStart)
351
361
352
362
// Summary log
353
-
dim.logger.Printf(" [DID Index] โ Bundle %06d indexed: +%d DIDs, %d shards updated in %s",
354
-
bundle.BundleNumber, deltaCount, len(tmpShards), totalDuration)
363
+
if dim.verbose {
364
+
dim.logger.Printf(" [DID Index] โ Bundle %06d indexed: +%d DIDs, %d shards updated in %s",
365
+
bundle.BundleNumber, deltaCount, len(tmpShards), totalDuration)
366
+
}
355
367
356
368
if dim.verbose {
357
369
dim.logger.Printf(" Breakdown: group=%s write=%s commit=%s config=%s",
+2
-1
internal/didindex/bundle.go
+2
-1
internal/didindex/bundle.go
···
4
4
"context"
5
5
"time"
6
6
7
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
7
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
8
8
)
9
9
10
10
// BundleProvider is an interface to avoid circular dependencies
···
12
12
type BundleProvider interface {
13
13
LoadBundleForDIDIndex(ctx context.Context, bundleNumber int) (*BundleData, error)
14
14
LoadOperation(ctx context.Context, bundleNumber int, position int) (*plcclient.PLCOperation, error)
15
+
LoadOperations(ctx context.Context, bundleNumber int, positions []int) (map[int]*plcclient.PLCOperation, error)
15
16
GetBundleIndex() BundleIndexProvider
16
17
}
17
18
+16
-126
internal/didindex/lookup.go
+16
-126
internal/didindex/lookup.go
···
6
6
"sort"
7
7
"sync"
8
8
9
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
9
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
10
10
)
11
11
12
-
// GetDIDOperations retrieves all operations for a DID from bundles
13
-
func (dim *Manager) GetDIDOperations(ctx context.Context, did string, provider BundleProvider) ([]plcclient.PLCOperation, error) {
14
-
if err := plcclient.ValidateDIDFormat(did); err != nil {
15
-
return nil, err
16
-
}
17
-
18
-
if !dim.Exists() {
19
-
return nil, fmt.Errorf("DID index not available - run 'plcbundle index build' to enable DID lookups")
20
-
}
21
-
22
-
if dim.verbose {
23
-
dim.logger.Printf("DEBUG: Using DID index for lookup")
24
-
}
25
-
26
-
locations, err := dim.GetDIDLocations(did)
27
-
if err != nil {
28
-
return nil, err
29
-
}
30
-
31
-
if len(locations) == 0 {
32
-
return []plcclient.PLCOperation{}, nil
33
-
}
34
-
35
-
// Filter nullified
36
-
var validLocations []OpLocation
37
-
for _, loc := range locations {
38
-
if !loc.Nullified() {
39
-
validLocations = append(validLocations, loc)
40
-
}
41
-
}
42
-
43
-
if dim.verbose {
44
-
dim.logger.Printf("DEBUG: Filtered %d valid locations (from %d total)",
45
-
len(validLocations), len(locations))
46
-
}
47
-
48
-
if len(validLocations) == 1 {
49
-
loc := validLocations[0]
50
-
op, err := provider.LoadOperation(ctx, loc.BundleInt(), loc.PositionInt())
51
-
if err != nil {
52
-
return nil, err
53
-
}
54
-
return []plcclient.PLCOperation{*op}, nil
55
-
}
56
-
57
-
// For multiple operations: group by bundle to minimize bundle loads
58
-
bundleMap := make(map[uint16][]uint16)
59
-
for _, loc := range validLocations {
60
-
bundleMap[loc.Bundle()] = append(bundleMap[loc.Bundle()], loc.Position())
61
-
}
62
-
63
-
if dim.verbose {
64
-
dim.logger.Printf("DEBUG: Loading from %d bundle(s)", len(bundleMap))
65
-
}
66
-
67
-
// Load operations
68
-
var allOps []plcclient.PLCOperation
69
-
for bundleNum, positions := range bundleMap {
70
-
// Optimization: If single position from bundle, use LoadOperation
71
-
if len(positions) == 1 {
72
-
op, err := provider.LoadOperation(ctx, int(bundleNum), int(positions[0]))
73
-
if err != nil {
74
-
dim.logger.Printf("Warning: failed to load operation at bundle %d position %d: %v",
75
-
bundleNum, positions[0], err)
76
-
continue
77
-
}
78
-
allOps = append(allOps, *op)
79
-
} else {
80
-
// Multiple positions: load full bundle
81
-
bundle, err := provider.LoadBundleForDIDIndex(ctx, int(bundleNum))
82
-
if err != nil {
83
-
dim.logger.Printf("Warning: failed to load bundle %d: %v", bundleNum, err)
84
-
continue
85
-
}
86
-
87
-
for _, pos := range positions {
88
-
if int(pos) < len(bundle.Operations) {
89
-
allOps = append(allOps, bundle.Operations[pos])
90
-
}
91
-
}
92
-
}
93
-
}
94
-
95
-
if dim.verbose {
96
-
dim.logger.Printf("DEBUG: Loaded %d total operations", len(allOps))
97
-
}
98
-
99
-
// Sort by time
100
-
sort.Slice(allOps, func(i, j int) bool {
101
-
return allOps[i].CreatedAt.Before(allOps[j].CreatedAt)
102
-
})
103
-
104
-
return allOps, nil
105
-
}
106
-
107
-
// GetDIDOperationsWithLocations returns operations with their bundle/position metadata
108
-
func (dim *Manager) GetDIDOperationsWithLocations(ctx context.Context, did string, provider BundleProvider) ([]OpLocationWithOperation, error) {
12
+
// GetDIDOperations retrieves all operations for a DID WITH location metadata
13
+
// Returns operations with bundle/position info (includes nullified operations)
14
+
func (dim *Manager) GetDIDOperations(ctx context.Context, did string, provider BundleProvider) ([]OpLocationWithOperation, error) {
109
15
if err := plcclient.ValidateDIDFormat(did); err != nil {
110
16
return nil, err
111
17
}
···
114
20
return nil, fmt.Errorf("DID index not available - run 'plcbundle index build' to enable DID lookups")
115
21
}
116
22
117
-
if dim.verbose {
118
-
dim.logger.Printf("DEBUG: Using DID index for lookup with locations")
119
-
}
120
-
121
23
locations, err := dim.GetDIDLocations(did)
122
24
if err != nil {
123
25
return nil, err
···
127
29
return []OpLocationWithOperation{}, nil
128
30
}
129
31
130
-
if dim.verbose {
131
-
dim.logger.Printf("DEBUG: Found %d locations in index", len(locations))
132
-
}
133
-
134
32
// Group by bundle
135
33
bundleMap := make(map[uint16][]OpLocation)
136
34
for _, loc := range locations {
137
35
bundleMap[loc.Bundle()] = append(bundleMap[loc.Bundle()], loc)
138
36
}
139
37
140
-
if dim.verbose {
141
-
dim.logger.Printf("DEBUG: Loading from %d bundle(s)", len(bundleMap))
142
-
}
143
-
144
38
var results []OpLocationWithOperation
145
39
for bundleNum, locs := range bundleMap {
146
-
bundle, err := provider.LoadBundleForDIDIndex(ctx, int(bundleNum))
40
+
positions := make([]int, len(locs))
41
+
for i, l := range locs {
42
+
positions[i] = l.PositionInt()
43
+
}
44
+
opsMap, err := provider.LoadOperations(ctx, int(bundleNum), positions)
147
45
if err != nil {
148
46
dim.logger.Printf("Warning: failed to load bundle %d: %v", bundleNum, err)
149
47
continue
150
48
}
151
-
152
-
for _, loc := range locs {
153
-
if loc.PositionInt() >= len(bundle.Operations) {
154
-
continue
49
+
for i, l := range locs {
50
+
if op, ok := opsMap[positions[i]]; ok {
51
+
results = append(results, OpLocationWithOperation{
52
+
Operation: *op,
53
+
Bundle: l.BundleInt(),
54
+
Position: l.PositionInt(),
55
+
})
155
56
}
156
-
157
-
op := bundle.Operations[loc.Position()]
158
-
results = append(results, OpLocationWithOperation{
159
-
Operation: op,
160
-
Bundle: loc.BundleInt(),
161
-
Position: loc.PositionInt(),
162
-
})
163
57
}
164
58
}
165
59
···
167
61
sort.Slice(results, func(i, j int) bool {
168
62
return results[i].Operation.CreatedAt.Before(results[j].Operation.CreatedAt)
169
63
})
170
-
171
-
if dim.verbose {
172
-
dim.logger.Printf("DEBUG: Loaded %d total operations", len(results))
173
-
}
174
64
175
65
return results, nil
176
66
}
+218
-18
internal/didindex/manager.go
+218
-18
internal/didindex/manager.go
···
13
13
14
14
"github.com/goccy/go-json"
15
15
"golang.org/x/sys/unix"
16
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
16
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
17
17
)
18
18
19
19
// NewManager creates a new DID index manager
···
46
46
evictionThreshold: 5,
47
47
config: config,
48
48
logger: logger,
49
+
recentLookupSize: 1000, // Track last 100 lookups
50
+
recentLookups: make([]int64, 1000),
49
51
}
50
52
}
51
53
···
76
78
dim.verbose = verbose
77
79
}
78
80
79
-
// GetDIDLocations returns all bundle+position locations for a DID
81
+
// GetDIDLocations returns all bundle+position locations for a DID (with timing)
80
82
func (dim *Manager) GetDIDLocations(did string) ([]OpLocation, error) {
83
+
// Start timing
84
+
lookupStart := time.Now()
85
+
defer func() {
86
+
dim.recordLookupTime(time.Since(lookupStart))
87
+
}()
88
+
81
89
identifier, err := extractDIDIdentifier(did)
82
90
if err != nil {
83
91
return nil, err
···
96
104
return nil, fmt.Errorf("failed to load shard %02x: %w", shardNum, err)
97
105
}
98
106
99
-
// CRITICAL: Release shard when done
100
107
defer dim.releaseShard(shard)
101
108
102
109
if shard.data == nil {
···
110
117
dim.logger.Printf("DEBUG: Shard %02x loaded, size: %d bytes", shardNum, len(shard.data))
111
118
}
112
119
113
-
// โ
Safe to read - refcount prevents eviction
114
120
locations := dim.searchShard(shard, identifier)
115
121
116
122
if dim.verbose {
···
141
147
atomic.AddInt64(&shard.refCount, 1)
142
148
atomic.StoreInt64(&shard.lastUsed, time.Now().Unix())
143
149
atomic.AddInt64(&shard.accessCount, 1)
150
+
atomic.AddInt64(&dim.cacheHits, 1)
144
151
145
152
return shard, nil
146
153
}
154
+
atomic.AddInt64(&dim.cacheMisses, 1)
147
155
148
156
// Cache miss - load from disk
149
157
shardPath := filepath.Join(dim.shardDir, fmt.Sprintf("%02x.idx", shardNum))
···
275
283
// Determine search range using prefix index
276
284
left, right := 0, int(entryCount)
277
285
278
-
// NEW: Use prefix index to narrow range (only for v3+)
286
+
// Use prefix index to narrow range (only for v3+)
279
287
if version >= 3 && len(identifier) > 0 {
280
288
prefixByte := identifier[0]
281
289
prefixIndexPos := 32 + (int(prefixByte) * 4)
···
428
436
// Read locations
429
437
locations := make([]OpLocation, count)
430
438
for i := 0; i < int(count); i++ {
431
-
if offset+4 > len(data) { // โ 4 bytes now
439
+
if offset+4 > len(data) {
432
440
return locations[:i]
433
441
}
434
442
···
436
444
packed := binary.LittleEndian.Uint32(data[offset : offset+4])
437
445
locations[i] = OpLocation(packed)
438
446
439
-
offset += 4 // โ 4 bytes
447
+
offset += 4
440
448
}
441
449
442
450
return locations
···
456
464
}
457
465
}
458
466
459
-
// GetStats returns index statistics
467
+
// GetStats returns index statistics (updated)
460
468
func (dim *Manager) GetStats() map[string]interface{} {
461
469
cachedShards := make([]int, 0)
462
470
···
467
475
468
476
sort.Ints(cachedShards)
469
477
470
-
return map[string]interface{}{
471
-
"total_dids": dim.config.TotalDIDs,
472
-
"last_bundle": dim.config.LastBundle,
473
-
"shard_count": dim.config.ShardCount,
474
-
"cached_shards": len(cachedShards),
475
-
"cache_limit": dim.maxCache,
476
-
"cache_order": cachedShards,
477
-
"updated_at": dim.config.UpdatedAt,
478
+
// Calculate cache hit rate
479
+
hits := atomic.LoadInt64(&dim.cacheHits)
480
+
misses := atomic.LoadInt64(&dim.cacheMisses)
481
+
total := hits + misses
482
+
483
+
cacheHitRate := 0.0
484
+
if total > 0 {
485
+
cacheHitRate = float64(hits) / float64(total)
486
+
}
487
+
488
+
baseStats := map[string]interface{}{
489
+
"total_dids": dim.config.TotalDIDs,
490
+
"last_bundle": dim.config.LastBundle,
491
+
"shard_count": dim.config.ShardCount,
492
+
"cached_shards": len(cachedShards),
493
+
"cache_limit": dim.maxCache,
494
+
"cache_order": cachedShards,
495
+
"updated_at": dim.config.UpdatedAt,
496
+
"cache_hits": hits,
497
+
"cache_misses": misses,
498
+
"cache_hit_rate": cacheHitRate,
499
+
"total_lookups": total,
478
500
}
501
+
502
+
// Merge with performance stats
503
+
perfStats := dim.calculateLookupStats()
504
+
for k, v := range perfStats {
505
+
baseStats[k] = v
506
+
}
507
+
508
+
return baseStats
479
509
}
480
510
481
511
// Exists checks if index exists
···
646
676
for i, id := range identifiers {
647
677
offsetTable[i] = uint32(currentOffset)
648
678
locations := builder.entries[id]
649
-
entrySize := DID_IDENTIFIER_LEN + 2 + (len(locations) * 4) // โ 4 bytes
679
+
entrySize := DID_IDENTIFIER_LEN + 2 + (len(locations) * 4)
650
680
currentOffset += entrySize
651
681
}
652
682
···
688
718
for _, loc := range locations {
689
719
// Write packed uint32 (global position + nullified bit)
690
720
binary.LittleEndian.PutUint32(buf[offset:offset+4], uint32(loc))
691
-
offset += 4 // โ 4 bytes per location
721
+
offset += 4
692
722
}
693
723
}
694
724
···
865
895
866
896
atomic.AddInt64(&shard.refCount, -1)
867
897
}
898
+
899
+
// ResetCacheStats resets cache statistics (useful for monitoring)
900
+
func (dim *Manager) ResetCacheStats() {
901
+
atomic.StoreInt64(&dim.cacheHits, 0)
902
+
atomic.StoreInt64(&dim.cacheMisses, 0)
903
+
}
904
+
905
+
// recordLookupTime records a lookup time (thread-safe)
906
+
func (dim *Manager) recordLookupTime(duration time.Duration) {
907
+
micros := duration.Microseconds()
908
+
909
+
// Update totals (atomic)
910
+
atomic.AddInt64(&dim.totalLookups, 1)
911
+
atomic.AddInt64(&dim.totalLookupTime, micros)
912
+
913
+
// Update circular buffer (with lock)
914
+
dim.lookupTimeLock.Lock()
915
+
dim.recentLookups[dim.recentLookupIdx] = micros
916
+
dim.recentLookupIdx = (dim.recentLookupIdx + 1) % dim.recentLookupSize
917
+
dim.lookupTimeLock.Unlock()
918
+
}
919
+
920
+
// calculateLookupStats calculates performance statistics
921
+
func (dim *Manager) calculateLookupStats() map[string]interface{} {
922
+
totalLookups := atomic.LoadInt64(&dim.totalLookups)
923
+
totalTime := atomic.LoadInt64(&dim.totalLookupTime)
924
+
925
+
stats := make(map[string]interface{})
926
+
927
+
if totalLookups == 0 {
928
+
return stats
929
+
}
930
+
931
+
// Overall average (all time)
932
+
avgMicros := float64(totalTime) / float64(totalLookups)
933
+
stats["avg_lookup_time_ms"] = avgMicros / 1000.0
934
+
stats["total_lookups"] = totalLookups
935
+
936
+
// Recent statistics (last N lookups)
937
+
dim.lookupTimeLock.Lock()
938
+
recentCopy := make([]int64, dim.recentLookupSize)
939
+
copy(recentCopy, dim.recentLookups)
940
+
dim.lookupTimeLock.Unlock()
941
+
942
+
// Find valid entries (non-zero)
943
+
validRecent := make([]int64, 0, dim.recentLookupSize)
944
+
for _, t := range recentCopy {
945
+
if t > 0 {
946
+
validRecent = append(validRecent, t)
947
+
}
948
+
}
949
+
950
+
if len(validRecent) > 0 {
951
+
// Sort for percentiles
952
+
sortedRecent := make([]int64, len(validRecent))
953
+
copy(sortedRecent, validRecent)
954
+
sort.Slice(sortedRecent, func(i, j int) bool {
955
+
return sortedRecent[i] < sortedRecent[j]
956
+
})
957
+
958
+
// Calculate recent average
959
+
var recentSum int64
960
+
for _, t := range validRecent {
961
+
recentSum += t
962
+
}
963
+
recentAvg := float64(recentSum) / float64(len(validRecent))
964
+
stats["recent_avg_lookup_time_ms"] = recentAvg / 1000.0
965
+
stats["recent_sample_size"] = len(validRecent)
966
+
967
+
// Min/Max
968
+
stats["min_lookup_time_ms"] = float64(sortedRecent[0]) / 1000.0
969
+
stats["max_lookup_time_ms"] = float64(sortedRecent[len(sortedRecent)-1]) / 1000.0
970
+
971
+
// Percentiles (p50, p95, p99)
972
+
p50idx := len(sortedRecent) * 50 / 100
973
+
p95idx := len(sortedRecent) * 95 / 100
974
+
p99idx := len(sortedRecent) * 99 / 100
975
+
976
+
if p50idx < len(sortedRecent) {
977
+
stats["p50_lookup_time_ms"] = float64(sortedRecent[p50idx]) / 1000.0
978
+
}
979
+
if p95idx < len(sortedRecent) {
980
+
stats["p95_lookup_time_ms"] = float64(sortedRecent[p95idx]) / 1000.0
981
+
}
982
+
if p99idx < len(sortedRecent) {
983
+
stats["p99_lookup_time_ms"] = float64(sortedRecent[p99idx]) / 1000.0
984
+
}
985
+
}
986
+
987
+
return stats
988
+
}
989
+
990
+
// ResetPerformanceStats resets performance statistics (useful for monitoring periods)
991
+
func (dim *Manager) ResetPerformanceStats() {
992
+
atomic.StoreInt64(&dim.cacheHits, 0)
993
+
atomic.StoreInt64(&dim.cacheMisses, 0)
994
+
atomic.StoreInt64(&dim.totalLookups, 0)
995
+
atomic.StoreInt64(&dim.totalLookupTime, 0)
996
+
997
+
dim.lookupTimeLock.Lock()
998
+
dim.recentLookups = make([]int64, dim.recentLookupSize)
999
+
dim.recentLookupIdx = 0
1000
+
dim.lookupTimeLock.Unlock()
1001
+
}
1002
+
1003
+
// NeedsRebuild checks if index needs rebuilding and returns reason
1004
+
func (dim *Manager) NeedsRebuild(bundleProvider BundleIndexProvider) (bool, string) {
1005
+
// Check if index exists
1006
+
if !dim.Exists() {
1007
+
return true, "index does not exist"
1008
+
}
1009
+
1010
+
// Get repository state
1011
+
bundles := bundleProvider.GetBundles()
1012
+
if len(bundles) == 0 {
1013
+
return false, "" // No bundles, no need to rebuild
1014
+
}
1015
+
1016
+
lastBundleInRepo := bundles[len(bundles)-1].BundleNumber
1017
+
1018
+
// Check version
1019
+
if dim.config.Version != DIDINDEX_VERSION {
1020
+
return true, fmt.Sprintf("index version outdated (v%d, need v%d)",
1021
+
dim.config.Version, DIDINDEX_VERSION)
1022
+
}
1023
+
1024
+
// Check if index is behind
1025
+
if dim.config.LastBundle < lastBundleInRepo {
1026
+
bundlesBehind := lastBundleInRepo - dim.config.LastBundle
1027
+
1028
+
// Smart logic: only rebuild if significantly behind
1029
+
// Otherwise can do incremental update
1030
+
if bundlesBehind > 100 {
1031
+
return true, fmt.Sprintf("index significantly behind (%d bundles)", bundlesBehind)
1032
+
}
1033
+
1034
+
return false, fmt.Sprintf("index slightly behind (%d bundles) - can update incrementally", bundlesBehind)
1035
+
}
1036
+
1037
+
// Check if index is ahead (corruption indicator)
1038
+
if dim.config.LastBundle > lastBundleInRepo {
1039
+
return true, fmt.Sprintf("index is ahead of repository (has %d, repo has %d) - likely corrupted",
1040
+
dim.config.LastBundle, lastBundleInRepo)
1041
+
}
1042
+
1043
+
// Index is up to date
1044
+
return false, ""
1045
+
}
1046
+
1047
+
// ShouldUpdateIncrementally checks if incremental update is appropriate
1048
+
func (dim *Manager) ShouldUpdateIncrementally(bundleProvider BundleIndexProvider) (bool, int) {
1049
+
if !dim.Exists() {
1050
+
return false, 0
1051
+
}
1052
+
1053
+
bundles := bundleProvider.GetBundles()
1054
+
if len(bundles) == 0 {
1055
+
return false, 0
1056
+
}
1057
+
1058
+
lastBundleInRepo := bundles[len(bundles)-1].BundleNumber
1059
+
bundlesBehind := lastBundleInRepo - dim.config.LastBundle
1060
+
1061
+
// Only do incremental if behind by less than 100 bundles
1062
+
if bundlesBehind > 0 && bundlesBehind <= 100 {
1063
+
return true, bundlesBehind
1064
+
}
1065
+
1066
+
return false, 0
1067
+
}
+1
-1
internal/didindex/manager_test.go
+1
-1
internal/didindex/manager_test.go
+11
-1
internal/didindex/types.go
+11
-1
internal/didindex/types.go
···
4
4
"sync"
5
5
"time"
6
6
7
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
7
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
8
8
)
9
9
10
10
const (
···
39
39
shardCache sync.Map
40
40
maxCache int
41
41
evictionThreshold int
42
+
cacheHits int64 // atomic counter
43
+
cacheMisses int64 // atomic counter
42
44
43
45
config *Config
44
46
logger Logger
45
47
verbose bool
46
48
47
49
indexMu sync.RWMutex
50
+
51
+
// Performance tracking
52
+
totalLookups int64 // Total number of lookups
53
+
totalLookupTime int64 // Total time in microseconds
54
+
lookupTimeLock sync.Mutex
55
+
recentLookups []int64 // Circular buffer for recent lookup times (microseconds)
56
+
recentLookupIdx int
57
+
recentLookupSize int
48
58
}
49
59
50
60
// mmapShard represents a memory-mapped shard file
+5
internal/handleresolver/resolver.go
+5
internal/handleresolver/resolver.go
+3
-3
internal/mempool/mempool.go
+3
-3
internal/mempool/mempool.go
···
10
10
"time"
11
11
12
12
"github.com/goccy/go-json"
13
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
14
-
"tangled.org/atscan.net/plcbundle/internal/types"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
15
15
)
16
16
17
17
const MEMPOOL_FILE_PREFIX = "plc_mempool_"
···
199
199
// Remove taken operations
200
200
m.operations = m.operations[n:]
201
201
202
-
// โจ FIX: ALWAYS reset tracking after Take
202
+
// ALWAYS reset tracking after Take
203
203
// Take() means we're consuming these ops for a bundle
204
204
// Any remaining ops are "new" and unsaved
205
205
m.lastSavedLen = 0
+4
-5
internal/mempool/mempool_test.go
+4
-5
internal/mempool/mempool_test.go
···
8
8
"testing"
9
9
"time"
10
10
11
-
"tangled.org/atscan.net/plcbundle/internal/mempool"
12
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
13
-
"tangled.org/atscan.net/plcbundle/internal/types"
11
+
"tangled.org/atscan.net/plcbundle-go/internal/mempool"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
14
14
)
15
15
16
16
type testLogger struct {
···
352
352
m.Save()
353
353
354
354
// Add 10 more and save
355
-
// FIX: makeTestOperationsFrom(start, COUNT) - so we want (10, 10) not (10, 20)
356
-
ops2 := makeTestOperationsFrom(10, 10) // โ Changed from (10, 20)
355
+
ops2 := makeTestOperationsFrom(10, 10)
357
356
m.Add(ops2)
358
357
m.Save()
359
358
+3
-3
internal/plcclient/plc_test.go
+3
-3
internal/plcclient/plc_test.go
···
8
8
"testing"
9
9
"time"
10
10
11
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
12
-
"tangled.org/atscan.net/plcbundle/internal/storage"
11
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
12
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
13
13
)
14
14
15
15
// TestPLCOperation tests operation parsing and methods
···
244
244
}
245
245
246
246
logger := &benchLogger{}
247
-
operations, _ := storage.NewOperations(logger)
247
+
operations, _ := storage.NewOperations(logger, false)
248
248
defer operations.Close()
249
249
250
250
b.ResetTimer()
+4
-4
internal/plcclient/resolver.go
+4
-4
internal/plcclient/resolver.go
···
176
176
// Base contexts - ALWAYS include multikey (matches PLC directory behavior)
177
177
contexts := []string{
178
178
"https://www.w3.org/ns/did/v1",
179
-
"https://w3id.org/security/multikey/v1", // โ Always include this
179
+
"https://w3id.org/security/multikey/v1",
180
180
}
181
181
182
182
hasSecp256k1 := false
···
204
204
doc := &DIDDocument{
205
205
Context: contexts,
206
206
ID: state.DID,
207
-
AlsoKnownAs: []string{}, // โ Empty slice
208
-
VerificationMethod: []VerificationMethod{}, // โ Empty slice
209
-
Service: []Service{}, // โ Empty slice
207
+
AlsoKnownAs: []string{},
208
+
VerificationMethod: []VerificationMethod{},
209
+
Service: []Service{},
210
210
}
211
211
212
212
// Copy alsoKnownAs if present
+482
-74
internal/storage/storage.go
+482
-74
internal/storage/storage.go
···
8
8
"fmt"
9
9
"io"
10
10
"os"
11
+
"path/filepath"
11
12
"sync"
12
13
"time"
13
14
14
-
gozstd "github.com/DataDog/zstd"
15
15
"github.com/goccy/go-json"
16
-
"tangled.org/atscan.net/plcbundle/internal/plcclient" // ONLY import plcclient, NOT bundle
16
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
17
+
)
18
+
19
+
const (
20
+
MetadataFormatVersion = 1
17
21
)
18
22
23
+
// BundleMetadata - Self-describing bundle (content-focused, not container)
24
+
type BundleMetadata struct {
25
+
// === Format Info ===
26
+
Format string `json:"format"` // "plcbundle-v1"
27
+
28
+
// === Bundle Identity ===
29
+
Origin string `json:"origin"` // Source PLC directory URL
30
+
BundleNumber int `json:"bundle_number"` // Sequential bundle number
31
+
32
+
// === Content Integrity ===
33
+
ContentHash string `json:"content_hash"` // SHA256 of uncompressed JSONL content
34
+
ParentHash string `json:"parent_hash,omitempty"` // Hash of previous bundle (chain)
35
+
36
+
// === Content Description ===
37
+
OperationCount int `json:"operation_count"` // Always 10000 for complete bundles
38
+
DIDCount int `json:"did_count"` // Unique DIDs in this bundle
39
+
StartTime time.Time `json:"start_time"` // First operation timestamp
40
+
EndTime time.Time `json:"end_time"` // Last operation timestamp
41
+
42
+
// === Creation Provenance ===
43
+
CreatedAt time.Time `json:"created_at"` // When bundle was created
44
+
CreatedBy string `json:"created_by"` // "plcbundle/v1.2.3"
45
+
CreatedByHost string `json:"created_by_host,omitempty"` // Optional: hostname that created it
46
+
47
+
// === Optional Context ===
48
+
Cursor string `json:"cursor,omitempty"` // PLC export cursor for this bundle
49
+
Notes string `json:"notes,omitempty"` // Optional description
50
+
51
+
// === Frame Structure (for random access) ===
52
+
FrameCount int `json:"frame_count"` // Number of zstd frames (usually 100)
53
+
FrameSize int `json:"frame_size"` // Operations per frame (100)
54
+
FrameOffsets []int64 `json:"frame_offsets"` // Byte offsets of each frame
55
+
}
56
+
19
57
// Operations handles low-level bundle file operations
20
58
type Operations struct {
21
-
logger Logger
59
+
logger Logger
60
+
verbose bool
22
61
}
23
62
24
63
// Logger interface
···
27
66
Println(v ...interface{})
28
67
}
29
68
30
-
func NewOperations(logger Logger) (*Operations, error) {
31
-
return &Operations{logger: logger}, nil
69
+
func NewOperations(logger Logger, verbose bool) (*Operations, error) {
70
+
return &Operations{
71
+
logger: logger,
72
+
verbose: verbose,
73
+
}, nil
32
74
}
33
75
34
76
func (op *Operations) Close() {
35
77
// Nothing to close
78
+
}
79
+
80
+
// BundleInfo contains info needed to create metadata
81
+
type BundleInfo struct {
82
+
BundleNumber int
83
+
Origin string
84
+
ParentHash string
85
+
Cursor string
86
+
CreatedBy string // "plcbundle/v1.2.3"
87
+
Hostname string // Optional
36
88
}
37
89
38
90
// ========================================
···
83
135
}
84
136
85
137
// ========================================
86
-
// FILE OPERATIONS
138
+
// FILE OPERATIONS (using zstd abstraction)
87
139
// ========================================
88
140
89
-
// LoadBundle loads a compressed bundle
90
-
func (op *Operations) LoadBundle(path string) ([]plcclient.PLCOperation, error) {
91
-
compressed, err := os.ReadFile(path)
92
-
if err != nil {
93
-
return nil, fmt.Errorf("failed to read file: %w", err)
141
+
// SaveBundle saves operations with metadata containing RELATIVE frame offsets
142
+
func (op *Operations) SaveBundle(path string, operations []plcclient.PLCOperation, bundleInfo *BundleInfo) (string, string, int64, int64, error) {
143
+
if bundleInfo == nil {
144
+
return "", "", 0, 0, fmt.Errorf("bundleInfo cannot be nil")
94
145
}
95
146
96
-
decompressed, err := gozstd.Decompress(nil, compressed)
97
-
if err != nil {
98
-
return nil, fmt.Errorf("failed to decompress: %w", err)
99
-
}
100
-
101
-
return op.ParseJSONL(decompressed)
102
-
}
103
-
104
-
// SaveBundle saves operations to disk (compressed)
105
-
// Returns: contentHash, compressedHash, contentSize, compressedSize, error
106
-
func (op *Operations) SaveBundle(path string, operations []plcclient.PLCOperation) (string, string, int64, int64, error) {
147
+
// 1. Calculate content
107
148
jsonlData := op.SerializeJSONL(operations)
108
149
contentSize := int64(len(jsonlData))
109
150
contentHash := op.Hash(jsonlData)
151
+
dids := op.ExtractUniqueDIDs(operations)
110
152
111
-
compressed, err := gozstd.Compress(nil, jsonlData)
112
-
if err != nil {
113
-
return "", "", 0, 0, fmt.Errorf("failed to compress: %w", err)
153
+
hostnameHash := ""
154
+
if bundleInfo.Hostname != "" {
155
+
hostnameHash = op.Hash([]byte(bundleInfo.Hostname))[:16] // First 16 chars (64 bits)
114
156
}
115
157
116
-
compressedSize := int64(len(compressed))
117
-
compressedHash := op.Hash(compressed)
158
+
// 2. Compress all frames
159
+
compressedFrames := make([][]byte, 0)
160
+
161
+
for i := 0; i < len(operations); i += FrameSize {
162
+
end := i + FrameSize
163
+
if end > len(operations) {
164
+
end = len(operations)
165
+
}
166
+
opChunk := operations[i:end]
167
+
chunkJsonlData := op.SerializeJSONL(opChunk)
168
+
169
+
compressedChunk, err := CompressFrame(chunkJsonlData)
170
+
if err != nil {
171
+
return "", "", 0, 0, fmt.Errorf("failed to compress frame: %w", err)
172
+
}
118
173
119
-
if err := os.WriteFile(path, compressed, 0644); err != nil {
120
-
return "", "", 0, 0, fmt.Errorf("failed to write file: %w", err)
174
+
compressedFrames = append(compressedFrames, compressedChunk)
121
175
}
122
176
123
-
return contentHash, compressedHash, contentSize, compressedSize, nil
124
-
}
177
+
// 3. Calculate RELATIVE offsets (relative to first data frame)
178
+
relativeOffsets := make([]int64, len(compressedFrames)+1)
179
+
relativeOffsets[0] = 0
125
180
126
-
// Pool for scanner buffers
127
-
var scannerBufPool = sync.Pool{
128
-
New: func() interface{} {
129
-
buf := make([]byte, 64*1024)
130
-
return &buf
131
-
},
132
-
}
181
+
cumulative := int64(0)
182
+
for i, frame := range compressedFrames {
183
+
cumulative += int64(len(frame))
184
+
relativeOffsets[i+1] = cumulative
185
+
}
133
186
134
-
// LoadOperationAtPosition loads a single operation from a bundle
135
-
func (op *Operations) LoadOperationAtPosition(path string, position int) (*plcclient.PLCOperation, error) {
136
-
if position < 0 {
137
-
return nil, fmt.Errorf("invalid position: %d", position)
187
+
// 4. Build metadata with RELATIVE offsets
188
+
metadata := &BundleMetadata{
189
+
Format: fmt.Sprintf("plcbundle-v%d", MetadataFormatVersion),
190
+
BundleNumber: bundleInfo.BundleNumber,
191
+
Origin: bundleInfo.Origin,
192
+
CreatedAt: time.Now().UTC(),
193
+
CreatedBy: bundleInfo.CreatedBy,
194
+
CreatedByHost: hostnameHash,
195
+
ContentHash: contentHash,
196
+
ParentHash: bundleInfo.ParentHash,
197
+
OperationCount: len(operations),
198
+
DIDCount: len(dids),
199
+
FrameCount: len(compressedFrames),
200
+
FrameSize: FrameSize,
201
+
Cursor: bundleInfo.Cursor,
202
+
FrameOffsets: relativeOffsets, // RELATIVE to data start!
138
203
}
139
204
205
+
if len(operations) > 0 {
206
+
metadata.StartTime = operations[0].CreatedAt
207
+
metadata.EndTime = operations[len(operations)-1].CreatedAt
208
+
}
209
+
210
+
// 5. Write final file
211
+
finalFile, err := os.Create(path)
212
+
if err != nil {
213
+
return "", "", 0, 0, fmt.Errorf("failed to create file: %w", err)
214
+
}
215
+
defer func() {
216
+
finalFile.Close()
217
+
if err != nil {
218
+
os.Remove(path)
219
+
}
220
+
}()
221
+
222
+
// Write metadata frame
223
+
if _, err := op.WriteMetadataFrame(finalFile, metadata); err != nil {
224
+
return "", "", 0, 0, fmt.Errorf("failed to write metadata: %w", err)
225
+
}
226
+
227
+
// Write all data frames
228
+
for _, frame := range compressedFrames {
229
+
if _, err := finalFile.Write(frame); err != nil {
230
+
return "", "", 0, 0, fmt.Errorf("failed to write frame: %w", err)
231
+
}
232
+
}
233
+
234
+
finalFile.Sync()
235
+
finalFile.Close()
236
+
237
+
// 6. Hash
238
+
compressedData, err := os.ReadFile(path)
239
+
if err != nil {
240
+
return "", "", 0, 0, err
241
+
}
242
+
compressedHash := op.Hash(compressedData)
243
+
244
+
os.Remove(path + ".idx")
245
+
246
+
return contentHash, compressedHash, contentSize, int64(len(compressedData)), nil
247
+
}
248
+
249
+
// LoadBundle loads a compressed bundle
250
+
func (op *Operations) LoadBundle(path string) ([]plcclient.PLCOperation, error) {
140
251
file, err := os.Open(path)
141
252
if err != nil {
142
253
return nil, fmt.Errorf("failed to open file: %w", err)
143
254
}
144
255
defer file.Close()
145
256
146
-
reader := gozstd.NewReader(file)
147
-
defer reader.Close()
257
+
// Use abstracted streaming reader
258
+
reader, err := NewStreamingReader(file)
259
+
if err != nil {
260
+
return nil, fmt.Errorf("failed to create reader: %w", err)
261
+
}
262
+
defer reader.Release()
148
263
149
-
bufPtr := scannerBufPool.Get().(*[]byte)
150
-
defer scannerBufPool.Put(bufPtr)
264
+
// Read all decompressed data from all frames
265
+
decompressed, err := io.ReadAll(reader)
266
+
if err != nil {
267
+
return nil, fmt.Errorf("failed to decompress: %w", err)
268
+
}
151
269
152
-
scanner := bufio.NewScanner(reader)
153
-
scanner.Buffer(*bufPtr, 512*1024)
154
-
155
-
lineNum := 0
156
-
for scanner.Scan() {
157
-
if lineNum == position {
158
-
line := scanner.Bytes()
270
+
// DEFENSIVE: Validate we got actual data
271
+
if len(decompressed) == 0 {
272
+
return nil, fmt.Errorf("decompression produced empty result")
273
+
}
159
274
160
-
var operation plcclient.PLCOperation
161
-
if err := json.UnmarshalNoEscape(line, &operation); err != nil {
162
-
return nil, fmt.Errorf("failed to parse operation at position %d: %w", position, err)
163
-
}
164
-
165
-
operation.RawJSON = make([]byte, len(line))
166
-
copy(operation.RawJSON, line)
167
-
168
-
return &operation, nil
169
-
}
170
-
lineNum++
275
+
// Parse JSONL
276
+
operations, err := op.ParseJSONL(decompressed)
277
+
if err != nil {
278
+
return nil, fmt.Errorf("failed to parse JSONL: %w", err)
171
279
}
172
280
173
-
if err := scanner.Err(); err != nil {
174
-
return nil, fmt.Errorf("scanner error: %w", err)
281
+
// DEFENSIVE: Additional validation
282
+
if len(operations) == 0 {
283
+
return nil, fmt.Errorf("bundle contains no valid operations")
175
284
}
176
285
177
-
return nil, fmt.Errorf("position %d not found", position)
286
+
return operations, nil
178
287
}
179
288
180
289
// ========================================
···
197
306
return nil, fmt.Errorf("failed to open bundle: %w", err)
198
307
}
199
308
200
-
reader := gozstd.NewReader(file)
309
+
// Use abstracted reader
310
+
reader, err := NewStreamingReader(file)
311
+
if err != nil {
312
+
file.Close()
313
+
return nil, fmt.Errorf("failed to create reader: %w", err)
314
+
}
201
315
202
316
return &decompressedReader{
203
317
reader: reader,
···
207
321
208
322
// decompressedReader wraps a zstd decoder and underlying file
209
323
type decompressedReader struct {
210
-
reader io.ReadCloser
324
+
reader StreamReader
211
325
file *os.File
212
326
}
213
327
···
216
330
}
217
331
218
332
func (dr *decompressedReader) Close() error {
219
-
dr.reader.Close()
333
+
dr.reader.Release()
220
334
return dr.file.Close()
221
335
}
222
336
···
252
366
compressedHash = op.Hash(compressedData)
253
367
compressedSize = int64(len(compressedData))
254
368
255
-
decompressed, err := gozstd.Decompress(nil, compressedData)
369
+
// Use abstracted decompression
370
+
decompressed, err := DecompressAll(compressedData)
256
371
if err != nil {
257
372
return "", 0, "", 0, fmt.Errorf("failed to decompress: %w", err)
258
373
}
···
361
476
return operations[startIdx:]
362
477
}
363
478
479
+
// Pool for scanner buffers
480
+
var scannerBufPool = sync.Pool{
481
+
New: func() interface{} {
482
+
buf := make([]byte, 64*1024)
483
+
return &buf
484
+
},
485
+
}
486
+
487
+
// ========================================
488
+
// POSITION-BASED LOADING (with frame index)
489
+
// ========================================
490
+
491
+
// LoadOperationAtPosition loads a single operation from a bundle
492
+
func (op *Operations) LoadOperationAtPosition(path string, position int) (*plcclient.PLCOperation, error) {
493
+
if position < 0 {
494
+
return nil, fmt.Errorf("invalid position: %d", position)
495
+
}
496
+
497
+
// Try multiple sources for frame index (no goto!)
498
+
frameOffsets, err := op.loadFrameIndex(path)
499
+
if err != nil {
500
+
// No frame index available - use legacy full scan
501
+
if op.logger != nil {
502
+
op.logger.Printf("No frame index found for %s, using legacy scan", filepath.Base(path))
503
+
}
504
+
return op.loadOperationAtPositionLegacy(path, position)
505
+
}
506
+
507
+
// We have frame index - use it for fast random access
508
+
return op.loadOperationFromFrame(path, position, frameOffsets)
509
+
}
510
+
511
+
// loadFrameIndex loads frame offsets and converts to absolute positions
512
+
func (op *Operations) loadFrameIndex(path string) ([]int64, error) {
513
+
// Try embedded metadata first
514
+
meta, err := op.ExtractMetadataFromFile(path)
515
+
if err == nil && len(meta.FrameOffsets) > 0 {
516
+
// Convert relative offsets to absolute
517
+
// First, get metadata frame size by re-reading
518
+
file, _ := os.Open(path)
519
+
if file != nil {
520
+
defer file.Close()
521
+
522
+
// Read metadata frame to find where data starts
523
+
magic, data, readErr := op.ReadSkippableFrame(file)
524
+
if readErr == nil && magic == SkippableMagicMetadata {
525
+
// Metadata frame size = 4 (magic) + 4 (size) + len(data)
526
+
metadataFrameSize := int64(8 + len(data))
527
+
528
+
// Convert relative to absolute
529
+
absoluteOffsets := make([]int64, len(meta.FrameOffsets))
530
+
for i, relOffset := range meta.FrameOffsets {
531
+
absoluteOffsets[i] = metadataFrameSize + relOffset
532
+
}
533
+
534
+
return absoluteOffsets, nil
535
+
}
536
+
}
537
+
}
538
+
539
+
// Fallback to external .idx file
540
+
indexPath := path + ".idx"
541
+
indexData, err := os.ReadFile(indexPath)
542
+
if err != nil {
543
+
return nil, fmt.Errorf("no frame index available: %w", err)
544
+
}
545
+
546
+
var offsets []int64
547
+
if err := json.Unmarshal(indexData, &offsets); err != nil {
548
+
return nil, fmt.Errorf("invalid frame index: %w", err)
549
+
}
550
+
551
+
return offsets, nil
552
+
}
553
+
554
+
// loadOperationFromFrame loads operation using frame index
555
+
func (op *Operations) loadOperationFromFrame(path string, position int, frameOffsets []int64) (*plcclient.PLCOperation, error) {
556
+
frameIndex := position / FrameSize
557
+
lineInFrame := position % FrameSize
558
+
559
+
if frameIndex >= len(frameOffsets)-1 {
560
+
return nil, fmt.Errorf("position %d out of bounds (frame %d, total frames %d)",
561
+
position, frameIndex, len(frameOffsets)-1)
562
+
}
563
+
564
+
startOffset := frameOffsets[frameIndex]
565
+
endOffset := frameOffsets[frameIndex+1]
566
+
frameLength := endOffset - startOffset
567
+
568
+
if frameLength <= 0 || frameLength > 10*1024*1024 {
569
+
return nil, fmt.Errorf("invalid frame length: %d (offsets: %d-%d)",
570
+
frameLength, startOffset, endOffset)
571
+
}
572
+
573
+
bundleFile, err := os.Open(path)
574
+
if err != nil {
575
+
return nil, fmt.Errorf("failed to open bundle: %w", err)
576
+
}
577
+
defer bundleFile.Close()
578
+
579
+
compressedFrame := make([]byte, frameLength)
580
+
_, err = bundleFile.ReadAt(compressedFrame, startOffset)
581
+
if err != nil {
582
+
return nil, fmt.Errorf("failed to read frame %d (offset %d, length %d): %w",
583
+
frameIndex, startOffset, frameLength, err)
584
+
}
585
+
586
+
// Decompress
587
+
decompressed, err := DecompressFrame(compressedFrame)
588
+
if err != nil {
589
+
if op.logger != nil {
590
+
preview := compressedFrame
591
+
if len(preview) > 16 {
592
+
preview = preview[:16]
593
+
}
594
+
if op.verbose {
595
+
op.logger.Printf("DEBUG: Failed frame data (first 16 bytes): % x", preview)
596
+
}
597
+
}
598
+
return nil, fmt.Errorf("failed to decompress frame %d: %w", frameIndex, err)
599
+
}
600
+
601
+
// Scan to find the line
602
+
scanner := bufio.NewScanner(bytes.NewReader(decompressed))
603
+
lineNum := 0
604
+
605
+
for scanner.Scan() {
606
+
if lineNum == lineInFrame {
607
+
line := scanner.Bytes()
608
+
var operation plcclient.PLCOperation
609
+
if err := json.UnmarshalNoEscape(line, &operation); err != nil {
610
+
return nil, fmt.Errorf("failed to parse operation at position %d: %w", position, err)
611
+
}
612
+
operation.RawJSON = make([]byte, len(line))
613
+
copy(operation.RawJSON, line)
614
+
return &operation, nil
615
+
}
616
+
lineNum++
617
+
}
618
+
619
+
if err := scanner.Err(); err != nil {
620
+
return nil, fmt.Errorf("scanner error on frame %d: %w", frameIndex, err)
621
+
}
622
+
623
+
return nil, fmt.Errorf("position %d not found in frame %d", position, frameIndex)
624
+
}
625
+
626
+
// loadOperationAtPositionLegacy loads operation from old single-frame bundles
627
+
func (op *Operations) loadOperationAtPositionLegacy(path string, position int) (*plcclient.PLCOperation, error) {
628
+
file, err := os.Open(path)
629
+
if err != nil {
630
+
return nil, fmt.Errorf("failed to open file: %w", err)
631
+
}
632
+
defer file.Close()
633
+
634
+
// Use abstracted streaming reader
635
+
reader, err := NewStreamingReader(file)
636
+
if err != nil {
637
+
return nil, fmt.Errorf("failed to create reader: %w", err)
638
+
}
639
+
defer reader.Release()
640
+
641
+
scanner := bufio.NewScanner(reader)
642
+
buf := make([]byte, 512*1024)
643
+
scanner.Buffer(buf, 1024*1024)
644
+
645
+
lineNum := 0
646
+
for scanner.Scan() {
647
+
if lineNum == position {
648
+
line := scanner.Bytes()
649
+
var operation plcclient.PLCOperation
650
+
if err := json.UnmarshalNoEscape(line, &operation); err != nil {
651
+
return nil, fmt.Errorf("failed to parse operation at position %d: %w", position, err)
652
+
}
653
+
operation.RawJSON = make([]byte, len(line))
654
+
copy(operation.RawJSON, line)
655
+
return &operation, nil
656
+
}
657
+
lineNum++
658
+
}
659
+
660
+
if err := scanner.Err(); err != nil {
661
+
return nil, fmt.Errorf("scanner error: %w", err)
662
+
}
663
+
664
+
return nil, fmt.Errorf("position %d not found in bundle", position)
665
+
}
666
+
364
667
// LoadOperationsAtPositions loads multiple operations from a bundle in one pass
365
668
func (op *Operations) LoadOperationsAtPositions(path string, positions []int) (map[int]*plcclient.PLCOperation, error) {
366
669
if len(positions) == 0 {
···
386
689
}
387
690
defer file.Close()
388
691
389
-
reader := gozstd.NewReader(file)
390
-
defer reader.Close()
692
+
// Use abstracted streaming reader
693
+
reader, err := NewStreamingReader(file)
694
+
if err != nil {
695
+
return nil, fmt.Errorf("failed to create reader: %w", err)
696
+
}
697
+
defer reader.Release()
391
698
392
699
bufPtr := scannerBufPool.Get().(*[]byte)
393
700
defer scannerBufPool.Put(bufPtr)
···
419
726
420
727
lineNum++
421
728
422
-
// Early exit if we passed the max position we need
729
+
// Early exit if we passed the max position
423
730
if lineNum > maxPos {
424
731
break
425
732
}
···
431
738
432
739
return results, nil
433
740
}
741
+
742
+
// CalculateMetadataWithoutLoading calculates metadata by streaming (no full load)
743
+
func (op *Operations) CalculateMetadataWithoutLoading(path string) (opCount int, didCount int, startTime, endTime time.Time, err error) {
744
+
file, err := os.Open(path)
745
+
if err != nil {
746
+
return 0, 0, time.Time{}, time.Time{}, err
747
+
}
748
+
defer file.Close()
749
+
750
+
// Use abstracted reader
751
+
reader, err := NewStreamingReader(file)
752
+
if err != nil {
753
+
return 0, 0, time.Time{}, time.Time{}, fmt.Errorf("failed to create reader: %w", err)
754
+
}
755
+
defer reader.Release()
756
+
757
+
scanner := bufio.NewScanner(reader)
758
+
buf := make([]byte, 64*1024)
759
+
scanner.Buffer(buf, 1024*1024)
760
+
761
+
didSet := make(map[string]bool)
762
+
lineNum := 0
763
+
764
+
for scanner.Scan() {
765
+
line := scanner.Bytes()
766
+
if len(line) == 0 {
767
+
continue
768
+
}
769
+
770
+
// Only parse minimal fields needed for metadata
771
+
var op struct {
772
+
DID string `json:"did"`
773
+
CreatedAt time.Time `json:"createdAt"`
774
+
}
775
+
776
+
if err := json.Unmarshal(line, &op); err != nil {
777
+
continue
778
+
}
779
+
780
+
if lineNum == 0 {
781
+
startTime = op.CreatedAt
782
+
}
783
+
endTime = op.CreatedAt
784
+
785
+
didSet[op.DID] = true
786
+
lineNum++
787
+
}
788
+
789
+
return lineNum, len(didSet), startTime, endTime, scanner.Err()
790
+
}
791
+
792
+
// ExtractBundleMetadata extracts metadata from bundle file without decompressing
793
+
func (op *Operations) ExtractBundleMetadata(path string) (*BundleMetadata, error) {
794
+
meta, err := op.ExtractMetadataFromFile(path)
795
+
if err != nil {
796
+
return nil, fmt.Errorf("failed to extract metadata: %w", err)
797
+
}
798
+
return meta, nil
799
+
}
800
+
801
+
// LoadBundleWithMetadata loads bundle and returns both data and embedded metadata
802
+
func (op *Operations) LoadBundleWithMetadata(path string) ([]plcclient.PLCOperation, *BundleMetadata, error) {
803
+
file, err := os.Open(path)
804
+
if err != nil {
805
+
return nil, nil, fmt.Errorf("failed to open file: %w", err)
806
+
}
807
+
defer file.Close()
808
+
809
+
// 1. Try to read metadata frame first
810
+
meta, err := op.ReadMetadataFrame(file)
811
+
if err != nil {
812
+
// No metadata frame - fall back to regular load
813
+
file.Seek(0, io.SeekStart) // Reset to beginning
814
+
ops, err := op.loadFromReader(file)
815
+
return ops, nil, err
816
+
}
817
+
818
+
// 2. Read compressed data (file position is now after metadata frame)
819
+
ops, err := op.loadFromReader(file)
820
+
if err != nil {
821
+
return nil, nil, err
822
+
}
823
+
824
+
return ops, meta, nil
825
+
}
826
+
827
+
// loadFromReader loads operations from a reader (internal helper)
828
+
func (op *Operations) loadFromReader(r io.Reader) ([]plcclient.PLCOperation, error) {
829
+
reader, err := NewStreamingReader(r)
830
+
if err != nil {
831
+
return nil, fmt.Errorf("failed to create reader: %w", err)
832
+
}
833
+
defer reader.Release()
834
+
835
+
decompressed, err := io.ReadAll(reader)
836
+
if err != nil {
837
+
return nil, fmt.Errorf("failed to decompress: %w", err)
838
+
}
839
+
840
+
return op.ParseJSONL(decompressed)
841
+
}
+35
-24
internal/storage/storage_test.go
+35
-24
internal/storage/storage_test.go
···
10
10
"testing"
11
11
"time"
12
12
13
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
14
-
"tangled.org/atscan.net/plcbundle/internal/storage"
13
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
15
15
)
16
16
17
17
type testLogger struct {
···
25
25
func (l *testLogger) Println(v ...interface{}) {
26
26
l.t.Log(v...)
27
27
}
28
+
29
+
var (
30
+
bundleInfo = &storage.BundleInfo{
31
+
BundleNumber: 1,
32
+
Origin: "test-origin",
33
+
ParentHash: "",
34
+
Cursor: "",
35
+
CreatedBy: "test",
36
+
Hostname: "test-host",
37
+
}
38
+
)
28
39
29
40
// ====================================================================================
30
41
// COMPRESSION TESTS
···
33
44
func TestStorageCompression(t *testing.T) {
34
45
tmpDir := t.TempDir()
35
46
logger := &testLogger{t: t}
36
-
ops, err := storage.NewOperations(logger)
47
+
ops, err := storage.NewOperations(logger, false)
37
48
if err != nil {
38
49
t.Fatalf("NewOperations failed: %v", err)
39
50
}
···
62
73
path := filepath.Join(tmpDir, tt.name+".jsonl.zst")
63
74
64
75
// Save
65
-
_, _, _, _, err := ops.SaveBundle(path, original)
76
+
_, _, _, _, err := ops.SaveBundle(path, original, bundleInfo)
66
77
if err != nil {
67
78
t.Fatalf("SaveBundle failed: %v", err)
68
79
}
···
98
109
operations := makeTestOperations(10000)
99
110
path := filepath.Join(tmpDir, "compression_test.jsonl.zst")
100
111
101
-
_, _, uncompSize, compSize, err := ops.SaveBundle(path, operations)
112
+
_, _, uncompSize, compSize, err := ops.SaveBundle(path, operations, bundleInfo)
102
113
if err != nil {
103
114
t.Fatalf("SaveBundle failed: %v", err)
104
115
}
···
119
130
operations := makeTestOperations(100)
120
131
path := filepath.Join(tmpDir, "integrity_test.jsonl.zst")
121
132
122
-
contentHash, compHash, _, _, err := ops.SaveBundle(path, operations)
133
+
contentHash, compHash, _, _, err := ops.SaveBundle(path, operations, bundleInfo)
123
134
if err != nil {
124
135
t.Fatalf("SaveBundle failed: %v", err)
125
136
}
···
146
157
147
158
func TestStorageHashing(t *testing.T) {
148
159
logger := &testLogger{t: t}
149
-
ops, err := storage.NewOperations(logger)
160
+
ops, err := storage.NewOperations(logger, false)
150
161
if err != nil {
151
162
t.Fatalf("NewOperations failed: %v", err)
152
163
}
···
240
251
func TestStorageConcurrency(t *testing.T) {
241
252
tmpDir := t.TempDir()
242
253
logger := &testLogger{t: t}
243
-
ops, err := storage.NewOperations(logger)
254
+
ops, err := storage.NewOperations(logger, false)
244
255
if err != nil {
245
256
t.Fatalf("NewOperations failed: %v", err)
246
257
}
···
250
261
// Create test bundle
251
262
operations := makeTestOperations(10000)
252
263
path := filepath.Join(tmpDir, "parallel_test.jsonl.zst")
253
-
_, _, _, _, err := ops.SaveBundle(path, operations)
264
+
_, _, _, _, err := ops.SaveBundle(path, operations, bundleInfo)
254
265
if err != nil {
255
266
t.Fatalf("SaveBundle failed: %v", err)
256
267
}
···
286
297
// Critical test - this is heavily used by DID lookups
287
298
operations := makeTestOperations(10000)
288
299
path := filepath.Join(tmpDir, "position_test.jsonl.zst")
289
-
_, _, _, _, err := ops.SaveBundle(path, operations)
300
+
_, _, _, _, err := ops.SaveBundle(path, operations, bundleInfo)
290
301
if err != nil {
291
302
t.Fatalf("SaveBundle failed: %v", err)
292
303
}
···
321
332
t.Run("ConcurrentHashVerification", func(t *testing.T) {
322
333
operations := makeTestOperations(1000)
323
334
path := filepath.Join(tmpDir, "verify_test.jsonl.zst")
324
-
_, compHash, _, _, err := ops.SaveBundle(path, operations)
335
+
_, compHash, _, _, err := ops.SaveBundle(path, operations, bundleInfo)
325
336
if err != nil {
326
337
t.Fatalf("SaveBundle failed: %v", err)
327
338
}
···
351
362
func TestStorageEdgeCases(t *testing.T) {
352
363
tmpDir := t.TempDir()
353
364
logger := &testLogger{t: t}
354
-
ops, err := storage.NewOperations(logger)
365
+
ops, err := storage.NewOperations(logger, false)
355
366
if err != nil {
356
367
t.Fatalf("NewOperations failed: %v", err)
357
368
}
···
371
382
t.Run("TruncatedFile", func(t *testing.T) {
372
383
operations := makeTestOperations(100)
373
384
path := filepath.Join(tmpDir, "truncated.jsonl.zst")
374
-
ops.SaveBundle(path, operations)
385
+
ops.SaveBundle(path, operations, bundleInfo)
375
386
376
387
// Read and truncate
377
388
data, _ := os.ReadFile(path)
···
389
400
390
401
// Manually compress invalid data
391
402
operations := makeTestOperations(10)
392
-
ops.SaveBundle(path, operations) // Create valid file first
403
+
ops.SaveBundle(path, operations, bundleInfo) // Create valid file first
393
404
394
405
// Now corrupt it with invalid JSON
395
406
// This is hard to test properly since SaveBundle enforces valid data
···
410
421
t.Run("InvalidPosition", func(t *testing.T) {
411
422
operations := makeTestOperations(100)
412
423
path := filepath.Join(tmpDir, "position_test.jsonl.zst")
413
-
ops.SaveBundle(path, operations)
424
+
ops.SaveBundle(path, operations, bundleInfo)
414
425
415
426
// Negative position
416
427
_, err := ops.LoadOperationAtPosition(path, -1)
···
432
443
433
444
func TestStorageBoundaryConditions(t *testing.T) {
434
445
logger := &testLogger{t: t}
435
-
ops, err := storage.NewOperations(logger)
446
+
ops, err := storage.NewOperations(logger, false)
436
447
if err != nil {
437
448
t.Fatalf("NewOperations failed: %v", err)
438
449
}
···
587
598
588
599
func TestStorageSerialization(t *testing.T) {
589
600
logger := &testLogger{t: t}
590
-
ops, err := storage.NewOperations(logger)
601
+
ops, err := storage.NewOperations(logger, false)
591
602
if err != nil {
592
603
t.Fatalf("NewOperations failed: %v", err)
593
604
}
···
659
670
func TestStorageUtilities(t *testing.T) {
660
671
tmpDir := t.TempDir()
661
672
logger := &testLogger{t: t}
662
-
ops, err := storage.NewOperations(logger)
673
+
ops, err := storage.NewOperations(logger, false)
663
674
if err != nil {
664
675
t.Fatalf("NewOperations failed: %v", err)
665
676
}
···
737
748
func TestStorageStreaming(t *testing.T) {
738
749
tmpDir := t.TempDir()
739
750
logger := &testLogger{t: t}
740
-
ops, err := storage.NewOperations(logger)
751
+
ops, err := storage.NewOperations(logger, false)
741
752
if err != nil {
742
753
t.Fatalf("NewOperations failed: %v", err)
743
754
}
···
746
757
t.Run("StreamRaw", func(t *testing.T) {
747
758
operations := makeTestOperations(100)
748
759
path := filepath.Join(tmpDir, "stream_raw.jsonl.zst")
749
-
_, _, _, _, err := ops.SaveBundle(path, operations)
760
+
_, _, _, _, err := ops.SaveBundle(path, operations, bundleInfo)
750
761
if err != nil {
751
762
t.Fatalf("SaveBundle failed: %v", err)
752
763
}
···
772
783
t.Run("StreamDecompressed", func(t *testing.T) {
773
784
operations := makeTestOperations(100)
774
785
path := filepath.Join(tmpDir, "stream_decomp.jsonl.zst")
775
-
ops.SaveBundle(path, operations)
786
+
ops.SaveBundle(path, operations, bundleInfo)
776
787
777
788
reader, err := ops.StreamDecompressed(path)
778
789
if err != nil {
···
800
811
func BenchmarkStorageOperations(b *testing.B) {
801
812
tmpDir := b.TempDir()
802
813
logger := &testLogger{t: &testing.T{}}
803
-
ops, _ := storage.NewOperations(logger)
814
+
ops, _ := storage.NewOperations(logger, false)
804
815
defer ops.Close()
805
816
806
817
operations := makeTestOperations(10000)
···
808
819
b.Run("SaveBundle", func(b *testing.B) {
809
820
for i := 0; i < b.N; i++ {
810
821
path := filepath.Join(tmpDir, fmt.Sprintf("bench_%d.jsonl.zst", i))
811
-
ops.SaveBundle(path, operations)
822
+
ops.SaveBundle(path, operations, bundleInfo)
812
823
}
813
824
})
814
825
815
826
// Create bundle for read benchmarks
816
827
testPath := filepath.Join(tmpDir, "bench_read.jsonl.zst")
817
-
ops.SaveBundle(testPath, operations)
828
+
ops.SaveBundle(testPath, operations, nil)
818
829
819
830
b.Run("LoadBundle", func(b *testing.B) {
820
831
for i := 0; i < b.N; i++ {
+295
internal/storage/zstd.go
+295
internal/storage/zstd.go
···
1
+
package storage
2
+
3
+
import (
4
+
"bytes"
5
+
"encoding/binary"
6
+
"encoding/json"
7
+
"fmt"
8
+
"io"
9
+
"os"
10
+
"path/filepath"
11
+
12
+
"github.com/valyala/gozstd"
13
+
)
14
+
15
+
// ============================================================================
16
+
// ZSTD COMPRESSION ABSTRACTION LAYER
17
+
// ============================================================================
18
+
19
+
const (
20
+
CompressionLevel = 1
21
+
FrameSize = 100
22
+
23
+
SkippableMagicMetadata = 0x184D2A50
24
+
)
25
+
26
+
// ============================================================================
27
+
// SKIPPABLE FRAME FUNCTIONS
28
+
// ============================================================================
29
+
30
+
// WriteSkippableFrame writes a skippable frame with the given data
31
+
func WriteSkippableFrame(w io.Writer, magicNumber uint32, data []byte) (int64, error) {
32
+
frameSize := uint32(len(data))
33
+
34
+
// Write magic number (little-endian)
35
+
if err := binary.Write(w, binary.LittleEndian, magicNumber); err != nil {
36
+
return 0, err
37
+
}
38
+
39
+
// Write frame size (little-endian)
40
+
if err := binary.Write(w, binary.LittleEndian, frameSize); err != nil {
41
+
return 0, err
42
+
}
43
+
44
+
// Write data
45
+
n, err := w.Write(data)
46
+
if err != nil {
47
+
return 0, err
48
+
}
49
+
50
+
totalBytes := int64(4 + 4 + n) // magic + size + data
51
+
return totalBytes, nil
52
+
}
53
+
54
+
// ReadSkippableFrame with debug
55
+
func (ops *Operations) ReadSkippableFrame(r io.Reader) (uint32, []byte, error) {
56
+
var magic uint32
57
+
if err := binary.Read(r, binary.LittleEndian, &magic); err != nil {
58
+
return 0, nil, fmt.Errorf("failed to read magic: %w", err)
59
+
}
60
+
61
+
if magic < 0x184D2A50 || magic > 0x184D2A5F {
62
+
return 0, nil, fmt.Errorf("not a skippable frame: magic=0x%08X (expected 0x184D2A50-0x184D2A5F)", magic)
63
+
}
64
+
65
+
var frameSize uint32
66
+
if err := binary.Read(r, binary.LittleEndian, &frameSize); err != nil {
67
+
return 0, nil, fmt.Errorf("failed to read frame size: %w", err)
68
+
}
69
+
70
+
data := make([]byte, frameSize)
71
+
if _, err := io.ReadFull(r, data); err != nil {
72
+
return 0, nil, fmt.Errorf("failed to read frame data: %w", err)
73
+
}
74
+
75
+
return magic, data, nil
76
+
}
77
+
78
+
// WriteMetadataFrame writes bundle metadata as skippable frame (compact JSON)
79
+
func (op *Operations) WriteMetadataFrame(w io.Writer, meta *BundleMetadata) (int64, error) {
80
+
jsonData, err := json.Marshal(meta)
81
+
if err != nil {
82
+
return 0, fmt.Errorf("failed to marshal metadata: %w", err)
83
+
}
84
+
return WriteSkippableFrame(w, SkippableMagicMetadata, jsonData)
85
+
}
86
+
87
+
// ReadMetadataFrame reads bundle metadata from skippable frame
88
+
func (ops *Operations) ReadMetadataFrame(r io.Reader) (*BundleMetadata, error) {
89
+
magic, data, err := ops.ReadSkippableFrame(r)
90
+
if err != nil {
91
+
return nil, err
92
+
}
93
+
94
+
if magic != SkippableMagicMetadata {
95
+
return nil, fmt.Errorf("unexpected skippable frame magic: 0x%08X (expected 0x%08X)",
96
+
magic, SkippableMagicMetadata)
97
+
}
98
+
99
+
var meta BundleMetadata
100
+
if err := json.Unmarshal(data, &meta); err != nil {
101
+
return nil, fmt.Errorf("failed to unmarshal metadata: %w", err)
102
+
}
103
+
104
+
return &meta, nil
105
+
}
106
+
107
+
// ExtractMetadataFromFile reads metadata without decompressing
108
+
func (ops *Operations) ExtractMetadataFromFile(path string) (*BundleMetadata, error) {
109
+
file, err := os.Open(path)
110
+
if err != nil {
111
+
return nil, err
112
+
}
113
+
defer file.Close()
114
+
115
+
// Check first bytes
116
+
header := make([]byte, 8)
117
+
if _, err := file.Read(header); err != nil {
118
+
return nil, fmt.Errorf("failed to read header: %w", err)
119
+
}
120
+
121
+
// Seek back to start
122
+
file.Seek(0, io.SeekStart)
123
+
124
+
meta, err := ops.ReadMetadataFrame(file)
125
+
if err != nil {
126
+
return nil, fmt.Errorf("no metadata frame found: %w", err)
127
+
}
128
+
129
+
return meta, nil
130
+
}
131
+
132
+
// ExtractFrameIndexFromFile now just reads from metadata
133
+
func (ops *Operations) ExtractFrameIndexFromFile(path string) ([]int64, error) {
134
+
meta, err := ops.ExtractMetadataFromFile(path)
135
+
if err != nil {
136
+
return nil, err
137
+
}
138
+
139
+
if len(meta.FrameOffsets) == 0 {
140
+
return nil, fmt.Errorf("metadata has no frame offsets")
141
+
}
142
+
143
+
return meta.FrameOffsets, nil
144
+
}
145
+
146
+
// DebugFrameOffsets extracts and displays frame offset information
147
+
func (ops *Operations) DebugFrameOffsets(path string) error {
148
+
meta, err := ops.ExtractMetadataFromFile(path)
149
+
if err != nil {
150
+
return fmt.Errorf("failed to extract metadata: %w", err)
151
+
}
152
+
153
+
fmt.Printf("Frame Offset Debug for: %s\n\n", filepath.Base(path))
154
+
fmt.Printf("Metadata:\n")
155
+
fmt.Printf(" Bundle: %d\n", meta.BundleNumber)
156
+
fmt.Printf(" Frames: %d\n", meta.FrameCount)
157
+
fmt.Printf(" Frame size: %d ops\n", meta.FrameSize)
158
+
fmt.Printf(" Total ops: %d\n", meta.OperationCount)
159
+
160
+
fmt.Printf("\nFrame Offsets (%d total):\n", len(meta.FrameOffsets))
161
+
for i, offset := range meta.FrameOffsets {
162
+
if i < len(meta.FrameOffsets)-1 {
163
+
nextOffset := meta.FrameOffsets[i+1]
164
+
frameSize := nextOffset - offset
165
+
fmt.Printf(" Frame %3d: offset %10d, size %10d bytes\n", i, offset, frameSize)
166
+
} else {
167
+
fmt.Printf(" End mark: offset %10d\n", offset)
168
+
}
169
+
}
170
+
171
+
// Try to verify first frame
172
+
fmt.Printf("\nVerifying first frame...\n")
173
+
file, err := os.Open(path)
174
+
if err != nil {
175
+
return err
176
+
}
177
+
defer file.Close()
178
+
179
+
if len(meta.FrameOffsets) < 2 {
180
+
return fmt.Errorf("not enough frame offsets")
181
+
}
182
+
183
+
startOffset := meta.FrameOffsets[0]
184
+
endOffset := meta.FrameOffsets[1]
185
+
frameLength := endOffset - startOffset
186
+
187
+
fmt.Printf(" Start: %d, End: %d, Length: %d\n", startOffset, endOffset, frameLength)
188
+
189
+
compressedFrame := make([]byte, frameLength)
190
+
_, err = file.ReadAt(compressedFrame, startOffset)
191
+
if err != nil {
192
+
return fmt.Errorf("failed to read: %w", err)
193
+
}
194
+
195
+
decompressed, err := DecompressFrame(compressedFrame)
196
+
if err != nil {
197
+
return fmt.Errorf("failed to decompress: %w", err)
198
+
}
199
+
200
+
fmt.Printf(" โ Decompressed: %d bytes\n", len(decompressed))
201
+
202
+
// Count lines
203
+
lines := bytes.Count(decompressed, []byte("\n"))
204
+
fmt.Printf(" โ Lines: %d\n", lines)
205
+
206
+
return nil
207
+
}
208
+
209
+
// ============================================================================
210
+
// COMPRESSION/DECOMPRESSION
211
+
// ============================================================================
212
+
213
+
func CompressFrame(data []byte) ([]byte, error) {
214
+
compressed := gozstd.CompressLevel(nil, data, CompressionLevel)
215
+
return compressed, nil
216
+
}
217
+
218
+
func DecompressAll(compressed []byte) ([]byte, error) {
219
+
decompressed, err := gozstd.Decompress(nil, compressed)
220
+
if err != nil {
221
+
return nil, fmt.Errorf("decompression failed: %w", err)
222
+
}
223
+
return decompressed, nil
224
+
}
225
+
226
+
func DecompressFrame(compressedFrame []byte) ([]byte, error) {
227
+
return gozstd.Decompress(nil, compressedFrame)
228
+
}
229
+
230
+
func NewStreamingReader(r io.Reader) (StreamReader, error) {
231
+
reader := gozstd.NewReader(r)
232
+
return &gozstdReader{reader: reader}, nil
233
+
}
234
+
235
+
func NewStreamingWriter(w io.Writer) (StreamWriter, error) {
236
+
writer := gozstd.NewWriterLevel(w, CompressionLevel)
237
+
return &gozstdWriter{writer: writer}, nil
238
+
}
239
+
240
+
// ============================================================================
241
+
// INTERFACES
242
+
// ============================================================================
243
+
244
+
type StreamReader interface {
245
+
io.Reader
246
+
io.WriterTo
247
+
Release()
248
+
}
249
+
250
+
type StreamWriter interface {
251
+
io.Writer
252
+
io.Closer
253
+
Flush() error
254
+
Release()
255
+
}
256
+
257
+
// ============================================================================
258
+
// WRAPPER TYPES
259
+
// ============================================================================
260
+
261
+
type gozstdReader struct {
262
+
reader *gozstd.Reader
263
+
}
264
+
265
+
func (r *gozstdReader) Read(p []byte) (int, error) {
266
+
return r.reader.Read(p)
267
+
}
268
+
269
+
func (r *gozstdReader) WriteTo(w io.Writer) (int64, error) {
270
+
return r.reader.WriteTo(w)
271
+
}
272
+
273
+
func (r *gozstdReader) Release() {
274
+
r.reader.Release()
275
+
}
276
+
277
+
type gozstdWriter struct {
278
+
writer *gozstd.Writer
279
+
}
280
+
281
+
func (w *gozstdWriter) Write(p []byte) (int, error) {
282
+
return w.writer.Write(p)
283
+
}
284
+
285
+
func (w *gozstdWriter) Close() error {
286
+
return w.writer.Close()
287
+
}
288
+
289
+
func (w *gozstdWriter) Flush() error {
290
+
return w.writer.Flush()
291
+
}
292
+
293
+
func (w *gozstdWriter) Release() {
294
+
w.writer.Release()
295
+
}
+2
-2
internal/sync/bundler.go
+2
-2
internal/sync/bundler.go
···
3
3
import (
4
4
"time"
5
5
6
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
7
-
"tangled.org/atscan.net/plcbundle/internal/storage"
6
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
7
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
8
8
)
9
9
10
10
// CreateBundle creates a bundle structure from operations
+3
-3
internal/sync/cloner.go
+3
-3
internal/sync/cloner.go
···
12
12
"time"
13
13
14
14
"github.com/goccy/go-json"
15
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
16
-
"tangled.org/atscan.net/plcbundle/internal/storage"
17
-
"tangled.org/atscan.net/plcbundle/internal/types"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
16
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
17
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
18
18
)
19
19
20
20
// Cloner handles cloning bundles from remote endpoints
+7
-7
internal/sync/fetcher.go
+7
-7
internal/sync/fetcher.go
···
5
5
"fmt"
6
6
"time"
7
7
8
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
9
-
"tangled.org/atscan.net/plcbundle/internal/storage"
10
-
"tangled.org/atscan.net/plcbundle/internal/types"
8
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
9
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
10
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
11
11
)
12
12
13
13
// Fetcher handles fetching operations from PLC directory
···
48
48
49
49
seenCIDs := make(map[string]bool)
50
50
51
-
// โ
Initialize current boundaries from previous bundle (or empty if first fetch)
51
+
// Initialize current boundaries from previous bundle (or empty if first fetch)
52
52
currentBoundaryCIDs := prevBoundaryCIDs
53
53
if currentBoundaryCIDs == nil {
54
54
currentBoundaryCIDs = make(map[string]bool)
···
118
118
originalBatchSize := len(batch)
119
119
totalReceived += originalBatchSize
120
120
121
-
// โ
CRITICAL: Strip boundary duplicates using current boundaries
121
+
// CRITICAL: Strip boundary duplicates using current boundaries
122
122
batch = f.operations.StripBoundaryDuplicates(
123
123
batch,
124
124
currentAfter,
···
178
178
}
179
179
}
180
180
181
-
// โ
CRITICAL: Calculate NEW boundary CIDs from this fetch for next iteration
181
+
// CRITICAL: Calculate NEW boundary CIDs from this fetch for next iteration
182
182
if len(batch) > 0 {
183
183
boundaryTime, newBoundaryCIDs := f.operations.GetBoundaryCIDs(batch)
184
184
currentBoundaryCIDs = newBoundaryCIDs
···
202
202
}
203
203
allNewOps = append(allNewOps, batchNewOps...)
204
204
205
-
// โ
Still update boundaries even without mempool
205
+
// Still update boundaries even without mempool
206
206
if len(batch) > 0 {
207
207
boundaryTime, newBoundaryCIDs := f.operations.GetBoundaryCIDs(batch)
208
208
currentBoundaryCIDs = newBoundaryCIDs
+40
-24
internal/sync/sync_test.go
+40
-24
internal/sync/sync_test.go
···
11
11
"time"
12
12
13
13
"github.com/goccy/go-json"
14
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
15
-
"tangled.org/atscan.net/plcbundle/internal/storage"
16
-
internalsync "tangled.org/atscan.net/plcbundle/internal/sync"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
16
+
internalsync "tangled.org/atscan.net/plcbundle-go/internal/sync"
17
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
17
18
)
18
19
19
20
type testLogger struct {
···
61
62
}
62
63
}
63
64
64
-
return addedCount, nil // โ Return actual added count
65
+
return addedCount, nil
65
66
}
66
67
67
68
func (m *mockMempool) Save() error {
···
113
114
defer client.Close()
114
115
115
116
logger := &testLogger{t: t}
116
-
ops, _ := storage.NewOperations(logger)
117
+
ops, _ := storage.NewOperations(logger, false)
117
118
defer ops.Close()
118
119
119
120
fetcher := internalsync.NewFetcher(client, ops, logger)
···
174
175
defer client.Close()
175
176
176
177
logger := &testLogger{t: t}
177
-
storageOps, _ := storage.NewOperations(logger)
178
+
storageOps, _ := storage.NewOperations(logger, false)
178
179
defer storageOps.Close()
179
180
180
181
fetcher := internalsync.NewFetcher(client, storageOps, logger)
···
242
243
defer client.Close()
243
244
244
245
logger := &testLogger{t: t}
245
-
storageOps, _ := storage.NewOperations(logger)
246
+
storageOps, _ := storage.NewOperations(logger, false)
246
247
defer storageOps.Close()
247
248
248
249
fetcher := internalsync.NewFetcher(client, storageOps, logger)
···
385
386
defer client.Close()
386
387
387
388
logger := &testLogger{t: t}
388
-
storageOps, _ := storage.NewOperations(logger)
389
+
storageOps, _ := storage.NewOperations(logger, false)
389
390
defer storageOps.Close()
390
391
391
392
fetcher := internalsync.NewFetcher(client, storageOps, logger)
···
457
458
458
459
logger := &testLogger{t: t}
459
460
config := &internalsync.SyncLoopConfig{
460
-
MaxBundles: 0,
461
-
Verbose: false,
462
-
Logger: logger,
461
+
MaxBundles: 0,
462
+
Verbose: false,
463
+
Logger: logger,
464
+
SkipDIDIndex: false,
463
465
}
464
466
465
467
// First sync should detect "caught up" when no progress
466
-
synced, err := internalsync.SyncOnce(context.Background(), mockMgr, config, false)
468
+
synced, err := internalsync.SyncOnce(context.Background(), mockMgr, config)
467
469
468
470
if err != nil {
469
471
t.Fatalf("SyncOnce failed: %v", err)
···
483
485
484
486
logger := &testLogger{t: t}
485
487
config := &internalsync.SyncLoopConfig{
486
-
MaxBundles: 3,
487
-
Verbose: false,
488
-
Logger: logger,
488
+
MaxBundles: 3,
489
+
Verbose: false,
490
+
Logger: logger,
491
+
SkipDIDIndex: false,
489
492
}
490
493
491
494
ctx := context.Background()
492
-
synced, err := internalsync.SyncOnce(ctx, mockMgr, config, false)
495
+
synced, err := internalsync.SyncOnce(ctx, mockMgr, config)
493
496
494
497
if err != nil {
495
498
t.Fatalf("SyncOnce failed: %v", err)
···
510
513
511
514
logger := &testLogger{t: t}
512
515
config := &internalsync.SyncLoopConfig{
513
-
Interval: 100 * time.Millisecond,
514
-
MaxBundles: 0,
515
-
Verbose: false,
516
-
Logger: logger,
516
+
Interval: 100 * time.Millisecond,
517
+
MaxBundles: 0,
518
+
Verbose: false,
519
+
Logger: logger,
520
+
SkipDIDIndex: false,
517
521
}
518
522
519
523
ctx, cancel := context.WithCancel(context.Background())
···
556
560
557
561
func TestBundlerCreateBundle(t *testing.T) {
558
562
logger := &testLogger{t: t}
559
-
storageOps, _ := storage.NewOperations(logger)
563
+
storageOps, _ := storage.NewOperations(logger, false)
560
564
defer storageOps.Close()
561
565
562
566
t.Run("BasicBundleCreation", func(t *testing.T) {
···
683
687
return m.lastBundle
684
688
}
685
689
690
+
func (m *mockSyncManager) UpdateDIDIndexSmart(ctx context.Context, progressCallback func(current, total int)) error {
691
+
m.mu.Lock()
692
+
defer m.mu.Unlock()
693
+
return nil
694
+
}
695
+
696
+
func (m *mockSyncManager) BuildDIDIndex(ctx context.Context, progressCallback func(current, total int)) error {
697
+
m.mu.Lock()
698
+
defer m.mu.Unlock()
699
+
return nil
700
+
}
701
+
686
702
func (m *mockSyncManager) GetMempoolCount() int {
687
703
m.mu.Lock()
688
704
defer m.mu.Unlock()
689
705
return m.mempoolCount
690
706
}
691
707
692
-
func (m *mockSyncManager) FetchAndSaveNextBundle(ctx context.Context, quiet bool) (int, time.Duration, error) {
708
+
func (m *mockSyncManager) FetchAndSaveNextBundle(ctx context.Context, verbose bool, quiet bool, skipDIDIndex bool) (int, *types.BundleProductionStats, error) {
693
709
m.mu.Lock()
694
710
defer m.mu.Unlock()
695
711
···
701
717
if m.mempoolCount >= 10000 {
702
718
m.lastBundle++
703
719
m.mempoolCount -= 10000
704
-
return m.lastBundle, 10 * time.Millisecond, nil
720
+
return m.lastBundle, nil, nil
705
721
}
706
722
707
723
// Not enough ops
708
-
return 0, 0, fmt.Errorf("insufficient operations")
724
+
return 0, nil, fmt.Errorf("insufficient operations")
709
725
}
710
726
711
727
func (m *mockSyncManager) SaveMempool() error {
+24
-14
internal/sync/syncer.go
+24
-14
internal/sync/syncer.go
···
5
5
"context"
6
6
"time"
7
7
8
-
"tangled.org/atscan.net/plcbundle/internal/types"
8
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
9
9
)
10
10
11
11
// SyncLoopConfig configures continuous syncing
···
15
15
Verbose bool
16
16
Logger types.Logger
17
17
OnBundleSynced func(bundleNum int, fetchedCount int, mempoolCount int, duration time.Duration, indexTime time.Duration)
18
+
SkipDIDIndex bool
19
+
Quiet bool
18
20
}
19
21
20
22
// DefaultSyncLoopConfig returns default configuration
21
23
func DefaultSyncLoopConfig() *SyncLoopConfig {
22
24
return &SyncLoopConfig{
23
-
Interval: 1 * time.Minute,
24
-
MaxBundles: 0,
25
-
Verbose: false,
25
+
Interval: 1 * time.Minute,
26
+
MaxBundles: 0,
27
+
Verbose: false,
28
+
SkipDIDIndex: false,
29
+
Quiet: false,
26
30
}
27
31
}
28
32
···
31
35
GetLastBundleNumber() int
32
36
GetMempoolCount() int
33
37
// Returns: bundleNumber, indexUpdateTime, error
34
-
FetchAndSaveNextBundle(ctx context.Context, quiet bool) (int, time.Duration, error)
38
+
FetchAndSaveNextBundle(ctx context.Context, verbose bool, quiet bool, skipDIDIndex bool) (int, *types.BundleProductionStats, error)
35
39
SaveMempool() error
40
+
BuildDIDIndex(ctx context.Context, progressCallback func(current, total int)) error
41
+
UpdateDIDIndexSmart(ctx context.Context, progressCallback func(current, total int)) error
36
42
}
37
43
38
44
// SyncOnce performs a single sync cycle - fetches until caught up
39
-
func SyncOnce(ctx context.Context, mgr SyncManager, config *SyncLoopConfig, verbose bool) (int, error) {
45
+
func SyncOnce(ctx context.Context, mgr SyncManager, config *SyncLoopConfig) (int, error) {
40
46
cycleStart := time.Now()
41
47
startMempool := mgr.GetMempoolCount()
42
48
···
50
56
mempoolBefore := mgr.GetMempoolCount()
51
57
52
58
// Attempt to fetch and save next bundle
53
-
bundleNum, indexTime, err := mgr.FetchAndSaveNextBundle(ctx, !verbose)
59
+
bundleNum, stats, err := mgr.FetchAndSaveNextBundle(ctx, config.Verbose, config.Quiet, config.SkipDIDIndex)
54
60
55
61
// Check if we made any progress
56
62
bundleAfter := mgr.GetLastBundleNumber()
···
71
77
72
78
// Success
73
79
fetchedCount++
74
-
totalIndexTime += indexTime
80
+
if stats != nil {
81
+
totalIndexTime += stats.IndexTime
82
+
}
75
83
76
84
// Callback if provided
77
85
if config.OnBundleSynced != nil {
···
96
104
97
105
if fetchedCount > 0 {
98
106
if totalIndexTime > 10*time.Millisecond {
99
-
config.Logger.Printf("[Sync] โ Bundle %06d | Synced: %d | Mempool: %d (+%d) | %s (index: %s)",
107
+
config.Logger.Printf("[Sync] โ Bundle %06d | synced: %d | mempool: %d (+%d) | time: %s (index: %s)",
100
108
currentBundle, fetchedCount, mempoolAfter, addedOps,
101
109
duration.Round(time.Millisecond), totalIndexTime.Round(time.Millisecond))
102
110
} else {
103
-
config.Logger.Printf("[Sync] โ Bundle %06d | Synced: %d | Mempool: %d (+%d) | %s",
111
+
config.Logger.Printf("[Sync] โ Bundle %06d | synced: %d | mempool: %d (+%d) | time: %s",
104
112
currentBundle, fetchedCount, mempoolAfter, addedOps, duration.Round(time.Millisecond))
105
113
}
106
114
} else if addedOps > 0 {
107
115
// No bundles but added to mempool
108
-
config.Logger.Printf("[Sync] โ Bundle %06d | Mempool: %d (+%d) | %s",
116
+
config.Logger.Printf("[Sync] โ Bundle %06d | mempool: %d (+%d) | time: %s",
109
117
currentBundle, mempoolAfter, addedOps, duration.Round(time.Millisecond))
110
118
} else {
111
119
// Already up to date
112
-
config.Logger.Printf("[Sync] โ Bundle %06d | Up to date | %s",
120
+
config.Logger.Printf("[Sync] โ Bundle %06d | up to date | time: %s",
113
121
currentBundle, duration.Round(time.Millisecond))
114
122
}
115
123
}
···
134
142
config.Logger.Printf("[Sync] Initial sync starting...")
135
143
}
136
144
137
-
synced, err := SyncOnce(ctx, mgr, config, config.Verbose)
145
+
config.SkipDIDIndex = true
146
+
synced, err := SyncOnce(ctx, mgr, config)
138
147
if err != nil {
139
148
return err
140
149
}
141
150
bundlesSynced += synced
151
+
mgr.UpdateDIDIndexSmart(ctx, nil)
142
152
143
153
// Check if reached limit
144
154
if config.MaxBundles > 0 && bundlesSynced >= config.MaxBundles {
···
165
175
166
176
case <-ticker.C:
167
177
// Each tick, do one sync cycle (which fetches until caught up)
168
-
synced, err := SyncOnce(ctx, mgr, config, config.Verbose)
178
+
synced, err := SyncOnce(ctx, mgr, config)
169
179
if err != nil {
170
180
if config.Logger != nil {
171
181
config.Logger.Printf("[Sync] Error: %v", err)
+10
internal/types/types.go
+10
internal/types/types.go
···
1
1
package types
2
2
3
+
import "time"
4
+
3
5
// Logger is a simple logging interface used throughout plcbundle
4
6
type Logger interface {
5
7
Printf(format string, v ...interface{})
···
16
18
// INDEX_VERSION is the current index format version
17
19
INDEX_VERSION = "1.0"
18
20
)
21
+
22
+
type BundleProductionStats struct {
23
+
TotalFetches int
24
+
TotalDuration time.Duration
25
+
AvgPerFetch float64
26
+
Throughput float64
27
+
IndexTime time.Duration
28
+
}
+1
-1
internal/types/types_test.go
+1
-1
internal/types/types_test.go
+2
-2
options.go
+2
-2
options.go
+1
-1
scripts/benchmark-detector.go
+1
-1
scripts/benchmark-detector.go
+552
-62
server/handlers.go
+552
-62
server/handlers.go
···
11
11
"time"
12
12
13
13
"github.com/goccy/go-json"
14
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
15
-
"tangled.org/atscan.net/plcbundle/internal/types"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
16
16
)
17
17
18
18
func (s *Server) handleRoot() http.HandlerFunc {
···
74
74
sb.WriteString("immutable, cryptographically-chained bundles of 10,000 operations.\n\n")
75
75
sb.WriteString("More info: https://tangled.org/@atscan.net/plcbundle\n\n")
76
76
77
+
origin := s.manager.GetPLCOrigin()
78
+
77
79
if bundleCount > 0 {
78
80
sb.WriteString("Bundles\n")
79
81
sb.WriteString("โโโโโโโ\n")
82
+
sb.WriteString(fmt.Sprintf(" Origin: %s\n", origin))
80
83
sb.WriteString(fmt.Sprintf(" Bundle count: %d\n", bundleCount))
81
84
82
85
firstBundle := stats["first_bundle"].(int)
···
111
114
mempoolStats := s.manager.GetMempoolStats()
112
115
count := mempoolStats["count"].(int)
113
116
targetBundle := mempoolStats["target_bundle"].(int)
114
-
canCreate := mempoolStats["can_create_bundle"].(bool)
115
117
116
-
sb.WriteString("\nMempool Stats\n")
117
-
sb.WriteString("โโโโโโโโโโโโโ\n")
118
+
sb.WriteString("\nMempool\n")
119
+
sb.WriteString("โโโโโโโ\n")
118
120
sb.WriteString(fmt.Sprintf(" Target bundle: %d\n", targetBundle))
119
121
sb.WriteString(fmt.Sprintf(" Operations: %d / %d\n", count, types.BUNDLE_SIZE))
120
-
sb.WriteString(fmt.Sprintf(" Can create bundle: %v\n", canCreate))
121
122
122
123
if count > 0 {
123
124
progress := float64(count) / float64(types.BUNDLE_SIZE) * 100
···
142
143
}
143
144
}
144
145
145
-
if didStats := s.manager.GetDIDIndexStats(); didStats["exists"].(bool) {
146
-
sb.WriteString("\nDID Index\n")
147
-
sb.WriteString("โโโโโโโโโ\n")
146
+
if s.config.EnableResolver {
147
+
148
+
sb.WriteString("\nResolver\n")
149
+
sb.WriteString("โโโโโโโโ\n")
148
150
sb.WriteString(" Status: enabled\n")
149
151
150
-
indexedDIDs := didStats["indexed_dids"].(int64)
151
-
mempoolDIDs := didStats["mempool_dids"].(int64)
152
-
totalDIDs := didStats["total_dids"].(int64)
152
+
if didStats := s.manager.GetDIDIndexStats(); didStats["exists"].(bool) {
153
+
indexedDIDs := didStats["indexed_dids"].(int64)
154
+
mempoolDIDs := didStats["mempool_dids"].(int64)
155
+
totalDIDs := didStats["total_dids"].(int64)
153
156
154
-
if mempoolDIDs > 0 {
155
-
sb.WriteString(fmt.Sprintf(" Total DIDs: %s (%s indexed + %s mempool)\n",
156
-
formatNumber(int(totalDIDs)),
157
-
formatNumber(int(indexedDIDs)),
158
-
formatNumber(int(mempoolDIDs))))
159
-
} else {
160
-
sb.WriteString(fmt.Sprintf(" Total DIDs: %s\n", formatNumber(int(totalDIDs))))
157
+
if mempoolDIDs > 0 {
158
+
sb.WriteString(fmt.Sprintf(" Total DIDs: %s (%s indexed + %s mempool)\n",
159
+
formatNumber(int(totalDIDs)),
160
+
formatNumber(int(indexedDIDs)),
161
+
formatNumber(int(mempoolDIDs))))
162
+
} else {
163
+
sb.WriteString(fmt.Sprintf(" Total DIDs: %s\n", formatNumber(int(totalDIDs))))
164
+
}
161
165
}
162
-
163
-
sb.WriteString(fmt.Sprintf(" Cached shards: %d / %d\n",
164
-
didStats["cached_shards"], didStats["cache_limit"]))
165
166
sb.WriteString("\n")
166
167
}
167
168
168
169
sb.WriteString("Server Stats\n")
169
170
sb.WriteString("โโโโโโโโโโโโ\n")
170
-
sb.WriteString(fmt.Sprintf(" Version: %s\n", s.config.Version))
171
-
if origin := s.manager.GetPLCOrigin(); origin != "" {
172
-
sb.WriteString(fmt.Sprintf(" Origin: %s\n", origin))
173
-
}
174
-
sb.WriteString(fmt.Sprintf(" Sync mode: %v\n", s.config.SyncMode))
175
-
sb.WriteString(fmt.Sprintf(" WebSocket: %v\n", s.config.EnableWebSocket))
176
-
sb.WriteString(fmt.Sprintf(" Resolver: %v\n", s.config.EnableResolver))
177
-
sb.WriteString(fmt.Sprintf(" Uptime: %s\n", time.Since(s.startTime).Round(time.Second)))
171
+
sb.WriteString(fmt.Sprintf(" Version: %s\n", s.config.Version))
172
+
sb.WriteString(fmt.Sprintf(" Sync mode: %v\n", s.config.SyncMode))
173
+
sb.WriteString(fmt.Sprintf(" WebSocket: %v\n", s.config.EnableWebSocket))
174
+
sb.WriteString(fmt.Sprintf(" Handle Resolver: %v\n", s.manager.GetHandleResolver().GetBaseURL()))
175
+
sb.WriteString(fmt.Sprintf(" Uptime: %s\n", time.Since(s.startTime).Round(time.Second)))
178
176
179
177
sb.WriteString("\n\nAPI Endpoints\n")
180
178
sb.WriteString("โโโโโโโโโโโโโ\n")
···
183
181
sb.WriteString(" GET /bundle/:number Bundle metadata (JSON)\n")
184
182
sb.WriteString(" GET /data/:number Raw bundle (zstd compressed)\n")
185
183
sb.WriteString(" GET /jsonl/:number Decompressed JSONL stream\n")
184
+
sb.WriteString(" GET /op/:pointer Get single operation\n")
186
185
sb.WriteString(" GET /status Server status\n")
187
186
sb.WriteString(" GET /mempool Mempool operations (JSONL)\n")
188
187
···
192
191
sb.WriteString(" GET /:did DID Document (W3C format)\n")
193
192
sb.WriteString(" GET /:did/data PLC State (raw format)\n")
194
193
sb.WriteString(" GET /:did/log/audit Operation history\n")
195
-
196
-
didStats := s.manager.GetDIDIndexStats()
197
-
if didStats["exists"].(bool) {
198
-
sb.WriteString(fmt.Sprintf("\n Index: %s DIDs indexed\n",
199
-
formatNumber(int(didStats["total_dids"].(int64)))))
200
-
} else {
201
-
sb.WriteString("\n โ ๏ธ Index: not built (will use slow scan)\n")
202
-
}
203
-
sb.WriteString("\n")
204
194
}
205
195
206
196
if s.config.EnableWebSocket {
···
232
222
sb.WriteString(fmt.Sprintf(" curl %s/bundle/1\n", baseURL))
233
223
sb.WriteString(fmt.Sprintf(" curl %s/data/42 -o 000042.jsonl.zst\n", baseURL))
234
224
sb.WriteString(fmt.Sprintf(" curl %s/jsonl/1\n", baseURL))
225
+
sb.WriteString(fmt.Sprintf(" curl %s/op/0\n", baseURL))
235
226
236
227
if s.config.EnableWebSocket {
237
228
sb.WriteString(fmt.Sprintf(" websocat %s/ws\n", wsURL))
···
338
329
UptimeSeconds: int(time.Since(s.startTime).Seconds()),
339
330
SyncMode: s.config.SyncMode,
340
331
WebSocketEnabled: s.config.EnableWebSocket,
332
+
ResolverEnabled: s.config.EnableResolver,
341
333
Origin: s.manager.GetPLCOrigin(),
342
334
},
343
335
Bundles: BundleStatus{
···
346
338
UncompressedSize: indexStats["total_uncompressed_size"].(int64),
347
339
UpdatedAt: indexStats["updated_at"].(time.Time),
348
340
},
341
+
}
342
+
343
+
if resolver := s.manager.GetHandleResolver(); resolver != nil {
344
+
response.Server.HandleResolver = resolver.GetBaseURL()
349
345
}
350
346
351
347
if s.config.SyncMode && s.config.SyncInterval > 0 {
···
426
422
}
427
423
}
428
424
425
+
// DID Index stats
426
+
didStats := s.manager.GetDIDIndexStats()
427
+
if didStats["enabled"].(bool) {
428
+
didIndex := &DIDIndexStatus{
429
+
Enabled: true,
430
+
Exists: didStats["exists"].(bool),
431
+
TotalDIDs: didStats["total_dids"].(int64),
432
+
IndexedDIDs: didStats["indexed_dids"].(int64),
433
+
LastBundle: didStats["last_bundle"].(int),
434
+
ShardCount: didStats["shard_count"].(int),
435
+
CachedShards: didStats["cached_shards"].(int),
436
+
CacheLimit: didStats["cache_limit"].(int),
437
+
UpdatedAt: didStats["updated_at"].(time.Time),
438
+
}
439
+
440
+
// Mempool DIDs
441
+
if mempoolDIDs, ok := didStats["mempool_dids"].(int64); ok && mempoolDIDs > 0 {
442
+
didIndex.MempoolDIDs = mempoolDIDs
443
+
}
444
+
445
+
// Version and format
446
+
if s.manager.GetDIDIndex() != nil {
447
+
config := s.manager.GetDIDIndex().GetConfig()
448
+
didIndex.Version = config.Version
449
+
didIndex.Format = config.Format
450
+
}
451
+
452
+
// Hot shards
453
+
if cacheOrder, ok := didStats["cache_order"].([]int); ok && len(cacheOrder) > 0 {
454
+
maxShards := 10
455
+
if len(cacheOrder) < maxShards {
456
+
maxShards = len(cacheOrder)
457
+
}
458
+
didIndex.HotShards = cacheOrder[:maxShards]
459
+
}
460
+
461
+
// Cache performance
462
+
if cacheHitRate, ok := didStats["cache_hit_rate"].(float64); ok {
463
+
didIndex.CacheHitRate = cacheHitRate
464
+
}
465
+
if cacheHits, ok := didStats["cache_hits"].(int64); ok {
466
+
didIndex.CacheHits = cacheHits
467
+
}
468
+
if cacheMisses, ok := didStats["cache_misses"].(int64); ok {
469
+
didIndex.CacheMisses = cacheMisses
470
+
}
471
+
if totalLookups, ok := didStats["total_lookups"].(int64); ok {
472
+
didIndex.TotalLookups = totalLookups
473
+
}
474
+
475
+
// Lookup performance metrics
476
+
if avgTime, ok := didStats["avg_lookup_time_ms"].(float64); ok {
477
+
didIndex.AvgLookupTimeMs = avgTime
478
+
}
479
+
if recentAvg, ok := didStats["recent_avg_lookup_time_ms"].(float64); ok {
480
+
didIndex.RecentAvgLookupTimeMs = recentAvg
481
+
}
482
+
if minTime, ok := didStats["min_lookup_time_ms"].(float64); ok {
483
+
didIndex.MinLookupTimeMs = minTime
484
+
}
485
+
if maxTime, ok := didStats["max_lookup_time_ms"].(float64); ok {
486
+
didIndex.MaxLookupTimeMs = maxTime
487
+
}
488
+
if p50, ok := didStats["p50_lookup_time_ms"].(float64); ok {
489
+
didIndex.P50LookupTimeMs = p50
490
+
}
491
+
if p95, ok := didStats["p95_lookup_time_ms"].(float64); ok {
492
+
didIndex.P95LookupTimeMs = p95
493
+
}
494
+
if p99, ok := didStats["p99_lookup_time_ms"].(float64); ok {
495
+
didIndex.P99LookupTimeMs = p99
496
+
}
497
+
if sampleSize, ok := didStats["recent_sample_size"].(int); ok {
498
+
didIndex.RecentSampleSize = sampleSize
499
+
}
500
+
501
+
response.DIDIndex = didIndex
502
+
}
503
+
504
+
// Resolver performance stats
505
+
if s.config.EnableResolver {
506
+
resolverStats := s.manager.GetResolverStats()
507
+
508
+
if totalRes, ok := resolverStats["total_resolutions"].(int64); ok && totalRes > 0 {
509
+
resolver := &ResolverStatus{
510
+
Enabled: true,
511
+
TotalResolutions: totalRes,
512
+
}
513
+
514
+
// Handle resolver URL
515
+
if hr := s.manager.GetHandleResolver(); hr != nil {
516
+
resolver.HandleResolver = hr.GetBaseURL()
517
+
}
518
+
519
+
// Counts
520
+
if v, ok := resolverStats["mempool_hits"].(int64); ok {
521
+
resolver.MempoolHits = v
522
+
}
523
+
if v, ok := resolverStats["bundle_hits"].(int64); ok {
524
+
resolver.BundleHits = v
525
+
}
526
+
if v, ok := resolverStats["errors"].(int64); ok {
527
+
resolver.Errors = v
528
+
}
529
+
if v, ok := resolverStats["success_rate"].(float64); ok {
530
+
resolver.SuccessRate = v
531
+
}
532
+
if v, ok := resolverStats["mempool_hit_rate"].(float64); ok {
533
+
resolver.MempoolHitRate = v
534
+
}
535
+
536
+
// Overall averages
537
+
if v, ok := resolverStats["avg_total_time_ms"].(float64); ok {
538
+
resolver.AvgTotalTimeMs = v
539
+
}
540
+
if v, ok := resolverStats["avg_mempool_time_ms"].(float64); ok {
541
+
resolver.AvgMempoolTimeMs = v
542
+
}
543
+
if v, ok := resolverStats["avg_index_time_ms"].(float64); ok {
544
+
resolver.AvgIndexTimeMs = v
545
+
}
546
+
if v, ok := resolverStats["avg_load_op_time_ms"].(float64); ok {
547
+
resolver.AvgLoadOpTimeMs = v
548
+
}
549
+
550
+
// Recent averages
551
+
if v, ok := resolverStats["recent_avg_total_time_ms"].(float64); ok {
552
+
resolver.RecentAvgTotalTimeMs = v
553
+
}
554
+
if v, ok := resolverStats["recent_avg_mempool_time_ms"].(float64); ok {
555
+
resolver.RecentAvgMempoolTimeMs = v
556
+
}
557
+
if v, ok := resolverStats["recent_avg_index_time_ms"].(float64); ok {
558
+
resolver.RecentAvgIndexTimeMs = v
559
+
}
560
+
if v, ok := resolverStats["recent_avg_load_time_ms"].(float64); ok {
561
+
resolver.RecentAvgLoadTimeMs = v
562
+
}
563
+
if v, ok := resolverStats["recent_sample_size"].(int); ok {
564
+
resolver.RecentSampleSize = v
565
+
}
566
+
567
+
// Percentiles
568
+
if v, ok := resolverStats["min_total_time_ms"].(float64); ok {
569
+
resolver.MinTotalTimeMs = v
570
+
}
571
+
if v, ok := resolverStats["max_total_time_ms"].(float64); ok {
572
+
resolver.MaxTotalTimeMs = v
573
+
}
574
+
if v, ok := resolverStats["p50_total_time_ms"].(float64); ok {
575
+
resolver.P50TotalTimeMs = v
576
+
}
577
+
if v, ok := resolverStats["p95_total_time_ms"].(float64); ok {
578
+
resolver.P95TotalTimeMs = v
579
+
}
580
+
if v, ok := resolverStats["p99_total_time_ms"].(float64); ok {
581
+
resolver.P99TotalTimeMs = v
582
+
}
583
+
if v, ok := resolverStats["p95_index_time_ms"].(float64); ok {
584
+
resolver.P95IndexTimeMs = v
585
+
}
586
+
if v, ok := resolverStats["p95_load_op_time_ms"].(float64); ok {
587
+
resolver.P95LoadOpTimeMs = v
588
+
}
589
+
590
+
response.Resolver = resolver
591
+
} else {
592
+
// No resolutions yet, but resolver is enabled
593
+
response.Resolver = &ResolverStatus{
594
+
Enabled: true,
595
+
TotalResolutions: 0,
596
+
}
597
+
598
+
if hr := s.manager.GetHandleResolver(); hr != nil {
599
+
response.Resolver.HandleResolver = hr.GetBaseURL()
600
+
}
601
+
}
602
+
}
603
+
429
604
sendJSON(w, 200, response)
430
605
}
431
606
}
···
498
673
path := strings.TrimPrefix(r.URL.Path, "/")
499
674
500
675
parts := strings.SplitN(path, "/", 2)
501
-
did := parts[0]
676
+
input := parts[0]
502
677
503
-
if !strings.HasPrefix(did, "did:plc:") {
678
+
// Ignore common browser files before any validation
679
+
if isCommonBrowserFile(input) {
680
+
w.WriteHeader(http.StatusNotFound)
681
+
return
682
+
}
683
+
684
+
// Quick validation: must be either a DID or a valid handle format
685
+
if !isValidDIDOrHandle(input) {
504
686
sendJSON(w, 404, map[string]string{"error": "not found"})
505
687
return
506
688
}
507
689
690
+
// Route to appropriate handler
508
691
if len(parts) == 1 {
509
-
s.handleDIDDocument(did)(w, r)
692
+
s.handleDIDDocument(input)(w, r)
510
693
} else if parts[1] == "data" {
511
-
s.handleDIDData(did)(w, r)
694
+
s.handleDIDData(input)(w, r)
512
695
} else if parts[1] == "log/audit" {
513
-
s.handleDIDAuditLog(did)(w, r)
696
+
s.handleDIDAuditLog(input)(w, r)
514
697
} else {
515
698
sendJSON(w, 404, map[string]string{"error": "not found"})
516
699
}
517
700
}
518
701
519
-
func (s *Server) handleDIDDocument(did string) http.HandlerFunc {
702
+
func isCommonBrowserFile(path string) bool {
703
+
// Common files browsers request automatically
704
+
commonFiles := []string{
705
+
"favicon.ico",
706
+
"robots.txt",
707
+
"sitemap.xml",
708
+
"apple-touch-icon.png",
709
+
"apple-touch-icon-precomposed.png",
710
+
".well-known",
711
+
}
712
+
713
+
for _, file := range commonFiles {
714
+
if path == file || strings.HasPrefix(path, file) {
715
+
return true
716
+
}
717
+
}
718
+
719
+
// Common file extensions that are NOT DIDs/handles
720
+
commonExtensions := []string{
721
+
".ico", ".png", ".jpg", ".jpeg", ".gif", ".svg",
722
+
".css", ".js", ".woff", ".woff2", ".ttf", ".eot",
723
+
".xml", ".txt", ".html", ".webmanifest",
724
+
}
725
+
726
+
for _, ext := range commonExtensions {
727
+
if strings.HasSuffix(path, ext) {
728
+
return true
729
+
}
730
+
}
731
+
732
+
return false
733
+
}
734
+
735
+
// isValidDIDOrHandle does quick format check before expensive resolution
736
+
func isValidDIDOrHandle(input string) bool {
737
+
// Empty input
738
+
if input == "" {
739
+
return false
740
+
}
741
+
742
+
// If it's a DID
743
+
if strings.HasPrefix(input, "did:") {
744
+
// Only accept did:plc: method (reject other methods at routing level)
745
+
if !strings.HasPrefix(input, "did:plc:") {
746
+
return false // Returns 404 for did:web:, did:key:, did:invalid:, etc
747
+
}
748
+
749
+
// Accept any did:plc:* - let handler validate exact format
750
+
// This allows invalid formats to reach handler and get proper 400 errors
751
+
return true
752
+
}
753
+
754
+
// Not a DID - validate as handle
755
+
// Must have at least one dot (domain.tld)
756
+
if !strings.Contains(input, ".") {
757
+
return false
758
+
}
759
+
760
+
// Must not have invalid characters for a domain
761
+
// Simple check: alphanumeric, dots, hyphens only
762
+
for _, c := range input {
763
+
if !((c >= 'a' && c <= 'z') ||
764
+
(c >= 'A' && c <= 'Z') ||
765
+
(c >= '0' && c <= '9') ||
766
+
c == '.' || c == '-') {
767
+
return false
768
+
}
769
+
}
770
+
771
+
// Basic length check (DNS max is 253)
772
+
if len(input) > 253 {
773
+
return false
774
+
}
775
+
776
+
// Must not start or end with dot or hyphen
777
+
if strings.HasPrefix(input, ".") || strings.HasSuffix(input, ".") ||
778
+
strings.HasPrefix(input, "-") || strings.HasSuffix(input, "-") {
779
+
return false
780
+
}
781
+
782
+
return true
783
+
}
784
+
785
+
func (s *Server) handleDIDDocument(input string) http.HandlerFunc {
520
786
return func(w http.ResponseWriter, r *http.Request) {
787
+
if r.Method == "OPTIONS" {
788
+
return
789
+
}
790
+
791
+
// Resolve handle to DID
792
+
did, handleResolveTime, err := s.manager.ResolveHandleOrDID(r.Context(), input)
793
+
if err != nil {
794
+
if strings.Contains(err.Error(), "appears to be a handle") {
795
+
sendJSON(w, 400, map[string]string{
796
+
"error": "Handle resolver not configured",
797
+
"hint": "Start server with --handle-resolver flag",
798
+
})
799
+
} else {
800
+
sendJSON(w, 400, map[string]string{"error": err.Error()})
801
+
}
802
+
return
803
+
}
804
+
805
+
resolvedHandle := ""
806
+
if handleResolveTime > 0 {
807
+
resolvedHandle = input
808
+
}
809
+
810
+
// Single call gets both document AND operation metadata
521
811
result, err := s.manager.ResolveDID(r.Context(), did)
522
812
if err != nil {
523
813
if strings.Contains(err.Error(), "deactivated") {
···
530
820
return
531
821
}
532
822
533
-
// Add timing headers in MILLISECONDS (float for precision)
534
-
w.Header().Set("X-Resolution-Time-Ms", fmt.Sprintf("%.3f", float64(result.TotalTime.Microseconds())/1000.0))
535
-
w.Header().Set("X-Resolution-Source", result.Source)
536
-
w.Header().Set("X-Mempool-Time-Ms", fmt.Sprintf("%.3f", float64(result.MempoolTime.Microseconds())/1000.0))
823
+
// Early ETag check - operation is already in result.LatestOperation
824
+
if result.LatestOperation != nil {
825
+
etag := fmt.Sprintf(`"%s"`, result.LatestOperation.CID)
537
826
538
-
if result.Source == "bundle" {
539
-
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", result.BundleNumber))
540
-
w.Header().Set("X-Bundle-Position", fmt.Sprintf("%d", result.Position))
541
-
w.Header().Set("X-Index-Time-Ms", fmt.Sprintf("%.3f", float64(result.IndexTime.Microseconds())/1000.0))
542
-
w.Header().Set("X-Load-Time-Ms", fmt.Sprintf("%.3f", float64(result.LoadOpTime.Microseconds())/1000.0))
827
+
if match := r.Header.Get("If-None-Match"); match != "" {
828
+
// Strip quotes if present
829
+
matchClean := strings.Trim(match, `"`)
830
+
if matchClean == result.LatestOperation.CID {
831
+
// Set minimal headers for 304 response
832
+
w.Header().Set("ETag", etag)
833
+
w.Header().Set("Cache-Control", "public, max-age=300")
834
+
w.WriteHeader(http.StatusNotModified)
835
+
return
836
+
}
837
+
}
543
838
}
544
839
840
+
// Set all headers (now with result.LatestOperation available)
841
+
setDIDDocumentHeaders(w, r, did, resolvedHandle, result, handleResolveTime)
842
+
545
843
w.Header().Set("Content-Type", "application/did+ld+json")
546
844
sendJSON(w, 200, result.Document)
547
845
}
548
846
}
549
847
550
-
func (s *Server) handleDIDData(did string) http.HandlerFunc {
848
+
func (s *Server) handleDIDData(input string) http.HandlerFunc {
551
849
return func(w http.ResponseWriter, r *http.Request) {
552
-
if err := plcclient.ValidateDIDFormat(did); err != nil {
553
-
sendJSON(w, 400, map[string]string{"error": "Invalid DID format"})
850
+
// Resolve handle to DID
851
+
did, _, err := s.manager.ResolveHandleOrDID(r.Context(), input)
852
+
if err != nil {
853
+
sendJSON(w, 400, map[string]string{"error": err.Error()})
554
854
return
555
855
}
556
856
557
-
operations, err := s.manager.GetDIDOperations(context.Background(), did, false)
857
+
operations, _, err := s.manager.GetDIDOperations(context.Background(), did, false)
558
858
if err != nil {
559
859
sendJSON(w, 500, map[string]string{"error": err.Error()})
560
860
return
···
579
879
}
580
880
}
581
881
582
-
func (s *Server) handleDIDAuditLog(did string) http.HandlerFunc {
882
+
func (s *Server) handleDIDAuditLog(input string) http.HandlerFunc {
583
883
return func(w http.ResponseWriter, r *http.Request) {
584
-
if err := plcclient.ValidateDIDFormat(did); err != nil {
585
-
sendJSON(w, 400, map[string]string{"error": "Invalid DID format"})
884
+
did, _, err := s.manager.ResolveHandleOrDID(r.Context(), input)
885
+
if err != nil {
886
+
sendJSON(w, 400, map[string]string{"error": err.Error()})
586
887
return
587
888
}
588
889
589
-
operations, err := s.manager.GetDIDOperations(context.Background(), did, false)
890
+
operations, _, err := s.manager.GetDIDOperations(context.Background(), did, false)
590
891
if err != nil {
591
892
sendJSON(w, 500, map[string]string{"error": err.Error()})
592
893
return
···
601
902
sendJSON(w, 200, auditLog)
602
903
}
603
904
}
905
+
906
+
// handleOperation gets a single operation with detailed timing headers
907
+
func (s *Server) handleOperation() http.HandlerFunc {
908
+
return func(w http.ResponseWriter, r *http.Request) {
909
+
pointer := r.PathValue("pointer")
910
+
911
+
// Parse pointer format: "bundle:position" or global position
912
+
bundleNum, position, err := parseOperationPointer(pointer)
913
+
if err != nil {
914
+
sendJSON(w, 400, map[string]string{"error": err.Error()})
915
+
return
916
+
}
917
+
918
+
// Validate position range
919
+
if position < 0 || position >= types.BUNDLE_SIZE {
920
+
sendJSON(w, 400, map[string]string{
921
+
"error": fmt.Sprintf("Position must be 0-%d", types.BUNDLE_SIZE-1),
922
+
})
923
+
return
924
+
}
925
+
926
+
// Time the entire request
927
+
totalStart := time.Now()
928
+
929
+
// Time the operation load
930
+
loadStart := time.Now()
931
+
op, err := s.manager.LoadOperation(r.Context(), bundleNum, position)
932
+
loadDuration := time.Since(loadStart)
933
+
934
+
if err != nil {
935
+
if strings.Contains(err.Error(), "not in index") ||
936
+
strings.Contains(err.Error(), "not found") {
937
+
sendJSON(w, 404, map[string]string{"error": "Operation not found"})
938
+
} else {
939
+
sendJSON(w, 500, map[string]string{"error": err.Error()})
940
+
}
941
+
return
942
+
}
943
+
944
+
totalDuration := time.Since(totalStart)
945
+
946
+
// Calculate global position
947
+
globalPos := (bundleNum * types.BUNDLE_SIZE) + position
948
+
949
+
// Calculate operation age
950
+
opAge := time.Since(op.CreatedAt)
951
+
952
+
// Set response headers with useful metadata
953
+
setOperationHeaders(w, op, bundleNum, position, globalPos, loadDuration, totalDuration, opAge)
954
+
955
+
// Send raw JSON if available (faster, preserves exact format)
956
+
if len(op.RawJSON) > 0 {
957
+
w.Header().Set("Content-Type", "application/json")
958
+
w.Write(op.RawJSON)
959
+
} else {
960
+
sendJSON(w, 200, op)
961
+
}
962
+
}
963
+
}
964
+
965
+
// parseOperationPointer parses pointer in format "bundle:position" or global position
966
+
func parseOperationPointer(pointer string) (bundleNum, position int, err error) {
967
+
// Check if it's the "bundle:position" format
968
+
if strings.Contains(pointer, ":") {
969
+
parts := strings.Split(pointer, ":")
970
+
if len(parts) != 2 {
971
+
return 0, 0, fmt.Errorf("invalid pointer format: use 'bundle:position' or global position")
972
+
}
973
+
974
+
bundleNum, err = strconv.Atoi(parts[0])
975
+
if err != nil {
976
+
return 0, 0, fmt.Errorf("invalid bundle number: %w", err)
977
+
}
978
+
979
+
position, err = strconv.Atoi(parts[1])
980
+
if err != nil {
981
+
return 0, 0, fmt.Errorf("invalid position: %w", err)
982
+
}
983
+
984
+
if bundleNum < 1 {
985
+
return 0, 0, fmt.Errorf("bundle number must be >= 1")
986
+
}
987
+
988
+
return bundleNum, position, nil
989
+
}
990
+
991
+
// Parse as global position
992
+
globalPos, err := strconv.Atoi(pointer)
993
+
if err != nil {
994
+
return 0, 0, fmt.Errorf("invalid position: must be number or 'bundle:position' format")
995
+
}
996
+
997
+
if globalPos < 0 {
998
+
return 0, 0, fmt.Errorf("global position must be >= 0")
999
+
}
1000
+
1001
+
// Handle small numbers as shorthand for bundle 1
1002
+
if globalPos < types.BUNDLE_SIZE {
1003
+
return 1, globalPos, nil
1004
+
}
1005
+
1006
+
// Convert global position to bundle + position
1007
+
bundleNum = globalPos / types.BUNDLE_SIZE
1008
+
position = globalPos % types.BUNDLE_SIZE
1009
+
1010
+
// Minimum bundle number is 1
1011
+
if bundleNum < 1 {
1012
+
bundleNum = 1
1013
+
}
1014
+
1015
+
return bundleNum, position, nil
1016
+
}
1017
+
1018
+
// setOperationHeaders sets useful response headers
1019
+
func setOperationHeaders(
1020
+
w http.ResponseWriter,
1021
+
op *plcclient.PLCOperation,
1022
+
bundleNum, position, globalPos int,
1023
+
loadDuration, totalDuration, opAge time.Duration,
1024
+
) {
1025
+
// === Location Information ===
1026
+
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
1027
+
w.Header().Set("X-Position", fmt.Sprintf("%d", position))
1028
+
w.Header().Set("X-Global-Position", fmt.Sprintf("%d", globalPos))
1029
+
w.Header().Set("X-Pointer", fmt.Sprintf("%d:%d", bundleNum, position))
1030
+
1031
+
// === Operation Metadata ===
1032
+
w.Header().Set("X-Operation-DID", op.DID)
1033
+
w.Header().Set("X-Operation-CID", op.CID)
1034
+
w.Header().Set("X-Operation-Created", op.CreatedAt.Format(time.RFC3339))
1035
+
w.Header().Set("X-Operation-Age-Seconds", fmt.Sprintf("%d", int(opAge.Seconds())))
1036
+
1037
+
// Nullification status
1038
+
if op.IsNullified() {
1039
+
w.Header().Set("X-Operation-Nullified", "true")
1040
+
if nullCID := op.GetNullifyingCID(); nullCID != "" {
1041
+
w.Header().Set("X-Operation-Nullified-By", nullCID)
1042
+
}
1043
+
} else {
1044
+
w.Header().Set("X-Operation-Nullified", "false")
1045
+
}
1046
+
1047
+
// === Size Information ===
1048
+
if len(op.RawJSON) > 0 {
1049
+
w.Header().Set("X-Operation-Size", fmt.Sprintf("%d", len(op.RawJSON)))
1050
+
}
1051
+
1052
+
// === Performance Metrics (in milliseconds with precision) ===
1053
+
w.Header().Set("X-Load-Time-Ms", fmt.Sprintf("%.3f", float64(loadDuration.Microseconds())/1000.0))
1054
+
w.Header().Set("X-Total-Time-Ms", fmt.Sprintf("%.3f", float64(totalDuration.Microseconds())/1000.0))
1055
+
1056
+
// === Caching Hints ===
1057
+
// Set cache control (operations are immutable once bundled)
1058
+
w.Header().Set("Cache-Control", "public, max-age=31536000, immutable")
1059
+
w.Header().Set("ETag", op.CID) // CID is perfect for ETag
1060
+
}
1061
+
1062
+
// handleDIDIndexStats returns detailed DID index performance metrics
1063
+
func (s *Server) handleDebugDIDIndex() http.HandlerFunc {
1064
+
return func(w http.ResponseWriter, r *http.Request) {
1065
+
didStats := s.manager.GetDIDIndexStats()
1066
+
1067
+
if !didStats["enabled"].(bool) || !didStats["exists"].(bool) {
1068
+
sendJSON(w, 404, map[string]string{
1069
+
"error": "DID index not available",
1070
+
})
1071
+
return
1072
+
}
1073
+
1074
+
// Return all stats (more detailed than /status)
1075
+
sendJSON(w, 200, didStats)
1076
+
}
1077
+
}
1078
+
1079
+
func (s *Server) handleDebugResolver() http.HandlerFunc {
1080
+
return func(w http.ResponseWriter, r *http.Request) {
1081
+
resolverStats := s.manager.GetResolverStats()
1082
+
1083
+
if resolverStats == nil {
1084
+
sendJSON(w, 404, map[string]string{
1085
+
"error": "Resolver not enabled",
1086
+
})
1087
+
return
1088
+
}
1089
+
1090
+
// Return all stats (more detailed than /status)
1091
+
sendJSON(w, 200, resolverStats)
1092
+
}
1093
+
}
+93
server/helpers.go
+93
server/helpers.go
···
3
3
import (
4
4
"fmt"
5
5
"net/http"
6
+
"time"
7
+
8
+
"tangled.org/atscan.net/plcbundle-go/bundle"
6
9
)
7
10
8
11
// getScheme determines the HTTP scheme
···
56
59
}
57
60
return string(result)
58
61
}
62
+
63
+
func setDIDDocumentHeaders(
64
+
w http.ResponseWriter,
65
+
_ *http.Request,
66
+
did string,
67
+
resolvedHandle string,
68
+
result *bundle.ResolveDIDResult,
69
+
handleResolveTime time.Duration,
70
+
) {
71
+
// === Identity ===
72
+
w.Header().Set("X-DID", did)
73
+
74
+
if resolvedHandle != "" {
75
+
w.Header().Set("X-Handle-Resolved", resolvedHandle)
76
+
w.Header().Set("X-Handle-Resolution-Time-Ms",
77
+
fmt.Sprintf("%.3f", float64(handleResolveTime.Microseconds())/1000.0))
78
+
w.Header().Set("X-Request-Type", "handle")
79
+
} else {
80
+
w.Header().Set("X-Request-Type", "did")
81
+
}
82
+
83
+
// === Resolution Source & Location ===
84
+
w.Header().Set("X-Resolution-Source", result.Source)
85
+
86
+
if result.Source == "bundle" {
87
+
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", result.BundleNumber))
88
+
w.Header().Set("X-Bundle-Position", fmt.Sprintf("%d", result.Position))
89
+
globalPos := (result.BundleNumber * 10000) + result.Position
90
+
w.Header().Set("X-Global-Position", fmt.Sprintf("%d", globalPos))
91
+
w.Header().Set("X-Pointer", fmt.Sprintf("%d:%d", result.BundleNumber, result.Position))
92
+
} else {
93
+
w.Header().Set("X-Mempool", "true")
94
+
}
95
+
96
+
// === Operation Metadata (from result.LatestOperation) ===
97
+
if result.LatestOperation != nil {
98
+
op := result.LatestOperation
99
+
100
+
w.Header().Set("X-Operation-CID", op.CID)
101
+
w.Header().Set("X-Operation-Created", op.CreatedAt.Format(time.RFC3339))
102
+
103
+
opAge := time.Since(op.CreatedAt)
104
+
w.Header().Set("X-Operation-Age-Seconds", fmt.Sprintf("%d", int(opAge.Seconds())))
105
+
106
+
if len(op.RawJSON) > 0 {
107
+
w.Header().Set("X-Operation-Size", fmt.Sprintf("%d", len(op.RawJSON)))
108
+
}
109
+
110
+
// Nullification status
111
+
if op.IsNullified() {
112
+
w.Header().Set("X-Operation-Nullified", "true")
113
+
if nullCID := op.GetNullifyingCID(); nullCID != "" {
114
+
w.Header().Set("X-Operation-Nullified-By", nullCID)
115
+
}
116
+
} else {
117
+
w.Header().Set("X-Operation-Nullified", "false")
118
+
}
119
+
120
+
// Standard HTTP headers
121
+
w.Header().Set("Last-Modified", op.CreatedAt.UTC().Format(http.TimeFormat))
122
+
w.Header().Set("ETag", fmt.Sprintf(`"%s"`, op.CID))
123
+
}
124
+
125
+
// === Performance Metrics ===
126
+
totalTime := handleResolveTime + result.TotalTime
127
+
w.Header().Set("X-Resolution-Time-Ms",
128
+
fmt.Sprintf("%.3f", float64(totalTime.Microseconds())/1000.0))
129
+
w.Header().Set("X-Mempool-Time-Ms",
130
+
fmt.Sprintf("%.3f", float64(result.MempoolTime.Microseconds())/1000.0))
131
+
132
+
if result.Source == "bundle" {
133
+
w.Header().Set("X-Index-Time-Ms",
134
+
fmt.Sprintf("%.3f", float64(result.IndexTime.Microseconds())/1000.0))
135
+
w.Header().Set("X-Load-Time-Ms",
136
+
fmt.Sprintf("%.3f", float64(result.LoadOpTime.Microseconds())/1000.0))
137
+
}
138
+
139
+
// === Caching Strategy ===
140
+
if result.Source == "bundle" {
141
+
// Bundled data: cache 5min, stale-while-revalidate 10min
142
+
w.Header().Set("Cache-Control",
143
+
"public, max-age=300, stale-while-revalidate=600, stale-if-error=3600")
144
+
} else {
145
+
// Mempool data: cache 1min, stale-while-revalidate 5min
146
+
w.Header().Set("Cache-Control",
147
+
"public, max-age=60, stale-while-revalidate=300, stale-if-error=600")
148
+
}
149
+
150
+
w.Header().Set("Vary", "Accept, If-None-Match")
151
+
}
+11
-11
server/middleware.go
+11
-11
server/middleware.go
···
15
15
return
16
16
}
17
17
18
-
// Normal CORS handling
18
+
// Set CORS headers for all requests
19
19
w.Header().Set("Access-Control-Allow-Origin", "*")
20
-
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
21
-
22
-
if requestedHeaders := r.Header.Get("Access-Control-Request-Headers"); requestedHeaders != "" {
23
-
w.Header().Set("Access-Control-Allow-Headers", requestedHeaders)
24
-
} else {
25
-
w.Header().Set("Access-Control-Allow-Headers", "*")
26
-
}
27
-
20
+
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
21
+
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
28
22
w.Header().Set("Access-Control-Max-Age", "86400")
23
+
w.Header().Set("Access-Control-Expose-Headers",
24
+
"X-Bundle-Number, X-Position, X-Global-Position, X-Pointer, "+
25
+
"X-Operation-DID, X-Operation-CID, X-Load-Time-Ms, X-Total-Time-Ms, "+
26
+
"X-Resolution-Time-Ms, X-Resolution-Source, X-Index-Time-Ms")
29
27
28
+
// Handle OPTIONS preflight - return immediately WITHOUT calling handler
30
29
if r.Method == "OPTIONS" {
31
-
w.WriteHeader(204)
32
-
return
30
+
w.WriteHeader(http.StatusNoContent) // 204
31
+
return // STOP HERE - don't call next
33
32
}
34
33
34
+
// Only call handler for non-OPTIONS requests
35
35
next.ServeHTTP(w, r)
36
36
})
37
37
}
+19
-32
server/server.go
+19
-32
server/server.go
···
5
5
"net/http"
6
6
"time"
7
7
8
-
"tangled.org/atscan.net/plcbundle/bundle"
8
+
"tangled.org/atscan.net/plcbundle-go/bundle"
9
9
)
10
10
11
-
// Server serves bundle data over HTTP
12
-
type Server struct {
13
-
manager *bundle.Manager
14
-
addr string
15
-
config *Config
16
-
startTime time.Time
17
-
httpServer *http.Server
18
-
}
19
-
20
-
// Config configures the server
21
-
type Config struct {
22
-
Addr string
23
-
SyncMode bool
24
-
SyncInterval time.Duration
25
-
EnableWebSocket bool
26
-
EnableResolver bool
27
-
Version string
28
-
}
29
-
30
11
// New creates a new HTTP server
31
12
func New(manager *bundle.Manager, config *Config) *Server {
32
13
if config.Version == "" {
···
50
31
return s
51
32
}
52
33
53
-
// ListenAndServe starts the HTTP server
54
-
func (s *Server) ListenAndServe() error {
55
-
return s.httpServer.ListenAndServe()
56
-
}
57
-
58
-
// Shutdown gracefully shuts down the server
59
-
func (s *Server) Shutdown(ctx context.Context) error {
60
-
return s.httpServer.Shutdown(ctx)
61
-
}
62
-
63
34
// createHandler creates the HTTP handler with all routes
64
35
func (s *Server) createHandler() http.Handler {
65
36
mux := http.NewServeMux()
···
69
40
mux.HandleFunc("GET /bundle/{number}", s.handleBundle())
70
41
mux.HandleFunc("GET /data/{number}", s.handleBundleData())
71
42
mux.HandleFunc("GET /jsonl/{number}", s.handleBundleJSONL())
43
+
mux.HandleFunc("GET /op/{pointer}", s.handleOperation())
72
44
mux.HandleFunc("GET /status", s.handleStatus())
73
45
mux.HandleFunc("GET /debug/memory", s.handleDebugMemory())
46
+
mux.HandleFunc("GET /debug/didindex", s.handleDebugDIDIndex())
47
+
mux.HandleFunc("GET /debug/resolver", s.handleDebugResolver())
74
48
75
49
// WebSocket
76
50
if s.config.EnableWebSocket {
···
99
73
sendJSON(w, 404, map[string]string{"error": "not found"})
100
74
})
101
75
102
-
return corsMiddleware(mux)
76
+
// Apply middleware in correct order:
77
+
handler := corsMiddleware(mux)
78
+
79
+
return handler
80
+
}
81
+
82
+
// ListenAndServe starts the HTTP server
83
+
func (s *Server) ListenAndServe() error {
84
+
return s.httpServer.ListenAndServe()
85
+
}
86
+
87
+
// Shutdown gracefully shuts down the server
88
+
func (s *Server) Shutdown(ctx context.Context) error {
89
+
return s.httpServer.Shutdown(ctx)
103
90
}
104
91
105
92
// GetStartTime returns when the server started
···
107
94
return s.startTime
108
95
}
109
96
110
-
// Add this method to Server
97
+
// Handler returns the configured HTTP handler
111
98
func (s *Server) Handler() http.Handler {
112
99
return s.createHandler()
113
100
}
+185
-10
server/server_test.go
+185
-10
server/server_test.go
···
15
15
"time"
16
16
17
17
"github.com/gorilla/websocket"
18
-
"tangled.org/atscan.net/plcbundle/bundle"
19
-
"tangled.org/atscan.net/plcbundle/internal/bundleindex"
20
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
21
-
"tangled.org/atscan.net/plcbundle/internal/storage"
22
-
"tangled.org/atscan.net/plcbundle/server"
18
+
"tangled.org/atscan.net/plcbundle-go/bundle"
19
+
"tangled.org/atscan.net/plcbundle-go/internal/bundleindex"
20
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
21
+
"tangled.org/atscan.net/plcbundle-go/internal/storage"
22
+
"tangled.org/atscan.net/plcbundle-go/server"
23
23
)
24
24
25
25
type testLogger struct {
···
33
33
func (l *testLogger) Println(v ...interface{}) {
34
34
l.t.Log(v...)
35
35
}
36
+
37
+
var (
38
+
bundleInfo = &storage.BundleInfo{
39
+
BundleNumber: 1,
40
+
Origin: "test-origin",
41
+
ParentHash: "",
42
+
Cursor: "",
43
+
CreatedBy: "test",
44
+
Hostname: "test-host",
45
+
}
46
+
)
36
47
37
48
// ====================================================================================
38
49
// HTTP ENDPOINT TESTS
···
367
378
}
368
379
})
369
380
381
+
t.Run("InvalidDIDMethod_Returns400", func(t *testing.T) {
382
+
// These now return 400 (validation error) instead of 404 (routing rejection)
383
+
wrongMethodDIDs := []string{
384
+
"did:invalid:format",
385
+
"did:web:example.com",
386
+
"did:key:z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK",
387
+
}
388
+
389
+
for _, did := range wrongMethodDIDs {
390
+
resp, err := http.Get(ts.URL + "/" + did + "/data")
391
+
if err != nil {
392
+
t.Fatalf("request failed: %v", err)
393
+
}
394
+
resp.Body.Close()
395
+
396
+
// Now expect 400 (invalid DID format) or 404 (routing rejection)
397
+
if resp.StatusCode != 400 && resp.StatusCode != 404 {
398
+
t.Errorf("DID %s: expected 400 or 404, got %d", did, resp.StatusCode)
399
+
}
400
+
}
401
+
})
402
+
403
+
t.Run("HandleLikePathWithoutResolver", func(t *testing.T) {
404
+
// Need to create a fresh manager without resolver for this test
405
+
tmpDir := t.TempDir()
406
+
config := bundle.DefaultConfig(tmpDir)
407
+
config.AutoInit = true
408
+
config.HandleResolverURL = "" // โ DISABLE resolver
409
+
410
+
mgr, err := bundle.NewManager(config, nil)
411
+
if err != nil {
412
+
t.Fatalf("failed to create manager: %v", err)
413
+
}
414
+
defer mgr.Close()
415
+
416
+
serverConfig := &server.Config{
417
+
Addr: ":8080",
418
+
EnableResolver: true,
419
+
Version: "test",
420
+
}
421
+
422
+
srv := server.New(mgr, serverConfig)
423
+
ts := httptest.NewServer(srv.Handler())
424
+
defer ts.Close()
425
+
426
+
// Now test handle resolution without resolver configured
427
+
resp, err := http.Get(ts.URL + "/tree.fail")
428
+
if err != nil {
429
+
t.Fatalf("request failed: %v", err)
430
+
}
431
+
body, _ := io.ReadAll(resp.Body)
432
+
resp.Body.Close()
433
+
434
+
// Should get 400 (resolver not configured)
435
+
if resp.StatusCode != 400 {
436
+
t.Errorf("expected 400 (resolver not configured), got %d: %s",
437
+
resp.StatusCode, string(body))
438
+
return
439
+
}
440
+
441
+
// Verify error message
442
+
var errResp map[string]string
443
+
json.Unmarshal(body, &errResp)
444
+
445
+
if !strings.Contains(errResp["error"], "resolver") &&
446
+
!strings.Contains(errResp["hint"], "resolver") {
447
+
t.Errorf("expected resolver error, got: %v", errResp)
448
+
}
449
+
})
450
+
451
+
t.Run("HandleResolutionWithIndex", func(t *testing.T) {
452
+
// The default setupTestServerWithResolver has resolver configured
453
+
// So this tests the normal flow: handle โ DID โ document
454
+
455
+
resp, err := http.Get(ts.URL + "/tree.fail")
456
+
if err != nil {
457
+
t.Fatalf("request failed: %v", err)
458
+
}
459
+
body, _ := io.ReadAll(resp.Body)
460
+
resp.Body.Close()
461
+
462
+
// Could be:
463
+
// - 500: No DID index (expected in test)
464
+
// - 404: DID not found in index
465
+
// - 200: Success (if test data includes this DID)
466
+
467
+
switch resp.StatusCode {
468
+
case 500:
469
+
// No DID index - expected in test environment
470
+
var errResp map[string]string
471
+
json.Unmarshal(body, &errResp)
472
+
if !strings.Contains(errResp["error"], "DID index") {
473
+
t.Errorf("expected DID index error, got: %s", errResp["error"])
474
+
}
475
+
t.Log("Expected: no DID index configured")
476
+
477
+
case 404:
478
+
// DID not found - also acceptable
479
+
t.Log("Expected: DID not found in index")
480
+
481
+
case 200:
482
+
// Success - would need DID index + test data
483
+
var doc plcclient.DIDDocument
484
+
json.Unmarshal(body, &doc)
485
+
t.Logf("Success: resolved to %s", doc.ID)
486
+
487
+
default:
488
+
t.Errorf("unexpected status: %d, body: %s", resp.StatusCode, string(body))
489
+
}
490
+
491
+
// Verify we got handle resolution header
492
+
if resolvedHandle := resp.Header.Get("X-Handle-Resolved"); resolvedHandle != "" {
493
+
if resolvedHandle != "tree.fail" {
494
+
t.Errorf("wrong handle in header: %s", resolvedHandle)
495
+
}
496
+
t.Log("โ Handle resolution header present")
497
+
}
498
+
})
499
+
370
500
t.Run("InvalidDIDMethod_Returns404", func(t *testing.T) {
371
-
// DIDs with wrong method get 404 from routing (never reach validation)
501
+
// These should be rejected by routing (404) not validation (400)
372
502
wrongMethodDIDs := []string{
373
503
"did:invalid:format",
374
504
"did:web:example.com",
375
505
"did:key:z6MkhaXgBZDvotDkL5257faiztiGiC2QtKLGpbnnEGta2doK",
376
-
"notadid",
377
506
}
378
507
379
508
for _, did := range wrongMethodDIDs {
···
383
512
}
384
513
resp.Body.Close()
385
514
386
-
// Should get 404 (not a did:plc: path)
515
+
// With smart routing, these get 404 (not supported)
387
516
if resp.StatusCode != 404 {
388
517
t.Errorf("DID %s: expected 404 from routing, got %d", did, resp.StatusCode)
389
518
}
519
+
}
520
+
})
521
+
522
+
t.Run("NotADIDPath", func(t *testing.T) {
523
+
resp, err := http.Get(ts.URL + "/notadid")
524
+
if err != nil {
525
+
t.Fatalf("request failed: %v", err)
526
+
}
527
+
defer resp.Body.Close()
528
+
529
+
// "notadid" has no dot, rejected by isValidDIDOrHandle
530
+
if resp.StatusCode != 404 {
531
+
t.Errorf("expected 404 for non-DID path, got %d", resp.StatusCode)
532
+
}
533
+
})
534
+
535
+
t.Run("ValidHandleFormat", func(t *testing.T) {
536
+
// These should pass routing validation (have dots, valid chars)
537
+
validHandles := []string{
538
+
"user.bsky.social",
539
+
"tree.fail",
540
+
"example.com",
541
+
}
542
+
543
+
for _, handle := range validHandles {
544
+
resp, err := http.Get(ts.URL + "/" + handle)
545
+
if err != nil {
546
+
t.Fatalf("request failed: %v", err)
547
+
}
548
+
resp.Body.Close()
549
+
550
+
// Should NOT be 404 (routing accepts it)
551
+
// Will be 400 (no resolver), 500 (no index), or 404 (not found)
552
+
if resp.StatusCode == 404 {
553
+
body, _ := io.ReadAll(resp.Body)
554
+
// 404 is OK if it's "DID not found", not "route not found"
555
+
var errResp map[string]string
556
+
resp.Body = io.NopCloser(bytes.NewReader(body))
557
+
json.NewDecoder(resp.Body).Decode(&errResp)
558
+
559
+
if errResp["error"] == "not found" && !strings.Contains(errResp["error"], "DID") {
560
+
t.Errorf("Handle %s: got routing 404, should be accepted", handle)
561
+
}
562
+
}
563
+
564
+
t.Logf("Handle %s: status %d (400/500/404 all acceptable)", handle, resp.StatusCode)
390
565
}
391
566
})
392
567
···
989
1164
990
1165
// Create storage operations ONCE and reuse
991
1166
logger := &testLogger{t: t}
992
-
storageOps, err := storage.NewOperations(logger)
1167
+
storageOps, err := storage.NewOperations(logger, false)
993
1168
if err != nil {
994
1169
t.Fatalf("failed to create storage operations: %v", err)
995
1170
}
···
1006
1181
path := filepath.Join(tmpDir, fmt.Sprintf("%06d.jsonl.zst", i))
1007
1182
ops := makeMinimalTestOperations(10000, i*10000) // Unique ops per bundle
1008
1183
1009
-
contentHash, compHash, uncompSize, compSize, err := storageOps.SaveBundle(path, ops)
1184
+
contentHash, compHash, uncompSize, compSize, err := storageOps.SaveBundle(path, ops, bundleInfo)
1010
1185
if err != nil {
1011
1186
t.Fatalf("failed to save test bundle %d: %v", i, err)
1012
1187
}
+112
-6
server/types.go
+112
-6
server/types.go
···
1
1
package server
2
2
3
-
import "time"
3
+
import (
4
+
"net/http"
5
+
"time"
6
+
7
+
"tangled.org/atscan.net/plcbundle-go/bundle"
8
+
)
9
+
10
+
// Server serves bundle data over HTTP
11
+
type Server struct {
12
+
manager *bundle.Manager
13
+
addr string
14
+
config *Config
15
+
startTime time.Time
16
+
httpServer *http.Server
17
+
}
18
+
19
+
// Config configures the server
20
+
type Config struct {
21
+
Addr string
22
+
SyncMode bool
23
+
SyncInterval time.Duration
24
+
EnableWebSocket bool
25
+
EnableResolver bool
26
+
Version string
27
+
}
4
28
5
29
// StatusResponse is the /status endpoint response
6
30
type StatusResponse struct {
7
-
Bundles BundleStatus `json:"bundles"`
8
-
Mempool *MempoolStatus `json:"mempool,omitempty"`
9
-
Server ServerStatus `json:"server"`
31
+
Server ServerStatus `json:"server"`
32
+
Bundles BundleStatus `json:"bundles"`
33
+
Mempool *MempoolStatus `json:"mempool,omitempty"`
34
+
DIDIndex *DIDIndexStatus `json:"didindex,omitempty"`
35
+
Resolver *ResolverStatus `json:"resolver,omitempty"`
10
36
}
11
37
12
38
// ServerStatus contains server information
13
39
type ServerStatus struct {
14
40
Version string `json:"version"`
15
-
UptimeSeconds int `json:"uptime_seconds"`
41
+
Origin string `json:"origin,omitempty"`
16
42
SyncMode bool `json:"sync_mode"`
17
43
SyncIntervalSeconds int `json:"sync_interval_seconds,omitempty"`
18
44
WebSocketEnabled bool `json:"websocket_enabled"`
19
-
Origin string `json:"origin,omitempty"`
45
+
ResolverEnabled bool `json:"resolver_enabled"`
46
+
HandleResolver string `json:"handle_resolver,omitempty"`
47
+
UptimeSeconds int `json:"uptime_seconds"`
48
+
}
49
+
50
+
// DIDIndexStatus contains DID index statistics
51
+
type DIDIndexStatus struct {
52
+
Enabled bool `json:"enabled"`
53
+
Exists bool `json:"exists"`
54
+
TotalDIDs int64 `json:"total_dids"`
55
+
IndexedDIDs int64 `json:"indexed_dids"`
56
+
MempoolDIDs int64 `json:"mempool_dids,omitempty"`
57
+
LastBundle int `json:"last_bundle"`
58
+
ShardCount int `json:"shard_count"`
59
+
CachedShards int `json:"cached_shards"`
60
+
CacheLimit int `json:"cache_limit"`
61
+
CacheHitRate float64 `json:"cache_hit_rate"`
62
+
CacheHits int64 `json:"cache_hits"`
63
+
CacheMisses int64 `json:"cache_misses"`
64
+
TotalLookups int64 `json:"total_lookups"`
65
+
UpdatedAt time.Time `json:"updated_at"`
66
+
Version int `json:"version,omitempty"`
67
+
Format string `json:"format,omitempty"`
68
+
HotShards []int `json:"hot_shards,omitempty"`
69
+
70
+
// Lookup performance metrics
71
+
AvgLookupTimeMs float64 `json:"avg_lookup_time_ms"` // All-time average
72
+
RecentAvgLookupTimeMs float64 `json:"recent_avg_lookup_time_ms"` // Recent average
73
+
MinLookupTimeMs float64 `json:"min_lookup_time_ms,omitempty"` // Fastest
74
+
MaxLookupTimeMs float64 `json:"max_lookup_time_ms,omitempty"` // Slowest
75
+
P50LookupTimeMs float64 `json:"p50_lookup_time_ms,omitempty"` // Median
76
+
P95LookupTimeMs float64 `json:"p95_lookup_time_ms,omitempty"` // 95th percentile
77
+
P99LookupTimeMs float64 `json:"p99_lookup_time_ms,omitempty"` // 99th percentile
78
+
RecentSampleSize int `json:"recent_sample_size,omitempty"` // How many samples
79
+
}
80
+
81
+
// ResolverStatus contains DID document resolver performance metrics
82
+
type ResolverStatus struct {
83
+
Enabled bool `json:"enabled"`
84
+
HandleResolver string `json:"handle_resolver,omitempty"`
85
+
86
+
// Resolution counts
87
+
TotalResolutions int64 `json:"total_resolutions"`
88
+
MempoolHits int64 `json:"mempool_hits"`
89
+
BundleHits int64 `json:"bundle_hits"`
90
+
Errors int64 `json:"errors"`
91
+
SuccessRate float64 `json:"success_rate"`
92
+
MempoolHitRate float64 `json:"mempool_hit_rate"`
93
+
94
+
// Overall timing (all-time averages)
95
+
AvgTotalTimeMs float64 `json:"avg_total_time_ms"`
96
+
AvgMempoolTimeMs float64 `json:"avg_mempool_time_ms"`
97
+
AvgIndexTimeMs float64 `json:"avg_index_time_ms,omitempty"`
98
+
AvgLoadOpTimeMs float64 `json:"avg_load_op_time_ms,omitempty"`
99
+
100
+
// Recent performance (last N resolutions)
101
+
RecentAvgTotalTimeMs float64 `json:"recent_avg_total_time_ms"`
102
+
RecentAvgMempoolTimeMs float64 `json:"recent_avg_mempool_time_ms"`
103
+
RecentAvgIndexTimeMs float64 `json:"recent_avg_index_time_ms,omitempty"`
104
+
RecentAvgLoadTimeMs float64 `json:"recent_avg_load_time_ms,omitempty"`
105
+
RecentSampleSize int `json:"recent_sample_size"`
106
+
107
+
// Percentiles (total response time)
108
+
MinTotalTimeMs float64 `json:"min_total_time_ms,omitempty"`
109
+
MaxTotalTimeMs float64 `json:"max_total_time_ms,omitempty"`
110
+
P50TotalTimeMs float64 `json:"p50_total_time_ms,omitempty"`
111
+
P95TotalTimeMs float64 `json:"p95_total_time_ms,omitempty"`
112
+
P99TotalTimeMs float64 `json:"p99_total_time_ms,omitempty"`
113
+
114
+
// Breakdown percentiles (for bundle resolutions only)
115
+
P95IndexTimeMs float64 `json:"p95_index_time_ms,omitempty"`
116
+
P95LoadOpTimeMs float64 `json:"p95_load_op_time_ms,omitempty"`
20
117
}
21
118
22
119
// BundleStatus contains bundle statistics
···
56
153
LastOpAgeSeconds int `json:"last_op_age_seconds,omitempty"`
57
154
EtaNextBundleSeconds int `json:"eta_next_bundle_seconds,omitempty"`
58
155
}
156
+
157
+
// RequestLog represents a logged HTTP request
158
+
type RequestLog struct {
159
+
Timestamp time.Time `json:"timestamp"`
160
+
Method string `json:"method"`
161
+
Path string `json:"path"`
162
+
UserAgent string `json:"user_agent"`
163
+
RemoteAddr string `json:"remote_addr"`
164
+
}
+1
-1
server/types_test.go
+1
-1
server/types_test.go
+2
-2
server/websocket.go
+2
-2
server/websocket.go
···
11
11
12
12
"github.com/goccy/go-json"
13
13
"github.com/gorilla/websocket"
14
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
15
-
"tangled.org/atscan.net/plcbundle/internal/types"
14
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
15
+
"tangled.org/atscan.net/plcbundle-go/internal/types"
16
16
)
17
17
18
18
var upgrader = websocket.Upgrader{
+2
-2
types.go
+2
-2
types.go
···
3
3
import (
4
4
"time"
5
5
6
-
"tangled.org/atscan.net/plcbundle/internal/plcclient"
6
+
"tangled.org/atscan.net/plcbundle-go/internal/plcclient"
7
7
)
8
8
9
9
// Bundle represents a PLC bundle (public version)
···
40
40
}
41
41
42
42
// Helper to convert internal bundle to public
43
-
func toBundlePublic(b interface{}) *Bundle {
43
+
func toBundlePublic(_ interface{}) *Bundle {
44
44
// Implement conversion from internal bundle to public Bundle
45
45
return &Bundle{} // placeholder
46
46
}