+2
-1
.gitignore
+2
-1
.gitignore
+39
-5
Makefile
+39
-5
Makefile
···
1
-
all: run
1
+
.PHONY: all build install test clean fmt lint help
2
+
3
+
# Binary name
4
+
BINARY_NAME=atscand
5
+
INSTALL_PATH=$(GOPATH)/bin
6
+
7
+
# Go commands
8
+
GOCMD=go
9
+
GOBUILD=$(GOCMD) build
10
+
GOINSTALL=$(GOCMD) install
11
+
GOCLEAN=$(GOCMD) clean
12
+
GOTEST=$(GOCMD) test
13
+
GOGET=$(GOCMD) get
14
+
GOFMT=$(GOCMD) fmt
15
+
GOMOD=$(GOCMD) mod
16
+
GORUN=$(GOCMD) run
17
+
18
+
# Default target
19
+
all: build
20
+
21
+
# Build the CLI tool
22
+
build:
23
+
@echo "Building $(BINARY_NAME)..."
24
+
$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
25
+
26
+
# Install the CLI tool globally
27
+
install:
28
+
@echo "Installing $(BINARY_NAME)..."
29
+
$(GOINSTALL) ./cmd/atscand
2
30
3
31
run:
4
-
go run cmd/atscanner.go -verbose
32
+
$(GORUN) cmd/atscand/main.go -verbose
5
33
6
-
clean-db:
7
-
dropdb -U atscanner atscanner
8
-
createdb atscanner -O atscanner
34
+
update-plcbundle:
35
+
GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
36
+
37
+
# Show help
38
+
help:
39
+
@echo "Available targets:"
40
+
@echo " make build - Build the binary"
41
+
@echo " make install - Install binary globally"
42
+
@echo " make run - Run app"
+159
cmd/atscand/main.go
+159
cmd/atscand/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"context"
5
+
"flag"
6
+
"fmt"
7
+
"os"
8
+
"os/signal"
9
+
"syscall"
10
+
"time"
11
+
12
+
"github.com/atscan/atscand/internal/api"
13
+
"github.com/atscan/atscand/internal/config"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/pds"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
18
+
"github.com/atscan/atscand/internal/worker"
19
+
)
20
+
21
+
const VERSION = "1.0.0"
22
+
23
+
func main() {
24
+
configPath := flag.String("config", "config.yaml", "path to config file")
25
+
verbose := flag.Bool("verbose", false, "enable verbose logging")
26
+
flag.Parse()
27
+
28
+
// Load configuration
29
+
cfg, err := config.Load(*configPath)
30
+
if err != nil {
31
+
fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
32
+
os.Exit(1)
33
+
}
34
+
35
+
// Override verbose setting if flag is provided
36
+
if *verbose {
37
+
cfg.API.Verbose = true
38
+
}
39
+
40
+
// Initialize logger
41
+
log.Init(cfg.API.Verbose)
42
+
43
+
// Print banner
44
+
log.Banner(VERSION)
45
+
46
+
// Print configuration summary
47
+
log.PrintConfig(map[string]string{
48
+
"Database Type": cfg.Database.Type,
49
+
"Database Path": cfg.Database.Path, // Will be auto-redacted
50
+
"PLC Directory": cfg.PLC.DirectoryURL,
51
+
"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
52
+
"PLC Bundle Dir": cfg.PLC.BundleDir,
53
+
"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
54
+
"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
55
+
"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
56
+
"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
57
+
"PDS Timeout": cfg.PDS.Timeout.String(),
58
+
"API Host": cfg.API.Host,
59
+
"API Port": fmt.Sprintf("%d", cfg.API.Port),
60
+
"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
61
+
})
62
+
63
+
// Initialize database using factory pattern
64
+
db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
65
+
if err != nil {
66
+
log.Fatal("Failed to initialize database: %v", err)
67
+
}
68
+
defer func() {
69
+
log.Info("Closing database connection...")
70
+
db.Close()
71
+
}()
72
+
73
+
// Set scan retention from config
74
+
if cfg.PDS.ScanRetention > 0 {
75
+
db.SetScanRetention(cfg.PDS.ScanRetention)
76
+
log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
77
+
}
78
+
79
+
// Run migrations
80
+
if err := db.Migrate(); err != nil {
81
+
log.Fatal("Failed to run migrations: %v", err)
82
+
}
83
+
84
+
ctx, cancel := context.WithCancel(context.Background())
85
+
defer cancel()
86
+
87
+
// Initialize workers
88
+
log.Info("Initializing scanners...")
89
+
90
+
bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
91
+
if err != nil {
92
+
log.Fatal("Failed to create bundle manager: %v", err)
93
+
}
94
+
defer bundleManager.Close()
95
+
log.Verbose("✓ Bundle manager initialized (shared)")
96
+
97
+
plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
98
+
defer plcScanner.Close()
99
+
log.Verbose("✓ PLC scanner initialized")
100
+
101
+
pdsScanner := pds.NewScanner(db, cfg.PDS)
102
+
log.Verbose("✓ PDS scanner initialized")
103
+
104
+
scheduler := worker.NewScheduler()
105
+
106
+
// Schedule PLC directory scan
107
+
scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
108
+
if err := plcScanner.Scan(ctx); err != nil {
109
+
log.Error("PLC scan error: %v", err)
110
+
}
111
+
})
112
+
log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
113
+
114
+
// Schedule PDS availability checks
115
+
scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
116
+
if err := pdsScanner.ScanAll(ctx); err != nil {
117
+
log.Error("PDS scan error: %v", err)
118
+
}
119
+
})
120
+
log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
121
+
122
+
// Start API server
123
+
log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
124
+
apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
125
+
go func() {
126
+
if err := apiServer.Start(); err != nil {
127
+
log.Fatal("API server error: %v", err)
128
+
}
129
+
}()
130
+
131
+
// Give the API server a moment to start
132
+
time.Sleep(100 * time.Millisecond)
133
+
log.Info("✓ API server started successfully")
134
+
log.Info("")
135
+
log.Info("🚀 ATScanner is running!")
136
+
log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
137
+
log.Info(" Press Ctrl+C to stop")
138
+
log.Info("")
139
+
140
+
// Start scheduler
141
+
scheduler.Start(ctx)
142
+
143
+
// Wait for interrupt
144
+
sigChan := make(chan os.Signal, 1)
145
+
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
146
+
<-sigChan
147
+
148
+
log.Info("")
149
+
log.Info("Shutting down gracefully...")
150
+
cancel()
151
+
152
+
log.Info("Stopping API server...")
153
+
apiServer.Shutdown(context.Background())
154
+
155
+
log.Info("Waiting for active tasks to complete...")
156
+
time.Sleep(2 * time.Second)
157
+
158
+
log.Info("✓ Shutdown complete. Goodbye!")
159
+
}
-152
cmd/atscanner.go
-152
cmd/atscanner.go
···
1
-
package main
2
-
3
-
import (
4
-
"context"
5
-
"flag"
6
-
"fmt"
7
-
"os"
8
-
"os/signal"
9
-
"syscall"
10
-
"time"
11
-
12
-
"github.com/atscan/atscanner/internal/api"
13
-
"github.com/atscan/atscanner/internal/config"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/pds"
16
-
"github.com/atscan/atscanner/internal/plc"
17
-
"github.com/atscan/atscanner/internal/storage"
18
-
"github.com/atscan/atscanner/internal/worker"
19
-
)
20
-
21
-
const VERSION = "1.0.0"
22
-
23
-
func main() {
24
-
configPath := flag.String("config", "config.yaml", "path to config file")
25
-
verbose := flag.Bool("verbose", false, "enable verbose logging")
26
-
flag.Parse()
27
-
28
-
// Load configuration
29
-
cfg, err := config.Load(*configPath)
30
-
if err != nil {
31
-
fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
32
-
os.Exit(1)
33
-
}
34
-
35
-
// Override verbose setting if flag is provided
36
-
if *verbose {
37
-
cfg.API.Verbose = true
38
-
}
39
-
40
-
// Initialize logger
41
-
log.Init(cfg.API.Verbose)
42
-
43
-
// Print banner
44
-
log.Banner(VERSION)
45
-
46
-
// Print configuration summary
47
-
log.PrintConfig(map[string]string{
48
-
"Database Type": cfg.Database.Type,
49
-
"Database Path": cfg.Database.Path, // Will be auto-redacted
50
-
"PLC Directory": cfg.PLC.DirectoryURL,
51
-
"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
52
-
"PLC Bundle Dir": cfg.PLC.BundleDir,
53
-
"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
54
-
"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
55
-
"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
56
-
"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
57
-
"PDS Timeout": cfg.PDS.Timeout.String(),
58
-
"API Host": cfg.API.Host,
59
-
"API Port": fmt.Sprintf("%d", cfg.API.Port),
60
-
"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
61
-
})
62
-
63
-
// Initialize database using factory pattern
64
-
db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
65
-
if err != nil {
66
-
log.Fatal("Failed to initialize database: %v", err)
67
-
}
68
-
defer func() {
69
-
log.Info("Closing database connection...")
70
-
db.Close()
71
-
}()
72
-
73
-
// Set scan retention from config
74
-
if cfg.PDS.ScanRetention > 0 {
75
-
db.SetScanRetention(cfg.PDS.ScanRetention)
76
-
log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
77
-
}
78
-
79
-
// Run migrations
80
-
if err := db.Migrate(); err != nil {
81
-
log.Fatal("Failed to run migrations: %v", err)
82
-
}
83
-
84
-
ctx, cancel := context.WithCancel(context.Background())
85
-
defer cancel()
86
-
87
-
// Initialize workers
88
-
log.Info("Initializing scanners...")
89
-
90
-
plcScanner := plc.NewScanner(db, cfg.PLC)
91
-
defer plcScanner.Close()
92
-
log.Verbose("✓ PLC scanner initialized")
93
-
94
-
pdsScanner := pds.NewScanner(db, cfg.PDS)
95
-
log.Verbose("✓ PDS scanner initialized")
96
-
97
-
scheduler := worker.NewScheduler()
98
-
99
-
// Schedule PLC directory scan
100
-
scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
101
-
if err := plcScanner.Scan(ctx); err != nil {
102
-
log.Error("PLC scan error: %v", err)
103
-
}
104
-
})
105
-
log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
106
-
107
-
// Schedule PDS availability checks
108
-
scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
109
-
if err := pdsScanner.ScanAll(ctx); err != nil {
110
-
log.Error("PDS scan error: %v", err)
111
-
}
112
-
})
113
-
log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
114
-
115
-
// Start API server
116
-
log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
117
-
apiServer := api.NewServer(db, cfg.API, cfg.PLC)
118
-
go func() {
119
-
if err := apiServer.Start(); err != nil {
120
-
log.Fatal("API server error: %v", err)
121
-
}
122
-
}()
123
-
124
-
// Give the API server a moment to start
125
-
time.Sleep(100 * time.Millisecond)
126
-
log.Info("✓ API server started successfully")
127
-
log.Info("")
128
-
log.Info("🚀 ATScanner is running!")
129
-
log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
130
-
log.Info(" Press Ctrl+C to stop")
131
-
log.Info("")
132
-
133
-
// Start scheduler
134
-
scheduler.Start(ctx)
135
-
136
-
// Wait for interrupt
137
-
sigChan := make(chan os.Signal, 1)
138
-
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
139
-
<-sigChan
140
-
141
-
log.Info("")
142
-
log.Info("Shutting down gracefully...")
143
-
cancel()
144
-
145
-
log.Info("Stopping API server...")
146
-
apiServer.Shutdown(context.Background())
147
-
148
-
log.Info("Waiting for active tasks to complete...")
149
-
time.Sleep(2 * time.Second)
150
-
151
-
log.Info("✓ Shutdown complete. Goodbye!")
152
-
}
+168
cmd/import-labels/main.go
+168
cmd/import-labels/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"bufio"
5
+
"flag"
6
+
"fmt"
7
+
"os"
8
+
"path/filepath"
9
+
"strings"
10
+
"time"
11
+
12
+
"github.com/klauspost/compress/zstd"
13
+
"gopkg.in/yaml.v3"
14
+
)
15
+
16
+
type Config struct {
17
+
PLC struct {
18
+
BundleDir string `yaml:"bundle_dir"`
19
+
} `yaml:"plc"`
20
+
}
21
+
22
+
var CONFIG_FILE = "config.yaml"
23
+
24
+
// ---------------------
25
+
26
+
func main() {
27
+
// Define a new flag for changing the directory
28
+
workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
29
+
flag.Usage = func() { // Custom usage message
30
+
fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
31
+
fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
32
+
flag.PrintDefaults()
33
+
}
34
+
flag.Parse() // Parse all defined flags
35
+
36
+
// Change directory if the flag was used
37
+
if *workDir != "." {
38
+
fmt.Printf("Changing working directory to %s...\n", *workDir)
39
+
if err := os.Chdir(*workDir); err != nil {
40
+
fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
41
+
os.Exit(1)
42
+
}
43
+
}
44
+
45
+
// --- REMOVED UNUSED CODE ---
46
+
// The csvFilePath variable and NArg check were removed
47
+
// as the script now reads from stdin.
48
+
// ---------------------------
49
+
50
+
fmt.Println("========================================")
51
+
fmt.Println("PLC Operation Labels Import (Go STDIN)")
52
+
fmt.Println("========================================")
53
+
54
+
// 1. Read config (will now read from the new CWD)
55
+
fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
56
+
configData, err := os.ReadFile(CONFIG_FILE)
57
+
if err != nil {
58
+
fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
59
+
os.Exit(1)
60
+
}
61
+
62
+
var config Config
63
+
if err := yaml.Unmarshal(configData, &config); err != nil {
64
+
fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
65
+
os.Exit(1)
66
+
}
67
+
68
+
if config.PLC.BundleDir == "" {
69
+
fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
70
+
os.Exit(1)
71
+
}
72
+
73
+
finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
74
+
if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
75
+
fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
76
+
os.Exit(1)
77
+
}
78
+
79
+
fmt.Printf("Output Dir: %s\n", finalLabelsDir)
80
+
fmt.Println("Waiting for sorted data from stdin...")
81
+
82
+
// 2. Process sorted data from stdin
83
+
// This script *requires* the input to be sorted by bundle number.
84
+
85
+
var currentWriter *zstd.Encoder
86
+
var currentFile *os.File
87
+
var lastBundleKey string = ""
88
+
89
+
lineCount := 0
90
+
startTime := time.Now()
91
+
92
+
scanner := bufio.NewScanner(os.Stdin)
93
+
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
94
+
95
+
for scanner.Scan() {
96
+
line := scanner.Text()
97
+
lineCount++
98
+
99
+
parts := strings.SplitN(line, ",", 2)
100
+
if len(parts) < 1 {
101
+
continue // Skip empty/bad lines
102
+
}
103
+
104
+
bundleNumStr := parts[0]
105
+
bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
106
+
107
+
// If the bundle key is new, close the old writer and open a new one.
108
+
if bundleKey != lastBundleKey {
109
+
// Close the previous writer/file
110
+
if currentWriter != nil {
111
+
if err := currentWriter.Close(); err != nil {
112
+
fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
113
+
}
114
+
currentFile.Close()
115
+
}
116
+
117
+
// Start the new one
118
+
fmt.Printf(" -> Writing bundle %s\n", bundleKey)
119
+
outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
120
+
121
+
file, err := os.Create(outPath)
122
+
if err != nil {
123
+
fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
124
+
os.Exit(1)
125
+
}
126
+
currentFile = file
127
+
128
+
writer, err := zstd.NewWriter(file)
129
+
if err != nil {
130
+
fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
131
+
os.Exit(1)
132
+
}
133
+
currentWriter = writer
134
+
lastBundleKey = bundleKey
135
+
}
136
+
137
+
// Write the line to the currently active writer
138
+
if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
139
+
fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
140
+
}
141
+
142
+
// Progress update
143
+
if lineCount%100000 == 0 {
144
+
elapsed := time.Since(startTime).Seconds()
145
+
rate := float64(lineCount) / elapsed
146
+
fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
147
+
}
148
+
}
149
+
150
+
// 3. Close the very last writer
151
+
if currentWriter != nil {
152
+
if err := currentWriter.Close(); err != nil {
153
+
fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
154
+
}
155
+
currentFile.Close()
156
+
}
157
+
158
+
if err := scanner.Err(); err != nil {
159
+
fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
160
+
}
161
+
162
+
totalTime := time.Since(startTime)
163
+
fmt.Println("\n========================================")
164
+
fmt.Println("Import Summary")
165
+
fmt.Println("========================================")
166
+
fmt.Printf("✓ Import completed in %v\n", totalTime)
167
+
fmt.Printf("Total lines processed: %d\n", lineCount)
168
+
}
+1
-1
config.sample.yaml
+1
-1
config.sample.yaml
+6
-5
go.mod
+6
-5
go.mod
···
1
-
module github.com/atscan/atscanner
1
+
module github.com/atscan/atscand
2
2
3
3
go 1.23.0
4
4
5
5
require (
6
6
github.com/gorilla/mux v1.8.1
7
7
github.com/lib/pq v1.10.9
8
-
github.com/mattn/go-sqlite3 v1.14.18
9
8
gopkg.in/yaml.v3 v3.0.1
10
9
)
11
10
12
-
require github.com/klauspost/compress v1.18.0
11
+
require github.com/klauspost/compress v1.18.1
13
12
14
13
require (
15
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
16
14
github.com/gorilla/handlers v1.5.2
15
+
github.com/jackc/pgx/v5 v5.7.6
16
+
tangled.org/atscan.net/plcbundle v0.3.6
17
17
)
18
18
19
19
require (
20
20
github.com/felixge/httpsnoop v1.0.3 // indirect
21
21
github.com/jackc/pgpassfile v1.0.0 // indirect
22
22
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
23
-
github.com/jackc/pgx/v5 v5.7.6 // indirect
24
23
github.com/jackc/puddle/v2 v2.2.2 // indirect
24
+
github.com/kr/text v0.2.0 // indirect
25
+
github.com/rogpeppe/go-internal v1.14.1 // indirect
25
26
golang.org/x/crypto v0.37.0 // indirect
26
27
golang.org/x/sync v0.13.0 // indirect
27
28
golang.org/x/text v0.24.0 // indirect
+17
-7
go.sum
+17
-7
go.sum
···
1
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
2
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
1
+
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
3
2
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4
+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4
5
github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
5
6
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
6
7
github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
15
16
github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
16
17
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
17
18
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
18
-
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
19
-
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
19
+
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
20
+
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
21
+
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
22
+
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
23
+
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
24
+
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
20
25
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
21
26
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
22
-
github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
23
-
github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
27
+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
24
28
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
29
+
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
30
+
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
25
31
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
26
32
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
27
33
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
34
+
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
35
+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
28
36
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
29
37
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
30
38
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
31
39
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
32
40
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
33
41
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
34
-
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
35
42
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
36
43
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
44
+
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
37
45
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
38
46
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
39
47
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
48
+
tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
49
+
tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+225
-449
internal/api/handlers.go
+225
-449
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
···
76
74
}
77
75
78
76
// ===== FORMATTING HELPERS =====
79
-
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": bundle.DIDCount, // Use DIDCount instead of len(DIDs)
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
77
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
···
268
248
"endpoint": pds.Endpoint,
269
249
"discovered_at": pds.DiscoveredAt,
270
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
271
252
}
272
253
273
254
// Add server_did if available
···
703
684
return
704
685
}
705
686
706
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
707
-
if err != nil {
708
-
resp.error(err.Error(), http.StatusInternalServerError)
709
-
return
710
-
}
711
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
712
688
resp.json(map[string]interface{}{
713
689
"total_unique_dids": totalDIDs,
714
690
"last_bundle": lastBundle,
···
719
695
720
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
721
697
resp := newResponse(w)
722
-
723
698
bundleNum, err := getBundleNumber(r)
724
699
if err != nil {
725
700
resp.error("invalid bundle number", http.StatusBadRequest)
726
701
return
727
702
}
728
703
729
-
// Try to get existing bundle
730
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
731
-
if err == nil {
732
-
// Bundle exists, return it normally
733
-
resp.json(formatBundleResponse(bundle))
734
-
return
735
-
}
736
-
737
-
// Bundle not found - check if it's the next upcoming bundle
738
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
739
707
if err != nil {
740
-
resp.error("bundle not found", http.StatusNotFound)
741
-
return
742
-
}
743
-
744
-
if bundleNum == lastBundle+1 {
745
-
// This is the upcoming bundle - return preview based on mempool
746
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
747
-
if err != nil {
748
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
749
717
return
750
718
}
751
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
752
720
return
753
721
}
754
722
755
-
// Not an upcoming bundle, just not found
756
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
757
724
}
758
725
759
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
760
-
// Get mempool stats
761
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
762
-
if err != nil {
763
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
764
743
}
744
+
}
765
745
766
-
if mempoolCount == 0 {
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
767
752
return map[string]interface{}{
768
753
"plc_bundle_number": bundleNum,
769
754
"is_upcoming": true,
···
773
758
}, nil
774
759
}
775
760
776
-
// Get first and last operations for time range
777
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
778
-
if err != nil {
779
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
780
771
}
781
772
782
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
783
-
if err != nil {
784
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
785
775
}
786
776
787
-
// Get unique DID count
788
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
789
-
if err != nil {
790
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
791
780
}
792
-
793
-
// Get uncompressed size estimate
794
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
795
-
if err != nil {
796
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
797
783
}
798
784
799
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
800
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
801
-
802
-
// Calculate completion estimate
803
-
var estimatedCompletionTime *time.Time
804
-
var operationsNeeded int
805
-
var currentRate float64
806
-
807
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
808
-
809
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
810
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
811
-
if timeSpan > 0 {
812
-
currentRate = float64(mempoolCount) / timeSpan
813
-
if currentRate > 0 {
814
-
secondsNeeded := float64(operationsNeeded) / currentRate
815
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
816
-
estimatedCompletionTime = &completionTime
817
-
}
818
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
819
789
}
820
790
821
-
// Get previous bundle for cursor context
822
-
var prevBundleHash string
823
-
var cursor string
791
+
// Get previous bundle info
824
792
if bundleNum > 1 {
825
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
826
-
if err == nil {
827
-
prevBundleHash = prevBundle.Hash
828
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
829
-
}
830
-
}
831
-
832
-
// Determine bundle status
833
-
status := "filling"
834
-
if mempoolCount >= plc.BUNDLE_SIZE {
835
-
status = "ready"
836
-
}
837
-
838
-
// Build upcoming bundle response
839
-
result := map[string]interface{}{
840
-
"plc_bundle_number": bundleNum,
841
-
"is_upcoming": true,
842
-
"status": status,
843
-
"operation_count": mempoolCount,
844
-
"target_operation_count": plc.BUNDLE_SIZE,
845
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
846
-
"operations_needed": operationsNeeded,
847
-
"did_count": uniqueDIDCount,
848
-
"start_time": firstOp.CreatedAt,
849
-
"current_end_time": lastOp.CreatedAt,
850
-
"uncompressed_size": uncompressedSize,
851
-
"estimated_compressed_size": estimatedCompressedSize,
852
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
853
-
"prev_bundle_hash": prevBundleHash,
854
-
"cursor": cursor,
855
-
}
856
-
857
-
if estimatedCompletionTime != nil {
858
-
result["estimated_completion_time"] = *estimatedCompletionTime
859
-
result["current_rate_per_second"] = currentRate
860
-
}
861
-
862
-
// Get actual mempool operations if requested (for DIDs list)
863
-
if r.URL.Query().Get("include_dids") == "true" {
864
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
865
-
if err == nil {
866
-
// Extract unique DIDs
867
-
didSet := make(map[string]bool)
868
-
for _, op := range ops {
869
-
didSet[op.DID] = true
870
-
}
871
-
dids := make([]string, 0, len(didSet))
872
-
for did := range didSet {
873
-
dids = append(dids, did)
874
-
}
875
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
876
796
}
877
797
}
878
798
···
888
808
return
889
809
}
890
810
891
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
892
813
if err != nil {
893
814
resp.error("bundle not found", http.StatusNotFound)
894
815
return
895
816
}
896
817
897
-
// Query DIDs from dids table instead
898
-
dids, err := s.db.GetDIDsForBundle(r.Context(), bundleNum)
899
-
if err != nil {
900
-
resp.error(fmt.Sprintf("failed to get DIDs: %v", err), http.StatusInternalServerError)
901
-
return
902
-
}
903
-
904
818
resp.json(map[string]interface{}{
905
-
"plc_bundle_number": bundle.BundleNumber,
906
-
"did_count": bundle.DIDCount,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
907
821
"dids": dids,
908
822
})
909
823
}
···
919
833
920
834
compressed := r.URL.Query().Get("compressed") != "false"
921
835
922
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
923
837
if err == nil {
924
838
// Bundle exists, serve it normally
925
839
resp.bundleHeaders(bundle)
···
933
847
}
934
848
935
849
// Bundle not found - check if it's the upcoming bundle
936
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
937
-
if err != nil {
938
-
resp.error("bundle not found", http.StatusNotFound)
939
-
return
940
-
}
941
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
942
851
if bundleNum == lastBundle+1 {
943
852
// This is the upcoming bundle - serve from mempool
944
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
945
854
return
946
855
}
947
856
···
949
858
resp.error("bundle not found", http.StatusNotFound)
950
859
}
951
860
952
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
953
-
ctx := r.Context()
954
-
955
-
// Get mempool count
956
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
957
-
if err != nil {
958
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
959
-
return
960
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
961
865
962
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
963
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
964
868
return
965
869
}
966
870
967
-
// Get mempool operations (up to BUNDLE_SIZE)
968
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
969
873
if err != nil {
970
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
971
875
return
972
876
}
973
877
974
-
if len(mempoolOps) == 0 {
975
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
976
880
return
977
881
}
978
882
979
-
// Get time range
980
-
firstOp := mempoolOps[0]
981
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
982
886
983
887
// Extract unique DIDs
984
888
didSet := make(map[string]bool)
985
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
986
890
didSet[op.DID] = true
987
891
}
988
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
989
899
// Get previous bundle hash
990
900
prevBundleHash := ""
991
901
if bundleNum > 1 {
992
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
993
903
prevBundleHash = prevBundle.Hash
994
904
}
995
905
}
996
906
997
-
// Serialize operations to JSONL
998
-
var buf []byte
999
-
for _, mop := range mempoolOps {
1000
-
buf = append(buf, []byte(mop.Operation)...)
1001
-
buf = append(buf, '\n')
1002
-
}
1003
-
1004
-
// Calculate size
1005
-
uncompressedSize := int64(len(buf))
1006
-
1007
907
// Set headers
1008
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
1009
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
1010
910
w.Header().Set("X-Bundle-Status", "preview")
1011
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
1012
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
1013
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
1014
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
1015
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
1016
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
1017
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
1018
919
1019
920
w.Header().Set("Content-Type", "application/jsonl")
1020
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
1021
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
1022
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
1023
922
923
+
// Stream operations as JSONL
1024
924
w.WriteHeader(http.StatusOK)
1025
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
1026
937
}
1027
938
1028
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
1029
940
resp := newResponse(w)
1030
-
path := bundle.GetFilePath(s.plcBundleDir)
1031
941
1032
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
1033
944
if err != nil {
1034
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
1035
946
return
1036
947
}
1037
-
defer file.Close()
1038
-
1039
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
1040
949
1041
950
w.Header().Set("Content-Type", "application/zstd")
1042
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
1043
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
1044
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
1045
954
1046
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
1047
958
}
1048
959
1049
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
1050
961
resp := newResponse(w)
1051
962
1052
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
1053
965
if err != nil {
1054
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
1055
967
return
1056
968
}
1057
-
1058
-
// Serialize to JSONL
1059
-
var buf []byte
1060
-
for _, op := range ops {
1061
-
buf = append(buf, op.RawJSON...)
1062
-
buf = append(buf, '\n')
1063
-
}
1064
-
1065
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
1066
-
compressedSize := int64(0)
1067
-
if fileInfo != nil {
1068
-
compressedSize = fileInfo.Size()
1069
-
}
969
+
defer reader.Close()
1070
970
1071
971
w.Header().Set("Content-Type", "application/jsonl")
1072
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
1073
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
1074
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
1075
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
1076
-
if compressedSize > 0 {
1077
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
1078
978
}
1079
979
980
+
// Stream the data directly to the response
1080
981
w.WriteHeader(http.StatusOK)
1081
-
w.Write(buf)
982
+
io.Copy(w, reader)
1082
983
}
1083
984
1084
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
1085
986
resp := newResponse(w)
1086
987
limit := getQueryInt(r, "limit", 50)
1087
988
1088
-
bundles, err := s.db.GetBundles(r.Context(), limit)
1089
-
if err != nil {
1090
-
resp.error(err.Error(), http.StatusInternalServerError)
1091
-
return
1092
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
1093
990
1094
991
response := make([]map[string]interface{}, len(bundles))
1095
992
for i, bundle := range bundles {
1096
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
1097
994
}
1098
995
1099
996
resp.json(response)
···
1102
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
1103
1000
resp := newResponse(w)
1104
1001
1105
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
1106
-
if err != nil {
1107
-
resp.error(err.Error(), http.StatusInternalServerError)
1108
-
return
1109
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
1110
1008
1111
1009
resp.json(map[string]interface{}{
1112
-
"plc_bundle_count": count,
1113
-
"last_bundle_number": lastBundle,
1114
-
"total_compressed_size": compressedSize,
1115
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1116
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
1117
-
"total_uncompressed_size": uncompressedSize,
1118
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1119
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
1120
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
1121
1015
})
1122
1016
}
1123
1017
···
1125
1019
1126
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
1127
1021
resp := newResponse(w)
1128
-
ctx := r.Context()
1129
1022
1130
-
count, err := s.db.GetMempoolCount(ctx)
1131
-
if err != nil {
1132
-
resp.error(err.Error(), http.StatusInternalServerError)
1133
-
return
1134
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
1135
1025
1136
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
1137
-
if err != nil {
1138
-
resp.error(err.Error(), http.StatusInternalServerError)
1139
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
1140
1030
}
1141
1031
1142
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
1143
-
if err != nil {
1144
-
resp.error(err.Error(), http.StatusInternalServerError)
1145
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
1146
1036
}
1147
1037
1148
-
result := map[string]interface{}{
1149
-
"operation_count": count,
1150
-
"unique_did_count": uniqueDIDCount,
1151
-
"uncompressed_size": uncompressedSize,
1152
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1153
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
1154
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
1155
1042
1156
-
if count > 0 {
1157
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
1158
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
1159
1045
1160
-
if count < plc.BUNDLE_SIZE {
1161
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
1162
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
1163
1049
if timeSpan > 0 {
1164
1050
opsPerSecond := float64(count) / timeSpan
1165
1051
if opsPerSecond > 0 {
1166
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
1167
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
1168
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1169
-
result["operations_needed"] = remainingOps
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1170
1057
result["current_rate_per_second"] = opsPerSecond
1058
+
result["operations_needed"] = remainingOps
1171
1059
}
1172
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
1173
1066
}
1174
-
} else {
1175
-
result["estimated_next_bundle_time"] = time.Now()
1176
-
result["operations_needed"] = 0
1177
1067
}
1178
1068
}
1179
1069
} else {
1070
+
// Empty mempool
1180
1071
result["mempool_start_time"] = nil
1181
1072
result["estimated_next_bundle_time"] = nil
1182
1073
}
···
1201
1092
1202
1093
// ===== VERIFICATION HANDLERS =====
1203
1094
1204
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
1205
-
resp := newResponse(w)
1206
-
vars := mux.Vars(r)
1207
-
1208
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
1209
-
if err != nil {
1210
-
resp.error("Invalid bundle number", http.StatusBadRequest)
1211
-
return
1212
-
}
1213
-
1214
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
1215
-
if err != nil {
1216
-
resp.error("Bundle not found", http.StatusNotFound)
1217
-
return
1218
-
}
1219
-
1220
-
// Fetch from PLC and verify
1221
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
1222
-
if err != nil {
1223
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
1224
-
return
1225
-
}
1226
-
1227
-
remoteHash := computeOperationsHash(remoteOps)
1228
-
verified := bundle.Hash == remoteHash
1229
-
1230
-
resp.json(map[string]interface{}{
1231
-
"bundle_number": bundleNumber,
1232
-
"verified": verified,
1233
-
"local_hash": bundle.Hash,
1234
-
"remote_hash": remoteHash,
1235
-
"local_op_count": plc.BUNDLE_SIZE,
1236
-
"remote_op_count": len(remoteOps),
1237
-
"boundary_cids_used": len(prevCIDs),
1238
-
})
1239
-
}
1240
-
1241
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
1242
-
var after string
1243
-
var prevBoundaryCIDs map[string]bool
1244
-
1245
-
if bundleNum > 1 {
1246
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
1247
-
if err != nil {
1248
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
1249
-
}
1250
-
1251
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
1252
-
1253
-
if len(prevBundle.BoundaryCIDs) > 0 {
1254
-
prevBoundaryCIDs = make(map[string]bool)
1255
-
for _, cid := range prevBundle.BoundaryCIDs {
1256
-
prevBoundaryCIDs[cid] = true
1257
-
}
1258
-
}
1259
-
}
1260
-
1261
-
var allRemoteOps []plc.PLCOperation
1262
-
seenCIDs := make(map[string]bool)
1263
-
1264
-
for cid := range prevBoundaryCIDs {
1265
-
seenCIDs[cid] = true
1266
-
}
1267
-
1268
-
currentAfter := after
1269
-
maxFetches := 20
1270
-
1271
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1272
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1273
-
Count: 1000,
1274
-
After: currentAfter,
1275
-
})
1276
-
if err != nil || len(batch) == 0 {
1277
-
break
1278
-
}
1279
-
1280
-
for _, op := range batch {
1281
-
if !seenCIDs[op.CID] {
1282
-
seenCIDs[op.CID] = true
1283
-
allRemoteOps = append(allRemoteOps, op)
1284
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1285
-
break
1286
-
}
1287
-
}
1288
-
}
1289
-
1290
-
if len(batch) > 0 {
1291
-
lastOp := batch[len(batch)-1]
1292
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1293
-
}
1294
-
1295
-
if len(batch) < 1000 {
1296
-
break
1297
-
}
1298
-
}
1299
-
1300
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1301
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1302
-
}
1303
-
1304
-
return allRemoteOps, prevBoundaryCIDs, nil
1305
-
}
1306
-
1307
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1308
1096
resp := newResponse(w)
1309
-
ctx := r.Context()
1310
1097
1311
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1312
-
if err != nil {
1313
-
resp.error(err.Error(), http.StatusInternalServerError)
1314
-
return
1315
-
}
1316
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1317
1099
if lastBundle == 0 {
1318
1100
resp.json(map[string]interface{}{
1319
1101
"status": "empty",
···
1327
1109
var errorMsg string
1328
1110
1329
1111
for i := 1; i <= lastBundle; i++ {
1330
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1331
1113
if err != nil {
1332
1114
valid = false
1333
1115
brokenAt = i
···
1336
1118
}
1337
1119
1338
1120
if i > 1 {
1339
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1340
1122
if err != nil {
1341
1123
valid = false
1342
1124
brokenAt = i
···
1344
1126
break
1345
1127
}
1346
1128
1347
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1348
1130
valid = false
1349
1131
brokenAt = i
1350
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1351
1133
break
1352
1134
}
1353
1135
}
···
1368
1150
1369
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
1370
1152
resp := newResponse(w)
1371
-
ctx := r.Context()
1372
1153
1373
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1374
-
if err != nil {
1375
-
resp.error(err.Error(), http.StatusInternalServerError)
1376
-
return
1377
-
}
1378
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1379
1155
if lastBundle == 0 {
1380
1156
resp.json(map[string]interface{}{
1381
1157
"chain_length": 0,
···
1384
1160
return
1385
1161
}
1386
1162
1387
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
1388
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
1389
-
1390
-
// Updated to receive 5 values instead of 3
1391
-
count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
1392
-
if err != nil {
1393
-
resp.error(err.Error(), http.StatusInternalServerError)
1394
-
return
1395
-
}
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
1396
1166
1397
1167
resp.json(map[string]interface{}{
1398
-
"chain_length": lastBundle,
1399
-
"total_bundles": count,
1400
-
"total_compressed_size": compressedSize,
1401
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1402
-
"total_uncompressed_size": uncompressedSize,
1403
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1404
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1405
-
"chain_start_time": firstBundle.StartTime,
1406
-
"chain_end_time": lastBundleData.EndTime,
1407
-
"chain_head_hash": lastBundleData.Hash,
1408
-
"first_prev_hash": firstBundle.PrevBundleHash,
1409
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
1410
1177
})
1411
1178
}
1412
1179
···
1427
1194
return
1428
1195
}
1429
1196
1430
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
1431
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
1432
1199
1433
1200
w.Header().Set("Content-Type", "application/jsonl")
···
1467
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
1468
1235
}
1469
1236
1470
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
1471
1238
if afterTime.IsZero() {
1472
1239
return 1
1473
1240
}
1474
1241
1475
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
1476
-
if err != nil {
1477
-
return 1
1478
-
}
1479
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
1480
1243
if foundBundle > 1 {
1481
1244
return foundBundle - 1
1482
1245
}
···
1487
1250
var allOps []plc.PLCOperation
1488
1251
seenCIDs := make(map[string]bool)
1489
1252
1490
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1491
1254
1492
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
1493
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
1647
1410
limit := getQueryInt(r, "limit", 0)
1648
1411
fromBundle := getQueryInt(r, "from", 1)
1649
1412
1650
-
history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1651
1415
if err != nil {
1652
1416
resp.error(err.Error(), http.StatusInternalServerError)
1653
1417
return
···
1719
1483
})
1720
1484
}
1721
1485
1722
-
// ===== UTILITY FUNCTIONS =====
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1723
1488
1724
-
func computeOperationsHash(ops []plc.PLCOperation) string {
1725
-
var jsonlData []byte
1726
-
for _, op := range ops {
1727
-
jsonlData = append(jsonlData, op.RawJSON...)
1728
-
jsonlData = append(jsonlData, '\n')
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1729
1493
}
1730
-
hash := sha256.Sum256(jsonlData)
1731
-
return hex.EncodeToString(hash[:])
1494
+
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1499
+
}
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1732
1506
}
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1733
1509
1734
1510
func normalizeEndpoint(endpoint string) string {
1735
1511
endpoint = strings.TrimPrefix(endpoint, "https://")
+7
-11
internal/api/server.go
+7
-11
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
24
23
plcIndexDIDs bool
25
24
}
26
25
27
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
28
-
bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
29
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
30
27
s := &Server{
31
28
router: mux.NewRouter(),
32
29
db: db,
33
-
plcClient: plc.NewClient(plcCfg.DirectoryURL),
34
30
plcBundleDir: plcCfg.BundleDir,
35
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
36
32
plcIndexDIDs: plcCfg.IndexDIDs,
37
33
}
38
34
···
88
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
89
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
90
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
91
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
92
88
93
89
// PLC history/metrics
94
90
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
+44
-45
internal/pds/client.go
+44
-45
internal/pds/client.go
···
84
84
}
85
85
86
86
// DescribeServer fetches com.atproto.server.describeServer
87
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
88
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
89
91
90
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
91
116
92
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
93
118
if err != nil {
94
-
return nil, err
119
+
return nil, 0, "", err
95
120
}
96
121
97
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
98
125
if err != nil {
99
-
return nil, err
126
+
return nil, responseTime, usedIP, err
100
127
}
101
128
defer resp.Body.Close()
102
129
103
130
if resp.StatusCode != http.StatusOK {
104
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
105
132
}
106
133
107
134
var desc ServerDescription
108
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
109
-
return nil, err
136
+
return nil, responseTime, usedIP, err
110
137
}
111
138
112
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
113
140
}
114
141
115
142
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
116
-
// Returns: available, responseTime, version, usedIP, error
117
-
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, string, error) {
143
+
// Returns: available, responseTime, version, error
144
+
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
118
145
startTime := time.Now()
119
146
120
147
url := fmt.Sprintf("%s/xrpc/_health", endpoint)
121
148
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
122
149
if err != nil {
123
-
return false, 0, "", "", err
124
-
}
125
-
126
-
// Create a custom dialer to track which IP was actually used
127
-
var usedIP string
128
-
transport := &http.Transport{
129
-
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
130
-
conn, err := (&net.Dialer{
131
-
Timeout: 30 * time.Second,
132
-
KeepAlive: 30 * time.Second,
133
-
}).DialContext(ctx, network, addr)
134
-
135
-
if err == nil && conn != nil {
136
-
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
137
-
// Extract IP from "ip:port" format
138
-
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
139
-
usedIP = tcpAddr.IP.String()
140
-
}
141
-
}
142
-
}
143
-
144
-
return conn, err
145
-
},
150
+
return false, 0, "", err
146
151
}
147
152
148
-
// Create a client with our custom transport
149
-
client := &http.Client{
150
-
Timeout: c.httpClient.Timeout,
151
-
Transport: transport,
152
-
}
153
-
154
-
resp, err := client.Do(req)
153
+
resp, err := c.httpClient.Do(req)
155
154
duration := time.Since(startTime)
156
155
157
156
if err != nil {
158
-
return false, duration, "", usedIP, err
157
+
return false, duration, "", err
159
158
}
160
159
defer resp.Body.Close()
161
160
162
161
if resp.StatusCode != http.StatusOK {
163
-
return false, duration, "", usedIP, fmt.Errorf("health check returned status %d", resp.StatusCode)
162
+
return false, duration, "", fmt.Errorf("health check returned status %d", resp.StatusCode)
164
163
}
165
164
166
165
// Decode the JSON response and check for "version"
···
169
168
}
170
169
171
170
if err := json.NewDecoder(resp.Body).Decode(&healthResponse); err != nil {
172
-
return false, duration, "", usedIP, fmt.Errorf("failed to decode health JSON: %w", err)
171
+
return false, duration, "", fmt.Errorf("failed to decode health JSON: %w", err)
173
172
}
174
173
175
174
if healthResponse.Version == "" {
176
-
return false, duration, "", usedIP, fmt.Errorf("health JSON response missing 'version' field")
175
+
return false, duration, "", fmt.Errorf("health JSON response missing 'version' field")
177
176
}
178
177
179
178
// All checks passed
180
-
return true, duration, healthResponse.Version, usedIP, nil
179
+
return true, duration, healthResponse.Version, nil
181
180
}
+36
-32
internal/pds/scanner.go
+36
-32
internal/pds/scanner.go
···
8
8
"sync/atomic"
9
9
"time"
10
10
11
-
"github.com/acarl005/stripansi"
12
-
"github.com/atscan/atscanner/internal/config"
13
-
"github.com/atscan/atscanner/internal/ipinfo"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/monitor"
16
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
17
16
)
18
17
19
18
type Scanner struct {
···
40
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
41
40
Type: "pds",
42
41
OnlyStale: true,
42
+
OnlyValid: true,
43
43
RecheckInterval: s.config.RecheckInterval,
44
44
})
45
45
if err != nil {
···
127
127
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
128
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
129
129
if err != nil {
130
-
// Mark as offline due to DNS failure
131
130
s.saveScanResult(ctx, ep.ID, &ScanResult{
132
131
Status: storage.EndpointStatusOffline,
133
132
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
146
145
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
147
146
}
148
147
149
-
// STEP 2: Health check (now returns which IP was used)
150
-
available, responseTime, version, usedIP, err := s.client.CheckHealth(ctx, ep.Endpoint)
151
-
if err != nil || !available {
152
-
errMsg := "health check failed"
153
-
if err != nil {
154
-
errMsg = err.Error()
155
-
}
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
150
+
if err != nil {
156
151
s.saveScanResult(ctx, ep.ID, &ScanResult{
157
152
Status: storage.EndpointStatusOffline,
158
-
ResponseTime: responseTime,
159
-
ErrorMessage: errMsg,
160
-
UsedIP: usedIP, // Save even if failed
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
161
156
})
162
157
return
163
158
}
164
159
165
-
// STEP 3: Fetch PDS-specific data
166
-
desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
167
-
if err != nil {
168
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
169
-
} else if desc != nil && desc.DID != "" {
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
170
162
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
171
163
}
172
164
173
-
// Fetch repos with full info
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
172
+
}
173
+
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
174
181
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
175
182
if err != nil {
176
183
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
177
184
repoList = []Repo{}
178
185
}
179
186
180
-
// Convert to DIDs for backward compatibility
187
+
// Convert to DIDs
181
188
dids := make([]string, len(repoList))
182
189
for i, repo := range repoList {
183
190
dids[i] = repo.DID
184
191
}
185
192
186
-
// STEP 4: SAVE scan result
193
+
// STEP 5: SAVE scan result
187
194
s.saveScanResult(ctx, ep.ID, &ScanResult{
188
195
Status: storage.EndpointStatusOnline,
189
-
ResponseTime: responseTime,
196
+
ResponseTime: avgResponseTime,
190
197
Description: desc,
191
198
DIDs: dids,
192
199
Version: version,
193
-
UsedIP: usedIP, // NEW: Save which IP was used
200
+
UsedIP: usedIP, // Only from describeServer
194
201
})
195
202
196
-
// Save repos in batches (only tracks changes)
203
+
// STEP 6: Save repos in batches (only tracks changes)
197
204
if len(repoList) > 0 {
198
-
batchSize := 200000
205
+
batchSize := 100_000
199
206
200
207
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
201
208
···
235
242
236
243
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
237
244
}
238
-
239
-
// IP info fetch already started at the beginning (step 1.5)
240
-
// It will complete in the background
241
245
}
242
246
243
247
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
-696
internal/plc/bundle.go
-696
internal/plc/bundle.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"bytes"
6
-
"context"
7
-
"crypto/sha256"
8
-
"encoding/hex"
9
-
"encoding/json"
10
-
"fmt"
11
-
"os"
12
-
"path/filepath"
13
-
"time"
14
-
15
-
"github.com/atscan/atscanner/internal/log"
16
-
"github.com/atscan/atscanner/internal/storage"
17
-
"github.com/klauspost/compress/zstd"
18
-
)
19
-
20
-
const BUNDLE_SIZE = 10000
21
-
22
-
type BundleManager struct {
23
-
dir string
24
-
enabled bool
25
-
encoder *zstd.Encoder
26
-
decoder *zstd.Decoder
27
-
db storage.Database
28
-
indexDIDs bool
29
-
}
30
-
31
-
// ===== INITIALIZATION =====
32
-
33
-
func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
34
-
if !enabled {
35
-
log.Verbose("BundleManager disabled (enabled=false)")
36
-
return &BundleManager{enabled: false}, nil
37
-
}
38
-
39
-
if err := os.MkdirAll(dir, 0755); err != nil {
40
-
return nil, fmt.Errorf("failed to create bundle dir: %w", err)
41
-
}
42
-
43
-
encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
44
-
if err != nil {
45
-
return nil, err
46
-
}
47
-
48
-
decoder, err := zstd.NewReader(nil)
49
-
if err != nil {
50
-
return nil, err
51
-
}
52
-
53
-
log.Verbose("BundleManager initialized: enabled=%v, indexDIDs=%v, dir=%s", enabled, indexDIDs, dir)
54
-
55
-
return &BundleManager{
56
-
dir: dir,
57
-
enabled: enabled,
58
-
encoder: encoder,
59
-
decoder: decoder,
60
-
db: db,
61
-
indexDIDs: indexDIDs,
62
-
}, nil
63
-
}
64
-
65
-
func (bm *BundleManager) Close() {
66
-
if bm.encoder != nil {
67
-
bm.encoder.Close()
68
-
}
69
-
if bm.decoder != nil {
70
-
bm.decoder.Close()
71
-
}
72
-
}
73
-
74
-
// ===== BUNDLE FILE ABSTRACTION =====
75
-
76
-
type bundleFile struct {
77
-
path string
78
-
operations []PLCOperation
79
-
uncompressedHash string
80
-
compressedHash string
81
-
}
82
-
83
-
func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
84
-
return &bundleFile{
85
-
path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
86
-
}
87
-
}
88
-
89
-
func (bf *bundleFile) exists() bool {
90
-
_, err := os.Stat(bf.path)
91
-
return err == nil
92
-
}
93
-
94
-
func (bm *BundleManager) load(bf *bundleFile) error {
95
-
compressed, err := os.ReadFile(bf.path)
96
-
if err != nil {
97
-
return fmt.Errorf("read failed: %w", err)
98
-
}
99
-
100
-
decompressed, err := bm.decoder.DecodeAll(compressed, nil)
101
-
if err != nil {
102
-
return fmt.Errorf("decompress failed: %w", err)
103
-
}
104
-
105
-
bf.operations = bm.parseJSONL(decompressed)
106
-
return nil
107
-
}
108
-
109
-
func (bm *BundleManager) save(bf *bundleFile) error {
110
-
jsonlData := bm.serializeJSONL(bf.operations)
111
-
bf.uncompressedHash = bm.hash(jsonlData)
112
-
113
-
compressed := bm.encoder.EncodeAll(jsonlData, nil)
114
-
bf.compressedHash = bm.hash(compressed)
115
-
116
-
return os.WriteFile(bf.path, compressed, 0644)
117
-
}
118
-
119
-
func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
120
-
var ops []PLCOperation
121
-
scanner := bufio.NewScanner(bytes.NewReader(data))
122
-
123
-
for scanner.Scan() {
124
-
line := scanner.Bytes()
125
-
if len(line) == 0 {
126
-
continue
127
-
}
128
-
129
-
var op PLCOperation
130
-
if err := json.Unmarshal(line, &op); err == nil {
131
-
op.RawJSON = append([]byte(nil), line...)
132
-
ops = append(ops, op)
133
-
}
134
-
}
135
-
136
-
return ops
137
-
}
138
-
139
-
func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
140
-
var buf []byte
141
-
for _, op := range ops {
142
-
buf = append(buf, op.RawJSON...)
143
-
buf = append(buf, '\n')
144
-
}
145
-
return buf
146
-
}
147
-
148
-
// ===== BUNDLE FETCHING =====
149
-
150
-
type bundleFetcher struct {
151
-
client *Client
152
-
seenCIDs map[string]bool
153
-
currentAfter string
154
-
fetchCount int
155
-
}
156
-
157
-
func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
158
-
seen := make(map[string]bool)
159
-
for cid := range prevBoundaryCIDs {
160
-
seen[cid] = true
161
-
}
162
-
163
-
return &bundleFetcher{
164
-
client: client,
165
-
seenCIDs: seen,
166
-
currentAfter: afterTime,
167
-
}
168
-
}
169
-
170
-
func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
171
-
var ops []PLCOperation
172
-
maxFetches := (target / 900) + 5
173
-
174
-
for len(ops) < target && bf.fetchCount < maxFetches {
175
-
bf.fetchCount++
176
-
batchSize := bf.calculateBatchSize(target - len(ops))
177
-
178
-
log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)
179
-
180
-
batch, shouldContinue := bf.fetchBatch(ctx, batchSize)
181
-
182
-
for _, op := range batch {
183
-
if !bf.seenCIDs[op.CID] {
184
-
bf.seenCIDs[op.CID] = true
185
-
ops = append(ops, op)
186
-
187
-
if len(ops) >= target {
188
-
return ops[:target], true
189
-
}
190
-
}
191
-
}
192
-
193
-
if !shouldContinue {
194
-
break
195
-
}
196
-
}
197
-
198
-
return ops, len(ops) >= target
199
-
}
200
-
201
-
func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
202
-
if bf.fetchCount == 0 {
203
-
return 1000
204
-
}
205
-
if remaining < 100 {
206
-
return 50
207
-
}
208
-
if remaining < 500 {
209
-
return 200
210
-
}
211
-
return 1000
212
-
}
213
-
214
-
func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
215
-
ops, err := bf.client.Export(ctx, ExportOptions{
216
-
Count: size,
217
-
After: bf.currentAfter,
218
-
})
219
-
220
-
if err != nil || len(ops) == 0 {
221
-
return nil, false
222
-
}
223
-
224
-
if len(ops) > 0 {
225
-
bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
226
-
}
227
-
228
-
return ops, len(ops) >= size
229
-
}
230
-
231
-
// ===== MAIN BUNDLE LOADING =====
232
-
233
-
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
234
-
if !bm.enabled {
235
-
return nil, false, fmt.Errorf("bundle manager disabled")
236
-
}
237
-
238
-
bf := bm.newBundleFile(bundleNum)
239
-
240
-
// Try local file first
241
-
if bf.exists() {
242
-
return bm.loadFromFile(ctx, bundleNum, bf)
243
-
}
244
-
245
-
// Fetch from PLC
246
-
return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
247
-
}
248
-
249
-
func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
250
-
log.Verbose("→ Loading bundle %06d from local file", bundleNum)
251
-
252
-
// Verify hash if bundle is in DB
253
-
if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
254
-
if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
255
-
log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
256
-
os.Remove(bf.path)
257
-
return nil, false, fmt.Errorf("hash mismatch")
258
-
}
259
-
log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
260
-
}
261
-
262
-
if err := bm.load(bf); err != nil {
263
-
return nil, false, err
264
-
}
265
-
266
-
// Index if not in DB
267
-
if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
268
-
bf.compressedHash = bm.hashFile(bf.path)
269
-
bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
270
-
271
-
// Calculate cursor from previous bundle
272
-
cursor := bm.calculateCursor(ctx, bundleNum)
273
-
274
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
275
-
}
276
-
277
-
return bf.operations, true, nil
278
-
}
279
-
280
-
func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
281
-
log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)
282
-
283
-
afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
284
-
fetcher := newBundleFetcher(client, afterTime, prevCIDs)
285
-
286
-
ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)
287
-
288
-
log.Info(" Collected %d unique operations after %d fetches (complete=%v)",
289
-
len(ops), fetcher.fetchCount, isComplete)
290
-
291
-
if isComplete {
292
-
bf.operations = ops
293
-
if err := bm.save(bf); err != nil {
294
-
log.Error("Warning: failed to save bundle: %v", err)
295
-
} else {
296
-
// The cursor is the afterTime that was used to fetch this bundle
297
-
cursor := afterTime
298
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
299
-
log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
300
-
bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
301
-
}
302
-
}
303
-
304
-
return ops, isComplete, nil
305
-
}
306
-
307
-
func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
308
-
if bundleNum == 1 {
309
-
return "", nil
310
-
}
311
-
312
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
313
-
if err != nil {
314
-
return "", nil
315
-
}
316
-
317
-
afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)
318
-
319
-
// Return stored boundary CIDs if available
320
-
if len(prevBundle.BoundaryCIDs) > 0 {
321
-
cids := make(map[string]bool)
322
-
for _, cid := range prevBundle.BoundaryCIDs {
323
-
cids[cid] = true
324
-
}
325
-
return afterTime, cids
326
-
}
327
-
328
-
// Fallback: compute from file
329
-
bf := bm.newBundleFile(bundleNum - 1)
330
-
if bf.exists() {
331
-
if err := bm.load(bf); err == nil {
332
-
_, cids := GetBoundaryCIDs(bf.operations)
333
-
return afterTime, cids
334
-
}
335
-
}
336
-
337
-
return afterTime, nil
338
-
}
339
-
340
-
// ===== BUNDLE INDEXING =====
341
-
342
-
func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
343
-
log.Verbose("indexBundle called for bundle %06d: indexDIDs=%v", bundleNum, bm.indexDIDs)
344
-
345
-
prevHash := ""
346
-
if bundleNum > 1 {
347
-
if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
348
-
prevHash = prev.Hash
349
-
}
350
-
}
351
-
352
-
dids := bm.extractUniqueDIDs(bf.operations)
353
-
log.Verbose("Extracted %d unique DIDs from bundle %06d", len(dids), bundleNum)
354
-
355
-
compressedFileSize := bm.getFileSize(bf.path)
356
-
357
-
// Calculate uncompressed size
358
-
uncompressedSize := int64(0)
359
-
for _, op := range bf.operations {
360
-
uncompressedSize += int64(len(op.RawJSON)) + 1
361
-
}
362
-
363
-
// Get time range from operations
364
-
firstSeenAt := bf.operations[0].CreatedAt
365
-
lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt
366
-
367
-
bundle := &storage.PLCBundle{
368
-
BundleNumber: bundleNum,
369
-
StartTime: firstSeenAt,
370
-
EndTime: lastSeenAt,
371
-
DIDCount: len(dids),
372
-
Hash: bf.uncompressedHash,
373
-
CompressedHash: bf.compressedHash,
374
-
CompressedSize: compressedFileSize,
375
-
UncompressedSize: uncompressedSize,
376
-
Cursor: cursor,
377
-
PrevBundleHash: prevHash,
378
-
Compressed: true,
379
-
CreatedAt: time.Now().UTC(),
380
-
}
381
-
382
-
log.Verbose("About to create bundle %06d in database (DIDCount=%d)", bundleNum, bundle.DIDCount)
383
-
384
-
// Create bundle first
385
-
if err := bm.db.CreateBundle(ctx, bundle); err != nil {
386
-
log.Error("Failed to create bundle %06d in database: %v", bundleNum, err)
387
-
return err
388
-
}
389
-
390
-
log.Verbose("Bundle %06d created successfully in database", bundleNum)
391
-
392
-
// Index DIDs if enabled
393
-
if bm.indexDIDs {
394
-
start := time.Now()
395
-
log.Verbose("Starting DID indexing for bundle %06d: %d unique DIDs", bundleNum, len(dids))
396
-
397
-
// Extract handle and PDS for each DID
398
-
didInfoMap := ExtractDIDInfoMap(bf.operations)
399
-
log.Verbose("Extracted info for %d DIDs from operations", len(didInfoMap))
400
-
401
-
successCount := 0
402
-
errorCount := 0
403
-
invalidHandleCount := 0
404
-
405
-
// Upsert each DID with handle, pds, and bundle number
406
-
for did, info := range didInfoMap {
407
-
validHandle := ValidateHandle(info.Handle)
408
-
if info.Handle != "" && validHandle == "" {
409
-
//log.Verbose("Bundle %06d: Skipping invalid handle for DID %s (length: %d)", bundleNum, did, len(info.Handle))
410
-
invalidHandleCount++
411
-
}
412
-
413
-
if err := bm.db.UpsertDID(ctx, did, bundleNum, validHandle, info.PDS); err != nil {
414
-
log.Error("Failed to index DID %s for bundle %06d: %v", did, bundleNum, err)
415
-
errorCount++
416
-
} else {
417
-
successCount++
418
-
}
419
-
}
420
-
421
-
elapsed := time.Since(start)
422
-
log.Info("✓ Indexed bundle %06d: %d DIDs succeeded, %d errors, %d invalid handles in %v",
423
-
bundleNum, successCount, errorCount, invalidHandleCount, elapsed)
424
-
} else {
425
-
log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
426
-
}
427
-
428
-
return nil
429
-
}
430
-
431
-
func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
432
-
didSet := make(map[string]bool)
433
-
for _, op := range ops {
434
-
didSet[op.DID] = true
435
-
}
436
-
437
-
dids := make([]string, 0, len(didSet))
438
-
for did := range didSet {
439
-
dids = append(dids, did)
440
-
}
441
-
return dids
442
-
}
443
-
444
-
// ===== MEMPOOL BUNDLE CREATION =====
445
-
446
-
func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
447
-
if !bm.enabled {
448
-
return 0, fmt.Errorf("bundle manager disabled")
449
-
}
450
-
451
-
if len(operations) != BUNDLE_SIZE {
452
-
return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
453
-
}
454
-
455
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
456
-
if err != nil {
457
-
return 0, err
458
-
}
459
-
bundleNum := lastBundle + 1
460
-
461
-
bf := bm.newBundleFile(bundleNum)
462
-
bf.operations = operations
463
-
464
-
if err := bm.save(bf); err != nil {
465
-
return 0, err
466
-
}
467
-
468
-
if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
469
-
return 0, err
470
-
}
471
-
472
-
log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
473
-
bundleNum, bf.uncompressedHash[:16])
474
-
475
-
return bundleNum, nil
476
-
}
477
-
478
-
// ===== VERIFICATION =====
479
-
480
-
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
481
-
if !bm.enabled {
482
-
return fmt.Errorf("bundle manager disabled")
483
-
}
484
-
485
-
log.Info("Verifying bundle chain from 1 to %06d...", endBundle)
486
-
487
-
for i := 1; i <= endBundle; i++ {
488
-
bundle, err := bm.db.GetBundleByNumber(ctx, i)
489
-
if err != nil {
490
-
return fmt.Errorf("bundle %06d not found: %w", i, err)
491
-
}
492
-
493
-
// Verify file hash
494
-
path := bm.newBundleFile(i).path
495
-
if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
496
-
return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
497
-
}
498
-
499
-
// Verify chain link
500
-
if i > 1 {
501
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
502
-
if err != nil {
503
-
return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
504
-
}
505
-
506
-
if bundle.PrevBundleHash != prevBundle.Hash {
507
-
return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
508
-
i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
509
-
}
510
-
}
511
-
512
-
if i%100 == 0 {
513
-
log.Verbose(" ✓ Verified bundles 1-%06d", i)
514
-
}
515
-
}
516
-
517
-
log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
518
-
return nil
519
-
}
520
-
521
-
func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
522
-
if !bm.enabled {
523
-
return nil
524
-
}
525
-
526
-
for i := 1; i < targetBundle; i++ {
527
-
if !bm.newBundleFile(i).exists() {
528
-
if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
529
-
return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
530
-
}
531
-
}
532
-
}
533
-
534
-
return nil
535
-
}
536
-
537
-
// ===== UTILITY METHODS =====
538
-
539
-
func (bm *BundleManager) hash(data []byte) string {
540
-
h := sha256.Sum256(data)
541
-
return hex.EncodeToString(h[:])
542
-
}
543
-
544
-
func (bm *BundleManager) hashFile(path string) string {
545
-
data, _ := os.ReadFile(path)
546
-
return bm.hash(data)
547
-
}
548
-
549
-
func (bm *BundleManager) verifyHash(path, expectedHash string) error {
550
-
if expectedHash == "" {
551
-
return nil
552
-
}
553
-
554
-
actualHash := bm.hashFile(path)
555
-
if actualHash != expectedHash {
556
-
return fmt.Errorf("hash mismatch")
557
-
}
558
-
return nil
559
-
}
560
-
561
-
func (bm *BundleManager) getFileSize(path string) int64 {
562
-
if info, err := os.Stat(path); err == nil {
563
-
return info.Size()
564
-
}
565
-
return 0
566
-
}
567
-
568
-
func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
569
-
if !bm.enabled {
570
-
return 0, 0, 0, 0, nil
571
-
}
572
-
return bm.db.GetBundleStats(ctx)
573
-
}
574
-
575
-
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
576
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
577
-
if err != nil {
578
-
return nil, err
579
-
}
580
-
581
-
if lastBundle == 0 {
582
-
return map[string]interface{}{
583
-
"chain_length": 0,
584
-
"status": "empty",
585
-
}, nil
586
-
}
587
-
588
-
firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
589
-
lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)
590
-
591
-
return map[string]interface{}{
592
-
"chain_length": lastBundle,
593
-
"first_bundle": 1,
594
-
"last_bundle": lastBundle,
595
-
"chain_start_time": firstBundle.StartTime,
596
-
"chain_end_time": lastBundleData.EndTime,
597
-
"chain_head_hash": lastBundleData.Hash,
598
-
}, nil
599
-
}
600
-
601
-
// ===== EXPORTED HELPERS =====
602
-
603
-
func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
604
-
if len(operations) == 0 {
605
-
return time.Time{}, nil
606
-
}
607
-
608
-
lastOp := operations[len(operations)-1]
609
-
boundaryTime := lastOp.CreatedAt
610
-
cidSet := make(map[string]bool)
611
-
612
-
for i := len(operations) - 1; i >= 0; i-- {
613
-
op := operations[i]
614
-
if op.CreatedAt.Equal(boundaryTime) {
615
-
cidSet[op.CID] = true
616
-
} else {
617
-
break
618
-
}
619
-
}
620
-
621
-
return boundaryTime, cidSet
622
-
}
623
-
624
-
func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
625
-
if len(operations) == 0 {
626
-
return operations
627
-
}
628
-
629
-
boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
630
-
if err != nil {
631
-
return operations
632
-
}
633
-
634
-
startIdx := 0
635
-
for startIdx < len(operations) {
636
-
op := operations[startIdx]
637
-
638
-
if op.CreatedAt.After(boundaryTime) {
639
-
break
640
-
}
641
-
642
-
if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
643
-
startIdx++
644
-
continue
645
-
}
646
-
647
-
break
648
-
}
649
-
650
-
return operations[startIdx:]
651
-
}
652
-
653
-
// LoadBundleOperations is a public method for external access (e.g., API handlers)
654
-
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
655
-
if !bm.enabled {
656
-
return nil, fmt.Errorf("bundle manager disabled")
657
-
}
658
-
659
-
bf := bm.newBundleFile(bundleNum)
660
-
661
-
if !bf.exists() {
662
-
return nil, fmt.Errorf("bundle %06d not found", bundleNum)
663
-
}
664
-
665
-
if err := bm.load(bf); err != nil {
666
-
return nil, err
667
-
}
668
-
669
-
return bf.operations, nil
670
-
}
671
-
672
-
// calculateCursor determines the cursor value for a given bundle
673
-
// For bundle 1: returns empty string
674
-
// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format
675
-
func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
676
-
if bundleNum == 1 {
677
-
return ""
678
-
}
679
-
680
-
// Try to get cursor from previous bundle in DB
681
-
if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
682
-
return prevBundle.EndTime.Format(time.RFC3339Nano)
683
-
}
684
-
685
-
// If previous bundle not in DB, try to load it from file
686
-
prevBf := bm.newBundleFile(bundleNum - 1)
687
-
if prevBf.exists() {
688
-
if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
689
-
// Return the createdAt of the last operation in previous bundle
690
-
lastOp := prevBf.operations[len(prevBf.operations)-1]
691
-
return lastOp.CreatedAt.Format(time.RFC3339Nano)
692
-
}
693
-
}
694
-
695
-
return ""
696
-
}
-237
internal/plc/client.go
-237
internal/plc/client.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"context"
6
-
"encoding/json"
7
-
"fmt"
8
-
"io"
9
-
"net/http"
10
-
"strconv"
11
-
"time"
12
-
13
-
"github.com/atscan/atscanner/internal/log"
14
-
)
15
-
16
-
type Client struct {
17
-
baseURL string
18
-
httpClient *http.Client
19
-
rateLimiter *RateLimiter
20
-
}
21
-
22
-
func NewClient(baseURL string) *Client {
23
-
// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
24
-
rateLimiter := NewRateLimiter(90, time.Minute)
25
-
26
-
return &Client{
27
-
baseURL: baseURL,
28
-
httpClient: &http.Client{
29
-
Timeout: 60 * time.Second,
30
-
},
31
-
rateLimiter: rateLimiter,
32
-
}
33
-
}
34
-
35
-
func (c *Client) Close() {
36
-
if c.rateLimiter != nil {
37
-
c.rateLimiter.Stop()
38
-
}
39
-
}
40
-
41
-
type ExportOptions struct {
42
-
Count int
43
-
After string // ISO 8601 datetime string
44
-
}
45
-
46
-
// Export fetches export data from PLC directory with rate limiting and retry
47
-
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
48
-
return c.exportWithRetry(ctx, opts, 5)
49
-
}
50
-
51
-
// exportWithRetry implements retry logic with exponential backoff for rate limits
52
-
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
53
-
var lastErr error
54
-
backoff := 1 * time.Second
55
-
56
-
for attempt := 1; attempt <= maxRetries; attempt++ {
57
-
// Wait for rate limiter token
58
-
if err := c.rateLimiter.Wait(ctx); err != nil {
59
-
return nil, err
60
-
}
61
-
62
-
operations, retryAfter, err := c.doExport(ctx, opts)
63
-
64
-
if err == nil {
65
-
return operations, nil
66
-
}
67
-
68
-
lastErr = err
69
-
70
-
// Check if it's a rate limit error (429)
71
-
if retryAfter > 0 {
72
-
log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
73
-
retryAfter, attempt, maxRetries)
74
-
75
-
select {
76
-
case <-time.After(retryAfter):
77
-
continue
78
-
case <-ctx.Done():
79
-
return nil, ctx.Err()
80
-
}
81
-
}
82
-
83
-
// Other errors - exponential backoff
84
-
if attempt < maxRetries {
85
-
log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
86
-
attempt, maxRetries, err, backoff)
87
-
88
-
select {
89
-
case <-time.After(backoff):
90
-
backoff *= 2 // Exponential backoff
91
-
case <-ctx.Done():
92
-
return nil, ctx.Err()
93
-
}
94
-
}
95
-
}
96
-
97
-
return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
98
-
}
99
-
100
-
// doExport performs the actual HTTP request
101
-
func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
102
-
url := fmt.Sprintf("%s/export", c.baseURL)
103
-
104
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
105
-
if err != nil {
106
-
return nil, 0, err
107
-
}
108
-
109
-
// Add query parameters
110
-
q := req.URL.Query()
111
-
if opts.Count > 0 {
112
-
q.Add("count", fmt.Sprintf("%d", opts.Count))
113
-
}
114
-
if opts.After != "" {
115
-
q.Add("after", opts.After)
116
-
}
117
-
req.URL.RawQuery = q.Encode()
118
-
119
-
resp, err := c.httpClient.Do(req)
120
-
if err != nil {
121
-
return nil, 0, fmt.Errorf("request failed: %w", err)
122
-
}
123
-
defer resp.Body.Close()
124
-
125
-
// Handle rate limiting (429)
126
-
if resp.StatusCode == http.StatusTooManyRequests {
127
-
retryAfter := parseRetryAfter(resp)
128
-
129
-
// Also check x-ratelimit headers for info
130
-
if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
131
-
log.Verbose("Rate limit: %s", limit)
132
-
}
133
-
134
-
return nil, retryAfter, fmt.Errorf("rate limited (429)")
135
-
}
136
-
137
-
if resp.StatusCode != http.StatusOK {
138
-
body, _ := io.ReadAll(resp.Body)
139
-
return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
140
-
}
141
-
142
-
var operations []PLCOperation
143
-
144
-
// PLC export returns newline-delimited JSON
145
-
scanner := bufio.NewScanner(resp.Body)
146
-
buf := make([]byte, 0, 64*1024)
147
-
scanner.Buffer(buf, 1024*1024)
148
-
149
-
lineCount := 0
150
-
for scanner.Scan() {
151
-
lineCount++
152
-
line := scanner.Bytes()
153
-
154
-
if len(line) == 0 {
155
-
continue
156
-
}
157
-
158
-
var op PLCOperation
159
-
if err := json.Unmarshal(line, &op); err != nil {
160
-
log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
161
-
continue
162
-
}
163
-
164
-
// CRITICAL: Store the original raw JSON bytes
165
-
op.RawJSON = make([]byte, len(line))
166
-
copy(op.RawJSON, line)
167
-
168
-
operations = append(operations, op)
169
-
}
170
-
171
-
if err := scanner.Err(); err != nil {
172
-
return nil, 0, fmt.Errorf("error reading response: %w", err)
173
-
}
174
-
175
-
return operations, 0, nil
176
-
177
-
}
178
-
179
-
// parseRetryAfter parses the Retry-After header
180
-
func parseRetryAfter(resp *http.Response) time.Duration {
181
-
retryAfter := resp.Header.Get("Retry-After")
182
-
if retryAfter == "" {
183
-
// Default to 5 minutes if no header
184
-
return 5 * time.Minute
185
-
}
186
-
187
-
// Try parsing as seconds
188
-
if seconds, err := strconv.Atoi(retryAfter); err == nil {
189
-
return time.Duration(seconds) * time.Second
190
-
}
191
-
192
-
// Try parsing as HTTP date
193
-
if t, err := http.ParseTime(retryAfter); err == nil {
194
-
return time.Until(t)
195
-
}
196
-
197
-
// Default
198
-
return 5 * time.Minute
199
-
}
200
-
201
-
// GetDID fetches a specific DID document from PLC
202
-
func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
203
-
// Wait for rate limiter
204
-
if err := c.rateLimiter.Wait(ctx); err != nil {
205
-
return nil, err
206
-
}
207
-
208
-
url := fmt.Sprintf("%s/%s", c.baseURL, did)
209
-
210
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
211
-
if err != nil {
212
-
return nil, err
213
-
}
214
-
215
-
resp, err := c.httpClient.Do(req)
216
-
if err != nil {
217
-
return nil, err
218
-
}
219
-
defer resp.Body.Close()
220
-
221
-
if resp.StatusCode == http.StatusTooManyRequests {
222
-
retryAfter := parseRetryAfter(resp)
223
-
return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
224
-
}
225
-
226
-
if resp.StatusCode != http.StatusOK {
227
-
body, _ := io.ReadAll(resp.Body)
228
-
return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
229
-
}
230
-
231
-
var doc DIDDocument
232
-
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
233
-
return nil, err
234
-
}
235
-
236
-
return &doc, nil
237
-
}
+522
internal/plc/manager.go
+522
internal/plc/manager.go
···
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/csv"
6
+
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
13
+
"time"
14
+
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
19
+
)
20
+
21
+
// BundleManager wraps the library's manager with database integration
22
+
type BundleManager struct {
23
+
libManager *plcbundle.Manager
24
+
db storage.Database
25
+
bundleDir string
26
+
indexDIDs bool
27
+
}
28
+
29
+
func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30
+
// Create library config
31
+
config := plcbundle.DefaultConfig(bundleDir)
32
+
33
+
// Create PLC client
34
+
var client *plcbundle.PLCClient
35
+
if plcURL != "" {
36
+
client = plcbundle.NewPLCClient(plcURL)
37
+
}
38
+
39
+
// Create library manager
40
+
libMgr, err := plcbundle.NewManager(config, client)
41
+
if err != nil {
42
+
return nil, fmt.Errorf("failed to create library manager: %w", err)
43
+
}
44
+
45
+
return &BundleManager{
46
+
libManager: libMgr,
47
+
db: db,
48
+
bundleDir: bundleDir,
49
+
indexDIDs: indexDIDs,
50
+
}, nil
51
+
}
52
+
53
+
func (bm *BundleManager) Close() {
54
+
if bm.libManager != nil {
55
+
bm.libManager.Close()
56
+
}
57
+
}
58
+
59
+
// LoadBundle loads a bundle (from library) and returns operations
60
+
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62
+
if err != nil {
63
+
return nil, err
64
+
}
65
+
return bundle.Operations, nil
66
+
}
67
+
68
+
// LoadBundle loads a full bundle with metadata
69
+
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70
+
return bm.libManager.LoadBundle(ctx, bundleNum)
71
+
}
72
+
73
+
// FetchAndSaveBundle fetches next bundle from PLC and saves
74
+
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75
+
// Fetch from PLC using library
76
+
bundle, err := bm.libManager.FetchNextBundle(ctx)
77
+
if err != nil {
78
+
return nil, err
79
+
}
80
+
81
+
// Save to disk (library handles this)
82
+
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83
+
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84
+
}
85
+
86
+
// Index DIDs if enabled (still use database for this)
87
+
if bm.indexDIDs && len(bundle.Operations) > 0 {
88
+
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89
+
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90
+
}
91
+
}
92
+
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
96
+
}
97
+
98
+
// indexBundleDIDs indexes DIDs from a bundle into the database
99
+
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100
+
start := time.Now()
101
+
log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102
+
103
+
// Extract DID info from operations
104
+
didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105
+
106
+
successCount := 0
107
+
errorCount := 0
108
+
invalidHandleCount := 0
109
+
110
+
// Upsert each DID
111
+
for did, info := range didInfoMap {
112
+
validHandle := ValidateHandle(info.Handle)
113
+
if info.Handle != "" && validHandle == "" {
114
+
invalidHandleCount++
115
+
}
116
+
117
+
if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118
+
log.Error("Failed to index DID %s: %v", did, err)
119
+
errorCount++
120
+
} else {
121
+
successCount++
122
+
}
123
+
}
124
+
125
+
elapsed := time.Since(start)
126
+
log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127
+
successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128
+
129
+
return nil
130
+
}
131
+
132
+
// VerifyChain verifies bundle chain integrity
133
+
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134
+
result, err := bm.libManager.VerifyChain(ctx)
135
+
if err != nil {
136
+
return err
137
+
}
138
+
139
+
if !result.Valid {
140
+
return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141
+
}
142
+
143
+
return nil
144
+
}
145
+
146
+
// GetChainInfo returns chain information
147
+
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148
+
return bm.libManager.GetInfo(), nil
149
+
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = int64(0)
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp finds bundle containing a timestamp
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
type dailyStat struct {
306
+
lastBundle int
307
+
bundleCount int
308
+
totalUncompressed int64
309
+
totalCompressed int64
310
+
}
311
+
312
+
dailyStats := make(map[string]*dailyStat)
313
+
314
+
// Map to store the cumulative values at the end of each date
315
+
dateCumulatives := make(map[string]struct {
316
+
uncompressed int64
317
+
compressed int64
318
+
})
319
+
320
+
// Calculate cumulative totals as we iterate through sorted bundles
321
+
cumulativeUncompressed := int64(0)
322
+
cumulativeCompressed := int64(0)
323
+
324
+
for _, bundle := range filtered {
325
+
dateStr := bundle.StartTime.Format("2006-01-02")
326
+
327
+
// Update cumulative totals
328
+
cumulativeUncompressed += bundle.UncompressedSize
329
+
cumulativeCompressed += bundle.CompressedSize
330
+
331
+
if stat, exists := dailyStats[dateStr]; exists {
332
+
// Update existing day
333
+
if bundle.BundleNumber > stat.lastBundle {
334
+
stat.lastBundle = bundle.BundleNumber
335
+
}
336
+
stat.bundleCount++
337
+
stat.totalUncompressed += bundle.UncompressedSize
338
+
stat.totalCompressed += bundle.CompressedSize
339
+
} else {
340
+
// Create new day entry
341
+
dailyStats[dateStr] = &dailyStat{
342
+
lastBundle: bundle.BundleNumber,
343
+
bundleCount: 1,
344
+
totalUncompressed: bundle.UncompressedSize,
345
+
totalCompressed: bundle.CompressedSize,
346
+
}
347
+
}
348
+
349
+
// Store the cumulative values at the end of this date
350
+
// (will be overwritten if there are multiple bundles on the same day)
351
+
dateCumulatives[dateStr] = struct {
352
+
uncompressed int64
353
+
compressed int64
354
+
}{
355
+
uncompressed: cumulativeUncompressed,
356
+
compressed: cumulativeCompressed,
357
+
}
358
+
}
359
+
360
+
// Convert map to sorted slice by date
361
+
var dates []string
362
+
for date := range dailyStats {
363
+
dates = append(dates, date)
364
+
}
365
+
sort.Strings(dates)
366
+
367
+
// Build history points with cumulative operations
368
+
var history []*storage.PLCHistoryPoint
369
+
cumulativeOps := 0
370
+
371
+
for _, date := range dates {
372
+
stat := dailyStats[date]
373
+
cumulativeOps += stat.bundleCount * 10000
374
+
cumulative := dateCumulatives[date]
375
+
376
+
history = append(history, &storage.PLCHistoryPoint{
377
+
Date: date,
378
+
BundleNumber: stat.lastBundle,
379
+
OperationCount: cumulativeOps,
380
+
UncompressedSize: stat.totalUncompressed,
381
+
CompressedSize: stat.totalCompressed,
382
+
CumulativeUncompressed: cumulative.uncompressed,
383
+
CumulativeCompressed: cumulative.compressed,
384
+
})
385
+
}
386
+
387
+
// Apply limit if specified
388
+
if limit > 0 && len(history) > limit {
389
+
history = history[:limit]
390
+
}
391
+
392
+
return history, nil
393
+
}
394
+
395
+
// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396
+
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397
+
// Define the path to the labels file
398
+
labelsDir := filepath.Join(bm.bundleDir, "labels")
399
+
labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400
+
401
+
// Check if file exists
402
+
if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403
+
log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404
+
// Return empty, not an error
405
+
return []*PLCOpLabel{}, nil
406
+
}
407
+
408
+
// Open the Zstd-compressed file
409
+
file, err := os.Open(labelsFile)
410
+
if err != nil {
411
+
return nil, fmt.Errorf("failed to open labels file: %w", err)
412
+
}
413
+
defer file.Close()
414
+
415
+
// Create a Zstd reader
416
+
zstdReader, err := zstd.NewReader(file)
417
+
if err != nil {
418
+
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419
+
}
420
+
defer zstdReader.Close()
421
+
422
+
// Create a CSV reader
423
+
csvReader := csv.NewReader(zstdReader)
424
+
// We skipped the header, so no header read needed
425
+
// Set FieldsPerRecord to 7 for validation
426
+
//csvReader.FieldsPerRecord = 7
427
+
428
+
var labels []*PLCOpLabel
429
+
430
+
// Read all records
431
+
for {
432
+
// Check for context cancellation
433
+
if err := ctx.Err(); err != nil {
434
+
return nil, err
435
+
}
436
+
437
+
record, err := csvReader.Read()
438
+
if err == io.EOF {
439
+
break // End of file
440
+
}
441
+
if err != nil {
442
+
log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443
+
continue // Skip bad line
444
+
}
445
+
446
+
// Parse the CSV record (which is []string)
447
+
label, err := parseLabelRecord(record)
448
+
if err != nil {
449
+
log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450
+
continue // Skip bad data
451
+
}
452
+
453
+
labels = append(labels, label)
454
+
}
455
+
456
+
return labels, nil
457
+
}
458
+
459
+
// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
460
+
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461
+
// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462
+
if len(record) != 6 {
463
+
err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464
+
// --- ADDED LOG ---
465
+
log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466
+
// ---
467
+
return nil, err
468
+
}
469
+
470
+
// 0:bundle
471
+
bundle, err := strconv.Atoi(record[0])
472
+
if err != nil {
473
+
// --- ADDED LOG ---
474
+
log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475
+
// ---
476
+
return nil, fmt.Errorf("parsing 'bundle': %w", err)
477
+
}
478
+
479
+
// 1:position
480
+
position, err := strconv.Atoi(record[1])
481
+
if err != nil {
482
+
// --- ADDED LOG ---
483
+
log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484
+
// ---
485
+
return nil, fmt.Errorf("parsing 'position': %w", err)
486
+
}
487
+
488
+
// 2:cid(short)
489
+
shortCID := record[2]
490
+
491
+
// 3:size
492
+
size, err := strconv.Atoi(record[3])
493
+
if err != nil {
494
+
// --- ADDED LOG ---
495
+
log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496
+
// ---
497
+
return nil, fmt.Errorf("parsing 'size': %w", err)
498
+
}
499
+
500
+
// 4:confidence
501
+
confidence, err := strconv.ParseFloat(record[4], 64)
502
+
if err != nil {
503
+
// --- ADDED LOG ---
504
+
log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505
+
// ---
506
+
return nil, fmt.Errorf("parsing 'confidence': %w", err)
507
+
}
508
+
509
+
// 5:labels
510
+
detectors := strings.Split(record[5], ";")
511
+
512
+
label := &PLCOpLabel{
513
+
Bundle: bundle,
514
+
Position: position,
515
+
CID: shortCID,
516
+
Size: size,
517
+
Confidence: confidence,
518
+
Detectors: detectors,
519
+
}
520
+
521
+
return label, nil
522
+
}
-70
internal/plc/ratelimiter.go
-70
internal/plc/ratelimiter.go
···
1
-
package plc
2
-
3
-
import (
4
-
"context"
5
-
"time"
6
-
)
7
-
8
-
// RateLimiter implements a token bucket rate limiter
9
-
type RateLimiter struct {
10
-
tokens chan struct{}
11
-
refillRate time.Duration
12
-
maxTokens int
13
-
stopRefill chan struct{}
14
-
}
15
-
16
-
// NewRateLimiter creates a new rate limiter
17
-
// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
18
-
func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
19
-
rl := &RateLimiter{
20
-
tokens: make(chan struct{}, requestsPerPeriod),
21
-
refillRate: period / time.Duration(requestsPerPeriod),
22
-
maxTokens: requestsPerPeriod,
23
-
stopRefill: make(chan struct{}),
24
-
}
25
-
26
-
// Fill initially
27
-
for i := 0; i < requestsPerPeriod; i++ {
28
-
rl.tokens <- struct{}{}
29
-
}
30
-
31
-
// Start refill goroutine
32
-
go rl.refill()
33
-
34
-
return rl
35
-
}
36
-
37
-
// refill adds tokens at the specified rate
38
-
func (rl *RateLimiter) refill() {
39
-
ticker := time.NewTicker(rl.refillRate)
40
-
defer ticker.Stop()
41
-
42
-
for {
43
-
select {
44
-
case <-ticker.C:
45
-
select {
46
-
case rl.tokens <- struct{}{}:
47
-
// Token added
48
-
default:
49
-
// Buffer full, skip
50
-
}
51
-
case <-rl.stopRefill:
52
-
return
53
-
}
54
-
}
55
-
}
56
-
57
-
// Wait blocks until a token is available
58
-
func (rl *RateLimiter) Wait(ctx context.Context) error {
59
-
select {
60
-
case <-rl.tokens:
61
-
return nil
62
-
case <-ctx.Done():
63
-
return ctx.Err()
64
-
}
65
-
}
66
-
67
-
// Stop stops the rate limiter
68
-
func (rl *RateLimiter) Stop() {
69
-
close(rl.stopRefill)
70
-
}
+91
-432
internal/plc/scanner.go
+91
-432
internal/plc/scanner.go
···
2
2
3
3
import (
4
4
"context"
5
-
"encoding/json"
6
5
"fmt"
7
6
"strings"
8
7
"time"
9
8
10
-
"github.com/acarl005/stripansi"
11
-
"github.com/atscan/atscanner/internal/config"
12
-
"github.com/atscan/atscanner/internal/log"
13
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/storage"
14
12
)
15
13
16
14
type Scanner struct {
17
-
client *Client
15
+
bundleManager *BundleManager
18
16
db storage.Database
19
17
config config.PLCConfig
20
-
bundleManager *BundleManager
21
18
}
22
19
23
-
func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
20
+
func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
24
21
log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
25
22
26
-
bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs)
27
-
if err != nil {
28
-
log.Error("Warning: failed to initialize bundle manager: %v", err)
29
-
bundleManager = &BundleManager{enabled: false}
30
-
}
31
-
32
23
return &Scanner{
33
-
client: NewClient(cfg.DirectoryURL),
24
+
bundleManager: bundleManager, // Use provided instance
34
25
db: db,
35
26
config: cfg,
36
-
bundleManager: bundleManager,
37
27
}
38
28
}
39
29
40
30
func (s *Scanner) Close() {
41
-
if s.bundleManager != nil {
42
-
s.bundleManager.Close()
43
-
}
44
-
}
45
-
46
-
// ScanMetrics tracks scan progress
47
-
type ScanMetrics struct {
48
-
totalFetched int64 // Total ops fetched from PLC/bundles
49
-
totalProcessed int64 // Unique ops processed (after dedup)
50
-
newEndpoints int64 // New endpoints discovered
51
-
endpointCounts map[string]int64
52
-
currentBundle int
53
-
startTime time.Time
54
-
}
55
-
56
-
func newMetrics(startBundle int) *ScanMetrics {
57
-
return &ScanMetrics{
58
-
endpointCounts: make(map[string]int64),
59
-
currentBundle: startBundle,
60
-
startTime: time.Now(),
61
-
}
62
-
}
63
-
64
-
func (m *ScanMetrics) logSummary() {
65
-
summary := formatEndpointCounts(m.endpointCounts)
66
-
if m.newEndpoints > 0 {
67
-
log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
68
-
m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
69
-
} else {
70
-
log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
71
-
m.totalProcessed, m.totalFetched, time.Since(m.startTime))
72
-
}
31
+
// Don't close bundleManager here - it's shared
73
32
}
74
33
75
34
func (s *Scanner) Scan(ctx context.Context) error {
76
35
log.Info("Starting PLC directory scan...")
77
-
log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
78
36
79
37
cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
80
38
if err != nil {
81
39
return fmt.Errorf("failed to get scan cursor: %w", err)
82
40
}
83
41
84
-
startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
85
-
metrics := newMetrics(startBundle)
86
-
87
-
if startBundle > 1 {
88
-
if err := s.ensureContinuity(ctx, startBundle); err != nil {
89
-
return err
90
-
}
91
-
}
42
+
metrics := newMetrics(cursor.LastBundleNumber + 1)
92
43
93
-
// Handle existing mempool first
94
-
if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
95
-
return s.handleMempoolOnly(ctx, metrics)
96
-
}
97
-
98
-
// Process bundles until incomplete or error
44
+
// Main processing loop
99
45
for {
100
46
if err := ctx.Err(); err != nil {
101
47
return err
102
48
}
103
49
104
-
if err := s.processSingleBundle(ctx, metrics); err != nil {
105
-
if s.shouldRetry(err) {
106
-
continue
107
-
}
108
-
break
109
-
}
110
-
111
-
if err := s.updateCursor(ctx, cursor, metrics); err != nil {
112
-
log.Error("Warning: failed to update cursor: %v", err)
113
-
}
114
-
}
115
-
116
-
// Try to finalize mempool
117
-
s.finalizeMempool(ctx, metrics)
118
-
119
-
metrics.logSummary()
120
-
return nil
121
-
}
122
-
123
-
func (s *Scanner) calculateStartBundle(lastBundle int) int {
124
-
if lastBundle == 0 {
125
-
return 1
126
-
}
127
-
return lastBundle + 1
128
-
}
129
-
130
-
func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
131
-
log.Info("Checking bundle continuity...")
132
-
if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
133
-
return fmt.Errorf("bundle continuity check failed: %w", err)
134
-
}
135
-
return nil
136
-
}
137
-
138
-
func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
139
-
count, err := s.db.GetMempoolCount(ctx)
140
-
if err != nil {
141
-
return false, err
142
-
}
143
-
return count > 0, nil
144
-
}
145
-
146
-
func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
147
-
count, _ := s.db.GetMempoolCount(ctx)
148
-
log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
149
-
150
-
if err := s.fillMempool(ctx, m); err != nil {
151
-
return err
152
-
}
153
-
154
-
if err := s.processMempool(ctx, m); err != nil {
155
-
log.Error("Error processing mempool: %v", err)
156
-
}
157
-
158
-
m.logSummary()
159
-
return nil
160
-
}
161
-
162
-
func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
163
-
log.Verbose("→ Processing bundle %06d...", m.currentBundle)
164
-
165
-
ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
166
-
if err != nil {
167
-
return s.handleBundleError(err, m)
168
-
}
169
-
170
-
if isComplete {
171
-
return s.handleCompleteBundle(ctx, ops, m)
172
-
}
173
-
return s.handleIncompleteBundle(ctx, ops, m)
174
-
}
175
-
176
-
func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
177
-
log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
178
-
179
-
if strings.Contains(err.Error(), "rate limited") {
180
-
log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
181
-
time.Sleep(5 * time.Minute)
182
-
return fmt.Errorf("retry")
183
-
}
184
-
185
-
if m.currentBundle > 1 {
186
-
log.Info("→ Reached end of available data")
187
-
}
188
-
return err
189
-
}
190
-
191
-
func (s *Scanner) shouldRetry(err error) bool {
192
-
return err != nil && err.Error() == "retry"
193
-
}
194
-
195
-
func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
196
-
counts, err := s.processBatch(ctx, ops)
197
-
if err != nil {
198
-
return err
199
-
}
200
-
201
-
s.mergeCounts(m.endpointCounts, counts)
202
-
m.totalProcessed += int64(len(ops)) // Unique ops after dedup
203
-
m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
204
-
205
-
batchTotal := sumCounts(counts)
206
-
log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
207
-
m.currentBundle, len(ops), batchTotal)
208
-
209
-
m.currentBundle++
210
-
return nil
211
-
}
212
-
213
-
func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
214
-
log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
215
-
216
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
217
-
return err
218
-
}
219
-
220
-
s.finalizeMempool(ctx, m)
221
-
return fmt.Errorf("incomplete") // Signal end of processing
222
-
}
223
-
224
-
func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
225
-
if err := s.fillMempool(ctx, m); err != nil {
226
-
log.Error("Error filling mempool: %v", err)
227
-
}
228
-
if err := s.processMempool(ctx, m); err != nil {
229
-
log.Error("Error processing mempool: %v", err)
230
-
}
231
-
}
232
-
233
-
func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
234
-
const fetchLimit = 1000
235
-
236
-
for {
237
-
count, err := s.db.GetMempoolCount(ctx)
50
+
// Fetch and save bundle (library handles mempool internally)
51
+
bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
238
52
if err != nil {
239
-
return err
240
-
}
53
+
if isInsufficientOpsError(err) {
54
+
// Show mempool status
55
+
stats := s.bundleManager.libManager.GetMempoolStats()
56
+
mempoolCount := stats["count"].(int)
241
57
242
-
if count >= BUNDLE_SIZE {
243
-
log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
244
-
return nil
245
-
}
58
+
if mempoolCount > 0 {
59
+
log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
60
+
mempoolCount, BUNDLE_SIZE)
61
+
} else {
62
+
log.Info("→ Caught up! No operations available")
63
+
}
64
+
break
65
+
}
246
66
247
-
log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
248
-
249
-
// ✅ Fix: Don't capture unused 'ops' variable
250
-
shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
251
-
if err != nil {
252
-
return err
253
-
}
67
+
if strings.Contains(err.Error(), "rate limited") {
68
+
log.Info("⚠ Rate limited, pausing for 5 minutes...")
69
+
time.Sleep(5 * time.Minute)
70
+
continue
71
+
}
254
72
255
-
if !shouldContinue {
256
-
finalCount, _ := s.db.GetMempoolCount(ctx)
257
-
log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
258
-
return nil
73
+
return fmt.Errorf("failed to fetch bundle: %w", err)
259
74
}
260
-
}
261
-
}
262
75
263
-
func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
264
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
265
-
if err != nil {
266
-
return false, err
267
-
}
268
-
269
-
var after string
270
-
if lastOp != nil {
271
-
after = lastOp.CreatedAt.Format(time.RFC3339Nano)
272
-
log.Verbose(" Using cursor: %s", after)
273
-
}
274
-
275
-
ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
276
-
if err != nil {
277
-
return false, fmt.Errorf("failed to fetch from PLC: %w", err)
278
-
}
279
-
280
-
fetchedCount := len(ops)
281
-
m.totalFetched += int64(fetchedCount) // Track all fetched
282
-
log.Verbose(" Fetched %d operations from PLC", fetchedCount)
283
-
284
-
if fetchedCount == 0 {
285
-
count, _ := s.db.GetMempoolCount(ctx)
286
-
log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
287
-
return false, nil
288
-
}
289
-
290
-
beforeCount, err := s.db.GetMempoolCount(ctx)
291
-
if err != nil {
292
-
return false, err
293
-
}
294
-
295
-
endpointsBefore := sumCounts(m.endpointCounts)
296
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
297
-
return false, err
298
-
}
299
-
endpointsAfter := sumCounts(m.endpointCounts)
300
-
m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
301
-
302
-
afterCount, err := s.db.GetMempoolCount(ctx)
303
-
if err != nil {
304
-
return false, err
305
-
}
306
-
307
-
uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
308
-
m.totalProcessed += uniqueAdded // Track unique ops processed
309
-
310
-
log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
311
-
uniqueAdded, int64(fetchedCount)-uniqueAdded)
312
-
313
-
// Continue only if got full batch
314
-
shouldContinue := fetchedCount >= limit
315
-
if !shouldContinue {
316
-
log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
317
-
}
318
-
319
-
return shouldContinue, nil
320
-
}
321
-
322
-
func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
323
-
mempoolOps := make([]storage.MempoolOperation, len(ops))
324
-
for i, op := range ops {
325
-
mempoolOps[i] = storage.MempoolOperation{
326
-
DID: op.DID,
327
-
Operation: string(op.RawJSON),
328
-
CID: op.CID,
329
-
CreatedAt: op.CreatedAt,
330
-
}
331
-
}
332
-
333
-
if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
334
-
return err
335
-
}
336
-
337
-
// NEW: Create/update DID records immediately when adding to mempool
338
-
for _, op := range ops {
339
-
info := ExtractDIDInfo(&op)
340
-
341
-
// Validate handle length before saving
342
-
validHandle := ValidateHandle(info.Handle)
343
-
if info.Handle != "" && validHandle == "" {
344
-
log.Verbose("Skipping invalid handle for DID %s (length: %d)", op.DID, len(info.Handle))
345
-
}
346
-
347
-
if err := s.db.UpsertDIDFromMempool(ctx, op.DID, validHandle, info.PDS); err != nil {
348
-
log.Error("Failed to upsert DID %s in mempool: %v", op.DID, err)
349
-
// Don't fail the whole operation, just log
350
-
}
351
-
}
352
-
353
-
// Process for endpoint discovery
354
-
batchCounts, err := s.processBatch(ctx, ops)
355
-
s.mergeCounts(counts, batchCounts)
356
-
return err
357
-
}
358
-
359
-
func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
360
-
for {
361
-
count, err := s.db.GetMempoolCount(ctx)
76
+
// Process operations for endpoint discovery
77
+
counts, err := s.processBatch(ctx, bundle.Operations)
362
78
if err != nil {
363
-
return err
79
+
log.Error("Failed to process batch: %v", err)
80
+
// Continue anyway
364
81
}
365
82
366
-
log.Verbose("Mempool contains %d operations", count)
83
+
// Update metrics
84
+
s.mergeCounts(metrics.endpointCounts, counts)
85
+
metrics.totalProcessed += int64(len(bundle.Operations))
86
+
metrics.newEndpoints += sumCounts(counts)
87
+
metrics.currentBundle = bundle.BundleNumber
367
88
368
-
if count < BUNDLE_SIZE {
369
-
log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
370
-
return nil
371
-
}
89
+
log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
90
+
bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
372
91
373
-
log.Info("→ Creating bundle from mempool (%d operations available)...", count)
374
-
375
-
// Updated to receive 4 values instead of 3
376
-
bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
377
-
if err != nil {
378
-
return err
379
-
}
380
-
381
-
// Process and update metrics
382
-
countsBefore := sumCounts(m.endpointCounts)
383
-
counts, _ := s.processBatch(ctx, ops)
384
-
s.mergeCounts(m.endpointCounts, counts)
385
-
newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
386
-
387
-
m.totalProcessed += int64(len(ops))
388
-
m.newEndpoints += newEndpointsFound
389
-
m.currentBundle = bundleNum
390
-
391
-
if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92
+
// Update cursor
93
+
if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
392
94
log.Error("Warning: failed to update cursor: %v", err)
393
95
}
394
-
395
-
log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
396
96
}
397
-
}
398
97
399
-
func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
400
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
401
-
if err != nil {
402
-
return 0, nil, "", err
98
+
// Show final mempool status
99
+
stats := s.bundleManager.libManager.GetMempoolStats()
100
+
if count, ok := stats["count"].(int); ok && count > 0 {
101
+
log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102
+
count, float64(count)/float64(BUNDLE_SIZE)*100)
403
103
}
404
104
405
-
ops, ids := s.deduplicateMempool(mempoolOps)
406
-
if len(ops) < BUNDLE_SIZE {
407
-
return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
408
-
}
409
-
410
-
// Determine cursor from last bundle
411
-
cursor := ""
412
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
413
-
if err == nil && lastBundle > 0 {
414
-
if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
415
-
cursor = bundle.EndTime.Format(time.RFC3339Nano)
416
-
}
417
-
}
418
-
419
-
bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
420
-
if err != nil {
421
-
return 0, nil, "", err
422
-
}
423
-
424
-
if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
425
-
return 0, nil, "", err
426
-
}
427
-
428
-
return bundleNum, ops, cursor, nil
105
+
metrics.logSummary()
106
+
return nil
429
107
}
430
108
431
-
func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
432
-
ops := make([]PLCOperation, 0, BUNDLE_SIZE)
433
-
ids := make([]int64, 0, BUNDLE_SIZE)
434
-
seenCIDs := make(map[string]bool)
435
-
436
-
for _, mop := range mempoolOps {
437
-
if seenCIDs[mop.CID] {
438
-
ids = append(ids, mop.ID)
439
-
continue
440
-
}
441
-
seenCIDs[mop.CID] = true
442
-
443
-
var op PLCOperation
444
-
json.Unmarshal([]byte(mop.Operation), &op)
445
-
op.RawJSON = []byte(mop.Operation)
446
-
447
-
ops = append(ops, op)
448
-
ids = append(ids, mop.ID)
449
-
450
-
if len(ops) >= BUNDLE_SIZE {
451
-
break
452
-
}
453
-
}
454
-
455
-
return ops, ids
456
-
}
457
-
109
+
// processBatch extracts endpoints from operations
458
110
func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
459
111
counts := make(map[string]int64)
460
112
seen := make(map[string]*PLCOperation)
461
113
462
114
// Collect unique endpoints
463
-
for _, op := range ops {
115
+
for i := range ops {
116
+
op := &ops[i]
117
+
464
118
if op.IsNullified() {
465
119
continue
466
120
}
467
-
for _, ep := range s.extractEndpointsFromOperation(op) {
121
+
122
+
for _, ep := range s.extractEndpointsFromOperation(*op) {
468
123
key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
469
124
if _, exists := seen[key]; !exists {
470
-
seen[key] = &op
125
+
seen[key] = op
471
126
}
472
127
}
473
128
}
···
483
138
}
484
139
485
140
if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
486
-
log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141
+
log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
487
142
continue
488
143
}
489
144
490
-
log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145
+
log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
491
146
counts[epType]++
492
147
}
493
148
494
149
return counts, nil
495
-
}
496
-
497
-
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
498
-
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
499
-
EndpointType: epType,
500
-
Endpoint: endpoint,
501
-
DiscoveredAt: discoveredAt,
502
-
LastChecked: time.Time{},
503
-
Status: storage.EndpointStatusUnknown,
504
-
})
505
150
}
506
151
507
152
func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
544
189
return nil
545
190
}
546
191
547
-
func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
548
-
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
549
-
Source: "plc_directory",
550
-
LastBundleNumber: m.currentBundle - 1,
551
-
LastScanTime: time.Now().UTC(),
552
-
RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192
+
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
194
+
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195
+
EndpointType: epType,
196
+
Endpoint: endpoint,
197
+
DiscoveredAt: discoveredAt,
198
+
LastChecked: time.Time{},
199
+
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
553
201
})
554
202
}
555
203
···
577
225
return total
578
226
}
579
227
580
-
func formatEndpointCounts(counts map[string]int64) string {
581
-
if len(counts) == 0 {
582
-
return "0 new endpoints"
583
-
}
228
+
func isInsufficientOpsError(err error) bool {
229
+
return err != nil && strings.Contains(err.Error(), "insufficient operations")
230
+
}
584
231
585
-
total := sumCounts(counts)
232
+
// ScanMetrics tracks scan progress
233
+
type ScanMetrics struct {
234
+
totalProcessed int64
235
+
newEndpoints int64
236
+
endpointCounts map[string]int64
237
+
currentBundle int
238
+
startTime time.Time
239
+
}
586
240
587
-
if len(counts) == 1 {
588
-
for typ, count := range counts {
589
-
return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
590
-
}
241
+
func newMetrics(startBundle int) *ScanMetrics {
242
+
return &ScanMetrics{
243
+
endpointCounts: make(map[string]int64),
244
+
currentBundle: startBundle,
245
+
startTime: time.Now(),
591
246
}
247
+
}
592
248
593
-
parts := make([]string, 0, len(counts))
594
-
for typ, count := range counts {
595
-
parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249
+
func (m *ScanMetrics) logSummary() {
250
+
if m.newEndpoints > 0 {
251
+
log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252
+
m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253
+
} else {
254
+
log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255
+
m.totalProcessed, time.Since(m.startTime))
596
256
}
597
-
return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
598
257
}
+68
-55
internal/plc/types.go
+68
-55
internal/plc/types.go
···
1
1
package plc
2
2
3
-
import "time"
4
-
5
-
type PLCOperation struct {
6
-
DID string `json:"did"`
7
-
Operation map[string]interface{} `json:"operation"`
8
-
CID string `json:"cid"`
9
-
Nullified interface{} `json:"nullified,omitempty"`
10
-
CreatedAt time.Time `json:"createdAt"`
11
-
12
-
RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13
-
}
3
+
import (
4
+
"net/url"
5
+
"strings"
14
6
15
-
// Helper method to check if nullified
16
-
func (op *PLCOperation) IsNullified() bool {
17
-
if op.Nullified == nil {
18
-
return false
19
-
}
20
-
21
-
switch v := op.Nullified.(type) {
22
-
case bool:
23
-
return v
24
-
case string:
25
-
return v != ""
26
-
default:
27
-
return false
28
-
}
29
-
}
30
-
31
-
// Get nullifying CID if available
32
-
func (op *PLCOperation) GetNullifyingCID() string {
33
-
if s, ok := op.Nullified.(string); ok {
34
-
return s
35
-
}
36
-
return ""
37
-
}
7
+
plclib "tangled.org/atscan.net/plcbundle/plc"
8
+
)
38
9
39
-
type DIDDocument struct {
40
-
Context []string `json:"@context"`
41
-
ID string `json:"id"`
42
-
AlsoKnownAs []string `json:"alsoKnownAs"`
43
-
VerificationMethod []VerificationMethod `json:"verificationMethod"`
44
-
Service []Service `json:"service"`
45
-
}
10
+
// Re-export library types
11
+
type PLCOperation = plclib.PLCOperation
12
+
type DIDDocument = plclib.DIDDocument
13
+
type Client = plclib.Client
14
+
type ExportOptions = plclib.ExportOptions
46
15
47
-
type VerificationMethod struct {
48
-
ID string `json:"id"`
49
-
Type string `json:"type"`
50
-
Controller string `json:"controller"`
51
-
PublicKeyMultibase string `json:"publicKeyMultibase"`
52
-
}
16
+
// Keep your custom types
17
+
const BUNDLE_SIZE = 10000
53
18
54
-
type Service struct {
55
-
ID string `json:"id"`
56
-
Type string `json:"type"`
57
-
ServiceEndpoint string `json:"serviceEndpoint"`
58
-
}
59
-
60
-
// DIDHistoryEntry represents a single operation in DID history
61
19
type DIDHistoryEntry struct {
62
20
Operation PLCOperation `json:"operation"`
63
21
PLCBundle string `json:"plc_bundle,omitempty"`
64
22
}
65
23
66
-
// DIDHistory represents the full history of a DID
67
24
type DIDHistory struct {
68
25
DID string `json:"did"`
69
26
Current *PLCOperation `json:"current"`
···
74
31
Type string
75
32
Endpoint string
76
33
}
34
+
35
+
// PLCOpLabel holds metadata from the label CSV file
36
+
type PLCOpLabel struct {
37
+
Bundle int `json:"bundle"`
38
+
Position int `json:"position"`
39
+
CID string `json:"cid"`
40
+
Size int `json:"size"`
41
+
Confidence float64 `json:"confidence"`
42
+
Detectors []string `json:"detectors"`
43
+
}
44
+
45
+
// validateEndpoint checks if endpoint is in correct format: https://<domain>
46
+
func validateEndpoint(endpoint string) bool {
47
+
// Must not be empty
48
+
if endpoint == "" {
49
+
return false
50
+
}
51
+
52
+
// Must not have trailing slash
53
+
if strings.HasSuffix(endpoint, "/") {
54
+
return false
55
+
}
56
+
57
+
// Parse URL
58
+
u, err := url.Parse(endpoint)
59
+
if err != nil {
60
+
return false
61
+
}
62
+
63
+
// Must use https scheme
64
+
if u.Scheme != "https" {
65
+
return false
66
+
}
67
+
68
+
// Must have a host
69
+
if u.Host == "" {
70
+
return false
71
+
}
72
+
73
+
// Must not have path (except empty)
74
+
if u.Path != "" && u.Path != "/" {
75
+
return false
76
+
}
77
+
78
+
// Must not have query parameters
79
+
if u.RawQuery != "" {
80
+
return false
81
+
}
82
+
83
+
// Must not have fragment
84
+
if u.Fragment != "" {
85
+
return false
86
+
}
87
+
88
+
return true
89
+
}
-21
internal/storage/db.go
-21
internal/storage/db.go
···
50
50
GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
51
51
UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
52
52
53
-
// Bundle operations
54
-
CreateBundle(ctx context.Context, bundle *PLCBundle) error
55
-
GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
56
-
GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
57
-
GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
58
-
GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, error)
59
-
GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
60
-
GetLastBundleNumber(ctx context.Context) (int, error)
61
-
GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
62
-
GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
63
-
64
-
// Mempool operations
65
-
AddToMempool(ctx context.Context, ops []MempoolOperation) error
66
-
GetMempoolCount(ctx context.Context) (int, error)
67
-
GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
68
-
DeleteFromMempool(ctx context.Context, ids []int64) error
69
-
GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
70
-
GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
71
-
GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
72
-
GetMempoolUncompressedSize(ctx context.Context) (int64, error)
73
-
74
53
// Metrics
75
54
StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
76
55
GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
+25
-549
internal/storage/postgres.go
+25
-549
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
11
-
"github.com/atscan/atscanner/internal/log"
10
+
"github.com/atscan/atscand/internal/log"
12
11
"github.com/jackc/pgx/v5"
13
12
"github.com/jackc/pgx/v5/pgxpool"
14
13
_ "github.com/jackc/pgx/v5/stdlib"
···
85
84
ip TEXT,
86
85
ipv6 TEXT,
87
86
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
88
88
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
89
89
UNIQUE(endpoint_type, endpoint)
90
90
);
···
96
96
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97
97
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98
98
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
99
100
100
101
-- IP infos table (IP as PRIMARY KEY)
101
102
CREATE TABLE IF NOT EXISTS ip_infos (
···
157
158
records_processed BIGINT DEFAULT 0
158
159
);
159
160
160
-
CREATE TABLE IF NOT EXISTS plc_bundles (
161
-
bundle_number INTEGER PRIMARY KEY,
162
-
start_time TIMESTAMP NOT NULL,
163
-
end_time TIMESTAMP NOT NULL,
164
-
did_count INTEGER NOT NULL DEFAULT 0,
165
-
hash TEXT NOT NULL,
166
-
compressed_hash TEXT NOT NULL,
167
-
compressed_size BIGINT NOT NULL,
168
-
uncompressed_size BIGINT NOT NULL,
169
-
cumulative_compressed_size BIGINT NOT NULL,
170
-
cumulative_uncompressed_size BIGINT NOT NULL,
171
-
cursor TEXT,
172
-
prev_bundle_hash TEXT,
173
-
compressed BOOLEAN DEFAULT true,
174
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
175
-
);
176
-
177
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
178
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
179
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
180
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
181
-
182
-
CREATE TABLE IF NOT EXISTS plc_mempool (
183
-
id BIGSERIAL PRIMARY KEY,
184
-
did TEXT NOT NULL,
185
-
operation TEXT NOT NULL,
186
-
cid TEXT NOT NULL UNIQUE,
187
-
created_at TIMESTAMP NOT NULL,
188
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
189
-
);
190
-
191
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
192
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
193
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
194
-
195
161
-- Minimal dids table
196
162
CREATE TABLE IF NOT EXISTS dids (
197
163
did TEXT PRIMARY KEY,
···
244
210
245
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
246
212
query := `
247
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at)
248
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
249
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
250
216
last_checked = EXCLUDED.last_checked,
251
217
status = EXCLUDED.status,
···
261
227
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
262
228
ELSE endpoints.ip_resolved_at
263
229
END,
230
+
valid = EXCLUDED.valid,
264
231
updated_at = CURRENT_TIMESTAMP
265
232
RETURNING id
266
233
`
267
234
err := p.db.QueryRowContext(ctx, query,
268
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
269
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
270
237
return err
271
238
}
272
239
···
287
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
288
255
query := `
289
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
290
-
ip, ipv6, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
291
258
FROM endpoints
292
259
WHERE endpoint = $1 AND endpoint_type = $2
293
260
`
···
298
265
299
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
300
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
301
-
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
302
269
)
303
270
if err != nil {
304
271
return nil, err
···
324
291
query := `
325
292
SELECT DISTINCT ON (COALESCE(server_did, id::text))
326
293
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
327
-
ip, ipv6, ip_resolved_at, updated_at
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
328
295
FROM endpoints
329
296
WHERE 1=1
330
297
`
···
337
304
args = append(args, filter.Type)
338
305
argIdx++
339
306
}
307
+
308
+
// NEW: Filter by valid flag
309
+
if filter.OnlyValid {
310
+
query += fmt.Sprintf(" AND valid = true", argIdx)
311
+
}
340
312
if filter.Status != "" {
341
313
statusInt := EndpointStatusUnknown
342
314
switch filter.Status {
···
602
574
last_checked,
603
575
status,
604
576
ip,
605
-
ipv6
577
+
ipv6,
578
+
valid
606
579
FROM endpoints
607
580
WHERE endpoint_type = 'pds'
608
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
609
582
)
610
583
SELECT
611
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
612
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
613
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
614
587
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
···
679
652
var scannedAt sql.NullTime
680
653
681
654
err := rows.Scan(
682
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
683
656
&userCount, &responseTime, &version, &scannedAt,
684
657
&city, &country, &countryCode, &asn, &asnOrg,
685
658
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
741
714
742
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
743
716
query := `
744
-
WITH target_endpoint AS MATERIALIZED ( -- MATERIALIZED fence for optimization
717
+
WITH target_endpoint AS MATERIALIZED (
745
718
SELECT
746
719
e.id,
747
720
e.endpoint,
···
750
723
e.last_checked,
751
724
e.status,
752
725
e.ip,
753
-
e.ipv6
726
+
e.ipv6,
727
+
e.valid
754
728
FROM endpoints e
755
729
WHERE e.endpoint = $1
756
730
AND e.endpoint_type = 'pds'
757
-
LIMIT 1 -- Early termination since we expect exactly 1 row
731
+
LIMIT 1
758
732
)
759
733
SELECT
760
734
te.id,
···
765
739
te.status,
766
740
te.ip,
767
741
te.ipv6,
742
+
te.valid,
768
743
latest.user_count,
769
744
latest.response_time,
770
745
latest.version,
···
774
749
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
775
750
i.latitude, i.longitude,
776
751
i.raw_data,
777
-
-- Inline aliases aggregation (avoid second CTE)
778
752
COALESCE(
779
753
ARRAY(
780
754
SELECT e2.endpoint
···
787
761
),
788
762
ARRAY[]::text[]
789
763
) as aliases,
790
-
-- Inline first_discovered_at (avoid aggregation)
791
764
CASE
792
765
WHEN te.server_did IS NOT NULL THEN (
793
766
SELECT MIN(e3.discovered_at)
···
828
801
var firstDiscoveredAt sql.NullTime
829
802
830
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
831
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
832
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
833
806
&city, &country, &countryCode, &asn, &asnOrg,
834
807
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
855
828
// Set aliases and is_primary
856
829
detail.Aliases = aliases
857
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
858
-
// Has server_did - check if this is the first discovered
859
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
860
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
861
833
} else {
862
-
// No server_did means unique server
863
834
detail.IsPrimary = true
864
835
}
865
836
···
1190
1161
}
1191
1162
}
1192
1163
return 0
1193
-
}
1194
-
1195
-
// ===== BUNDLE OPERATIONS =====
1196
-
1197
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1198
-
// Calculate cumulative sizes from previous bundle
1199
-
if bundle.BundleNumber > 1 {
1200
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1201
-
if err == nil && prevBundle != nil {
1202
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1203
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1204
-
} else {
1205
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1206
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1207
-
}
1208
-
} else {
1209
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1210
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1211
-
}
1212
-
1213
-
query := `
1214
-
INSERT INTO plc_bundles (
1215
-
bundle_number, start_time, end_time, did_count,
1216
-
hash, compressed_hash, compressed_size, uncompressed_size,
1217
-
cumulative_compressed_size, cumulative_uncompressed_size,
1218
-
cursor, prev_bundle_hash, compressed
1219
-
)
1220
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1221
-
ON CONFLICT(bundle_number) DO UPDATE SET
1222
-
start_time = EXCLUDED.start_time,
1223
-
end_time = EXCLUDED.end_time,
1224
-
did_count = EXCLUDED.did_count,
1225
-
hash = EXCLUDED.hash,
1226
-
compressed_hash = EXCLUDED.compressed_hash,
1227
-
compressed_size = EXCLUDED.compressed_size,
1228
-
uncompressed_size = EXCLUDED.uncompressed_size,
1229
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1230
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1231
-
cursor = EXCLUDED.cursor,
1232
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1233
-
compressed = EXCLUDED.compressed
1234
-
`
1235
-
_, err := p.db.ExecContext(ctx, query,
1236
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1237
-
bundle.DIDCount, bundle.Hash, bundle.CompressedHash,
1238
-
bundle.CompressedSize, bundle.UncompressedSize,
1239
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1240
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1241
-
)
1242
-
1243
-
return err
1244
-
}
1245
-
1246
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1247
-
query := `
1248
-
SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash,
1249
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1250
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1251
-
FROM plc_bundles
1252
-
WHERE bundle_number = $1
1253
-
`
1254
-
1255
-
var bundle PLCBundle
1256
-
var prevHash sql.NullString
1257
-
var cursor sql.NullString
1258
-
1259
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1260
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1261
-
&bundle.DIDCount, &bundle.Hash, &bundle.CompressedHash,
1262
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1263
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1264
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1265
-
)
1266
-
if err != nil {
1267
-
return nil, err
1268
-
}
1269
-
1270
-
if prevHash.Valid {
1271
-
bundle.PrevBundleHash = prevHash.String
1272
-
}
1273
-
if cursor.Valid {
1274
-
bundle.Cursor = cursor.String
1275
-
}
1276
-
1277
-
return &bundle, nil
1278
-
}
1279
-
1280
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1281
-
query := `
1282
-
SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash,
1283
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1284
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1285
-
FROM plc_bundles
1286
-
ORDER BY bundle_number DESC
1287
-
LIMIT $1
1288
-
`
1289
-
1290
-
rows, err := p.db.QueryContext(ctx, query, limit)
1291
-
if err != nil {
1292
-
return nil, err
1293
-
}
1294
-
defer rows.Close()
1295
-
1296
-
return p.scanBundles(rows)
1297
-
}
1298
-
1299
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1300
-
// Get bundle numbers from dids table
1301
-
var bundleNumbersJSON []byte
1302
-
err := p.db.QueryRowContext(ctx, `
1303
-
SELECT bundle_numbers FROM dids WHERE did = $1
1304
-
`, did).Scan(&bundleNumbersJSON)
1305
-
1306
-
if err == sql.ErrNoRows {
1307
-
return []*PLCBundle{}, nil
1308
-
}
1309
-
if err != nil {
1310
-
return nil, err
1311
-
}
1312
-
1313
-
var bundleNumbers []int
1314
-
if err := json.Unmarshal(bundleNumbersJSON, &bundleNumbers); err != nil {
1315
-
return nil, err
1316
-
}
1317
-
1318
-
if len(bundleNumbers) == 0 {
1319
-
return []*PLCBundle{}, nil
1320
-
}
1321
-
1322
-
// Build query with IN clause
1323
-
placeholders := make([]string, len(bundleNumbers))
1324
-
args := make([]interface{}, len(bundleNumbers))
1325
-
for i, num := range bundleNumbers {
1326
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1327
-
args[i] = num
1328
-
}
1329
-
1330
-
query := fmt.Sprintf(`
1331
-
SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash,
1332
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1333
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1334
-
FROM plc_bundles
1335
-
WHERE bundle_number IN (%s)
1336
-
ORDER BY bundle_number ASC
1337
-
`, strings.Join(placeholders, ","))
1338
-
1339
-
rows, err := p.db.QueryContext(ctx, query, args...)
1340
-
if err != nil {
1341
-
return nil, err
1342
-
}
1343
-
defer rows.Close()
1344
-
1345
-
return p.scanBundles(rows)
1346
-
}
1347
-
1348
-
func (p *PostgresDB) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, error) {
1349
-
query := `
1350
-
SELECT did
1351
-
FROM dids
1352
-
WHERE bundle_numbers @> $1::jsonb
1353
-
ORDER BY did
1354
-
`
1355
-
1356
-
rows, err := p.db.QueryContext(ctx, query, fmt.Sprintf("[%d]", bundleNum))
1357
-
if err != nil {
1358
-
return nil, err
1359
-
}
1360
-
defer rows.Close()
1361
-
1362
-
var dids []string
1363
-
for rows.Next() {
1364
-
var did string
1365
-
if err := rows.Scan(&did); err != nil {
1366
-
return nil, err
1367
-
}
1368
-
dids = append(dids, did)
1369
-
}
1370
-
1371
-
return dids, rows.Err()
1372
-
}
1373
-
1374
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1375
-
var bundles []*PLCBundle
1376
-
1377
-
for rows.Next() {
1378
-
var bundle PLCBundle
1379
-
var prevHash sql.NullString
1380
-
var cursor sql.NullString
1381
-
1382
-
if err := rows.Scan(
1383
-
&bundle.BundleNumber,
1384
-
&bundle.StartTime,
1385
-
&bundle.EndTime,
1386
-
&bundle.DIDCount,
1387
-
&bundle.Hash,
1388
-
&bundle.CompressedHash,
1389
-
&bundle.CompressedSize,
1390
-
&bundle.UncompressedSize,
1391
-
&bundle.CumulativeCompressedSize,
1392
-
&bundle.CumulativeUncompressedSize,
1393
-
&cursor,
1394
-
&prevHash,
1395
-
&bundle.Compressed,
1396
-
&bundle.CreatedAt,
1397
-
); err != nil {
1398
-
return nil, err
1399
-
}
1400
-
1401
-
if prevHash.Valid {
1402
-
bundle.PrevBundleHash = prevHash.String
1403
-
}
1404
-
if cursor.Valid {
1405
-
bundle.Cursor = cursor.String
1406
-
}
1407
-
1408
-
bundles = append(bundles, &bundle)
1409
-
}
1410
-
1411
-
return bundles, rows.Err()
1412
-
}
1413
-
1414
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1415
-
var count, lastBundleNum int64
1416
-
err := p.db.QueryRowContext(ctx, `
1417
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1418
-
FROM plc_bundles
1419
-
`).Scan(&count, &lastBundleNum)
1420
-
if err != nil {
1421
-
return 0, 0, 0, 0, err
1422
-
}
1423
-
1424
-
if lastBundleNum == 0 {
1425
-
return 0, 0, 0, 0, nil
1426
-
}
1427
-
1428
-
var compressedSize, uncompressedSize int64
1429
-
err = p.db.QueryRowContext(ctx, `
1430
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1431
-
FROM plc_bundles
1432
-
WHERE bundle_number = $1
1433
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1434
-
if err != nil {
1435
-
return 0, 0, 0, 0, err
1436
-
}
1437
-
1438
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1439
-
}
1440
-
1441
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1442
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1443
-
var num int
1444
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1445
-
return num, err
1446
-
}
1447
-
1448
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1449
-
query := `
1450
-
SELECT bundle_number
1451
-
FROM plc_bundles
1452
-
WHERE start_time <= $1 AND end_time >= $1
1453
-
ORDER BY bundle_number ASC
1454
-
LIMIT 1
1455
-
`
1456
-
1457
-
var bundleNum int
1458
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1459
-
if err == sql.ErrNoRows {
1460
-
query = `
1461
-
SELECT bundle_number
1462
-
FROM plc_bundles
1463
-
WHERE end_time < $1
1464
-
ORDER BY bundle_number DESC
1465
-
LIMIT 1
1466
-
`
1467
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1468
-
if err == sql.ErrNoRows {
1469
-
return 1, nil
1470
-
}
1471
-
if err != nil {
1472
-
return 0, err
1473
-
}
1474
-
return bundleNum, nil
1475
-
}
1476
-
if err != nil {
1477
-
return 0, err
1478
-
}
1479
-
1480
-
return bundleNum, nil
1481
-
}
1482
-
1483
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1484
-
query := `
1485
-
WITH daily_stats AS (
1486
-
SELECT
1487
-
DATE(start_time) as date,
1488
-
MAX(bundle_number) as last_bundle,
1489
-
COUNT(*) as bundle_count,
1490
-
SUM(uncompressed_size) as total_uncompressed,
1491
-
SUM(compressed_size) as total_compressed,
1492
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1493
-
MAX(cumulative_compressed_size) as cumulative_compressed
1494
-
FROM plc_bundles
1495
-
WHERE bundle_number >= $1
1496
-
GROUP BY DATE(start_time)
1497
-
)
1498
-
SELECT
1499
-
date::text,
1500
-
last_bundle,
1501
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1502
-
total_uncompressed,
1503
-
total_compressed,
1504
-
cumulative_uncompressed,
1505
-
cumulative_compressed
1506
-
FROM daily_stats
1507
-
ORDER BY date ASC
1508
-
`
1509
-
1510
-
if limit > 0 {
1511
-
query += fmt.Sprintf(" LIMIT %d", limit)
1512
-
}
1513
-
1514
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1515
-
if err != nil {
1516
-
return nil, err
1517
-
}
1518
-
defer rows.Close()
1519
-
1520
-
var history []*PLCHistoryPoint
1521
-
for rows.Next() {
1522
-
var point PLCHistoryPoint
1523
-
var cumulativeOps int64
1524
-
1525
-
err := rows.Scan(
1526
-
&point.Date,
1527
-
&point.BundleNumber,
1528
-
&cumulativeOps,
1529
-
&point.UncompressedSize,
1530
-
&point.CompressedSize,
1531
-
&point.CumulativeUncompressed,
1532
-
&point.CumulativeCompressed,
1533
-
)
1534
-
if err != nil {
1535
-
return nil, err
1536
-
}
1537
-
1538
-
point.OperationCount = int(cumulativeOps)
1539
-
1540
-
history = append(history, &point)
1541
-
}
1542
-
1543
-
return history, rows.Err()
1544
-
}
1545
-
1546
-
// ===== MEMPOOL OPERATIONS =====
1547
-
1548
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1549
-
if len(ops) == 0 {
1550
-
return nil
1551
-
}
1552
-
1553
-
tx, err := p.db.BeginTx(ctx, nil)
1554
-
if err != nil {
1555
-
return err
1556
-
}
1557
-
defer tx.Rollback()
1558
-
1559
-
stmt, err := tx.PrepareContext(ctx, `
1560
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1561
-
VALUES ($1, $2, $3, $4)
1562
-
ON CONFLICT(cid) DO NOTHING
1563
-
`)
1564
-
if err != nil {
1565
-
return err
1566
-
}
1567
-
defer stmt.Close()
1568
-
1569
-
for _, op := range ops {
1570
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1571
-
if err != nil {
1572
-
return err
1573
-
}
1574
-
}
1575
-
1576
-
return tx.Commit()
1577
-
}
1578
-
1579
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1580
-
query := "SELECT COUNT(*) FROM plc_mempool"
1581
-
var count int
1582
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1583
-
return count, err
1584
-
}
1585
-
1586
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1587
-
query := `
1588
-
SELECT id, did, operation, cid, created_at, added_at
1589
-
FROM plc_mempool
1590
-
ORDER BY created_at ASC
1591
-
LIMIT $1
1592
-
`
1593
-
1594
-
rows, err := p.db.QueryContext(ctx, query, limit)
1595
-
if err != nil {
1596
-
return nil, err
1597
-
}
1598
-
defer rows.Close()
1599
-
1600
-
var ops []MempoolOperation
1601
-
for rows.Next() {
1602
-
var op MempoolOperation
1603
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1604
-
if err != nil {
1605
-
return nil, err
1606
-
}
1607
-
ops = append(ops, op)
1608
-
}
1609
-
1610
-
return ops, rows.Err()
1611
-
}
1612
-
1613
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1614
-
if len(ids) == 0 {
1615
-
return nil
1616
-
}
1617
-
1618
-
placeholders := make([]string, len(ids))
1619
-
args := make([]interface{}, len(ids))
1620
-
for i, id := range ids {
1621
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1622
-
args[i] = id
1623
-
}
1624
-
1625
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1626
-
strings.Join(placeholders, ","))
1627
-
1628
-
_, err := p.db.ExecContext(ctx, query, args...)
1629
-
return err
1630
-
}
1631
-
1632
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1633
-
query := `
1634
-
SELECT id, did, operation, cid, created_at, added_at
1635
-
FROM plc_mempool
1636
-
ORDER BY created_at ASC, id ASC
1637
-
LIMIT 1
1638
-
`
1639
-
1640
-
var op MempoolOperation
1641
-
err := p.db.QueryRowContext(ctx, query).Scan(
1642
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1643
-
)
1644
-
if err == sql.ErrNoRows {
1645
-
return nil, nil
1646
-
}
1647
-
if err != nil {
1648
-
return nil, err
1649
-
}
1650
-
1651
-
return &op, nil
1652
-
}
1653
-
1654
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1655
-
query := `
1656
-
SELECT id, did, operation, cid, created_at, added_at
1657
-
FROM plc_mempool
1658
-
ORDER BY created_at DESC, id DESC
1659
-
LIMIT 1
1660
-
`
1661
-
1662
-
var op MempoolOperation
1663
-
err := p.db.QueryRowContext(ctx, query).Scan(
1664
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1665
-
)
1666
-
if err == sql.ErrNoRows {
1667
-
return nil, nil
1668
-
}
1669
-
if err != nil {
1670
-
return nil, err
1671
-
}
1672
-
1673
-
return &op, nil
1674
-
}
1675
-
1676
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1677
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1678
-
var count int
1679
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1680
-
return count, err
1681
-
}
1682
-
1683
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1684
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1685
-
var size int64
1686
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1687
-
return size, err
1688
1164
}
1689
1165
1690
1166
// ===== CURSOR OPERATIONS =====
+7
-14
internal/storage/types.go
+7
-14
internal/storage/types.go
···
26
26
LastChecked time.Time
27
27
Status int
28
28
IP string
29
-
IPv6 string // NEW
29
+
IPv6 string
30
30
IPResolvedAt time.Time
31
+
Valid bool
31
32
UpdatedAt time.Time
32
33
}
33
34
···
76
77
77
78
// EndpointFilter for querying endpoints
78
79
type EndpointFilter struct {
79
-
Type string // "pds", "labeler", etc.
80
+
Type string
80
81
Status string
81
82
MinUserCount int64
82
83
OnlyStale bool
84
+
OnlyValid bool
83
85
RecheckInterval time.Duration
84
-
Random bool // NEW: Return results in random order
86
+
Random bool
85
87
Limit int
86
88
Offset int
87
89
}
···
153
155
CumulativeCompressed int64 `json:"cumulative_compressed"`
154
156
}
155
157
156
-
// MempoolOperation represents an operation waiting to be bundled.
type MempoolOperation struct {
	ID        int64     // database row id (primary key)
	DID       string    // DID the operation belongs to
	Operation string    // raw operation payload
	CID       string    // content identifier; unique per row (enforced via ON CONFLICT(cid) on insert)
	CreatedAt time.Time // operation creation time, supplied by the caller on insert
	AddedAt   time.Time // when the row entered the mempool; not part of the INSERT column list, so presumably a DB default — confirm schema
}
165
-
166
158
// ScanCursor stores scanning progress
167
159
type ScanCursor struct {
168
160
Source string
···
223
215
LastChecked time.Time
224
216
Status int
225
217
IP string
226
-
IPv6 string // NEW
218
+
IPv6 string
219
+
Valid bool // NEW
227
220
228
221
// From latest endpoint_scans (via JOIN)
229
222
LatestScan *struct {
+2
-2
internal/worker/scheduler.go
+2
-2
internal/worker/scheduler.go
+113
utils/import-labels.js
+113
utils/import-labels.js
···
1
+
import { file, write } from "bun";
import { join } from "path";
import { mkdir } from "fs/promises";
import { init, compress } from "@bokuweb/zstd-wasm";

// Imports a PLC operation-labels CSV (first column = bundle number) and
// writes one zstd-compressed <bundle>.csv.zst file per bundle into
// <plc.bundle_dir>/labels.

// --- Configuration ---
const CSV_FILE = process.argv[2];
const CONFIG_FILE = "config.yaml";
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
// ---------------------

if (!CSV_FILE) {
  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
  process.exit(1);
}

console.log("========================================");
console.log("PLC Operation Labels Import (Bun + WASM)");
console.log("========================================");

// 1. Read and parse config
console.log(`Loading config from ${CONFIG_FILE}...`);
const configFile = await file(CONFIG_FILE).text();
const config = Bun.YAML.parse(configFile);
const bundleDir = config?.plc?.bundle_dir;

if (!bundleDir) {
  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
  process.exit(1);
}

const FINAL_LABELS_DIR = join(bundleDir, "labels");
await mkdir(FINAL_LABELS_DIR, { recursive: true });

console.log(`CSV File: ${CSV_FILE}`);
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
console.log("");

// 2. Initialize Zstd WASM module
await init();

// --- Pass 1: Read entire file into memory and group by bundle ---
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
console.warn("This will use a large amount of RAM!");

const startTime = Date.now();
const bundles = new Map(); // Map<string, string[]>
let lineCount = 0;

// Route a single CSV line into its bundle bucket. Shared by the streaming
// loop and the final-remainder handling below so no record is missed.
function addLine(line) {
  if (line === "") return;
  lineCount++;

  if (lineCount === 1 && line.startsWith("bundle,")) {
    return; // Skip header
  }

  const firstCommaIndex = line.indexOf(",");
  if (firstCommaIndex === -1) {
    console.warn(`Skipping malformed line: ${line}`);
    return;
  }
  const bundleNumStr = line.substring(0, firstCommaIndex);
  // Zero-pad to 6 digits to match the bundle file naming scheme (%06d).
  const bundleKey = bundleNumStr.padStart(6, "0");

  // Add line to the correct bundle's array
  if (!bundles.has(bundleKey)) {
    bundles.set(bundleKey, []);
  }
  bundles.get(bundleKey).push(line);
}

const inputFile = file(CSV_FILE);
const fileStream = inputFile.stream();
const decoder = new TextDecoder();
let remainder = "";

for await (const chunk of fileStream) {
  // FIX: { stream: true } keeps multi-byte UTF-8 sequences that straddle a
  // chunk boundary intact; without it they were decoded as garbage.
  const text = remainder + decoder.decode(chunk, { stream: true });
  const lines = text.split("\n");
  remainder = lines.pop() || "";

  for (const line of lines) {
    addLine(line);
  }
}
// Flush any bytes still buffered inside the decoder.
remainder += decoder.decode();
// FIX: the final remainder was previously discarded, silently dropping the
// last record of a file that does not end with a newline.
addLine(remainder);

console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
console.log(`Found ${bundles.size} unique bundles.`);

// --- Pass 2: Compress and write each bundle ---
console.log("\nPass 2/2: Compressing and writing bundle files...");
let i = 0;
for (const [bundleKey, lines] of bundles.entries()) {
  i++;
  console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);

  // Join all lines for this bundle; the trailing "\n" keeps the
  // decompressed file newline-terminated like the source CSV.
  const content = lines.join("\n") + "\n";

  // Compress the string
  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);

  // Write the compressed data to the file
  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
  await write(outPath, compressedData);
}

// 3. Summary
const totalTime = (Date.now() - startTime) / 1000;
console.log("\n========================================");
console.log("Import Summary");
console.log("========================================");
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
+91
utils/import-labels.sh
+91
utils/import-labels.sh
···
1
+
#!/bin/bash
# import-labels-v4-sorted-pipe.sh
#
# Streams a labels CSV, sorts it by bundle number, and writes one
# zstd-compressed file per bundle in a single pass.

set -e

if [ $# -lt 1 ]; then
    echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>"
    exit 1
fi

CSV_FILE="$1"
CONFIG_FILE="config.yaml"

[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1

# Extract bundle directory path
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)

[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1

FINAL_LABELS_DIR="$BUNDLE_DIR/labels"

echo "========================================"
echo "PLC Operation Labels Import (Sorted Pipe)"
echo "========================================"
echo "CSV File: $CSV_FILE"
echo "Output Dir: $FINAL_LABELS_DIR"
echo ""

# Ensure the final directory exists
mkdir -p "$FINAL_LABELS_DIR"

echo "Streaming, sorting, and compressing on the fly..."
# FIX: these two messages used backticks inside double quotes, which the
# shell treats as command substitution — the banner actually *executed*
# `pv` and `sort`. Single quotes keep the backticks literal.
echo 'This will take time. `pv` will show progress of the TAIL command.'
echo 'The `sort` command will run after `pv` is complete.'
echo ""

# This is the single-pass pipeline
tail -n +2 "$CSV_FILE" | \
pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
sort -t, -k1,1n | \
awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
# This awk script EXPECTS input sorted by bundle number (col 1)
BEGIN {
    # last_bundle_num tracks the bundle we are currently writing
    last_bundle_num = -1
    # cmd holds the current zstd pipe command
    cmd = ""
}
{
    current_bundle_num = $1

    # Check if the bundle number has changed
    if (current_bundle_num != last_bundle_num) {

        # If it changed, and we have an old pipe open, close it
        if (last_bundle_num != -1) {
            close(cmd)
        }

        # Create the new pipe command, writing to the final .zst file.
        # FIX: -q -f so an existing output file is overwritten instead of
        # zstd prompting interactively (which would stall the pipe).
        outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
        cmd = "zstd -q -f -T0 -o " outfile

        # Update the tracker
        last_bundle_num = current_bundle_num

        # Print progress to stderr
        printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
    }

    # Print the current line ($0) to the open pipe
    # The first time this runs for a bundle, it opens the pipe
    # Subsequent times, it writes to the already-open pipe
    print $0 | cmd
}
# END block: close the very last pipe
END {
    if (last_bundle_num != -1) {
        close(cmd)
    }
    printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
}'

echo ""
echo "========================================"
echo "Import Summary"
echo "========================================"
echo "✓ Import completed successfully!"
echo "Label files are stored in: $FINAL_LABELS_DIR"