+2 -1  .gitignore
+39 -5  Makefile
···
-all: run
+.PHONY: all build install test clean fmt lint help
+
+# Binary name
+BINARY_NAME=atscand
+INSTALL_PATH=$(GOPATH)/bin
+
+# Go commands
+GOCMD=go
+GOBUILD=$(GOCMD) build
+GOINSTALL=$(GOCMD) install
+GOCLEAN=$(GOCMD) clean
+GOTEST=$(GOCMD) test
+GOGET=$(GOCMD) get
+GOFMT=$(GOCMD) fmt
+GOMOD=$(GOCMD) mod
+GORUN=$(GOCMD) run
+
+# Default target
+all: build
+
+# Build the CLI tool
+build:
+	@echo "Building $(BINARY_NAME)..."
+	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+# Install the CLI tool globally
+install:
+	@echo "Installing $(BINARY_NAME)..."
+	$(GOINSTALL) ./cmd/atscand
 
 run:
-	go run cmd/atscanner.go -verbose
+	$(GORUN) cmd/atscand/main.go -verbose
 
-clean-db:
-	dropdb -U atscanner atscanner
-	createdb atscanner -O atscanner
+update-plcbundle:
+	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  make build    - Build the binary"
+	@echo "  make install  - Install binary globally"
+	@echo "  make run      - Run app"
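Note: with this Makefile the usual local loop is `make build` followed by `./atscand -config config.yaml`; the `run` target is shorthand for `go run cmd/atscand/main.go -verbose`, and `make install` assumes `GOPATH` is set so the binary lands in `$(GOPATH)/bin`.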
+159  cmd/atscand/main.go
···
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/atscan/atscand/internal/api"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/pds"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
+	"github.com/atscan/atscand/internal/worker"
+)
+
+const VERSION = "1.0.0"
+
+func main() {
+	configPath := flag.String("config", "config.yaml", "path to config file")
+	verbose := flag.Bool("verbose", false, "enable verbose logging")
+	flag.Parse()
+
+	// Load configuration
+	cfg, err := config.Load(*configPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Override verbose setting if flag is provided
+	if *verbose {
+		cfg.API.Verbose = true
+	}
+
+	// Initialize logger
+	log.Init(cfg.API.Verbose)
+
+	// Print banner
+	log.Banner(VERSION)
+
+	// Print configuration summary
+	log.PrintConfig(map[string]string{
+		"Database Type":     cfg.Database.Type,
+		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+		"PLC Directory":     cfg.PLC.DirectoryURL,
+		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+		"PDS Timeout":       cfg.PDS.Timeout.String(),
+		"API Host":          cfg.API.Host,
+		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+	})
+
+	// Initialize database using factory pattern
+	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+	if err != nil {
+		log.Fatal("Failed to initialize database: %v", err)
+	}
+	defer func() {
+		log.Info("Closing database connection...")
+		db.Close()
+	}()
+
+	// Set scan retention from config
+	if cfg.PDS.ScanRetention > 0 {
+		db.SetScanRetention(cfg.PDS.ScanRetention)
+		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+	}
+
+	// Run migrations
+	if err := db.Migrate(); err != nil {
+		log.Fatal("Failed to run migrations: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Initialize workers
+	log.Info("Initializing scanners...")
+
+	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+	if err != nil {
+		log.Fatal("Failed to create bundle manager: %v", err)
+	}
+	defer bundleManager.Close()
+	log.Verbose("✓ Bundle manager initialized (shared)")
+
+	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+	defer plcScanner.Close()
+	log.Verbose("✓ PLC scanner initialized")
+
+	pdsScanner := pds.NewScanner(db, cfg.PDS)
+	log.Verbose("✓ PDS scanner initialized")
+
+	scheduler := worker.NewScheduler()
+
+	// Schedule PLC directory scan
+	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+		if err := plcScanner.Scan(ctx); err != nil {
+			log.Error("PLC scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+	// Schedule PDS availability checks
+	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+		if err := pdsScanner.ScanAll(ctx); err != nil {
+			log.Error("PDS scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+	// Start API server
+	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+	go func() {
+		if err := apiServer.Start(); err != nil {
+			log.Fatal("API server error: %v", err)
+		}
+	}()
+
+	// Give the API server a moment to start
+	time.Sleep(100 * time.Millisecond)
+	log.Info("✓ API server started successfully")
+	log.Info("")
+	log.Info("🚀 ATScanner is running!")
+	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+	log.Info("   Press Ctrl+C to stop")
+	log.Info("")
+
+	// Start scheduler
+	scheduler.Start(ctx)
+
+	// Wait for interrupt
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	<-sigChan
+
+	log.Info("")
+	log.Info("Shutting down gracefully...")
+	cancel()
+
+	log.Info("Stopping API server...")
+	apiServer.Shutdown(context.Background())
+
+	log.Info("Waiting for active tasks to complete...")
+	time.Sleep(2 * time.Second)
+
+	log.Info("✓ Shutdown complete. Goodbye!")
+}
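Note: `apiServer.Shutdown(context.Background())` places no upper bound on how long in-flight requests can hold up shutdown. A minimal sketch of a bounded variant, as a drop-in for that one call (the 5-second budget is an assumption, not part of this change):

	// Give the API server a fixed window to drain before exiting.
	shutdownCtx, cancelShutdown := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelShutdown()
	apiServer.Shutdown(shutdownCtx)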
-152  cmd/atscanner.go
···
-package main
-
-import (
-	"context"
-	"flag"
-	"fmt"
-	"os"
-	"os/signal"
-	"syscall"
-	"time"
-
-	"github.com/atscan/atscanner/internal/api"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/pds"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
-	"github.com/atscan/atscanner/internal/worker"
-)
-
-const VERSION = "1.0.0"
-
-func main() {
-	configPath := flag.String("config", "config.yaml", "path to config file")
-	verbose := flag.Bool("verbose", false, "enable verbose logging")
-	flag.Parse()
-
-	// Load configuration
-	cfg, err := config.Load(*configPath)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Override verbose setting if flag is provided
-	if *verbose {
-		cfg.API.Verbose = true
-	}
-
-	// Initialize logger
-	log.Init(cfg.API.Verbose)
-
-	// Print banner
-	log.Banner(VERSION)
-
-	// Print configuration summary
-	log.PrintConfig(map[string]string{
-		"Database Type":     cfg.Database.Type,
-		"Database Path":     cfg.Database.Path, // Will be auto-redacted
-		"PLC Directory":     cfg.PLC.DirectoryURL,
-		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
-		"PLC Bundle Dir":    cfg.PLC.BundleDir,
-		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
-		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
-		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
-		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
-		"PDS Timeout":       cfg.PDS.Timeout.String(),
-		"API Host":          cfg.API.Host,
-		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
-		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
-	})
-
-	// Initialize database using factory pattern
-	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
-	if err != nil {
-		log.Fatal("Failed to initialize database: %v", err)
-	}
-	defer func() {
-		log.Info("Closing database connection...")
-		db.Close()
-	}()
-
-	// Set scan retention from config
-	if cfg.PDS.ScanRetention > 0 {
-		db.SetScanRetention(cfg.PDS.ScanRetention)
-		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
-	}
-
-	// Run migrations
-	if err := db.Migrate(); err != nil {
-		log.Fatal("Failed to run migrations: %v", err)
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Initialize workers
-	log.Info("Initializing scanners...")
-
-	plcScanner := plc.NewScanner(db, cfg.PLC)
-	defer plcScanner.Close()
-	log.Verbose("✓ PLC scanner initialized")
-
-	pdsScanner := pds.NewScanner(db, cfg.PDS)
-	log.Verbose("✓ PDS scanner initialized")
-
-	scheduler := worker.NewScheduler()
-
-	// Schedule PLC directory scan
-	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
-		if err := plcScanner.Scan(ctx); err != nil {
-			log.Error("PLC scan error: %v", err)
-		}
-	})
-	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
-
-	// Schedule PDS availability checks
-	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
-		if err := pdsScanner.ScanAll(ctx); err != nil {
-			log.Error("PDS scan error: %v", err)
-		}
-	})
-	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
-
-	// Start API server
-	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
-	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
-	go func() {
-		if err := apiServer.Start(); err != nil {
-			log.Fatal("API server error: %v", err)
-		}
-	}()
-
-	// Give the API server a moment to start
-	time.Sleep(100 * time.Millisecond)
-	log.Info("✓ API server started successfully")
-	log.Info("")
-	log.Info("🚀 ATScanner is running!")
-	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
-	log.Info("   Press Ctrl+C to stop")
-	log.Info("")
-
-	// Start scheduler
-	scheduler.Start(ctx)
-
-	// Wait for interrupt
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
-
-	log.Info("")
-	log.Info("Shutting down gracefully...")
-	cancel()
-
-	log.Info("Stopping API server...")
-	apiServer.Shutdown(context.Background())
-
-	log.Info("Waiting for active tasks to complete...")
-	time.Sleep(2 * time.Second)
-
-	log.Info("✓ Shutdown complete. Goodbye!")
-}
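Note: aside from the module rename (atscanner to atscand), the new entry point differs from this deleted file in one substantive way: a shared `plc.NewBundleManager(...)` is created up front and handed to both `plc.NewScanner` and `api.NewServer`, instead of those components reaching the bundle data through the database.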
+168  cmd/import-labels/main.go
···
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	PLC struct {
+		BundleDir string `yaml:"bundle_dir"`
+	} `yaml:"plc"`
+}
+
+var CONFIG_FILE = "config.yaml"
+
+// ---------------------
+
+func main() {
+	// Define a new flag for changing the directory
+	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+	flag.Usage = func() { // Custom usage message
+		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+		flag.PrintDefaults()
+	}
+	flag.Parse() // Parse all defined flags
+
+	// Change directory if the flag was used
+	if *workDir != "." {
+		fmt.Printf("Changing working directory to %s...\n", *workDir)
+		if err := os.Chdir(*workDir); err != nil {
+			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+			os.Exit(1)
+		}
+	}
+
+	// --- REMOVED UNUSED CODE ---
+	// The csvFilePath variable and NArg check were removed
+	// as the script now reads from stdin.
+	// ---------------------------
+
+	fmt.Println("========================================")
+	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+	fmt.Println("========================================")
+
+	// 1. Read config (will now read from the new CWD)
+	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+	configData, err := os.ReadFile(CONFIG_FILE)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+		os.Exit(1)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(configData, &config); err != nil {
+		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+		os.Exit(1)
+	}
+
+	if config.PLC.BundleDir == "" {
+		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+		os.Exit(1)
+	}
+
+	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+	fmt.Println("Waiting for sorted data from stdin...")
+
+	// 2. Process sorted data from stdin
+	// This script *requires* the input to be sorted by bundle number.
+
+	var currentWriter *zstd.Encoder
+	var currentFile *os.File
+	var lastBundleKey string = ""
+
+	lineCount := 0
+	startTime := time.Now()
+
+	scanner := bufio.NewScanner(os.Stdin)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		lineCount++
+
+		parts := strings.SplitN(line, ",", 2)
+		if len(parts) < 1 {
+			continue // Skip empty/bad lines
+		}
+
+		bundleNumStr := parts[0]
+		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+		// If the bundle key is new, close the old writer and open a new one.
+		if bundleKey != lastBundleKey {
+			// Close the previous writer/file
+			if currentWriter != nil {
+				if err := currentWriter.Close(); err != nil {
+					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+				}
+				currentFile.Close()
+			}
+
+			// Start the new one
+			fmt.Printf("  -> Writing bundle %s\n", bundleKey)
+			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+			file, err := os.Create(outPath)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+				os.Exit(1)
+			}
+			currentFile = file
+
+			writer, err := zstd.NewWriter(file)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+				os.Exit(1)
+			}
+			currentWriter = writer
+			lastBundleKey = bundleKey
+		}
+
+		// Write the line to the currently active writer
+		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+		}
+
+		// Progress update
+		if lineCount%100000 == 0 {
+			elapsed := time.Since(startTime).Seconds()
+			rate := float64(lineCount) / elapsed
+			fmt.Printf("  ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+		}
+	}
+
+	// 3. Close the very last writer
+	if currentWriter != nil {
+		if err := currentWriter.Close(); err != nil {
+			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+		}
+		currentFile.Close()
+	}
+
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+	}
+
+	totalTime := time.Since(startTime)
+	fmt.Println("\n========================================")
+	fmt.Println("Import Summary")
+	fmt.Println("========================================")
+	fmt.Printf("✓ Import completed in %v\n", totalTime)
+	fmt.Printf("Total lines processed: %d\n", lineCount)
+}
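Note: the per-bundle files written above can be read back with the decompression side of the same library. A minimal sketch, reusing the imports already present in this file (the helper name and the idea of reading one file at a time are illustrative, not part of this change):

	// readLabels prints every CSV row stored in one labels bundle file,
	// e.g. readLabels(finalLabelsDir, "000001").
	func readLabels(labelsDir, bundleKey string) error {
		f, err := os.Open(filepath.Join(labelsDir, bundleKey+".csv.zst"))
		if err != nil {
			return err
		}
		defer f.Close()

		// zstd.NewReader is the counterpart of the zstd.NewWriter used above.
		dec, err := zstd.NewReader(f)
		if err != nil {
			return err
		}
		defer dec.Close()

		sc := bufio.NewScanner(dec)
		sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
		for sc.Scan() {
			fmt.Println(sc.Text()) // one original CSV row per line
		}
		return sc.Err()
	}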
+1 -1  config.sample.yaml
+6 -5  go.mod
···
-module github.com/atscan/atscanner
+module github.com/atscan/atscand
 
 go 1.23.0
 
 require (
 	github.com/gorilla/mux v1.8.1
 	github.com/lib/pq v1.10.9
-	github.com/mattn/go-sqlite3 v1.14.18
 	gopkg.in/yaml.v3 v3.0.1
 )
 
-require github.com/klauspost/compress v1.18.0
+require github.com/klauspost/compress v1.18.1
 
 require (
-	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 	github.com/gorilla/handlers v1.5.2
+	github.com/jackc/pgx/v5 v5.7.6
+	tangled.org/atscan.net/plcbundle v0.3.6
 )
 
 require (
 	github.com/felixge/httpsnoop v1.0.3 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-	github.com/jackc/pgx/v5 v5.7.6 // indirect
 	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	golang.org/x/crypto v0.37.0 // indirect
 	golang.org/x/sync v0.13.0 // indirect
 	golang.org/x/text v0.24.0 // indirect
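Note: `tangled.org/atscan.net/plcbundle` is the new external bundle library; per the Makefile's `update-plcbundle` target it is fetched with `GOPROXY=direct`, so upgrades bypass the module proxy. The `mattn/go-sqlite3` driver is dropped, and `jackc/pgx/v5` is promoted from an indirect to a direct dependency.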
+17 -7  go.sum
···
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
 github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
 github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
-github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
-github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
 golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
 golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
 golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
 golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+271
-450
internal/api/handlers.go
+271
-450
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
47
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
48
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
49
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
50
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
51
49
}
52
50
53
51
// ===== REQUEST HELPERS =====
···
77
75
78
76
// ===== FORMATTING HELPERS =====
79
77
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": len(bundle.DIDs),
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
-
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
100
80
"id": ep.ID,
···
103
83
"discovered_at": ep.DiscoveredAt,
104
84
"last_checked": ep.LastChecked,
105
85
"status": statusToString(ep.Status),
106
-
// REMOVED: "user_count": ep.UserCount, // No longer exists
107
86
}
108
87
109
-
// Add IP if available
88
+
// Add IPs if available
110
89
if ep.IP != "" {
111
90
response["ip"] = ep.IP
112
91
}
113
-
114
-
// REMOVED: IP info extraction - no longer in Endpoint struct
115
-
// IPInfo is now in separate table, joined only in PDS handlers
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
94
+
}
116
95
117
96
return response
118
97
}
···
165
144
resp.json(stats)
166
145
}
167
146
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
152
+
endpointType := r.URL.Query().Get("type")
153
+
if endpointType == "" {
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
156
+
}
157
+
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
170
+
if err != nil {
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
168
183
// ===== PDS HANDLERS =====
169
184
170
185
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
···
233
248
"endpoint": pds.Endpoint,
234
249
"discovered_at": pds.DiscoveredAt,
235
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
236
252
}
237
253
238
254
// Add server_did if available
···
257
273
}
258
274
}
259
275
260
-
// Add IP if available
276
+
// Add IPs if available
261
277
if pds.IP != "" {
262
278
response["ip"] = pds.IP
279
+
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
263
282
}
264
283
265
284
// Add IP info (from ip_infos table via JOIN)
···
665
684
return
666
685
}
667
686
668
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
669
-
if err != nil {
670
-
resp.error(err.Error(), http.StatusInternalServerError)
671
-
return
672
-
}
673
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
674
688
resp.json(map[string]interface{}{
675
689
"total_unique_dids": totalDIDs,
676
690
"last_bundle": lastBundle,
···
681
695
682
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
683
697
resp := newResponse(w)
684
-
685
698
bundleNum, err := getBundleNumber(r)
686
699
if err != nil {
687
700
resp.error("invalid bundle number", http.StatusBadRequest)
688
701
return
689
702
}
690
703
691
-
// Try to get existing bundle
692
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
693
-
if err == nil {
694
-
// Bundle exists, return it normally
695
-
resp.json(formatBundleResponse(bundle))
696
-
return
697
-
}
698
-
699
-
// Bundle not found - check if it's the next upcoming bundle
700
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
701
707
if err != nil {
702
-
resp.error("bundle not found", http.StatusNotFound)
703
-
return
704
-
}
705
-
706
-
if bundleNum == lastBundle+1 {
707
-
// This is the upcoming bundle - return preview based on mempool
708
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
709
-
if err != nil {
710
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
711
717
return
712
718
}
713
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
714
720
return
715
721
}
716
722
717
-
// Not an upcoming bundle, just not found
718
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
719
724
}
720
725
721
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
722
-
// Get mempool stats
723
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
724
-
if err != nil {
725
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
726
743
}
744
+
}
745
+
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
727
749
728
-
if mempoolCount == 0 {
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
729
752
return map[string]interface{}{
730
753
"plc_bundle_number": bundleNum,
731
754
"is_upcoming": true,
···
735
758
}, nil
736
759
}
737
760
738
-
// Get first and last operations for time range
739
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
740
-
if err != nil {
741
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
742
771
}
743
772
744
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
745
-
if err != nil {
746
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
747
775
}
748
776
749
-
// Get unique DID count
750
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
751
-
if err != nil {
752
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
753
780
}
754
-
755
-
// Get uncompressed size estimate
756
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
757
-
if err != nil {
758
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
759
783
}
760
784
761
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
762
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
763
-
764
-
// Calculate completion estimate
765
-
var estimatedCompletionTime *time.Time
766
-
var operationsNeeded int
767
-
var currentRate float64
768
-
769
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
770
-
771
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
772
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
773
-
if timeSpan > 0 {
774
-
currentRate = float64(mempoolCount) / timeSpan
775
-
if currentRate > 0 {
776
-
secondsNeeded := float64(operationsNeeded) / currentRate
777
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
778
-
estimatedCompletionTime = &completionTime
779
-
}
780
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
781
789
}
782
790
783
-
// Get previous bundle for cursor context
784
-
var prevBundleHash string
785
-
var cursor string
791
+
// Get previous bundle info
786
792
if bundleNum > 1 {
787
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
788
-
if err == nil {
789
-
prevBundleHash = prevBundle.Hash
790
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
791
-
}
792
-
}
793
-
794
-
// Determine bundle status
795
-
status := "filling"
796
-
if mempoolCount >= plc.BUNDLE_SIZE {
797
-
status = "ready"
798
-
}
799
-
800
-
// Build upcoming bundle response
801
-
result := map[string]interface{}{
802
-
"plc_bundle_number": bundleNum,
803
-
"is_upcoming": true,
804
-
"status": status,
805
-
"operation_count": mempoolCount,
806
-
"target_operation_count": plc.BUNDLE_SIZE,
807
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
808
-
"operations_needed": operationsNeeded,
809
-
"did_count": uniqueDIDCount,
810
-
"start_time": firstOp.CreatedAt, // This is FIXED once first op exists
811
-
"current_end_time": lastOp.CreatedAt, // This will change as more ops arrive
812
-
"uncompressed_size": uncompressedSize,
813
-
"estimated_compressed_size": estimatedCompressedSize,
814
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
815
-
"prev_bundle_hash": prevBundleHash,
816
-
"cursor": cursor,
817
-
}
818
-
819
-
if estimatedCompletionTime != nil {
820
-
result["estimated_completion_time"] = *estimatedCompletionTime
821
-
result["current_rate_per_second"] = currentRate
822
-
}
823
-
824
-
// Get actual mempool operations if requested
825
-
if r.URL.Query().Get("include_dids") == "true" {
826
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
827
-
if err == nil {
828
-
// Extract unique DIDs
829
-
didSet := make(map[string]bool)
830
-
for _, op := range ops {
831
-
didSet[op.DID] = true
832
-
}
833
-
dids := make([]string, 0, len(didSet))
834
-
for did := range didSet {
835
-
dids = append(dids, did)
836
-
}
837
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
838
796
}
839
797
}
840
798
···
850
808
return
851
809
}
852
810
853
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
854
813
if err != nil {
855
814
resp.error("bundle not found", http.StatusNotFound)
856
815
return
857
816
}
858
817
859
818
resp.json(map[string]interface{}{
860
-
"plc_bundle_number": bundle.BundleNumber,
861
-
"did_count": len(bundle.DIDs),
862
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
863
822
})
864
823
}
865
824
···
874
833
875
834
compressed := r.URL.Query().Get("compressed") != "false"
876
835
877
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
878
837
if err == nil {
879
838
// Bundle exists, serve it normally
880
839
resp.bundleHeaders(bundle)
···
888
847
}
889
848
890
849
// Bundle not found - check if it's the upcoming bundle
891
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
892
-
if err != nil {
893
-
resp.error("bundle not found", http.StatusNotFound)
894
-
return
895
-
}
896
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
897
851
if bundleNum == lastBundle+1 {
898
852
// This is the upcoming bundle - serve from mempool
899
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
900
854
return
901
855
}
902
856
···
904
858
resp.error("bundle not found", http.StatusNotFound)
905
859
}
906
860
907
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
908
-
ctx := r.Context()
909
-
910
-
// Get mempool count
911
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
912
-
if err != nil {
913
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
914
-
return
915
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
916
865
917
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
918
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
919
868
return
920
869
}
921
870
922
-
// Get mempool operations (up to BUNDLE_SIZE)
923
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
924
873
if err != nil {
925
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
926
875
return
927
876
}
928
877
929
-
if len(mempoolOps) == 0 {
930
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
931
880
return
932
881
}
933
882
934
-
// Get time range
935
-
firstOp := mempoolOps[0]
936
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
937
886
938
887
// Extract unique DIDs
939
888
didSet := make(map[string]bool)
940
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
941
890
didSet[op.DID] = true
942
891
}
943
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
944
899
// Get previous bundle hash
945
900
prevBundleHash := ""
946
901
if bundleNum > 1 {
947
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
948
903
prevBundleHash = prevBundle.Hash
949
904
}
950
905
}
951
906
952
-
// Serialize operations to JSONL
953
-
var buf []byte
954
-
for _, mop := range mempoolOps {
955
-
buf = append(buf, []byte(mop.Operation)...)
956
-
buf = append(buf, '\n')
957
-
}
958
-
959
-
// Calculate size
960
-
uncompressedSize := int64(len(buf))
961
-
962
907
// Set headers
963
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
964
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
965
910
w.Header().Set("X-Bundle-Status", "preview")
966
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
967
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
968
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
969
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
970
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
971
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
972
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
973
919
974
920
w.Header().Set("Content-Type", "application/jsonl")
975
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
976
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
977
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
978
922
923
+
// Stream operations as JSONL
979
924
w.WriteHeader(http.StatusOK)
980
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
981
937
}
982
938
983
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
984
940
resp := newResponse(w)
985
-
path := bundle.GetFilePath(s.plcBundleDir)
986
941
987
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
988
944
if err != nil {
989
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
990
946
return
991
947
}
992
-
defer file.Close()
993
-
994
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
995
949
996
950
w.Header().Set("Content-Type", "application/zstd")
997
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
998
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
999
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
1000
954
1001
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
1002
958
}
1003
959
1004
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
1005
961
resp := newResponse(w)
1006
962
1007
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
1008
965
if err != nil {
1009
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
1010
967
return
1011
968
}
1012
-
1013
-
// Serialize to JSONL
1014
-
var buf []byte
1015
-
for _, op := range ops {
1016
-
buf = append(buf, op.RawJSON...)
1017
-
buf = append(buf, '\n')
1018
-
}
1019
-
1020
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
1021
-
compressedSize := int64(0)
1022
-
if fileInfo != nil {
1023
-
compressedSize = fileInfo.Size()
1024
-
}
969
+
defer reader.Close()
1025
970
1026
971
w.Header().Set("Content-Type", "application/jsonl")
1027
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
1028
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
1029
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
1030
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
1031
-
if compressedSize > 0 {
1032
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
1033
978
}
1034
979
980
+
// Stream the data directly to the response
1035
981
w.WriteHeader(http.StatusOK)
1036
-
w.Write(buf)
982
+
io.Copy(w, reader)
1037
983
}
1038
984
1039
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
1040
986
resp := newResponse(w)
1041
987
limit := getQueryInt(r, "limit", 50)
1042
988
1043
-
bundles, err := s.db.GetBundles(r.Context(), limit)
1044
-
if err != nil {
1045
-
resp.error(err.Error(), http.StatusInternalServerError)
1046
-
return
1047
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
1048
990
1049
991
response := make([]map[string]interface{}, len(bundles))
1050
992
for i, bundle := range bundles {
1051
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
1052
994
}
1053
995
1054
996
resp.json(response)
···
1057
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
1058
1000
resp := newResponse(w)
1059
1001
1060
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
1061
-
if err != nil {
1062
-
resp.error(err.Error(), http.StatusInternalServerError)
1063
-
return
1064
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
1065
1008
1066
1009
resp.json(map[string]interface{}{
1067
-
"plc_bundle_count": count,
1068
-
"last_bundle_number": lastBundle,
1069
-
"total_compressed_size": compressedSize,
1070
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1071
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
1072
-
"total_uncompressed_size": uncompressedSize,
1073
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1074
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
1075
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
1076
1015
})
1077
1016
}
1078
1017
···
1080
1019
1081
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
1082
1021
resp := newResponse(w)
1083
-
ctx := r.Context()
1084
1022
1085
-
count, err := s.db.GetMempoolCount(ctx)
1086
-
if err != nil {
1087
-
resp.error(err.Error(), http.StatusInternalServerError)
1088
-
return
1089
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
1090
1025
1091
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
1092
-
if err != nil {
1093
-
resp.error(err.Error(), http.StatusInternalServerError)
1094
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
1095
1030
}
1096
1031
1097
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
1098
-
if err != nil {
1099
-
resp.error(err.Error(), http.StatusInternalServerError)
1100
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
1101
1036
}
1102
1037
1103
-
result := map[string]interface{}{
1104
-
"operation_count": count,
1105
-
"unique_did_count": uniqueDIDCount,
1106
-
"uncompressed_size": uncompressedSize,
1107
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1108
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
1109
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
1110
1042
1111
-
if count > 0 {
1112
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
1113
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
1114
1045
1115
-
if count < plc.BUNDLE_SIZE {
1116
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
1117
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
1118
1049
if timeSpan > 0 {
1119
1050
opsPerSecond := float64(count) / timeSpan
1120
1051
if opsPerSecond > 0 {
1121
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
1122
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
1123
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1057
+
result["current_rate_per_second"] = opsPerSecond
1124
1058
result["operations_needed"] = remainingOps
1125
-
result["current_rate_per_second"] = opsPerSecond
1126
1059
}
1127
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
1128
1066
}
1129
-
} else {
1130
-
result["estimated_next_bundle_time"] = time.Now()
1131
-
result["operations_needed"] = 0
1132
1067
}
1133
1068
}
1134
1069
} else {
1070
+
// Empty mempool
1135
1071
result["mempool_start_time"] = nil
1136
1072
result["estimated_next_bundle_time"] = nil
1137
1073
}
···
1156
1092
1157
1093
// ===== VERIFICATION HANDLERS =====
1158
1094
1159
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
1160
-
resp := newResponse(w)
1161
-
vars := mux.Vars(r)
1162
-
1163
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
1164
-
if err != nil {
1165
-
resp.error("Invalid bundle number", http.StatusBadRequest)
1166
-
return
1167
-
}
1168
-
1169
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
1170
-
if err != nil {
1171
-
resp.error("Bundle not found", http.StatusNotFound)
1172
-
return
1173
-
}
1174
-
1175
-
// Fetch from PLC and verify
1176
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
1177
-
if err != nil {
1178
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
1179
-
return
1180
-
}
1181
-
1182
-
remoteHash := computeOperationsHash(remoteOps)
1183
-
verified := bundle.Hash == remoteHash
1184
-
1185
-
resp.json(map[string]interface{}{
1186
-
"bundle_number": bundleNumber,
1187
-
"verified": verified,
1188
-
"local_hash": bundle.Hash,
1189
-
"remote_hash": remoteHash,
1190
-
"local_op_count": plc.BUNDLE_SIZE,
1191
-
"remote_op_count": len(remoteOps),
1192
-
"boundary_cids_used": len(prevCIDs),
1193
-
})
1194
-
}
1195
-
1196
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
1197
-
var after string
1198
-
var prevBoundaryCIDs map[string]bool
1199
-
1200
-
if bundleNum > 1 {
1201
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
1202
-
if err != nil {
1203
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
1204
-
}
1205
-
1206
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
1207
-
1208
-
if len(prevBundle.BoundaryCIDs) > 0 {
1209
-
prevBoundaryCIDs = make(map[string]bool)
1210
-
for _, cid := range prevBundle.BoundaryCIDs {
1211
-
prevBoundaryCIDs[cid] = true
1212
-
}
1213
-
}
1214
-
}
1215
-
1216
-
var allRemoteOps []plc.PLCOperation
1217
-
seenCIDs := make(map[string]bool)
1218
-
1219
-
for cid := range prevBoundaryCIDs {
1220
-
seenCIDs[cid] = true
1221
-
}
1222
-
1223
-
currentAfter := after
1224
-
maxFetches := 20
1225
-
1226
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1227
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1228
-
Count: 1000,
1229
-
After: currentAfter,
1230
-
})
1231
-
if err != nil || len(batch) == 0 {
1232
-
break
1233
-
}
1234
-
1235
-
for _, op := range batch {
1236
-
if !seenCIDs[op.CID] {
1237
-
seenCIDs[op.CID] = true
1238
-
allRemoteOps = append(allRemoteOps, op)
1239
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1240
-
break
1241
-
}
1242
-
}
1243
-
}
1244
-
1245
-
if len(batch) > 0 {
1246
-
lastOp := batch[len(batch)-1]
1247
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1248
-
}
1249
-
1250
-
if len(batch) < 1000 {
1251
-
break
1252
-
}
1253
-
}
1254
-
1255
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1256
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1257
-
}
1258
-
1259
-
return allRemoteOps, prevBoundaryCIDs, nil
1260
-
}
1261
-
1262
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1263
1096
resp := newResponse(w)
1264
-
ctx := r.Context()
1265
1097
1266
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1267
-
if err != nil {
1268
-
resp.error(err.Error(), http.StatusInternalServerError)
1269
-
return
1270
-
}
1271
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1272
1099
if lastBundle == 0 {
1273
1100
resp.json(map[string]interface{}{
1274
1101
"status": "empty",
···
1282
1109
var errorMsg string
1283
1110
1284
1111
for i := 1; i <= lastBundle; i++ {
1285
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1286
1113
if err != nil {
1287
1114
valid = false
1288
1115
brokenAt = i
···
1291
1118
}
1292
1119
1293
1120
if i > 1 {
1294
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1295
1122
if err != nil {
1296
1123
valid = false
1297
1124
brokenAt = i
···
1299
1126
break
1300
1127
}
1301
1128
1302
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1303
1130
valid = false
1304
1131
brokenAt = i
1305
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1306
1133
break
1307
1134
}
1308
1135
}
···
1323
1150
1324
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
1325
1152
resp := newResponse(w)
1326
-
ctx := r.Context()
1327
1153
1328
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1329
-
if err != nil {
1330
-
resp.error(err.Error(), http.StatusInternalServerError)
1331
-
return
1332
-
}
1333
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1334
1155
if lastBundle == 0 {
1335
1156
resp.json(map[string]interface{}{
1336
1157
"chain_length": 0,
···
1339
1160
return
1340
1161
}
1341
1162
1342
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
1343
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
1344
-
1345
-
// Updated to receive 5 values instead of 3
1346
-
count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
1347
-
if err != nil {
1348
-
resp.error(err.Error(), http.StatusInternalServerError)
1349
-
return
1350
-
}
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
1351
1166
1352
1167
resp.json(map[string]interface{}{
1353
-
"chain_length": lastBundle,
1354
-
"total_bundles": count,
1355
-
"total_compressed_size": compressedSize,
1356
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1357
-
"total_uncompressed_size": uncompressedSize,
1358
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1359
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1360
-
"chain_start_time": firstBundle.StartTime,
1361
-
"chain_end_time": lastBundleData.EndTime,
1362
-
"chain_head_hash": lastBundleData.Hash,
1363
-
"first_prev_hash": firstBundle.PrevBundleHash,
1364
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
1365
1177
})
1366
1178
}
1367
1179
···
1382
1194
return
1383
1195
}
1384
1196
1385
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
1386
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
1387
1199
1388
1200
w.Header().Set("Content-Type", "application/jsonl")
···
1422
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
1423
1235
}
1424
1236
1425
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
1426
1238
if afterTime.IsZero() {
1427
1239
return 1
1428
1240
}
1429
1241
1430
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
1431
-
if err != nil {
1432
-
return 1
1433
-
}
1434
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
1435
1243
if foundBundle > 1 {
1436
1244
return foundBundle - 1
1437
1245
}
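Note on the minus-one above: if afterTime falls inside bundle N, operations sharing the boundary timestamp can straddle bundles N-1 and N, so collection starts one bundle early and relies on the seenCIDs de-duplication in collectOperations. A tiny illustration (values made up):

    // afterTime falls inside bundle 42:
    start := s.findStartBundle(afterTime) // → 41, backing off one bundle
    _ = start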
···
1442
1250
var allOps []plc.PLCOperation
1443
1251
seenCIDs := make(map[string]bool)
1444
1252
1445
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1446
1254
1447
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
1448
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
1602
1410
limit := getQueryInt(r, "limit", 0)
1603
1411
fromBundle := getQueryInt(r, "from", 1)
1604
1412
1605
-
history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1606
1415
if err != nil {
1607
1416
resp.error(err.Error(), http.StatusInternalServerError)
1608
1417
return
···
1674
1483
})
1675
1484
}
1676
1485
1677
-
// ===== UTILITY FUNCTIONS =====
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1488
+
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1493
+
}
1678
1494
1679
-
func computeOperationsHash(ops []plc.PLCOperation) string {
1680
-
var jsonlData []byte
1681
-
for _, op := range ops {
1682
-
jsonlData = append(jsonlData, op.RawJSON...)
1683
-
jsonlData = append(jsonlData, '\n')
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1684
1499
}
1685
-
hash := sha256.Sum256(jsonlData)
1686
-
return hex.EncodeToString(hash[:])
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1687
1506
}
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1688
1509
1689
1510
func normalizeEndpoint(endpoint string) string {
1690
1511
endpoint = strings.TrimPrefix(endpoint, "https://")
+8
-11
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
24
23
plcIndexDIDs bool
25
24
}
26
25
27
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
28
-
bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
29
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
30
27
s := &Server{
31
28
router: mux.NewRouter(),
32
29
db: db,
33
-
plcClient: plc.NewClient(plcCfg.DirectoryURL),
34
30
plcBundleDir: plcCfg.BundleDir,
35
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
36
32
plcIndexDIDs: plcCfg.IndexDIDs,
37
33
}
38
34
···
61
57
// Generic endpoints (keep as-is)
62
58
api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
63
59
api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
60
+
api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
64
61
65
62
// PDS-specific endpoints (virtual, created via JOINs)
66
63
api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
···
87
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
88
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
89
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
90
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
91
88
92
89
// PLC history/metrics
93
90
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
+2
-2
internal/log/log.go
···
28
28
errorLog = log.New(os.Stderr, "", 0)
29
29
}
30
30
31
-
// timestamp returns current time in ISO 8601 format
31
+
// timestamp returns current time with milliseconds (local time, no timezone)
32
32
func timestamp() string {
33
-
return time.Now().Format(time.RFC3339)
33
+
return time.Now().Format("2006-01-02T15:04:05.000")
34
34
}
35
35
36
36
func Verbose(format string, v ...interface{}) {
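The new reference layout trades the RFC 3339 zone suffix for millisecond precision in local time; a quick illustration of the output shape (the date is made up):

    t := time.Date(2025, 1, 2, 13, 37, 0, 123000000, time.Local)
    fmt.Println(t.Format("2006-01-02T15:04:05.000")) // 2025-01-02T13:37:00.123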
+44
-45
internal/pds/client.go
···
84
84
}
85
85
86
86
// DescribeServer fetches com.atproto.server.describeServer
87
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
88
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
89
91
90
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
91
116
92
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
93
118
if err != nil {
94
-
return nil, err
119
+
return nil, 0, "", err
95
120
}
96
121
97
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
98
125
if err != nil {
99
-
return nil, err
126
+
return nil, responseTime, usedIP, err
100
127
}
101
128
defer resp.Body.Close()
102
129
103
130
if resp.StatusCode != http.StatusOK {
104
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
105
132
}
106
133
107
134
var desc ServerDescription
108
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
109
-
return nil, err
136
+
return nil, responseTime, usedIP, err
110
137
}
111
138
112
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
113
140
}
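A hedged usage sketch for the widened return signature (the endpoint URL is illustrative):

    desc, rt, ip, err := c.DescribeServer(ctx, "https://pds.example.com")
    if err == nil {
        log.Verbose("described %s via %s in %v", desc.DID, ip, rt)
    }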
114
141
115
142
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version" field
116
-
// Returns: available, responseTime, version, usedIP, error
117
-
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, string, error) {
143
+
// Returns: available, responseTime, version, error
144
+
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
118
145
startTime := time.Now()
119
146
120
147
url := fmt.Sprintf("%s/xrpc/_health", endpoint)
121
148
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
122
149
if err != nil {
123
-
return false, 0, "", "", err
124
-
}
125
-
126
-
// Create a custom dialer to track which IP was actually used
127
-
var usedIP string
128
-
transport := &http.Transport{
129
-
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
130
-
conn, err := (&net.Dialer{
131
-
Timeout: 30 * time.Second,
132
-
KeepAlive: 30 * time.Second,
133
-
}).DialContext(ctx, network, addr)
134
-
135
-
if err == nil && conn != nil {
136
-
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
137
-
// Extract IP from "ip:port" format
138
-
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
139
-
usedIP = tcpAddr.IP.String()
140
-
}
141
-
}
142
-
}
143
-
144
-
return conn, err
145
-
},
150
+
return false, 0, "", err
146
151
}
147
152
148
-
// Create a client with our custom transport
149
-
client := &http.Client{
150
-
Timeout: c.httpClient.Timeout,
151
-
Transport: transport,
152
-
}
153
-
154
-
resp, err := client.Do(req)
153
+
resp, err := c.httpClient.Do(req)
155
154
duration := time.Since(startTime)
156
155
157
156
if err != nil {
158
-
return false, duration, "", usedIP, err
157
+
return false, duration, "", err
159
158
}
160
159
defer resp.Body.Close()
161
160
162
161
if resp.StatusCode != http.StatusOK {
163
-
return false, duration, "", usedIP, fmt.Errorf("health check returned status %d", resp.StatusCode)
162
+
return false, duration, "", fmt.Errorf("health check returned status %d", resp.StatusCode)
164
163
}
165
164
166
165
// Decode the JSON response and check for "version"
···
169
168
}
170
169
171
170
if err := json.NewDecoder(resp.Body).Decode(&healthResponse); err != nil {
172
-
return false, duration, "", usedIP, fmt.Errorf("failed to decode health JSON: %w", err)
171
+
return false, duration, "", fmt.Errorf("failed to decode health JSON: %w", err)
173
172
}
174
173
175
174
if healthResponse.Version == "" {
176
-
return false, duration, "", usedIP, fmt.Errorf("health JSON response missing 'version' field")
175
+
return false, duration, "", fmt.Errorf("health JSON response missing 'version' field")
177
176
}
178
177
179
178
// All checks passed
180
-
return true, duration, healthResponse.Version, usedIP, nil
179
+
return true, duration, healthResponse.Version, nil
181
180
}
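And the matching sketch for the slimmed-down CheckHealth, which dropped the usedIP return now that IP capture lives in DescribeServer:

    ok, dur, version, err := c.CheckHealth(ctx, endpoint)
    if err == nil && ok {
        log.Verbose("healthy in %v, version %s", dur, version)
    }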
+36
-32
internal/pds/scanner.go
···
8
8
"sync/atomic"
9
9
"time"
10
10
11
-
"github.com/acarl005/stripansi"
12
-
"github.com/atscan/atscanner/internal/config"
13
-
"github.com/atscan/atscanner/internal/ipinfo"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/monitor"
16
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
17
16
)
18
17
19
18
type Scanner struct {
···
40
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
41
40
Type: "pds",
42
41
OnlyStale: true,
42
+
OnlyValid: true,
43
43
RecheckInterval: s.config.RecheckInterval,
44
44
})
45
45
if err != nil {
···
127
127
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
128
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
129
129
if err != nil {
130
-
// Mark as offline due to DNS failure
131
130
s.saveScanResult(ctx, ep.ID, &ScanResult{
132
131
Status: storage.EndpointStatusOffline,
133
132
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
146
145
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
147
146
}
148
147
149
-
// STEP 2: Health check (now returns which IP was used)
150
-
available, responseTime, version, usedIP, err := s.client.CheckHealth(ctx, ep.Endpoint)
151
-
if err != nil || !available {
152
-
errMsg := "health check failed"
153
-
if err != nil {
154
-
errMsg = err.Error()
155
-
}
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
150
+
if err != nil {
156
151
s.saveScanResult(ctx, ep.ID, &ScanResult{
157
152
Status: storage.EndpointStatusOffline,
158
-
ResponseTime: responseTime,
159
-
ErrorMessage: errMsg,
160
-
UsedIP: usedIP, // Save even if failed
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
161
156
})
162
157
return
163
158
}
164
159
165
-
// STEP 3: Fetch PDS-specific data
166
-
desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
167
-
if err != nil {
168
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
169
-
} else if desc != nil && desc.DID != "" {
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
170
162
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
171
163
}
172
164
173
-
// Fetch repos with full info
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
172
+
}
173
+
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
174
181
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
175
182
if err != nil {
176
183
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
177
184
repoList = []Repo{}
178
185
}
179
186
180
-
// Convert to DIDs for backward compatibility
187
+
// Convert to DIDs
181
188
dids := make([]string, len(repoList))
182
189
for i, repo := range repoList {
183
190
dids[i] = repo.DID
184
191
}
185
192
186
-
// STEP 4: SAVE scan result
193
+
// STEP 5: SAVE scan result
187
194
s.saveScanResult(ctx, ep.ID, &ScanResult{
188
195
Status: storage.EndpointStatusOnline,
189
-
ResponseTime: responseTime,
196
+
ResponseTime: avgResponseTime,
190
197
Description: desc,
191
198
DIDs: dids,
192
199
Version: version,
193
-
UsedIP: usedIP, // NEW: Save which IP was used
200
+
UsedIP: usedIP, // Only from describeServer
194
201
})
195
202
196
-
// Save repos in batches (only tracks changes)
203
+
// STEP 6: Save repos in batches (only tracks changes)
197
204
if len(repoList) > 0 {
198
-
batchSize := 10000
205
+
batchSize := 100_000
199
206
200
207
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
201
208
···
235
242
236
243
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
237
244
}
238
-
239
-
// IP info fetch already started at the beginning (step 1.5)
240
-
// It will complete in the background
241
245
}
242
246
243
247
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
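Worked example of the response-time averaging above (durations made up): describeServer at 120ms and _health at 80ms store (120+80)/2 = 100ms; if _health fails, the describeServer time is kept as-is.

    descRT, healthRT := 120*time.Millisecond, 80*time.Millisecond
    avg := (descRT + healthRT) / 2 // 100ms
    _ = avg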
-676
internal/plc/bundle.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"bytes"
6
-
"context"
7
-
"crypto/sha256"
8
-
"encoding/hex"
9
-
"encoding/json"
10
-
"fmt"
11
-
"os"
12
-
"path/filepath"
13
-
"time"
14
-
15
-
"github.com/atscan/atscanner/internal/log"
16
-
"github.com/atscan/atscanner/internal/storage"
17
-
"github.com/klauspost/compress/zstd"
18
-
)
19
-
20
-
const BUNDLE_SIZE = 10000
21
-
22
-
type BundleManager struct {
23
-
dir string
24
-
enabled bool
25
-
encoder *zstd.Encoder
26
-
decoder *zstd.Decoder
27
-
db storage.Database
28
-
indexDIDs bool
29
-
}
30
-
31
-
// ===== INITIALIZATION =====
32
-
33
-
func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
34
-
if !enabled {
35
-
return &BundleManager{enabled: false}, nil
36
-
}
37
-
38
-
if err := os.MkdirAll(dir, 0755); err != nil {
39
-
return nil, fmt.Errorf("failed to create bundle dir: %w", err)
40
-
}
41
-
42
-
encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
43
-
if err != nil {
44
-
return nil, err
45
-
}
46
-
47
-
decoder, err := zstd.NewReader(nil)
48
-
if err != nil {
49
-
return nil, err
50
-
}
51
-
52
-
return &BundleManager{
53
-
dir: dir,
54
-
enabled: enabled,
55
-
encoder: encoder,
56
-
decoder: decoder,
57
-
db: db,
58
-
indexDIDs: indexDIDs, // NEW
59
-
}, nil
60
-
}
61
-
62
-
func (bm *BundleManager) Close() {
63
-
if bm.encoder != nil {
64
-
bm.encoder.Close()
65
-
}
66
-
if bm.decoder != nil {
67
-
bm.decoder.Close()
68
-
}
69
-
}
70
-
71
-
// ===== BUNDLE FILE ABSTRACTION =====
72
-
73
-
type bundleFile struct {
74
-
path string
75
-
operations []PLCOperation
76
-
uncompressedHash string
77
-
compressedHash string
78
-
}
79
-
80
-
func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
81
-
return &bundleFile{
82
-
path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
83
-
}
84
-
}
85
-
86
-
func (bf *bundleFile) exists() bool {
87
-
_, err := os.Stat(bf.path)
88
-
return err == nil
89
-
}
90
-
91
-
func (bm *BundleManager) load(bf *bundleFile) error {
92
-
compressed, err := os.ReadFile(bf.path)
93
-
if err != nil {
94
-
return fmt.Errorf("read failed: %w", err)
95
-
}
96
-
97
-
decompressed, err := bm.decoder.DecodeAll(compressed, nil)
98
-
if err != nil {
99
-
return fmt.Errorf("decompress failed: %w", err)
100
-
}
101
-
102
-
bf.operations = bm.parseJSONL(decompressed)
103
-
return nil
104
-
}
105
-
106
-
func (bm *BundleManager) save(bf *bundleFile) error {
107
-
jsonlData := bm.serializeJSONL(bf.operations)
108
-
bf.uncompressedHash = bm.hash(jsonlData)
109
-
110
-
compressed := bm.encoder.EncodeAll(jsonlData, nil)
111
-
bf.compressedHash = bm.hash(compressed)
112
-
113
-
return os.WriteFile(bf.path, compressed, 0644)
114
-
}
115
-
116
-
func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
117
-
var ops []PLCOperation
118
-
scanner := bufio.NewScanner(bytes.NewReader(data))
119
-
120
-
for scanner.Scan() {
121
-
line := scanner.Bytes()
122
-
if len(line) == 0 {
123
-
continue
124
-
}
125
-
126
-
var op PLCOperation
127
-
if err := json.Unmarshal(line, &op); err == nil {
128
-
op.RawJSON = append([]byte(nil), line...)
129
-
ops = append(ops, op)
130
-
}
131
-
}
132
-
133
-
return ops
134
-
}
135
-
136
-
func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
137
-
var buf []byte
138
-
for _, op := range ops {
139
-
buf = append(buf, op.RawJSON...)
140
-
buf = append(buf, '\n')
141
-
}
142
-
return buf
143
-
}
144
-
145
-
// ===== BUNDLE FETCHING =====
146
-
147
-
type bundleFetcher struct {
148
-
client *Client
149
-
seenCIDs map[string]bool
150
-
currentAfter string
151
-
fetchCount int
152
-
}
153
-
154
-
func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
155
-
seen := make(map[string]bool)
156
-
for cid := range prevBoundaryCIDs {
157
-
seen[cid] = true
158
-
}
159
-
160
-
return &bundleFetcher{
161
-
client: client,
162
-
seenCIDs: seen,
163
-
currentAfter: afterTime,
164
-
}
165
-
}
166
-
167
-
func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
168
-
var ops []PLCOperation
169
-
maxFetches := (target / 900) + 5
170
-
171
-
for len(ops) < target && bf.fetchCount < maxFetches {
172
-
bf.fetchCount++
173
-
batchSize := bf.calculateBatchSize(target - len(ops))
174
-
175
-
log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)
176
-
177
-
batch, shouldContinue := bf.fetchBatch(ctx, batchSize)
178
-
179
-
for _, op := range batch {
180
-
if !bf.seenCIDs[op.CID] {
181
-
bf.seenCIDs[op.CID] = true
182
-
ops = append(ops, op)
183
-
184
-
if len(ops) >= target {
185
-
return ops[:target], true
186
-
}
187
-
}
188
-
}
189
-
190
-
if !shouldContinue {
191
-
break
192
-
}
193
-
}
194
-
195
-
return ops, len(ops) >= target
196
-
}
197
-
198
-
func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
199
-
if bf.fetchCount == 0 {
200
-
return 1000
201
-
}
202
-
if remaining < 100 {
203
-
return 50
204
-
}
205
-
if remaining < 500 {
206
-
return 200
207
-
}
208
-
return 1000
209
-
}
210
-
211
-
func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
212
-
ops, err := bf.client.Export(ctx, ExportOptions{
213
-
Count: size,
214
-
After: bf.currentAfter,
215
-
})
216
-
217
-
if err != nil || len(ops) == 0 {
218
-
return nil, false
219
-
}
220
-
221
-
if len(ops) > 0 {
222
-
bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
223
-
}
224
-
225
-
return ops, len(ops) >= size
226
-
}
227
-
228
-
// ===== MAIN BUNDLE LOADING =====
229
-
230
-
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
231
-
if !bm.enabled {
232
-
return nil, false, fmt.Errorf("bundle manager disabled")
233
-
}
234
-
235
-
bf := bm.newBundleFile(bundleNum)
236
-
237
-
// Try local file first
238
-
if bf.exists() {
239
-
return bm.loadFromFile(ctx, bundleNum, bf)
240
-
}
241
-
242
-
// Fetch from PLC
243
-
return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
244
-
}
245
-
246
-
func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
247
-
log.Verbose("→ Loading bundle %06d from local file", bundleNum)
248
-
249
-
// Verify hash if bundle is in DB
250
-
if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
251
-
if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
252
-
log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
253
-
os.Remove(bf.path)
254
-
return nil, false, fmt.Errorf("hash mismatch")
255
-
}
256
-
log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
257
-
}
258
-
259
-
if err := bm.load(bf); err != nil {
260
-
return nil, false, err
261
-
}
262
-
263
-
// Index if not in DB
264
-
if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
265
-
bf.compressedHash = bm.hashFile(bf.path)
266
-
bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
267
-
268
-
// Calculate cursor from previous bundle
269
-
cursor := bm.calculateCursor(ctx, bundleNum)
270
-
271
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
272
-
}
273
-
274
-
return bf.operations, true, nil
275
-
}
276
-
277
-
func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
278
-
log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)
279
-
280
-
afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
281
-
fetcher := newBundleFetcher(client, afterTime, prevCIDs)
282
-
283
-
ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)
284
-
285
-
log.Info(" Collected %d unique operations after %d fetches (complete=%v)",
286
-
len(ops), fetcher.fetchCount, isComplete)
287
-
288
-
if isComplete {
289
-
bf.operations = ops
290
-
if err := bm.save(bf); err != nil {
291
-
log.Error("Warning: failed to save bundle: %v", err)
292
-
} else {
293
-
// The cursor is the afterTime that was used to fetch this bundle
294
-
cursor := afterTime
295
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
296
-
log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
297
-
bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
298
-
}
299
-
}
300
-
301
-
return ops, isComplete, nil
302
-
}
303
-
304
-
func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
305
-
if bundleNum == 1 {
306
-
return "", nil
307
-
}
308
-
309
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
310
-
if err != nil {
311
-
return "", nil
312
-
}
313
-
314
-
afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)
315
-
316
-
// Return stored boundary CIDs if available
317
-
if len(prevBundle.BoundaryCIDs) > 0 {
318
-
cids := make(map[string]bool)
319
-
for _, cid := range prevBundle.BoundaryCIDs {
320
-
cids[cid] = true
321
-
}
322
-
return afterTime, cids
323
-
}
324
-
325
-
// Fallback: compute from file
326
-
bf := bm.newBundleFile(bundleNum - 1)
327
-
if bf.exists() {
328
-
if err := bm.load(bf); err == nil {
329
-
_, cids := GetBoundaryCIDs(bf.operations)
330
-
return afterTime, cids
331
-
}
332
-
}
333
-
334
-
return afterTime, nil
335
-
}
336
-
337
-
// ===== BUNDLE INDEXING =====
338
-
339
-
func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
340
-
prevHash := ""
341
-
if bundleNum > 1 {
342
-
if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
343
-
prevHash = prev.Hash
344
-
}
345
-
}
346
-
347
-
dids := bm.extractUniqueDIDs(bf.operations)
348
-
compressedFileSize := bm.getFileSize(bf.path)
349
-
350
-
// Calculate uncompressed size
351
-
uncompressedSize := int64(0)
352
-
for _, op := range bf.operations {
353
-
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
354
-
}
355
-
356
-
// Get time range from operations
357
-
firstSeenAt := bf.operations[0].CreatedAt
358
-
lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt
359
-
360
-
bundle := &storage.PLCBundle{
361
-
BundleNumber: bundleNum,
362
-
StartTime: firstSeenAt,
363
-
EndTime: lastSeenAt,
364
-
DIDs: dids,
365
-
Hash: bf.uncompressedHash,
366
-
CompressedHash: bf.compressedHash,
367
-
CompressedSize: compressedFileSize,
368
-
UncompressedSize: uncompressedSize,
369
-
Cursor: cursor,
370
-
PrevBundleHash: prevHash,
371
-
Compressed: true,
372
-
CreatedAt: time.Now().UTC(),
373
-
}
374
-
375
-
// Create bundle first
376
-
if err := bm.db.CreateBundle(ctx, bundle); err != nil {
377
-
return err
378
-
}
379
-
380
-
// NEW: Only index DIDs if enabled
381
-
if bm.indexDIDs {
382
-
start := time.Now()
383
-
384
-
// Extract handle and PDS for each DID using centralized helper
385
-
didInfoMap := ExtractDIDInfoMap(bf.operations)
386
-
387
-
if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil {
388
-
log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err)
389
-
// Don't return error - bundle is already created
390
-
} else {
391
-
// Update handle and PDS for each DID
392
-
for did, info := range didInfoMap {
393
-
// Validate handle length before saving
394
-
validHandle := ValidateHandle(info.Handle)
395
-
396
-
if err := bm.db.UpsertDID(ctx, did, bundleNum, validHandle, info.PDS); err != nil {
397
-
log.Error("Failed to update DID %s metadata: %v", did, err)
398
-
}
399
-
}
400
-
401
-
elapsed := time.Since(start)
402
-
log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed)
403
-
}
404
-
} else {
405
-
log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
406
-
}
407
-
408
-
return nil
409
-
}
410
-
411
-
func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
412
-
didSet := make(map[string]bool)
413
-
for _, op := range ops {
414
-
didSet[op.DID] = true
415
-
}
416
-
417
-
dids := make([]string, 0, len(didSet))
418
-
for did := range didSet {
419
-
dids = append(dids, did)
420
-
}
421
-
return dids
422
-
}
423
-
424
-
// ===== MEMPOOL BUNDLE CREATION =====
425
-
426
-
func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
427
-
if !bm.enabled {
428
-
return 0, fmt.Errorf("bundle manager disabled")
429
-
}
430
-
431
-
if len(operations) != BUNDLE_SIZE {
432
-
return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
433
-
}
434
-
435
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
436
-
if err != nil {
437
-
return 0, err
438
-
}
439
-
bundleNum := lastBundle + 1
440
-
441
-
bf := bm.newBundleFile(bundleNum)
442
-
bf.operations = operations
443
-
444
-
if err := bm.save(bf); err != nil {
445
-
return 0, err
446
-
}
447
-
448
-
if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
449
-
return 0, err
450
-
}
451
-
452
-
log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
453
-
bundleNum, bf.uncompressedHash[:16])
454
-
455
-
return bundleNum, nil
456
-
}
457
-
458
-
// ===== VERIFICATION =====
459
-
460
-
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
461
-
if !bm.enabled {
462
-
return fmt.Errorf("bundle manager disabled")
463
-
}
464
-
465
-
log.Info("Verifying bundle chain from 1 to %06d...", endBundle)
466
-
467
-
for i := 1; i <= endBundle; i++ {
468
-
bundle, err := bm.db.GetBundleByNumber(ctx, i)
469
-
if err != nil {
470
-
return fmt.Errorf("bundle %06d not found: %w", i, err)
471
-
}
472
-
473
-
// Verify file hash
474
-
path := bm.newBundleFile(i).path
475
-
if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
476
-
return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
477
-
}
478
-
479
-
// Verify chain link
480
-
if i > 1 {
481
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
482
-
if err != nil {
483
-
return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
484
-
}
485
-
486
-
if bundle.PrevBundleHash != prevBundle.Hash {
487
-
return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
488
-
i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
489
-
}
490
-
}
491
-
492
-
if i%100 == 0 {
493
-
log.Verbose(" ✓ Verified bundles 1-%06d", i)
494
-
}
495
-
}
496
-
497
-
log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
498
-
return nil
499
-
}
500
-
501
-
func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
502
-
if !bm.enabled {
503
-
return nil
504
-
}
505
-
506
-
for i := 1; i < targetBundle; i++ {
507
-
if !bm.newBundleFile(i).exists() {
508
-
if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
509
-
return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
510
-
}
511
-
}
512
-
}
513
-
514
-
return nil
515
-
}
516
-
517
-
// ===== UTILITY METHODS =====
518
-
519
-
func (bm *BundleManager) hash(data []byte) string {
520
-
h := sha256.Sum256(data)
521
-
return hex.EncodeToString(h[:])
522
-
}
523
-
524
-
func (bm *BundleManager) hashFile(path string) string {
525
-
data, _ := os.ReadFile(path)
526
-
return bm.hash(data)
527
-
}
528
-
529
-
func (bm *BundleManager) verifyHash(path, expectedHash string) error {
530
-
if expectedHash == "" {
531
-
return nil
532
-
}
533
-
534
-
actualHash := bm.hashFile(path)
535
-
if actualHash != expectedHash {
536
-
return fmt.Errorf("hash mismatch")
537
-
}
538
-
return nil
539
-
}
540
-
541
-
func (bm *BundleManager) getFileSize(path string) int64 {
542
-
if info, err := os.Stat(path); err == nil {
543
-
return info.Size()
544
-
}
545
-
return 0
546
-
}
547
-
548
-
func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
549
-
if !bm.enabled {
550
-
return 0, 0, 0, 0, nil
551
-
}
552
-
return bm.db.GetBundleStats(ctx)
553
-
}
554
-
555
-
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
556
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
557
-
if err != nil {
558
-
return nil, err
559
-
}
560
-
561
-
if lastBundle == 0 {
562
-
return map[string]interface{}{
563
-
"chain_length": 0,
564
-
"status": "empty",
565
-
}, nil
566
-
}
567
-
568
-
firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
569
-
lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)
570
-
571
-
return map[string]interface{}{
572
-
"chain_length": lastBundle,
573
-
"first_bundle": 1,
574
-
"last_bundle": lastBundle,
575
-
"chain_start_time": firstBundle.StartTime,
576
-
"chain_end_time": lastBundleData.EndTime,
577
-
"chain_head_hash": lastBundleData.Hash,
578
-
}, nil
579
-
}
580
-
581
-
// ===== EXPORTED HELPERS =====
582
-
583
-
func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
584
-
if len(operations) == 0 {
585
-
return time.Time{}, nil
586
-
}
587
-
588
-
lastOp := operations[len(operations)-1]
589
-
boundaryTime := lastOp.CreatedAt
590
-
cidSet := make(map[string]bool)
591
-
592
-
for i := len(operations) - 1; i >= 0; i-- {
593
-
op := operations[i]
594
-
if op.CreatedAt.Equal(boundaryTime) {
595
-
cidSet[op.CID] = true
596
-
} else {
597
-
break
598
-
}
599
-
}
600
-
601
-
return boundaryTime, cidSet
602
-
}
603
-
604
-
func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
605
-
if len(operations) == 0 {
606
-
return operations
607
-
}
608
-
609
-
boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
610
-
if err != nil {
611
-
return operations
612
-
}
613
-
614
-
startIdx := 0
615
-
for startIdx < len(operations) {
616
-
op := operations[startIdx]
617
-
618
-
if op.CreatedAt.After(boundaryTime) {
619
-
break
620
-
}
621
-
622
-
if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
623
-
startIdx++
624
-
continue
625
-
}
626
-
627
-
break
628
-
}
629
-
630
-
return operations[startIdx:]
631
-
}
632
-
633
-
// LoadBundleOperations is a public method for external access (e.g., API handlers)
634
-
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
635
-
if !bm.enabled {
636
-
return nil, fmt.Errorf("bundle manager disabled")
637
-
}
638
-
639
-
bf := bm.newBundleFile(bundleNum)
640
-
641
-
if !bf.exists() {
642
-
return nil, fmt.Errorf("bundle %06d not found", bundleNum)
643
-
}
644
-
645
-
if err := bm.load(bf); err != nil {
646
-
return nil, err
647
-
}
648
-
649
-
return bf.operations, nil
650
-
}
651
-
652
-
// calculateCursor determines the cursor value for a given bundle
653
-
// For bundle 1: returns empty string
654
-
// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format
655
-
func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
656
-
if bundleNum == 1 {
657
-
return ""
658
-
}
659
-
660
-
// Try to get cursor from previous bundle in DB
661
-
if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
662
-
return prevBundle.EndTime.Format(time.RFC3339Nano)
663
-
}
664
-
665
-
// If previous bundle not in DB, try to load it from file
666
-
prevBf := bm.newBundleFile(bundleNum - 1)
667
-
if prevBf.exists() {
668
-
if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
669
-
// Return the createdAt of the last operation in previous bundle
670
-
lastOp := prevBf.operations[len(prevBf.operations)-1]
671
-
return lastOp.CreatedAt.Format(time.RFC3339Nano)
672
-
}
673
-
}
674
-
675
-
return ""
676
-
}
-237
internal/plc/client.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"context"
6
-
"encoding/json"
7
-
"fmt"
8
-
"io"
9
-
"net/http"
10
-
"strconv"
11
-
"time"
12
-
13
-
"github.com/atscan/atscanner/internal/log"
14
-
)
15
-
16
-
type Client struct {
17
-
baseURL string
18
-
httpClient *http.Client
19
-
rateLimiter *RateLimiter
20
-
}
21
-
22
-
func NewClient(baseURL string) *Client {
23
-
// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
24
-
rateLimiter := NewRateLimiter(90, time.Minute)
25
-
26
-
return &Client{
27
-
baseURL: baseURL,
28
-
httpClient: &http.Client{
29
-
Timeout: 60 * time.Second,
30
-
},
31
-
rateLimiter: rateLimiter,
32
-
}
33
-
}
34
-
35
-
func (c *Client) Close() {
36
-
if c.rateLimiter != nil {
37
-
c.rateLimiter.Stop()
38
-
}
39
-
}
40
-
41
-
type ExportOptions struct {
42
-
Count int
43
-
After string // ISO 8601 datetime string
44
-
}
45
-
46
-
// Export fetches export data from PLC directory with rate limiting and retry
47
-
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
48
-
return c.exportWithRetry(ctx, opts, 5)
49
-
}
50
-
51
-
// exportWithRetry implements retry logic with exponential backoff for rate limits
52
-
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
53
-
var lastErr error
54
-
backoff := 1 * time.Second
55
-
56
-
for attempt := 1; attempt <= maxRetries; attempt++ {
57
-
// Wait for rate limiter token
58
-
if err := c.rateLimiter.Wait(ctx); err != nil {
59
-
return nil, err
60
-
}
61
-
62
-
operations, retryAfter, err := c.doExport(ctx, opts)
63
-
64
-
if err == nil {
65
-
return operations, nil
66
-
}
67
-
68
-
lastErr = err
69
-
70
-
// Check if it's a rate limit error (429)
71
-
if retryAfter > 0 {
72
-
log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
73
-
retryAfter, attempt, maxRetries)
74
-
75
-
select {
76
-
case <-time.After(retryAfter):
77
-
continue
78
-
case <-ctx.Done():
79
-
return nil, ctx.Err()
80
-
}
81
-
}
82
-
83
-
// Other errors - exponential backoff
84
-
if attempt < maxRetries {
85
-
log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
86
-
attempt, maxRetries, err, backoff)
87
-
88
-
select {
89
-
case <-time.After(backoff):
90
-
backoff *= 2 // Exponential backoff
91
-
case <-ctx.Done():
92
-
return nil, ctx.Err()
93
-
}
94
-
}
95
-
}
96
-
97
-
return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
98
-
}
99
-
100
-
// doExport performs the actual HTTP request
101
-
func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
102
-
url := fmt.Sprintf("%s/export", c.baseURL)
103
-
104
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
105
-
if err != nil {
106
-
return nil, 0, err
107
-
}
108
-
109
-
// Add query parameters
110
-
q := req.URL.Query()
111
-
if opts.Count > 0 {
112
-
q.Add("count", fmt.Sprintf("%d", opts.Count))
113
-
}
114
-
if opts.After != "" {
115
-
q.Add("after", opts.After)
116
-
}
117
-
req.URL.RawQuery = q.Encode()
118
-
119
-
resp, err := c.httpClient.Do(req)
120
-
if err != nil {
121
-
return nil, 0, fmt.Errorf("request failed: %w", err)
122
-
}
123
-
defer resp.Body.Close()
124
-
125
-
// Handle rate limiting (429)
126
-
if resp.StatusCode == http.StatusTooManyRequests {
127
-
retryAfter := parseRetryAfter(resp)
128
-
129
-
// Also check x-ratelimit headers for info
130
-
if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
131
-
log.Verbose("Rate limit: %s", limit)
132
-
}
133
-
134
-
return nil, retryAfter, fmt.Errorf("rate limited (429)")
135
-
}
136
-
137
-
if resp.StatusCode != http.StatusOK {
138
-
body, _ := io.ReadAll(resp.Body)
139
-
return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
140
-
}
141
-
142
-
var operations []PLCOperation
143
-
144
-
// PLC export returns newline-delimited JSON
145
-
scanner := bufio.NewScanner(resp.Body)
146
-
buf := make([]byte, 0, 64*1024)
147
-
scanner.Buffer(buf, 1024*1024)
148
-
149
-
lineCount := 0
150
-
for scanner.Scan() {
151
-
lineCount++
152
-
line := scanner.Bytes()
153
-
154
-
if len(line) == 0 {
155
-
continue
156
-
}
157
-
158
-
var op PLCOperation
159
-
if err := json.Unmarshal(line, &op); err != nil {
160
-
log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
161
-
continue
162
-
}
163
-
164
-
// CRITICAL: Store the original raw JSON bytes
165
-
op.RawJSON = make([]byte, len(line))
166
-
copy(op.RawJSON, line)
167
-
168
-
operations = append(operations, op)
169
-
}
170
-
171
-
if err := scanner.Err(); err != nil {
172
-
return nil, 0, fmt.Errorf("error reading response: %w", err)
173
-
}
174
-
175
-
return operations, 0, nil
176
-
177
-
}
178
-
179
-
// parseRetryAfter parses the Retry-After header
180
-
func parseRetryAfter(resp *http.Response) time.Duration {
181
-
retryAfter := resp.Header.Get("Retry-After")
182
-
if retryAfter == "" {
183
-
// Default to 5 minutes if no header
184
-
return 5 * time.Minute
185
-
}
186
-
187
-
// Try parsing as seconds
188
-
if seconds, err := strconv.Atoi(retryAfter); err == nil {
189
-
return time.Duration(seconds) * time.Second
190
-
}
191
-
192
-
// Try parsing as HTTP date
193
-
if t, err := http.ParseTime(retryAfter); err == nil {
194
-
return time.Until(t)
195
-
}
196
-
197
-
// Default
198
-
return 5 * time.Minute
199
-
}
200
-
201
-
// GetDID fetches a specific DID document from PLC
202
-
func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
203
-
// Wait for rate limiter
204
-
if err := c.rateLimiter.Wait(ctx); err != nil {
205
-
return nil, err
206
-
}
207
-
208
-
url := fmt.Sprintf("%s/%s", c.baseURL, did)
209
-
210
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
211
-
if err != nil {
212
-
return nil, err
213
-
}
214
-
215
-
resp, err := c.httpClient.Do(req)
216
-
if err != nil {
217
-
return nil, err
218
-
}
219
-
defer resp.Body.Close()
220
-
221
-
if resp.StatusCode == http.StatusTooManyRequests {
222
-
retryAfter := parseRetryAfter(resp)
223
-
return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
224
-
}
225
-
226
-
if resp.StatusCode != http.StatusOK {
227
-
body, _ := io.ReadAll(resp.Body)
228
-
return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
229
-
}
230
-
231
-
var doc DIDDocument
232
-
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
233
-
return nil, err
234
-
}
235
-
236
-
return &doc, nil
237
-
}
+20
-2
internal/plc/helpers.go
···
1
1
package plc
2
2
3
-
import "strings"
3
+
import (
4
+
"regexp"
5
+
"strings"
6
+
)
4
7
5
8
// MaxHandleLength is the maximum allowed handle length for database storage
6
9
const MaxHandleLength = 500
10
+
11
+
// Handle validation regex per AT Protocol spec
12
+
// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter
13
+
var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
7
14
8
15
// ExtractHandle safely extracts the handle from a PLC operation
9
16
func ExtractHandle(op *PLCOperation) string {
···
29
36
}
30
37
31
38
// ValidateHandle checks if a handle is valid for database storage
32
-
// Returns empty string if handle is too long
39
+
// Returns empty string if handle is invalid (too long or wrong format)
33
40
func ValidateHandle(handle string) string {
41
+
if handle == "" {
42
+
return ""
43
+
}
44
+
45
+
// Check length first (faster)
34
46
if len(handle) > MaxHandleLength {
35
47
return ""
36
48
}
49
+
50
+
// Validate format using regex
51
+
if !handleRegex.MatchString(handle) {
52
+
return ""
53
+
}
54
+
37
55
return handle
38
56
}
39
57
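A few sanity checks for the new regex (expected results read off the pattern, not from a test run):

    ValidateHandle("alice.bsky.social") // kept: "alice.bsky.social"
    ValidateHandle("localhost")         // "" (needs at least one dot-separated label)
    ValidateHandle("bad-.example.com")  // "" (a label may not end with a hyphen)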
+522
internal/plc/manager.go
···
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/csv"
6
+
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
13
+
"time"
14
+
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
19
+
)
20
+
21
+
// BundleManager wraps the library's manager with database integration
22
+
type BundleManager struct {
23
+
libManager *plcbundle.Manager
24
+
db storage.Database
25
+
bundleDir string
26
+
indexDIDs bool
27
+
}
28
+
29
+
func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30
+
// Create library config
31
+
config := plcbundle.DefaultConfig(bundleDir)
32
+
33
+
// Create PLC client
34
+
var client *plcbundle.PLCClient
35
+
if plcURL != "" {
36
+
client = plcbundle.NewPLCClient(plcURL)
37
+
}
38
+
39
+
// Create library manager
40
+
libMgr, err := plcbundle.NewManager(config, client)
41
+
if err != nil {
42
+
return nil, fmt.Errorf("failed to create library manager: %w", err)
43
+
}
44
+
45
+
return &BundleManager{
46
+
libManager: libMgr,
47
+
db: db,
48
+
bundleDir: bundleDir,
49
+
indexDIDs: indexDIDs,
50
+
}, nil
51
+
}
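A wiring sketch using the config fields shown earlier in main.go (a hedged example, not the actual startup code):

    bm, err := NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
    if err != nil {
        log.Fatal("Failed to init bundle manager: %v", err)
    }
    defer bm.Close()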
52
+
53
+
func (bm *BundleManager) Close() {
54
+
if bm.libManager != nil {
55
+
bm.libManager.Close()
56
+
}
57
+
}
58
+
59
+
// LoadBundleOperations loads a bundle via the library and returns its operations
60
+
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62
+
if err != nil {
63
+
return nil, err
64
+
}
65
+
return bundle.Operations, nil
66
+
}
67
+
68
+
// LoadBundle loads a full bundle with metadata
69
+
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70
+
return bm.libManager.LoadBundle(ctx, bundleNum)
71
+
}
72
+
73
+
// FetchAndSaveBundle fetches the next bundle from PLC and saves it
74
+
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75
+
// Fetch from PLC using library
76
+
bundle, err := bm.libManager.FetchNextBundle(ctx)
77
+
if err != nil {
78
+
return nil, err
79
+
}
80
+
81
+
// Save to disk (library handles this)
82
+
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83
+
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84
+
}
85
+
86
+
// Index DIDs if enabled (still use database for this)
87
+
if bm.indexDIDs && len(bundle.Operations) > 0 {
88
+
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89
+
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90
+
}
91
+
}
92
+
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
96
+
}
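An illustrative catch-up loop; this assumes (from the library's apparent contract, not a documented guarantee) that FetchNextBundle returns an error once no complete bundle's worth of operations remains:

    for {
        if _, err := bm.FetchAndSaveBundle(ctx); err != nil {
            break // caught up, or a real error worth logging
        }
    }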
97
+
98
+
// indexBundleDIDs indexes DIDs from a bundle into the database
99
+
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100
+
start := time.Now()
101
+
log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102
+
103
+
// Extract DID info from operations
104
+
didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105
+
106
+
successCount := 0
107
+
errorCount := 0
108
+
invalidHandleCount := 0
109
+
110
+
// Upsert each DID
111
+
for did, info := range didInfoMap {
112
+
validHandle := ValidateHandle(info.Handle)
113
+
if info.Handle != "" && validHandle == "" {
114
+
invalidHandleCount++
115
+
}
116
+
117
+
if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118
+
log.Error("Failed to index DID %s: %v", did, err)
119
+
errorCount++
120
+
} else {
121
+
successCount++
122
+
}
123
+
}
124
+
125
+
elapsed := time.Since(start)
126
+
log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127
+
successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128
+
129
+
return nil
130
+
}
131
+
132
+
// VerifyChain verifies bundle chain integrity
133
+
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134
+
result, err := bm.libManager.VerifyChain(ctx)
135
+
if err != nil {
136
+
return err
137
+
}
138
+
139
+
if !result.Valid {
140
+
return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141
+
}
142
+
143
+
return nil
144
+
}
145
+
146
+
// GetChainInfo returns chain information
147
+
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148
+
return bm.libManager.GetInfo(), nil
149
+
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = 0 // untyped 0 stores an int, so the .(int) assertion below cannot panic
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
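The assertions above assume the library reports bundle_count as int and total_size as int64; a comma-ok assertion keeps a contract drift from panicking:

    if n, ok := stats["total_size"].(int64); ok {
        _ = n // safe to use
    }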
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp finds bundle containing a timestamp
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
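Usage sketch (the timestamp is made up); the scan is linear over index metadata, which stays cheap at current chain lengths:

    after, _ := time.Parse(time.RFC3339, "2024-06-01T00:00:00Z")
    n := bm.FindBundleForTimestamp(after) // covering bundle, else closest earlier, else 1
    _ = n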
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
type dailyStat struct {
306
+
lastBundle int
307
+
bundleCount int
308
+
totalUncompressed int64
309
+
totalCompressed int64
310
+
}
311
+
312
+
dailyStats := make(map[string]*dailyStat)
313
+
314
+
// Map to store the cumulative values at the end of each date
315
+
dateCumulatives := make(map[string]struct {
316
+
uncompressed int64
317
+
compressed int64
318
+
})
319
+
320
+
// Calculate cumulative totals as we iterate through sorted bundles
321
+
cumulativeUncompressed := int64(0)
322
+
cumulativeCompressed := int64(0)
323
+
324
+
for _, bundle := range filtered {
325
+
dateStr := bundle.StartTime.Format("2006-01-02")
326
+
327
+
// Update cumulative totals
328
+
cumulativeUncompressed += bundle.UncompressedSize
329
+
cumulativeCompressed += bundle.CompressedSize
330
+
331
+
if stat, exists := dailyStats[dateStr]; exists {
332
+
// Update existing day
333
+
if bundle.BundleNumber > stat.lastBundle {
334
+
stat.lastBundle = bundle.BundleNumber
335
+
}
336
+
stat.bundleCount++
337
+
stat.totalUncompressed += bundle.UncompressedSize
338
+
stat.totalCompressed += bundle.CompressedSize
339
+
} else {
340
+
// Create new day entry
341
+
dailyStats[dateStr] = &dailyStat{
342
+
lastBundle: bundle.BundleNumber,
343
+
bundleCount: 1,
344
+
totalUncompressed: bundle.UncompressedSize,
345
+
totalCompressed: bundle.CompressedSize,
346
+
}
347
+
}
348
+
349
+
// Store the cumulative values at the end of this date
350
+
// (will be overwritten if there are multiple bundles on the same day)
351
+
dateCumulatives[dateStr] = struct {
352
+
uncompressed int64
353
+
compressed int64
354
+
}{
355
+
uncompressed: cumulativeUncompressed,
356
+
compressed: cumulativeCompressed,
357
+
}
358
+
}
359
+
360
+
// Convert map to sorted slice by date
361
+
var dates []string
362
+
for date := range dailyStats {
363
+
dates = append(dates, date)
364
+
}
365
+
sort.Strings(dates)
366
+
367
+
// Build history points with cumulative operations
368
+
var history []*storage.PLCHistoryPoint
369
+
cumulativeOps := 0
370
+
371
+
for _, date := range dates {
372
+
stat := dailyStats[date]
373
+
cumulativeOps += stat.bundleCount * 10000 // each complete bundle holds exactly 10,000 operations
374
+
cumulative := dateCumulatives[date]
375
+
376
+
history = append(history, &storage.PLCHistoryPoint{
377
+
Date: date,
378
+
BundleNumber: stat.lastBundle,
379
+
OperationCount: cumulativeOps,
380
+
UncompressedSize: stat.totalUncompressed,
381
+
CompressedSize: stat.totalCompressed,
382
+
CumulativeUncompressed: cumulative.uncompressed,
383
+
CumulativeCompressed: cumulative.compressed,
384
+
})
385
+
}
386
+
387
+
// Apply limit if specified
388
+
if limit > 0 && len(history) > limit {
389
+
history = history[:limit]
390
+
}
391
+
392
+
return history, nil
393
+
}
394
+
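Each history point is day-granular and the byte totals are already cumulative, so a growth chart can be drawn straight from the slice. A hypothetical consumer (names outside this package are illustrative):

points, err := bm.GetPLCHistory(ctx, 0, 1) // no limit, from bundle 1
if err != nil {
	return err
}
for _, p := range points {
	fmt.Printf("%s bundle=%06d ops≈%d stored=%d (compressed %d)\n",
		p.Date, p.BundleNumber, p.OperationCount,
		p.CumulativeUncompressed, p.CumulativeCompressed)
}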
395
+
// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396
+
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397
+
// Define the path to the labels file
398
+
labelsDir := filepath.Join(bm.bundleDir, "labels")
399
+
labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400
+
401
+
// Check if file exists
402
+
if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403
+
log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404
+
// Return empty, not an error
405
+
return []*PLCOpLabel{}, nil
406
+
}
407
+
408
+
// Open the Zstd-compressed file
409
+
file, err := os.Open(labelsFile)
410
+
if err != nil {
411
+
return nil, fmt.Errorf("failed to open labels file: %w", err)
412
+
}
413
+
defer file.Close()
414
+
415
+
// Create a Zstd reader
416
+
zstdReader, err := zstd.NewReader(file)
417
+
if err != nil {
418
+
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419
+
}
420
+
defer zstdReader.Close()
421
+
422
+
// Create a CSV reader
423
+
csvReader := csv.NewReader(zstdReader)
424
+
// The labels file has no header row, so records are read directly
425
+
// Optionally enforce the 6-column format:
426
+
// csvReader.FieldsPerRecord = 6
427
+
428
+
var labels []*PLCOpLabel
429
+
430
+
// Read all records
431
+
for {
432
+
// Check for context cancellation
433
+
if err := ctx.Err(); err != nil {
434
+
return nil, err
435
+
}
436
+
437
+
record, err := csvReader.Read()
438
+
if err == io.EOF {
439
+
break // End of file
440
+
}
441
+
if err != nil {
442
+
log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443
+
continue // Skip bad line
444
+
}
445
+
446
+
// Parse the CSV record (which is []string)
447
+
label, err := parseLabelRecord(record)
448
+
if err != nil {
449
+
log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450
+
continue // Skip bad data
451
+
}
452
+
453
+
labels = append(labels, label)
454
+
}
455
+
456
+
return labels, nil
457
+
}
458
+
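The labels file is an optional sidecar at <bundleDir>/labels/NNNNNN.csv.zst; a missing file yields an empty slice rather than an error, so callers need no existence check. A sketch of a caller tallying detector hits (hand-written for illustration):

labels, err := bm.GetBundleLabels(ctx, 275)
if err != nil {
	return err
}
byDetector := make(map[string]int)
for _, l := range labels {
	for _, d := range l.Detectors {
		byDetector[d]++
	}
}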
459
+
// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
460
+
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461
+
// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462
+
if len(record) != 6 {
463
+
err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464
+
// Surface the malformed line, then return the error so the caller can skip it
465
+
log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
467
+
return nil, err
468
+
}
469
+
470
+
// 0:bundle
471
+
bundle, err := strconv.Atoi(record[0])
472
+
if err != nil {
474
+
log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
476
+
return nil, fmt.Errorf("parsing 'bundle': %w", err)
477
+
}
478
+
479
+
// 1:position
480
+
position, err := strconv.Atoi(record[1])
481
+
if err != nil {
483
+
log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
485
+
return nil, fmt.Errorf("parsing 'position': %w", err)
486
+
}
487
+
488
+
// 2:cid(short)
489
+
shortCID := record[2]
490
+
491
+
// 3:size
492
+
size, err := strconv.Atoi(record[3])
493
+
if err != nil {
495
+
log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
497
+
return nil, fmt.Errorf("parsing 'size': %w", err)
498
+
}
499
+
500
+
// 4:confidence
501
+
confidence, err := strconv.ParseFloat(record[4], 64)
502
+
if err != nil {
504
+
log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
506
+
return nil, fmt.Errorf("parsing 'confidence': %w", err)
507
+
}
508
+
509
+
// 5:labels
510
+
detectors := strings.Split(record[5], ";")
511
+
512
+
label := &PLCOpLabel{
513
+
Bundle: bundle,
514
+
Position: position,
515
+
CID: shortCID,
516
+
Size: size,
517
+
Confidence: confidence,
518
+
Detectors: detectors,
519
+
}
520
+
521
+
return label, nil
522
+
}
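For reference, one well-formed line and the struct it yields (values invented for illustration):

// CSV:  275,4821,bafyreib,512,0.93,spam;bulk-create
// Go:   &PLCOpLabel{Bundle: 275, Position: 4821, CID: "bafyreib",
//          Size: 512, Confidence: 0.93, Detectors: []string{"spam", "bulk-create"}}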
-70
internal/plc/ratelimiter.go
-70
internal/plc/ratelimiter.go
···
1
-
package plc
2
-
3
-
import (
4
-
"context"
5
-
"time"
6
-
)
7
-
8
-
// RateLimiter implements a token bucket rate limiter
9
-
type RateLimiter struct {
10
-
tokens chan struct{}
11
-
refillRate time.Duration
12
-
maxTokens int
13
-
stopRefill chan struct{}
14
-
}
15
-
16
-
// NewRateLimiter creates a new rate limiter
17
-
// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
18
-
func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
19
-
rl := &RateLimiter{
20
-
tokens: make(chan struct{}, requestsPerPeriod),
21
-
refillRate: period / time.Duration(requestsPerPeriod),
22
-
maxTokens: requestsPerPeriod,
23
-
stopRefill: make(chan struct{}),
24
-
}
25
-
26
-
// Fill initially
27
-
for i := 0; i < requestsPerPeriod; i++ {
28
-
rl.tokens <- struct{}{}
29
-
}
30
-
31
-
// Start refill goroutine
32
-
go rl.refill()
33
-
34
-
return rl
35
-
}
36
-
37
-
// refill adds tokens at the specified rate
38
-
func (rl *RateLimiter) refill() {
39
-
ticker := time.NewTicker(rl.refillRate)
40
-
defer ticker.Stop()
41
-
42
-
for {
43
-
select {
44
-
case <-ticker.C:
45
-
select {
46
-
case rl.tokens <- struct{}{}:
47
-
// Token added
48
-
default:
49
-
// Buffer full, skip
50
-
}
51
-
case <-rl.stopRefill:
52
-
return
53
-
}
54
-
}
55
-
}
56
-
57
-
// Wait blocks until a token is available
58
-
func (rl *RateLimiter) Wait(ctx context.Context) error {
59
-
select {
60
-
case <-rl.tokens:
61
-
return nil
62
-
case <-ctx.Done():
63
-
return ctx.Err()
64
-
}
65
-
}
66
-
67
-
// Stop stops the rate limiter
68
-
func (rl *RateLimiter) Stop() {
69
-
close(rl.stopRefill)
70
-
}
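The hand-rolled token bucket goes away together with its refill goroutine; request pacing is presumably handled inside the plcbundle client now. If a local limiter is ever needed again, golang.org/x/time/rate gives the same behaviour in a few lines; a sketch, not part of this change:

import "golang.org/x/time/rate"

// 90 requests per minute with a burst of 90: the old NewRateLimiter(90, time.Minute)
limiter := rate.NewLimiter(rate.Every(time.Minute/90), 90)
if err := limiter.Wait(ctx); err != nil { // blocks until a token is available
	return err
}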
+92
-431
internal/plc/scanner.go
+92
-431
internal/plc/scanner.go
···
2
2
3
3
import (
4
4
"context"
5
-
"encoding/json"
6
5
"fmt"
7
6
"strings"
8
7
"time"
9
8
10
-
"github.com/acarl005/stripansi"
11
-
"github.com/atscan/atscanner/internal/config"
12
-
"github.com/atscan/atscanner/internal/log"
13
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/storage"
14
12
)
15
13
16
14
type Scanner struct {
17
-
client *Client
15
+
bundleManager *BundleManager
18
16
db storage.Database
19
17
config config.PLCConfig
20
-
bundleManager *BundleManager
21
18
}
22
19
23
-
func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
24
-
bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
25
-
if err != nil {
26
-
log.Error("Warning: failed to initialize bundle manager: %v", err)
27
-
bundleManager = &BundleManager{enabled: false}
28
-
}
20
+
func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
21
+
log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
29
22
30
23
return &Scanner{
31
-
client: NewClient(cfg.DirectoryURL),
24
+
bundleManager: bundleManager, // Use provided instance
32
25
db: db,
33
26
config: cfg,
34
-
bundleManager: bundleManager,
35
27
}
36
28
}
37
29
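Since the constructor now receives the BundleManager instead of building one, a single instance can back both the scanner and the API server, and whoever creates it owns its lifecycle. Plausible wiring in main, using the constructor signature from the removed code (shown as an assumption):

bm, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.UseCache, db, cfg.PLC.IndexDIDs)
if err != nil {
	log.Fatal("Failed to initialize bundle manager: %v", err)
}
defer bm.Close() // closed once, by the owner; Scanner.Close deliberately leaves it open

scanner := plc.NewScanner(db, cfg.PLC, bm)
defer scanner.Close()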
38
30
func (s *Scanner) Close() {
39
-
if s.bundleManager != nil {
40
-
s.bundleManager.Close()
41
-
}
42
-
}
43
-
44
-
// ScanMetrics tracks scan progress
45
-
type ScanMetrics struct {
46
-
totalFetched int64 // Total ops fetched from PLC/bundles
47
-
totalProcessed int64 // Unique ops processed (after dedup)
48
-
newEndpoints int64 // New endpoints discovered
49
-
endpointCounts map[string]int64
50
-
currentBundle int
51
-
startTime time.Time
52
-
}
53
-
54
-
func newMetrics(startBundle int) *ScanMetrics {
55
-
return &ScanMetrics{
56
-
endpointCounts: make(map[string]int64),
57
-
currentBundle: startBundle,
58
-
startTime: time.Now(),
59
-
}
60
-
}
61
-
62
-
func (m *ScanMetrics) logSummary() {
63
-
summary := formatEndpointCounts(m.endpointCounts)
64
-
if m.newEndpoints > 0 {
65
-
log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
66
-
m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
67
-
} else {
68
-
log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
69
-
m.totalProcessed, m.totalFetched, time.Since(m.startTime))
70
-
}
31
+
// Don't close bundleManager here - it's shared
71
32
}
72
33
73
34
func (s *Scanner) Scan(ctx context.Context) error {
74
35
log.Info("Starting PLC directory scan...")
75
-
log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
76
36
77
37
cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
78
38
if err != nil {
79
39
return fmt.Errorf("failed to get scan cursor: %w", err)
80
40
}
81
41
82
-
startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
83
-
metrics := newMetrics(startBundle)
84
-
85
-
if startBundle > 1 {
86
-
if err := s.ensureContinuity(ctx, startBundle); err != nil {
87
-
return err
88
-
}
89
-
}
42
+
metrics := newMetrics(cursor.LastBundleNumber + 1)
90
43
91
-
// Handle existing mempool first
92
-
if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
93
-
return s.handleMempoolOnly(ctx, metrics)
94
-
}
95
-
96
-
// Process bundles until incomplete or error
44
+
// Main processing loop
97
45
for {
98
46
if err := ctx.Err(); err != nil {
99
47
return err
100
48
}
101
49
102
-
if err := s.processSingleBundle(ctx, metrics); err != nil {
103
-
if s.shouldRetry(err) {
104
-
continue
105
-
}
106
-
break
107
-
}
108
-
109
-
if err := s.updateCursor(ctx, cursor, metrics); err != nil {
110
-
log.Error("Warning: failed to update cursor: %v", err)
111
-
}
112
-
}
113
-
114
-
// Try to finalize mempool
115
-
s.finalizeMempool(ctx, metrics)
116
-
117
-
metrics.logSummary()
118
-
return nil
119
-
}
120
-
121
-
func (s *Scanner) calculateStartBundle(lastBundle int) int {
122
-
if lastBundle == 0 {
123
-
return 1
124
-
}
125
-
return lastBundle + 1
126
-
}
127
-
128
-
func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
129
-
log.Info("Checking bundle continuity...")
130
-
if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
131
-
return fmt.Errorf("bundle continuity check failed: %w", err)
132
-
}
133
-
return nil
134
-
}
135
-
136
-
func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
137
-
count, err := s.db.GetMempoolCount(ctx)
138
-
if err != nil {
139
-
return false, err
140
-
}
141
-
return count > 0, nil
142
-
}
143
-
144
-
func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
145
-
count, _ := s.db.GetMempoolCount(ctx)
146
-
log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
147
-
148
-
if err := s.fillMempool(ctx, m); err != nil {
149
-
return err
150
-
}
151
-
152
-
if err := s.processMempool(ctx, m); err != nil {
153
-
log.Error("Error processing mempool: %v", err)
154
-
}
155
-
156
-
m.logSummary()
157
-
return nil
158
-
}
159
-
160
-
func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
161
-
log.Verbose("→ Processing bundle %06d...", m.currentBundle)
162
-
163
-
ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
164
-
if err != nil {
165
-
return s.handleBundleError(err, m)
166
-
}
167
-
168
-
if isComplete {
169
-
return s.handleCompleteBundle(ctx, ops, m)
170
-
}
171
-
return s.handleIncompleteBundle(ctx, ops, m)
172
-
}
173
-
174
-
func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
175
-
log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
176
-
177
-
if strings.Contains(err.Error(), "rate limited") {
178
-
log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
179
-
time.Sleep(5 * time.Minute)
180
-
return fmt.Errorf("retry")
181
-
}
182
-
183
-
if m.currentBundle > 1 {
184
-
log.Info("→ Reached end of available data")
185
-
}
186
-
return err
187
-
}
188
-
189
-
func (s *Scanner) shouldRetry(err error) bool {
190
-
return err != nil && err.Error() == "retry"
191
-
}
192
-
193
-
func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
194
-
counts, err := s.processBatch(ctx, ops)
195
-
if err != nil {
196
-
return err
197
-
}
198
-
199
-
s.mergeCounts(m.endpointCounts, counts)
200
-
m.totalProcessed += int64(len(ops)) // Unique ops after dedup
201
-
m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
202
-
203
-
batchTotal := sumCounts(counts)
204
-
log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
205
-
m.currentBundle, len(ops), batchTotal)
206
-
207
-
m.currentBundle++
208
-
return nil
209
-
}
210
-
211
-
func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
212
-
log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
213
-
214
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
215
-
return err
216
-
}
217
-
218
-
s.finalizeMempool(ctx, m)
219
-
return fmt.Errorf("incomplete") // Signal end of processing
220
-
}
221
-
222
-
func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
223
-
if err := s.fillMempool(ctx, m); err != nil {
224
-
log.Error("Error filling mempool: %v", err)
225
-
}
226
-
if err := s.processMempool(ctx, m); err != nil {
227
-
log.Error("Error processing mempool: %v", err)
228
-
}
229
-
}
230
-
231
-
func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
232
-
const fetchLimit = 1000
233
-
234
-
for {
235
-
count, err := s.db.GetMempoolCount(ctx)
50
+
// Fetch and save bundle (library handles mempool internally)
51
+
bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
236
52
if err != nil {
237
-
return err
238
-
}
53
+
if isInsufficientOpsError(err) {
54
+
// Show mempool status
55
+
stats := s.bundleManager.libManager.GetMempoolStats()
56
+
mempoolCount, _ := stats["count"].(int) // checked assertion: a missing/mistyped key yields 0 instead of a panic
239
57
240
-
if count >= BUNDLE_SIZE {
241
-
log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
242
-
return nil
243
-
}
58
+
if mempoolCount > 0 {
59
+
log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
60
+
mempoolCount, BUNDLE_SIZE)
61
+
} else {
62
+
log.Info("→ Caught up! No operations available")
63
+
}
64
+
break
65
+
}
244
66
245
-
log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
246
-
247
-
// ✅ Fix: Don't capture unused 'ops' variable
248
-
shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
249
-
if err != nil {
250
-
return err
251
-
}
67
+
if strings.Contains(err.Error(), "rate limited") {
68
+
log.Info("⚠ Rate limited, pausing for 5 minutes...")
69
+
time.Sleep(5 * time.Minute)
70
+
continue
71
+
}
252
72
253
-
if !shouldContinue {
254
-
finalCount, _ := s.db.GetMempoolCount(ctx)
255
-
log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
256
-
return nil
73
+
return fmt.Errorf("failed to fetch bundle: %w", err)
257
74
}
258
-
}
259
-
}
260
75
261
-
func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
262
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
263
-
if err != nil {
264
-
return false, err
265
-
}
266
-
267
-
var after string
268
-
if lastOp != nil {
269
-
after = lastOp.CreatedAt.Format(time.RFC3339Nano)
270
-
log.Verbose(" Using cursor: %s", after)
271
-
}
272
-
273
-
ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
274
-
if err != nil {
275
-
return false, fmt.Errorf("failed to fetch from PLC: %w", err)
276
-
}
277
-
278
-
fetchedCount := len(ops)
279
-
m.totalFetched += int64(fetchedCount) // Track all fetched
280
-
log.Verbose(" Fetched %d operations from PLC", fetchedCount)
281
-
282
-
if fetchedCount == 0 {
283
-
count, _ := s.db.GetMempoolCount(ctx)
284
-
log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
285
-
return false, nil
286
-
}
287
-
288
-
beforeCount, err := s.db.GetMempoolCount(ctx)
289
-
if err != nil {
290
-
return false, err
291
-
}
292
-
293
-
endpointsBefore := sumCounts(m.endpointCounts)
294
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
295
-
return false, err
296
-
}
297
-
endpointsAfter := sumCounts(m.endpointCounts)
298
-
m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
299
-
300
-
afterCount, err := s.db.GetMempoolCount(ctx)
301
-
if err != nil {
302
-
return false, err
303
-
}
304
-
305
-
uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
306
-
m.totalProcessed += uniqueAdded // Track unique ops processed
307
-
308
-
log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
309
-
uniqueAdded, int64(fetchedCount)-uniqueAdded)
310
-
311
-
// Continue only if got full batch
312
-
shouldContinue := fetchedCount >= limit
313
-
if !shouldContinue {
314
-
log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
315
-
}
316
-
317
-
return shouldContinue, nil
318
-
}
319
-
320
-
func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
321
-
mempoolOps := make([]storage.MempoolOperation, len(ops))
322
-
for i, op := range ops {
323
-
mempoolOps[i] = storage.MempoolOperation{
324
-
DID: op.DID,
325
-
Operation: string(op.RawJSON),
326
-
CID: op.CID,
327
-
CreatedAt: op.CreatedAt,
328
-
}
329
-
}
330
-
331
-
if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
332
-
return err
333
-
}
334
-
335
-
// NEW: Create/update DID records immediately when adding to mempool
336
-
for _, op := range ops {
337
-
info := ExtractDIDInfo(&op)
338
-
339
-
// Validate handle length before saving
340
-
validHandle := ValidateHandle(info.Handle)
341
-
if info.Handle != "" && validHandle == "" {
342
-
log.Verbose("Skipping invalid handle for DID %s (length: %d)", op.DID, len(info.Handle))
343
-
}
344
-
345
-
if err := s.db.UpsertDIDFromMempool(ctx, op.DID, validHandle, info.PDS); err != nil {
346
-
log.Error("Failed to upsert DID %s in mempool: %v", op.DID, err)
347
-
// Don't fail the whole operation, just log
348
-
}
349
-
}
350
-
351
-
// Process for endpoint discovery
352
-
batchCounts, err := s.processBatch(ctx, ops)
353
-
s.mergeCounts(counts, batchCounts)
354
-
return err
355
-
}
356
-
357
-
func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
358
-
for {
359
-
count, err := s.db.GetMempoolCount(ctx)
76
+
// Process operations for endpoint discovery
77
+
counts, err := s.processBatch(ctx, bundle.Operations)
360
78
if err != nil {
361
-
return err
79
+
log.Error("Failed to process batch: %v", err)
80
+
// Continue anyway
362
81
}
363
82
364
-
log.Verbose("Mempool contains %d operations", count)
83
+
// Update metrics
84
+
s.mergeCounts(metrics.endpointCounts, counts)
85
+
metrics.totalProcessed += int64(len(bundle.Operations))
86
+
metrics.newEndpoints += sumCounts(counts)
87
+
metrics.currentBundle = bundle.BundleNumber
365
88
366
-
if count < BUNDLE_SIZE {
367
-
log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
368
-
return nil
369
-
}
89
+
log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
90
+
bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
370
91
371
-
log.Info("→ Creating bundle from mempool (%d operations available)...", count)
372
-
373
-
// Updated to receive 4 values instead of 3
374
-
bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
375
-
if err != nil {
376
-
return err
377
-
}
378
-
379
-
// Process and update metrics
380
-
countsBefore := sumCounts(m.endpointCounts)
381
-
counts, _ := s.processBatch(ctx, ops)
382
-
s.mergeCounts(m.endpointCounts, counts)
383
-
newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
384
-
385
-
m.totalProcessed += int64(len(ops))
386
-
m.newEndpoints += newEndpointsFound
387
-
m.currentBundle = bundleNum
388
-
389
-
if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92
+
// Update cursor
93
+
if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
390
94
log.Error("Warning: failed to update cursor: %v", err)
391
95
}
392
-
393
-
log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
394
96
}
395
-
}
396
97
397
-
func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
398
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
399
-
if err != nil {
400
-
return 0, nil, "", err
98
+
// Show final mempool status
99
+
stats := s.bundleManager.libManager.GetMempoolStats()
100
+
if count, ok := stats["count"].(int); ok && count > 0 {
101
+
log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102
+
count, float64(count)/float64(BUNDLE_SIZE)*100)
401
103
}
402
104
403
-
ops, ids := s.deduplicateMempool(mempoolOps)
404
-
if len(ops) < BUNDLE_SIZE {
405
-
return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
406
-
}
407
-
408
-
// Determine cursor from last bundle
409
-
cursor := ""
410
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
411
-
if err == nil && lastBundle > 0 {
412
-
if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
413
-
cursor = bundle.EndTime.Format(time.RFC3339Nano)
414
-
}
415
-
}
416
-
417
-
bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
418
-
if err != nil {
419
-
return 0, nil, "", err
420
-
}
421
-
422
-
if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
423
-
return 0, nil, "", err
424
-
}
425
-
426
-
return bundleNum, ops, cursor, nil
105
+
metrics.logSummary()
106
+
return nil
427
107
}
428
108
429
-
func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
430
-
ops := make([]PLCOperation, 0, BUNDLE_SIZE)
431
-
ids := make([]int64, 0, BUNDLE_SIZE)
432
-
seenCIDs := make(map[string]bool)
433
-
434
-
for _, mop := range mempoolOps {
435
-
if seenCIDs[mop.CID] {
436
-
ids = append(ids, mop.ID)
437
-
continue
438
-
}
439
-
seenCIDs[mop.CID] = true
440
-
441
-
var op PLCOperation
442
-
json.Unmarshal([]byte(mop.Operation), &op)
443
-
op.RawJSON = []byte(mop.Operation)
444
-
445
-
ops = append(ops, op)
446
-
ids = append(ids, mop.ID)
447
-
448
-
if len(ops) >= BUNDLE_SIZE {
449
-
break
450
-
}
451
-
}
452
-
453
-
return ops, ids
454
-
}
455
-
109
+
// processBatch extracts endpoints from operations
456
110
func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
457
111
counts := make(map[string]int64)
458
112
seen := make(map[string]*PLCOperation)
459
113
460
114
// Collect unique endpoints
461
-
for _, op := range ops {
115
+
for i := range ops {
116
+
op := &ops[i]
117
+
462
118
if op.IsNullified() {
463
119
continue
464
120
}
465
-
for _, ep := range s.extractEndpointsFromOperation(op) {
121
+
122
+
for _, ep := range s.extractEndpointsFromOperation(*op) {
466
123
key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
467
124
if _, exists := seen[key]; !exists {
468
-
seen[key] = &op
125
+
seen[key] = op
469
126
}
470
127
}
471
128
}
···
481
138
}
482
139
483
140
if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
484
-
log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141
+
log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
485
142
continue
486
143
}
487
144
488
-
log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145
+
log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
489
146
counts[epType]++
490
147
}
491
148
492
149
return counts, nil
493
-
}
494
-
495
-
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
496
-
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
497
-
EndpointType: epType,
498
-
Endpoint: endpoint,
499
-
DiscoveredAt: discoveredAt,
500
-
LastChecked: time.Time{},
501
-
Status: storage.EndpointStatusUnknown,
502
-
})
503
150
}
504
151
505
152
func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
542
189
return nil
543
190
}
544
191
545
-
func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
546
-
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
547
-
Source: "plc_directory",
548
-
LastBundleNumber: m.currentBundle - 1,
549
-
LastScanTime: time.Now().UTC(),
550
-
RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192
+
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
194
+
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195
+
EndpointType: epType,
196
+
Endpoint: endpoint,
197
+
DiscoveredAt: discoveredAt,
198
+
LastChecked: time.Time{},
199
+
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
551
201
})
552
202
}
553
203
···
575
225
return total
576
226
}
577
227
578
-
func formatEndpointCounts(counts map[string]int64) string {
579
-
if len(counts) == 0 {
580
-
return "0 new endpoints"
581
-
}
228
+
func isInsufficientOpsError(err error) bool {
229
+
return err != nil && strings.Contains(err.Error(), "insufficient operations")
230
+
}
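Matching on error text keeps the coupling loose, but it breaks silently if the library rewords its message. If plcbundle exported a sentinel error (hypothetical; no such variable is confirmed by this change), errors.Is would be sturdier:

// Hypothetical, assuming plcbundle declared:
//   var ErrInsufficientOps = errors.New("insufficient operations")
func isInsufficientOpsError(err error) bool {
	return errors.Is(err, plcbundle.ErrInsufficientOps)
}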
582
231
583
-
total := sumCounts(counts)
232
+
// ScanMetrics tracks scan progress
233
+
type ScanMetrics struct {
234
+
totalProcessed int64
235
+
newEndpoints int64
236
+
endpointCounts map[string]int64
237
+
currentBundle int
238
+
startTime time.Time
239
+
}
584
240
585
-
if len(counts) == 1 {
586
-
for typ, count := range counts {
587
-
return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
588
-
}
241
+
func newMetrics(startBundle int) *ScanMetrics {
242
+
return &ScanMetrics{
243
+
endpointCounts: make(map[string]int64),
244
+
currentBundle: startBundle,
245
+
startTime: time.Now(),
589
246
}
247
+
}
590
248
591
-
parts := make([]string, 0, len(counts))
592
-
for typ, count := range counts {
593
-
parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249
+
func (m *ScanMetrics) logSummary() {
250
+
if m.newEndpoints > 0 {
251
+
log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252
+
m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253
+
} else {
254
+
log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255
+
m.totalProcessed, time.Since(m.startTime))
594
256
}
595
-
return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
596
257
}
+68
-55
internal/plc/types.go
+68
-55
internal/plc/types.go
···
1
1
package plc
2
2
3
-
import "time"
4
-
5
-
type PLCOperation struct {
6
-
DID string `json:"did"`
7
-
Operation map[string]interface{} `json:"operation"`
8
-
CID string `json:"cid"`
9
-
Nullified interface{} `json:"nullified,omitempty"`
10
-
CreatedAt time.Time `json:"createdAt"`
11
-
12
-
RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13
-
}
3
+
import (
4
+
"net/url"
5
+
"strings"
14
6
15
-
// Helper method to check if nullified
16
-
func (op *PLCOperation) IsNullified() bool {
17
-
if op.Nullified == nil {
18
-
return false
19
-
}
20
-
21
-
switch v := op.Nullified.(type) {
22
-
case bool:
23
-
return v
24
-
case string:
25
-
return v != ""
26
-
default:
27
-
return false
28
-
}
29
-
}
30
-
31
-
// Get nullifying CID if available
32
-
func (op *PLCOperation) GetNullifyingCID() string {
33
-
if s, ok := op.Nullified.(string); ok {
34
-
return s
35
-
}
36
-
return ""
37
-
}
7
+
plclib "tangled.org/atscan.net/plcbundle/plc"
8
+
)
38
9
39
-
type DIDDocument struct {
40
-
Context []string `json:"@context"`
41
-
ID string `json:"id"`
42
-
AlsoKnownAs []string `json:"alsoKnownAs"`
43
-
VerificationMethod []VerificationMethod `json:"verificationMethod"`
44
-
Service []Service `json:"service"`
45
-
}
10
+
// Re-export library types
11
+
type PLCOperation = plclib.PLCOperation
12
+
type DIDDocument = plclib.DIDDocument
13
+
type Client = plclib.Client
14
+
type ExportOptions = plclib.ExportOptions
46
15
47
-
type VerificationMethod struct {
48
-
ID string `json:"id"`
49
-
Type string `json:"type"`
50
-
Controller string `json:"controller"`
51
-
PublicKeyMultibase string `json:"publicKeyMultibase"`
52
-
}
16
+
// Constants and types local to this package
17
+
const BUNDLE_SIZE = 10000 // operations per complete PLC bundle
53
18
54
-
type Service struct {
55
-
ID string `json:"id"`
56
-
Type string `json:"type"`
57
-
ServiceEndpoint string `json:"serviceEndpoint"`
58
-
}
59
-
60
-
// DIDHistoryEntry represents a single operation in DID history
61
19
type DIDHistoryEntry struct {
62
20
Operation PLCOperation `json:"operation"`
63
21
PLCBundle string `json:"plc_bundle,omitempty"`
64
22
}
65
23
66
-
// DIDHistory represents the full history of a DID
67
24
type DIDHistory struct {
68
25
DID string `json:"did"`
69
26
Current *PLCOperation `json:"current"`
···
74
31
Type string
75
32
Endpoint string
76
33
}
34
+
35
+
// PLCOpLabel holds metadata from the label CSV file
36
+
type PLCOpLabel struct {
37
+
Bundle int `json:"bundle"`
38
+
Position int `json:"position"`
39
+
CID string `json:"cid"`
40
+
Size int `json:"size"`
41
+
Confidence float64 `json:"confidence"`
42
+
Detectors []string `json:"detectors"`
43
+
}
44
+
45
+
// validateEndpoint checks if endpoint is in correct format: https://<domain>
46
+
func validateEndpoint(endpoint string) bool {
47
+
// Must not be empty
48
+
if endpoint == "" {
49
+
return false
50
+
}
51
+
52
+
// Must not have trailing slash
53
+
if strings.HasSuffix(endpoint, "/") {
54
+
return false
55
+
}
56
+
57
+
// Parse URL
58
+
u, err := url.Parse(endpoint)
59
+
if err != nil {
60
+
return false
61
+
}
62
+
63
+
// Must use https scheme
64
+
if u.Scheme != "https" {
65
+
return false
66
+
}
67
+
68
+
// Must have a host
69
+
if u.Host == "" {
70
+
return false
71
+
}
72
+
73
+
// Must not have path (except empty)
74
+
if u.Path != "" && u.Path != "/" {
75
+
return false
76
+
}
77
+
78
+
// Must not have query parameters
79
+
if u.RawQuery != "" {
80
+
return false
81
+
}
82
+
83
+
// Must not have fragment
84
+
if u.Fragment != "" {
85
+
return false
86
+
}
87
+
88
+
return true
89
+
}
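A few concrete cases the checks above accept or reject (illustrative, not an exhaustive test):

validateEndpoint("https://pds.example.com")      // true
validateEndpoint("https://pds.example.com/")     // false: trailing slash
validateEndpoint("http://pds.example.com")       // false: scheme must be https
validateEndpoint("https://pds.example.com/xrpc") // false: path not allowed
validateEndpoint("https://pds.example.com?x=1")  // false: query not allowed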
-20
internal/storage/db.go
-20
internal/storage/db.go
···
50
50
GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
51
51
UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
52
52
53
-
// Bundle operations
54
-
CreateBundle(ctx context.Context, bundle *PLCBundle) error
55
-
GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
56
-
GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
57
-
GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
58
-
GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
59
-
GetLastBundleNumber(ctx context.Context) (int, error)
60
-
GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
61
-
GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
62
-
63
-
// Mempool operations
64
-
AddToMempool(ctx context.Context, ops []MempoolOperation) error
65
-
GetMempoolCount(ctx context.Context) (int, error)
66
-
GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
67
-
DeleteFromMempool(ctx context.Context, ids []int64) error
68
-
GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
69
-
GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
70
-
GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
71
-
GetMempoolUncompressedSize(ctx context.Context) (int64, error)
72
-
73
53
// Metrics
74
54
StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
75
55
GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
+105
-546
internal/storage/postgres.go
+105
-546
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
11
-
"github.com/atscan/atscanner/internal/log"
10
+
"github.com/atscan/atscand/internal/log"
12
11
"github.com/jackc/pgx/v5"
13
12
"github.com/jackc/pgx/v5/pgxpool"
14
13
_ "github.com/jackc/pgx/v5/stdlib"
···
73
72
log.Info("Running database migrations...")
74
73
75
74
schema := `
76
-
-- Endpoints table (NO user_count, NO ip_info)
77
-
CREATE TABLE IF NOT EXISTS endpoints (
78
-
id BIGSERIAL PRIMARY KEY,
79
-
endpoint_type TEXT NOT NULL DEFAULT 'pds',
80
-
endpoint TEXT NOT NULL,
81
-
server_did TEXT,
82
-
discovered_at TIMESTAMP NOT NULL,
83
-
last_checked TIMESTAMP,
84
-
status INTEGER DEFAULT 0,
85
-
ip TEXT,
86
-
ipv6 TEXT,
87
-
ip_resolved_at TIMESTAMP,
88
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
89
-
UNIQUE(endpoint_type, endpoint)
90
-
);
75
+
-- Endpoints table (with IPv6 support)
76
+
CREATE TABLE IF NOT EXISTS endpoints (
77
+
id BIGSERIAL PRIMARY KEY,
78
+
endpoint_type TEXT NOT NULL DEFAULT 'pds',
79
+
endpoint TEXT NOT NULL,
80
+
server_did TEXT,
81
+
discovered_at TIMESTAMP NOT NULL,
82
+
last_checked TIMESTAMP,
83
+
status INTEGER DEFAULT 0,
84
+
ip TEXT,
85
+
ipv6 TEXT,
86
+
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
88
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
89
+
UNIQUE(endpoint_type, endpoint)
90
+
);
91
91
92
-
CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
93
-
CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
94
-
CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
95
-
CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96
-
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97
-
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98
-
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
92
+
CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
93
+
CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
94
+
CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
95
+
CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96
+
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
99
100
100
101
-- IP infos table (IP as PRIMARY KEY)
101
102
CREATE TABLE IF NOT EXISTS ip_infos (
···
157
158
records_processed BIGINT DEFAULT 0
158
159
);
159
160
160
-
CREATE TABLE IF NOT EXISTS plc_bundles (
161
-
bundle_number INTEGER PRIMARY KEY,
162
-
start_time TIMESTAMP NOT NULL,
163
-
end_time TIMESTAMP NOT NULL,
164
-
dids JSONB NOT NULL,
165
-
hash TEXT NOT NULL,
166
-
compressed_hash TEXT NOT NULL,
167
-
compressed_size BIGINT NOT NULL,
168
-
uncompressed_size BIGINT NOT NULL,
169
-
cumulative_compressed_size BIGINT NOT NULL,
170
-
cumulative_uncompressed_size BIGINT NOT NULL,
171
-
cursor TEXT,
172
-
prev_bundle_hash TEXT,
173
-
compressed BOOLEAN DEFAULT true,
174
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
175
-
);
176
-
177
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
178
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
179
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
180
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
181
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
182
-
183
-
CREATE TABLE IF NOT EXISTS plc_mempool (
184
-
id BIGSERIAL PRIMARY KEY,
185
-
did TEXT NOT NULL,
186
-
operation TEXT NOT NULL,
187
-
cid TEXT NOT NULL UNIQUE,
188
-
created_at TIMESTAMP NOT NULL,
189
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
190
-
);
191
-
192
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
193
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
194
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
195
-
196
161
-- Minimal dids table
197
162
CREATE TABLE IF NOT EXISTS dids (
198
163
did TEXT PRIMARY KEY,
···
245
210
246
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
247
212
query := `
248
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at)
249
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
250
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
251
216
last_checked = EXCLUDED.last_checked,
252
217
status = EXCLUDED.status,
···
262
227
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
263
228
ELSE endpoints.ip_resolved_at
264
229
END,
230
+
valid = EXCLUDED.valid,
265
231
updated_at = CURRENT_TIMESTAMP
266
232
RETURNING id
267
233
`
268
234
err := p.db.QueryRowContext(ctx, query,
269
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
270
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
271
237
return err
272
238
}
273
239
···
288
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
289
255
query := `
290
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
291
-
ip, ipv6, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
292
258
FROM endpoints
293
259
WHERE endpoint = $1 AND endpoint_type = $2
294
260
`
···
299
265
300
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
301
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
302
-
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
303
269
)
304
270
if err != nil {
305
271
return nil, err
···
325
291
query := `
326
292
SELECT DISTINCT ON (COALESCE(server_did, id::text))
327
293
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
328
-
ip, ipv6, ip_resolved_at, updated_at
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
329
295
FROM endpoints
330
296
WHERE 1=1
331
297
`
···
338
304
args = append(args, filter.Type)
339
305
argIdx++
340
306
}
307
+
308
+
// NEW: Filter by valid flag
309
+
if filter.OnlyValid {
310
+
query += " AND valid = true"
311
+
}
341
312
if filter.Status != "" {
342
313
statusInt := EndpointStatusUnknown
343
314
switch filter.Status {
···
360
331
}
361
332
}
362
333
363
-
// NEW: Order by server_did and discovered_at to get primary endpoints
364
-
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
334
+
// NEW: Choose ordering strategy
335
+
if filter != nil && filter.Random {
336
+
// For random selection, we need to wrap in a subquery
337
+
query = fmt.Sprintf(`
338
+
WITH filtered_endpoints AS (
339
+
%s
340
+
)
341
+
SELECT * FROM filtered_endpoints
342
+
ORDER BY RANDOM()
343
+
`, query)
344
+
} else {
345
+
// Original ordering for non-random queries
346
+
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
347
+
}
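Callers opt into the random path through the filter. A hypothetical call site, picking one random valid PDS to probe (the method and filter struct names are assumptions):

eps, err := db.GetEndpoints(ctx, &storage.EndpointFilter{
	Type:      "pds",
	OnlyValid: true,
	Random:    true,
	Limit:     1,
})

Note that ORDER BY RANDOM() sorts the entire filtered set; that is fine at PDS-directory scale but worth revisiting if the table ever reaches millions of rows.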
365
348
366
349
if filter != nil && filter.Limit > 0 {
367
350
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
590
573
discovered_at,
591
574
last_checked,
592
575
status,
593
-
ip
576
+
ip,
577
+
ipv6,
578
+
valid
594
579
FROM endpoints
595
580
WHERE endpoint_type = 'pds'
596
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
597
582
)
598
583
SELECT
599
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
600
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
601
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
602
587
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
···
657
642
var items []*PDSListItem
658
643
for rows.Next() {
659
644
item := &PDSListItem{}
660
-
var ip, serverDID, city, country, countryCode, asnOrg sql.NullString
645
+
var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
661
646
var asn sql.NullInt32
662
647
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
663
648
var lat, lon sql.NullFloat64
···
667
652
var scannedAt sql.NullTime
668
653
669
654
err := rows.Scan(
670
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
671
656
&userCount, &responseTime, &version, &scannedAt,
672
657
&city, &country, &countryCode, &asn, &asnOrg,
673
658
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
679
664
680
665
if ip.Valid {
681
666
item.IP = ip.String
667
+
}
668
+
if ipv6.Valid {
669
+
item.IPv6 = ipv6.String
682
670
}
683
671
if serverDID.Valid {
684
672
item.ServerDID = serverDID.String
···
726
714
727
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
728
716
query := `
729
-
WITH target_endpoint AS (
717
+
WITH target_endpoint AS MATERIALIZED (
730
718
SELECT
731
719
e.id,
732
720
e.endpoint,
···
734
722
e.discovered_at,
735
723
e.last_checked,
736
724
e.status,
737
-
e.ip
725
+
e.ip,
726
+
e.ipv6,
727
+
e.valid
738
728
FROM endpoints e
739
-
WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
740
-
),
741
-
aliases_agg AS (
742
-
SELECT
743
-
te.server_did,
744
-
array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
745
-
MIN(e.discovered_at) as first_discovered_at
746
-
FROM target_endpoint te
747
-
LEFT JOIN endpoints e ON te.server_did = e.server_did
748
-
AND e.endpoint_type = 'pds'
749
-
AND te.server_did IS NOT NULL
750
-
GROUP BY te.server_did
729
+
WHERE e.endpoint = $1
730
+
AND e.endpoint_type = 'pds'
731
+
LIMIT 1
751
732
)
752
733
SELECT
753
734
te.id,
···
757
738
te.last_checked,
758
739
te.status,
759
740
te.ip,
741
+
te.ipv6,
742
+
te.valid,
760
743
latest.user_count,
761
744
latest.response_time,
762
745
latest.version,
···
766
749
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
767
750
i.latitude, i.longitude,
768
751
i.raw_data,
769
-
COALESCE(aa.aliases, ARRAY[]::text[]) as aliases,
770
-
aa.first_discovered_at
752
+
COALESCE(
753
+
ARRAY(
754
+
SELECT e2.endpoint
755
+
FROM endpoints e2
756
+
WHERE e2.server_did = te.server_did
757
+
AND e2.endpoint_type = 'pds'
758
+
AND e2.endpoint != te.endpoint
759
+
AND te.server_did IS NOT NULL
760
+
ORDER BY e2.discovered_at
761
+
),
762
+
ARRAY[]::text[]
763
+
) as aliases,
764
+
CASE
765
+
WHEN te.server_did IS NOT NULL THEN (
766
+
SELECT MIN(e3.discovered_at)
767
+
FROM endpoints e3
768
+
WHERE e3.server_did = te.server_did
769
+
AND e3.endpoint_type = 'pds'
770
+
)
771
+
ELSE NULL
772
+
END as first_discovered_at
771
773
FROM target_endpoint te
772
-
LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did
773
774
LEFT JOIN LATERAL (
774
-
SELECT scan_data, response_time, version, scanned_at, user_count
775
-
FROM endpoint_scans
776
-
WHERE endpoint_id = te.id
777
-
ORDER BY scanned_at DESC
775
+
SELECT
776
+
es.scan_data,
777
+
es.response_time,
778
+
es.version,
779
+
es.scanned_at,
780
+
es.user_count
781
+
FROM endpoint_scans es
782
+
WHERE es.endpoint_id = te.id
783
+
ORDER BY es.scanned_at DESC
778
784
LIMIT 1
779
785
) latest ON true
780
-
LEFT JOIN ip_infos i ON te.ip = i.ip
786
+
LEFT JOIN ip_infos i ON te.ip = i.ip;
781
787
`
782
788
783
789
detail := &PDSDetail{}
784
-
var ip, city, country, countryCode, asnOrg, serverDID sql.NullString
790
+
var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
785
791
var asn sql.NullInt32
786
792
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
787
793
var lat, lon sql.NullFloat64
···
795
801
var firstDiscoveredAt sql.NullTime
796
802
797
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
798
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
799
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
800
806
&city, &country, &countryCode, &asn, &asnOrg,
801
807
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
811
817
if ip.Valid {
812
818
detail.IP = ip.String
813
819
}
820
+
if ipv6.Valid {
821
+
detail.IPv6 = ipv6.String
822
+
}
814
823
815
824
if serverDID.Valid {
816
825
detail.ServerDID = serverDID.String
···
819
828
// Set aliases and is_primary
820
829
detail.Aliases = aliases
821
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
822
-
// Has server_did - check if this is the first discovered
823
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
824
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
825
833
} else {
826
-
// No server_did means unique server
827
834
detail.IsPrimary = true
828
835
}
829
836
···
1154
1161
}
1155
1162
}
1156
1163
return 0
1157
-
}
1158
-
1159
-
// ===== BUNDLE OPERATIONS =====
1160
-
1161
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1162
-
didsJSON, err := json.Marshal(bundle.DIDs)
1163
-
if err != nil {
1164
-
return err
1165
-
}
1166
-
1167
-
// Calculate cumulative sizes from previous bundle
1168
-
if bundle.BundleNumber > 1 {
1169
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1170
-
if err == nil && prevBundle != nil {
1171
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1172
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1173
-
} else {
1174
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1175
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1176
-
}
1177
-
} else {
1178
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1179
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1180
-
}
1181
-
1182
-
query := `
1183
-
INSERT INTO plc_bundles (
1184
-
bundle_number, start_time, end_time, dids,
1185
-
hash, compressed_hash, compressed_size, uncompressed_size,
1186
-
cumulative_compressed_size, cumulative_uncompressed_size,
1187
-
cursor, prev_bundle_hash, compressed
1188
-
)
1189
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1190
-
ON CONFLICT(bundle_number) DO UPDATE SET
1191
-
start_time = EXCLUDED.start_time,
1192
-
end_time = EXCLUDED.end_time,
1193
-
dids = EXCLUDED.dids,
1194
-
hash = EXCLUDED.hash,
1195
-
compressed_hash = EXCLUDED.compressed_hash,
1196
-
compressed_size = EXCLUDED.compressed_size,
1197
-
uncompressed_size = EXCLUDED.uncompressed_size,
1198
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1199
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1200
-
cursor = EXCLUDED.cursor,
1201
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1202
-
compressed = EXCLUDED.compressed
1203
-
`
1204
-
_, err = p.db.ExecContext(ctx, query,
1205
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1206
-
didsJSON, bundle.Hash, bundle.CompressedHash,
1207
-
bundle.CompressedSize, bundle.UncompressedSize,
1208
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1209
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1210
-
)
1211
-
1212
-
return err
1213
-
}
1214
-
1215
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1216
-
query := `
1217
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1218
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1219
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1220
-
FROM plc_bundles
1221
-
WHERE bundle_number = $1
1222
-
`
1223
-
1224
-
var bundle PLCBundle
1225
-
var didsJSON []byte
1226
-
var prevHash sql.NullString
1227
-
var cursor sql.NullString
1228
-
1229
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1230
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1231
-
&didsJSON, &bundle.Hash, &bundle.CompressedHash,
1232
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1233
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1234
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1235
-
)
1236
-
if err != nil {
1237
-
return nil, err
1238
-
}
1239
-
1240
-
if prevHash.Valid {
1241
-
bundle.PrevBundleHash = prevHash.String
1242
-
}
1243
-
if cursor.Valid {
1244
-
bundle.Cursor = cursor.String
1245
-
}
1246
-
1247
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1248
-
return &bundle, nil
1249
-
}
1250
-
1251
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1252
-
query := `
1253
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1254
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1255
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1256
-
FROM plc_bundles
1257
-
ORDER BY bundle_number DESC
1258
-
LIMIT $1
1259
-
`
1260
-
1261
-
rows, err := p.db.QueryContext(ctx, query, limit)
1262
-
if err != nil {
1263
-
return nil, err
1264
-
}
1265
-
defer rows.Close()
1266
-
1267
-
return p.scanBundles(rows)
1268
-
}
1269
-
1270
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1271
-
query := `
1272
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1273
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1274
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1275
-
FROM plc_bundles
1276
-
WHERE dids ? $1
1277
-
ORDER BY bundle_number ASC
1278
-
`
1279
-
1280
-
rows, err := p.db.QueryContext(ctx, query, did)
1281
-
if err != nil {
1282
-
return nil, err
1283
-
}
1284
-
defer rows.Close()
1285
-
1286
-
return p.scanBundles(rows)
1287
-
}
1288
-
1289
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1290
-
var bundles []*PLCBundle
1291
-
1292
-
for rows.Next() {
1293
-
var bundle PLCBundle
1294
-
var didsJSON []byte
1295
-
var prevHash sql.NullString
1296
-
var cursor sql.NullString
1297
-
1298
-
if err := rows.Scan(
1299
-
&bundle.BundleNumber,
1300
-
&bundle.StartTime,
1301
-
&bundle.EndTime,
1302
-
&didsJSON,
1303
-
&bundle.Hash,
1304
-
&bundle.CompressedHash,
1305
-
&bundle.CompressedSize,
1306
-
&bundle.UncompressedSize,
1307
-
&bundle.CumulativeCompressedSize,
1308
-
&bundle.CumulativeUncompressedSize,
1309
-
&cursor,
1310
-
&prevHash,
1311
-
&bundle.Compressed,
1312
-
&bundle.CreatedAt,
1313
-
); err != nil {
1314
-
return nil, err
1315
-
}
1316
-
1317
-
if prevHash.Valid {
1318
-
bundle.PrevBundleHash = prevHash.String
1319
-
}
1320
-
if cursor.Valid {
1321
-
bundle.Cursor = cursor.String
1322
-
}
1323
-
1324
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1325
-
bundles = append(bundles, &bundle)
1326
-
}
1327
-
1328
-
return bundles, rows.Err()
1329
-
}
1330
-
1331
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1332
-
var count, lastBundleNum int64
1333
-
err := p.db.QueryRowContext(ctx, `
1334
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1335
-
FROM plc_bundles
1336
-
`).Scan(&count, &lastBundleNum)
1337
-
if err != nil {
1338
-
return 0, 0, 0, 0, err
1339
-
}
1340
-
1341
-
if lastBundleNum == 0 {
1342
-
return 0, 0, 0, 0, nil
1343
-
}
1344
-
1345
-
var compressedSize, uncompressedSize int64
1346
-
err = p.db.QueryRowContext(ctx, `
1347
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1348
-
FROM plc_bundles
1349
-
WHERE bundle_number = $1
1350
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1351
-
if err != nil {
1352
-
return 0, 0, 0, 0, err
1353
-
}
1354
-
1355
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1356
-
}
1357
-
1358
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1359
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1360
-
var num int
1361
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1362
-
return num, err
1363
-
}
1364
-
1365
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1366
-
query := `
1367
-
SELECT bundle_number
1368
-
FROM plc_bundles
1369
-
WHERE start_time <= $1 AND end_time >= $1
1370
-
ORDER BY bundle_number ASC
1371
-
LIMIT 1
1372
-
`
1373
-
1374
-
var bundleNum int
1375
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1376
-
if err == sql.ErrNoRows {
1377
-
query = `
1378
-
SELECT bundle_number
1379
-
FROM plc_bundles
1380
-
WHERE end_time < $1
1381
-
ORDER BY bundle_number DESC
1382
-
LIMIT 1
1383
-
`
1384
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1385
-
if err == sql.ErrNoRows {
1386
-
return 1, nil
1387
-
}
1388
-
if err != nil {
1389
-
return 0, err
1390
-
}
1391
-
return bundleNum, nil
1392
-
}
1393
-
if err != nil {
1394
-
return 0, err
1395
-
}
1396
-
1397
-
return bundleNum, nil
1398
-
}
1399
-
1400
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1401
-
query := `
1402
-
WITH daily_stats AS (
1403
-
SELECT
1404
-
DATE(start_time) as date,
1405
-
MAX(bundle_number) as last_bundle,
1406
-
COUNT(*) as bundle_count,
1407
-
SUM(uncompressed_size) as total_uncompressed,
1408
-
SUM(compressed_size) as total_compressed,
1409
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1410
-
MAX(cumulative_compressed_size) as cumulative_compressed
1411
-
FROM plc_bundles
1412
-
WHERE bundle_number >= $1
1413
-
GROUP BY DATE(start_time)
1414
-
)
1415
-
SELECT
1416
-
date::text,
1417
-
last_bundle,
1418
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1419
-
total_uncompressed,
1420
-
total_compressed,
1421
-
cumulative_uncompressed,
1422
-
cumulative_compressed
1423
-
FROM daily_stats
1424
-
ORDER BY date ASC
1425
-
`
1426
-
1427
-
if limit > 0 {
1428
-
query += fmt.Sprintf(" LIMIT %d", limit)
1429
-
}
1430
-
1431
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1432
-
if err != nil {
1433
-
return nil, err
1434
-
}
1435
-
defer rows.Close()
1436
-
1437
-
var history []*PLCHistoryPoint
1438
-
for rows.Next() {
1439
-
var point PLCHistoryPoint
1440
-
var cumulativeOps int64
1441
-
1442
-
err := rows.Scan(
1443
-
&point.Date,
1444
-
&point.BundleNumber,
1445
-
&cumulativeOps,
1446
-
&point.UncompressedSize,
1447
-
&point.CompressedSize,
1448
-
&point.CumulativeUncompressed,
1449
-
&point.CumulativeCompressed,
1450
-
)
1451
-
if err != nil {
1452
-
return nil, err
1453
-
}
1454
-
1455
-
point.OperationCount = int(cumulativeOps)
1456
-
1457
-
history = append(history, &point)
1458
-
}
1459
-
1460
-
return history, rows.Err()
1461
-
}
1462
-
-// ===== MEMPOOL OPERATIONS =====
-
-func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
-	if len(ops) == 0 {
-		return nil
-	}
-
-	tx, err := p.db.BeginTx(ctx, nil)
-	if err != nil {
-		return err
-	}
-	defer tx.Rollback()
-
-	stmt, err := tx.PrepareContext(ctx, `
-		INSERT INTO plc_mempool (did, operation, cid, created_at)
-		VALUES ($1, $2, $3, $4)
-		ON CONFLICT(cid) DO NOTHING
-	`)
-	if err != nil {
-		return err
-	}
-	defer stmt.Close()
-
-	for _, op := range ops {
-		_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
-		if err != nil {
-			return err
-		}
-	}
-
-	return tx.Commit()
-}
-
-func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
-	query := "SELECT COUNT(*) FROM plc_mempool"
-	var count int
-	err := p.db.QueryRowContext(ctx, query).Scan(&count)
-	return count, err
-}
-
-func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
-	query := `
-		SELECT id, did, operation, cid, created_at, added_at
-		FROM plc_mempool
-		ORDER BY created_at ASC
-		LIMIT $1
-	`
-
-	rows, err := p.db.QueryContext(ctx, query, limit)
-	if err != nil {
-		return nil, err
-	}
-	defer rows.Close()
-
-	var ops []MempoolOperation
-	for rows.Next() {
-		var op MempoolOperation
-		err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
-		if err != nil {
-			return nil, err
-		}
-		ops = append(ops, op)
-	}
-
-	return ops, rows.Err()
-}
-
-func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
-	if len(ids) == 0 {
-		return nil
-	}
-
-	placeholders := make([]string, len(ids))
-	args := make([]interface{}, len(ids))
-	for i, id := range ids {
-		placeholders[i] = fmt.Sprintf("$%d", i+1)
-		args[i] = id
-	}
-
-	query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
-		strings.Join(placeholders, ","))
-
-	_, err := p.db.ExecContext(ctx, query, args...)
-	return err
-}
-
-func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
-	query := `
-		SELECT id, did, operation, cid, created_at, added_at
-		FROM plc_mempool
-		ORDER BY created_at ASC, id ASC
-		LIMIT 1
-	`
-
-	var op MempoolOperation
-	err := p.db.QueryRowContext(ctx, query).Scan(
-		&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
-	)
-	if err == sql.ErrNoRows {
-		return nil, nil
-	}
-	if err != nil {
-		return nil, err
-	}
-
-	return &op, nil
-}
-
-func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
-	query := `
-		SELECT id, did, operation, cid, created_at, added_at
-		FROM plc_mempool
-		ORDER BY created_at DESC, id DESC
-		LIMIT 1
-	`
-
-	var op MempoolOperation
-	err := p.db.QueryRowContext(ctx, query).Scan(
-		&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
-	)
-	if err == sql.ErrNoRows {
-		return nil, nil
-	}
-	if err != nil {
-		return nil, err
-	}
-
-	return &op, nil
-}
-
-func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
-	query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
-	var count int
-	err := p.db.QueryRowContext(ctx, query).Scan(&count)
-	return count, err
-}
-
-func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
-	query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
-	var size int64
-	err := p.db.QueryRowContext(ctx, query).Scan(&size)
-	return size, err
 }

 // ===== CURSOR OPERATIONS =====
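Taken together, the removed methods formed a small queue API: AddToMempool deduplicates on cid, GetMempoolOperations drains in created_at order, and DeleteFromMempool acknowledges processed rows by id. A hedged sketch of the drain loop a bundler could run on top of that API (the Store interface, batch size, and in-memory store are illustrative, not the project's actual worker code):

package main

import (
	"context"
	"fmt"
)

// MempoolOperation matches the struct removed from types.go below
// (timestamps omitted for brevity).
type MempoolOperation struct {
	ID        int64
	DID       string
	Operation string
	CID       string
}

// Store is an illustrative subset of the removed mempool API.
type Store interface {
	GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
	DeleteFromMempool(ctx context.Context, ids []int64) error
}

// drainMempool pulls fixed-size batches in created_at order and
// deletes each batch only after it has been handed off successfully,
// so a failed handoff leaves the batch queued for retry.
func drainMempool(ctx context.Context, s Store, handle func([]MempoolOperation) error) error {
	const batchSize = 1000 // illustrative
	for {
		ops, err := s.GetMempoolOperations(ctx, batchSize)
		if err != nil {
			return err
		}
		if len(ops) == 0 {
			return nil // queue empty
		}
		if err := handle(ops); err != nil {
			return err
		}
		ids := make([]int64, len(ops))
		for i, op := range ops {
			ids[i] = op.ID
		}
		if err := s.DeleteFromMempool(ctx, ids); err != nil {
			return err
		}
	}
}

// memStore is a toy in-memory Store for demonstration; it assumes the
// deleted ids are exactly the batch just returned.
type memStore struct{ ops []MempoolOperation }

func (m *memStore) GetMempoolOperations(_ context.Context, limit int) ([]MempoolOperation, error) {
	if limit > len(m.ops) {
		limit = len(m.ops)
	}
	return m.ops[:limit], nil
}

func (m *memStore) DeleteFromMempool(_ context.Context, ids []int64) error {
	m.ops = m.ops[len(ids):]
	return nil
}

func main() {
	s := &memStore{ops: []MempoolOperation{{ID: 1, DID: "did:plc:aaa"}, {ID: 2, DID: "did:plc:bbb"}}}
	_ = drainMempool(context.Background(), s, func(batch []MempoolOperation) error {
		fmt.Printf("bundling %d ops\n", len(batch))
		return nil
	})
}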
+11 -16 internal/storage/types.go
···
 	LastChecked  time.Time
 	Status       int
 	IP           string
-	IPv6         string // NEW
+	IPv6         string
 	IPResolvedAt time.Time
+	Valid        bool
 	UpdatedAt    time.Time
 }
···
 
 // EndpointFilter for querying endpoints
 type EndpointFilter struct {
-	Type            string // "pds", "labeler", etc.
+	Type            string
 	Status          string
 	MinUserCount    int64
-	OnlyStale       bool          // NEW: Only return endpoints that need re-checking
-	RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale
+	OnlyStale       bool
+	OnlyValid       bool
+	RecheckInterval time.Duration
+	Random          bool
 	Limit           int
 	Offset          int
 }
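EndpointFilter gains OnlyValid and Random next to the existing staleness fields. Judging by the names alone (the diff does not spell out their semantics), OnlyValid restricts results to endpoints marked Valid and Random shuffles the selection order. A hypothetical construction a scan worker might use:

package main

import (
	"fmt"
	"time"
)

// EndpointFilter mirrors the updated struct from types.go.
type EndpointFilter struct {
	Type            string
	Status          string
	MinUserCount    int64
	OnlyStale       bool
	OnlyValid       bool
	RecheckInterval time.Duration
	Random          bool
	Limit           int
	Offset          int
}

func main() {
	// Hypothetical usage: re-check up to 100 valid PDS endpoints that
	// have not been scanned within the last 6 hours, in random order.
	f := EndpointFilter{
		Type:            "pds",
		OnlyStale:       true,
		OnlyValid:       true,
		RecheckInterval: 6 * time.Hour,
		Random:          true,
		Limit:           100,
	}
	fmt.Printf("%+v\n", f)
}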
···
 	StartTime      time.Time
 	EndTime        time.Time
 	BoundaryCIDs   []string
-	DIDs           []string
+	DIDCount       int // Changed from DIDs []string
 	Hash           string
 	CompressedHash string
 	CompressedSize int64
···
 	CumulativeCompressed int64 `json:"cumulative_compressed"`
 }
 
-// MempoolOperation represents an operation waiting to be bundled
-type MempoolOperation struct {
-	ID        int64
-	DID       string
-	Operation string
-	CID       string
-	CreatedAt time.Time
-	AddedAt   time.Time
-}
-
 // ScanCursor stores scanning progress
 type ScanCursor struct {
 	Source string
···
 	// From endpoints table
 	ID           int64
 	Endpoint     string
-	ServerDID    string // NEW: Add this
+	ServerDID    string
 	DiscoveredAt time.Time
 	LastChecked  time.Time
 	Status       int
 	IP           string
+	IPv6         string
+	Valid        bool // NEW
 
 	// From latest endpoint_scans (via JOIN)
 	LatestScan *struct {
+2 -2 internal/worker/scheduler.go

+113 utils/import-labels.js
···
+import { file, write } from "bun";
+import { join } from "path";
+import { mkdir } from "fs/promises";
+import { init, compress } from "@bokuweb/zstd-wasm";
+
+// --- Configuration ---
+const CSV_FILE = process.argv[2];
+const CONFIG_FILE = "config.yaml";
+const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
+// ---------------------
+
+if (!CSV_FILE) {
+  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
+  process.exit(1);
+}
+
+console.log("========================================");
+console.log("PLC Operation Labels Import (Bun + WASM)");
+console.log("========================================");
+
+// 1. Read and parse config
+console.log(`Loading config from ${CONFIG_FILE}...`);
+const configFile = await file(CONFIG_FILE).text();
+const config = Bun.YAML.parse(configFile);
+const bundleDir = config?.plc?.bundle_dir;
+
+if (!bundleDir) {
+  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
+  process.exit(1);
+}
+
+const FINAL_LABELS_DIR = join(bundleDir, "labels");
+await mkdir(FINAL_LABELS_DIR, { recursive: true });
+
+console.log(`CSV File: ${CSV_FILE}`);
+console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
+console.log("");
+
+// 2. Initialize Zstd WASM module
+await init();
+
+// --- Pass 1: Read entire file into memory and group by bundle ---
+console.log("Pass 1/2: Reading and grouping all lines by bundle...");
+console.warn("This will use a large amount of RAM!");
+
+const startTime = Date.now();
+const bundles = new Map(); // Map<string, string[]>
+let lineCount = 0;
+
+const inputFile = file(CSV_FILE);
+const fileStream = inputFile.stream();
+const decoder = new TextDecoder();
+let remainder = "";
+
+for await (const chunk of fileStream) {
+  const text = remainder + decoder.decode(chunk);
+  const lines = text.split("\n");
+  remainder = lines.pop() || "";
+
+  for (const line of lines) {
+    if (line === "") continue;
+    lineCount++;
+
+    if (lineCount === 1 && line.startsWith("bundle,")) {
+      continue; // Skip header
+    }
+
+    const firstCommaIndex = line.indexOf(",");
+    if (firstCommaIndex === -1) {
+      console.warn(`Skipping malformed line: ${line}`);
+      continue;
+    }
+    const bundleNumStr = line.substring(0, firstCommaIndex);
+    const bundleKey = bundleNumStr.padStart(6, "0");
+
+    // Add line to the correct bundle's array
+    if (!bundles.has(bundleKey)) {
+      bundles.set(bundleKey, []);
+    }
+    bundles.get(bundleKey).push(line);
+  }
+}
+// Handle a final line with no trailing newline (otherwise the last row would be silently dropped)
+if (remainder !== "") {
+  lineCount++;
+  const firstCommaIndex = remainder.indexOf(",");
+  if (firstCommaIndex !== -1) {
+    const bundleKey = remainder.substring(0, firstCommaIndex).padStart(6, "0");
+    if (!bundles.has(bundleKey)) bundles.set(bundleKey, []);
+    bundles.get(bundleKey).push(remainder);
+  }
+}
+
+console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
+console.log(`Found ${bundles.size} unique bundles.`);
+
+// --- Pass 2: Compress and write each bundle ---
+console.log("\nPass 2/2: Compressing and writing bundle files...");
+let i = 0;
+for (const [bundleKey, lines] of bundles.entries()) {
+  i++;
+  console.log(`  (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
+
+  // Join all lines for this bundle into one big string
+  const content = lines.join("\n");
+
+  // Compress the string
+  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
+
+  // Write the compressed data to the file
+  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
+  await write(outPath, compressedData);
+}
+
+// 3. Print summary
+const totalTime = (Date.now() - startTime) / 1000;
+console.log("\n========================================");
+console.log("Import Summary");
+console.log("========================================");
+console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
+console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
+console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
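Note the tradeoff this Bun variant makes explicit with its RAM warning: pass 1 buffers every CSV line in a Map, so peak memory is roughly the size of the uncompressed input. The shell variant below trades that memory for an external sort and streams each bundle straight into zstd.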
+91 utils/import-labels.sh
···
+#!/bin/bash
+# utils/import-labels.sh — single-pass, sorted-pipe label import
+
+set -e
+
+if [ $# -lt 1 ]; then
+    echo "Usage: ./utils/import-labels.sh <csv-file>"
+    exit 1
+fi
+
+CSV_FILE="$1"
+CONFIG_FILE="config.yaml"
+
+[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
+[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
+
+# Extract bundle directory path
+BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
+
+[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
+
+FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
+
+echo "========================================"
+echo "PLC Operation Labels Import (Sorted Pipe)"
+echo "========================================"
+echo "CSV File: $CSV_FILE"
+echo "Output Dir: $FINAL_LABELS_DIR"
+echo ""
+
+# Ensure the final directory exists
+mkdir -p "$FINAL_LABELS_DIR"
+
+echo "Streaming, sorting, and compressing on the fly..."
+# Single quotes here: backticks inside double quotes would run as command substitution
+echo 'This will take time. `pv` will show progress of the tail command.'
+echo 'The `sort` command will run after `pv` is complete.'
+echo ""
+
+# This is the single-pass pipeline
+tail -n +2 "$CSV_FILE" | \
+pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
+sort -t, -k1,1n | \
+awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
+# This awk script EXPECTS input sorted by bundle number (col 1)
+BEGIN {
+    # last_bundle_num tracks the bundle we are currently writing
+    last_bundle_num = -1
+    # cmd holds the current zstd pipe command
+    cmd = ""
+}
+{
+    current_bundle_num = $1
+
+    # Check if the bundle number has changed
+    if (current_bundle_num != last_bundle_num) {
+
+        # If it changed, and we have an old pipe open, close it
+        if (last_bundle_num != -1) {
+            close(cmd)
+        }
+
+        # Create the new pipe command, writing to the final .zst file
+        # (-q -f: quiet, force overwrite so re-runs do not abort)
+        outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
+        cmd = "zstd -q -f -T0 -o " outfile
+
+        # Update the tracker
+        last_bundle_num = current_bundle_num
+
+        # Print progress to stderr
+        printf "  -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
+    }
+
+    # Print the current line ($0) to the open pipe
+    # The first time this runs for a bundle, it opens the pipe
+    # Subsequent times, it writes to the already-open pipe
+    print $0 | cmd
+}
+# END block: close the very last pipe
+END {
+    if (last_bundle_num != -1) {
+        close(cmd)
+    }
+    printf "  Finished. Total lines: %d\n", NR > "/dev/stderr"
+}'
+
+echo ""
+echo "========================================"
+echo "Import Summary"
+echo "========================================"
+echo "✓ Import completed successfully!"
+echo "Label files are stored in: $FINAL_LABELS_DIR"