.gitignore  +2 -1
Makefile  +39 -5
···
-all: run
+.PHONY: all build install test clean fmt lint help
+
+# Binary name
+BINARY_NAME=atscand
+INSTALL_PATH=$(GOPATH)/bin
+
+# Go commands
+GOCMD=go
+GOBUILD=$(GOCMD) build
+GOINSTALL=$(GOCMD) install
+GOCLEAN=$(GOCMD) clean
+GOTEST=$(GOCMD) test
+GOGET=$(GOCMD) get
+GOFMT=$(GOCMD) fmt
+GOMOD=$(GOCMD) mod
+GORUN=$(GOCMD) run
+
+# Default target
+all: build
+
+# Build the CLI tool
+build:
+	@echo "Building $(BINARY_NAME)..."
+	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+# Install the CLI tool globally
+install:
+	@echo "Installing $(BINARY_NAME)..."
+	$(GOINSTALL) ./cmd/atscand
 
 run:
-	go run cmd/atscanner.go -verbose
+	$(GORUN) cmd/atscand/main.go -verbose
 
-clean-db:
-	dropdb -U atscanner atscanner
-	createdb atscanner -O atscanner
+update-plcbundle:
+	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  make build    - Build the binary"
+	@echo "  make install  - Install binary globally"
+	@echo "  make run      - Run app"
cmd/atscand/main.go  +159
···
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/atscan/atscand/internal/api"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/pds"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
+	"github.com/atscan/atscand/internal/worker"
+)
+
+const VERSION = "1.0.0"
+
+func main() {
+	configPath := flag.String("config", "config.yaml", "path to config file")
+	verbose := flag.Bool("verbose", false, "enable verbose logging")
+	flag.Parse()
+
+	// Load configuration
+	cfg, err := config.Load(*configPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Override verbose setting if flag is provided
+	if *verbose {
+		cfg.API.Verbose = true
+	}
+
+	// Initialize logger
+	log.Init(cfg.API.Verbose)
+
+	// Print banner
+	log.Banner(VERSION)
+
+	// Print configuration summary
+	log.PrintConfig(map[string]string{
+		"Database Type":     cfg.Database.Type,
+		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+		"PLC Directory":     cfg.PLC.DirectoryURL,
+		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+		"PDS Timeout":       cfg.PDS.Timeout.String(),
+		"API Host":          cfg.API.Host,
+		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+	})
+
+	// Initialize database using factory pattern
+	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+	if err != nil {
+		log.Fatal("Failed to initialize database: %v", err)
+	}
+	defer func() {
+		log.Info("Closing database connection...")
+		db.Close()
+	}()
+
+	// Set scan retention from config
+	if cfg.PDS.ScanRetention > 0 {
+		db.SetScanRetention(cfg.PDS.ScanRetention)
+		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+	}
+
+	// Run migrations
+	if err := db.Migrate(); err != nil {
+		log.Fatal("Failed to run migrations: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Initialize workers
+	log.Info("Initializing scanners...")
+
+	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+	if err != nil {
+		log.Fatal("Failed to create bundle manager: %v", err)
+	}
+	defer bundleManager.Close()
+	log.Verbose("✓ Bundle manager initialized (shared)")
+
+	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+	defer plcScanner.Close()
+	log.Verbose("✓ PLC scanner initialized")
+
+	pdsScanner := pds.NewScanner(db, cfg.PDS)
+	log.Verbose("✓ PDS scanner initialized")
+
+	scheduler := worker.NewScheduler()
+
+	// Schedule PLC directory scan
+	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+		if err := plcScanner.Scan(ctx); err != nil {
+			log.Error("PLC scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+	// Schedule PDS availability checks
+	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+		if err := pdsScanner.ScanAll(ctx); err != nil {
+			log.Error("PDS scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+	// Start API server
+	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+	go func() {
+		if err := apiServer.Start(); err != nil {
+			log.Fatal("API server error: %v", err)
+		}
+	}()
+
+	// Give the API server a moment to start
+	time.Sleep(100 * time.Millisecond)
+	log.Info("✓ API server started successfully")
+	log.Info("")
+	log.Info("🚀 ATScanner is running!")
+	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+	log.Info("   Press Ctrl+C to stop")
+	log.Info("")
+
+	// Start scheduler
+	scheduler.Start(ctx)
+
+	// Wait for interrupt
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	<-sigChan
+
+	log.Info("")
+	log.Info("Shutting down gracefully...")
+	cancel()
+
+	log.Info("Stopping API server...")
+	apiServer.Shutdown(context.Background())
+
+	log.Info("Waiting for active tasks to complete...")
+	time.Sleep(2 * time.Second)
+
+	log.Info("✓ Shutdown complete. Goodbye!")
+}
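
The `internal/worker` package behind `worker.NewScheduler()` is not included in this diff. A minimal sketch of a scheduler that would satisfy the `AddJob`/`Start` calls above; the per-job goroutine and the immediate first run before the first tick are assumptions, not details confirmed by the diff:

```go
package worker

import (
	"context"
	"sync"
	"time"
)

// job is a named function run on a fixed interval.
type job struct {
	name     string
	interval time.Duration
	fn       func()
}

// Scheduler runs registered jobs until the context is cancelled.
type Scheduler struct {
	jobs []job
	wg   sync.WaitGroup
}

func NewScheduler() *Scheduler { return &Scheduler{} }

// AddJob registers a named job to run every interval.
func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
	s.jobs = append(s.jobs, job{name: name, interval: interval, fn: fn})
}

// Start launches one goroutine per job; each runs once immediately,
// then on every tick, and stops when ctx is cancelled.
func (s *Scheduler) Start(ctx context.Context) {
	for _, j := range s.jobs {
		j := j
		s.wg.Add(1)
		go func() {
			defer s.wg.Done()
			ticker := time.NewTicker(j.interval)
			defer ticker.Stop()
			j.fn() // run once at startup
			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					j.fn()
				}
			}
		}()
	}
}
```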
cmd/atscanner.go  -152
···
-package main
-
-import (
-	"context"
-	"flag"
-	"fmt"
-	"os"
-	"os/signal"
-	"syscall"
-	"time"
-
-	"github.com/atscan/atscanner/internal/api"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/pds"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
-	"github.com/atscan/atscanner/internal/worker"
-)
-
-const VERSION = "1.0.0"
-
-func main() {
-	configPath := flag.String("config", "config.yaml", "path to config file")
-	verbose := flag.Bool("verbose", false, "enable verbose logging")
-	flag.Parse()
-
-	// Load configuration
-	cfg, err := config.Load(*configPath)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Override verbose setting if flag is provided
-	if *verbose {
-		cfg.API.Verbose = true
-	}
-
-	// Initialize logger
-	log.Init(cfg.API.Verbose)
-
-	// Print banner
-	log.Banner(VERSION)
-
-	// Print configuration summary
-	log.PrintConfig(map[string]string{
-		"Database Type":     cfg.Database.Type,
-		"Database Path":     cfg.Database.Path, // Will be auto-redacted
-		"PLC Directory":     cfg.PLC.DirectoryURL,
-		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
-		"PLC Bundle Dir":    cfg.PLC.BundleDir,
-		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
-		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
-		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
-		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
-		"PDS Timeout":       cfg.PDS.Timeout.String(),
-		"API Host":          cfg.API.Host,
-		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
-		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
-	})
-
-	// Initialize database using factory pattern
-	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
-	if err != nil {
-		log.Fatal("Failed to initialize database: %v", err)
-	}
-	defer func() {
-		log.Info("Closing database connection...")
-		db.Close()
-	}()
-
-	// Set scan retention from config
-	if cfg.PDS.ScanRetention > 0 {
-		db.SetScanRetention(cfg.PDS.ScanRetention)
-		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
-	}
-
-	// Run migrations
-	if err := db.Migrate(); err != nil {
-		log.Fatal("Failed to run migrations: %v", err)
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Initialize workers
-	log.Info("Initializing scanners...")
-
-	plcScanner := plc.NewScanner(db, cfg.PLC)
-	defer plcScanner.Close()
-	log.Verbose("✓ PLC scanner initialized")
-
-	pdsScanner := pds.NewScanner(db, cfg.PDS)
-	log.Verbose("✓ PDS scanner initialized")
-
-	scheduler := worker.NewScheduler()
-
-	// Schedule PLC directory scan
-	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
-		if err := plcScanner.Scan(ctx); err != nil {
-			log.Error("PLC scan error: %v", err)
-		}
-	})
-	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
-
-	// Schedule PDS availability checks
-	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
-		if err := pdsScanner.ScanAll(ctx); err != nil {
-			log.Error("PDS scan error: %v", err)
-		}
-	})
-	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
-
-	// Start API server
-	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
-	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
-	go func() {
-		if err := apiServer.Start(); err != nil {
-			log.Fatal("API server error: %v", err)
-		}
-	}()
-
-	// Give the API server a moment to start
-	time.Sleep(100 * time.Millisecond)
-	log.Info("✓ API server started successfully")
-	log.Info("")
-	log.Info("🚀 ATScanner is running!")
-	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
-	log.Info("   Press Ctrl+C to stop")
-	log.Info("")
-
-	// Start scheduler
-	scheduler.Start(ctx)
-
-	// Wait for interrupt
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
-
-	log.Info("")
-	log.Info("Shutting down gracefully...")
-	cancel()
-
-	log.Info("Stopping API server...")
-	apiServer.Shutdown(context.Background())
-
-	log.Info("Waiting for active tasks to complete...")
-	time.Sleep(2 * time.Second)
-
-	log.Info("✓ Shutdown complete. Goodbye!")
-}
cmd/import-labels/main.go  +168
···
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	PLC struct {
+		BundleDir string `yaml:"bundle_dir"`
+	} `yaml:"plc"`
+}
+
+var CONFIG_FILE = "config.yaml"
+
+// ---------------------
+
+func main() {
+	// Define a new flag for changing the directory
+	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+	flag.Usage = func() { // Custom usage message
+		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+		flag.PrintDefaults()
+	}
+	flag.Parse() // Parse all defined flags
+
+	// Change directory if the flag was used
+	if *workDir != "." {
+		fmt.Printf("Changing working directory to %s...\n", *workDir)
+		if err := os.Chdir(*workDir); err != nil {
+			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+			os.Exit(1)
+		}
+	}
+
+	// --- REMOVED UNUSED CODE ---
+	// The csvFilePath variable and NArg check were removed
+	// as the script now reads from stdin.
+	// ---------------------------
+
+	fmt.Println("========================================")
+	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+	fmt.Println("========================================")
+
+	// 1. Read config (will now read from the new CWD)
+	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+	configData, err := os.ReadFile(CONFIG_FILE)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+		os.Exit(1)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(configData, &config); err != nil {
+		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+		os.Exit(1)
+	}
+
+	if config.PLC.BundleDir == "" {
+		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+		os.Exit(1)
+	}
+
+	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+	fmt.Println("Waiting for sorted data from stdin...")
+
+	// 2. Process sorted data from stdin
+	// This script *requires* the input to be sorted by bundle number.
+
+	var currentWriter *zstd.Encoder
+	var currentFile *os.File
+	var lastBundleKey string = ""
+
+	lineCount := 0
+	startTime := time.Now()
+
+	scanner := bufio.NewScanner(os.Stdin)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		lineCount++
+
+		parts := strings.SplitN(line, ",", 2)
+		if len(parts) < 1 {
+			continue // Skip empty/bad lines
+		}
+
+		bundleNumStr := parts[0]
+		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+		// If the bundle key is new, close the old writer and open a new one.
+		if bundleKey != lastBundleKey {
+			// Close the previous writer/file
+			if currentWriter != nil {
+				if err := currentWriter.Close(); err != nil {
+					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+				}
+				currentFile.Close()
+			}
+
+			// Start the new one
+			fmt.Printf("  -> Writing bundle %s\n", bundleKey)
+			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+			file, err := os.Create(outPath)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+				os.Exit(1)
+			}
+			currentFile = file
+
+			writer, err := zstd.NewWriter(file)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+				os.Exit(1)
+			}
+			currentWriter = writer
+			lastBundleKey = bundleKey
+		}
+
+		// Write the line to the currently active writer
+		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+		}
+
+		// Progress update
+		if lineCount%100000 == 0 {
+			elapsed := time.Since(startTime).Seconds()
+			rate := float64(lineCount) / elapsed
+			fmt.Printf("  ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+		}
+	}
+
+	// 3. Close the very last writer
+	if currentWriter != nil {
+		if err := currentWriter.Close(); err != nil {
+			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+		}
+		currentFile.Close()
+	}
+
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+	}
+
+	totalTime := time.Since(startTime)
+	fmt.Println("\n========================================")
+	fmt.Println("Import Summary")
+	fmt.Println("========================================")
+	fmt.Printf("✓ Import completed in %v\n", totalTime)
+	fmt.Printf("Total lines processed: %d\n", lineCount)
+}
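
Since the importer writes one zstd-framed CSV per bundle, a quick sanity check on its output is to decompress a file and count lines. This is a hypothetical verification helper, not part of the repository; the `labels/000001.csv.zst` path is only an example of the `%s.csv.zst` naming used above:

```go
package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

// Opens one of the importer's output files and counts its CSV lines,
// confirming the zstd framing decompresses cleanly end to end.
func main() {
	f, err := os.Open("labels/000001.csv.zst") // example path
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()

	dec, err := zstd.NewReader(f)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer dec.Close()

	lines := 0
	sc := bufio.NewScanner(dec)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		lines++
	}
	if err := sc.Err(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("%d lines\n", lines)
}
```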
config.sample.yaml  +1 -1
go.mod  +6 -5
···
-module github.com/atscan/atscanner
+module github.com/atscan/atscand
 
 go 1.23.0
 
 require (
 	github.com/gorilla/mux v1.8.1
 	github.com/lib/pq v1.10.9
-	github.com/mattn/go-sqlite3 v1.14.18
 	gopkg.in/yaml.v3 v3.0.1
 )
 
-require github.com/klauspost/compress v1.18.0
+require github.com/klauspost/compress v1.18.1
 
 require (
-	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 	github.com/gorilla/handlers v1.5.2
+	github.com/jackc/pgx/v5 v5.7.6
+	tangled.org/atscan.net/plcbundle v0.3.6
 )
 
 require (
 	github.com/felixge/httpsnoop v1.0.3 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-	github.com/jackc/pgx/v5 v5.7.6 // indirect
 	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	golang.org/x/crypto v0.37.0 // indirect
 	golang.org/x/sync v0.13.0 // indirect
 	golang.org/x/text v0.24.0 // indirect
go.sum  +17 -7
···
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
 github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
 github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
-github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
-github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
 golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
 golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
 golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
 golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
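
The new `tangled.org/atscan.net/plcbundle` dependency supplies the `BundleMetadata` type that handlers.go (below) switches to. A minimal sketch exercising only the fields this diff actually reads; the literal construction is illustrative, the placeholder values are invented, and the field types are inferred from how the handlers use them, not from the library's documentation:

```go
package main

import (
	"fmt"
	"time"

	"tangled.org/atscan.net/plcbundle"
)

func main() {
	// Illustrative only: field names taken from how handlers.go reads
	// *plcbundle.BundleMetadata; the library's real constructors and
	// invariants are not shown in this diff.
	meta := plcbundle.BundleMetadata{
		BundleNumber:     1,
		Hash:             "chain-hash-placeholder",
		Parent:           "", // first bundle has no parent
		OperationCount:   10000,
		DIDCount:         9876,
		CompressedSize:   1 << 20,
		UncompressedSize: 8 << 20,
		StartTime:        time.Now().Add(-time.Hour),
		EndTime:          time.Now(),
	}
	fmt.Printf("bundle %06d: %d ops, ratio %.2f\n",
		meta.BundleNumber, meta.OperationCount,
		float64(meta.UncompressedSize)/float64(meta.CompressedSize))
}
```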
+275
-451
internal/api/handlers.go
+275
-451
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
47
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
48
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
49
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
50
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
51
49
}
52
50
53
51
// ===== REQUEST HELPERS =====
···
77
75
78
76
// ===== FORMATTING HELPERS =====
79
77
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": len(bundle.DIDs),
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
-
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
100
80
"id": ep.ID,
···
103
83
"discovered_at": ep.DiscoveredAt,
104
84
"last_checked": ep.LastChecked,
105
85
"status": statusToString(ep.Status),
106
-
// REMOVED: "user_count": ep.UserCount, // No longer exists
107
86
}
108
87
109
-
// Add IP if available
88
+
// Add IPs if available
110
89
if ep.IP != "" {
111
90
response["ip"] = ep.IP
112
91
}
113
-
114
-
// REMOVED: IP info extraction - no longer in Endpoint struct
115
-
// IPInfo is now in separate table, joined only in PDS handlers
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
94
+
}
116
95
117
96
return response
118
97
}
···
165
144
resp.json(stats)
166
145
}
167
146
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
152
+
endpointType := r.URL.Query().Get("type")
153
+
if endpointType == "" {
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
156
+
}
157
+
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
170
+
if err != nil {
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
168
183
// ===== PDS HANDLERS =====
169
184
170
185
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
···
233
248
"endpoint": pds.Endpoint,
234
249
"discovered_at": pds.DiscoveredAt,
235
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
236
252
}
237
253
238
254
// Add server_did if available
···
257
273
}
258
274
}
259
275
260
-
// Add IP if available
276
+
// Add IPs if available
261
277
if pds.IP != "" {
262
278
response["ip"] = pds.IP
263
279
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
282
+
}
264
283
265
284
// Add IP info (from ip_infos table via JOIN)
266
285
if pds.IPInfo != nil {
···
358
377
scanMap["response_time"] = scan.ResponseTime
359
378
}
360
379
361
-
// NEW: Add version if available
362
380
if scan.Version != "" {
363
381
scanMap["version"] = scan.Version
364
382
}
365
383
384
+
if scan.UsedIP != "" {
385
+
scanMap["used_ip"] = scan.UsedIP
386
+
}
387
+
366
388
// Use the top-level UserCount field first
367
389
if scan.UserCount > 0 {
368
390
scanMap["user_count"] = scan.UserCount
···
662
684
return
663
685
}
664
686
665
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
666
-
if err != nil {
667
-
resp.error(err.Error(), http.StatusInternalServerError)
668
-
return
669
-
}
670
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
671
688
resp.json(map[string]interface{}{
672
689
"total_unique_dids": totalDIDs,
673
690
"last_bundle": lastBundle,
···
678
695
679
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
680
697
resp := newResponse(w)
681
-
682
698
bundleNum, err := getBundleNumber(r)
683
699
if err != nil {
684
700
resp.error("invalid bundle number", http.StatusBadRequest)
685
701
return
686
702
}
687
703
688
-
// Try to get existing bundle
689
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
690
-
if err == nil {
691
-
// Bundle exists, return it normally
692
-
resp.json(formatBundleResponse(bundle))
693
-
return
694
-
}
695
-
696
-
// Bundle not found - check if it's the next upcoming bundle
697
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
698
707
if err != nil {
699
-
resp.error("bundle not found", http.StatusNotFound)
700
-
return
701
-
}
702
-
703
-
if bundleNum == lastBundle+1 {
704
-
// This is the upcoming bundle - return preview based on mempool
705
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
706
-
if err != nil {
707
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
708
717
return
709
718
}
710
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
711
720
return
712
721
}
713
722
714
-
// Not an upcoming bundle, just not found
715
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
716
724
}
717
725
718
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
719
-
// Get mempool stats
720
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
721
-
if err != nil {
722
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
723
743
}
744
+
}
724
745
725
-
if mempoolCount == 0 {
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
726
752
return map[string]interface{}{
727
753
"plc_bundle_number": bundleNum,
728
754
"is_upcoming": true,
···
732
758
}, nil
733
759
}
734
760
735
-
// Get first and last operations for time range
736
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
737
-
if err != nil {
738
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
739
771
}
740
772
741
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
742
-
if err != nil {
743
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
744
775
}
745
776
746
-
// Get unique DID count
747
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
748
-
if err != nil {
749
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
750
780
}
751
-
752
-
// Get uncompressed size estimate
753
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
754
-
if err != nil {
755
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
756
783
}
757
784
758
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
759
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
760
-
761
-
// Calculate completion estimate
762
-
var estimatedCompletionTime *time.Time
763
-
var operationsNeeded int
764
-
var currentRate float64
765
-
766
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
767
-
768
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
769
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
770
-
if timeSpan > 0 {
771
-
currentRate = float64(mempoolCount) / timeSpan
772
-
if currentRate > 0 {
773
-
secondsNeeded := float64(operationsNeeded) / currentRate
774
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
775
-
estimatedCompletionTime = &completionTime
776
-
}
777
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
778
789
}
779
790
780
-
// Get previous bundle for cursor context
781
-
var prevBundleHash string
782
-
var cursor string
791
+
// Get previous bundle info
783
792
if bundleNum > 1 {
784
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
785
-
if err == nil {
786
-
prevBundleHash = prevBundle.Hash
787
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
788
-
}
789
-
}
790
-
791
-
// Determine bundle status
792
-
status := "filling"
793
-
if mempoolCount >= plc.BUNDLE_SIZE {
794
-
status = "ready"
795
-
}
796
-
797
-
// Build upcoming bundle response
798
-
result := map[string]interface{}{
799
-
"plc_bundle_number": bundleNum,
800
-
"is_upcoming": true,
801
-
"status": status,
802
-
"operation_count": mempoolCount,
803
-
"target_operation_count": plc.BUNDLE_SIZE,
804
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
805
-
"operations_needed": operationsNeeded,
806
-
"did_count": uniqueDIDCount,
807
-
"start_time": firstOp.CreatedAt, // This is FIXED once first op exists
808
-
"current_end_time": lastOp.CreatedAt, // This will change as more ops arrive
809
-
"uncompressed_size": uncompressedSize,
810
-
"estimated_compressed_size": estimatedCompressedSize,
811
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
812
-
"prev_bundle_hash": prevBundleHash,
813
-
"cursor": cursor,
814
-
}
815
-
816
-
if estimatedCompletionTime != nil {
817
-
result["estimated_completion_time"] = *estimatedCompletionTime
818
-
result["current_rate_per_second"] = currentRate
819
-
}
820
-
821
-
// Get actual mempool operations if requested
822
-
if r.URL.Query().Get("include_dids") == "true" {
823
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
824
-
if err == nil {
825
-
// Extract unique DIDs
826
-
didSet := make(map[string]bool)
827
-
for _, op := range ops {
828
-
didSet[op.DID] = true
829
-
}
830
-
dids := make([]string, 0, len(didSet))
831
-
for did := range didSet {
832
-
dids = append(dids, did)
833
-
}
834
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
835
796
}
836
797
}
837
798
···
847
808
return
848
809
}
849
810
850
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
851
813
if err != nil {
852
814
resp.error("bundle not found", http.StatusNotFound)
853
815
return
854
816
}
855
817
856
818
resp.json(map[string]interface{}{
857
-
"plc_bundle_number": bundle.BundleNumber,
858
-
"did_count": len(bundle.DIDs),
859
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
860
822
})
861
823
}
862
824
···
871
833
872
834
compressed := r.URL.Query().Get("compressed") != "false"
873
835
874
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
875
837
if err == nil {
876
838
// Bundle exists, serve it normally
877
839
resp.bundleHeaders(bundle)
···
885
847
}
886
848
887
849
// Bundle not found - check if it's the upcoming bundle
888
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
889
-
if err != nil {
890
-
resp.error("bundle not found", http.StatusNotFound)
891
-
return
892
-
}
893
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
894
851
if bundleNum == lastBundle+1 {
895
852
// This is the upcoming bundle - serve from mempool
896
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
897
854
return
898
855
}
899
856
···
901
858
resp.error("bundle not found", http.StatusNotFound)
902
859
}
903
860
904
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
905
-
ctx := r.Context()
906
-
907
-
// Get mempool count
908
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
909
-
if err != nil {
910
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
911
-
return
912
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
913
865
914
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
915
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
916
868
return
917
869
}
918
870
919
-
// Get mempool operations (up to BUNDLE_SIZE)
920
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
921
873
if err != nil {
922
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
923
875
return
924
876
}
925
877
926
-
if len(mempoolOps) == 0 {
927
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
928
880
return
929
881
}
930
882
931
-
// Get time range
932
-
firstOp := mempoolOps[0]
933
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
934
886
935
887
// Extract unique DIDs
936
888
didSet := make(map[string]bool)
937
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
938
890
didSet[op.DID] = true
939
891
}
940
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
941
899
// Get previous bundle hash
942
900
prevBundleHash := ""
943
901
if bundleNum > 1 {
944
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
945
903
prevBundleHash = prevBundle.Hash
946
904
}
947
905
}
948
906
949
-
// Serialize operations to JSONL
950
-
var buf []byte
951
-
for _, mop := range mempoolOps {
952
-
buf = append(buf, []byte(mop.Operation)...)
953
-
buf = append(buf, '\n')
954
-
}
955
-
956
-
// Calculate size
957
-
uncompressedSize := int64(len(buf))
958
-
959
907
// Set headers
960
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
961
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
962
910
w.Header().Set("X-Bundle-Status", "preview")
963
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
964
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
965
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
966
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
967
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
968
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
969
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
970
919
971
920
w.Header().Set("Content-Type", "application/jsonl")
972
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
973
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
974
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
975
922
923
+
// Stream operations as JSONL
976
924
w.WriteHeader(http.StatusOK)
977
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
978
937
}
979
938
980
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
981
940
resp := newResponse(w)
982
-
path := bundle.GetFilePath(s.plcBundleDir)
983
941
984
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
985
944
if err != nil {
986
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
987
946
return
988
947
}
989
-
defer file.Close()
990
-
991
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
992
949
993
950
w.Header().Set("Content-Type", "application/zstd")
994
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
995
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
996
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
997
954
998
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
999
958
}
1000
959
1001
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
1002
961
resp := newResponse(w)
1003
962
1004
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
1005
965
if err != nil {
1006
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
1007
967
return
1008
968
}
1009
-
1010
-
// Serialize to JSONL
1011
-
var buf []byte
1012
-
for _, op := range ops {
1013
-
buf = append(buf, op.RawJSON...)
1014
-
buf = append(buf, '\n')
1015
-
}
1016
-
1017
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
1018
-
compressedSize := int64(0)
1019
-
if fileInfo != nil {
1020
-
compressedSize = fileInfo.Size()
1021
-
}
969
+
defer reader.Close()
1022
970
1023
971
w.Header().Set("Content-Type", "application/jsonl")
1024
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
1025
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
1026
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
1027
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
1028
-
if compressedSize > 0 {
1029
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
1030
978
}
1031
979
980
+
// Stream the data directly to the response
1032
981
w.WriteHeader(http.StatusOK)
1033
-
w.Write(buf)
982
+
io.Copy(w, reader)
1034
983
}
1035
984
1036
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
1037
986
resp := newResponse(w)
1038
987
limit := getQueryInt(r, "limit", 50)
1039
988
1040
-
bundles, err := s.db.GetBundles(r.Context(), limit)
1041
-
if err != nil {
1042
-
resp.error(err.Error(), http.StatusInternalServerError)
1043
-
return
1044
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
1045
990
1046
991
response := make([]map[string]interface{}, len(bundles))
1047
992
for i, bundle := range bundles {
1048
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
1049
994
}
1050
995
1051
996
resp.json(response)
···
1054
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
1055
1000
resp := newResponse(w)
1056
1001
1057
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
1058
-
if err != nil {
1059
-
resp.error(err.Error(), http.StatusInternalServerError)
1060
-
return
1061
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
1062
1008
1063
1009
resp.json(map[string]interface{}{
1064
-
"plc_bundle_count": count,
1065
-
"last_bundle_number": lastBundle,
1066
-
"total_compressed_size": compressedSize,
1067
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1068
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
1069
-
"total_uncompressed_size": uncompressedSize,
1070
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1071
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
1072
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
1073
1015
})
1074
1016
}
1075
1017
···
1077
1019
1078
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
1079
1021
resp := newResponse(w)
1080
-
ctx := r.Context()
1081
1022
1082
-
count, err := s.db.GetMempoolCount(ctx)
1083
-
if err != nil {
1084
-
resp.error(err.Error(), http.StatusInternalServerError)
1085
-
return
1086
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
1087
1025
1088
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
1089
-
if err != nil {
1090
-
resp.error(err.Error(), http.StatusInternalServerError)
1091
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
1092
1030
}
1093
1031
1094
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
1095
-
if err != nil {
1096
-
resp.error(err.Error(), http.StatusInternalServerError)
1097
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
1098
1036
}
1099
1037
1100
-
result := map[string]interface{}{
1101
-
"operation_count": count,
1102
-
"unique_did_count": uniqueDIDCount,
1103
-
"uncompressed_size": uncompressedSize,
1104
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1105
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
1106
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
1107
1042
1108
-
if count > 0 {
1109
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
1110
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
1111
1045
1112
-
if count < plc.BUNDLE_SIZE {
1113
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
1114
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
1115
1049
if timeSpan > 0 {
1116
1050
opsPerSecond := float64(count) / timeSpan
1117
1051
if opsPerSecond > 0 {
1118
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
1119
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
1120
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1057
+
result["current_rate_per_second"] = opsPerSecond
1121
1058
result["operations_needed"] = remainingOps
1122
-
result["current_rate_per_second"] = opsPerSecond
1123
1059
}
1124
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
1125
1066
}
1126
-
} else {
1127
-
result["estimated_next_bundle_time"] = time.Now()
1128
-
result["operations_needed"] = 0
1129
1067
}
1130
1068
}
1131
1069
} else {
1070
+
// Empty mempool
1132
1071
result["mempool_start_time"] = nil
1133
1072
result["estimated_next_bundle_time"] = nil
1134
1073
}
···
1153
1092
1154
1093
// ===== VERIFICATION HANDLERS =====
1155
1094
1156
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
1157
-
resp := newResponse(w)
1158
-
vars := mux.Vars(r)
1159
-
1160
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
1161
-
if err != nil {
1162
-
resp.error("Invalid bundle number", http.StatusBadRequest)
1163
-
return
1164
-
}
1165
-
1166
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
1167
-
if err != nil {
1168
-
resp.error("Bundle not found", http.StatusNotFound)
1169
-
return
1170
-
}
1171
-
1172
-
// Fetch from PLC and verify
1173
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
1174
-
if err != nil {
1175
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
1176
-
return
1177
-
}
1178
-
1179
-
remoteHash := computeOperationsHash(remoteOps)
1180
-
verified := bundle.Hash == remoteHash
1181
-
1182
-
resp.json(map[string]interface{}{
1183
-
"bundle_number": bundleNumber,
1184
-
"verified": verified,
1185
-
"local_hash": bundle.Hash,
1186
-
"remote_hash": remoteHash,
1187
-
"local_op_count": plc.BUNDLE_SIZE,
1188
-
"remote_op_count": len(remoteOps),
1189
-
"boundary_cids_used": len(prevCIDs),
1190
-
})
1191
-
}
1192
-
1193
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
1194
-
var after string
1195
-
var prevBoundaryCIDs map[string]bool
1196
-
1197
-
if bundleNum > 1 {
1198
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
1199
-
if err != nil {
1200
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
1201
-
}
1202
-
1203
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
1204
-
1205
-
if len(prevBundle.BoundaryCIDs) > 0 {
1206
-
prevBoundaryCIDs = make(map[string]bool)
1207
-
for _, cid := range prevBundle.BoundaryCIDs {
1208
-
prevBoundaryCIDs[cid] = true
1209
-
}
1210
-
}
1211
-
}
1212
-
1213
-
var allRemoteOps []plc.PLCOperation
1214
-
seenCIDs := make(map[string]bool)
1215
-
1216
-
for cid := range prevBoundaryCIDs {
1217
-
seenCIDs[cid] = true
1218
-
}
1219
-
1220
-
currentAfter := after
1221
-
maxFetches := 20
1222
-
1223
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1224
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1225
-
Count: 1000,
1226
-
After: currentAfter,
1227
-
})
1228
-
if err != nil || len(batch) == 0 {
1229
-
break
1230
-
}
1231
-
1232
-
for _, op := range batch {
1233
-
if !seenCIDs[op.CID] {
1234
-
seenCIDs[op.CID] = true
1235
-
allRemoteOps = append(allRemoteOps, op)
1236
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1237
-
break
1238
-
}
1239
-
}
1240
-
}
1241
-
1242
-
if len(batch) > 0 {
1243
-
lastOp := batch[len(batch)-1]
1244
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1245
-
}
1246
-
1247
-
if len(batch) < 1000 {
1248
-
break
1249
-
}
1250
-
}
1251
-
1252
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1253
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1254
-
}
1255
-
1256
-
return allRemoteOps, prevBoundaryCIDs, nil
1257
-
}
1258
-
1259
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1260
1096
resp := newResponse(w)
1261
-
ctx := r.Context()
1262
1097
1263
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1264
-
if err != nil {
1265
-
resp.error(err.Error(), http.StatusInternalServerError)
1266
-
return
1267
-
}
1268
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1269
1099
if lastBundle == 0 {
1270
1100
resp.json(map[string]interface{}{
1271
1101
"status": "empty",
···
1279
1109
var errorMsg string
1280
1110
1281
1111
for i := 1; i <= lastBundle; i++ {
1282
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1283
1113
if err != nil {
1284
1114
valid = false
1285
1115
brokenAt = i
···
1288
1118
}
1289
1119
1290
1120
if i > 1 {
1291
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1292
1122
if err != nil {
1293
1123
valid = false
1294
1124
brokenAt = i
···
1296
1126
break
1297
1127
}
1298
1128
1299
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1300
1130
valid = false
1301
1131
brokenAt = i
1302
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1303
1133
break
1304
1134
}
1305
1135
}
···
 func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
 	resp := newResponse(w)
-	ctx := r.Context()
-
-	lastBundle, err := s.db.GetLastBundleNumber(ctx)
-	if err != nil {
-		resp.error(err.Error(), http.StatusInternalServerError)
-		return
-	}
+	lastBundle := s.bundleManager.GetLastBundleNumber()
 
 	if lastBundle == 0 {
 		resp.json(map[string]interface{}{
 			"chain_length": 0,
···
 		return
 	}
 
-	firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
-	lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
-
-	// Updated to receive 5 values instead of 3
-	count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
-	if err != nil {
-		resp.error(err.Error(), http.StatusInternalServerError)
-		return
-	}
+	firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
+	lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
+	stats := s.bundleManager.GetBundleStats()
 
 	resp.json(map[string]interface{}{
-		"chain_length":               lastBundle,
-		"total_bundles":              count,
-		"total_compressed_size":      compressedSize,
-		"total_compressed_size_mb":   float64(compressedSize) / 1024 / 1024,
-		"total_uncompressed_size":    uncompressedSize,
-		"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
-		"compression_ratio":          float64(uncompressedSize) / float64(compressedSize),
-		"chain_start_time":           firstBundle.StartTime,
-		"chain_end_time":             lastBundleData.EndTime,
-		"chain_head_hash":            lastBundleData.Hash,
-		"first_prev_hash":            firstBundle.PrevBundleHash,
-		"last_prev_hash":             lastBundleData.PrevBundleHash,
+		"chain_length":             lastBundle,
+		"total_bundles":            stats["bundle_count"],
+		"total_compressed_size":    stats["total_size"],
+		"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
+		"chain_start_time":         firstBundle.StartTime,
+		"chain_end_time":           lastBundleData.EndTime,
+		"chain_head_hash":          lastBundleData.Hash,
+		"first_parent":             firstBundle.Parent,
+		"last_parent":              lastBundleData.Parent,
 	})
 }
···
 		return
 	}
 
-	startBundle := s.findStartBundle(ctx, afterTime)
+	startBundle := s.findStartBundle(afterTime)
 	ops := s.collectOperations(ctx, startBundle, afterTime, count)
 
 	w.Header().Set("Content-Type", "application/jsonl")
···
 		return time.Time{}, fmt.Errorf("invalid timestamp format")
 	}
 
-func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
+func (s *Server) findStartBundle(afterTime time.Time) int {
 	if afterTime.IsZero() {
 		return 1
 	}
 
-	foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
-	if err != nil {
-		return 1
-	}
-
+	foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
 	if foundBundle > 1 {
 		return foundBundle - 1
 	}
···
 	var allOps []plc.PLCOperation
 	seenCIDs := make(map[string]bool)
 
-	lastBundle, _ := s.db.GetLastBundleNumber(ctx)
+	lastBundle := s.bundleManager.GetLastBundleNumber()
 
 	for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
 		ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
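One subtlety in the hunk above: findStartBundle deliberately backs up one bundle, because operations that share the boundary timestamp can straddle two bundles; the seenCIDs set in collectOperations then drops the re-read duplicates. Condensed sketch of that boundary handling:

// Re-read the bundle just before the match; duplicates are filtered
// later by CID, so starting one bundle early is safe but never lossy.
if found := s.bundleManager.FindBundleForTimestamp(afterTime); found > 1 {
	startBundle = found - 1
}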
···
 	limit := getQueryInt(r, "limit", 0)
 	fromBundle := getQueryInt(r, "from", 1)
 
-	history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
+	// Use BundleManager instead of database
+	history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
 	if err != nil {
 		resp.error(err.Error(), http.StatusInternalServerError)
 		return
···
 	})
 }
 
-// ===== UTILITY FUNCTIONS =====
+func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
+	resp := newResponse(w)
+
+	bundleNum, err := getBundleNumber(r)
+	if err != nil {
+		resp.error("invalid bundle number", http.StatusBadRequest)
+		return
+	}
 
-func computeOperationsHash(ops []plc.PLCOperation) string {
-	var jsonlData []byte
-	for _, op := range ops {
-		jsonlData = append(jsonlData, op.RawJSON...)
-		jsonlData = append(jsonlData, '\n')
+	labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
+	if err != nil {
+		resp.error(err.Error(), http.StatusInternalServerError)
+		return
 	}
-	hash := sha256.Sum256(jsonlData)
-	return hex.EncodeToString(hash[:])
+
+	resp.json(map[string]interface{}{
+		"bundle": bundleNum,
+		"count":  len(labels),
+		"labels": labels,
+	})
 }
+
+// ===== UTILITY FUNCTIONS =====
 
 func normalizeEndpoint(endpoint string) string {
 	endpoint = strings.TrimPrefix(endpoint, "https://")
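For reference, the new labels route (registered on the api subrouter in server.go below) responds with the keys set above. An illustrative exchange for a bundle with no labels file, where GetBundleLabels returns an empty list rather than an error:

GET /plc/bundles/42/labels
{"bundle": 42, "count": 0, "labels": []}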
internal/api/server.go (+8 -11)
···
 	"net/http"
 	"time"
 
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
 
 	"github.com/gorilla/handlers"
 	"github.com/gorilla/mux"
 )
···
 	router        *mux.Router
 	server        *http.Server
 	db            storage.Database
-	plcClient     *plc.Client
 	plcBundleDir  string
 	bundleManager *plc.BundleManager
 	plcIndexDIDs  bool
 }
 
-func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
-	bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
-
+func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
 	s := &Server{
 		router:        mux.NewRouter(),
 		db:            db,
-		plcClient:     plc.NewClient(plcCfg.DirectoryURL),
 		plcBundleDir:  plcCfg.BundleDir,
-		bundleManager: bundleManager,
+		bundleManager: bundleManager, // Use provided shared instance
 		plcIndexDIDs:  plcCfg.IndexDIDs,
 	}
···
 	// Generic endpoints (keep as-is)
 	api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
 	api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
+	api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
 
 	// PDS-specific endpoints (virtual, created via JOINs)
 	api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
···
 	api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
 	api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
 	api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
-	api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
+	api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
 
 	// PLC history/metrics
 	api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
internal/log/log.go (+2 -2)
···
 	errorLog = log.New(os.Stderr, "", 0)
 }
 
-// timestamp returns current time in ISO 8601 format
+// timestamp returns current time with milliseconds (local time, no timezone)
 func timestamp() string {
-	return time.Now().Format(time.RFC3339)
+	return time.Now().Format("2006-01-02T15:04:05.000")
 }
 
 func Verbose(format string, v ...interface{}) {
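The practical difference in log output, as a sketch:

// Format change illustrated: RFC3339 carries a timezone suffix and no
// sub-second precision; the new layout is local time with milliseconds.
t := time.Date(2024, 5, 1, 12, 30, 45, 123000000, time.Local)
fmt.Println(t.Format(time.RFC3339))              // 2024-05-01T12:30:45+02:00 (zone suffix varies)
fmt.Println(t.Format("2006-01-02T15:04:05.000")) // 2024-05-01T12:30:45.123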
internal/pds/client.go (+37 -8)
···
 	"context"
 	"encoding/json"
 	"fmt"
+	"net"
 	"net/http"
 	"time"
 )
···
 }
 
 // DescribeServer fetches com.atproto.server.describeServer
-func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
+// Returns: description, responseTime, usedIP, error
+func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
+	startTime := time.Now()
 	url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
 
-	//fmt.Println(url)
+	// Track which IP was used
+	var usedIP string
+	transport := &http.Transport{
+		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+			conn, err := (&net.Dialer{
+				Timeout:   30 * time.Second,
+				KeepAlive: 30 * time.Second,
+			}).DialContext(ctx, network, addr)
+
+			if err == nil && conn != nil {
+				if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
+					if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
+						usedIP = tcpAddr.IP.String()
+					}
+				}
+			}
+			return conn, err
+		},
+	}
+
+	client := &http.Client{
+		Timeout:   c.httpClient.Timeout,
+		Transport: transport,
+	}
 
 	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
 	if err != nil {
-		return nil, err
+		return nil, 0, "", err
 	}
 
-	resp, err := c.httpClient.Do(req)
+	resp, err := client.Do(req)
+	responseTime := time.Since(startTime)
+
 	if err != nil {
-		return nil, err
+		return nil, responseTime, usedIP, err
 	}
 	defer resp.Body.Close()
 
 	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+		return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
 	}
 
 	var desc ServerDescription
 	if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
-		return nil, err
+		return nil, responseTime, usedIP, err
 	}
 
-	return &desc, nil
+	return &desc, responseTime, usedIP, nil
 }
 
 // CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
+// Returns: available, responseTime, version, error
 func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
 	startTime := time.Now()
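Call sites now receive timing and the dialed IP alongside the description. A usage sketch, assuming a *pds.Client named client and an illustrative endpoint URL:

desc, rt, ip, err := client.DescribeServer(ctx, "https://pds.example.com")
if err != nil {
	log.Error("describeServer failed after %v (ip=%s): %v", rt, ip, err)
	return
}
log.Info("server %s responded in %v via %s", desc.DID, rt, ip)

Note the method builds a fresh http.Transport per call so the captured IP cannot leak between concurrent requests; the trade-off is that it forgoes connection reuse.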
internal/pds/scanner.go (+41 -34)
···
 	"sync/atomic"
 	"time"
 
-	"github.com/acarl005/stripansi"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/ipinfo"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/monitor"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/ipinfo"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/monitor"
+	"github.com/atscan/atscand/internal/storage"
 )
 
 type Scanner struct {
···
 	servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
 		Type:            "pds",
 		OnlyStale:       true,
+		OnlyValid:       true,
 		RecheckInterval: s.config.RecheckInterval,
 	})
 	if err != nil {
···
 	// STEP 1: Resolve IPs (both IPv4 and IPv6)
 	ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
 	if err != nil {
-		// Mark as offline due to DNS failure
 		s.saveScanResult(ctx, ep.ID, &ScanResult{
 			Status:       storage.EndpointStatusOffline,
 			ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
 		go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
 	}
 
-	// STEP 2: Health check (rest remains the same)
-	available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
-	if err != nil || !available {
-		errMsg := "health check failed"
-		if err != nil {
-			errMsg = err.Error()
-		}
+	// STEP 2: Call describeServer (primary health check + metadata)
+	desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
+	if err != nil {
 		s.saveScanResult(ctx, ep.ID, &ScanResult{
 			Status:       storage.EndpointStatusOffline,
-			ResponseTime: responseTime,
-			ErrorMessage: errMsg,
+			ResponseTime: descResponseTime,
+			ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
+			UsedIP:       usedIP,
 		})
 		return
 	}
 
-	// STEP 3: Fetch PDS-specific data
-	desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
-	if err != nil {
-		log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
-	} else if desc != nil && desc.DID != "" {
+	// Update server DID immediately
+	if desc.DID != "" {
 		s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
 	}
 
-	// Fetch repos with full info
+	// STEP 3: Call _health to get version
+	available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
+	if err != nil || !available {
+		log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
+		// Server is online (describeServer worked) but _health failed
+		// Continue with empty version
+		version = ""
+	}
+
+	// Calculate average response time from both calls
+	avgResponseTime := descResponseTime
+	if available {
+		avgResponseTime = (descResponseTime + healthResponseTime) / 2
+	}
+
+	// STEP 4: Fetch repos
 	repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
 	if err != nil {
 		log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
 		repoList = []Repo{}
 	}
 
-	// Convert to DIDs for backward compatibility
+	// Convert to DIDs
 	dids := make([]string, len(repoList))
 	for i, repo := range repoList {
 		dids[i] = repo.DID
 	}
 
-	// STEP 4: SAVE scan result
+	// STEP 5: SAVE scan result
 	s.saveScanResult(ctx, ep.ID, &ScanResult{
 		Status:       storage.EndpointStatusOnline,
-		ResponseTime: responseTime,
+		ResponseTime: avgResponseTime,
 		Description:  desc,
 		DIDs:         dids,
 		Version:      version,
+		UsedIP:       usedIP, // Only from describeServer
 	})
 
-	// Save repos in batches (only tracks changes)
+	// STEP 6: Save repos in batches (only tracks changes)
 	if len(repoList) > 0 {
-		batchSize := 10000
+		batchSize := 100_000
 
 		log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
···
 		log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
 	}
-
-	// IP info fetch already started at the beginning (step 1.5)
-	// It will complete in the background
 }
 
 func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
···
 		Metadata: make(map[string]interface{}),
 	}
 
-	var userCount int64 // NEW: Declare user count
+	var userCount int64
 
 	// Add PDS-specific metadata
 	if result.Status == storage.EndpointStatusOnline {
-		userCount = int64(len(result.DIDs))         // NEW: Get user count
-		scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness
+		userCount = int64(len(result.DIDs))
+		scanData.Metadata["user_count"] = userCount
 		if result.Description != nil {
 			scanData.Metadata["server_info"] = result.Description
 		}
···
 		Status:       result.Status,
 		ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms
 		UserCount:    userCount,
-		Version:      result.Version, // NEW: Set the version field
+		Version:      result.Version,
+		UsedIP:       result.UsedIP, // NEW
 		ScanData:     scanData,
 		ScannedAt:    time.Now().UTC(),
 	}
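Summarizing the reordered scan flow: describeServer is now the liveness probe, and a failing _health no longer marks the endpoint offline. A condensed sketch (offline is a hypothetical helper standing in for the saveScanResult call above):

desc, dt, ip, err := client.DescribeServer(ctx, endpoint)
if err != nil {
	return offline(dt, ip) // endpoint considered down
}
ok, ht, version, err := client.CheckHealth(ctx, endpoint)
if err != nil || !ok {
	version = "" // still online; _health is best-effort
}
avg := dt
if ok {
	avg = (dt + ht) / 2 // average the two round trips
}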
internal/pds/types.go (+2 -1)
internal/plc/bundle.go (-676, entire file removed)
package plc

import (
	"bufio"
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"time"

	"github.com/atscan/atscanner/internal/log"
	"github.com/atscan/atscanner/internal/storage"
	"github.com/klauspost/compress/zstd"
)

const BUNDLE_SIZE = 10000

type BundleManager struct {
	dir       string
	enabled   bool
	encoder   *zstd.Encoder
	decoder   *zstd.Decoder
	db        storage.Database
	indexDIDs bool
}

// ===== INITIALIZATION =====

func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
	if !enabled {
		return &BundleManager{enabled: false}, nil
	}

	if err := os.MkdirAll(dir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create bundle dir: %w", err)
	}

	encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
	if err != nil {
		return nil, err
	}

	decoder, err := zstd.NewReader(nil)
	if err != nil {
		return nil, err
	}

	return &BundleManager{
		dir:       dir,
		enabled:   enabled,
		encoder:   encoder,
		decoder:   decoder,
		db:        db,
		indexDIDs: indexDIDs,
	}, nil
}

func (bm *BundleManager) Close() {
	if bm.encoder != nil {
		bm.encoder.Close()
	}
	if bm.decoder != nil {
		bm.decoder.Close()
	}
}

// ===== BUNDLE FILE ABSTRACTION =====

type bundleFile struct {
	path             string
	operations       []PLCOperation
	uncompressedHash string
	compressedHash   string
}

func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
	return &bundleFile{
		path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
	}
}

func (bf *bundleFile) exists() bool {
	_, err := os.Stat(bf.path)
	return err == nil
}

func (bm *BundleManager) load(bf *bundleFile) error {
	compressed, err := os.ReadFile(bf.path)
	if err != nil {
		return fmt.Errorf("read failed: %w", err)
	}

	decompressed, err := bm.decoder.DecodeAll(compressed, nil)
	if err != nil {
		return fmt.Errorf("decompress failed: %w", err)
	}

	bf.operations = bm.parseJSONL(decompressed)
	return nil
}

func (bm *BundleManager) save(bf *bundleFile) error {
	jsonlData := bm.serializeJSONL(bf.operations)
	bf.uncompressedHash = bm.hash(jsonlData)

	compressed := bm.encoder.EncodeAll(jsonlData, nil)
	bf.compressedHash = bm.hash(compressed)

	return os.WriteFile(bf.path, compressed, 0644)
}

func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
	var ops []PLCOperation
	scanner := bufio.NewScanner(bytes.NewReader(data))

	for scanner.Scan() {
		line := scanner.Bytes()
		if len(line) == 0 {
			continue
		}

		var op PLCOperation
		if err := json.Unmarshal(line, &op); err == nil {
			op.RawJSON = append([]byte(nil), line...)
			ops = append(ops, op)
		}
	}

	return ops
}

func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
	var buf []byte
	for _, op := range ops {
		buf = append(buf, op.RawJSON...)
		buf = append(buf, '\n')
	}
	return buf
}

// ===== BUNDLE FETCHING =====

type bundleFetcher struct {
	client       *Client
	seenCIDs     map[string]bool
	currentAfter string
	fetchCount   int
}

func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
	seen := make(map[string]bool)
	for cid := range prevBoundaryCIDs {
		seen[cid] = true
	}

	return &bundleFetcher{
		client:       client,
		seenCIDs:     seen,
		currentAfter: afterTime,
	}
}

func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
	var ops []PLCOperation
	maxFetches := (target / 900) + 5

	for len(ops) < target && bf.fetchCount < maxFetches {
		bf.fetchCount++
		batchSize := bf.calculateBatchSize(target - len(ops))

		log.Verbose("  Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)

		batch, shouldContinue := bf.fetchBatch(ctx, batchSize)

		for _, op := range batch {
			if !bf.seenCIDs[op.CID] {
				bf.seenCIDs[op.CID] = true
				ops = append(ops, op)

				if len(ops) >= target {
					return ops[:target], true
				}
			}
		}

		if !shouldContinue {
			break
		}
	}

	return ops, len(ops) >= target
}

func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
	if bf.fetchCount == 0 {
		return 1000
	}
	if remaining < 100 {
		return 50
	}
	if remaining < 500 {
		return 200
	}
	return 1000
}

func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
	ops, err := bf.client.Export(ctx, ExportOptions{
		Count: size,
		After: bf.currentAfter,
	})

	if err != nil || len(ops) == 0 {
		return nil, false
	}

	if len(ops) > 0 {
		bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
	}

	return ops, len(ops) >= size
}

// ===== MAIN BUNDLE LOADING =====

func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
	if !bm.enabled {
		return nil, false, fmt.Errorf("bundle manager disabled")
	}

	bf := bm.newBundleFile(bundleNum)

	// Try local file first
	if bf.exists() {
		return bm.loadFromFile(ctx, bundleNum, bf)
	}

	// Fetch from PLC
	return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
}

func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
	log.Verbose("→ Loading bundle %06d from local file", bundleNum)

	// Verify hash if bundle is in DB
	if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
		if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
			log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
			os.Remove(bf.path)
			return nil, false, fmt.Errorf("hash mismatch")
		}
		log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
	}

	if err := bm.load(bf); err != nil {
		return nil, false, err
	}

	// Index if not in DB
	if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
		bf.compressedHash = bm.hashFile(bf.path)
		bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))

		// Calculate cursor from previous bundle
		cursor := bm.calculateCursor(ctx, bundleNum)

		bm.indexBundle(ctx, bundleNum, bf, cursor)
	}

	return bf.operations, true, nil
}

func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
	log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)

	afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
	fetcher := newBundleFetcher(client, afterTime, prevCIDs)

	ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)

	log.Info("  Collected %d unique operations after %d fetches (complete=%v)",
		len(ops), fetcher.fetchCount, isComplete)

	if isComplete {
		bf.operations = ops
		if err := bm.save(bf); err != nil {
			log.Error("Warning: failed to save bundle: %v", err)
		} else {
			// The cursor is the afterTime that was used to fetch this bundle
			cursor := afterTime
			bm.indexBundle(ctx, bundleNum, bf, cursor)
			log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
				bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
		}
	}

	return ops, isComplete, nil
}

func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
	if bundleNum == 1 {
		return "", nil
	}

	prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
	if err != nil {
		return "", nil
	}

	afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)

	// Return stored boundary CIDs if available
	if len(prevBundle.BoundaryCIDs) > 0 {
		cids := make(map[string]bool)
		for _, cid := range prevBundle.BoundaryCIDs {
			cids[cid] = true
		}
		return afterTime, cids
	}

	// Fallback: compute from file
	bf := bm.newBundleFile(bundleNum - 1)
	if bf.exists() {
		if err := bm.load(bf); err == nil {
			_, cids := GetBoundaryCIDs(bf.operations)
			return afterTime, cids
		}
	}

	return afterTime, nil
}

// ===== BUNDLE INDEXING =====

func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
	prevHash := ""
	if bundleNum > 1 {
		if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
			prevHash = prev.Hash
		}
	}

	dids := bm.extractUniqueDIDs(bf.operations)
	compressedFileSize := bm.getFileSize(bf.path)

	// Calculate uncompressed size
	uncompressedSize := int64(0)
	for _, op := range bf.operations {
		uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
	}

	// Get time range from operations
	firstSeenAt := bf.operations[0].CreatedAt
	lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt

	bundle := &storage.PLCBundle{
		BundleNumber:     bundleNum,
		StartTime:        firstSeenAt,
		EndTime:          lastSeenAt,
		DIDs:             dids,
		Hash:             bf.uncompressedHash,
		CompressedHash:   bf.compressedHash,
		CompressedSize:   compressedFileSize,
		UncompressedSize: uncompressedSize,
		Cursor:           cursor,
		PrevBundleHash:   prevHash,
		Compressed:       true,
		CreatedAt:        time.Now().UTC(),
	}

	// Create bundle first
	if err := bm.db.CreateBundle(ctx, bundle); err != nil {
		return err
	}

	// Only index DIDs if enabled
	if bm.indexDIDs {
		start := time.Now()

		// Extract handle and PDS for each DID using centralized helper
		didInfoMap := ExtractDIDInfoMap(bf.operations)

		if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil {
			log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err)
			// Don't return error - bundle is already created
		} else {
			// Update handle and PDS for each DID
			for did, info := range didInfoMap {
				// Validate handle length before saving
				validHandle := ValidateHandle(info.Handle)

				if err := bm.db.UpsertDID(ctx, did, bundleNum, validHandle, info.PDS); err != nil {
					log.Error("Failed to update DID %s metadata: %v", did, err)
				}
			}

			elapsed := time.Since(start)
			log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed)
		}
	} else {
		log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
	}

	return nil
}

func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
	didSet := make(map[string]bool)
	for _, op := range ops {
		didSet[op.DID] = true
	}

	dids := make([]string, 0, len(didSet))
	for did := range didSet {
		dids = append(dids, did)
	}
	return dids
}

// ===== MEMPOOL BUNDLE CREATION =====

func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
	if !bm.enabled {
		return 0, fmt.Errorf("bundle manager disabled")
	}

	if len(operations) != BUNDLE_SIZE {
		return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
	}

	lastBundle, err := bm.db.GetLastBundleNumber(ctx)
	if err != nil {
		return 0, err
	}
	bundleNum := lastBundle + 1

	bf := bm.newBundleFile(bundleNum)
	bf.operations = operations

	if err := bm.save(bf); err != nil {
		return 0, err
	}

	if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
		return 0, err
	}

	log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
		bundleNum, bf.uncompressedHash[:16])

	return bundleNum, nil
}

// ===== VERIFICATION =====

func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
	if !bm.enabled {
		return fmt.Errorf("bundle manager disabled")
	}

	log.Info("Verifying bundle chain from 1 to %06d...", endBundle)

	for i := 1; i <= endBundle; i++ {
		bundle, err := bm.db.GetBundleByNumber(ctx, i)
		if err != nil {
			return fmt.Errorf("bundle %06d not found: %w", i, err)
		}

		// Verify file hash
		path := bm.newBundleFile(i).path
		if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
			return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
		}

		// Verify chain link
		if i > 1 {
			prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
			if err != nil {
				return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
			}

			if bundle.PrevBundleHash != prevBundle.Hash {
				return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
					i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
			}
		}

		if i%100 == 0 {
			log.Verbose("  ✓ Verified bundles 1-%06d", i)
		}
	}

	log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
	return nil
}

func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
	if !bm.enabled {
		return nil
	}

	for i := 1; i < targetBundle; i++ {
		if !bm.newBundleFile(i).exists() {
			if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
				return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
			}
		}
	}

	return nil
}

// ===== UTILITY METHODS =====

func (bm *BundleManager) hash(data []byte) string {
	h := sha256.Sum256(data)
	return hex.EncodeToString(h[:])
}

func (bm *BundleManager) hashFile(path string) string {
	data, _ := os.ReadFile(path)
	return bm.hash(data)
}

func (bm *BundleManager) verifyHash(path, expectedHash string) error {
	if expectedHash == "" {
		return nil
	}

	actualHash := bm.hashFile(path)
	if actualHash != expectedHash {
		return fmt.Errorf("hash mismatch")
	}
	return nil
}

func (bm *BundleManager) getFileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}

func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
	if !bm.enabled {
		return 0, 0, 0, 0, nil
	}
	return bm.db.GetBundleStats(ctx)
}

func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
	lastBundle, err := bm.db.GetLastBundleNumber(ctx)
	if err != nil {
		return nil, err
	}

	if lastBundle == 0 {
		return map[string]interface{}{
			"chain_length": 0,
			"status":       "empty",
		}, nil
	}

	firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
	lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)

	return map[string]interface{}{
		"chain_length":     lastBundle,
		"first_bundle":     1,
		"last_bundle":      lastBundle,
		"chain_start_time": firstBundle.StartTime,
		"chain_end_time":   lastBundleData.EndTime,
		"chain_head_hash":  lastBundleData.Hash,
	}, nil
}

// ===== EXPORTED HELPERS =====

func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
	if len(operations) == 0 {
		return time.Time{}, nil
	}

	lastOp := operations[len(operations)-1]
	boundaryTime := lastOp.CreatedAt
	cidSet := make(map[string]bool)

	for i := len(operations) - 1; i >= 0; i-- {
		op := operations[i]
		if op.CreatedAt.Equal(boundaryTime) {
			cidSet[op.CID] = true
		} else {
			break
		}
	}

	return boundaryTime, cidSet
}

func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
	if len(operations) == 0 {
		return operations
	}

	boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
	if err != nil {
		return operations
	}

	startIdx := 0
	for startIdx < len(operations) {
		op := operations[startIdx]

		if op.CreatedAt.After(boundaryTime) {
			break
		}

		if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
			startIdx++
			continue
		}

		break
	}

	return operations[startIdx:]
}

// LoadBundleOperations is a public method for external access (e.g., API handlers)
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
	if !bm.enabled {
		return nil, fmt.Errorf("bundle manager disabled")
	}

	bf := bm.newBundleFile(bundleNum)

	if !bf.exists() {
		return nil, fmt.Errorf("bundle %06d not found", bundleNum)
	}

	if err := bm.load(bf); err != nil {
		return nil, err
	}

	return bf.operations, nil
}

// calculateCursor determines the cursor value for a given bundle.
// For bundle 1: returns empty string.
// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format.
func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
	if bundleNum == 1 {
		return ""
	}

	// Try to get cursor from previous bundle in DB
	if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
		return prevBundle.EndTime.Format(time.RFC3339Nano)
	}

	// If previous bundle not in DB, try to load it from file
	prevBf := bm.newBundleFile(bundleNum - 1)
	if prevBf.exists() {
		if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
			// Return the createdAt of the last operation in previous bundle
			lastOp := prevBf.operations[len(prevBf.operations)-1]
			return lastOp.CreatedAt.Format(time.RFC3339Nano)
		}
	}

	return ""
}
internal/plc/client.go (-237, entire file removed)
package plc

import (
	"bufio"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"time"

	"github.com/atscan/atscanner/internal/log"
)

type Client struct {
	baseURL     string
	httpClient  *http.Client
	rateLimiter *RateLimiter
}

func NewClient(baseURL string) *Client {
	// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
	rateLimiter := NewRateLimiter(90, time.Minute)

	return &Client{
		baseURL: baseURL,
		httpClient: &http.Client{
			Timeout: 60 * time.Second,
		},
		rateLimiter: rateLimiter,
	}
}

func (c *Client) Close() {
	if c.rateLimiter != nil {
		c.rateLimiter.Stop()
	}
}

type ExportOptions struct {
	Count int
	After string // ISO 8601 datetime string
}

// Export fetches export data from PLC directory with rate limiting and retry
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
	return c.exportWithRetry(ctx, opts, 5)
}

// exportWithRetry implements retry logic with exponential backoff for rate limits
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
	var lastErr error
	backoff := 1 * time.Second

	for attempt := 1; attempt <= maxRetries; attempt++ {
		// Wait for rate limiter token
		if err := c.rateLimiter.Wait(ctx); err != nil {
			return nil, err
		}

		operations, retryAfter, err := c.doExport(ctx, opts)

		if err == nil {
			return operations, nil
		}

		lastErr = err

		// Check if it's a rate limit error (429)
		if retryAfter > 0 {
			log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
				retryAfter, attempt, maxRetries)

			select {
			case <-time.After(retryAfter):
				continue
			case <-ctx.Done():
				return nil, ctx.Err()
			}
		}

		// Other errors - exponential backoff
		if attempt < maxRetries {
			log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
				attempt, maxRetries, err, backoff)

			select {
			case <-time.After(backoff):
				backoff *= 2 // Exponential backoff
			case <-ctx.Done():
				return nil, ctx.Err()
			}
		}
	}

	return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
}

// doExport performs the actual HTTP request
func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
	url := fmt.Sprintf("%s/export", c.baseURL)

	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, 0, err
	}

	// Add query parameters
	q := req.URL.Query()
	if opts.Count > 0 {
		q.Add("count", fmt.Sprintf("%d", opts.Count))
	}
	if opts.After != "" {
		q.Add("after", opts.After)
	}
	req.URL.RawQuery = q.Encode()

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, 0, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	// Handle rate limiting (429)
	if resp.StatusCode == http.StatusTooManyRequests {
		retryAfter := parseRetryAfter(resp)

		// Also check x-ratelimit headers for info
		if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
			log.Verbose("Rate limit: %s", limit)
		}

		return nil, retryAfter, fmt.Errorf("rate limited (429)")
	}

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
	}

	var operations []PLCOperation

	// PLC export returns newline-delimited JSON
	scanner := bufio.NewScanner(resp.Body)
	buf := make([]byte, 0, 64*1024)
	scanner.Buffer(buf, 1024*1024)

	lineCount := 0
	for scanner.Scan() {
		lineCount++
		line := scanner.Bytes()

		if len(line) == 0 {
			continue
		}

		var op PLCOperation
		if err := json.Unmarshal(line, &op); err != nil {
			log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
			continue
		}

		// CRITICAL: Store the original raw JSON bytes
		op.RawJSON = make([]byte, len(line))
		copy(op.RawJSON, line)

		operations = append(operations, op)
	}

	if err := scanner.Err(); err != nil {
		return nil, 0, fmt.Errorf("error reading response: %w", err)
	}

	return operations, 0, nil
}

// parseRetryAfter parses the Retry-After header
func parseRetryAfter(resp *http.Response) time.Duration {
	retryAfter := resp.Header.Get("Retry-After")
	if retryAfter == "" {
		// Default to 5 minutes if no header
		return 5 * time.Minute
	}

	// Try parsing as seconds
	if seconds, err := strconv.Atoi(retryAfter); err == nil {
		return time.Duration(seconds) * time.Second
	}

	// Try parsing as HTTP date
	if t, err := http.ParseTime(retryAfter); err == nil {
		return time.Until(t)
	}

	// Default
	return 5 * time.Minute
}

// GetDID fetches a specific DID document from PLC
func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
	// Wait for rate limiter
	if err := c.rateLimiter.Wait(ctx); err != nil {
		return nil, err
	}

	url := fmt.Sprintf("%s/%s", c.baseURL, did)

	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, err
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusTooManyRequests {
		retryAfter := parseRetryAfter(resp)
		return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
	}

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
	}

	var doc DIDDocument
	if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
		return nil, err
	}

	return &doc, nil
}
internal/plc/helpers.go (+20 -2)
···
 package plc
 
-import "strings"
+import (
+	"regexp"
+	"strings"
+)
 
 // MaxHandleLength is the maximum allowed handle length for database storage
 const MaxHandleLength = 500
+
+// Handle validation regex per AT Protocol spec
+// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter
+var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
 
 // ExtractHandle safely extracts the handle from a PLC operation
 func ExtractHandle(op *PLCOperation) string {
···
 }
 
 // ValidateHandle checks if a handle is valid for database storage
-// Returns empty string if handle is too long
+// Returns empty string if handle is invalid (too long or wrong format)
 func ValidateHandle(handle string) string {
+	if handle == "" {
+		return ""
+	}
+
+	// Check length first (faster)
 	if len(handle) > MaxHandleLength {
 		return ""
 	}
+
+	// Validate format using regex
+	if !handleRegex.MatchString(handle) {
+		return ""
+	}
+
 	return handle
 }
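A few illustrative inputs under the stricter validation (results follow from the regex above: dot-separated labels of at most 63 characters, with a TLD that starts with a letter):

ValidateHandle("alice.bsky.social") // "alice.bsky.social" — valid domain
ValidateHandle("alice")             // "" — no dot, not a domain
ValidateHandle("alice..example")    // "" — empty label
ValidateHandle("alice.123")         // "" — TLD starts with a digit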
internal/plc/manager.go (+522, new file)
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/csv"
6
+
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
13
+
"time"
14
+
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
19
+
)
20
+
21
+
// BundleManager wraps the library's manager with database integration
22
+
type BundleManager struct {
23
+
libManager *plcbundle.Manager
24
+
db storage.Database
25
+
bundleDir string
26
+
indexDIDs bool
27
+
}
28
+
29
+
func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30
+
// Create library config
31
+
config := plcbundle.DefaultConfig(bundleDir)
32
+
33
+
// Create PLC client
34
+
var client *plcbundle.PLCClient
35
+
if plcURL != "" {
36
+
client = plcbundle.NewPLCClient(plcURL)
37
+
}
38
+
39
+
// Create library manager
40
+
libMgr, err := plcbundle.NewManager(config, client)
41
+
if err != nil {
42
+
return nil, fmt.Errorf("failed to create library manager: %w", err)
43
+
}
44
+
45
+
return &BundleManager{
46
+
libManager: libMgr,
47
+
db: db,
48
+
bundleDir: bundleDir,
49
+
indexDIDs: indexDIDs,
50
+
}, nil
51
+
}
52
+
53
+
func (bm *BundleManager) Close() {
54
+
if bm.libManager != nil {
55
+
bm.libManager.Close()
56
+
}
57
+
}
58
+
59
+
// LoadBundle loads a bundle (from library) and returns operations
60
+
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62
+
if err != nil {
63
+
return nil, err
64
+
}
65
+
return bundle.Operations, nil
66
+
}
67
+
68
+
// LoadBundle loads a full bundle with metadata
69
+
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70
+
return bm.libManager.LoadBundle(ctx, bundleNum)
71
+
}
72
+
73
+
// FetchAndSaveBundle fetches next bundle from PLC and saves
74
+
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75
+
// Fetch from PLC using library
76
+
bundle, err := bm.libManager.FetchNextBundle(ctx)
77
+
if err != nil {
78
+
return nil, err
79
+
}
80
+
81
+
// Save to disk (library handles this)
82
+
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83
+
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84
+
}
85
+
86
+
// Index DIDs if enabled (still use database for this)
87
+
if bm.indexDIDs && len(bundle.Operations) > 0 {
88
+
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89
+
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90
+
}
91
+
}
92
+
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
96
+
}
97
+
98
+
// indexBundleDIDs indexes DIDs from a bundle into the database
99
+
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100
+
start := time.Now()
101
+
log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102
+
103
+
// Extract DID info from operations
104
+
didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105
+
106
+
successCount := 0
107
+
errorCount := 0
108
+
invalidHandleCount := 0
109
+
110
+
// Upsert each DID
111
+
for did, info := range didInfoMap {
112
+
validHandle := ValidateHandle(info.Handle)
113
+
if info.Handle != "" && validHandle == "" {
114
+
invalidHandleCount++
115
+
}
116
+
117
+
if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118
+
log.Error("Failed to index DID %s: %v", did, err)
119
+
errorCount++
120
+
} else {
121
+
successCount++
122
+
}
123
+
}
124
+
125
+
elapsed := time.Since(start)
126
+
log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127
+
successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128
+
129
+
return nil
130
+
}
131
+
132
+
// VerifyChain verifies bundle chain integrity
133
+
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134
+
result, err := bm.libManager.VerifyChain(ctx)
135
+
if err != nil {
136
+
return err
137
+
}
138
+
139
+
if !result.Valid {
140
+
return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141
+
}
142
+
143
+
return nil
144
+
}
145
+
146
+
// GetChainInfo returns chain information
147
+
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148
+
return bm.libManager.GetInfo(), nil
149
+
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = int64(0)
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp finds bundle containing a timestamp
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
+	type dailyStat struct {
+		lastBundle        int
+		bundleCount       int
+		totalUncompressed int64
+		totalCompressed   int64
+	}
+
+	dailyStats := make(map[string]*dailyStat)
+
+	// Map to store the cumulative values at the end of each date
+	dateCumulatives := make(map[string]struct {
+		uncompressed int64
+		compressed   int64
+	})
+
+	// Calculate cumulative totals as we iterate through sorted bundles
+	cumulativeUncompressed := int64(0)
+	cumulativeCompressed := int64(0)
+
+	for _, bundle := range filtered {
+		dateStr := bundle.StartTime.Format("2006-01-02")
+
+		// Update cumulative totals
+		cumulativeUncompressed += bundle.UncompressedSize
+		cumulativeCompressed += bundle.CompressedSize
+
+		if stat, exists := dailyStats[dateStr]; exists {
+			// Update existing day
+			if bundle.BundleNumber > stat.lastBundle {
+				stat.lastBundle = bundle.BundleNumber
+			}
+			stat.bundleCount++
+			stat.totalUncompressed += bundle.UncompressedSize
+			stat.totalCompressed += bundle.CompressedSize
+		} else {
+			// Create new day entry
+			dailyStats[dateStr] = &dailyStat{
+				lastBundle:        bundle.BundleNumber,
+				bundleCount:       1,
+				totalUncompressed: bundle.UncompressedSize,
+				totalCompressed:   bundle.CompressedSize,
+			}
+		}
+
+		// Store the cumulative values at the end of this date
+		// (will be overwritten if there are multiple bundles on the same day)
+		dateCumulatives[dateStr] = struct {
+			uncompressed int64
+			compressed   int64
+		}{
+			uncompressed: cumulativeUncompressed,
+			compressed:   cumulativeCompressed,
+		}
+	}
+
+	// Convert map to sorted slice by date
+	var dates []string
+	for date := range dailyStats {
+		dates = append(dates, date)
+	}
+	sort.Strings(dates)
+
+	// Build history points with cumulative operations
+	var history []*storage.PLCHistoryPoint
+	cumulativeOps := 0
+
+	for _, date := range dates {
+		stat := dailyStats[date]
+		cumulativeOps += stat.bundleCount * 10000
+		cumulative := dateCumulatives[date]
+
+		history = append(history, &storage.PLCHistoryPoint{
+			Date:                   date,
+			BundleNumber:           stat.lastBundle,
+			OperationCount:         cumulativeOps,
+			UncompressedSize:       stat.totalUncompressed,
+			CompressedSize:         stat.totalCompressed,
+			CumulativeUncompressed: cumulative.uncompressed,
+			CumulativeCompressed:   cumulative.compressed,
+		})
+	}
+
+	// Apply limit if specified
+	if limit > 0 && len(history) > limit {
+		history = history[:limit]
+	}
+
+	return history, nil
+}
+
+// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
+func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
+	// Define the path to the labels file
+	labelsDir := filepath.Join(bm.bundleDir, "labels")
+	labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
+
+	// Check if file exists
+	if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
+		log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
+		// Return empty, not an error
+		return []*PLCOpLabel{}, nil
+	}
+
+	// Open the Zstd-compressed file
+	file, err := os.Open(labelsFile)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open labels file: %w", err)
+	}
+	defer file.Close()
+
+	// Create a Zstd reader
+	zstdReader, err := zstd.NewReader(file)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create zstd reader: %w", err)
+	}
+	defer zstdReader.Close()
+
+	// Create a CSV reader
+	csvReader := csv.NewReader(zstdReader)
+	// The labels files carry no header row; each record has the six fields
+	// parsed in parseLabelRecord below. To enforce the field count, uncomment:
+	//csvReader.FieldsPerRecord = 6
+
+	var labels []*PLCOpLabel
+
+	// Read all records
+	for {
+		// Check for context cancellation
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+
+		record, err := csvReader.Read()
+		if err == io.EOF {
+			break // End of file
+		}
+		if err != nil {
+			log.Error("Error reading CSV record in %s: %v", labelsFile, err)
+			continue // Skip bad line
+		}
+
+		// Parse the CSV record (which is []string)
+		label, err := parseLabelRecord(record)
+		if err != nil {
+			log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
+			continue // Skip bad data
+		}
+
+		labels = append(labels, label)
+	}
+
+	return labels, nil
+}
+
+// parseLabelRecord converts a new-format CSV record into a PLCOpLabel struct
+func parseLabelRecord(record []string) (*PLCOpLabel, error) {
+	// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
+	if len(record) != 6 {
+		err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
+		log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, err
+	}
+
+	// 0:bundle
+	bundle, err := strconv.Atoi(record[0])
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'bundle': %w", err)
+	}
+
+	// 1:position
+	position, err := strconv.Atoi(record[1])
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'position': %w", err)
+	}
+
+	// 2:cid (short form)
+	shortCID := record[2]
+
+	// 3:size
+	size, err := strconv.Atoi(record[3])
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'size': %w", err)
+	}
+
+	// 4:confidence
+	confidence, err := strconv.ParseFloat(record[4], 64)
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'confidence': %w", err)
+	}
+
+	// 5:labels (semicolon-separated detector names)
+	detectors := strings.Split(record[5], ";")
+
+	label := &PLCOpLabel{
+		Bundle:     bundle,
+		Position:   position,
+		CID:        shortCID,
+		Size:       size,
+		Confidence: confidence,
+		Detectors:  detectors,
+	}
+
+	return label, nil
+}
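For orientation, here is a minimal sketch of the record shape parseLabelRecord expects; the sample values are invented, not taken from a real labels file:

// Hypothetical decompressed row of labels/000042.csv.zst:
//   42,1337,bafyshort,512,0.87,spam;burst
record := []string{"42", "1337", "bafyshort", "512", "0.87", "spam;burst"}

label, err := parseLabelRecord(record)
if err != nil {
	// malformed rows are logged and skipped by GetBundleLabels
	return
}

// label.Bundle == 42, label.Position == 1337, label.Size == 512,
// label.Confidence == 0.87, label.Detectors == []string{"spam", "burst"}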
internal/plc/ratelimiter.go (-70)

-package plc
-
-import (
-	"context"
-	"time"
-)
-
-// RateLimiter implements a token bucket rate limiter
-type RateLimiter struct {
-	tokens     chan struct{}
-	refillRate time.Duration
-	maxTokens  int
-	stopRefill chan struct{}
-}
-
-// NewRateLimiter creates a new rate limiter
-// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
-func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
-	rl := &RateLimiter{
-		tokens:     make(chan struct{}, requestsPerPeriod),
-		refillRate: period / time.Duration(requestsPerPeriod),
-		maxTokens:  requestsPerPeriod,
-		stopRefill: make(chan struct{}),
-	}
-
-	// Fill initially
-	for i := 0; i < requestsPerPeriod; i++ {
-		rl.tokens <- struct{}{}
-	}
-
-	// Start refill goroutine
-	go rl.refill()
-
-	return rl
-}
-
-// refill adds tokens at the specified rate
-func (rl *RateLimiter) refill() {
-	ticker := time.NewTicker(rl.refillRate)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ticker.C:
-			select {
-			case rl.tokens <- struct{}{}:
-				// Token added
-			default:
-				// Buffer full, skip
-			}
-		case <-rl.stopRefill:
-			return
-		}
-	}
-}
-
-// Wait blocks until a token is available
-func (rl *RateLimiter) Wait(ctx context.Context) error {
-	select {
-	case <-rl.tokens:
-		return nil
-	case <-ctx.Done():
-		return ctx.Err()
-	}
-}
-
-// Stop stops the rate limiter
-func (rl *RateLimiter) Stop() {
-	close(rl.stopRefill)
-}
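The custom limiter is dropped because fetching now goes through the plcbundle library. Should standalone throttling ever be needed again, golang.org/x/time/rate provides the same token-bucket semantics without a hand-rolled refill goroutine; a minimal sketch, assuming the 500-requests-per-5-minutes PLC directory limit mentioned in the old scanner:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// One token every 600ms with a burst of 500 ≈ 500 requests / 5 min.
	limiter := rate.NewLimiter(rate.Every(5*time.Minute/500), 500)

	if err := limiter.Wait(context.Background()); err != nil {
		fmt.Println("wait cancelled:", err) // context cancelled or deadline hit
		return
	}
	fmt.Println("token acquired; safe to call the directory")
}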
internal/plc/scanner.go (+92 -431)
···
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
 
-	"github.com/acarl005/stripansi"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/storage"
 )
 
 type Scanner struct {
-	client        *Client
+	bundleManager *BundleManager
 	db            storage.Database
 	config        config.PLCConfig
-	bundleManager *BundleManager
 }
 
-func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
-	bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
-	if err != nil {
-		log.Error("Warning: failed to initialize bundle manager: %v", err)
-		bundleManager = &BundleManager{enabled: false}
-	}
+func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
+	log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
 
 	return &Scanner{
-		client:        NewClient(cfg.DirectoryURL),
+		bundleManager: bundleManager, // Use provided instance
 		db:            db,
 		config:        cfg,
-		bundleManager: bundleManager,
 	}
 }
 
 func (s *Scanner) Close() {
-	if s.bundleManager != nil {
-		s.bundleManager.Close()
-	}
-}
-
-// ScanMetrics tracks scan progress
-type ScanMetrics struct {
-	totalFetched   int64 // Total ops fetched from PLC/bundles
-	totalProcessed int64 // Unique ops processed (after dedup)
-	newEndpoints   int64 // New endpoints discovered
-	endpointCounts map[string]int64
-	currentBundle  int
-	startTime      time.Time
-}
-
-func newMetrics(startBundle int) *ScanMetrics {
-	return &ScanMetrics{
-		endpointCounts: make(map[string]int64),
-		currentBundle:  startBundle,
-		startTime:      time.Now(),
-	}
-}
-
-func (m *ScanMetrics) logSummary() {
-	summary := formatEndpointCounts(m.endpointCounts)
-	if m.newEndpoints > 0 {
-		log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
-			m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
-	} else {
-		log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
-			m.totalProcessed, m.totalFetched, time.Since(m.startTime))
-	}
+	// Don't close bundleManager here - it's shared
 }
 
 func (s *Scanner) Scan(ctx context.Context) error {
 	log.Info("Starting PLC directory scan...")
-	log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
 
 	cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
 	if err != nil {
 		return fmt.Errorf("failed to get scan cursor: %w", err)
 	}
 
-	startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
-	metrics := newMetrics(startBundle)
-
-	if startBundle > 1 {
-		if err := s.ensureContinuity(ctx, startBundle); err != nil {
-			return err
-		}
-	}
+	metrics := newMetrics(cursor.LastBundleNumber + 1)
 
-	// Handle existing mempool first
-	if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
-		return s.handleMempoolOnly(ctx, metrics)
-	}
-
-	// Process bundles until incomplete or error
+	// Main processing loop
 	for {
 		if err := ctx.Err(); err != nil {
 			return err
 		}
 
-		if err := s.processSingleBundle(ctx, metrics); err != nil {
-			if s.shouldRetry(err) {
-				continue
-			}
-			break
-		}
-
-		if err := s.updateCursor(ctx, cursor, metrics); err != nil {
-			log.Error("Warning: failed to update cursor: %v", err)
-		}
-	}
-
-	// Try to finalize mempool
-	s.finalizeMempool(ctx, metrics)
-
-	metrics.logSummary()
-	return nil
-}
-
-func (s *Scanner) calculateStartBundle(lastBundle int) int {
-	if lastBundle == 0 {
-		return 1
-	}
-	return lastBundle + 1
-}
-
-func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
-	log.Info("Checking bundle continuity...")
-	if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
-		return fmt.Errorf("bundle continuity check failed: %w", err)
-	}
-	return nil
-}
-
-func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
-	count, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-	return count > 0, nil
-}
-
-func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
-	count, _ := s.db.GetMempoolCount(ctx)
-	log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
-
-	if err := s.fillMempool(ctx, m); err != nil {
-		return err
-	}
-
-	if err := s.processMempool(ctx, m); err != nil {
-		log.Error("Error processing mempool: %v", err)
-	}
-
-	m.logSummary()
-	return nil
-}
-
-func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
-	log.Verbose("→ Processing bundle %06d...", m.currentBundle)
-
-	ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
-	if err != nil {
-		return s.handleBundleError(err, m)
-	}
-
-	if isComplete {
-		return s.handleCompleteBundle(ctx, ops, m)
-	}
-	return s.handleIncompleteBundle(ctx, ops, m)
-}
-
-func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
-	log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
-
-	if strings.Contains(err.Error(), "rate limited") {
-		log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
-		time.Sleep(5 * time.Minute)
-		return fmt.Errorf("retry")
-	}
-
-	if m.currentBundle > 1 {
-		log.Info("→ Reached end of available data")
-	}
-	return err
-}
-
-func (s *Scanner) shouldRetry(err error) bool {
-	return err != nil && err.Error() == "retry"
-}
-
-func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
-	counts, err := s.processBatch(ctx, ops)
-	if err != nil {
-		return err
-	}
-
-	s.mergeCounts(m.endpointCounts, counts)
-	m.totalProcessed += int64(len(ops)) // Unique ops after dedup
-	m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
-
-	batchTotal := sumCounts(counts)
-	log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
-		m.currentBundle, len(ops), batchTotal)
-
-	m.currentBundle++
-	return nil
-}
-
-func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
-	log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
-
-	if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
-		return err
-	}
-
-	s.finalizeMempool(ctx, m)
-	return fmt.Errorf("incomplete") // Signal end of processing
-}
-
-func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
-	if err := s.fillMempool(ctx, m); err != nil {
-		log.Error("Error filling mempool: %v", err)
-	}
-	if err := s.processMempool(ctx, m); err != nil {
-		log.Error("Error processing mempool: %v", err)
-	}
-}
-
-func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
-	const fetchLimit = 1000
-
-	for {
-		count, err := s.db.GetMempoolCount(ctx)
+		// Fetch and save bundle (library handles mempool internally)
+		bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
 		if err != nil {
-			return err
-		}
+			if isInsufficientOpsError(err) {
+				// Show mempool status
+				stats := s.bundleManager.libManager.GetMempoolStats()
+				mempoolCount := stats["count"].(int)
 
-		if count >= BUNDLE_SIZE {
-			log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
-			return nil
-		}
+				if mempoolCount > 0 {
+					log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
+						mempoolCount, BUNDLE_SIZE)
+				} else {
+					log.Info("→ Caught up! No operations available")
+				}
+				break
+			}
 
-		log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
-
-		// ✅ Fix: Don't capture unused 'ops' variable
-		shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
-		if err != nil {
-			return err
-		}
+			if strings.Contains(err.Error(), "rate limited") {
+				log.Info("⚠ Rate limited, pausing for 5 minutes...")
+				time.Sleep(5 * time.Minute)
+				continue
+			}
 
-		if !shouldContinue {
-			finalCount, _ := s.db.GetMempoolCount(ctx)
-			log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
-			return nil
+			return fmt.Errorf("failed to fetch bundle: %w", err)
 		}
-	}
-}
 
-func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
-	lastOp, err := s.db.GetLastMempoolOperation(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	var after string
-	if lastOp != nil {
-		after = lastOp.CreatedAt.Format(time.RFC3339Nano)
-		log.Verbose(" Using cursor: %s", after)
-	}
-
-	ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
-	if err != nil {
-		return false, fmt.Errorf("failed to fetch from PLC: %w", err)
-	}
-
-	fetchedCount := len(ops)
-	m.totalFetched += int64(fetchedCount) // Track all fetched
-	log.Verbose(" Fetched %d operations from PLC", fetchedCount)
-
-	if fetchedCount == 0 {
-		count, _ := s.db.GetMempoolCount(ctx)
-		log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
-		return false, nil
-	}
-
-	beforeCount, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	endpointsBefore := sumCounts(m.endpointCounts)
-	if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
-		return false, err
-	}
-	endpointsAfter := sumCounts(m.endpointCounts)
-	m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
-
-	afterCount, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
-	m.totalProcessed += uniqueAdded // Track unique ops processed
-
-	log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
-		uniqueAdded, int64(fetchedCount)-uniqueAdded)
-
-	// Continue only if got full batch
-	shouldContinue := fetchedCount >= limit
-	if !shouldContinue {
-		log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
-	}
-
-	return shouldContinue, nil
-}
-
-func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
-	mempoolOps := make([]storage.MempoolOperation, len(ops))
-	for i, op := range ops {
-		mempoolOps[i] = storage.MempoolOperation{
-			DID:       op.DID,
-			Operation: string(op.RawJSON),
-			CID:       op.CID,
-			CreatedAt: op.CreatedAt,
-		}
-	}
-
-	if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
-		return err
-	}
-
-	// NEW: Create/update DID records immediately when adding to mempool
-	for _, op := range ops {
-		info := ExtractDIDInfo(&op)
-
-		// Validate handle length before saving
-		validHandle := ValidateHandle(info.Handle)
-		if info.Handle != "" && validHandle == "" {
-			log.Verbose("Skipping invalid handle for DID %s (length: %d)", op.DID, len(info.Handle))
-		}
-
-		if err := s.db.UpsertDIDFromMempool(ctx, op.DID, validHandle, info.PDS); err != nil {
-			log.Error("Failed to upsert DID %s in mempool: %v", op.DID, err)
-			// Don't fail the whole operation, just log
-		}
-	}
-
-	// Process for endpoint discovery
-	batchCounts, err := s.processBatch(ctx, ops)
-	s.mergeCounts(counts, batchCounts)
-	return err
-}
-
-func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
-	for {
-		count, err := s.db.GetMempoolCount(ctx)
+		// Process operations for endpoint discovery
+		counts, err := s.processBatch(ctx, bundle.Operations)
 		if err != nil {
-			return err
+			log.Error("Failed to process batch: %v", err)
+			// Continue anyway
 		}
 
-		log.Verbose("Mempool contains %d operations", count)
+		// Update metrics
+		s.mergeCounts(metrics.endpointCounts, counts)
+		metrics.totalProcessed += int64(len(bundle.Operations))
+		metrics.newEndpoints += sumCounts(counts)
+		metrics.currentBundle = bundle.BundleNumber
 
-		if count < BUNDLE_SIZE {
-			log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
-			return nil
-		}
+		log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
+			bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
 
-		log.Info("→ Creating bundle from mempool (%d operations available)...", count)
-
-		// Updated to receive 4 values instead of 3
-		bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
-		if err != nil {
-			return err
-		}
-
-		// Process and update metrics
-		countsBefore := sumCounts(m.endpointCounts)
-		counts, _ := s.processBatch(ctx, ops)
-		s.mergeCounts(m.endpointCounts, counts)
-		newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
-
-		m.totalProcessed += int64(len(ops))
-		m.newEndpoints += newEndpointsFound
-		m.currentBundle = bundleNum
-
-		if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
+		// Update cursor
+		if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
 			log.Error("Warning: failed to update cursor: %v", err)
 		}
-
-		log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
 	}
-}
 
-func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
-	mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
-	if err != nil {
-		return 0, nil, "", err
+	// Show final mempool status
+	stats := s.bundleManager.libManager.GetMempoolStats()
+	if count, ok := stats["count"].(int); ok && count > 0 {
+		log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
+			count, float64(count)/float64(BUNDLE_SIZE)*100)
 	}
 
-	ops, ids := s.deduplicateMempool(mempoolOps)
-	if len(ops) < BUNDLE_SIZE {
-		return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
-	}
-
-	// Determine cursor from last bundle
-	cursor := ""
-	lastBundle, err := s.db.GetLastBundleNumber(ctx)
-	if err == nil && lastBundle > 0 {
-		if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
-			cursor = bundle.EndTime.Format(time.RFC3339Nano)
-		}
-	}
-
-	bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
-	if err != nil {
-		return 0, nil, "", err
-	}
-
-	if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
-		return 0, nil, "", err
-	}
-
-	return bundleNum, ops, cursor, nil
+	metrics.logSummary()
+	return nil
 }
 
-func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
-	ops := make([]PLCOperation, 0, BUNDLE_SIZE)
-	ids := make([]int64, 0, BUNDLE_SIZE)
-	seenCIDs := make(map[string]bool)
-
-	for _, mop := range mempoolOps {
-		if seenCIDs[mop.CID] {
-			ids = append(ids, mop.ID)
-			continue
-		}
-		seenCIDs[mop.CID] = true
-
-		var op PLCOperation
-		json.Unmarshal([]byte(mop.Operation), &op)
-		op.RawJSON = []byte(mop.Operation)
-
-		ops = append(ops, op)
-		ids = append(ids, mop.ID)
-
-		if len(ops) >= BUNDLE_SIZE {
-			break
-		}
-	}
-
-	return ops, ids
-}
-
+// processBatch extracts endpoints from operations
 func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
 	counts := make(map[string]int64)
 	seen := make(map[string]*PLCOperation)
 
 	// Collect unique endpoints
-	for _, op := range ops {
+	for i := range ops {
+		op := &ops[i]
+
 		if op.IsNullified() {
 			continue
 		}
-		for _, ep := range s.extractEndpointsFromOperation(op) {
+
+		for _, ep := range s.extractEndpointsFromOperation(*op) {
 			key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
 			if _, exists := seen[key]; !exists {
-				seen[key] = &op
+				seen[key] = op
 			}
 		}
 	}
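The switch from `for _, op := range ops` to indexing is deliberate: on Go 1.21 and earlier, `&op` took the address of the single reused loop variable, so every `seen[key]` ended up pointing at the last operation. A standalone illustration of the bug the new code avoids:

package main

import "fmt"

func main() {
	vals := []int{1, 2, 3}

	// On Go <= 1.21 the loop variable was one variable reused per iteration,
	// so all stored pointers aliased it and this printed "3 3 3". Go 1.22
	// made the variable per-iteration, which hides the bug, but indexing is
	// still the unambiguous way to point at elements.
	var ptrs []*int
	for _, v := range vals {
		ptrs = append(ptrs, &v)
	}
	fmt.Println(*ptrs[0], *ptrs[1], *ptrs[2])

	// Taking the address of the slice element itself is correct on every Go
	// version and is what processBatch now does.
	ptrs = ptrs[:0]
	for i := range vals {
		ptrs = append(ptrs, &vals[i])
	}
	fmt.Println(*ptrs[0], *ptrs[1], *ptrs[2]) // 1 2 3
}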
···
 	}
 
 		if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
-			log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
+			log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
 			continue
 		}
 
-		log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
+		log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
 		counts[epType]++
 	}
 
 	return counts, nil
-}
-
-func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
-	return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
-		EndpointType: epType,
-		Endpoint:     endpoint,
-		DiscoveredAt: discoveredAt,
-		LastChecked:  time.Time{},
-		Status:       storage.EndpointStatusUnknown,
-	})
 }
 
 func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
 	return nil
 }
 
-func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
-	return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
-		Source:           "plc_directory",
-		LastBundleNumber: m.currentBundle - 1,
-		LastScanTime:     time.Now().UTC(),
-		RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
+func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
+	valid := validateEndpoint(endpoint)
+	return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
+		EndpointType: epType,
+		Endpoint:     endpoint,
+		DiscoveredAt: discoveredAt,
+		LastChecked:  time.Time{},
+		Status:       storage.EndpointStatusUnknown,
+		Valid:        valid,
 	})
 }
 
···
 	return total
 }
 
-func formatEndpointCounts(counts map[string]int64) string {
-	if len(counts) == 0 {
-		return "0 new endpoints"
-	}
+func isInsufficientOpsError(err error) bool {
+	return err != nil && strings.Contains(err.Error(), "insufficient operations")
+}
 
-	total := sumCounts(counts)
+// ScanMetrics tracks scan progress
+type ScanMetrics struct {
+	totalProcessed int64
+	newEndpoints   int64
+	endpointCounts map[string]int64
+	currentBundle  int
+	startTime      time.Time
+}
 
-	if len(counts) == 1 {
-		for typ, count := range counts {
-			return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
-		}
+func newMetrics(startBundle int) *ScanMetrics {
+	return &ScanMetrics{
+		endpointCounts: make(map[string]int64),
+		currentBundle:  startBundle,
+		startTime:      time.Now(),
 	}
+}
 
-	parts := make([]string, 0, len(counts))
-	for typ, count := range counts {
-		parts = append(parts, fmt.Sprintf("%d %s", count, typ))
+func (m *ScanMetrics) logSummary() {
+	if m.newEndpoints > 0 {
+		log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
+			m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
+	} else {
+		log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
+			m.totalProcessed, time.Since(m.startTime))
 	}
-	return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
 }
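Taken together, the scanner is now constructed around a shared BundleManager and loops on FetchAndSaveBundle until the library reports it has caught up. A condensed sketch of a call site, not code from the repository (the surrounding loop and variable names are assumptions):

scanner := plc.NewScanner(db, cfg.PLC, bundleManager) // bundleManager is shared
defer scanner.Close()                                 // no-op: the manager outlives the scanner

for {
	if err := scanner.Scan(ctx); err != nil { // fetch bundles, extract endpoints, update cursor
		log.Error("PLC scan failed: %v", err)
	}
	select {
	case <-time.After(cfg.PLC.ScanInterval): // wait for the next interval
	case <-ctx.Done():
		return
	}
}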
internal/plc/types.go (+68 -55)
···
 package plc
 
-import "time"
-
-type PLCOperation struct {
-	DID       string                 `json:"did"`
-	Operation map[string]interface{} `json:"operation"`
-	CID       string                 `json:"cid"`
-	Nullified interface{}            `json:"nullified,omitempty"`
-	CreatedAt time.Time              `json:"createdAt"`
-
-	RawJSON []byte `json:"-"` // ✅ Exported (capital R)
-}
+import (
+	"net/url"
+	"strings"
 
-// Helper method to check if nullified
-func (op *PLCOperation) IsNullified() bool {
-	if op.Nullified == nil {
-		return false
-	}
-
-	switch v := op.Nullified.(type) {
-	case bool:
-		return v
-	case string:
-		return v != ""
-	default:
-		return false
-	}
-}
-
-// Get nullifying CID if available
-func (op *PLCOperation) GetNullifyingCID() string {
-	if s, ok := op.Nullified.(string); ok {
-		return s
-	}
-	return ""
-}
+	plclib "tangled.org/atscan.net/plcbundle/plc"
+)
 
-type DIDDocument struct {
-	Context            []string             `json:"@context"`
-	ID                 string               `json:"id"`
-	AlsoKnownAs        []string             `json:"alsoKnownAs"`
-	VerificationMethod []VerificationMethod `json:"verificationMethod"`
-	Service            []Service            `json:"service"`
-}
+// Re-export library types
+type PLCOperation = plclib.PLCOperation
+type DIDDocument = plclib.DIDDocument
+type Client = plclib.Client
+type ExportOptions = plclib.ExportOptions
 
-type VerificationMethod struct {
-	ID                 string `json:"id"`
-	Type               string `json:"type"`
-	Controller         string `json:"controller"`
-	PublicKeyMultibase string `json:"publicKeyMultibase"`
-}
+// Keep your custom types
+const BUNDLE_SIZE = 10000
 
-type Service struct {
-	ID              string `json:"id"`
-	Type            string `json:"type"`
-	ServiceEndpoint string `json:"serviceEndpoint"`
-}
-
-// DIDHistoryEntry represents a single operation in DID history
 type DIDHistoryEntry struct {
 	Operation PLCOperation `json:"operation"`
 	PLCBundle string       `json:"plc_bundle,omitempty"`
 }
 
-// DIDHistory represents the full history of a DID
 type DIDHistory struct {
 	DID     string        `json:"did"`
 	Current *PLCOperation `json:"current"`
···
 	Type     string
 	Endpoint string
 }
+
+// PLCOpLabel holds metadata from the label CSV file
+type PLCOpLabel struct {
+	Bundle     int      `json:"bundle"`
+	Position   int      `json:"position"`
+	CID        string   `json:"cid"`
+	Size       int      `json:"size"`
+	Confidence float64  `json:"confidence"`
+	Detectors  []string `json:"detectors"`
+}
+
+// validateEndpoint checks if endpoint is in correct format: https://<domain>
+func validateEndpoint(endpoint string) bool {
+	// Must not be empty
+	if endpoint == "" {
+		return false
+	}
+
+	// Must not have trailing slash
+	if strings.HasSuffix(endpoint, "/") {
+		return false
+	}
+
+	// Parse URL
+	u, err := url.Parse(endpoint)
+	if err != nil {
+		return false
+	}
+
+	// Must use https scheme
+	if u.Scheme != "https" {
+		return false
+	}
+
+	// Must have a host
+	if u.Host == "" {
+		return false
+	}
+
+	// Must not have path (except empty)
+	if u.Path != "" && u.Path != "/" {
+		return false
+	}
+
+	// Must not have query parameters
+	if u.RawQuery != "" {
+		return false
+	}
+
+	// Must not have fragment
+	if u.Fragment != "" {
+		return false
+	}
+
+	return true
+}
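A few concrete cases for validateEndpoint, arranged as a quick table-driven test; every expectation follows directly from the checks above:

package plc

import "testing"

func TestValidateEndpoint(t *testing.T) {
	cases := map[string]bool{
		"https://pds.example.com":      true,
		"https://pds.example.com/":     false, // trailing slash
		"http://pds.example.com":       false, // not https
		"https://pds.example.com/xrpc": false, // has a path
		"https://pds.example.com?x=1":  false, // has a query
		"https://pds.example.com#frag": false, // has a fragment
		"":                             false, // empty
	}
	for input, want := range cases {
		if got := validateEndpoint(input); got != want {
			t.Errorf("validateEndpoint(%q) = %v, want %v", input, got, want)
		}
	}
}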
internal/storage/db.go (-20)
···
 	GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
 	UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
 
-	// Bundle operations
-	CreateBundle(ctx context.Context, bundle *PLCBundle) error
-	GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
-	GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
-	GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
-	GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
-	GetLastBundleNumber(ctx context.Context) (int, error)
-	GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
-	GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
-
-	// Mempool operations
-	AddToMempool(ctx context.Context, ops []MempoolOperation) error
-	GetMempoolCount(ctx context.Context) (int, error)
-	GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
-	DeleteFromMempool(ctx context.Context, ids []int64) error
-	GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
-	GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
-	GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
-	GetMempoolUncompressedSize(ctx context.Context) (int64, error)
-
 	// Metrics
 	StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
 	GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
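With twenty methods gone from the interface, a cheap guard against future drift is a compile-time assertion; a one-line sketch, assuming PostgresDB remains the concrete implementation:

package storage

// Fails to build if PostgresDB stops satisfying the slimmed-down
// Database interface.
var _ Database = (*PostgresDB)(nil)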
internal/storage/postgres.go (+120 -554)
···
 	"database/sql"
 	"encoding/json"
 	"fmt"
-	"strings"
 	"time"
 
-	"github.com/atscan/atscanner/internal/log"
+	"github.com/atscan/atscand/internal/log"
 	"github.com/jackc/pgx/v5"
 	"github.com/jackc/pgx/v5/pgxpool"
 	_ "github.com/jackc/pgx/v5/stdlib"
···
 	log.Info("Running database migrations...")
 
 	schema := `
-	-- Endpoints table (NO user_count, NO ip_info)
-	CREATE TABLE IF NOT EXISTS endpoints (
-		id BIGSERIAL PRIMARY KEY,
-		endpoint_type TEXT NOT NULL DEFAULT 'pds',
-		endpoint TEXT NOT NULL,
-		server_did TEXT,
-		discovered_at TIMESTAMP NOT NULL,
-		last_checked TIMESTAMP,
-		status INTEGER DEFAULT 0,
-		ip TEXT,
-		ipv6 TEXT,
-		ip_resolved_at TIMESTAMP,
-		updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-		UNIQUE(endpoint_type, endpoint)
-	);
+	-- Endpoints table (with IPv6 support)
+	CREATE TABLE IF NOT EXISTS endpoints (
+		id BIGSERIAL PRIMARY KEY,
+		endpoint_type TEXT NOT NULL DEFAULT 'pds',
+		endpoint TEXT NOT NULL,
+		server_did TEXT,
+		discovered_at TIMESTAMP NOT NULL,
+		last_checked TIMESTAMP,
+		status INTEGER DEFAULT 0,
+		ip TEXT,
+		ipv6 TEXT,
+		ip_resolved_at TIMESTAMP,
+		valid BOOLEAN DEFAULT true,
+		updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+		UNIQUE(endpoint_type, endpoint)
+	);
 
-	CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
-	CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
-	CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
-	CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
-	CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
-	CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
-	CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
+	CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
 
 	-- IP infos table (IP as PRIMARY KEY)
 	CREATE TABLE IF NOT EXISTS ip_infos (
···
 	CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
 	CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
 
-	-- Endpoint scans (renamed from pds_scans)
+	-- Endpoint scans
 	CREATE TABLE IF NOT EXISTS endpoint_scans (
 		id BIGSERIAL PRIMARY KEY,
 		endpoint_id BIGINT NOT NULL,
···
 		response_time DOUBLE PRECISION,
 		user_count BIGINT,
 		version TEXT,
+		used_ip TEXT,
 		scan_data JSONB,
 		scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
 		FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
···
 	CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
 	CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
 	CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
+	CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
+
 
 	CREATE TABLE IF NOT EXISTS plc_metrics (
 		id BIGSERIAL PRIMARY KEY,
···
 		records_processed BIGINT DEFAULT 0
 	);
 
-	CREATE TABLE IF NOT EXISTS plc_bundles (
-		bundle_number INTEGER PRIMARY KEY,
-		start_time TIMESTAMP NOT NULL,
-		end_time TIMESTAMP NOT NULL,
-		dids JSONB NOT NULL,
-		hash TEXT NOT NULL,
-		compressed_hash TEXT NOT NULL,
-		compressed_size BIGINT NOT NULL,
-		uncompressed_size BIGINT NOT NULL,
-		cumulative_compressed_size BIGINT NOT NULL,
-		cumulative_uncompressed_size BIGINT NOT NULL,
-		cursor TEXT,
-		prev_bundle_hash TEXT,
-		compressed BOOLEAN DEFAULT true,
-		created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-	);
-
-	CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
-	CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
-	CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
-	CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
-	CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
-
-	CREATE TABLE IF NOT EXISTS plc_mempool (
-		id BIGSERIAL PRIMARY KEY,
-		did TEXT NOT NULL,
-		operation TEXT NOT NULL,
-		cid TEXT NOT NULL UNIQUE,
-		created_at TIMESTAMP NOT NULL,
-		added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-	);
-
-	CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
-	CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
-	CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
-
 	-- Minimal dids table
 	CREATE TABLE IF NOT EXISTS dids (
 		did TEXT PRIMARY KEY,
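One caveat with schema changes expressed through CREATE TABLE IF NOT EXISTS: existing deployments keep their old endpoints/endpoint_scans tables, so the new valid and used_ip columns never appear there. A hypothetical one-off migration helper (not part of this diff) could backfill them:

// Run once against an existing database; both statements are no-ops on
// fresh installs created from the schema above.
func migrateExisting(ctx context.Context, db *sql.DB) error {
	_, err := db.ExecContext(ctx, `
		ALTER TABLE endpoints      ADD COLUMN IF NOT EXISTS valid   BOOLEAN DEFAULT true;
		ALTER TABLE endpoint_scans ADD COLUMN IF NOT EXISTS used_ip TEXT;
	`)
	return err
}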
···
 
 func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
 	query := `
-		INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at)
-		VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+		INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
 		ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
 			last_checked = EXCLUDED.last_checked,
 			status = EXCLUDED.status,
···
 				WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
 				ELSE endpoints.ip_resolved_at
 			END,
+			valid = EXCLUDED.valid,
 			updated_at = CURRENT_TIMESTAMP
 		RETURNING id
 	`
 	err := p.db.QueryRowContext(ctx, query,
 		endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
-		endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt).Scan(&endpoint.ID)
+		endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
 	return err
 }
 
···
 func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
 	query := `
 		SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
-		       ip, ipv6, ip_resolved_at, updated_at
+		       ip, ipv6, ip_resolved_at, valid, updated_at
 		FROM endpoints
 		WHERE endpoint = $1 AND endpoint_type = $2
 	`
···
 
 	err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
 		&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
-		&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
+		&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
 	)
 	if err != nil {
 		return nil, err
···
 	query := `
 		SELECT DISTINCT ON (COALESCE(server_did, id::text))
 		       id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
-		       ip, ipv6, ip_resolved_at, updated_at
+		       ip, ipv6, ip_resolved_at, valid, updated_at
 		FROM endpoints
 		WHERE 1=1
 	`
···
 		args = append(args, filter.Type)
 		argIdx++
 	}
+
+	// NEW: Filter by valid flag
+	if filter.OnlyValid {
+		query += " AND valid = true"
+	}
 	if filter.Status != "" {
 		statusInt := EndpointStatusUnknown
 		switch filter.Status {
···
 		}
 	}
 
-	// NEW: Order by server_did and discovered_at to get primary endpoints
-	query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
+	// NEW: Choose ordering strategy
+	if filter != nil && filter.Random {
+		// For random selection, we need to wrap in a subquery
+		query = fmt.Sprintf(`
+			WITH filtered_endpoints AS (
+				%s
+			)
+			SELECT * FROM filtered_endpoints
+			ORDER BY RANDOM()
+		`, query)
+	} else {
+		// Original ordering for non-random queries
+		query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
+	}
 
 	if filter != nil && filter.Limit > 0 {
 		query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
490
476
defer tx.Rollback()
491
477
492
478
query := `
493
-
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at)
494
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
479
+
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
480
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
495
481
`
496
-
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt)
482
+
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
497
483
if err != nil {
498
484
return err
499
485
}
···
520
506
521
507
func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
522
508
query := `
523
-
SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at
509
+
SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
524
510
FROM endpoint_scans
525
511
WHERE endpoint_id = $1
526
512
ORDER BY scanned_at DESC
···
538
524
var scan EndpointScan
539
525
var responseTime sql.NullFloat64
540
526
var userCount sql.NullInt64
541
-
var version sql.NullString // NEW
527
+
var version, usedIP sql.NullString
542
528
var scanDataJSON []byte
543
529
544
-
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt)
530
+
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
545
531
if err != nil {
546
532
return nil, err
547
533
}
···
554
540
scan.UserCount = userCount.Int64
555
541
}
556
542
557
-
if version.Valid { // NEW
543
+
if version.Valid {
558
544
scan.Version = version.String
559
545
}
560
546
547
+
if usedIP.Valid {
548
+
scan.UsedIP = usedIP.String
549
+
}
550
+
561
551
if len(scanDataJSON) > 0 {
562
552
var scanData EndpointScanData
563
553
if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
···
583
573
discovered_at,
584
574
last_checked,
585
575
status,
586
-
ip
576
+
ip,
577
+
ipv6,
578
+
valid
587
579
FROM endpoints
588
580
WHERE endpoint_type = 'pds'
589
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
590
582
)
591
583
SELECT
592
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
593
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
594
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
595
587
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
···
650
642
var items []*PDSListItem
651
643
for rows.Next() {
652
644
item := &PDSListItem{}
653
-
var ip, serverDID, city, country, countryCode, asnOrg sql.NullString
645
+
var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
654
646
var asn sql.NullInt32
655
647
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
656
648
var lat, lon sql.NullFloat64
···
660
652
var scannedAt sql.NullTime
661
653
662
654
err := rows.Scan(
663
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
664
656
&userCount, &responseTime, &version, &scannedAt,
665
657
&city, &country, &countryCode, &asn, &asnOrg,
666
658
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
672
664
673
665
if ip.Valid {
674
666
item.IP = ip.String
667
+
}
668
+
if ipv6.Valid {
669
+
item.IPv6 = ipv6.String
675
670
}
676
671
if serverDID.Valid {
677
672
item.ServerDID = serverDID.String
···
719
714
720
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
721
716
query := `
722
-
WITH target_endpoint AS (
717
+
WITH target_endpoint AS MATERIALIZED (
723
718
SELECT
724
719
e.id,
725
720
e.endpoint,
···
727
722
e.discovered_at,
728
723
e.last_checked,
729
724
e.status,
730
-
e.ip
725
+
e.ip,
726
+
e.ipv6,
727
+
e.valid
731
728
FROM endpoints e
732
-
WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
733
-
),
734
-
aliases_agg AS (
735
-
SELECT
736
-
te.server_did,
737
-
array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
738
-
MIN(e.discovered_at) as first_discovered_at
739
-
FROM target_endpoint te
740
-
LEFT JOIN endpoints e ON te.server_did = e.server_did
741
-
AND e.endpoint_type = 'pds'
742
-
AND te.server_did IS NOT NULL
743
-
GROUP BY te.server_did
729
+
WHERE e.endpoint = $1
730
+
AND e.endpoint_type = 'pds'
731
+
LIMIT 1
744
732
)
745
733
SELECT
746
734
te.id,
···
750
738
te.last_checked,
751
739
te.status,
752
740
te.ip,
741
+
te.ipv6,
742
+
te.valid,
753
743
latest.user_count,
754
744
latest.response_time,
755
745
latest.version,
···
759
749
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
760
750
i.latitude, i.longitude,
761
751
i.raw_data,
762
-
COALESCE(aa.aliases, ARRAY[]::text[]) as aliases,
763
-
aa.first_discovered_at
752
+
COALESCE(
753
+
ARRAY(
754
+
SELECT e2.endpoint
755
+
FROM endpoints e2
756
+
WHERE e2.server_did = te.server_did
757
+
AND e2.endpoint_type = 'pds'
758
+
AND e2.endpoint != te.endpoint
759
+
AND te.server_did IS NOT NULL
760
+
ORDER BY e2.discovered_at
761
+
),
762
+
ARRAY[]::text[]
763
+
) as aliases,
764
+
CASE
765
+
WHEN te.server_did IS NOT NULL THEN (
766
+
SELECT MIN(e3.discovered_at)
767
+
FROM endpoints e3
768
+
WHERE e3.server_did = te.server_did
769
+
AND e3.endpoint_type = 'pds'
770
+
)
771
+
ELSE NULL
772
+
END as first_discovered_at
764
773
FROM target_endpoint te
765
-
LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did
766
774
LEFT JOIN LATERAL (
767
-
SELECT scan_data, response_time, version, scanned_at, user_count
768
-
FROM endpoint_scans
769
-
WHERE endpoint_id = te.id
770
-
ORDER BY scanned_at DESC
775
+
SELECT
776
+
es.scan_data,
777
+
es.response_time,
778
+
es.version,
779
+
es.scanned_at,
780
+
es.user_count
781
+
FROM endpoint_scans es
782
+
WHERE es.endpoint_id = te.id
783
+
ORDER BY es.scanned_at DESC
771
784
LIMIT 1
772
785
) latest ON true
773
-
LEFT JOIN ip_infos i ON te.ip = i.ip
786
+
LEFT JOIN ip_infos i ON te.ip = i.ip;
774
787
`
775
788
776
789
detail := &PDSDetail{}
777
-
var ip, city, country, countryCode, asnOrg, serverDID sql.NullString
790
+
var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
778
791
var asn sql.NullInt32
779
792
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
780
793
var lat, lon sql.NullFloat64
···
788
801
var firstDiscoveredAt sql.NullTime
789
802
790
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
791
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
792
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
793
806
&city, &country, &countryCode, &asn, &asnOrg,
794
807
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
804
817
if ip.Valid {
805
818
detail.IP = ip.String
806
819
}
820
+
if ipv6.Valid {
821
+
detail.IPv6 = ipv6.String
822
+
}
807
823
808
824
if serverDID.Valid {
809
825
detail.ServerDID = serverDID.String
···
812
828
// Set aliases and is_primary
813
829
detail.Aliases = aliases
814
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
815
-
// Has server_did - check if this is the first discovered
816
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
817
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
818
833
} else {
819
-
// No server_did means unique server
820
834
detail.IsPrimary = true
821
835
}
822
836
···
1147
1161
}
1148
1162
}
1149
1163
return 0
1150
-
}
1151
-
1152
-
// ===== BUNDLE OPERATIONS =====
1153
-
1154
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1155
-
didsJSON, err := json.Marshal(bundle.DIDs)
1156
-
if err != nil {
1157
-
return err
1158
-
}
1159
-
1160
-
// Calculate cumulative sizes from previous bundle
1161
-
if bundle.BundleNumber > 1 {
1162
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1163
-
if err == nil && prevBundle != nil {
1164
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1165
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1166
-
} else {
1167
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1168
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1169
-
}
1170
-
} else {
1171
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1172
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1173
-
}
1174
-
1175
-
query := `
1176
-
INSERT INTO plc_bundles (
1177
-
bundle_number, start_time, end_time, dids,
1178
-
hash, compressed_hash, compressed_size, uncompressed_size,
1179
-
cumulative_compressed_size, cumulative_uncompressed_size,
1180
-
cursor, prev_bundle_hash, compressed
1181
-
)
1182
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1183
-
ON CONFLICT(bundle_number) DO UPDATE SET
1184
-
start_time = EXCLUDED.start_time,
1185
-
end_time = EXCLUDED.end_time,
1186
-
dids = EXCLUDED.dids,
1187
-
hash = EXCLUDED.hash,
1188
-
compressed_hash = EXCLUDED.compressed_hash,
1189
-
compressed_size = EXCLUDED.compressed_size,
1190
-
uncompressed_size = EXCLUDED.uncompressed_size,
1191
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1192
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1193
-
cursor = EXCLUDED.cursor,
1194
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1195
-
compressed = EXCLUDED.compressed
1196
-
`
1197
-
_, err = p.db.ExecContext(ctx, query,
1198
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1199
-
didsJSON, bundle.Hash, bundle.CompressedHash,
1200
-
bundle.CompressedSize, bundle.UncompressedSize,
1201
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1202
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1203
-
)
1204
-
1205
-
return err
1206
-
}
1207
-
1208
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1209
-
query := `
1210
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1211
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1212
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1213
-
FROM plc_bundles
1214
-
WHERE bundle_number = $1
1215
-
`
1216
-
1217
-
var bundle PLCBundle
1218
-
var didsJSON []byte
1219
-
var prevHash sql.NullString
1220
-
var cursor sql.NullString
1221
-
1222
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1223
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1224
-
&didsJSON, &bundle.Hash, &bundle.CompressedHash,
1225
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1226
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1227
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1228
-
)
1229
-
if err != nil {
1230
-
return nil, err
1231
-
}
1232
-
1233
-
if prevHash.Valid {
1234
-
bundle.PrevBundleHash = prevHash.String
1235
-
}
1236
-
if cursor.Valid {
1237
-
bundle.Cursor = cursor.String
1238
-
}
1239
-
1240
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1241
-
return &bundle, nil
1242
-
}
1243
-
1244
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1245
-
query := `
1246
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1247
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1248
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1249
-
FROM plc_bundles
1250
-
ORDER BY bundle_number DESC
1251
-
LIMIT $1
1252
-
`
1253
-
1254
-
rows, err := p.db.QueryContext(ctx, query, limit)
1255
-
if err != nil {
1256
-
return nil, err
1257
-
}
1258
-
defer rows.Close()
1259
-
1260
-
return p.scanBundles(rows)
1261
-
}
1262
-
1263
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1264
-
query := `
1265
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1266
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1267
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1268
-
FROM plc_bundles
1269
-
WHERE dids ? $1
1270
-
ORDER BY bundle_number ASC
1271
-
`
1272
-
1273
-
rows, err := p.db.QueryContext(ctx, query, did)
1274
-
if err != nil {
1275
-
return nil, err
1276
-
}
1277
-
defer rows.Close()
1278
-
1279
-
return p.scanBundles(rows)
1280
-
}
1281
-
1282
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1283
-
var bundles []*PLCBundle
1284
-
1285
-
for rows.Next() {
1286
-
var bundle PLCBundle
1287
-
var didsJSON []byte
1288
-
var prevHash sql.NullString
1289
-
var cursor sql.NullString
1290
-
1291
-
if err := rows.Scan(
1292
-
&bundle.BundleNumber,
1293
-
&bundle.StartTime,
1294
-
&bundle.EndTime,
1295
-
&didsJSON,
1296
-
&bundle.Hash,
1297
-
&bundle.CompressedHash,
1298
-
&bundle.CompressedSize,
1299
-
&bundle.UncompressedSize,
1300
-
&bundle.CumulativeCompressedSize,
1301
-
&bundle.CumulativeUncompressedSize,
1302
-
&cursor,
1303
-
&prevHash,
1304
-
&bundle.Compressed,
1305
-
&bundle.CreatedAt,
1306
-
); err != nil {
1307
-
return nil, err
1308
-
}
1309
-
1310
-
if prevHash.Valid {
1311
-
bundle.PrevBundleHash = prevHash.String
1312
-
}
1313
-
if cursor.Valid {
1314
-
bundle.Cursor = cursor.String
1315
-
}
1316
-
1317
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1318
-
bundles = append(bundles, &bundle)
1319
-
}
1320
-
1321
-
return bundles, rows.Err()
1322
-
}
1323
-
1324
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1325
-
var count, lastBundleNum int64
1326
-
err := p.db.QueryRowContext(ctx, `
1327
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1328
-
FROM plc_bundles
1329
-
`).Scan(&count, &lastBundleNum)
1330
-
if err != nil {
1331
-
return 0, 0, 0, 0, err
1332
-
}
1333
-
1334
-
if lastBundleNum == 0 {
1335
-
return 0, 0, 0, 0, nil
1336
-
}
1337
-
1338
-
var compressedSize, uncompressedSize int64
1339
-
err = p.db.QueryRowContext(ctx, `
1340
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1341
-
FROM plc_bundles
1342
-
WHERE bundle_number = $1
1343
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1344
-
if err != nil {
1345
-
return 0, 0, 0, 0, err
1346
-
}
1347
-
1348
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1349
-
}
1350
-
1351
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1352
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1353
-
var num int
1354
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1355
-
return num, err
1356
-
}
1357
-
1358
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1359
-
query := `
1360
-
SELECT bundle_number
1361
-
FROM plc_bundles
1362
-
WHERE start_time <= $1 AND end_time >= $1
1363
-
ORDER BY bundle_number ASC
1364
-
LIMIT 1
1365
-
`
1366
-
1367
-
var bundleNum int
1368
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1369
-
if err == sql.ErrNoRows {
1370
-
query = `
1371
-
SELECT bundle_number
1372
-
FROM plc_bundles
1373
-
WHERE end_time < $1
1374
-
ORDER BY bundle_number DESC
1375
-
LIMIT 1
1376
-
`
1377
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1378
-
if err == sql.ErrNoRows {
1379
-
return 1, nil
1380
-
}
1381
-
if err != nil {
1382
-
return 0, err
1383
-
}
1384
-
return bundleNum, nil
1385
-
}
1386
-
if err != nil {
1387
-
return 0, err
1388
-
}
1389
-
1390
-
return bundleNum, nil
1391
-
}
1392
-
1393
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1394
-
query := `
1395
-
WITH daily_stats AS (
1396
-
SELECT
1397
-
DATE(start_time) as date,
1398
-
MAX(bundle_number) as last_bundle,
1399
-
COUNT(*) as bundle_count,
1400
-
SUM(uncompressed_size) as total_uncompressed,
1401
-
SUM(compressed_size) as total_compressed,
1402
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1403
-
MAX(cumulative_compressed_size) as cumulative_compressed
1404
-
FROM plc_bundles
1405
-
WHERE bundle_number >= $1
1406
-
GROUP BY DATE(start_time)
1407
-
)
1408
-
SELECT
1409
-
date::text,
1410
-
last_bundle,
1411
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1412
-
total_uncompressed,
1413
-
total_compressed,
1414
-
cumulative_uncompressed,
1415
-
cumulative_compressed
1416
-
FROM daily_stats
1417
-
ORDER BY date ASC
1418
-
`
1419
-
1420
-
if limit > 0 {
1421
-
query += fmt.Sprintf(" LIMIT %d", limit)
1422
-
}
1423
-
1424
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1425
-
if err != nil {
1426
-
return nil, err
1427
-
}
1428
-
defer rows.Close()
1429
-
1430
-
var history []*PLCHistoryPoint
1431
-
for rows.Next() {
1432
-
var point PLCHistoryPoint
1433
-
var cumulativeOps int64
1434
-
1435
-
err := rows.Scan(
1436
-
&point.Date,
1437
-
&point.BundleNumber,
1438
-
&cumulativeOps,
1439
-
&point.UncompressedSize,
1440
-
&point.CompressedSize,
1441
-
&point.CumulativeUncompressed,
1442
-
&point.CumulativeCompressed,
1443
-
)
1444
-
if err != nil {
1445
-
return nil, err
1446
-
}
1447
-
1448
-
point.OperationCount = int(cumulativeOps)
1449
-
1450
-
history = append(history, &point)
1451
-
}
1452
-
1453
-
return history, rows.Err()
1454
-
}
1455
-
1456
-
// ===== MEMPOOL OPERATIONS =====
1457
-
1458
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1459
-
if len(ops) == 0 {
1460
-
return nil
1461
-
}
1462
-
1463
-
tx, err := p.db.BeginTx(ctx, nil)
1464
-
if err != nil {
1465
-
return err
1466
-
}
1467
-
defer tx.Rollback()
1468
-
1469
-
stmt, err := tx.PrepareContext(ctx, `
1470
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1471
-
VALUES ($1, $2, $3, $4)
1472
-
ON CONFLICT(cid) DO NOTHING
1473
-
`)
1474
-
if err != nil {
1475
-
return err
1476
-
}
1477
-
defer stmt.Close()
1478
-
1479
-
for _, op := range ops {
1480
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1481
-
if err != nil {
1482
-
return err
1483
-
}
1484
-
}
1485
-
1486
-
return tx.Commit()
1487
-
}
1488
-
1489
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1490
-
query := "SELECT COUNT(*) FROM plc_mempool"
1491
-
var count int
1492
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1493
-
return count, err
1494
-
}
1495
-
1496
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1497
-
query := `
1498
-
SELECT id, did, operation, cid, created_at, added_at
1499
-
FROM plc_mempool
1500
-
ORDER BY created_at ASC
1501
-
LIMIT $1
1502
-
`
1503
-
1504
-
rows, err := p.db.QueryContext(ctx, query, limit)
1505
-
if err != nil {
1506
-
return nil, err
1507
-
}
1508
-
defer rows.Close()
1509
-
1510
-
var ops []MempoolOperation
1511
-
for rows.Next() {
1512
-
var op MempoolOperation
1513
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1514
-
if err != nil {
1515
-
return nil, err
1516
-
}
1517
-
ops = append(ops, op)
1518
-
}
1519
-
1520
-
return ops, rows.Err()
1521
-
}
1522
-
1523
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1524
-
if len(ids) == 0 {
1525
-
return nil
1526
-
}
1527
-
1528
-
placeholders := make([]string, len(ids))
1529
-
args := make([]interface{}, len(ids))
1530
-
for i, id := range ids {
1531
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1532
-
args[i] = id
1533
-
}
1534
-
1535
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1536
-
strings.Join(placeholders, ","))
1537
-
1538
-
_, err := p.db.ExecContext(ctx, query, args...)
1539
-
return err
1540
-
}
1541
-
1542
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1543
-
query := `
1544
-
SELECT id, did, operation, cid, created_at, added_at
1545
-
FROM plc_mempool
1546
-
ORDER BY created_at ASC, id ASC
1547
-
LIMIT 1
1548
-
`
1549
-
1550
-
var op MempoolOperation
1551
-
err := p.db.QueryRowContext(ctx, query).Scan(
1552
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1553
-
)
1554
-
if err == sql.ErrNoRows {
1555
-
return nil, nil
1556
-
}
1557
-
if err != nil {
1558
-
return nil, err
1559
-
}
1560
-
1561
-
return &op, nil
1562
-
}
1563
-
1564
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1565
-
query := `
1566
-
SELECT id, did, operation, cid, created_at, added_at
1567
-
FROM plc_mempool
1568
-
ORDER BY created_at DESC, id DESC
1569
-
LIMIT 1
1570
-
`
1571
-
1572
-
var op MempoolOperation
1573
-
err := p.db.QueryRowContext(ctx, query).Scan(
1574
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1575
-
)
1576
-
if err == sql.ErrNoRows {
1577
-
return nil, nil
1578
-
}
1579
-
if err != nil {
1580
-
return nil, err
1581
-
}
1582
-
1583
-
return &op, nil
1584
-
}
1585
-
1586
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1587
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1588
-
var count int
1589
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1590
-
return count, err
1591
-
}
1592
-
1593
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1594
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1595
-
var size int64
1596
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1597
-
return size, err
1598
1164
}
1599
1165
1600
1166
// ===== CURSOR OPERATIONS =====
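The removed DeleteFromMempool illustrates the standard database/sql workaround for variable-length IN clauses: generating a "$1,$2,..." placeholder list plus a matching args slice. With a Postgres driver that supports array parameters, the same delete collapses to a single placeholder. A minimal sketch, assuming the lib/pq driver (the diff does not show which driver the project actually uses):

package example

import (
	"context"
	"database/sql"

	"github.com/lib/pq"
)

// deleteFromMempoolAny is a hypothetical variant of the removed helper:
// one array parameter via pq.Array instead of a generated placeholder list.
func deleteFromMempoolAny(ctx context.Context, db *sql.DB, ids []int64) error {
	if len(ids) == 0 {
		return nil
	}
	_, err := db.ExecContext(ctx,
		"DELETE FROM plc_mempool WHERE id = ANY($1)",
		pq.Array(ids))
	return err
}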
+13 -17 internal/storage/types.go
···
 	LastChecked  time.Time
 	Status       int
 	IP           string
-	IPv6         string // NEW
+	IPv6         string
 	IPResolvedAt time.Time
+	Valid        bool
 	UpdatedAt    time.Time
 }
···
 	Status       int
 	ResponseTime float64
 	UserCount    int64
-	Version      string // NEW: Add this field
+	Version      string
+	UsedIP       string // NEW: Track which IP was actually used
 	ScanData     *EndpointScanData
 	ScannedAt    time.Time
 }
···

 // EndpointFilter for querying endpoints
 type EndpointFilter struct {
-	Type            string // "pds", "labeler", etc.
+	Type            string
 	Status          string
 	MinUserCount    int64
-	OnlyStale       bool          // NEW: Only return endpoints that need re-checking
-	RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale
+	OnlyStale       bool
+	OnlyValid       bool
+	RecheckInterval time.Duration
+	Random          bool
 	Limit           int
 	Offset          int
 }
···
 	StartTime      time.Time
 	EndTime        time.Time
 	BoundaryCIDs   []string
-	DIDs           []string
+	DIDCount       int // Changed from DIDs []string
 	Hash           string
 	CompressedHash string
 	CompressedSize int64
···
 	CompressedSize         int64 `json:"size_compressed"`
 	CumulativeUncompressed int64 `json:"cumulative_uncompressed"`
 	CumulativeCompressed   int64 `json:"cumulative_compressed"`
-}
-
-// MempoolOperation represents an operation waiting to be bundled
-type MempoolOperation struct {
-	ID        int64
-	DID       string
-	Operation string
-	CID       string
-	CreatedAt time.Time
-	AddedAt   time.Time
 }

 // ScanCursor stores scanning progress
···
 	// From endpoints table
 	ID        int64
 	Endpoint  string
-	ServerDID string // NEW: Add this
+	ServerDID string
 	DiscoveredAt time.Time
 	LastChecked  time.Time
 	Status       int
 	IP           string
+	IPv6         string
+	Valid        bool // NEW

 	// From latest endpoint_scans (via JOIN)
 	LatestScan *struct {
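The widened EndpointFilter gains OnlyValid and Random alongside the existing staleness fields. A minimal usage sketch follows; the field names come from this diff, while the ListEndpoints method, its []string result, and the interval value are assumptions for illustration only:

package example

import (
	"context"
	"time"

	"github.com/atscan/atscand/internal/storage"
)

// endpointLister stands in for whatever interface the storage layer exposes;
// ListEndpoints is an assumed method name, not shown in this diff.
type endpointLister interface {
	ListEndpoints(ctx context.Context, f storage.EndpointFilter) ([]string, error)
}

// nextScanBatch selects a random batch of valid PDS endpoints due for a re-check.
func nextScanBatch(ctx context.Context, db endpointLister) ([]string, error) {
	return db.ListEndpoints(ctx, storage.EndpointFilter{
		Type:            "pds",
		OnlyStale:       true,      // only endpoints past their recheck window
		OnlyValid:       true,      // skip endpoints marked invalid
		RecheckInterval: time.Hour, // illustrative staleness threshold
		Random:          true,      // randomize scan order
		Limit:           100,
	})
}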
+2 -2 internal/worker/scheduler.go
+113 utils/import-labels.js
import { file, write } from "bun";
import { join } from "path";
import { mkdir } from "fs/promises";
import { init, compress } from "@bokuweb/zstd-wasm";

// --- Configuration ---
const CSV_FILE = process.argv[2];
const CONFIG_FILE = "config.yaml";
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
// ---------------------

if (!CSV_FILE) {
  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
  process.exit(1);
}

console.log("========================================");
console.log("PLC Operation Labels Import (Bun + WASM)");
console.log("========================================");

// 1. Read and parse config
console.log(`Loading config from ${CONFIG_FILE}...`);
const configFile = await file(CONFIG_FILE).text();
const config = Bun.YAML.parse(configFile);
const bundleDir = config?.plc?.bundle_dir;

if (!bundleDir) {
  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
  process.exit(1);
}

const FINAL_LABELS_DIR = join(bundleDir, "labels");
await mkdir(FINAL_LABELS_DIR, { recursive: true });

console.log(`CSV File: ${CSV_FILE}`);
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
console.log("");

// 2. Initialize Zstd WASM module
await init();

// --- Pass 1: Read entire file into memory and group by bundle ---
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
console.warn("This will use a large amount of RAM!");

const startTime = Date.now();
const bundles = new Map(); // Map<string, string[]>
let lineCount = 0;

const inputFile = file(CSV_FILE);
const fileStream = inputFile.stream();
const decoder = new TextDecoder();
let remainder = "";

for await (const chunk of fileStream) {
  const text = remainder + decoder.decode(chunk);
  const lines = text.split("\n");
  remainder = lines.pop() || "";

  for (const line of lines) {
    if (line === "") continue;
    lineCount++;

    if (lineCount === 1 && line.startsWith("bundle,")) {
      continue; // Skip header
    }

    const firstCommaIndex = line.indexOf(",");
    if (firstCommaIndex === -1) {
      console.warn(`Skipping malformed line: ${line}`);
      continue;
    }
    const bundleNumStr = line.substring(0, firstCommaIndex);
    const bundleKey = bundleNumStr.padStart(6, "0");

    // Add line to the correct bundle's array
    if (!bundles.has(bundleKey)) {
      bundles.set(bundleKey, []);
    }
    bundles.get(bundleKey).push(line);
  }
}
// Note: We ignore any final `remainder` as it's likely an empty line

console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
console.log(`Found ${bundles.size} unique bundles.`);

// --- Pass 2: Compress and write each bundle ---
console.log("\nPass 2/2: Compressing and writing bundle files...");
let i = 0;
for (const [bundleKey, lines] of bundles.entries()) {
  i++;
  console.log(`  (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);

  // Join all lines for this bundle into one big string
  const content = lines.join("\n");

  // Compress the string
  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);

  // Write the compressed data to the file
  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
  await write(outPath, compressedData);
}

// 3. Print summary
const totalTime = (Date.now() - startTime) / 1000;
console.log("\n========================================");
console.log("Import Summary");
console.log("========================================");
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
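Both importers write one file per bundle at <bundle_dir>/labels/NNNNNN.csv.zst. A minimal sketch of reading one back from the Go side; the path is illustrative and github.com/klauspost/compress/zstd is an assumed dependency, not part of this diff:

package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

// Streams one compressed label file and prints its CSV rows.
func main() {
	f, err := os.Open("bundles/labels/000001.csv.zst") // illustrative path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	dec, err := zstd.NewReader(f)
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	sc := bufio.NewScanner(dec)
	for sc.Scan() {
		fmt.Println(sc.Text()) // "bundle,..." CSV rows, as written by the importers
	}
	if err := sc.Err(); err != nil {
		panic(err)
	}
}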
+91 utils/import-labels.sh
#!/bin/bash
# utils/import-labels.sh

set -e

if [ $# -lt 1 ]; then
	echo "Usage: ./utils/import-labels.sh <csv-file>"
	exit 1
fi

CSV_FILE="$1"
CONFIG_FILE="config.yaml"

[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1

# Extract bundle directory path
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)

[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1

FINAL_LABELS_DIR="$BUNDLE_DIR/labels"

echo "========================================"
echo "PLC Operation Labels Import (Sorted Pipe)"
echo "========================================"
echo "CSV File: $CSV_FILE"
echo "Output Dir: $FINAL_LABELS_DIR"
echo ""

# Ensure the final directory exists
mkdir -p "$FINAL_LABELS_DIR"

echo "Streaming, sorting, and compressing on the fly..."
echo "This will take time. 'pv' shows progress of the tail stage;"
echo "'sort' only starts emitting output once it has consumed all input."
echo ""

# This is the single-pass pipeline
tail -n +2 "$CSV_FILE" | \
pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
sort -t, -k1,1n | \
awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
# This awk script EXPECTS input sorted by bundle number (col 1)
BEGIN {
	# last_bundle_num tracks the bundle we are currently writing
	last_bundle_num = -1
	# cmd holds the current zstd pipe command
	cmd = ""
}
{
	current_bundle_num = $1

	# Check if the bundle number has changed
	if (current_bundle_num != last_bundle_num) {

		# If it changed, and we have an old pipe open, close it
		if (last_bundle_num != -1) {
			close(cmd)
		}

		# Create the new pipe command, writing to the final .zst file
		outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
		cmd = "zstd -T0 -o " outfile

		# Update the tracker
		last_bundle_num = current_bundle_num

		# Print progress to stderr
		printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
	}

	# Print the current line ($0) to the open pipe.
	# The first print for a bundle opens the pipe; subsequent prints
	# write to the already-open pipe.
	print $0 | cmd
}
# END block: close the very last pipe
END {
	if (last_bundle_num != -1) {
		close(cmd)
	}
	printf "Finished. Total lines: %d\n", NR > "/dev/stderr"
}'

echo ""
echo "========================================"
echo "Import Summary"
echo "========================================"
echo "✓ Import completed successfully!"
echo "Label files are stored in: $FINAL_LABELS_DIR"