+2
-1
.gitignore
+2
-1
.gitignore
+39
-5
Makefile
+39
-5
Makefile
···
1
-
all: run
1
+
.PHONY: all build install test clean fmt lint help
2
+
3
+
# Binary name
4
+
BINARY_NAME=atscand
5
+
INSTALL_PATH=$(GOPATH)/bin
6
+
7
+
# Go commands
8
+
GOCMD=go
9
+
GOBUILD=$(GOCMD) build
10
+
GOINSTALL=$(GOCMD) install
11
+
GOCLEAN=$(GOCMD) clean
12
+
GOTEST=$(GOCMD) test
13
+
GOGET=$(GOCMD) get
14
+
GOFMT=$(GOCMD) fmt
15
+
GOMOD=$(GOCMD) mod
16
+
GORUN=$(GOCMD) run
17
+
18
+
# Default target
19
+
all: build
20
+
21
+
# Build the CLI tool
22
+
build:
23
+
@echo "Building $(BINARY_NAME)..."
24
+
$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
25
+
26
+
# Install the CLI tool globally
27
+
install:
28
+
@echo "Installing $(BINARY_NAME)..."
29
+
$(GOINSTALL) ./cmd/atscand
2
30
3
31
run:
4
-
go run cmd/atscanner.go -verbose
32
+
$(GORUN) cmd/atscand/main.go -verbose
5
33
6
-
clean-db:
7
-
dropdb -U atscanner atscanner
8
-
createdb atscanner -O atscanner
34
+
update-plcbundle:
35
+
GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
36
+
37
+
# Show help
38
+
help:
39
+
@echo "Available targets:"
40
+
@echo " make build - Build the binary"
41
+
@echo " make install - Install binary globally"
42
+
@echo " make run - Run app"
+159
cmd/atscand/main.go
+159
cmd/atscand/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"context"
5
+
"flag"
6
+
"fmt"
7
+
"os"
8
+
"os/signal"
9
+
"syscall"
10
+
"time"
11
+
12
+
"github.com/atscan/atscand/internal/api"
13
+
"github.com/atscan/atscand/internal/config"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/pds"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
18
+
"github.com/atscan/atscand/internal/worker"
19
+
)
20
+
21
+
const VERSION = "1.0.0"
22
+
23
+
func main() {
24
+
configPath := flag.String("config", "config.yaml", "path to config file")
25
+
verbose := flag.Bool("verbose", false, "enable verbose logging")
26
+
flag.Parse()
27
+
28
+
// Load configuration
29
+
cfg, err := config.Load(*configPath)
30
+
if err != nil {
31
+
fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
32
+
os.Exit(1)
33
+
}
34
+
35
+
// Override verbose setting if flag is provided
36
+
if *verbose {
37
+
cfg.API.Verbose = true
38
+
}
39
+
40
+
// Initialize logger
41
+
log.Init(cfg.API.Verbose)
42
+
43
+
// Print banner
44
+
log.Banner(VERSION)
45
+
46
+
// Print configuration summary
47
+
log.PrintConfig(map[string]string{
48
+
"Database Type": cfg.Database.Type,
49
+
"Database Path": cfg.Database.Path, // Will be auto-redacted
50
+
"PLC Directory": cfg.PLC.DirectoryURL,
51
+
"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
52
+
"PLC Bundle Dir": cfg.PLC.BundleDir,
53
+
"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
54
+
"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
55
+
"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
56
+
"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
57
+
"PDS Timeout": cfg.PDS.Timeout.String(),
58
+
"API Host": cfg.API.Host,
59
+
"API Port": fmt.Sprintf("%d", cfg.API.Port),
60
+
"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
61
+
})
62
+
63
+
// Initialize database using factory pattern
64
+
db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
65
+
if err != nil {
66
+
log.Fatal("Failed to initialize database: %v", err)
67
+
}
68
+
defer func() {
69
+
log.Info("Closing database connection...")
70
+
db.Close()
71
+
}()
72
+
73
+
// Set scan retention from config
74
+
if cfg.PDS.ScanRetention > 0 {
75
+
db.SetScanRetention(cfg.PDS.ScanRetention)
76
+
log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
77
+
}
78
+
79
+
// Run migrations
80
+
if err := db.Migrate(); err != nil {
81
+
log.Fatal("Failed to run migrations: %v", err)
82
+
}
83
+
84
+
ctx, cancel := context.WithCancel(context.Background())
85
+
defer cancel()
86
+
87
+
// Initialize workers
88
+
log.Info("Initializing scanners...")
89
+
90
+
bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
91
+
if err != nil {
92
+
log.Fatal("Failed to create bundle manager: %v", err)
93
+
}
94
+
defer bundleManager.Close()
95
+
log.Verbose("✓ Bundle manager initialized (shared)")
96
+
97
+
plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
98
+
defer plcScanner.Close()
99
+
log.Verbose("✓ PLC scanner initialized")
100
+
101
+
pdsScanner := pds.NewScanner(db, cfg.PDS)
102
+
log.Verbose("✓ PDS scanner initialized")
103
+
104
+
scheduler := worker.NewScheduler()
105
+
106
+
// Schedule PLC directory scan
107
+
scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
108
+
if err := plcScanner.Scan(ctx); err != nil {
109
+
log.Error("PLC scan error: %v", err)
110
+
}
111
+
})
112
+
log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
113
+
114
+
// Schedule PDS availability checks
115
+
scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
116
+
if err := pdsScanner.ScanAll(ctx); err != nil {
117
+
log.Error("PDS scan error: %v", err)
118
+
}
119
+
})
120
+
log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
121
+
122
+
// Start API server
123
+
log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
124
+
apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
125
+
go func() {
126
+
if err := apiServer.Start(); err != nil {
127
+
log.Fatal("API server error: %v", err)
128
+
}
129
+
}()
130
+
131
+
// Give the API server a moment to start
132
+
time.Sleep(100 * time.Millisecond)
133
+
log.Info("✓ API server started successfully")
134
+
log.Info("")
135
+
log.Info("🚀 ATScanner is running!")
136
+
log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
137
+
log.Info(" Press Ctrl+C to stop")
138
+
log.Info("")
139
+
140
+
// Start scheduler
141
+
scheduler.Start(ctx)
142
+
143
+
// Wait for interrupt
144
+
sigChan := make(chan os.Signal, 1)
145
+
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
146
+
<-sigChan
147
+
148
+
log.Info("")
149
+
log.Info("Shutting down gracefully...")
150
+
cancel()
151
+
152
+
log.Info("Stopping API server...")
153
+
apiServer.Shutdown(context.Background())
154
+
155
+
log.Info("Waiting for active tasks to complete...")
156
+
time.Sleep(2 * time.Second)
157
+
158
+
log.Info("✓ Shutdown complete. Goodbye!")
159
+
}
-152
cmd/atscanner.go
-152
cmd/atscanner.go
···
1
-
package main
2
-
3
-
import (
4
-
"context"
5
-
"flag"
6
-
"fmt"
7
-
"os"
8
-
"os/signal"
9
-
"syscall"
10
-
"time"
11
-
12
-
"github.com/atscan/atscanner/internal/api"
13
-
"github.com/atscan/atscanner/internal/config"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/pds"
16
-
"github.com/atscan/atscanner/internal/plc"
17
-
"github.com/atscan/atscanner/internal/storage"
18
-
"github.com/atscan/atscanner/internal/worker"
19
-
)
20
-
21
-
const VERSION = "1.0.0"
22
-
23
-
func main() {
24
-
configPath := flag.String("config", "config.yaml", "path to config file")
25
-
verbose := flag.Bool("verbose", false, "enable verbose logging")
26
-
flag.Parse()
27
-
28
-
// Load configuration
29
-
cfg, err := config.Load(*configPath)
30
-
if err != nil {
31
-
fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
32
-
os.Exit(1)
33
-
}
34
-
35
-
// Override verbose setting if flag is provided
36
-
if *verbose {
37
-
cfg.API.Verbose = true
38
-
}
39
-
40
-
// Initialize logger
41
-
log.Init(cfg.API.Verbose)
42
-
43
-
// Print banner
44
-
log.Banner(VERSION)
45
-
46
-
// Print configuration summary
47
-
log.PrintConfig(map[string]string{
48
-
"Database Type": cfg.Database.Type,
49
-
"Database Path": cfg.Database.Path, // Will be auto-redacted
50
-
"PLC Directory": cfg.PLC.DirectoryURL,
51
-
"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
52
-
"PLC Bundle Dir": cfg.PLC.BundleDir,
53
-
"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
54
-
"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
55
-
"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
56
-
"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
57
-
"PDS Timeout": cfg.PDS.Timeout.String(),
58
-
"API Host": cfg.API.Host,
59
-
"API Port": fmt.Sprintf("%d", cfg.API.Port),
60
-
"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
61
-
})
62
-
63
-
// Initialize database using factory pattern
64
-
db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
65
-
if err != nil {
66
-
log.Fatal("Failed to initialize database: %v", err)
67
-
}
68
-
defer func() {
69
-
log.Info("Closing database connection...")
70
-
db.Close()
71
-
}()
72
-
73
-
// Set scan retention from config
74
-
if cfg.PDS.ScanRetention > 0 {
75
-
db.SetScanRetention(cfg.PDS.ScanRetention)
76
-
log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
77
-
}
78
-
79
-
// Run migrations
80
-
if err := db.Migrate(); err != nil {
81
-
log.Fatal("Failed to run migrations: %v", err)
82
-
}
83
-
84
-
ctx, cancel := context.WithCancel(context.Background())
85
-
defer cancel()
86
-
87
-
// Initialize workers
88
-
log.Info("Initializing scanners...")
89
-
90
-
plcScanner := plc.NewScanner(db, cfg.PLC)
91
-
defer plcScanner.Close()
92
-
log.Verbose("✓ PLC scanner initialized")
93
-
94
-
pdsScanner := pds.NewScanner(db, cfg.PDS)
95
-
log.Verbose("✓ PDS scanner initialized")
96
-
97
-
scheduler := worker.NewScheduler()
98
-
99
-
// Schedule PLC directory scan
100
-
scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
101
-
if err := plcScanner.Scan(ctx); err != nil {
102
-
log.Error("PLC scan error: %v", err)
103
-
}
104
-
})
105
-
log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
106
-
107
-
// Schedule PDS availability checks
108
-
scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
109
-
if err := pdsScanner.ScanAll(ctx); err != nil {
110
-
log.Error("PDS scan error: %v", err)
111
-
}
112
-
})
113
-
log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
114
-
115
-
// Start API server
116
-
log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
117
-
apiServer := api.NewServer(db, cfg.API, cfg.PLC)
118
-
go func() {
119
-
if err := apiServer.Start(); err != nil {
120
-
log.Fatal("API server error: %v", err)
121
-
}
122
-
}()
123
-
124
-
// Give the API server a moment to start
125
-
time.Sleep(100 * time.Millisecond)
126
-
log.Info("✓ API server started successfully")
127
-
log.Info("")
128
-
log.Info("🚀 ATScanner is running!")
129
-
log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
130
-
log.Info(" Press Ctrl+C to stop")
131
-
log.Info("")
132
-
133
-
// Start scheduler
134
-
scheduler.Start(ctx)
135
-
136
-
// Wait for interrupt
137
-
sigChan := make(chan os.Signal, 1)
138
-
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
139
-
<-sigChan
140
-
141
-
log.Info("")
142
-
log.Info("Shutting down gracefully...")
143
-
cancel()
144
-
145
-
log.Info("Stopping API server...")
146
-
apiServer.Shutdown(context.Background())
147
-
148
-
log.Info("Waiting for active tasks to complete...")
149
-
time.Sleep(2 * time.Second)
150
-
151
-
log.Info("✓ Shutdown complete. Goodbye!")
152
-
}
+168
cmd/import-labels/main.go
+168
cmd/import-labels/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"bufio"
5
+
"flag"
6
+
"fmt"
7
+
"os"
8
+
"path/filepath"
9
+
"strings"
10
+
"time"
11
+
12
+
"github.com/klauspost/compress/zstd"
13
+
"gopkg.in/yaml.v3"
14
+
)
15
+
16
+
type Config struct {
17
+
PLC struct {
18
+
BundleDir string `yaml:"bundle_dir"`
19
+
} `yaml:"plc"`
20
+
}
21
+
22
+
var CONFIG_FILE = "config.yaml"
23
+
24
+
// ---------------------
25
+
26
+
func main() {
27
+
// Define a new flag for changing the directory
28
+
workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
29
+
flag.Usage = func() { // Custom usage message
30
+
fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
31
+
fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
32
+
flag.PrintDefaults()
33
+
}
34
+
flag.Parse() // Parse all defined flags
35
+
36
+
// Change directory if the flag was used
37
+
if *workDir != "." {
38
+
fmt.Printf("Changing working directory to %s...\n", *workDir)
39
+
if err := os.Chdir(*workDir); err != nil {
40
+
fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
41
+
os.Exit(1)
42
+
}
43
+
}
44
+
45
+
// --- REMOVED UNUSED CODE ---
46
+
// The csvFilePath variable and NArg check were removed
47
+
// as the script now reads from stdin.
48
+
// ---------------------------
49
+
50
+
fmt.Println("========================================")
51
+
fmt.Println("PLC Operation Labels Import (Go STDIN)")
52
+
fmt.Println("========================================")
53
+
54
+
// 1. Read config (will now read from the new CWD)
55
+
fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
56
+
configData, err := os.ReadFile(CONFIG_FILE)
57
+
if err != nil {
58
+
fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
59
+
os.Exit(1)
60
+
}
61
+
62
+
var config Config
63
+
if err := yaml.Unmarshal(configData, &config); err != nil {
64
+
fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
65
+
os.Exit(1)
66
+
}
67
+
68
+
if config.PLC.BundleDir == "" {
69
+
fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
70
+
os.Exit(1)
71
+
}
72
+
73
+
finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
74
+
if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
75
+
fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
76
+
os.Exit(1)
77
+
}
78
+
79
+
fmt.Printf("Output Dir: %s\n", finalLabelsDir)
80
+
fmt.Println("Waiting for sorted data from stdin...")
81
+
82
+
// 2. Process sorted data from stdin
83
+
// This script *requires* the input to be sorted by bundle number.
84
+
85
+
var currentWriter *zstd.Encoder
86
+
var currentFile *os.File
87
+
var lastBundleKey string = ""
88
+
89
+
lineCount := 0
90
+
startTime := time.Now()
91
+
92
+
scanner := bufio.NewScanner(os.Stdin)
93
+
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
94
+
95
+
for scanner.Scan() {
96
+
line := scanner.Text()
97
+
lineCount++
98
+
99
+
parts := strings.SplitN(line, ",", 2)
100
+
if len(parts) < 1 {
101
+
continue // Skip empty/bad lines
102
+
}
103
+
104
+
bundleNumStr := parts[0]
105
+
bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
106
+
107
+
// If the bundle key is new, close the old writer and open a new one.
108
+
if bundleKey != lastBundleKey {
109
+
// Close the previous writer/file
110
+
if currentWriter != nil {
111
+
if err := currentWriter.Close(); err != nil {
112
+
fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
113
+
}
114
+
currentFile.Close()
115
+
}
116
+
117
+
// Start the new one
118
+
fmt.Printf(" -> Writing bundle %s\n", bundleKey)
119
+
outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
120
+
121
+
file, err := os.Create(outPath)
122
+
if err != nil {
123
+
fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
124
+
os.Exit(1)
125
+
}
126
+
currentFile = file
127
+
128
+
writer, err := zstd.NewWriter(file)
129
+
if err != nil {
130
+
fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
131
+
os.Exit(1)
132
+
}
133
+
currentWriter = writer
134
+
lastBundleKey = bundleKey
135
+
}
136
+
137
+
// Write the line to the currently active writer
138
+
if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
139
+
fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
140
+
}
141
+
142
+
// Progress update
143
+
if lineCount%100000 == 0 {
144
+
elapsed := time.Since(startTime).Seconds()
145
+
rate := float64(lineCount) / elapsed
146
+
fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
147
+
}
148
+
}
149
+
150
+
// 3. Close the very last writer
151
+
if currentWriter != nil {
152
+
if err := currentWriter.Close(); err != nil {
153
+
fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
154
+
}
155
+
currentFile.Close()
156
+
}
157
+
158
+
if err := scanner.Err(); err != nil {
159
+
fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
160
+
}
161
+
162
+
totalTime := time.Since(startTime)
163
+
fmt.Println("\n========================================")
164
+
fmt.Println("Import Summary")
165
+
fmt.Println("========================================")
166
+
fmt.Printf("✓ Import completed in %v\n", totalTime)
167
+
fmt.Printf("Total lines processed: %d\n", lineCount)
168
+
}
+1
-1
config.sample.yaml
+1
-1
config.sample.yaml
+6
-5
go.mod
+6
-5
go.mod
···
1
-
module github.com/atscan/atscanner
1
+
module github.com/atscan/atscand
2
2
3
3
go 1.23.0
4
4
5
5
require (
6
6
github.com/gorilla/mux v1.8.1
7
7
github.com/lib/pq v1.10.9
8
-
github.com/mattn/go-sqlite3 v1.14.18
9
8
gopkg.in/yaml.v3 v3.0.1
10
9
)
11
10
12
-
require github.com/klauspost/compress v1.18.0
11
+
require github.com/klauspost/compress v1.18.1
13
12
14
13
require (
15
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
16
14
github.com/gorilla/handlers v1.5.2
15
+
github.com/jackc/pgx/v5 v5.7.6
16
+
tangled.org/atscan.net/plcbundle v0.3.6
17
17
)
18
18
19
19
require (
20
20
github.com/felixge/httpsnoop v1.0.3 // indirect
21
21
github.com/jackc/pgpassfile v1.0.0 // indirect
22
22
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
23
-
github.com/jackc/pgx/v5 v5.7.6 // indirect
24
23
github.com/jackc/puddle/v2 v2.2.2 // indirect
24
+
github.com/kr/text v0.2.0 // indirect
25
+
github.com/rogpeppe/go-internal v1.14.1 // indirect
25
26
golang.org/x/crypto v0.37.0 // indirect
26
27
golang.org/x/sync v0.13.0 // indirect
27
28
golang.org/x/text v0.24.0 // indirect
+17
-7
go.sum
+17
-7
go.sum
···
1
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
2
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
1
+
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
3
2
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4
+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4
5
github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
5
6
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
6
7
github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
15
16
github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
16
17
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
17
18
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
18
-
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
19
-
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
19
+
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
20
+
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
21
+
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
22
+
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
23
+
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
24
+
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
20
25
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
21
26
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
22
-
github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
23
-
github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
27
+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
24
28
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
29
+
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
30
+
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
25
31
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
26
32
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
27
33
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
34
+
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
35
+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
28
36
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
29
37
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
30
38
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
31
39
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
32
40
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
33
41
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
34
-
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
35
42
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
36
43
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
44
+
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
37
45
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
38
46
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
39
47
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
48
+
tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
49
+
tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+295
-456
internal/api/handlers.go
+295
-456
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
47
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
48
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
49
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
50
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
51
49
}
52
50
53
51
// ===== REQUEST HELPERS =====
···
77
75
78
76
// ===== FORMATTING HELPERS =====
79
77
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": len(bundle.DIDs),
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
-
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
100
80
"id": ep.ID,
···
103
83
"discovered_at": ep.DiscoveredAt,
104
84
"last_checked": ep.LastChecked,
105
85
"status": statusToString(ep.Status),
106
-
// REMOVED: "user_count": ep.UserCount, // No longer exists
107
86
}
108
87
109
-
// Add IP if available
88
+
// Add IPs if available
110
89
if ep.IP != "" {
111
90
response["ip"] = ep.IP
112
91
}
113
-
114
-
// REMOVED: IP info extraction - no longer in Endpoint struct
115
-
// IPInfo is now in separate table, joined only in PDS handlers
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
94
+
}
116
95
117
96
return response
118
97
}
···
165
144
resp.json(stats)
166
145
}
167
146
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
152
+
endpointType := r.URL.Query().Get("type")
153
+
if endpointType == "" {
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
156
+
}
157
+
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
170
+
if err != nil {
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
168
183
// ===== PDS HANDLERS =====
169
184
170
185
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
···
233
248
"endpoint": pds.Endpoint,
234
249
"discovered_at": pds.DiscoveredAt,
235
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
236
252
}
237
253
238
254
// Add server_did if available
···
257
273
}
258
274
}
259
275
260
-
// Add IP if available
276
+
// Add IPs if available
261
277
if pds.IP != "" {
262
278
response["ip"] = pds.IP
279
+
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
263
282
}
264
283
265
284
// Add IP info (from ip_infos table via JOIN)
···
276
295
if pds.IPInfo.ASN > 0 {
277
296
response["asn"] = pds.IPInfo.ASN
278
297
}
279
-
if pds.IPInfo.IsDatacenter {
280
-
response["is_datacenter"] = pds.IPInfo.IsDatacenter
281
-
}
298
+
299
+
// Add all network type flags
300
+
response["is_datacenter"] = pds.IPInfo.IsDatacenter
301
+
response["is_vpn"] = pds.IPInfo.IsVPN
302
+
response["is_crawler"] = pds.IPInfo.IsCrawler
303
+
response["is_tor"] = pds.IPInfo.IsTor
304
+
response["is_proxy"] = pds.IPInfo.IsProxy
305
+
306
+
// Add computed is_home field
307
+
response["is_home"] = pds.IPInfo.IsHome()
282
308
}
283
309
284
310
return response
···
316
342
}
317
343
}
318
344
319
-
// Add full IP info
345
+
// Add full IP info with computed is_home field
320
346
if pds.IPInfo != nil {
321
-
response["ip_info"] = pds.IPInfo
347
+
// Convert IPInfo to map
348
+
ipInfoMap := make(map[string]interface{})
349
+
ipInfoJSON, _ := json.Marshal(pds.IPInfo)
350
+
json.Unmarshal(ipInfoJSON, &ipInfoMap)
351
+
352
+
// Add computed is_home field
353
+
ipInfoMap["is_home"] = pds.IPInfo.IsHome()
354
+
355
+
response["ip_info"] = ipInfoMap
322
356
}
323
357
324
358
return response
···
343
377
scanMap["response_time"] = scan.ResponseTime
344
378
}
345
379
346
-
// NEW: Add version if available
347
380
if scan.Version != "" {
348
381
scanMap["version"] = scan.Version
382
+
}
383
+
384
+
if scan.UsedIP != "" {
385
+
scanMap["used_ip"] = scan.UsedIP
349
386
}
350
387
351
388
// Use the top-level UserCount field first
···
647
684
return
648
685
}
649
686
650
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
651
-
if err != nil {
652
-
resp.error(err.Error(), http.StatusInternalServerError)
653
-
return
654
-
}
655
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
656
688
resp.json(map[string]interface{}{
657
689
"total_unique_dids": totalDIDs,
658
690
"last_bundle": lastBundle,
···
663
695
664
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
665
697
resp := newResponse(w)
666
-
667
698
bundleNum, err := getBundleNumber(r)
668
699
if err != nil {
669
700
resp.error("invalid bundle number", http.StatusBadRequest)
670
701
return
671
702
}
672
703
673
-
// Try to get existing bundle
674
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
675
-
if err == nil {
676
-
// Bundle exists, return it normally
677
-
resp.json(formatBundleResponse(bundle))
678
-
return
679
-
}
680
-
681
-
// Bundle not found - check if it's the next upcoming bundle
682
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
683
707
if err != nil {
684
-
resp.error("bundle not found", http.StatusNotFound)
685
-
return
686
-
}
687
-
688
-
if bundleNum == lastBundle+1 {
689
-
// This is the upcoming bundle - return preview based on mempool
690
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
691
-
if err != nil {
692
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
693
717
return
694
718
}
695
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
696
720
return
697
721
}
698
722
699
-
// Not an upcoming bundle, just not found
700
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
701
724
}
702
725
703
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
704
-
// Get mempool stats
705
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
706
-
if err != nil {
707
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
708
743
}
744
+
}
709
745
710
-
if mempoolCount == 0 {
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
711
752
return map[string]interface{}{
712
753
"plc_bundle_number": bundleNum,
713
754
"is_upcoming": true,
···
717
758
}, nil
718
759
}
719
760
720
-
// Get first and last operations for time range
721
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
722
-
if err != nil {
723
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
724
771
}
725
772
726
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
727
-
if err != nil {
728
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
729
775
}
730
776
731
-
// Get unique DID count
732
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
733
-
if err != nil {
734
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
735
780
}
736
-
737
-
// Get uncompressed size estimate
738
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
739
-
if err != nil {
740
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
741
783
}
742
784
743
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
744
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
745
-
746
-
// Calculate completion estimate
747
-
var estimatedCompletionTime *time.Time
748
-
var operationsNeeded int
749
-
var currentRate float64
750
-
751
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
752
-
753
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
754
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
755
-
if timeSpan > 0 {
756
-
currentRate = float64(mempoolCount) / timeSpan
757
-
if currentRate > 0 {
758
-
secondsNeeded := float64(operationsNeeded) / currentRate
759
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
760
-
estimatedCompletionTime = &completionTime
761
-
}
762
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
763
789
}
764
790
765
-
// Get previous bundle for cursor context
766
-
var prevBundleHash string
767
-
var cursor string
791
+
// Get previous bundle info
768
792
if bundleNum > 1 {
769
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
770
-
if err == nil {
771
-
prevBundleHash = prevBundle.Hash
772
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
773
-
}
774
-
}
775
-
776
-
// Determine bundle status
777
-
status := "filling"
778
-
if mempoolCount >= plc.BUNDLE_SIZE {
779
-
status = "ready"
780
-
}
781
-
782
-
// Build upcoming bundle response
783
-
result := map[string]interface{}{
784
-
"plc_bundle_number": bundleNum,
785
-
"is_upcoming": true,
786
-
"status": status,
787
-
"operation_count": mempoolCount,
788
-
"target_operation_count": plc.BUNDLE_SIZE,
789
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
790
-
"operations_needed": operationsNeeded,
791
-
"did_count": uniqueDIDCount,
792
-
"start_time": firstOp.CreatedAt, // This is FIXED once first op exists
793
-
"current_end_time": lastOp.CreatedAt, // This will change as more ops arrive
794
-
"uncompressed_size": uncompressedSize,
795
-
"estimated_compressed_size": estimatedCompressedSize,
796
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
797
-
"prev_bundle_hash": prevBundleHash,
798
-
"cursor": cursor,
799
-
}
800
-
801
-
if estimatedCompletionTime != nil {
802
-
result["estimated_completion_time"] = *estimatedCompletionTime
803
-
result["current_rate_per_second"] = currentRate
804
-
}
805
-
806
-
// Get actual mempool operations if requested
807
-
if r.URL.Query().Get("include_dids") == "true" {
808
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
809
-
if err == nil {
810
-
// Extract unique DIDs
811
-
didSet := make(map[string]bool)
812
-
for _, op := range ops {
813
-
didSet[op.DID] = true
814
-
}
815
-
dids := make([]string, 0, len(didSet))
816
-
for did := range didSet {
817
-
dids = append(dids, did)
818
-
}
819
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
820
796
}
821
797
}
822
798
···
832
808
return
833
809
}
834
810
835
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
836
813
if err != nil {
837
814
resp.error("bundle not found", http.StatusNotFound)
838
815
return
839
816
}
840
817
841
818
resp.json(map[string]interface{}{
842
-
"plc_bundle_number": bundle.BundleNumber,
843
-
"did_count": len(bundle.DIDs),
844
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
845
822
})
846
823
}
847
824
···
856
833
857
834
compressed := r.URL.Query().Get("compressed") != "false"
858
835
859
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
860
837
if err == nil {
861
838
// Bundle exists, serve it normally
862
839
resp.bundleHeaders(bundle)
···
870
847
}
871
848
872
849
// Bundle not found - check if it's the upcoming bundle
873
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
874
-
if err != nil {
875
-
resp.error("bundle not found", http.StatusNotFound)
876
-
return
877
-
}
878
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
879
851
if bundleNum == lastBundle+1 {
880
852
// This is the upcoming bundle - serve from mempool
881
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
882
854
return
883
855
}
884
856
···
886
858
resp.error("bundle not found", http.StatusNotFound)
887
859
}
888
860
889
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
890
-
ctx := r.Context()
891
-
892
-
// Get mempool count
893
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
894
-
if err != nil {
895
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
896
-
return
897
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
898
865
899
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
900
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
901
868
return
902
869
}
903
870
904
-
// Get mempool operations (up to BUNDLE_SIZE)
905
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
906
873
if err != nil {
907
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
908
875
return
909
876
}
910
877
911
-
if len(mempoolOps) == 0 {
912
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
913
880
return
914
881
}
915
882
916
-
// Get time range
917
-
firstOp := mempoolOps[0]
918
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
919
886
920
887
// Extract unique DIDs
921
888
didSet := make(map[string]bool)
922
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
923
890
didSet[op.DID] = true
924
891
}
925
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
926
899
// Get previous bundle hash
927
900
prevBundleHash := ""
928
901
if bundleNum > 1 {
929
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
930
903
prevBundleHash = prevBundle.Hash
931
904
}
932
905
}
933
906
934
-
// Serialize operations to JSONL
935
-
var buf []byte
936
-
for _, mop := range mempoolOps {
937
-
buf = append(buf, []byte(mop.Operation)...)
938
-
buf = append(buf, '\n')
939
-
}
940
-
941
-
// Calculate size
942
-
uncompressedSize := int64(len(buf))
943
-
944
907
// Set headers
945
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
946
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
947
910
w.Header().Set("X-Bundle-Status", "preview")
948
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
949
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
950
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
951
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
952
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
953
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
954
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
955
919
956
920
w.Header().Set("Content-Type", "application/jsonl")
957
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
958
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
959
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
960
922
923
+
// Stream operations as JSONL
961
924
w.WriteHeader(http.StatusOK)
962
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
963
937
}
964
938
965
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
966
940
resp := newResponse(w)
967
-
path := bundle.GetFilePath(s.plcBundleDir)
968
941
969
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
970
944
if err != nil {
971
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
972
946
return
973
947
}
974
-
defer file.Close()
975
-
976
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
977
949
978
950
w.Header().Set("Content-Type", "application/zstd")
979
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
980
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
981
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
982
954
983
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
984
958
}
985
959
986
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
987
961
resp := newResponse(w)
988
962
989
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
990
965
if err != nil {
991
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
992
967
return
993
968
}
994
-
995
-
// Serialize to JSONL
996
-
var buf []byte
997
-
for _, op := range ops {
998
-
buf = append(buf, op.RawJSON...)
999
-
buf = append(buf, '\n')
1000
-
}
1001
-
1002
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
1003
-
compressedSize := int64(0)
1004
-
if fileInfo != nil {
1005
-
compressedSize = fileInfo.Size()
1006
-
}
969
+
defer reader.Close()
1007
970
1008
971
w.Header().Set("Content-Type", "application/jsonl")
1009
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
1010
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
1011
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
1012
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
1013
-
if compressedSize > 0 {
1014
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
1015
978
}
1016
979
980
+
// Stream the data directly to the response
1017
981
w.WriteHeader(http.StatusOK)
1018
-
w.Write(buf)
982
+
io.Copy(w, reader)
1019
983
}
1020
984
1021
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
1022
986
resp := newResponse(w)
1023
987
limit := getQueryInt(r, "limit", 50)
1024
988
1025
-
bundles, err := s.db.GetBundles(r.Context(), limit)
1026
-
if err != nil {
1027
-
resp.error(err.Error(), http.StatusInternalServerError)
1028
-
return
1029
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
1030
990
1031
991
response := make([]map[string]interface{}, len(bundles))
1032
992
for i, bundle := range bundles {
1033
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
1034
994
}
1035
995
1036
996
resp.json(response)
···
1039
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
1040
1000
resp := newResponse(w)
1041
1001
1042
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
1043
-
if err != nil {
1044
-
resp.error(err.Error(), http.StatusInternalServerError)
1045
-
return
1046
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
1047
1008
1048
1009
resp.json(map[string]interface{}{
1049
-
"plc_bundle_count": count,
1050
-
"last_bundle_number": lastBundle,
1051
-
"total_compressed_size": compressedSize,
1052
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1053
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
1054
-
"total_uncompressed_size": uncompressedSize,
1055
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1056
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
1057
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
1058
1015
})
1059
1016
}
1060
1017
···
1062
1019
1063
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
1064
1021
resp := newResponse(w)
1065
-
ctx := r.Context()
1066
1022
1067
-
count, err := s.db.GetMempoolCount(ctx)
1068
-
if err != nil {
1069
-
resp.error(err.Error(), http.StatusInternalServerError)
1070
-
return
1071
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
1072
1025
1073
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
1074
-
if err != nil {
1075
-
resp.error(err.Error(), http.StatusInternalServerError)
1076
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
1077
1030
}
1078
1031
1079
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
1080
-
if err != nil {
1081
-
resp.error(err.Error(), http.StatusInternalServerError)
1082
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
1083
1036
}
1084
1037
1085
-
result := map[string]interface{}{
1086
-
"operation_count": count,
1087
-
"unique_did_count": uniqueDIDCount,
1088
-
"uncompressed_size": uncompressedSize,
1089
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1090
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
1091
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
1092
1042
1093
-
if count > 0 {
1094
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
1095
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
1096
1045
1097
-
if count < plc.BUNDLE_SIZE {
1098
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
1099
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
1100
1049
if timeSpan > 0 {
1101
1050
opsPerSecond := float64(count) / timeSpan
1102
1051
if opsPerSecond > 0 {
1103
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
1104
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
1105
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1106
-
result["operations_needed"] = remainingOps
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1107
1057
result["current_rate_per_second"] = opsPerSecond
1058
+
result["operations_needed"] = remainingOps
1108
1059
}
1109
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
1110
1066
}
1111
-
} else {
1112
-
result["estimated_next_bundle_time"] = time.Now()
1113
-
result["operations_needed"] = 0
1114
1067
}
1115
1068
}
1116
1069
} else {
1070
+
// Empty mempool
1117
1071
result["mempool_start_time"] = nil
1118
1072
result["estimated_next_bundle_time"] = nil
1119
1073
}
···
1138
1092
1139
1093
// ===== VERIFICATION HANDLERS =====
1140
1094
1141
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
1142
-
resp := newResponse(w)
1143
-
vars := mux.Vars(r)
1144
-
1145
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
1146
-
if err != nil {
1147
-
resp.error("Invalid bundle number", http.StatusBadRequest)
1148
-
return
1149
-
}
1150
-
1151
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
1152
-
if err != nil {
1153
-
resp.error("Bundle not found", http.StatusNotFound)
1154
-
return
1155
-
}
1156
-
1157
-
// Fetch from PLC and verify
1158
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
1159
-
if err != nil {
1160
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
1161
-
return
1162
-
}
1163
-
1164
-
remoteHash := computeOperationsHash(remoteOps)
1165
-
verified := bundle.Hash == remoteHash
1166
-
1167
-
resp.json(map[string]interface{}{
1168
-
"bundle_number": bundleNumber,
1169
-
"verified": verified,
1170
-
"local_hash": bundle.Hash,
1171
-
"remote_hash": remoteHash,
1172
-
"local_op_count": plc.BUNDLE_SIZE,
1173
-
"remote_op_count": len(remoteOps),
1174
-
"boundary_cids_used": len(prevCIDs),
1175
-
})
1176
-
}
1177
-
1178
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
1179
-
var after string
1180
-
var prevBoundaryCIDs map[string]bool
1181
-
1182
-
if bundleNum > 1 {
1183
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
1184
-
if err != nil {
1185
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
1186
-
}
1187
-
1188
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
1189
-
1190
-
if len(prevBundle.BoundaryCIDs) > 0 {
1191
-
prevBoundaryCIDs = make(map[string]bool)
1192
-
for _, cid := range prevBundle.BoundaryCIDs {
1193
-
prevBoundaryCIDs[cid] = true
1194
-
}
1195
-
}
1196
-
}
1197
-
1198
-
var allRemoteOps []plc.PLCOperation
1199
-
seenCIDs := make(map[string]bool)
1200
-
1201
-
for cid := range prevBoundaryCIDs {
1202
-
seenCIDs[cid] = true
1203
-
}
1204
-
1205
-
currentAfter := after
1206
-
maxFetches := 20
1207
-
1208
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1209
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1210
-
Count: 1000,
1211
-
After: currentAfter,
1212
-
})
1213
-
if err != nil || len(batch) == 0 {
1214
-
break
1215
-
}
1216
-
1217
-
for _, op := range batch {
1218
-
if !seenCIDs[op.CID] {
1219
-
seenCIDs[op.CID] = true
1220
-
allRemoteOps = append(allRemoteOps, op)
1221
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1222
-
break
1223
-
}
1224
-
}
1225
-
}
1226
-
1227
-
if len(batch) > 0 {
1228
-
lastOp := batch[len(batch)-1]
1229
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1230
-
}
1231
-
1232
-
if len(batch) < 1000 {
1233
-
break
1234
-
}
1235
-
}
1236
-
1237
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1238
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1239
-
}
1240
-
1241
-
return allRemoteOps, prevBoundaryCIDs, nil
1242
-
}
1243
-
1244
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1245
1096
resp := newResponse(w)
1246
-
ctx := r.Context()
1247
1097
1248
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1249
-
if err != nil {
1250
-
resp.error(err.Error(), http.StatusInternalServerError)
1251
-
return
1252
-
}
1253
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1254
1099
if lastBundle == 0 {
1255
1100
resp.json(map[string]interface{}{
1256
1101
"status": "empty",
···
1264
1109
var errorMsg string
1265
1110
1266
1111
for i := 1; i <= lastBundle; i++ {
1267
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1268
1113
if err != nil {
1269
1114
valid = false
1270
1115
brokenAt = i
···
1273
1118
}
1274
1119
1275
1120
if i > 1 {
1276
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1277
1122
if err != nil {
1278
1123
valid = false
1279
1124
brokenAt = i
···
1281
1126
break
1282
1127
}
1283
1128
1284
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1285
1130
valid = false
1286
1131
brokenAt = i
1287
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1288
1133
break
1289
1134
}
1290
1135
}
···
1305
1150
1306
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
1307
1152
resp := newResponse(w)
1308
-
ctx := r.Context()
1309
1153
1310
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1311
-
if err != nil {
1312
-
resp.error(err.Error(), http.StatusInternalServerError)
1313
-
return
1314
-
}
1315
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1316
1155
if lastBundle == 0 {
1317
1156
resp.json(map[string]interface{}{
1318
1157
"chain_length": 0,
···
1321
1160
return
1322
1161
}
1323
1162
1324
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
1325
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
1326
-
1327
-
// Updated to receive 5 values instead of 3
1328
-
count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
1329
-
if err != nil {
1330
-
resp.error(err.Error(), http.StatusInternalServerError)
1331
-
return
1332
-
}
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
1333
1166
1334
1167
resp.json(map[string]interface{}{
1335
-
"chain_length": lastBundle,
1336
-
"total_bundles": count,
1337
-
"total_compressed_size": compressedSize,
1338
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1339
-
"total_uncompressed_size": uncompressedSize,
1340
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1341
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1342
-
"chain_start_time": firstBundle.StartTime,
1343
-
"chain_end_time": lastBundleData.EndTime,
1344
-
"chain_head_hash": lastBundleData.Hash,
1345
-
"first_prev_hash": firstBundle.PrevBundleHash,
1346
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
1347
1177
})
1348
1178
}
1349
1179
···
1364
1194
return
1365
1195
}
1366
1196
1367
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
1368
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
1369
1199
1370
1200
w.Header().Set("Content-Type", "application/jsonl")
···
1404
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
1405
1235
}
1406
1236
1407
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
1408
1238
if afterTime.IsZero() {
1409
1239
return 1
1410
1240
}
1411
1241
1412
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
1413
-
if err != nil {
1414
-
return 1
1415
-
}
1416
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
1417
1243
if foundBundle > 1 {
1418
1244
return foundBundle - 1
1419
1245
}
···
1424
1250
var allOps []plc.PLCOperation
1425
1251
seenCIDs := make(map[string]bool)
1426
1252
1427
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1428
1254
1429
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
1430
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
1584
1410
limit := getQueryInt(r, "limit", 0)
1585
1411
fromBundle := getQueryInt(r, "from", 1)
1586
1412
1587
-
history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1588
1415
if err != nil {
1589
1416
resp.error(err.Error(), http.StatusInternalServerError)
1590
1417
return
···
1656
1483
})
1657
1484
}
1658
1485
1659
-
// ===== UTILITY FUNCTIONS =====
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1660
1488
1661
-
func computeOperationsHash(ops []plc.PLCOperation) string {
1662
-
var jsonlData []byte
1663
-
for _, op := range ops {
1664
-
jsonlData = append(jsonlData, op.RawJSON...)
1665
-
jsonlData = append(jsonlData, '\n')
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1493
+
}
1494
+
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1666
1499
}
1667
-
hash := sha256.Sum256(jsonlData)
1668
-
return hex.EncodeToString(hash[:])
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1669
1506
}
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1670
1509
1671
1510
func normalizeEndpoint(endpoint string) string {
1672
1511
endpoint = strings.TrimPrefix(endpoint, "https://")
+8
-11
internal/api/server.go
+8
-11
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
24
23
plcIndexDIDs bool
25
24
}
26
25
27
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
28
-
bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
29
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
30
27
s := &Server{
31
28
router: mux.NewRouter(),
32
29
db: db,
33
-
plcClient: plc.NewClient(plcCfg.DirectoryURL),
34
30
plcBundleDir: plcCfg.BundleDir,
35
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
36
32
plcIndexDIDs: plcCfg.IndexDIDs,
37
33
}
38
34
···
61
57
// Generic endpoints (keep as-is)
62
58
api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
63
59
api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
60
+
api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
64
61
65
62
//PDS-specific endpoints (virtual, created via JOINs)
66
63
api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
···
87
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
88
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
89
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
90
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
91
88
92
89
// PLC history/metrics
93
90
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
+36
-13
internal/ipinfo/client.go
+36
-13
internal/ipinfo/client.go
···
99
99
return ipInfo, nil
100
100
}
101
101
102
-
// ExtractIPFromEndpoint extracts IP from endpoint URL
103
-
func ExtractIPFromEndpoint(endpoint string) (string, error) {
102
+
// IPAddresses holds both IPv4 and IPv6 addresses
103
+
type IPAddresses struct {
104
+
IPv4 string
105
+
IPv6 string
106
+
}
107
+
108
+
// ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL
109
+
func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) {
104
110
// Parse URL
105
111
parsedURL, err := url.Parse(endpoint)
106
112
if err != nil {
107
-
return "", fmt.Errorf("failed to parse endpoint URL: %w", err)
113
+
return nil, fmt.Errorf("failed to parse endpoint URL: %w", err)
108
114
}
109
115
110
116
host := parsedURL.Hostname()
111
117
if host == "" {
112
-
return "", fmt.Errorf("no hostname in endpoint")
118
+
return nil, fmt.Errorf("no hostname in endpoint")
113
119
}
120
+
121
+
result := &IPAddresses{}
114
122
115
123
// Check if host is already an IP
116
-
if net.ParseIP(host) != nil {
117
-
return host, nil
124
+
if ip := net.ParseIP(host); ip != nil {
125
+
if ip.To4() != nil {
126
+
result.IPv4 = host
127
+
} else {
128
+
result.IPv6 = host
129
+
}
130
+
return result, nil
118
131
}
119
132
120
-
// Resolve hostname to IP
133
+
// Resolve hostname to IPs
121
134
ips, err := net.LookupIP(host)
122
135
if err != nil {
123
-
return "", fmt.Errorf("failed to resolve hostname: %w", err)
136
+
return nil, fmt.Errorf("failed to resolve hostname: %w", err)
124
137
}
125
138
126
139
if len(ips) == 0 {
127
-
return "", fmt.Errorf("no IPs found for hostname")
140
+
return nil, fmt.Errorf("no IPs found for hostname")
128
141
}
129
142
130
-
// Return first IPv4 address
143
+
// Extract both IPv4 and IPv6
131
144
for _, ip := range ips {
132
145
if ipv4 := ip.To4(); ipv4 != nil {
133
-
return ipv4.String(), nil
146
+
if result.IPv4 == "" {
147
+
result.IPv4 = ipv4.String()
148
+
}
149
+
} else {
150
+
if result.IPv6 == "" {
151
+
result.IPv6 = ip.String()
152
+
}
134
153
}
135
154
}
136
155
137
-
// Fallback to first IP (might be IPv6)
138
-
return ips[0].String(), nil
156
+
// Must have at least one IP
157
+
if result.IPv4 == "" && result.IPv6 == "" {
158
+
return nil, fmt.Errorf("no valid IPs found")
159
+
}
160
+
161
+
return result, nil
139
162
}
+2
-2
internal/log/log.go
+2
-2
internal/log/log.go
···
28
28
errorLog = log.New(os.Stderr, "", 0)
29
29
}
30
30
31
-
// timestamp returns current time in ISO 8601 format
31
+
// timestamp returns current time with milliseconds (local time, no timezone)
32
32
func timestamp() string {
33
-
return time.Now().Format(time.RFC3339)
33
+
return time.Now().Format("2006-01-02T15:04:05.000")
34
34
}
35
35
36
36
func Verbose(format string, v ...interface{}) {
+37
-8
internal/pds/client.go
+37
-8
internal/pds/client.go
···
4
4
"context"
5
5
"encoding/json"
6
6
"fmt"
7
+
"net"
7
8
"net/http"
8
9
"time"
9
10
)
···
83
84
}
84
85
85
86
// DescribeServer fetches com.atproto.server.describeServer
86
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
87
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
88
91
89
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
90
116
91
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
92
118
if err != nil {
93
-
return nil, err
119
+
return nil, 0, "", err
94
120
}
95
121
96
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
97
125
if err != nil {
98
-
return nil, err
126
+
return nil, responseTime, usedIP, err
99
127
}
100
128
defer resp.Body.Close()
101
129
102
130
if resp.StatusCode != http.StatusOK {
103
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
104
132
}
105
133
106
134
var desc ServerDescription
107
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
108
-
return nil, err
136
+
return nil, responseTime, usedIP, err
109
137
}
110
138
111
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
112
140
}
113
141
114
142
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
143
+
// Returns: available, responseTime, version, error
115
144
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
116
145
startTime := time.Now()
117
146
+52
-40
internal/pds/scanner.go
+52
-40
internal/pds/scanner.go
···
8
8
"sync/atomic"
9
9
"time"
10
10
11
-
"github.com/acarl005/stripansi"
12
-
"github.com/atscan/atscanner/internal/config"
13
-
"github.com/atscan/atscanner/internal/ipinfo"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/monitor"
16
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
17
16
)
18
17
19
18
type Scanner struct {
···
40
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
41
40
Type: "pds",
42
41
OnlyStale: true,
42
+
OnlyValid: true,
43
43
RecheckInterval: s.config.RecheckInterval,
44
44
})
45
45
if err != nil {
···
124
124
}
125
125
126
126
func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) {
127
-
// STEP 1: Resolve IP (before any network call)
128
-
ip, err := ipinfo.ExtractIPFromEndpoint(ep.Endpoint)
127
+
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
+
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
129
129
if err != nil {
130
-
// Mark as offline due to DNS failure
131
130
s.saveScanResult(ctx, ep.ID, &ScanResult{
132
131
Status: storage.EndpointStatusOffline,
133
132
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
135
134
return
136
135
}
137
136
138
-
// Update IP immediately
139
-
s.db.UpdateEndpointIP(ctx, ep.ID, ip, time.Now().UTC())
137
+
// Update IPs immediately
138
+
s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC())
140
139
141
-
// STEP 1.5: Fetch IP info asynchronously ASAP (runs in parallel with scanning)
142
-
go s.updateIPInfoIfNeeded(ctx, ip)
140
+
// STEP 1.5: Fetch IP info asynchronously for both IPs
141
+
if ips.IPv4 != "" {
142
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv4)
143
+
}
144
+
if ips.IPv6 != "" {
145
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
146
+
}
143
147
144
-
// STEP 2: Health check
145
-
available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
146
-
if err != nil || !available {
147
-
errMsg := "health check failed"
148
-
if err != nil {
149
-
errMsg = err.Error()
150
-
}
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
150
+
if err != nil {
151
151
s.saveScanResult(ctx, ep.ID, &ScanResult{
152
152
Status: storage.EndpointStatusOffline,
153
-
ResponseTime: responseTime,
154
-
ErrorMessage: errMsg,
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
155
156
})
156
157
return
157
158
}
158
159
159
-
// STEP 3: Fetch PDS-specific data
160
-
desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
161
-
if err != nil {
162
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
163
-
} else if desc != nil && desc.DID != "" {
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
164
162
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
165
163
}
166
164
167
-
// Fetch repos with full info
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
172
+
}
173
+
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
168
181
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
169
182
if err != nil {
170
183
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
171
184
repoList = []Repo{}
172
185
}
173
186
174
-
// Convert to DIDs for backward compatibility
187
+
// Convert to DIDs
175
188
dids := make([]string, len(repoList))
176
189
for i, repo := range repoList {
177
190
dids[i] = repo.DID
178
191
}
179
192
180
-
// STEP 4: SAVE scan result
193
+
// STEP 5: SAVE scan result
181
194
s.saveScanResult(ctx, ep.ID, &ScanResult{
182
195
Status: storage.EndpointStatusOnline,
183
-
ResponseTime: responseTime,
196
+
ResponseTime: avgResponseTime,
184
197
Description: desc,
185
198
DIDs: dids,
186
199
Version: version,
200
+
UsedIP: usedIP, // Only from describeServer
187
201
})
188
202
189
-
// Save repos in batches (only tracks changes)
203
+
// STEP 6: Save repos in batches (only tracks changes)
190
204
if len(repoList) > 0 {
191
-
batchSize := 10000
205
+
batchSize := 100_000
192
206
193
207
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
194
208
···
228
242
229
243
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
230
244
}
231
-
232
-
// IP info fetch already started at the beginning (step 1.5)
233
-
// It will complete in the background
234
245
}
235
246
236
247
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
···
240
251
Metadata: make(map[string]interface{}),
241
252
}
242
253
243
-
var userCount int64 // NEW: Declare user count
254
+
var userCount int64
244
255
245
256
// Add PDS-specific metadata
246
257
if result.Status == storage.EndpointStatusOnline {
247
-
userCount = int64(len(result.DIDs)) // NEW: Get user count
248
-
scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness
258
+
userCount = int64(len(result.DIDs))
259
+
scanData.Metadata["user_count"] = userCount
249
260
if result.Description != nil {
250
261
scanData.Metadata["server_info"] = result.Description
251
262
}
···
262
273
Status: result.Status,
263
274
ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms
264
275
UserCount: userCount,
265
-
Version: result.Version, // NEW: Set the version field
276
+
Version: result.Version,
277
+
UsedIP: result.UsedIP, // NEW
266
278
ScanData: scanData,
267
279
ScannedAt: time.Now().UTC(),
268
280
}
+2
-1
internal/pds/types.go
+2
-1
internal/pds/types.go
-676
internal/plc/bundle.go
-676
internal/plc/bundle.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"bytes"
6
-
"context"
7
-
"crypto/sha256"
8
-
"encoding/hex"
9
-
"encoding/json"
10
-
"fmt"
11
-
"os"
12
-
"path/filepath"
13
-
"time"
14
-
15
-
"github.com/atscan/atscanner/internal/log"
16
-
"github.com/atscan/atscanner/internal/storage"
17
-
"github.com/klauspost/compress/zstd"
18
-
)
19
-
20
-
const BUNDLE_SIZE = 10000
21
-
22
-
type BundleManager struct {
23
-
dir string
24
-
enabled bool
25
-
encoder *zstd.Encoder
26
-
decoder *zstd.Decoder
27
-
db storage.Database
28
-
indexDIDs bool
29
-
}
30
-
31
-
// ===== INITIALIZATION =====
32
-
33
-
func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
34
-
if !enabled {
35
-
return &BundleManager{enabled: false}, nil
36
-
}
37
-
38
-
if err := os.MkdirAll(dir, 0755); err != nil {
39
-
return nil, fmt.Errorf("failed to create bundle dir: %w", err)
40
-
}
41
-
42
-
encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
43
-
if err != nil {
44
-
return nil, err
45
-
}
46
-
47
-
decoder, err := zstd.NewReader(nil)
48
-
if err != nil {
49
-
return nil, err
50
-
}
51
-
52
-
return &BundleManager{
53
-
dir: dir,
54
-
enabled: enabled,
55
-
encoder: encoder,
56
-
decoder: decoder,
57
-
db: db,
58
-
indexDIDs: indexDIDs, // NEW
59
-
}, nil
60
-
}
61
-
62
-
func (bm *BundleManager) Close() {
63
-
if bm.encoder != nil {
64
-
bm.encoder.Close()
65
-
}
66
-
if bm.decoder != nil {
67
-
bm.decoder.Close()
68
-
}
69
-
}
70
-
71
-
// ===== BUNDLE FILE ABSTRACTION =====
72
-
73
-
type bundleFile struct {
74
-
path string
75
-
operations []PLCOperation
76
-
uncompressedHash string
77
-
compressedHash string
78
-
}
79
-
80
-
func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
81
-
return &bundleFile{
82
-
path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
83
-
}
84
-
}
85
-
86
-
func (bf *bundleFile) exists() bool {
87
-
_, err := os.Stat(bf.path)
88
-
return err == nil
89
-
}
90
-
91
-
func (bm *BundleManager) load(bf *bundleFile) error {
92
-
compressed, err := os.ReadFile(bf.path)
93
-
if err != nil {
94
-
return fmt.Errorf("read failed: %w", err)
95
-
}
96
-
97
-
decompressed, err := bm.decoder.DecodeAll(compressed, nil)
98
-
if err != nil {
99
-
return fmt.Errorf("decompress failed: %w", err)
100
-
}
101
-
102
-
bf.operations = bm.parseJSONL(decompressed)
103
-
return nil
104
-
}
105
-
106
-
func (bm *BundleManager) save(bf *bundleFile) error {
107
-
jsonlData := bm.serializeJSONL(bf.operations)
108
-
bf.uncompressedHash = bm.hash(jsonlData)
109
-
110
-
compressed := bm.encoder.EncodeAll(jsonlData, nil)
111
-
bf.compressedHash = bm.hash(compressed)
112
-
113
-
return os.WriteFile(bf.path, compressed, 0644)
114
-
}
115
-
116
-
func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
117
-
var ops []PLCOperation
118
-
scanner := bufio.NewScanner(bytes.NewReader(data))
119
-
120
-
for scanner.Scan() {
121
-
line := scanner.Bytes()
122
-
if len(line) == 0 {
123
-
continue
124
-
}
125
-
126
-
var op PLCOperation
127
-
if err := json.Unmarshal(line, &op); err == nil {
128
-
op.RawJSON = append([]byte(nil), line...)
129
-
ops = append(ops, op)
130
-
}
131
-
}
132
-
133
-
return ops
134
-
}
135
-
136
-
func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
137
-
var buf []byte
138
-
for _, op := range ops {
139
-
buf = append(buf, op.RawJSON...)
140
-
buf = append(buf, '\n')
141
-
}
142
-
return buf
143
-
}
144
-
145
-
// ===== BUNDLE FETCHING =====
146
-
147
-
type bundleFetcher struct {
148
-
client *Client
149
-
seenCIDs map[string]bool
150
-
currentAfter string
151
-
fetchCount int
152
-
}
153
-
154
-
func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
155
-
seen := make(map[string]bool)
156
-
for cid := range prevBoundaryCIDs {
157
-
seen[cid] = true
158
-
}
159
-
160
-
return &bundleFetcher{
161
-
client: client,
162
-
seenCIDs: seen,
163
-
currentAfter: afterTime,
164
-
}
165
-
}
166
-
167
-
func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
168
-
var ops []PLCOperation
169
-
maxFetches := (target / 900) + 5
170
-
171
-
for len(ops) < target && bf.fetchCount < maxFetches {
172
-
bf.fetchCount++
173
-
batchSize := bf.calculateBatchSize(target - len(ops))
174
-
175
-
log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)
176
-
177
-
batch, shouldContinue := bf.fetchBatch(ctx, batchSize)
178
-
179
-
for _, op := range batch {
180
-
if !bf.seenCIDs[op.CID] {
181
-
bf.seenCIDs[op.CID] = true
182
-
ops = append(ops, op)
183
-
184
-
if len(ops) >= target {
185
-
return ops[:target], true
186
-
}
187
-
}
188
-
}
189
-
190
-
if !shouldContinue {
191
-
break
192
-
}
193
-
}
194
-
195
-
return ops, len(ops) >= target
196
-
}
197
-
198
-
func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
199
-
if bf.fetchCount == 0 {
200
-
return 1000
201
-
}
202
-
if remaining < 100 {
203
-
return 50
204
-
}
205
-
if remaining < 500 {
206
-
return 200
207
-
}
208
-
return 1000
209
-
}
210
-
211
-
func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
212
-
ops, err := bf.client.Export(ctx, ExportOptions{
213
-
Count: size,
214
-
After: bf.currentAfter,
215
-
})
216
-
217
-
if err != nil || len(ops) == 0 {
218
-
return nil, false
219
-
}
220
-
221
-
if len(ops) > 0 {
222
-
bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
223
-
}
224
-
225
-
return ops, len(ops) >= size
226
-
}
227
-
228
-
// ===== MAIN BUNDLE LOADING =====
229
-
230
-
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
231
-
if !bm.enabled {
232
-
return nil, false, fmt.Errorf("bundle manager disabled")
233
-
}
234
-
235
-
bf := bm.newBundleFile(bundleNum)
236
-
237
-
// Try local file first
238
-
if bf.exists() {
239
-
return bm.loadFromFile(ctx, bundleNum, bf)
240
-
}
241
-
242
-
// Fetch from PLC
243
-
return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
244
-
}
245
-
246
-
func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
247
-
log.Verbose("→ Loading bundle %06d from local file", bundleNum)
248
-
249
-
// Verify hash if bundle is in DB
250
-
if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
251
-
if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
252
-
log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
253
-
os.Remove(bf.path)
254
-
return nil, false, fmt.Errorf("hash mismatch")
255
-
}
256
-
log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
257
-
}
258
-
259
-
if err := bm.load(bf); err != nil {
260
-
return nil, false, err
261
-
}
262
-
263
-
// Index if not in DB
264
-
if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
265
-
bf.compressedHash = bm.hashFile(bf.path)
266
-
bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
267
-
268
-
// Calculate cursor from previous bundle
269
-
cursor := bm.calculateCursor(ctx, bundleNum)
270
-
271
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
272
-
}
273
-
274
-
return bf.operations, true, nil
275
-
}
276
-
277
-
func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
278
-
log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)
279
-
280
-
afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
281
-
fetcher := newBundleFetcher(client, afterTime, prevCIDs)
282
-
283
-
ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)
284
-
285
-
log.Info(" Collected %d unique operations after %d fetches (complete=%v)",
286
-
len(ops), fetcher.fetchCount, isComplete)
287
-
288
-
if isComplete {
289
-
bf.operations = ops
290
-
if err := bm.save(bf); err != nil {
291
-
log.Error("Warning: failed to save bundle: %v", err)
292
-
} else {
293
-
// The cursor is the afterTime that was used to fetch this bundle
294
-
cursor := afterTime
295
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
296
-
log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
297
-
bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
298
-
}
299
-
}
300
-
301
-
return ops, isComplete, nil
302
-
}
303
-
304
-
func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
305
-
if bundleNum == 1 {
306
-
return "", nil
307
-
}
308
-
309
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
310
-
if err != nil {
311
-
return "", nil
312
-
}
313
-
314
-
afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)
315
-
316
-
// Return stored boundary CIDs if available
317
-
if len(prevBundle.BoundaryCIDs) > 0 {
318
-
cids := make(map[string]bool)
319
-
for _, cid := range prevBundle.BoundaryCIDs {
320
-
cids[cid] = true
321
-
}
322
-
return afterTime, cids
323
-
}
324
-
325
-
// Fallback: compute from file
326
-
bf := bm.newBundleFile(bundleNum - 1)
327
-
if bf.exists() {
328
-
if err := bm.load(bf); err == nil {
329
-
_, cids := GetBoundaryCIDs(bf.operations)
330
-
return afterTime, cids
331
-
}
332
-
}
333
-
334
-
return afterTime, nil
335
-
}
336
-
337
-
// ===== BUNDLE INDEXING =====
338
-
339
-
func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
340
-
prevHash := ""
341
-
if bundleNum > 1 {
342
-
if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
343
-
prevHash = prev.Hash
344
-
}
345
-
}
346
-
347
-
dids := bm.extractUniqueDIDs(bf.operations)
348
-
compressedFileSize := bm.getFileSize(bf.path)
349
-
350
-
// Calculate uncompressed size
351
-
uncompressedSize := int64(0)
352
-
for _, op := range bf.operations {
353
-
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
354
-
}
355
-
356
-
// Get time range from operations
357
-
firstSeenAt := bf.operations[0].CreatedAt
358
-
lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt
359
-
360
-
bundle := &storage.PLCBundle{
361
-
BundleNumber: bundleNum,
362
-
StartTime: firstSeenAt,
363
-
EndTime: lastSeenAt,
364
-
DIDs: dids,
365
-
Hash: bf.uncompressedHash,
366
-
CompressedHash: bf.compressedHash,
367
-
CompressedSize: compressedFileSize,
368
-
UncompressedSize: uncompressedSize,
369
-
Cursor: cursor,
370
-
PrevBundleHash: prevHash,
371
-
Compressed: true,
372
-
CreatedAt: time.Now().UTC(),
373
-
}
374
-
375
-
// Create bundle first
376
-
if err := bm.db.CreateBundle(ctx, bundle); err != nil {
377
-
return err
378
-
}
379
-
380
-
// NEW: Only index DIDs if enabled
381
-
if bm.indexDIDs {
382
-
start := time.Now()
383
-
384
-
// Extract handle and PDS for each DID using centralized helper
385
-
didInfoMap := ExtractDIDInfoMap(bf.operations)
386
-
387
-
if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil {
388
-
log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err)
389
-
// Don't return error - bundle is already created
390
-
} else {
391
-
// Update handle and PDS for each DID
392
-
for did, info := range didInfoMap {
393
-
// Validate handle length before saving
394
-
validHandle := ValidateHandle(info.Handle)
395
-
396
-
if err := bm.db.UpsertDID(ctx, did, bundleNum, validHandle, info.PDS); err != nil {
397
-
log.Error("Failed to update DID %s metadata: %v", did, err)
398
-
}
399
-
}
400
-
401
-
elapsed := time.Since(start)
402
-
log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed)
403
-
}
404
-
} else {
405
-
log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
406
-
}
407
-
408
-
return nil
409
-
}
410
-
411
-
func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
412
-
didSet := make(map[string]bool)
413
-
for _, op := range ops {
414
-
didSet[op.DID] = true
415
-
}
416
-
417
-
dids := make([]string, 0, len(didSet))
418
-
for did := range didSet {
419
-
dids = append(dids, did)
420
-
}
421
-
return dids
422
-
}
423
-
424
-
// ===== MEMPOOL BUNDLE CREATION =====
425
-
426
-
func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
427
-
if !bm.enabled {
428
-
return 0, fmt.Errorf("bundle manager disabled")
429
-
}
430
-
431
-
if len(operations) != BUNDLE_SIZE {
432
-
return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
433
-
}
434
-
435
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
436
-
if err != nil {
437
-
return 0, err
438
-
}
439
-
bundleNum := lastBundle + 1
440
-
441
-
bf := bm.newBundleFile(bundleNum)
442
-
bf.operations = operations
443
-
444
-
if err := bm.save(bf); err != nil {
445
-
return 0, err
446
-
}
447
-
448
-
if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
449
-
return 0, err
450
-
}
451
-
452
-
log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
453
-
bundleNum, bf.uncompressedHash[:16])
454
-
455
-
return bundleNum, nil
456
-
}
457
-
458
-
// ===== VERIFICATION =====
459
-
460
-
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
461
-
if !bm.enabled {
462
-
return fmt.Errorf("bundle manager disabled")
463
-
}
464
-
465
-
log.Info("Verifying bundle chain from 1 to %06d...", endBundle)
466
-
467
-
for i := 1; i <= endBundle; i++ {
468
-
bundle, err := bm.db.GetBundleByNumber(ctx, i)
469
-
if err != nil {
470
-
return fmt.Errorf("bundle %06d not found: %w", i, err)
471
-
}
472
-
473
-
// Verify file hash
474
-
path := bm.newBundleFile(i).path
475
-
if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
476
-
return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
477
-
}
478
-
479
-
// Verify chain link
480
-
if i > 1 {
481
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
482
-
if err != nil {
483
-
return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
484
-
}
485
-
486
-
if bundle.PrevBundleHash != prevBundle.Hash {
487
-
return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
488
-
i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
489
-
}
490
-
}
491
-
492
-
if i%100 == 0 {
493
-
log.Verbose(" ✓ Verified bundles 1-%06d", i)
494
-
}
495
-
}
496
-
497
-
log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
498
-
return nil
499
-
}
500
-
501
-
func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
502
-
if !bm.enabled {
503
-
return nil
504
-
}
505
-
506
-
for i := 1; i < targetBundle; i++ {
507
-
if !bm.newBundleFile(i).exists() {
508
-
if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
509
-
return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
510
-
}
511
-
}
512
-
}
513
-
514
-
return nil
515
-
}
516
-
517
-
// ===== UTILITY METHODS =====
518
-
519
-
func (bm *BundleManager) hash(data []byte) string {
520
-
h := sha256.Sum256(data)
521
-
return hex.EncodeToString(h[:])
522
-
}
523
-
524
-
func (bm *BundleManager) hashFile(path string) string {
525
-
data, _ := os.ReadFile(path)
526
-
return bm.hash(data)
527
-
}
528
-
529
-
func (bm *BundleManager) verifyHash(path, expectedHash string) error {
530
-
if expectedHash == "" {
531
-
return nil
532
-
}
533
-
534
-
actualHash := bm.hashFile(path)
535
-
if actualHash != expectedHash {
536
-
return fmt.Errorf("hash mismatch")
537
-
}
538
-
return nil
539
-
}
540
-
541
-
func (bm *BundleManager) getFileSize(path string) int64 {
542
-
if info, err := os.Stat(path); err == nil {
543
-
return info.Size()
544
-
}
545
-
return 0
546
-
}
547
-
548
-
func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
549
-
if !bm.enabled {
550
-
return 0, 0, 0, 0, nil
551
-
}
552
-
return bm.db.GetBundleStats(ctx)
553
-
}
554
-
555
-
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
556
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
557
-
if err != nil {
558
-
return nil, err
559
-
}
560
-
561
-
if lastBundle == 0 {
562
-
return map[string]interface{}{
563
-
"chain_length": 0,
564
-
"status": "empty",
565
-
}, nil
566
-
}
567
-
568
-
firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
569
-
lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)
570
-
571
-
return map[string]interface{}{
572
-
"chain_length": lastBundle,
573
-
"first_bundle": 1,
574
-
"last_bundle": lastBundle,
575
-
"chain_start_time": firstBundle.StartTime,
576
-
"chain_end_time": lastBundleData.EndTime,
577
-
"chain_head_hash": lastBundleData.Hash,
578
-
}, nil
579
-
}
580
-
581
-
// ===== EXPORTED HELPERS =====
582
-
583
-
func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
584
-
if len(operations) == 0 {
585
-
return time.Time{}, nil
586
-
}
587
-
588
-
lastOp := operations[len(operations)-1]
589
-
boundaryTime := lastOp.CreatedAt
590
-
cidSet := make(map[string]bool)
591
-
592
-
for i := len(operations) - 1; i >= 0; i-- {
593
-
op := operations[i]
594
-
if op.CreatedAt.Equal(boundaryTime) {
595
-
cidSet[op.CID] = true
596
-
} else {
597
-
break
598
-
}
599
-
}
600
-
601
-
return boundaryTime, cidSet
602
-
}
603
-
604
-
func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
605
-
if len(operations) == 0 {
606
-
return operations
607
-
}
608
-
609
-
boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
610
-
if err != nil {
611
-
return operations
612
-
}
613
-
614
-
startIdx := 0
615
-
for startIdx < len(operations) {
616
-
op := operations[startIdx]
617
-
618
-
if op.CreatedAt.After(boundaryTime) {
619
-
break
620
-
}
621
-
622
-
if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
623
-
startIdx++
624
-
continue
625
-
}
626
-
627
-
break
628
-
}
629
-
630
-
return operations[startIdx:]
631
-
}
632
-
633
-
// LoadBundleOperations is a public method for external access (e.g., API handlers)
634
-
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
635
-
if !bm.enabled {
636
-
return nil, fmt.Errorf("bundle manager disabled")
637
-
}
638
-
639
-
bf := bm.newBundleFile(bundleNum)
640
-
641
-
if !bf.exists() {
642
-
return nil, fmt.Errorf("bundle %06d not found", bundleNum)
643
-
}
644
-
645
-
if err := bm.load(bf); err != nil {
646
-
return nil, err
647
-
}
648
-
649
-
return bf.operations, nil
650
-
}
651
-
652
-
// calculateCursor determines the cursor value for a given bundle
653
-
// For bundle 1: returns empty string
654
-
// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format
655
-
func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
656
-
if bundleNum == 1 {
657
-
return ""
658
-
}
659
-
660
-
// Try to get cursor from previous bundle in DB
661
-
if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
662
-
return prevBundle.EndTime.Format(time.RFC3339Nano)
663
-
}
664
-
665
-
// If previous bundle not in DB, try to load it from file
666
-
prevBf := bm.newBundleFile(bundleNum - 1)
667
-
if prevBf.exists() {
668
-
if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
669
-
// Return the createdAt of the last operation in previous bundle
670
-
lastOp := prevBf.operations[len(prevBf.operations)-1]
671
-
return lastOp.CreatedAt.Format(time.RFC3339Nano)
672
-
}
673
-
}
674
-
675
-
return ""
676
-
}
-237
internal/plc/client.go
-237
internal/plc/client.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"context"
6
-
"encoding/json"
7
-
"fmt"
8
-
"io"
9
-
"net/http"
10
-
"strconv"
11
-
"time"
12
-
13
-
"github.com/atscan/atscanner/internal/log"
14
-
)
15
-
16
-
type Client struct {
17
-
baseURL string
18
-
httpClient *http.Client
19
-
rateLimiter *RateLimiter
20
-
}
21
-
22
-
func NewClient(baseURL string) *Client {
23
-
// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
24
-
rateLimiter := NewRateLimiter(90, time.Minute)
25
-
26
-
return &Client{
27
-
baseURL: baseURL,
28
-
httpClient: &http.Client{
29
-
Timeout: 60 * time.Second,
30
-
},
31
-
rateLimiter: rateLimiter,
32
-
}
33
-
}
34
-
35
-
func (c *Client) Close() {
36
-
if c.rateLimiter != nil {
37
-
c.rateLimiter.Stop()
38
-
}
39
-
}
40
-
41
-
type ExportOptions struct {
42
-
Count int
43
-
After string // ISO 8601 datetime string
44
-
}
45
-
46
-
// Export fetches export data from PLC directory with rate limiting and retry
47
-
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
48
-
return c.exportWithRetry(ctx, opts, 5)
49
-
}
50
-
51
-
// exportWithRetry implements retry logic with exponential backoff for rate limits
52
-
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
53
-
var lastErr error
54
-
backoff := 1 * time.Second
55
-
56
-
for attempt := 1; attempt <= maxRetries; attempt++ {
57
-
// Wait for rate limiter token
58
-
if err := c.rateLimiter.Wait(ctx); err != nil {
59
-
return nil, err
60
-
}
61
-
62
-
operations, retryAfter, err := c.doExport(ctx, opts)
63
-
64
-
if err == nil {
65
-
return operations, nil
66
-
}
67
-
68
-
lastErr = err
69
-
70
-
// Check if it's a rate limit error (429)
71
-
if retryAfter > 0 {
72
-
log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
73
-
retryAfter, attempt, maxRetries)
74
-
75
-
select {
76
-
case <-time.After(retryAfter):
77
-
continue
78
-
case <-ctx.Done():
79
-
return nil, ctx.Err()
80
-
}
81
-
}
82
-
83
-
// Other errors - exponential backoff
84
-
if attempt < maxRetries {
85
-
log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
86
-
attempt, maxRetries, err, backoff)
87
-
88
-
select {
89
-
case <-time.After(backoff):
90
-
backoff *= 2 // Exponential backoff
91
-
case <-ctx.Done():
92
-
return nil, ctx.Err()
93
-
}
94
-
}
95
-
}
96
-
97
-
return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
98
-
}
99
-
100
-
// doExport performs the actual HTTP request
101
-
func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
102
-
url := fmt.Sprintf("%s/export", c.baseURL)
103
-
104
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
105
-
if err != nil {
106
-
return nil, 0, err
107
-
}
108
-
109
-
// Add query parameters
110
-
q := req.URL.Query()
111
-
if opts.Count > 0 {
112
-
q.Add("count", fmt.Sprintf("%d", opts.Count))
113
-
}
114
-
if opts.After != "" {
115
-
q.Add("after", opts.After)
116
-
}
117
-
req.URL.RawQuery = q.Encode()
118
-
119
-
resp, err := c.httpClient.Do(req)
120
-
if err != nil {
121
-
return nil, 0, fmt.Errorf("request failed: %w", err)
122
-
}
123
-
defer resp.Body.Close()
124
-
125
-
// Handle rate limiting (429)
126
-
if resp.StatusCode == http.StatusTooManyRequests {
127
-
retryAfter := parseRetryAfter(resp)
128
-
129
-
// Also check x-ratelimit headers for info
130
-
if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
131
-
log.Verbose("Rate limit: %s", limit)
132
-
}
133
-
134
-
return nil, retryAfter, fmt.Errorf("rate limited (429)")
135
-
}
136
-
137
-
if resp.StatusCode != http.StatusOK {
138
-
body, _ := io.ReadAll(resp.Body)
139
-
return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
140
-
}
141
-
142
-
var operations []PLCOperation
143
-
144
-
// PLC export returns newline-delimited JSON
145
-
scanner := bufio.NewScanner(resp.Body)
146
-
buf := make([]byte, 0, 64*1024)
147
-
scanner.Buffer(buf, 1024*1024)
148
-
149
-
lineCount := 0
150
-
for scanner.Scan() {
151
-
lineCount++
152
-
line := scanner.Bytes()
153
-
154
-
if len(line) == 0 {
155
-
continue
156
-
}
157
-
158
-
var op PLCOperation
159
-
if err := json.Unmarshal(line, &op); err != nil {
160
-
log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
161
-
continue
162
-
}
163
-
164
-
// CRITICAL: Store the original raw JSON bytes
165
-
op.RawJSON = make([]byte, len(line))
166
-
copy(op.RawJSON, line)
167
-
168
-
operations = append(operations, op)
169
-
}
170
-
171
-
if err := scanner.Err(); err != nil {
172
-
return nil, 0, fmt.Errorf("error reading response: %w", err)
173
-
}
174
-
175
-
return operations, 0, nil
176
-
177
-
}
178
-
179
-
// parseRetryAfter parses the Retry-After header
180
-
func parseRetryAfter(resp *http.Response) time.Duration {
181
-
retryAfter := resp.Header.Get("Retry-After")
182
-
if retryAfter == "" {
183
-
// Default to 5 minutes if no header
184
-
return 5 * time.Minute
185
-
}
186
-
187
-
// Try parsing as seconds
188
-
if seconds, err := strconv.Atoi(retryAfter); err == nil {
189
-
return time.Duration(seconds) * time.Second
190
-
}
191
-
192
-
// Try parsing as HTTP date
193
-
if t, err := http.ParseTime(retryAfter); err == nil {
194
-
return time.Until(t)
195
-
}
196
-
197
-
// Default
198
-
return 5 * time.Minute
199
-
}
200
-
201
-
// GetDID fetches a specific DID document from PLC
202
-
func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
203
-
// Wait for rate limiter
204
-
if err := c.rateLimiter.Wait(ctx); err != nil {
205
-
return nil, err
206
-
}
207
-
208
-
url := fmt.Sprintf("%s/%s", c.baseURL, did)
209
-
210
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
211
-
if err != nil {
212
-
return nil, err
213
-
}
214
-
215
-
resp, err := c.httpClient.Do(req)
216
-
if err != nil {
217
-
return nil, err
218
-
}
219
-
defer resp.Body.Close()
220
-
221
-
if resp.StatusCode == http.StatusTooManyRequests {
222
-
retryAfter := parseRetryAfter(resp)
223
-
return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
224
-
}
225
-
226
-
if resp.StatusCode != http.StatusOK {
227
-
body, _ := io.ReadAll(resp.Body)
228
-
return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
229
-
}
230
-
231
-
var doc DIDDocument
232
-
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
233
-
return nil, err
234
-
}
235
-
236
-
return &doc, nil
237
-
}
+20
-2
internal/plc/helpers.go
+20
-2
internal/plc/helpers.go
···
1
1
package plc
2
2
3
-
import "strings"
3
+
import (
4
+
"regexp"
5
+
"strings"
6
+
)
4
7
5
8
// MaxHandleLength is the maximum allowed handle length for database storage
6
9
const MaxHandleLength = 500
10
+
11
+
// Handle validation regex per AT Protocol spec
12
+
// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter
13
+
var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
7
14
8
15
// ExtractHandle safely extracts the handle from a PLC operation
9
16
func ExtractHandle(op *PLCOperation) string {
···
29
36
}
30
37
31
38
// ValidateHandle checks if a handle is valid for database storage
32
-
// Returns empty string if handle is too long
39
+
// Returns empty string if handle is invalid (too long or wrong format)
33
40
func ValidateHandle(handle string) string {
41
+
if handle == "" {
42
+
return ""
43
+
}
44
+
45
+
// Check length first (faster)
34
46
if len(handle) > MaxHandleLength {
35
47
return ""
36
48
}
49
+
50
+
// Validate format using regex
51
+
if !handleRegex.MatchString(handle) {
52
+
return ""
53
+
}
54
+
37
55
return handle
38
56
}
39
57
+522
internal/plc/manager.go
+522
internal/plc/manager.go
···
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/csv"
6
+
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
13
+
"time"
14
+
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
19
+
)
20
+
21
+
// BundleManager wraps the library's manager with database integration
22
+
type BundleManager struct {
23
+
libManager *plcbundle.Manager
24
+
db storage.Database
25
+
bundleDir string
26
+
indexDIDs bool
27
+
}
28
+
29
+
func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30
+
// Create library config
31
+
config := plcbundle.DefaultConfig(bundleDir)
32
+
33
+
// Create PLC client
34
+
var client *plcbundle.PLCClient
35
+
if plcURL != "" {
36
+
client = plcbundle.NewPLCClient(plcURL)
37
+
}
38
+
39
+
// Create library manager
40
+
libMgr, err := plcbundle.NewManager(config, client)
41
+
if err != nil {
42
+
return nil, fmt.Errorf("failed to create library manager: %w", err)
43
+
}
44
+
45
+
return &BundleManager{
46
+
libManager: libMgr,
47
+
db: db,
48
+
bundleDir: bundleDir,
49
+
indexDIDs: indexDIDs,
50
+
}, nil
51
+
}
52
+
53
+
func (bm *BundleManager) Close() {
54
+
if bm.libManager != nil {
55
+
bm.libManager.Close()
56
+
}
57
+
}
58
+
59
+
// LoadBundle loads a bundle (from library) and returns operations
60
+
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62
+
if err != nil {
63
+
return nil, err
64
+
}
65
+
return bundle.Operations, nil
66
+
}
67
+
68
+
// LoadBundle loads a full bundle with metadata
69
+
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70
+
return bm.libManager.LoadBundle(ctx, bundleNum)
71
+
}
72
+
73
+
// FetchAndSaveBundle fetches next bundle from PLC and saves
74
+
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75
+
// Fetch from PLC using library
76
+
bundle, err := bm.libManager.FetchNextBundle(ctx)
77
+
if err != nil {
78
+
return nil, err
79
+
}
80
+
81
+
// Save to disk (library handles this)
82
+
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83
+
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84
+
}
85
+
86
+
// Index DIDs if enabled (still use database for this)
87
+
if bm.indexDIDs && len(bundle.Operations) > 0 {
88
+
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89
+
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90
+
}
91
+
}
92
+
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
96
+
}
97
+
98
+
// indexBundleDIDs indexes DIDs from a bundle into the database
99
+
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100
+
start := time.Now()
101
+
log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102
+
103
+
// Extract DID info from operations
104
+
didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105
+
106
+
successCount := 0
107
+
errorCount := 0
108
+
invalidHandleCount := 0
109
+
110
+
// Upsert each DID
111
+
for did, info := range didInfoMap {
112
+
validHandle := ValidateHandle(info.Handle)
113
+
if info.Handle != "" && validHandle == "" {
114
+
invalidHandleCount++
115
+
}
116
+
117
+
if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118
+
log.Error("Failed to index DID %s: %v", did, err)
119
+
errorCount++
120
+
} else {
121
+
successCount++
122
+
}
123
+
}
124
+
125
+
elapsed := time.Since(start)
126
+
log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127
+
successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128
+
129
+
return nil
130
+
}
131
+
132
+
// VerifyChain verifies bundle chain integrity
133
+
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134
+
result, err := bm.libManager.VerifyChain(ctx)
135
+
if err != nil {
136
+
return err
137
+
}
138
+
139
+
if !result.Valid {
140
+
return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141
+
}
142
+
143
+
return nil
144
+
}
145
+
146
+
// GetChainInfo returns chain information
147
+
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148
+
return bm.libManager.GetInfo(), nil
149
+
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = int64(0)
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp finds bundle containing a timestamp
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
type dailyStat struct {
306
+
lastBundle int
307
+
bundleCount int
308
+
totalUncompressed int64
309
+
totalCompressed int64
310
+
}
311
+
312
+
dailyStats := make(map[string]*dailyStat)
313
+
314
+
// Map to store the cumulative values at the end of each date
315
+
dateCumulatives := make(map[string]struct {
316
+
uncompressed int64
317
+
compressed int64
318
+
})
319
+
320
+
// Calculate cumulative totals as we iterate through sorted bundles
321
+
cumulativeUncompressed := int64(0)
322
+
cumulativeCompressed := int64(0)
323
+
324
+
for _, bundle := range filtered {
325
+
dateStr := bundle.StartTime.Format("2006-01-02")
326
+
327
+
// Update cumulative totals
328
+
cumulativeUncompressed += bundle.UncompressedSize
329
+
cumulativeCompressed += bundle.CompressedSize
330
+
331
+
if stat, exists := dailyStats[dateStr]; exists {
332
+
// Update existing day
333
+
if bundle.BundleNumber > stat.lastBundle {
334
+
stat.lastBundle = bundle.BundleNumber
335
+
}
336
+
stat.bundleCount++
337
+
stat.totalUncompressed += bundle.UncompressedSize
338
+
stat.totalCompressed += bundle.CompressedSize
339
+
} else {
340
+
// Create new day entry
341
+
dailyStats[dateStr] = &dailyStat{
342
+
lastBundle: bundle.BundleNumber,
343
+
bundleCount: 1,
344
+
totalUncompressed: bundle.UncompressedSize,
345
+
totalCompressed: bundle.CompressedSize,
346
+
}
347
+
}
348
+
349
+
// Store the cumulative values at the end of this date
350
+
// (will be overwritten if there are multiple bundles on the same day)
351
+
dateCumulatives[dateStr] = struct {
352
+
uncompressed int64
353
+
compressed int64
354
+
}{
355
+
uncompressed: cumulativeUncompressed,
356
+
compressed: cumulativeCompressed,
357
+
}
358
+
}
359
+
360
+
// Convert map to sorted slice by date
361
+
var dates []string
362
+
for date := range dailyStats {
363
+
dates = append(dates, date)
364
+
}
365
+
sort.Strings(dates)
366
+
367
+
// Build history points with cumulative operations
368
+
var history []*storage.PLCHistoryPoint
369
+
cumulativeOps := 0
370
+
371
+
for _, date := range dates {
372
+
stat := dailyStats[date]
373
+
cumulativeOps += stat.bundleCount * 10000
374
+
cumulative := dateCumulatives[date]
375
+
376
+
history = append(history, &storage.PLCHistoryPoint{
377
+
Date: date,
378
+
BundleNumber: stat.lastBundle,
379
+
OperationCount: cumulativeOps,
380
+
UncompressedSize: stat.totalUncompressed,
381
+
CompressedSize: stat.totalCompressed,
382
+
CumulativeUncompressed: cumulative.uncompressed,
383
+
CumulativeCompressed: cumulative.compressed,
384
+
})
385
+
}
386
+
387
+
// Apply limit if specified
388
+
if limit > 0 && len(history) > limit {
389
+
history = history[:limit]
390
+
}
391
+
392
+
return history, nil
393
+
}
394
+
395
+
// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396
+
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397
+
// Define the path to the labels file
398
+
labelsDir := filepath.Join(bm.bundleDir, "labels")
399
+
labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400
+
401
+
// Check if file exists
402
+
if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403
+
log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404
+
// Return empty, not an error
405
+
return []*PLCOpLabel{}, nil
406
+
}
407
+
408
+
// Open the Zstd-compressed file
409
+
file, err := os.Open(labelsFile)
410
+
if err != nil {
411
+
return nil, fmt.Errorf("failed to open labels file: %w", err)
412
+
}
413
+
defer file.Close()
414
+
415
+
// Create a Zstd reader
416
+
zstdReader, err := zstd.NewReader(file)
417
+
if err != nil {
418
+
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419
+
}
420
+
defer zstdReader.Close()
421
+
422
+
// Create a CSV reader
423
+
csvReader := csv.NewReader(zstdReader)
424
+
// We skipped the header, so no header read needed
425
+
// Set FieldsPerRecord to 7 for validation
426
+
//csvReader.FieldsPerRecord = 7
427
+
428
+
var labels []*PLCOpLabel
429
+
430
+
// Read all records
431
+
for {
432
+
// Check for context cancellation
433
+
if err := ctx.Err(); err != nil {
434
+
return nil, err
435
+
}
436
+
437
+
record, err := csvReader.Read()
438
+
if err == io.EOF {
439
+
break // End of file
440
+
}
441
+
if err != nil {
442
+
log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443
+
continue // Skip bad line
444
+
}
445
+
446
+
// Parse the CSV record (which is []string)
447
+
label, err := parseLabelRecord(record)
448
+
if err != nil {
449
+
log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450
+
continue // Skip bad data
451
+
}
452
+
453
+
labels = append(labels, label)
454
+
}
455
+
456
+
return labels, nil
457
+
}
458
+
459
+
// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
460
+
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461
+
// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462
+
if len(record) != 6 {
463
+
err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464
+
// --- ADDED LOG ---
465
+
log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466
+
// ---
467
+
return nil, err
468
+
}
469
+
470
+
// 0:bundle
471
+
bundle, err := strconv.Atoi(record[0])
472
+
if err != nil {
473
+
// --- ADDED LOG ---
474
+
log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475
+
// ---
476
+
return nil, fmt.Errorf("parsing 'bundle': %w", err)
477
+
}
478
+
479
+
// 1:position
480
+
position, err := strconv.Atoi(record[1])
481
+
if err != nil {
482
+
// --- ADDED LOG ---
483
+
log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484
+
// ---
485
+
return nil, fmt.Errorf("parsing 'position': %w", err)
486
+
}
487
+
488
+
// 2:cid(short)
489
+
shortCID := record[2]
490
+
491
+
// 3:size
492
+
size, err := strconv.Atoi(record[3])
493
+
if err != nil {
494
+
// --- ADDED LOG ---
495
+
log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496
+
// ---
497
+
return nil, fmt.Errorf("parsing 'size': %w", err)
498
+
}
499
+
500
+
// 4:confidence
501
+
confidence, err := strconv.ParseFloat(record[4], 64)
502
+
if err != nil {
503
+
// --- ADDED LOG ---
504
+
log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505
+
// ---
506
+
return nil, fmt.Errorf("parsing 'confidence': %w", err)
507
+
}
508
+
509
+
// 5:labels
510
+
detectors := strings.Split(record[5], ";")
511
+
512
+
label := &PLCOpLabel{
513
+
Bundle: bundle,
514
+
Position: position,
515
+
CID: shortCID,
516
+
Size: size,
517
+
Confidence: confidence,
518
+
Detectors: detectors,
519
+
}
520
+
521
+
return label, nil
522
+
}
-70
internal/plc/ratelimiter.go
-70
internal/plc/ratelimiter.go
···
1
-
package plc
2
-
3
-
import (
4
-
"context"
5
-
"time"
6
-
)
7
-
8
-
// RateLimiter implements a token bucket rate limiter
9
-
type RateLimiter struct {
10
-
tokens chan struct{}
11
-
refillRate time.Duration
12
-
maxTokens int
13
-
stopRefill chan struct{}
14
-
}
15
-
16
-
// NewRateLimiter creates a new rate limiter
17
-
// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
18
-
func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
19
-
rl := &RateLimiter{
20
-
tokens: make(chan struct{}, requestsPerPeriod),
21
-
refillRate: period / time.Duration(requestsPerPeriod),
22
-
maxTokens: requestsPerPeriod,
23
-
stopRefill: make(chan struct{}),
24
-
}
25
-
26
-
// Fill initially
27
-
for i := 0; i < requestsPerPeriod; i++ {
28
-
rl.tokens <- struct{}{}
29
-
}
30
-
31
-
// Start refill goroutine
32
-
go rl.refill()
33
-
34
-
return rl
35
-
}
36
-
37
-
// refill adds tokens at the specified rate
38
-
func (rl *RateLimiter) refill() {
39
-
ticker := time.NewTicker(rl.refillRate)
40
-
defer ticker.Stop()
41
-
42
-
for {
43
-
select {
44
-
case <-ticker.C:
45
-
select {
46
-
case rl.tokens <- struct{}{}:
47
-
// Token added
48
-
default:
49
-
// Buffer full, skip
50
-
}
51
-
case <-rl.stopRefill:
52
-
return
53
-
}
54
-
}
55
-
}
56
-
57
-
// Wait blocks until a token is available
58
-
func (rl *RateLimiter) Wait(ctx context.Context) error {
59
-
select {
60
-
case <-rl.tokens:
61
-
return nil
62
-
case <-ctx.Done():
63
-
return ctx.Err()
64
-
}
65
-
}
66
-
67
-
// Stop stops the rate limiter
68
-
func (rl *RateLimiter) Stop() {
69
-
close(rl.stopRefill)
70
-
}
+92
-431
internal/plc/scanner.go
+92
-431
internal/plc/scanner.go
···
2
2
3
3
import (
4
4
"context"
5
-
"encoding/json"
6
5
"fmt"
7
6
"strings"
8
7
"time"
9
8
10
-
"github.com/acarl005/stripansi"
11
-
"github.com/atscan/atscanner/internal/config"
12
-
"github.com/atscan/atscanner/internal/log"
13
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/storage"
14
12
)
15
13
16
14
type Scanner struct {
17
-
client *Client
15
+
bundleManager *BundleManager
18
16
db storage.Database
19
17
config config.PLCConfig
20
-
bundleManager *BundleManager
21
18
}
22
19
23
-
func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
24
-
bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
25
-
if err != nil {
26
-
log.Error("Warning: failed to initialize bundle manager: %v", err)
27
-
bundleManager = &BundleManager{enabled: false}
28
-
}
20
+
func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
21
+
log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
29
22
30
23
return &Scanner{
31
-
client: NewClient(cfg.DirectoryURL),
24
+
bundleManager: bundleManager, // Use provided instance
32
25
db: db,
33
26
config: cfg,
34
-
bundleManager: bundleManager,
35
27
}
36
28
}
37
29
38
30
func (s *Scanner) Close() {
39
-
if s.bundleManager != nil {
40
-
s.bundleManager.Close()
41
-
}
42
-
}
43
-
44
-
// ScanMetrics tracks scan progress
45
-
type ScanMetrics struct {
46
-
totalFetched int64 // Total ops fetched from PLC/bundles
47
-
totalProcessed int64 // Unique ops processed (after dedup)
48
-
newEndpoints int64 // New endpoints discovered
49
-
endpointCounts map[string]int64
50
-
currentBundle int
51
-
startTime time.Time
52
-
}
53
-
54
-
func newMetrics(startBundle int) *ScanMetrics {
55
-
return &ScanMetrics{
56
-
endpointCounts: make(map[string]int64),
57
-
currentBundle: startBundle,
58
-
startTime: time.Now(),
59
-
}
60
-
}
61
-
62
-
func (m *ScanMetrics) logSummary() {
63
-
summary := formatEndpointCounts(m.endpointCounts)
64
-
if m.newEndpoints > 0 {
65
-
log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
66
-
m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
67
-
} else {
68
-
log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
69
-
m.totalProcessed, m.totalFetched, time.Since(m.startTime))
70
-
}
31
+
// Don't close bundleManager here - it's shared
71
32
}
72
33
73
34
func (s *Scanner) Scan(ctx context.Context) error {
74
35
log.Info("Starting PLC directory scan...")
75
-
log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
76
36
77
37
cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
78
38
if err != nil {
79
39
return fmt.Errorf("failed to get scan cursor: %w", err)
80
40
}
81
41
82
-
startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
83
-
metrics := newMetrics(startBundle)
84
-
85
-
if startBundle > 1 {
86
-
if err := s.ensureContinuity(ctx, startBundle); err != nil {
87
-
return err
88
-
}
89
-
}
42
+
metrics := newMetrics(cursor.LastBundleNumber + 1)
90
43
91
-
// Handle existing mempool first
92
-
if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
93
-
return s.handleMempoolOnly(ctx, metrics)
94
-
}
95
-
96
-
// Process bundles until incomplete or error
44
+
// Main processing loop
97
45
for {
98
46
if err := ctx.Err(); err != nil {
99
47
return err
100
48
}
101
49
102
-
if err := s.processSingleBundle(ctx, metrics); err != nil {
103
-
if s.shouldRetry(err) {
104
-
continue
105
-
}
106
-
break
107
-
}
108
-
109
-
if err := s.updateCursor(ctx, cursor, metrics); err != nil {
110
-
log.Error("Warning: failed to update cursor: %v", err)
111
-
}
112
-
}
113
-
114
-
// Try to finalize mempool
115
-
s.finalizeMempool(ctx, metrics)
116
-
117
-
metrics.logSummary()
118
-
return nil
119
-
}
120
-
121
-
func (s *Scanner) calculateStartBundle(lastBundle int) int {
122
-
if lastBundle == 0 {
123
-
return 1
124
-
}
125
-
return lastBundle + 1
126
-
}
127
-
128
-
func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
129
-
log.Info("Checking bundle continuity...")
130
-
if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
131
-
return fmt.Errorf("bundle continuity check failed: %w", err)
132
-
}
133
-
return nil
134
-
}
135
-
136
-
func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
137
-
count, err := s.db.GetMempoolCount(ctx)
138
-
if err != nil {
139
-
return false, err
140
-
}
141
-
return count > 0, nil
142
-
}
143
-
144
-
func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
145
-
count, _ := s.db.GetMempoolCount(ctx)
146
-
log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
147
-
148
-
if err := s.fillMempool(ctx, m); err != nil {
149
-
return err
150
-
}
151
-
152
-
if err := s.processMempool(ctx, m); err != nil {
153
-
log.Error("Error processing mempool: %v", err)
154
-
}
155
-
156
-
m.logSummary()
157
-
return nil
158
-
}
159
-
160
-
func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
161
-
log.Verbose("→ Processing bundle %06d...", m.currentBundle)
162
-
163
-
ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
164
-
if err != nil {
165
-
return s.handleBundleError(err, m)
166
-
}
167
-
168
-
if isComplete {
169
-
return s.handleCompleteBundle(ctx, ops, m)
170
-
}
171
-
return s.handleIncompleteBundle(ctx, ops, m)
172
-
}
173
-
174
-
func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
175
-
log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
176
-
177
-
if strings.Contains(err.Error(), "rate limited") {
178
-
log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
179
-
time.Sleep(5 * time.Minute)
180
-
return fmt.Errorf("retry")
181
-
}
182
-
183
-
if m.currentBundle > 1 {
184
-
log.Info("→ Reached end of available data")
185
-
}
186
-
return err
187
-
}
188
-
189
-
func (s *Scanner) shouldRetry(err error) bool {
190
-
return err != nil && err.Error() == "retry"
191
-
}
192
-
193
-
func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
194
-
counts, err := s.processBatch(ctx, ops)
195
-
if err != nil {
196
-
return err
197
-
}
198
-
199
-
s.mergeCounts(m.endpointCounts, counts)
200
-
m.totalProcessed += int64(len(ops)) // Unique ops after dedup
201
-
m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
202
-
203
-
batchTotal := sumCounts(counts)
204
-
log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
205
-
m.currentBundle, len(ops), batchTotal)
206
-
207
-
m.currentBundle++
208
-
return nil
209
-
}
210
-
211
-
func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
212
-
log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
213
-
214
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
215
-
return err
216
-
}
217
-
218
-
s.finalizeMempool(ctx, m)
219
-
return fmt.Errorf("incomplete") // Signal end of processing
220
-
}
221
-
222
-
func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
223
-
if err := s.fillMempool(ctx, m); err != nil {
224
-
log.Error("Error filling mempool: %v", err)
225
-
}
226
-
if err := s.processMempool(ctx, m); err != nil {
227
-
log.Error("Error processing mempool: %v", err)
228
-
}
229
-
}
230
-
231
-
func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
232
-
const fetchLimit = 1000
233
-
234
-
for {
235
-
count, err := s.db.GetMempoolCount(ctx)
50
+
// Fetch and save bundle (library handles mempool internally)
51
+
bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
236
52
if err != nil {
237
-
return err
238
-
}
53
+
if isInsufficientOpsError(err) {
54
+
// Show mempool status
55
+
stats := s.bundleManager.libManager.GetMempoolStats()
56
+
mempoolCount := stats["count"].(int)
239
57
240
-
if count >= BUNDLE_SIZE {
241
-
log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
242
-
return nil
243
-
}
58
+
if mempoolCount > 0 {
59
+
log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
60
+
mempoolCount, BUNDLE_SIZE)
61
+
} else {
62
+
log.Info("→ Caught up! No operations available")
63
+
}
64
+
break
65
+
}
244
66
245
-
log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
246
-
247
-
// ✅ Fix: Don't capture unused 'ops' variable
248
-
shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
249
-
if err != nil {
250
-
return err
251
-
}
67
+
if strings.Contains(err.Error(), "rate limited") {
68
+
log.Info("⚠ Rate limited, pausing for 5 minutes...")
69
+
time.Sleep(5 * time.Minute)
70
+
continue
71
+
}
252
72
253
-
if !shouldContinue {
254
-
finalCount, _ := s.db.GetMempoolCount(ctx)
255
-
log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
256
-
return nil
73
+
return fmt.Errorf("failed to fetch bundle: %w", err)
257
74
}
258
-
}
259
-
}
260
75
261
-
func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
262
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
263
-
if err != nil {
264
-
return false, err
265
-
}
266
-
267
-
var after string
268
-
if lastOp != nil {
269
-
after = lastOp.CreatedAt.Format(time.RFC3339Nano)
270
-
log.Verbose(" Using cursor: %s", after)
271
-
}
272
-
273
-
ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
274
-
if err != nil {
275
-
return false, fmt.Errorf("failed to fetch from PLC: %w", err)
276
-
}
277
-
278
-
fetchedCount := len(ops)
279
-
m.totalFetched += int64(fetchedCount) // Track all fetched
280
-
log.Verbose(" Fetched %d operations from PLC", fetchedCount)
281
-
282
-
if fetchedCount == 0 {
283
-
count, _ := s.db.GetMempoolCount(ctx)
284
-
log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
285
-
return false, nil
286
-
}
287
-
288
-
beforeCount, err := s.db.GetMempoolCount(ctx)
289
-
if err != nil {
290
-
return false, err
291
-
}
292
-
293
-
endpointsBefore := sumCounts(m.endpointCounts)
294
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
295
-
return false, err
296
-
}
297
-
endpointsAfter := sumCounts(m.endpointCounts)
298
-
m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
299
-
300
-
afterCount, err := s.db.GetMempoolCount(ctx)
301
-
if err != nil {
302
-
return false, err
303
-
}
304
-
305
-
uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
306
-
m.totalProcessed += uniqueAdded // Track unique ops processed
307
-
308
-
log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
309
-
uniqueAdded, int64(fetchedCount)-uniqueAdded)
310
-
311
-
// Continue only if got full batch
312
-
shouldContinue := fetchedCount >= limit
313
-
if !shouldContinue {
314
-
log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
315
-
}
316
-
317
-
return shouldContinue, nil
318
-
}
319
-
320
-
func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
321
-
mempoolOps := make([]storage.MempoolOperation, len(ops))
322
-
for i, op := range ops {
323
-
mempoolOps[i] = storage.MempoolOperation{
324
-
DID: op.DID,
325
-
Operation: string(op.RawJSON),
326
-
CID: op.CID,
327
-
CreatedAt: op.CreatedAt,
328
-
}
329
-
}
330
-
331
-
if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
332
-
return err
333
-
}
334
-
335
-
// NEW: Create/update DID records immediately when adding to mempool
336
-
for _, op := range ops {
337
-
info := ExtractDIDInfo(&op)
338
-
339
-
// Validate handle length before saving
340
-
validHandle := ValidateHandle(info.Handle)
341
-
if info.Handle != "" && validHandle == "" {
342
-
log.Verbose("Skipping invalid handle for DID %s (length: %d)", op.DID, len(info.Handle))
343
-
}
344
-
345
-
if err := s.db.UpsertDIDFromMempool(ctx, op.DID, validHandle, info.PDS); err != nil {
346
-
log.Error("Failed to upsert DID %s in mempool: %v", op.DID, err)
347
-
// Don't fail the whole operation, just log
348
-
}
349
-
}
350
-
351
-
// Process for endpoint discovery
352
-
batchCounts, err := s.processBatch(ctx, ops)
353
-
s.mergeCounts(counts, batchCounts)
354
-
return err
355
-
}
356
-
357
-
func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
358
-
for {
359
-
count, err := s.db.GetMempoolCount(ctx)
76
+
// Process operations for endpoint discovery
77
+
counts, err := s.processBatch(ctx, bundle.Operations)
360
78
if err != nil {
361
-
return err
79
+
log.Error("Failed to process batch: %v", err)
80
+
// Continue anyway
362
81
}
363
82
364
-
log.Verbose("Mempool contains %d operations", count)
83
+
// Update metrics
84
+
s.mergeCounts(metrics.endpointCounts, counts)
85
+
metrics.totalProcessed += int64(len(bundle.Operations))
86
+
metrics.newEndpoints += sumCounts(counts)
87
+
metrics.currentBundle = bundle.BundleNumber
365
88
366
-
if count < BUNDLE_SIZE {
367
-
log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
368
-
return nil
369
-
}
89
+
log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
90
+
bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
370
91
371
-
log.Info("→ Creating bundle from mempool (%d operations available)...", count)
372
-
373
-
// Updated to receive 4 values instead of 3
374
-
bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
375
-
if err != nil {
376
-
return err
377
-
}
378
-
379
-
// Process and update metrics
380
-
countsBefore := sumCounts(m.endpointCounts)
381
-
counts, _ := s.processBatch(ctx, ops)
382
-
s.mergeCounts(m.endpointCounts, counts)
383
-
newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
384
-
385
-
m.totalProcessed += int64(len(ops))
386
-
m.newEndpoints += newEndpointsFound
387
-
m.currentBundle = bundleNum
388
-
389
-
if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92
+
// Update cursor
93
+
if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
390
94
log.Error("Warning: failed to update cursor: %v", err)
391
95
}
392
-
393
-
log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
394
96
}
395
-
}
396
97
397
-
func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
398
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
399
-
if err != nil {
400
-
return 0, nil, "", err
98
+
// Show final mempool status
99
+
stats := s.bundleManager.libManager.GetMempoolStats()
100
+
if count, ok := stats["count"].(int); ok && count > 0 {
101
+
log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102
+
count, float64(count)/float64(BUNDLE_SIZE)*100)
401
103
}
402
104
403
-
ops, ids := s.deduplicateMempool(mempoolOps)
404
-
if len(ops) < BUNDLE_SIZE {
405
-
return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
406
-
}
407
-
408
-
// Determine cursor from last bundle
409
-
cursor := ""
410
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
411
-
if err == nil && lastBundle > 0 {
412
-
if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
413
-
cursor = bundle.EndTime.Format(time.RFC3339Nano)
414
-
}
415
-
}
416
-
417
-
bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
418
-
if err != nil {
419
-
return 0, nil, "", err
420
-
}
421
-
422
-
if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
423
-
return 0, nil, "", err
424
-
}
425
-
426
-
return bundleNum, ops, cursor, nil
105
+
metrics.logSummary()
106
+
return nil
427
107
}
428
108
429
-
func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
430
-
ops := make([]PLCOperation, 0, BUNDLE_SIZE)
431
-
ids := make([]int64, 0, BUNDLE_SIZE)
432
-
seenCIDs := make(map[string]bool)
433
-
434
-
for _, mop := range mempoolOps {
435
-
if seenCIDs[mop.CID] {
436
-
ids = append(ids, mop.ID)
437
-
continue
438
-
}
439
-
seenCIDs[mop.CID] = true
440
-
441
-
var op PLCOperation
442
-
json.Unmarshal([]byte(mop.Operation), &op)
443
-
op.RawJSON = []byte(mop.Operation)
444
-
445
-
ops = append(ops, op)
446
-
ids = append(ids, mop.ID)
447
-
448
-
if len(ops) >= BUNDLE_SIZE {
449
-
break
450
-
}
451
-
}
452
-
453
-
return ops, ids
454
-
}
455
-
109
+
// processBatch extracts endpoints from operations
456
110
func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
457
111
counts := make(map[string]int64)
458
112
seen := make(map[string]*PLCOperation)
459
113
460
114
// Collect unique endpoints
461
-
for _, op := range ops {
115
+
for i := range ops {
116
+
op := &ops[i]
117
+
462
118
if op.IsNullified() {
463
119
continue
464
120
}
465
-
for _, ep := range s.extractEndpointsFromOperation(op) {
121
+
122
+
for _, ep := range s.extractEndpointsFromOperation(*op) {
466
123
key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
467
124
if _, exists := seen[key]; !exists {
468
-
seen[key] = &op
125
+
seen[key] = op
469
126
}
470
127
}
471
128
}
···
481
138
}
482
139
483
140
if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
484
-
log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141
+
log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
485
142
continue
486
143
}
487
144
488
-
log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145
+
log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
489
146
counts[epType]++
490
147
}
491
148
492
149
return counts, nil
493
-
}
494
-
495
-
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
496
-
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
497
-
EndpointType: epType,
498
-
Endpoint: endpoint,
499
-
DiscoveredAt: discoveredAt,
500
-
LastChecked: time.Time{},
501
-
Status: storage.EndpointStatusUnknown,
502
-
})
503
150
}
504
151
505
152
func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
542
189
return nil
543
190
}
544
191
545
-
func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
546
-
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
547
-
Source: "plc_directory",
548
-
LastBundleNumber: m.currentBundle - 1,
549
-
LastScanTime: time.Now().UTC(),
550
-
RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192
+
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
194
+
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195
+
EndpointType: epType,
196
+
Endpoint: endpoint,
197
+
DiscoveredAt: discoveredAt,
198
+
LastChecked: time.Time{},
199
+
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
551
201
})
552
202
}
553
203
···
575
225
return total
576
226
}
577
227
578
-
func formatEndpointCounts(counts map[string]int64) string {
579
-
if len(counts) == 0 {
580
-
return "0 new endpoints"
581
-
}
228
+
func isInsufficientOpsError(err error) bool {
229
+
return err != nil && strings.Contains(err.Error(), "insufficient operations")
230
+
}
582
231
583
-
total := sumCounts(counts)
232
+
// ScanMetrics tracks scan progress
233
+
type ScanMetrics struct {
234
+
totalProcessed int64
235
+
newEndpoints int64
236
+
endpointCounts map[string]int64
237
+
currentBundle int
238
+
startTime time.Time
239
+
}
584
240
585
-
if len(counts) == 1 {
586
-
for typ, count := range counts {
587
-
return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
588
-
}
241
+
func newMetrics(startBundle int) *ScanMetrics {
242
+
return &ScanMetrics{
243
+
endpointCounts: make(map[string]int64),
244
+
currentBundle: startBundle,
245
+
startTime: time.Now(),
589
246
}
247
+
}
590
248
591
-
parts := make([]string, 0, len(counts))
592
-
for typ, count := range counts {
593
-
parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249
+
func (m *ScanMetrics) logSummary() {
250
+
if m.newEndpoints > 0 {
251
+
log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252
+
m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253
+
} else {
254
+
log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255
+
m.totalProcessed, time.Since(m.startTime))
594
256
}
595
-
return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
596
257
}
+68
-55
internal/plc/types.go
+68
-55
internal/plc/types.go
···
1
1
package plc
2
2
3
-
import "time"
4
-
5
-
type PLCOperation struct {
6
-
DID string `json:"did"`
7
-
Operation map[string]interface{} `json:"operation"`
8
-
CID string `json:"cid"`
9
-
Nullified interface{} `json:"nullified,omitempty"`
10
-
CreatedAt time.Time `json:"createdAt"`
11
-
12
-
RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13
-
}
3
+
import (
4
+
"net/url"
5
+
"strings"
14
6
15
-
// Helper method to check if nullified
16
-
func (op *PLCOperation) IsNullified() bool {
17
-
if op.Nullified == nil {
18
-
return false
19
-
}
20
-
21
-
switch v := op.Nullified.(type) {
22
-
case bool:
23
-
return v
24
-
case string:
25
-
return v != ""
26
-
default:
27
-
return false
28
-
}
29
-
}
30
-
31
-
// Get nullifying CID if available
32
-
func (op *PLCOperation) GetNullifyingCID() string {
33
-
if s, ok := op.Nullified.(string); ok {
34
-
return s
35
-
}
36
-
return ""
37
-
}
7
+
plclib "tangled.org/atscan.net/plcbundle/plc"
8
+
)
38
9
39
-
type DIDDocument struct {
40
-
Context []string `json:"@context"`
41
-
ID string `json:"id"`
42
-
AlsoKnownAs []string `json:"alsoKnownAs"`
43
-
VerificationMethod []VerificationMethod `json:"verificationMethod"`
44
-
Service []Service `json:"service"`
45
-
}
10
+
// Re-export library types
11
+
type PLCOperation = plclib.PLCOperation
12
+
type DIDDocument = plclib.DIDDocument
13
+
type Client = plclib.Client
14
+
type ExportOptions = plclib.ExportOptions
46
15
47
-
type VerificationMethod struct {
48
-
ID string `json:"id"`
49
-
Type string `json:"type"`
50
-
Controller string `json:"controller"`
51
-
PublicKeyMultibase string `json:"publicKeyMultibase"`
52
-
}
16
+
// Keep your custom types
17
+
const BUNDLE_SIZE = 10000
53
18
54
-
type Service struct {
55
-
ID string `json:"id"`
56
-
Type string `json:"type"`
57
-
ServiceEndpoint string `json:"serviceEndpoint"`
58
-
}
59
-
60
-
// DIDHistoryEntry represents a single operation in DID history
61
19
type DIDHistoryEntry struct {
62
20
Operation PLCOperation `json:"operation"`
63
21
PLCBundle string `json:"plc_bundle,omitempty"`
64
22
}
65
23
66
-
// DIDHistory represents the full history of a DID
67
24
type DIDHistory struct {
68
25
DID string `json:"did"`
69
26
Current *PLCOperation `json:"current"`
···
74
31
Type string
75
32
Endpoint string
76
33
}
34
+
35
+
// PLCOpLabel holds metadata from the label CSV file
36
+
type PLCOpLabel struct {
37
+
Bundle int `json:"bundle"`
38
+
Position int `json:"position"`
39
+
CID string `json:"cid"`
40
+
Size int `json:"size"`
41
+
Confidence float64 `json:"confidence"`
42
+
Detectors []string `json:"detectors"`
43
+
}
44
+
45
+
// validateEndpoint checks if endpoint is in correct format: https://<domain>
46
+
func validateEndpoint(endpoint string) bool {
47
+
// Must not be empty
48
+
if endpoint == "" {
49
+
return false
50
+
}
51
+
52
+
// Must not have trailing slash
53
+
if strings.HasSuffix(endpoint, "/") {
54
+
return false
55
+
}
56
+
57
+
// Parse URL
58
+
u, err := url.Parse(endpoint)
59
+
if err != nil {
60
+
return false
61
+
}
62
+
63
+
// Must use https scheme
64
+
if u.Scheme != "https" {
65
+
return false
66
+
}
67
+
68
+
// Must have a host
69
+
if u.Host == "" {
70
+
return false
71
+
}
72
+
73
+
// Must not have path (except empty)
74
+
if u.Path != "" && u.Path != "/" {
75
+
return false
76
+
}
77
+
78
+
// Must not have query parameters
79
+
if u.RawQuery != "" {
80
+
return false
81
+
}
82
+
83
+
// Must not have fragment
84
+
if u.Fragment != "" {
85
+
return false
86
+
}
87
+
88
+
return true
89
+
}
+1
-21
internal/storage/db.go
+1
-21
internal/storage/db.go
···
27
27
EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error)
28
28
GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error)
29
29
GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error)
30
-
UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error
30
+
UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error
31
31
SaveEndpointScan(ctx context.Context, scan *EndpointScan) error
32
32
SetScanRetention(retention int)
33
33
UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
···
49
49
// Cursor operations
50
50
GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
51
51
UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
52
-
53
-
// Bundle operations
54
-
CreateBundle(ctx context.Context, bundle *PLCBundle) error
55
-
GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
56
-
GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
57
-
GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
58
-
GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
59
-
GetLastBundleNumber(ctx context.Context) (int, error)
60
-
GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
61
-
GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
62
-
63
-
// Mempool operations
64
-
AddToMempool(ctx context.Context, ops []MempoolOperation) error
65
-
GetMempoolCount(ctx context.Context) (int, error)
66
-
GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
67
-
DeleteFromMempool(ctx context.Context, ids []int64) error
68
-
GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
69
-
GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
70
-
GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
71
-
GetMempoolUncompressedSize(ctx context.Context) (int64, error)
72
52
73
53
// Metrics
74
54
StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
+186
-600
internal/storage/postgres.go
+186
-600
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
11
-
"github.com/atscan/atscanner/internal/log"
10
+
"github.com/atscan/atscand/internal/log"
12
11
"github.com/jackc/pgx/v5"
13
12
"github.com/jackc/pgx/v5/pgxpool"
14
13
_ "github.com/jackc/pgx/v5/stdlib"
···
73
72
log.Info("Running database migrations...")
74
73
75
74
schema := `
76
-
-- Endpoints table (NO user_count, NO ip_info)
75
+
-- Endpoints table (with IPv6 support)
77
76
CREATE TABLE IF NOT EXISTS endpoints (
78
77
id BIGSERIAL PRIMARY KEY,
79
78
endpoint_type TEXT NOT NULL DEFAULT 'pds',
···
83
82
last_checked TIMESTAMP,
84
83
status INTEGER DEFAULT 0,
85
84
ip TEXT,
85
+
ipv6 TEXT,
86
86
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
87
88
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
88
89
UNIQUE(endpoint_type, endpoint)
89
90
);
···
92
93
CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
93
94
CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
94
95
CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96
+
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
95
97
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
96
-
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
98
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
97
100
98
-
-- IP infos table (IP as PRIMARY KEY)
99
-
CREATE TABLE IF NOT EXISTS ip_infos (
100
-
ip TEXT PRIMARY KEY,
101
-
city TEXT,
102
-
country TEXT,
103
-
country_code TEXT,
104
-
asn INTEGER,
105
-
asn_org TEXT,
106
-
is_datacenter BOOLEAN,
107
-
is_vpn BOOLEAN,
108
-
latitude REAL,
109
-
longitude REAL,
110
-
raw_data JSONB,
111
-
fetched_at TIMESTAMP NOT NULL,
112
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
113
-
);
101
+
-- IP infos table (IP as PRIMARY KEY)
102
+
CREATE TABLE IF NOT EXISTS ip_infos (
103
+
ip TEXT PRIMARY KEY,
104
+
city TEXT,
105
+
country TEXT,
106
+
country_code TEXT,
107
+
asn INTEGER,
108
+
asn_org TEXT,
109
+
is_datacenter BOOLEAN,
110
+
is_vpn BOOLEAN,
111
+
is_crawler BOOLEAN,
112
+
is_tor BOOLEAN,
113
+
is_proxy BOOLEAN,
114
+
latitude REAL,
115
+
longitude REAL,
116
+
raw_data JSONB,
117
+
fetched_at TIMESTAMP NOT NULL,
118
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
119
+
);
114
120
115
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
116
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
121
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
122
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
117
123
118
-
-- Endpoint scans (renamed from pds_scans)
124
+
-- Endpoint scans
119
125
CREATE TABLE IF NOT EXISTS endpoint_scans (
120
126
id BIGSERIAL PRIMARY KEY,
121
127
endpoint_id BIGINT NOT NULL,
···
123
129
response_time DOUBLE PRECISION,
124
130
user_count BIGINT,
125
131
version TEXT,
132
+
used_ip TEXT,
126
133
scan_data JSONB,
127
134
scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
128
135
FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
···
131
138
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
132
139
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
133
140
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
141
+
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
142
+
134
143
135
144
CREATE TABLE IF NOT EXISTS plc_metrics (
136
145
id BIGSERIAL PRIMARY KEY,
···
149
158
records_processed BIGINT DEFAULT 0
150
159
);
151
160
152
-
CREATE TABLE IF NOT EXISTS plc_bundles (
153
-
bundle_number INTEGER PRIMARY KEY,
154
-
start_time TIMESTAMP NOT NULL,
155
-
end_time TIMESTAMP NOT NULL,
156
-
dids JSONB NOT NULL,
157
-
hash TEXT NOT NULL,
158
-
compressed_hash TEXT NOT NULL,
159
-
compressed_size BIGINT NOT NULL,
160
-
uncompressed_size BIGINT NOT NULL,
161
-
cumulative_compressed_size BIGINT NOT NULL,
162
-
cumulative_uncompressed_size BIGINT NOT NULL,
163
-
cursor TEXT,
164
-
prev_bundle_hash TEXT,
165
-
compressed BOOLEAN DEFAULT true,
166
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
167
-
);
168
-
169
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
170
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
171
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
172
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
173
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
174
-
175
-
CREATE TABLE IF NOT EXISTS plc_mempool (
176
-
id BIGSERIAL PRIMARY KEY,
177
-
did TEXT NOT NULL,
178
-
operation TEXT NOT NULL,
179
-
cid TEXT NOT NULL UNIQUE,
180
-
created_at TIMESTAMP NOT NULL,
181
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
182
-
);
183
-
184
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
185
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
186
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
187
-
188
161
-- Minimal dids table
189
162
CREATE TABLE IF NOT EXISTS dids (
190
163
did TEXT PRIMARY KEY,
···
237
210
238
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
239
212
query := `
240
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ip_resolved_at)
241
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
242
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
243
216
last_checked = EXCLUDED.last_checked,
244
217
status = EXCLUDED.status,
245
218
ip = CASE
246
219
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip
247
220
ELSE endpoints.ip
221
+
END,
222
+
ipv6 = CASE
223
+
WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6
224
+
ELSE endpoints.ipv6
248
225
END,
249
226
ip_resolved_at = CASE
250
-
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip_resolved_at
227
+
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
251
228
ELSE endpoints.ip_resolved_at
252
229
END,
230
+
valid = EXCLUDED.valid,
253
231
updated_at = CURRENT_TIMESTAMP
254
232
RETURNING id
255
233
`
256
234
err := p.db.QueryRowContext(ctx, query,
257
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
258
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
259
237
return err
260
238
}
261
239
···
276
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
277
255
query := `
278
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
279
-
ip, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
280
258
FROM endpoints
281
259
WHERE endpoint = $1 AND endpoint_type = $2
282
260
`
283
261
284
262
var ep Endpoint
285
263
var lastChecked, ipResolvedAt sql.NullTime
286
-
var ip sql.NullString
264
+
var ip, ipv6 sql.NullString
287
265
288
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
289
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
290
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
291
269
)
292
270
if err != nil {
293
271
return nil, err
···
299
277
if ip.Valid {
300
278
ep.IP = ip.String
301
279
}
280
+
if ipv6.Valid {
281
+
ep.IPv6 = ipv6.String
282
+
}
302
283
if ipResolvedAt.Valid {
303
284
ep.IPResolvedAt = ipResolvedAt.Time
304
285
}
···
308
289
309
290
func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) {
310
291
query := `
311
-
SELECT DISTINCT ON (COALESCE(server_did, id::text))
312
-
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
313
-
ip, ip_resolved_at, updated_at
314
-
FROM endpoints
315
-
WHERE 1=1
292
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
293
+
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
295
+
FROM endpoints
296
+
WHERE 1=1
316
297
`
317
298
args := []interface{}{}
318
299
argIdx := 1
···
323
304
args = append(args, filter.Type)
324
305
argIdx++
325
306
}
307
+
308
+
// NEW: Filter by valid flag
309
+
if filter.OnlyValid {
310
+
query += fmt.Sprintf(" AND valid = true", argIdx)
311
+
}
326
312
if filter.Status != "" {
327
313
statusInt := EndpointStatusUnknown
328
314
switch filter.Status {
···
345
331
}
346
332
}
347
333
348
-
// NEW: Order by server_did and discovered_at to get primary endpoints
349
-
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
334
+
// NEW: Choose ordering strategy
335
+
if filter != nil && filter.Random {
336
+
// For random selection, we need to wrap in a subquery
337
+
query = fmt.Sprintf(`
338
+
WITH filtered_endpoints AS (
339
+
%s
340
+
)
341
+
SELECT * FROM filtered_endpoints
342
+
ORDER BY RANDOM()
343
+
`, query)
344
+
} else {
345
+
// Original ordering for non-random queries
346
+
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
347
+
}
350
348
351
349
if filter != nil && filter.Limit > 0 {
352
350
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
363
361
for rows.Next() {
364
362
var ep Endpoint
365
363
var lastChecked, ipResolvedAt sql.NullTime
366
-
var ip, serverDID sql.NullString
364
+
var ip, ipv6, serverDID sql.NullString
367
365
368
366
err := rows.Scan(
369
367
&ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked,
370
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
368
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
371
369
)
372
370
if err != nil {
373
371
return nil, err
···
382
380
if ip.Valid {
383
381
ep.IP = ip.String
384
382
}
383
+
if ipv6.Valid {
384
+
ep.IPv6 = ipv6.String
385
+
}
385
386
if ipResolvedAt.Valid {
386
387
ep.IPResolvedAt = ipResolvedAt.Time
387
388
}
···
402
403
return err
403
404
}
404
405
405
-
func (p *PostgresDB) UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error {
406
+
func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error {
406
407
query := `
407
408
UPDATE endpoints
408
-
SET ip = $1, ip_resolved_at = $2, updated_at = $3
409
-
WHERE id = $4
409
+
SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4
410
+
WHERE id = $5
410
411
`
411
-
_, err := p.db.ExecContext(ctx, query, ip, resolvedAt, time.Now().UTC(), endpointID)
412
+
_, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID)
412
413
return err
413
414
}
414
415
···
475
476
defer tx.Rollback()
476
477
477
478
query := `
478
-
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at)
479
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
479
+
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
480
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
480
481
`
481
-
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt)
482
+
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
482
483
if err != nil {
483
484
return err
484
485
}
···
505
506
506
507
func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
507
508
query := `
508
-
SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at
509
+
SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
509
510
FROM endpoint_scans
510
511
WHERE endpoint_id = $1
511
512
ORDER BY scanned_at DESC
···
523
524
var scan EndpointScan
524
525
var responseTime sql.NullFloat64
525
526
var userCount sql.NullInt64
526
-
var version sql.NullString // NEW
527
+
var version, usedIP sql.NullString
527
528
var scanDataJSON []byte
528
529
529
-
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt)
530
+
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
530
531
if err != nil {
531
532
return nil, err
532
533
}
···
539
540
scan.UserCount = userCount.Int64
540
541
}
541
542
542
-
if version.Valid { // NEW
543
+
if version.Valid {
543
544
scan.Version = version.String
544
545
}
545
546
547
+
if usedIP.Valid {
548
+
scan.UsedIP = usedIP.String
549
+
}
550
+
546
551
if len(scanDataJSON) > 0 {
547
552
var scanData EndpointScanData
548
553
if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
···
568
573
discovered_at,
569
574
last_checked,
570
575
status,
571
-
ip
576
+
ip,
577
+
ipv6,
578
+
valid
572
579
FROM endpoints
573
580
WHERE endpoint_type = 'pds'
574
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
575
582
)
576
583
SELECT
577
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
578
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
579
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
580
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude
587
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
588
+
i.latitude, i.longitude
581
589
FROM unique_servers e
582
590
LEFT JOIN LATERAL (
583
591
SELECT
···
634
642
var items []*PDSListItem
635
643
for rows.Next() {
636
644
item := &PDSListItem{}
637
-
var ip, serverDID, city, country, countryCode, asnOrg sql.NullString
645
+
var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
638
646
var asn sql.NullInt32
639
-
var isDatacenter, isVPN sql.NullBool
647
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
640
648
var lat, lon sql.NullFloat64
641
649
var userCount sql.NullInt32
642
650
var responseTime sql.NullFloat64
···
644
652
var scannedAt sql.NullTime
645
653
646
654
err := rows.Scan(
647
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
648
656
&userCount, &responseTime, &version, &scannedAt,
649
657
&city, &country, &countryCode, &asn, &asnOrg,
650
-
&isDatacenter, &isVPN, &lat, &lon,
658
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
659
+
&lat, &lon,
651
660
)
652
661
if err != nil {
653
662
return nil, err
···
656
665
if ip.Valid {
657
666
item.IP = ip.String
658
667
}
668
+
if ipv6.Valid {
669
+
item.IPv6 = ipv6.String
670
+
}
659
671
if serverDID.Valid {
660
672
item.ServerDID = serverDID.String
661
673
}
···
686
698
ASNOrg: asnOrg.String,
687
699
IsDatacenter: isDatacenter.Bool,
688
700
IsVPN: isVPN.Bool,
701
+
IsCrawler: isCrawler.Bool,
702
+
IsTor: isTor.Bool,
703
+
IsProxy: isProxy.Bool,
689
704
Latitude: float32(lat.Float64),
690
705
Longitude: float32(lon.Float64),
691
706
}
···
699
714
700
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
701
716
query := `
702
-
WITH target_endpoint AS (
717
+
WITH target_endpoint AS MATERIALIZED (
703
718
SELECT
704
719
e.id,
705
720
e.endpoint,
···
707
722
e.discovered_at,
708
723
e.last_checked,
709
724
e.status,
710
-
e.ip
725
+
e.ip,
726
+
e.ipv6,
727
+
e.valid
711
728
FROM endpoints e
712
-
WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
713
-
),
714
-
aliases_agg AS (
715
-
SELECT
716
-
te.server_did,
717
-
array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
718
-
MIN(e.discovered_at) as first_discovered_at
719
-
FROM target_endpoint te
720
-
LEFT JOIN endpoints e ON te.server_did = e.server_did
721
-
AND e.endpoint_type = 'pds'
722
-
AND te.server_did IS NOT NULL
723
-
GROUP BY te.server_did
729
+
WHERE e.endpoint = $1
730
+
AND e.endpoint_type = 'pds'
731
+
LIMIT 1
724
732
)
725
733
SELECT
726
734
te.id,
···
730
738
te.last_checked,
731
739
te.status,
732
740
te.ip,
741
+
te.ipv6,
742
+
te.valid,
733
743
latest.user_count,
734
744
latest.response_time,
735
745
latest.version,
736
746
latest.scan_data->'metadata'->'server_info' as server_info,
737
747
latest.scanned_at,
738
748
i.city, i.country, i.country_code, i.asn, i.asn_org,
739
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude,
749
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
750
+
i.latitude, i.longitude,
740
751
i.raw_data,
741
-
COALESCE(aa.aliases, ARRAY[]::text[]) as aliases,
742
-
aa.first_discovered_at
752
+
COALESCE(
753
+
ARRAY(
754
+
SELECT e2.endpoint
755
+
FROM endpoints e2
756
+
WHERE e2.server_did = te.server_did
757
+
AND e2.endpoint_type = 'pds'
758
+
AND e2.endpoint != te.endpoint
759
+
AND te.server_did IS NOT NULL
760
+
ORDER BY e2.discovered_at
761
+
),
762
+
ARRAY[]::text[]
763
+
) as aliases,
764
+
CASE
765
+
WHEN te.server_did IS NOT NULL THEN (
766
+
SELECT MIN(e3.discovered_at)
767
+
FROM endpoints e3
768
+
WHERE e3.server_did = te.server_did
769
+
AND e3.endpoint_type = 'pds'
770
+
)
771
+
ELSE NULL
772
+
END as first_discovered_at
743
773
FROM target_endpoint te
744
-
LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did
745
774
LEFT JOIN LATERAL (
746
-
SELECT scan_data, response_time, version, scanned_at, user_count
747
-
FROM endpoint_scans
748
-
WHERE endpoint_id = te.id
749
-
ORDER BY scanned_at DESC
775
+
SELECT
776
+
es.scan_data,
777
+
es.response_time,
778
+
es.version,
779
+
es.scanned_at,
780
+
es.user_count
781
+
FROM endpoint_scans es
782
+
WHERE es.endpoint_id = te.id
783
+
ORDER BY es.scanned_at DESC
750
784
LIMIT 1
751
785
) latest ON true
752
-
LEFT JOIN ip_infos i ON te.ip = i.ip
786
+
LEFT JOIN ip_infos i ON te.ip = i.ip;
753
787
`
754
788
755
789
detail := &PDSDetail{}
756
-
var ip, city, country, countryCode, asnOrg, serverDID sql.NullString
790
+
var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
757
791
var asn sql.NullInt32
758
-
var isDatacenter, isVPN sql.NullBool
792
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
759
793
var lat, lon sql.NullFloat64
760
794
var userCount sql.NullInt32
761
795
var responseTime sql.NullFloat64
···
767
801
var firstDiscoveredAt sql.NullTime
768
802
769
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
770
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
771
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
772
806
&city, &country, &countryCode, &asn, &asnOrg,
773
-
&isDatacenter, &isVPN, &lat, &lon,
807
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
808
+
&lat, &lon,
774
809
&rawDataJSON,
775
810
pq.Array(&aliases),
776
811
&firstDiscoveredAt,
···
782
817
if ip.Valid {
783
818
detail.IP = ip.String
784
819
}
820
+
if ipv6.Valid {
821
+
detail.IPv6 = ipv6.String
822
+
}
785
823
786
824
if serverDID.Valid {
787
825
detail.ServerDID = serverDID.String
···
790
828
// Set aliases and is_primary
791
829
detail.Aliases = aliases
792
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
793
-
// Has server_did - check if this is the first discovered
794
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
795
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
796
833
} else {
797
-
// No server_did means unique server
798
834
detail.IsPrimary = true
799
835
}
800
836
···
820
856
}
821
857
}
822
858
823
-
// Parse IP info
859
+
// Parse IP info with all fields
824
860
if city.Valid || country.Valid {
825
861
detail.IPInfo = &IPInfo{
826
862
IP: ip.String,
···
831
867
ASNOrg: asnOrg.String,
832
868
IsDatacenter: isDatacenter.Bool,
833
869
IsVPN: isVPN.Bool,
870
+
IsCrawler: isCrawler.Bool,
871
+
IsTor: isTor.Bool,
872
+
IsProxy: isProxy.Bool,
834
873
Latitude: float32(lat.Float64),
835
874
Longitude: float32(lon.Float64),
836
875
}
···
978
1017
isVPN = val
979
1018
}
980
1019
1020
+
isCrawler := false
1021
+
if val, ok := ipInfo["is_crawler"].(bool); ok {
1022
+
isCrawler = val
1023
+
}
1024
+
1025
+
isTor := false
1026
+
if val, ok := ipInfo["is_tor"].(bool); ok {
1027
+
isTor = val
1028
+
}
1029
+
1030
+
isProxy := false
1031
+
if val, ok := ipInfo["is_proxy"].(bool); ok {
1032
+
isProxy = val
1033
+
}
1034
+
981
1035
lat := extractFloat(ipInfo, "location", "latitude")
982
1036
lon := extractFloat(ipInfo, "location", "longitude")
983
1037
984
1038
query := `
985
-
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, latitude, longitude, raw_data, fetched_at)
986
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
1039
+
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at)
1040
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
987
1041
ON CONFLICT(ip) DO UPDATE SET
988
1042
city = EXCLUDED.city,
989
1043
country = EXCLUDED.country,
···
992
1046
asn_org = EXCLUDED.asn_org,
993
1047
is_datacenter = EXCLUDED.is_datacenter,
994
1048
is_vpn = EXCLUDED.is_vpn,
1049
+
is_crawler = EXCLUDED.is_crawler,
1050
+
is_tor = EXCLUDED.is_tor,
1051
+
is_proxy = EXCLUDED.is_proxy,
995
1052
latitude = EXCLUDED.latitude,
996
1053
longitude = EXCLUDED.longitude,
997
1054
raw_data = EXCLUDED.raw_data,
998
1055
fetched_at = EXCLUDED.fetched_at,
999
1056
updated_at = CURRENT_TIMESTAMP
1000
1057
`
1001
-
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, lat, lon, rawDataJSON, time.Now().UTC())
1058
+
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC())
1002
1059
return err
1003
1060
}
1004
1061
1005
1062
func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) {
1006
1063
query := `
1007
-
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn,
1064
+
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy,
1008
1065
latitude, longitude, raw_data, fetched_at, updated_at
1009
1066
FROM ip_infos
1010
1067
WHERE ip = $1
···
1015
1072
1016
1073
err := p.db.QueryRowContext(ctx, query, ip).Scan(
1017
1074
&info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg,
1018
-
&info.IsDatacenter, &info.IsVPN, &info.Latitude, &info.Longitude,
1075
+
&info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy,
1076
+
&info.Latitude, &info.Longitude,
1019
1077
&rawDataJSON, &info.FetchedAt, &info.UpdatedAt,
1020
1078
)
1021
1079
if err != nil {
···
1103
1161
}
1104
1162
}
1105
1163
return 0
1106
-
}
1107
-
1108
-
func extractBool(data map[string]interface{}, keys ...string) bool {
1109
-
current := data
1110
-
for i, key := range keys {
1111
-
if i == len(keys)-1 {
1112
-
if val, ok := current[key].(bool); ok {
1113
-
return val
1114
-
}
1115
-
// Check if it's a string that matches (for type="hosting")
1116
-
if val, ok := current[key].(string); ok {
1117
-
// For cases like company.type == "hosting"
1118
-
expectedValue := keys[len(keys)-1]
1119
-
return val == expectedValue
1120
-
}
1121
-
return false
1122
-
}
1123
-
if nested, ok := current[key].(map[string]interface{}); ok {
1124
-
current = nested
1125
-
} else {
1126
-
return false
1127
-
}
1128
-
}
1129
-
return false
1130
-
}
1131
-
1132
-
// ===== BUNDLE OPERATIONS =====
1133
-
1134
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1135
-
didsJSON, err := json.Marshal(bundle.DIDs)
1136
-
if err != nil {
1137
-
return err
1138
-
}
1139
-
1140
-
// Calculate cumulative sizes from previous bundle
1141
-
if bundle.BundleNumber > 1 {
1142
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1143
-
if err == nil && prevBundle != nil {
1144
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1145
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1146
-
} else {
1147
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1148
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1149
-
}
1150
-
} else {
1151
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1152
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1153
-
}
1154
-
1155
-
query := `
1156
-
INSERT INTO plc_bundles (
1157
-
bundle_number, start_time, end_time, dids,
1158
-
hash, compressed_hash, compressed_size, uncompressed_size,
1159
-
cumulative_compressed_size, cumulative_uncompressed_size,
1160
-
cursor, prev_bundle_hash, compressed
1161
-
)
1162
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1163
-
ON CONFLICT(bundle_number) DO UPDATE SET
1164
-
start_time = EXCLUDED.start_time,
1165
-
end_time = EXCLUDED.end_time,
1166
-
dids = EXCLUDED.dids,
1167
-
hash = EXCLUDED.hash,
1168
-
compressed_hash = EXCLUDED.compressed_hash,
1169
-
compressed_size = EXCLUDED.compressed_size,
1170
-
uncompressed_size = EXCLUDED.uncompressed_size,
1171
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1172
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1173
-
cursor = EXCLUDED.cursor,
1174
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1175
-
compressed = EXCLUDED.compressed
1176
-
`
1177
-
_, err = p.db.ExecContext(ctx, query,
1178
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1179
-
didsJSON, bundle.Hash, bundle.CompressedHash,
1180
-
bundle.CompressedSize, bundle.UncompressedSize,
1181
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1182
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1183
-
)
1184
-
1185
-
return err
1186
-
}
1187
-
1188
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1189
-
query := `
1190
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1191
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1192
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1193
-
FROM plc_bundles
1194
-
WHERE bundle_number = $1
1195
-
`
1196
-
1197
-
var bundle PLCBundle
1198
-
var didsJSON []byte
1199
-
var prevHash sql.NullString
1200
-
var cursor sql.NullString
1201
-
1202
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1203
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1204
-
&didsJSON, &bundle.Hash, &bundle.CompressedHash,
1205
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1206
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1207
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1208
-
)
1209
-
if err != nil {
1210
-
return nil, err
1211
-
}
1212
-
1213
-
if prevHash.Valid {
1214
-
bundle.PrevBundleHash = prevHash.String
1215
-
}
1216
-
if cursor.Valid {
1217
-
bundle.Cursor = cursor.String
1218
-
}
1219
-
1220
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1221
-
return &bundle, nil
1222
-
}
1223
-
1224
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1225
-
query := `
1226
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1227
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1228
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1229
-
FROM plc_bundles
1230
-
ORDER BY bundle_number DESC
1231
-
LIMIT $1
1232
-
`
1233
-
1234
-
rows, err := p.db.QueryContext(ctx, query, limit)
1235
-
if err != nil {
1236
-
return nil, err
1237
-
}
1238
-
defer rows.Close()
1239
-
1240
-
return p.scanBundles(rows)
1241
-
}
1242
-
1243
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1244
-
query := `
1245
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1246
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1247
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1248
-
FROM plc_bundles
1249
-
WHERE dids ? $1
1250
-
ORDER BY bundle_number ASC
1251
-
`
1252
-
1253
-
rows, err := p.db.QueryContext(ctx, query, did)
1254
-
if err != nil {
1255
-
return nil, err
1256
-
}
1257
-
defer rows.Close()
1258
-
1259
-
return p.scanBundles(rows)
1260
-
}
1261
-
1262
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1263
-
var bundles []*PLCBundle
1264
-
1265
-
for rows.Next() {
1266
-
var bundle PLCBundle
1267
-
var didsJSON []byte
1268
-
var prevHash sql.NullString
1269
-
var cursor sql.NullString
1270
-
1271
-
if err := rows.Scan(
1272
-
&bundle.BundleNumber,
1273
-
&bundle.StartTime,
1274
-
&bundle.EndTime,
1275
-
&didsJSON,
1276
-
&bundle.Hash,
1277
-
&bundle.CompressedHash,
1278
-
&bundle.CompressedSize,
1279
-
&bundle.UncompressedSize,
1280
-
&bundle.CumulativeCompressedSize,
1281
-
&bundle.CumulativeUncompressedSize,
1282
-
&cursor,
1283
-
&prevHash,
1284
-
&bundle.Compressed,
1285
-
&bundle.CreatedAt,
1286
-
); err != nil {
1287
-
return nil, err
1288
-
}
1289
-
1290
-
if prevHash.Valid {
1291
-
bundle.PrevBundleHash = prevHash.String
1292
-
}
1293
-
if cursor.Valid {
1294
-
bundle.Cursor = cursor.String
1295
-
}
1296
-
1297
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1298
-
bundles = append(bundles, &bundle)
1299
-
}
1300
-
1301
-
return bundles, rows.Err()
1302
-
}
1303
-
1304
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1305
-
var count, lastBundleNum int64
1306
-
err := p.db.QueryRowContext(ctx, `
1307
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1308
-
FROM plc_bundles
1309
-
`).Scan(&count, &lastBundleNum)
1310
-
if err != nil {
1311
-
return 0, 0, 0, 0, err
1312
-
}
1313
-
1314
-
if lastBundleNum == 0 {
1315
-
return 0, 0, 0, 0, nil
1316
-
}
1317
-
1318
-
var compressedSize, uncompressedSize int64
1319
-
err = p.db.QueryRowContext(ctx, `
1320
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1321
-
FROM plc_bundles
1322
-
WHERE bundle_number = $1
1323
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1324
-
if err != nil {
1325
-
return 0, 0, 0, 0, err
1326
-
}
1327
-
1328
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1329
-
}
1330
-
1331
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1332
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1333
-
var num int
1334
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1335
-
return num, err
1336
-
}
1337
-
1338
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1339
-
query := `
1340
-
SELECT bundle_number
1341
-
FROM plc_bundles
1342
-
WHERE start_time <= $1 AND end_time >= $1
1343
-
ORDER BY bundle_number ASC
1344
-
LIMIT 1
1345
-
`
1346
-
1347
-
var bundleNum int
1348
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1349
-
if err == sql.ErrNoRows {
1350
-
query = `
1351
-
SELECT bundle_number
1352
-
FROM plc_bundles
1353
-
WHERE end_time < $1
1354
-
ORDER BY bundle_number DESC
1355
-
LIMIT 1
1356
-
`
1357
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1358
-
if err == sql.ErrNoRows {
1359
-
return 1, nil
1360
-
}
1361
-
if err != nil {
1362
-
return 0, err
1363
-
}
1364
-
return bundleNum, nil
1365
-
}
1366
-
if err != nil {
1367
-
return 0, err
1368
-
}
1369
-
1370
-
return bundleNum, nil
1371
-
}
1372
-
1373
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1374
-
query := `
1375
-
WITH daily_stats AS (
1376
-
SELECT
1377
-
DATE(start_time) as date,
1378
-
MAX(bundle_number) as last_bundle,
1379
-
COUNT(*) as bundle_count,
1380
-
SUM(uncompressed_size) as total_uncompressed,
1381
-
SUM(compressed_size) as total_compressed,
1382
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1383
-
MAX(cumulative_compressed_size) as cumulative_compressed
1384
-
FROM plc_bundles
1385
-
WHERE bundle_number >= $1
1386
-
GROUP BY DATE(start_time)
1387
-
)
1388
-
SELECT
1389
-
date::text,
1390
-
last_bundle,
1391
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1392
-
total_uncompressed,
1393
-
total_compressed,
1394
-
cumulative_uncompressed,
1395
-
cumulative_compressed
1396
-
FROM daily_stats
1397
-
ORDER BY date ASC
1398
-
`
1399
-
1400
-
if limit > 0 {
1401
-
query += fmt.Sprintf(" LIMIT %d", limit)
1402
-
}
1403
-
1404
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1405
-
if err != nil {
1406
-
return nil, err
1407
-
}
1408
-
defer rows.Close()
1409
-
1410
-
var history []*PLCHistoryPoint
1411
-
for rows.Next() {
1412
-
var point PLCHistoryPoint
1413
-
var cumulativeOps int64
1414
-
1415
-
err := rows.Scan(
1416
-
&point.Date,
1417
-
&point.BundleNumber,
1418
-
&cumulativeOps,
1419
-
&point.UncompressedSize,
1420
-
&point.CompressedSize,
1421
-
&point.CumulativeUncompressed,
1422
-
&point.CumulativeCompressed,
1423
-
)
1424
-
if err != nil {
1425
-
return nil, err
1426
-
}
1427
-
1428
-
point.OperationCount = int(cumulativeOps)
1429
-
1430
-
history = append(history, &point)
1431
-
}
1432
-
1433
-
return history, rows.Err()
1434
-
}
1435
-
1436
-
// ===== MEMPOOL OPERATIONS =====
1437
-
1438
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1439
-
if len(ops) == 0 {
1440
-
return nil
1441
-
}
1442
-
1443
-
tx, err := p.db.BeginTx(ctx, nil)
1444
-
if err != nil {
1445
-
return err
1446
-
}
1447
-
defer tx.Rollback()
1448
-
1449
-
stmt, err := tx.PrepareContext(ctx, `
1450
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1451
-
VALUES ($1, $2, $3, $4)
1452
-
ON CONFLICT(cid) DO NOTHING
1453
-
`)
1454
-
if err != nil {
1455
-
return err
1456
-
}
1457
-
defer stmt.Close()
1458
-
1459
-
for _, op := range ops {
1460
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1461
-
if err != nil {
1462
-
return err
1463
-
}
1464
-
}
1465
-
1466
-
return tx.Commit()
1467
-
}
1468
-
1469
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1470
-
query := "SELECT COUNT(*) FROM plc_mempool"
1471
-
var count int
1472
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1473
-
return count, err
1474
-
}
1475
-
1476
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1477
-
query := `
1478
-
SELECT id, did, operation, cid, created_at, added_at
1479
-
FROM plc_mempool
1480
-
ORDER BY created_at ASC
1481
-
LIMIT $1
1482
-
`
1483
-
1484
-
rows, err := p.db.QueryContext(ctx, query, limit)
1485
-
if err != nil {
1486
-
return nil, err
1487
-
}
1488
-
defer rows.Close()
1489
-
1490
-
var ops []MempoolOperation
1491
-
for rows.Next() {
1492
-
var op MempoolOperation
1493
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1494
-
if err != nil {
1495
-
return nil, err
1496
-
}
1497
-
ops = append(ops, op)
1498
-
}
1499
-
1500
-
return ops, rows.Err()
1501
-
}
1502
-
1503
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1504
-
if len(ids) == 0 {
1505
-
return nil
1506
-
}
1507
-
1508
-
placeholders := make([]string, len(ids))
1509
-
args := make([]interface{}, len(ids))
1510
-
for i, id := range ids {
1511
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1512
-
args[i] = id
1513
-
}
1514
-
1515
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1516
-
strings.Join(placeholders, ","))
1517
-
1518
-
_, err := p.db.ExecContext(ctx, query, args...)
1519
-
return err
1520
-
}
1521
-
1522
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1523
-
query := `
1524
-
SELECT id, did, operation, cid, created_at, added_at
1525
-
FROM plc_mempool
1526
-
ORDER BY created_at ASC, id ASC
1527
-
LIMIT 1
1528
-
`
1529
-
1530
-
var op MempoolOperation
1531
-
err := p.db.QueryRowContext(ctx, query).Scan(
1532
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1533
-
)
1534
-
if err == sql.ErrNoRows {
1535
-
return nil, nil
1536
-
}
1537
-
if err != nil {
1538
-
return nil, err
1539
-
}
1540
-
1541
-
return &op, nil
1542
-
}
1543
-
1544
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1545
-
query := `
1546
-
SELECT id, did, operation, cid, created_at, added_at
1547
-
FROM plc_mempool
1548
-
ORDER BY created_at DESC, id DESC
1549
-
LIMIT 1
1550
-
`
1551
-
1552
-
var op MempoolOperation
1553
-
err := p.db.QueryRowContext(ctx, query).Scan(
1554
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1555
-
)
1556
-
if err == sql.ErrNoRows {
1557
-
return nil, nil
1558
-
}
1559
-
if err != nil {
1560
-
return nil, err
1561
-
}
1562
-
1563
-
return &op, nil
1564
-
}
1565
-
1566
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1567
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1568
-
var count int
1569
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1570
-
return count, err
1571
-
}
1572
-
1573
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1574
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1575
-
var size int64
1576
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1577
-
return size, err
1578
1164
}
1579
1165
1580
1166
// ===== CURSOR OPERATIONS =====
+22
-16
internal/storage/types.go
+22
-16
internal/storage/types.go
···
26
26
LastChecked time.Time
27
27
Status int
28
28
IP string
29
+
IPv6 string
29
30
IPResolvedAt time.Time
31
+
Valid bool
30
32
UpdatedAt time.Time
31
33
}
32
34
···
53
55
Status int
54
56
ResponseTime float64
55
57
UserCount int64
56
-
Version string // NEW: Add this field
58
+
Version string
59
+
UsedIP string // NEW: Track which IP was actually used
57
60
ScanData *EndpointScanData
58
61
ScannedAt time.Time
59
62
}
···
74
77
75
78
// EndpointFilter for querying endpoints
76
79
type EndpointFilter struct {
77
-
Type string // "pds", "labeler", etc.
80
+
Type string
78
81
Status string
79
82
MinUserCount int64
80
-
OnlyStale bool // NEW: Only return endpoints that need re-checking
81
-
RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale
83
+
OnlyStale bool
84
+
OnlyValid bool
85
+
RecheckInterval time.Duration
86
+
Random bool
82
87
Limit int
83
88
Offset int
84
89
}
···
117
122
StartTime time.Time
118
123
EndTime time.Time
119
124
BoundaryCIDs []string
120
-
DIDs []string
125
+
DIDCount int // Changed from DIDs []string
121
126
Hash string
122
127
CompressedHash string
123
128
CompressedSize int64
···
150
155
CumulativeCompressed int64 `json:"cumulative_compressed"`
151
156
}
152
157
153
-
// MempoolOperation represents an operation waiting to be bundled
154
-
type MempoolOperation struct {
155
-
ID int64
156
-
DID string
157
-
Operation string
158
-
CID string
159
-
CreatedAt time.Time
160
-
AddedAt time.Time
161
-
}
162
-
163
158
// ScanCursor stores scanning progress
164
159
type ScanCursor struct {
165
160
Source string
···
194
189
ASNOrg string `json:"asn_org,omitempty"`
195
190
IsDatacenter bool `json:"is_datacenter"`
196
191
IsVPN bool `json:"is_vpn"`
192
+
IsCrawler bool `json:"is_crawler"`
193
+
IsTor bool `json:"is_tor"`
194
+
IsProxy bool `json:"is_proxy"`
197
195
Latitude float32 `json:"latitude,omitempty"`
198
196
Longitude float32 `json:"longitude,omitempty"`
199
197
RawData map[string]interface{} `json:"raw_data,omitempty"`
···
201
199
UpdatedAt time.Time `json:"updated_at"`
202
200
}
203
201
202
+
// IsHome returns true if this is a residential/home IP
203
+
// (not crawler, datacenter, tor, proxy, or vpn)
204
+
func (i *IPInfo) IsHome() bool {
205
+
return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN
206
+
}
207
+
204
208
// PDSListItem is a virtual type created by JOIN for /pds endpoint
205
209
type PDSListItem struct {
206
210
// From endpoints table
207
211
ID int64
208
212
Endpoint string
209
-
ServerDID string // NEW: Add this
213
+
ServerDID string
210
214
DiscoveredAt time.Time
211
215
LastChecked time.Time
212
216
Status int
213
217
IP string
218
+
IPv6 string
219
+
Valid bool // NEW
214
220
215
221
// From latest endpoint_scans (via JOIN)
216
222
LatestScan *struct {
+2
-2
internal/worker/scheduler.go
+2
-2
internal/worker/scheduler.go
+113
utils/import-labels.js
+113
utils/import-labels.js
···
1
+
import { file, write } from "bun";
import { join } from "path";
import { mkdir } from "fs/promises";
import { init, compress } from "@bokuweb/zstd-wasm";

// Imports a "bundle,..." CSV of PLC operation labels, groups rows by bundle
// number, and writes one zstd-compressed CSV per bundle into
// <plc.bundle_dir>/labels/ (e.g. 000042.csv.zst).

// --- Configuration ---
const CSV_FILE = process.argv[2];
const CONFIG_FILE = "config.yaml";
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
// ---------------------

if (!CSV_FILE) {
  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
  process.exit(1);
}

console.log("========================================");
console.log("PLC Operation Labels Import (Bun + WASM)");
console.log("========================================");

// 1. Read and parse config
console.log(`Loading config from ${CONFIG_FILE}...`);
const configFile = await file(CONFIG_FILE).text();
const config = Bun.YAML.parse(configFile);
const bundleDir = config?.plc?.bundle_dir;

if (!bundleDir) {
  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
  process.exit(1);
}

const FINAL_LABELS_DIR = join(bundleDir, "labels");
await mkdir(FINAL_LABELS_DIR, { recursive: true });

console.log(`CSV File: ${CSV_FILE}`);
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
console.log("");

// 2. Initialize Zstd WASM module
await init();

// --- Pass 1: Read entire file into memory and group by bundle ---
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
console.warn("This will use a large amount of RAM!");

const startTime = Date.now();
const bundles = new Map(); // Map<string, string[]>
let lineCount = 0;

// Route one raw CSV line into its bundle bucket.
// The bundle number is the first column; keys are zero-padded to 6 digits
// so they match the on-disk bundle file naming (000042.csv.zst).
function ingestLine(line) {
  if (line === "") return;
  lineCount++;

  if (lineCount === 1 && line.startsWith("bundle,")) {
    return; // Skip header
  }

  const firstCommaIndex = line.indexOf(",");
  if (firstCommaIndex === -1) {
    console.warn(`Skipping malformed line: ${line}`);
    return;
  }
  const bundleNumStr = line.substring(0, firstCommaIndex);
  const bundleKey = bundleNumStr.padStart(6, "0");

  // Add line to the correct bundle's array
  if (!bundles.has(bundleKey)) {
    bundles.set(bundleKey, []);
  }
  bundles.get(bundleKey).push(line);
}

const inputFile = file(CSV_FILE);
const fileStream = inputFile.stream();
const decoder = new TextDecoder();
let remainder = "";

for await (const chunk of fileStream) {
  // BUG FIX: { stream: true } keeps multi-byte UTF-8 sequences that are
  // split across chunk boundaries intact; without it each partial sequence
  // was decoded to U+FFFD replacement characters.
  const text = remainder + decoder.decode(chunk, { stream: true });
  const lines = text.split("\n");
  remainder = lines.pop() || "";

  for (const line of lines) {
    ingestLine(line);
  }
}
// Flush any bytes the decoder is still buffering from the final chunk.
remainder += decoder.decode();

// BUG FIX: the remainder is NOT always empty — when the CSV has no trailing
// newline it holds the final record, which was previously dropped silently.
ingestLine(remainder);

console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
console.log(`Found ${bundles.size} unique bundles.`);

// --- Pass 2: Compress and write each bundle ---
console.log("\nPass 2/2: Compressing and writing bundle files...");
let i = 0;
for (const [bundleKey, lines] of bundles.entries()) {
  i++;
  console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);

  // Join all lines for this bundle into one big string
  const content = lines.join("\n");

  // Compress the string
  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);

  // Write the compressed data to the file
  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
  await write(outPath, compressedData);
}

// 3. Clean up
const totalTime = (Date.now() - startTime) / 1000;
console.log("\n========================================");
console.log("Import Summary");
console.log("========================================");
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
+91
utils/import-labels.sh
+91
utils/import-labels.sh
···
1
+
#!/bin/bash
# import-labels-v4-sorted-pipe.sh
#
# Single-pass importer: streams a "bundle,..." CSV through sort (by bundle
# number) and splits it into one zstd-compressed file per bundle under
# <plc.bundle_dir>/labels/.

set -e

if [ $# -lt 1 ]; then
    echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>"
    exit 1
fi

CSV_FILE="$1"
CONFIG_FILE="config.yaml"

[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1

# Extract bundle directory path (looks for `bundle_dir: "..."` in the plc: section)
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)

[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1

FINAL_LABELS_DIR="$BUNDLE_DIR/labels"

echo "========================================"
echo "PLC Operation Labels Import (Sorted Pipe)"
echo "========================================"
echo "CSV File: $CSV_FILE"
echo "Output Dir: $FINAL_LABELS_DIR"
echo ""

# Ensure the final directory exists
mkdir -p "$FINAL_LABELS_DIR"

echo "Streaming, sorting, and compressing on the fly..."
# BUG FIX: these messages were double-quoted, so the backticks around `pv`
# and `sort` were executed as command substitutions (`pv` with no args
# blocks reading stdin). Single quotes keep the backticks literal.
echo 'This will take time. `pv` will show progress of the TAIL command.'
echo 'The `sort` command will run after `pv` is complete.'
echo ""

# This is the single-pass pipeline:
#   tail  — drop the CSV header
#   pv    — line-based progress (needs the total line count up front)
#   sort  — numeric sort on column 1 (bundle number) so awk sees each
#           bundle as one contiguous run
tail -n +2 "$CSV_FILE" | \
pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
sort -t, -k1,1n | \
awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
# This awk script EXPECTS input sorted by bundle number (col 1)
BEGIN {
    # last_bundle_num tracks the bundle we are currently writing
    last_bundle_num = -1
    # cmd holds the current zstd pipe command
    cmd = ""
}
{
    current_bundle_num = $1

    # Check if the bundle number has changed
    if (current_bundle_num != last_bundle_num) {

        # If it changed, and we have an old pipe open, close it
        if (last_bundle_num != -1) {
            close(cmd)
        }

        # Create the new pipe command, writing to the final .zst file.
        # -f: overwrite an existing file (without it zstd aborts on re-runs
        #     since stdin is the pipe, not a tty).
        # The outfile is quoted so paths containing spaces survive the shell.
        outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
        cmd = "zstd -T0 -f -o \"" outfile "\""

        # Update the tracker
        last_bundle_num = current_bundle_num

        # Print progress to stderr
        printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
    }

    # Print the current line ($0) to the open pipe
    # The first time this runs for a bundle, it opens the pipe
    # Subsequent times, it writes to the already-open pipe
    print $0 | cmd
}
# END block: close the very last pipe
END {
    if (last_bundle_num != -1) {
        close(cmd)
    }
    printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
}'

echo ""
echo "========================================"
echo "Import Summary"
echo "========================================"
echo "✓ Import completed successfully!"
echo "Label files are stored in: $FINAL_LABELS_DIR"