+2 -1  .gitignore
+39 -5  Makefile
···
- all: run
+ .PHONY: all build install test clean fmt lint help
+
+ # Binary name
+ BINARY_NAME=atscand
+ INSTALL_PATH=$(GOPATH)/bin
+
+ # Go commands
+ GOCMD=go
+ GOBUILD=$(GOCMD) build
+ GOINSTALL=$(GOCMD) install
+ GOCLEAN=$(GOCMD) clean
+ GOTEST=$(GOCMD) test
+ GOGET=$(GOCMD) get
+ GOFMT=$(GOCMD) fmt
+ GOMOD=$(GOCMD) mod
+ GORUN=$(GOCMD) run
+
+ # Default target
+ all: build
+
+ # Build the CLI tool
+ build:
+     @echo "Building $(BINARY_NAME)..."
+     $(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+ # Install the CLI tool globally
+ install:
+     @echo "Installing $(BINARY_NAME)..."
+     $(GOINSTALL) ./cmd/atscand

  run:
-     go run cmd/atscanner.go -verbose
+     $(GORUN) cmd/atscand/main.go -verbose

- clean-db:
-     dropdb -U atscanner atscanner
-     createdb atscanner -O atscanner
+ update-plcbundle:
+     GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+ # Show help
+ help:
+     @echo "Available targets:"
+     @echo "  make build    - Build the binary"
+     @echo "  make install  - Install binary globally"
+     @echo "  make run      - Run app"
+159  cmd/atscand/main.go
···
+ package main
+
+ import (
+     "context"
+     "flag"
+     "fmt"
+     "os"
+     "os/signal"
+     "syscall"
+     "time"
+
+     "github.com/atscan/atscand/internal/api"
+     "github.com/atscan/atscand/internal/config"
+     "github.com/atscan/atscand/internal/log"
+     "github.com/atscan/atscand/internal/pds"
+     "github.com/atscan/atscand/internal/plc"
+     "github.com/atscan/atscand/internal/storage"
+     "github.com/atscan/atscand/internal/worker"
+ )
+
+ const VERSION = "1.0.0"
+
+ func main() {
+     configPath := flag.String("config", "config.yaml", "path to config file")
+     verbose := flag.Bool("verbose", false, "enable verbose logging")
+     flag.Parse()
+
+     // Load configuration
+     cfg, err := config.Load(*configPath)
+     if err != nil {
+         fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+         os.Exit(1)
+     }
+
+     // Override verbose setting if flag is provided
+     if *verbose {
+         cfg.API.Verbose = true
+     }
+
+     // Initialize logger
+     log.Init(cfg.API.Verbose)
+
+     // Print banner
+     log.Banner(VERSION)
+
+     // Print configuration summary
+     log.PrintConfig(map[string]string{
+         "Database Type":     cfg.Database.Type,
+         "Database Path":     cfg.Database.Path, // Will be auto-redacted
+         "PLC Directory":     cfg.PLC.DirectoryURL,
+         "PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+         "PLC Bundle Dir":    cfg.PLC.BundleDir,
+         "PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+         "PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+         "PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+         "PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+         "PDS Timeout":       cfg.PDS.Timeout.String(),
+         "API Host":          cfg.API.Host,
+         "API Port":          fmt.Sprintf("%d", cfg.API.Port),
+         "Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+     })
+
+     // Initialize database using factory pattern
+     db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+     if err != nil {
+         log.Fatal("Failed to initialize database: %v", err)
+     }
+     defer func() {
+         log.Info("Closing database connection...")
+         db.Close()
+     }()
+
+     // Set scan retention from config
+     if cfg.PDS.ScanRetention > 0 {
+         db.SetScanRetention(cfg.PDS.ScanRetention)
+         log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+     }
+
+     // Run migrations
+     if err := db.Migrate(); err != nil {
+         log.Fatal("Failed to run migrations: %v", err)
+     }
+
+     ctx, cancel := context.WithCancel(context.Background())
+     defer cancel()
+
+     // Initialize workers
+     log.Info("Initializing scanners...")
+
+     bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+     if err != nil {
+         log.Fatal("Failed to create bundle manager: %v", err)
+     }
+     defer bundleManager.Close()
+     log.Verbose("✓ Bundle manager initialized (shared)")
+
+     plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+     defer plcScanner.Close()
+     log.Verbose("✓ PLC scanner initialized")
+
+     pdsScanner := pds.NewScanner(db, cfg.PDS)
+     log.Verbose("✓ PDS scanner initialized")
+
+     scheduler := worker.NewScheduler()
+
+     // Schedule PLC directory scan
+     scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+         if err := plcScanner.Scan(ctx); err != nil {
+             log.Error("PLC scan error: %v", err)
+         }
+     })
+     log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+     // Schedule PDS availability checks
+     scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+         if err := pdsScanner.ScanAll(ctx); err != nil {
+             log.Error("PDS scan error: %v", err)
+         }
+     })
+     log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+     // Start API server
+     log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+     apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+     go func() {
+         if err := apiServer.Start(); err != nil {
+             log.Fatal("API server error: %v", err)
+         }
+     }()
+
+     // Give the API server a moment to start
+     time.Sleep(100 * time.Millisecond)
+     log.Info("✓ API server started successfully")
+     log.Info("")
+     log.Info("🚀 ATScanner is running!")
+     log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+     log.Info("   Press Ctrl+C to stop")
+     log.Info("")
+
+     // Start scheduler
+     scheduler.Start(ctx)
+
+     // Wait for interrupt
+     sigChan := make(chan os.Signal, 1)
+     signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+     <-sigChan
+
+     log.Info("")
+     log.Info("Shutting down gracefully...")
+     cancel()
+
+     log.Info("Stopping API server...")
+     apiServer.Shutdown(context.Background())
+
+     log.Info("Waiting for active tasks to complete...")
+     time.Sleep(2 * time.Second)
+
+     log.Info("✓ Shutdown complete. Goodbye!")
+ }
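
Aside: the internal/worker package that main.go calls into (NewScheduler, AddJob, Start) is not part of this diff. A minimal ticker-based sketch of a scheduler compatible with those calls, for illustration only; the names and behavior here are assumptions, not the repository's actual implementation:

package worker

import (
    "context"
    "time"
)

type job struct {
    name     string
    interval time.Duration
    fn       func()
}

type Scheduler struct {
    jobs []job
}

func NewScheduler() *Scheduler { return &Scheduler{} }

// AddJob registers a function to run on a fixed interval.
func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
    s.jobs = append(s.jobs, job{name, interval, fn})
}

// Start launches one goroutine per job and returns immediately, which
// matches how main.go calls scheduler.Start(ctx) before blocking on signals.
func (s *Scheduler) Start(ctx context.Context) {
    for _, j := range s.jobs {
        go func(j job) {
            ticker := time.NewTicker(j.interval)
            defer ticker.Stop()
            for {
                select {
                case <-ctx.Done():
                    return
                case <-ticker.C:
                    j.fn()
                }
            }
        }(j)
    }
}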

-152  cmd/atscanner.go
···
- package main
-
- import (
-     "context"
-     "flag"
-     "fmt"
-     "os"
-     "os/signal"
-     "syscall"
-     "time"
-
-     "github.com/atscan/atscanner/internal/api"
-     "github.com/atscan/atscanner/internal/config"
-     "github.com/atscan/atscanner/internal/log"
-     "github.com/atscan/atscanner/internal/pds"
-     "github.com/atscan/atscanner/internal/plc"
-     "github.com/atscan/atscanner/internal/storage"
-     "github.com/atscan/atscanner/internal/worker"
- )
-
- const VERSION = "1.0.0"
-
- func main() {
-     configPath := flag.String("config", "config.yaml", "path to config file")
-     verbose := flag.Bool("verbose", false, "enable verbose logging")
-     flag.Parse()
-
-     // Load configuration
-     cfg, err := config.Load(*configPath)
-     if err != nil {
-         fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
-         os.Exit(1)
-     }
-
-     // Override verbose setting if flag is provided
-     if *verbose {
-         cfg.API.Verbose = true
-     }
-
-     // Initialize logger
-     log.Init(cfg.API.Verbose)
-
-     // Print banner
-     log.Banner(VERSION)
-
-     // Print configuration summary
-     log.PrintConfig(map[string]string{
-         "Database Type":     cfg.Database.Type,
-         "Database Path":     cfg.Database.Path, // Will be auto-redacted
-         "PLC Directory":     cfg.PLC.DirectoryURL,
-         "PLC Scan Interval": cfg.PLC.ScanInterval.String(),
-         "PLC Bundle Dir":    cfg.PLC.BundleDir,
-         "PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
-         "PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
-         "PDS Scan Interval": cfg.PDS.ScanInterval.String(),
-         "PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
-         "PDS Timeout":       cfg.PDS.Timeout.String(),
-         "API Host":          cfg.API.Host,
-         "API Port":          fmt.Sprintf("%d", cfg.API.Port),
-         "Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
-     })
-
-     // Initialize database using factory pattern
-     db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
-     if err != nil {
-         log.Fatal("Failed to initialize database: %v", err)
-     }
-     defer func() {
-         log.Info("Closing database connection...")
-         db.Close()
-     }()
-
-     // Set scan retention from config
-     if cfg.PDS.ScanRetention > 0 {
-         db.SetScanRetention(cfg.PDS.ScanRetention)
-         log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
-     }
-
-     // Run migrations
-     if err := db.Migrate(); err != nil {
-         log.Fatal("Failed to run migrations: %v", err)
-     }
-
-     ctx, cancel := context.WithCancel(context.Background())
-     defer cancel()
-
-     // Initialize workers
-     log.Info("Initializing scanners...")
-
-     plcScanner := plc.NewScanner(db, cfg.PLC)
-     defer plcScanner.Close()
-     log.Verbose("✓ PLC scanner initialized")
-
-     pdsScanner := pds.NewScanner(db, cfg.PDS)
-     log.Verbose("✓ PDS scanner initialized")
-
-     scheduler := worker.NewScheduler()
-
-     // Schedule PLC directory scan
-     scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
-         if err := plcScanner.Scan(ctx); err != nil {
-             log.Error("PLC scan error: %v", err)
-         }
-     })
-     log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
-
-     // Schedule PDS availability checks
-     scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
-         if err := pdsScanner.ScanAll(ctx); err != nil {
-             log.Error("PDS scan error: %v", err)
-         }
-     })
-     log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
-
-     // Start API server
-     log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
-     apiServer := api.NewServer(db, cfg.API, cfg.PLC)
-     go func() {
-         if err := apiServer.Start(); err != nil {
-             log.Fatal("API server error: %v", err)
-         }
-     }()
-
-     // Give the API server a moment to start
-     time.Sleep(100 * time.Millisecond)
-     log.Info("✓ API server started successfully")
-     log.Info("")
-     log.Info("🚀 ATScanner is running!")
-     log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
-     log.Info("   Press Ctrl+C to stop")
-     log.Info("")
-
-     // Start scheduler
-     scheduler.Start(ctx)
-
-     // Wait for interrupt
-     sigChan := make(chan os.Signal, 1)
-     signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-     <-sigChan
-
-     log.Info("")
-     log.Info("Shutting down gracefully...")
-     cancel()
-
-     log.Info("Stopping API server...")
-     apiServer.Shutdown(context.Background())
-
-     log.Info("Waiting for active tasks to complete...")
-     time.Sleep(2 * time.Second)
-
-     log.Info("✓ Shutdown complete. Goodbye!")
- }

+168  cmd/import-labels/main.go
···
+ package main
+
+ import (
+     "bufio"
+     "flag"
+     "fmt"
+     "os"
+     "path/filepath"
+     "strings"
+     "time"
+
+     "github.com/klauspost/compress/zstd"
+     "gopkg.in/yaml.v3"
+ )
+
+ type Config struct {
+     PLC struct {
+         BundleDir string `yaml:"bundle_dir"`
+     } `yaml:"plc"`
+ }
+
+ var CONFIG_FILE = "config.yaml"
+
+ // ---------------------
+
+ func main() {
+     // Define a new flag for changing the directory
+     workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+     flag.Usage = func() { // Custom usage message
+         fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+         fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+         flag.PrintDefaults()
+     }
+     flag.Parse() // Parse all defined flags
+
+     // Change directory if the flag was used
+     if *workDir != "." {
+         fmt.Printf("Changing working directory to %s...\n", *workDir)
+         if err := os.Chdir(*workDir); err != nil {
+             fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+             os.Exit(1)
+         }
+     }
+
+     // --- REMOVED UNUSED CODE ---
+     // The csvFilePath variable and NArg check were removed
+     // as the script now reads from stdin.
+     // ---------------------------
+
+     fmt.Println("========================================")
+     fmt.Println("PLC Operation Labels Import (Go STDIN)")
+     fmt.Println("========================================")
+
+     // 1. Read config (will now read from the new CWD)
+     fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+     configData, err := os.ReadFile(CONFIG_FILE)
+     if err != nil {
+         fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+         os.Exit(1)
+     }
+
+     var config Config
+     if err := yaml.Unmarshal(configData, &config); err != nil {
+         fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+         os.Exit(1)
+     }
+
+     if config.PLC.BundleDir == "" {
+         fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+         os.Exit(1)
+     }
+
+     finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+     if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+         fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+         os.Exit(1)
+     }
+
+     fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+     fmt.Println("Waiting for sorted data from stdin...")
+
+     // 2. Process sorted data from stdin
+     // This script *requires* the input to be sorted by bundle number.
+
+     var currentWriter *zstd.Encoder
+     var currentFile *os.File
+     var lastBundleKey string = ""
+
+     lineCount := 0
+     startTime := time.Now()
+
+     scanner := bufio.NewScanner(os.Stdin)
+     scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+     for scanner.Scan() {
+         line := scanner.Text()
+         lineCount++
+
+         parts := strings.SplitN(line, ",", 2)
+         if len(parts) < 1 {
+             continue // Skip empty/bad lines
+         }
+
+         bundleNumStr := parts[0]
+         bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+         // If the bundle key is new, close the old writer and open a new one.
+         if bundleKey != lastBundleKey {
+             // Close the previous writer/file
+             if currentWriter != nil {
+                 if err := currentWriter.Close(); err != nil {
+                     fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+                 }
+                 currentFile.Close()
+             }
+
+             // Start the new one
+             fmt.Printf("  -> Writing bundle %s\n", bundleKey)
+             outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+             file, err := os.Create(outPath)
+             if err != nil {
+                 fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+                 os.Exit(1)
+             }
+             currentFile = file
+
+             writer, err := zstd.NewWriter(file)
+             if err != nil {
+                 fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+                 os.Exit(1)
+             }
+             currentWriter = writer
+             lastBundleKey = bundleKey
+         }
+
+         // Write the line to the currently active writer
+         if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+             fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+         }
+
+         // Progress update
+         if lineCount%100000 == 0 {
+             elapsed := time.Since(startTime).Seconds()
+             rate := float64(lineCount) / elapsed
+             fmt.Printf("  ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+         }
+     }
+
+     // 3. Close the very last writer
+     if currentWriter != nil {
+         if err := currentWriter.Close(); err != nil {
+             fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+         }
+         currentFile.Close()
+     }
+
+     if err := scanner.Err(); err != nil {
+         fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+     }
+
+     totalTime := time.Since(startTime)
+     fmt.Println("\n========================================")
+     fmt.Println("Import Summary")
+     fmt.Println("========================================")
+     fmt.Printf("✓ Import completed in %v\n", totalTime)
+     fmt.Printf("Total lines processed: %d\n", lineCount)
+ }
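
Aside: a quick way to sanity-check the .csv.zst files this tool writes is to stream one back out with the same klauspost/compress library. A minimal sketch; the path below is only an example:

package main

import (
    "fmt"
    "io"
    "os"

    "github.com/klauspost/compress/zstd"
)

func main() {
    // Example path; point this at a real bundle file.
    f, err := os.Open("plc_bundles/labels/000001.csv.zst")
    if err != nil {
        fmt.Fprintf(os.Stderr, "open: %v\n", err)
        os.Exit(1)
    }
    defer f.Close()

    dec, err := zstd.NewReader(f)
    if err != nil {
        fmt.Fprintf(os.Stderr, "zstd: %v\n", err)
        os.Exit(1)
    }
    defer dec.Close()

    // Decompress straight to stdout; pipe through `head` to inspect.
    if _, err := io.Copy(os.Stdout, dec); err != nil {
        fmt.Fprintf(os.Stderr, "copy: %v\n", err)
        os.Exit(1)
    }
}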

+22  config.sample.yaml
···
+ database:
+   type: "postgres"  # or "sqlite"
+   path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
+   # For SQLite: path: "atscan.db"
+
+ plc:
+   directory_url: "https://plc.directory"
+   scan_interval: "5s"
+   bundle_dir: "./plc_bundles"
+   use_cache: true
+   index_dids: true
+
+ pds:
+   scan_interval: "30m"
+   timeout: "30s"
+   workers: 20
+   recheck_interval: "1.5h"
+   scan_retention: 20
+
+ api:
+   host: "0.0.0.0"
+   port: 8080
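
Aside: the internal/config loader is not part of this diff. One detail worth noting is that yaml.v3 does not decode time.Duration from strings like "5s" or "30m" on its own, so the loader presumably parses them explicitly. A minimal sketch of that pattern, assuming a custom Duration type (the struct below covers only a slice of the sample config):

package main

import (
    "fmt"
    "os"
    "time"

    "gopkg.in/yaml.v3"
)

// Duration wraps time.Duration so "5s"/"30m"/"1.5h" strings decode cleanly.
type Duration time.Duration

func (d *Duration) UnmarshalYAML(node *yaml.Node) error {
    var s string
    if err := node.Decode(&s); err != nil {
        return err
    }
    parsed, err := time.ParseDuration(s)
    if err != nil {
        return err
    }
    *d = Duration(parsed)
    return nil
}

type Config struct {
    PLC struct {
        DirectoryURL string   `yaml:"directory_url"`
        ScanInterval Duration `yaml:"scan_interval"`
        BundleDir    string   `yaml:"bundle_dir"`
    } `yaml:"plc"`
}

func main() {
    data, err := os.ReadFile("config.yaml")
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    var cfg Config
    if err := yaml.Unmarshal(data, &cfg); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    fmt.Println("PLC scan every", time.Duration(cfg.PLC.ScanInterval))
}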

+6 -5  go.mod
···
- module github.com/atscan/atscanner
+ module github.com/atscan/atscand

  go 1.23.0

  require (
      github.com/gorilla/mux v1.8.1
      github.com/lib/pq v1.10.9
-     github.com/mattn/go-sqlite3 v1.14.18
      gopkg.in/yaml.v3 v3.0.1
  )

- require github.com/klauspost/compress v1.18.0
+ require github.com/klauspost/compress v1.18.1

  require (
-     github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
      github.com/gorilla/handlers v1.5.2
+     github.com/jackc/pgx/v5 v5.7.6
+     tangled.org/atscan.net/plcbundle v0.3.6
  )

  require (
      github.com/felixge/httpsnoop v1.0.3 // indirect
      github.com/jackc/pgpassfile v1.0.0 // indirect
      github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-     github.com/jackc/pgx/v5 v5.7.6 // indirect
      github.com/jackc/puddle/v2 v2.2.2 // indirect
+     github.com/kr/text v0.2.0 // indirect
+     github.com/rogpeppe/go-internal v1.14.1 // indirect
      golang.org/x/crypto v0.37.0 // indirect
      golang.org/x/sync v0.13.0 // indirect
      golang.org/x/text v0.24.0 // indirect

+17 -7  go.sum
···
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
  github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
  github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
  github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
  github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
  github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
  github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
- github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
- github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+ github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+ github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+ github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
  github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
  github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
- github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
- github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
  github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+ github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
  github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
  github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
  github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
  golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
  golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
  golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
  golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
  golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
  golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
- gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
  gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
  gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
  gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
  gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
  gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+ tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+ tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+420
-456
internal/api/handlers.go
+420
-456
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
47
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
48
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
49
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
50
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
51
49
}
52
50
53
51
// ===== REQUEST HELPERS =====
···
77
75
78
76
// ===== FORMATTING HELPERS =====
79
77
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": len(bundle.DIDs),
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
-
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
100
80
"id": ep.ID,
···
103
83
"discovered_at": ep.DiscoveredAt,
104
84
"last_checked": ep.LastChecked,
105
85
"status": statusToString(ep.Status),
106
-
// REMOVED: "user_count": ep.UserCount, // No longer exists
107
86
}
108
87
109
-
// Add IP if available
88
+
// Add IPs if available
110
89
if ep.IP != "" {
111
90
response["ip"] = ep.IP
112
91
}
113
-
114
-
// REMOVED: IP info extraction - no longer in Endpoint struct
115
-
// IPInfo is now in separate table, joined only in PDS handlers
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
94
+
}
116
95
117
96
return response
118
97
}
···
165
144
resp.json(stats)
166
145
}
167
146
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
152
+
endpointType := r.URL.Query().Get("type")
153
+
if endpointType == "" {
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
156
+
}
157
+
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
170
+
if err != nil {
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
168
183
// ===== PDS HANDLERS =====
169
184
170
185
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
···
233
248
"endpoint": pds.Endpoint,
234
249
"discovered_at": pds.DiscoveredAt,
235
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
236
252
}
237
253
238
254
// Add server_did if available
···
257
273
}
258
274
}
259
275
260
-
// Add IP if available
276
+
// Add IPs if available
261
277
if pds.IP != "" {
262
278
response["ip"] = pds.IP
279
+
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
263
282
}
264
283
265
284
// Add IP info (from ip_infos table via JOIN)
···
276
295
if pds.IPInfo.ASN > 0 {
277
296
response["asn"] = pds.IPInfo.ASN
278
297
}
279
-
if pds.IPInfo.IsDatacenter {
280
-
response["is_datacenter"] = pds.IPInfo.IsDatacenter
281
-
}
298
+
299
+
// Add all network type flags
300
+
response["is_datacenter"] = pds.IPInfo.IsDatacenter
301
+
response["is_vpn"] = pds.IPInfo.IsVPN
302
+
response["is_crawler"] = pds.IPInfo.IsCrawler
303
+
response["is_tor"] = pds.IPInfo.IsTor
304
+
response["is_proxy"] = pds.IPInfo.IsProxy
305
+
306
+
// Add computed is_home field
307
+
response["is_home"] = pds.IPInfo.IsHome()
282
308
}
283
309
284
310
return response
···
316
342
}
317
343
}
318
344
319
-
// Add full IP info
345
+
// Add full IP info with computed is_home field
320
346
if pds.IPInfo != nil {
321
-
response["ip_info"] = pds.IPInfo
347
+
// Convert IPInfo to map
348
+
ipInfoMap := make(map[string]interface{})
349
+
ipInfoJSON, _ := json.Marshal(pds.IPInfo)
350
+
json.Unmarshal(ipInfoJSON, &ipInfoMap)
351
+
352
+
// Add computed is_home field
353
+
ipInfoMap["is_home"] = pds.IPInfo.IsHome()
354
+
355
+
response["ip_info"] = ipInfoMap
322
356
}
323
357
324
358
return response
···
333
367
"scanned_at": scan.ScannedAt,
334
368
}
335
369
370
+
if scan.Status != storage.EndpointStatusOnline && scan.ScanData != nil && scan.ScanData.Metadata != nil {
371
+
if errorMsg, ok := scan.ScanData.Metadata["error"].(string); ok && errorMsg != "" {
372
+
scanMap["error"] = errorMsg
373
+
}
374
+
}
375
+
336
376
if scan.ResponseTime > 0 {
337
377
scanMap["response_time"] = scan.ResponseTime
338
378
}
339
379
340
-
// NEW: Add version if available
341
380
if scan.Version != "" {
342
381
scanMap["version"] = scan.Version
382
+
}
383
+
384
+
if scan.UsedIP != "" {
385
+
scanMap["used_ip"] = scan.UsedIP
343
386
}
344
387
345
388
// Use the top-level UserCount field first
···
450
493
resp.json(stats)
451
494
}
452
495
496
+
// ===== GLOBAL DID HANDLER =====
497
+
498
+
// handleGetGlobalDID provides a consolidated view of a DID
499
+
func (s *Server) handleGetGlobalDID(w http.ResponseWriter, r *http.Request) {
500
+
resp := newResponse(w)
501
+
vars := mux.Vars(r)
502
+
did := vars["did"]
503
+
ctx := r.Context()
504
+
505
+
// Get DID info (now includes handle and pds from database)
506
+
didInfo, err := s.db.GetGlobalDIDInfo(ctx, did)
507
+
if err != nil {
508
+
if err == sql.ErrNoRows {
509
+
if !s.plcIndexDIDs {
510
+
resp.error("DID not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
511
+
} else {
512
+
resp.error("DID not found in PLC index.", http.StatusNotFound)
513
+
}
514
+
} else {
515
+
resp.error(err.Error(), http.StatusInternalServerError)
516
+
}
517
+
return
518
+
}
519
+
520
+
// Optionally include latest operation details if requested
521
+
var latestOperation *plc.PLCOperation
522
+
if r.URL.Query().Get("include_operation") == "true" && len(didInfo.BundleNumbers) > 0 {
523
+
lastBundleNum := didInfo.BundleNumbers[len(didInfo.BundleNumbers)-1]
524
+
ops, err := s.bundleManager.LoadBundleOperations(ctx, lastBundleNum)
525
+
if err != nil {
526
+
log.Error("Failed to load bundle %d for DID %s: %v", lastBundleNum, did, err)
527
+
} else {
528
+
// Find latest operation for this DID (in reverse)
529
+
for i := len(ops) - 1; i >= 0; i-- {
530
+
if ops[i].DID == did {
531
+
latestOperation = &ops[i]
532
+
break
533
+
}
534
+
}
535
+
}
536
+
}
537
+
538
+
result := map[string]interface{}{
539
+
"did": didInfo.DID,
540
+
"handle": didInfo.Handle, // From database!
541
+
"current_pds": didInfo.CurrentPDS, // From database!
542
+
"plc_index_created_at": didInfo.CreatedAt,
543
+
"plc_bundle_history": didInfo.BundleNumbers,
544
+
"pds_hosting_on": didInfo.HostingOn,
545
+
}
546
+
547
+
// Only include operation if requested
548
+
if latestOperation != nil {
549
+
result["latest_plc_operation"] = latestOperation
550
+
}
551
+
552
+
resp.json(result)
553
+
}
554
+
555
+
// handleGetDIDByHandle resolves a handle to a DID
556
+
func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) {
557
+
resp := newResponse(w)
558
+
vars := mux.Vars(r)
559
+
handle := vars["handle"]
560
+
561
+
// Normalize handle (remove @ prefix if present)
562
+
handle = strings.TrimPrefix(handle, "@")
563
+
564
+
// Look up DID by handle
565
+
didRecord, err := s.db.GetDIDByHandle(r.Context(), handle)
566
+
if err != nil {
567
+
if err == sql.ErrNoRows {
568
+
if !s.plcIndexDIDs {
569
+
resp.error("Handle not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
570
+
} else {
571
+
resp.error("Handle not found.", http.StatusNotFound)
572
+
}
573
+
} else {
574
+
resp.error(err.Error(), http.StatusInternalServerError)
575
+
}
576
+
return
577
+
}
578
+
579
+
// Return just the handle and DID
580
+
resp.json(map[string]string{
581
+
"handle": handle,
582
+
"did": didRecord.DID,
583
+
})
584
+
}
585
+
453
586
// ===== DID HANDLERS =====
454
587
455
588
func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) {
···
551
684
return
552
685
}
553
686
554
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
555
-
if err != nil {
556
-
resp.error(err.Error(), http.StatusInternalServerError)
557
-
return
558
-
}
559
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
560
688
resp.json(map[string]interface{}{
561
689
"total_unique_dids": totalDIDs,
562
690
"last_bundle": lastBundle,
···
567
695
568
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
569
697
resp := newResponse(w)
570
-
571
698
bundleNum, err := getBundleNumber(r)
572
699
if err != nil {
573
700
resp.error("invalid bundle number", http.StatusBadRequest)
574
701
return
575
702
}
576
703
577
-
// Try to get existing bundle
578
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
579
-
if err == nil {
580
-
// Bundle exists, return it normally
581
-
resp.json(formatBundleResponse(bundle))
582
-
return
583
-
}
584
-
585
-
// Bundle not found - check if it's the next upcoming bundle
586
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
587
707
if err != nil {
588
-
resp.error("bundle not found", http.StatusNotFound)
589
-
return
590
-
}
591
-
592
-
if bundleNum == lastBundle+1 {
593
-
// This is the upcoming bundle - return preview based on mempool
594
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
595
-
if err != nil {
596
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
597
717
return
598
718
}
599
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
600
720
return
601
721
}
602
722
603
-
// Not an upcoming bundle, just not found
604
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
605
724
}
606
725
607
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
608
-
// Get mempool stats
609
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
610
-
if err != nil {
611
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
612
743
}
744
+
}
613
745
614
-
if mempoolCount == 0 {
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
615
752
return map[string]interface{}{
616
753
"plc_bundle_number": bundleNum,
617
754
"is_upcoming": true,
···
621
758
}, nil
622
759
}
623
760
624
-
// Get first and last operations for time range
625
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
626
-
if err != nil {
627
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
628
771
}
629
772
630
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
631
-
if err != nil {
632
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
633
775
}
634
776
635
-
// Get unique DID count
636
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
637
-
if err != nil {
638
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
639
780
}
640
-
641
-
// Get uncompressed size estimate
642
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
643
-
if err != nil {
644
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
645
783
}
646
784
647
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
648
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
649
-
650
-
// Calculate completion estimate
651
-
var estimatedCompletionTime *time.Time
652
-
var operationsNeeded int
653
-
var currentRate float64
654
-
655
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
656
-
657
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
658
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
659
-
if timeSpan > 0 {
660
-
currentRate = float64(mempoolCount) / timeSpan
661
-
if currentRate > 0 {
662
-
secondsNeeded := float64(operationsNeeded) / currentRate
663
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
664
-
estimatedCompletionTime = &completionTime
665
-
}
666
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
667
789
}
668
790
669
-
// Get previous bundle for cursor context
670
-
var prevBundleHash string
671
-
var cursor string
791
+
// Get previous bundle info
672
792
if bundleNum > 1 {
673
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
674
-
if err == nil {
675
-
prevBundleHash = prevBundle.Hash
676
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
677
-
}
678
-
}
679
-
680
-
// Determine bundle status
681
-
status := "filling"
682
-
if mempoolCount >= plc.BUNDLE_SIZE {
683
-
status = "ready"
684
-
}
685
-
686
-
// Build upcoming bundle response
687
-
result := map[string]interface{}{
688
-
"plc_bundle_number": bundleNum,
689
-
"is_upcoming": true,
690
-
"status": status,
691
-
"operation_count": mempoolCount,
692
-
"target_operation_count": plc.BUNDLE_SIZE,
693
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
694
-
"operations_needed": operationsNeeded,
695
-
"did_count": uniqueDIDCount,
696
-
"start_time": firstOp.CreatedAt, // This is FIXED once first op exists
697
-
"current_end_time": lastOp.CreatedAt, // This will change as more ops arrive
698
-
"uncompressed_size": uncompressedSize,
699
-
"estimated_compressed_size": estimatedCompressedSize,
700
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
701
-
"prev_bundle_hash": prevBundleHash,
702
-
"cursor": cursor,
703
-
}
704
-
705
-
if estimatedCompletionTime != nil {
706
-
result["estimated_completion_time"] = *estimatedCompletionTime
707
-
result["current_rate_per_second"] = currentRate
708
-
}
709
-
710
-
// Get actual mempool operations if requested
711
-
if r.URL.Query().Get("include_dids") == "true" {
712
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
713
-
if err == nil {
714
-
// Extract unique DIDs
715
-
didSet := make(map[string]bool)
716
-
for _, op := range ops {
717
-
didSet[op.DID] = true
718
-
}
719
-
dids := make([]string, 0, len(didSet))
720
-
for did := range didSet {
721
-
dids = append(dids, did)
722
-
}
723
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
724
796
}
725
797
}
726
798
···
736
808
return
737
809
}
738
810
739
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
740
813
if err != nil {
741
814
resp.error("bundle not found", http.StatusNotFound)
742
815
return
743
816
}
744
817
745
818
resp.json(map[string]interface{}{
746
-
"plc_bundle_number": bundle.BundleNumber,
747
-
"did_count": len(bundle.DIDs),
748
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
749
822
})
750
823
}
751
824
···
760
833
761
834
compressed := r.URL.Query().Get("compressed") != "false"
762
835
763
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
764
837
if err == nil {
765
838
// Bundle exists, serve it normally
766
839
resp.bundleHeaders(bundle)
···
774
847
}
775
848
776
849
// Bundle not found - check if it's the upcoming bundle
777
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
778
-
if err != nil {
779
-
resp.error("bundle not found", http.StatusNotFound)
780
-
return
781
-
}
782
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
783
851
if bundleNum == lastBundle+1 {
784
852
// This is the upcoming bundle - serve from mempool
785
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
786
854
return
787
855
}
788
856
···
790
858
resp.error("bundle not found", http.StatusNotFound)
791
859
}
792
860
793
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
794
-
ctx := r.Context()
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
795
865
796
-
// Get mempool count
797
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
798
-
if err != nil {
799
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
800
-
return
801
-
}
802
-
803
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
804
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
805
868
return
806
869
}
807
870
808
-
// Get mempool operations (up to BUNDLE_SIZE)
809
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
810
873
if err != nil {
811
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
812
875
return
813
876
}
814
877
815
-
if len(mempoolOps) == 0 {
816
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
817
880
return
818
881
}
819
882
820
-
// Get time range
821
-
firstOp := mempoolOps[0]
822
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
823
886
824
887
// Extract unique DIDs
825
888
didSet := make(map[string]bool)
826
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
827
890
didSet[op.DID] = true
891
+
}
892
+
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
828
897
}
829
898
830
899
// Get previous bundle hash
831
900
prevBundleHash := ""
832
901
if bundleNum > 1 {
833
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
834
903
prevBundleHash = prevBundle.Hash
835
904
}
836
905
}
837
906
838
-
// Serialize operations to JSONL
839
-
var buf []byte
840
-
for _, mop := range mempoolOps {
841
-
buf = append(buf, []byte(mop.Operation)...)
842
-
buf = append(buf, '\n')
843
-
}
844
-
845
-
// Calculate size
846
-
uncompressedSize := int64(len(buf))
847
-
848
907
// Set headers
849
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
850
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
851
910
w.Header().Set("X-Bundle-Status", "preview")
852
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
853
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
854
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
855
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
856
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
857
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
858
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
859
919
860
920
w.Header().Set("Content-Type", "application/jsonl")
861
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
862
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
863
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
864
922
923
+
// Stream operations as JSONL
865
924
w.WriteHeader(http.StatusOK)
866
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
867
937
}
868
938
869
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
870
940
resp := newResponse(w)
871
-
path := bundle.GetFilePath(s.plcBundleDir)
872
941
873
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
874
944
if err != nil {
875
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
876
946
return
877
947
}
878
-
defer file.Close()
879
-
880
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
881
949
882
950
w.Header().Set("Content-Type", "application/zstd")
883
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
884
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
885
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
886
954
887
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
888
958
}
889
959
890
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
891
961
resp := newResponse(w)
892
962
893
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
894
965
if err != nil {
895
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
896
967
return
897
968
}
898
-
899
-
// Serialize to JSONL
900
-
var buf []byte
901
-
for _, op := range ops {
902
-
buf = append(buf, op.RawJSON...)
903
-
buf = append(buf, '\n')
904
-
}
905
-
906
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
907
-
compressedSize := int64(0)
908
-
if fileInfo != nil {
909
-
compressedSize = fileInfo.Size()
910
-
}
969
+
defer reader.Close()
911
970
912
971
w.Header().Set("Content-Type", "application/jsonl")
913
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
914
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
915
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
916
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
917
-
if compressedSize > 0 {
918
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
919
978
}
920
979
980
+
// Stream the data directly to the response
921
981
w.WriteHeader(http.StatusOK)
922
-
w.Write(buf)
982
+
io.Copy(w, reader)
923
983
}
924
984
925
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
926
986
resp := newResponse(w)
927
987
limit := getQueryInt(r, "limit", 50)
928
988
929
-
bundles, err := s.db.GetBundles(r.Context(), limit)
930
-
if err != nil {
931
-
resp.error(err.Error(), http.StatusInternalServerError)
932
-
return
933
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
934
990
935
991
response := make([]map[string]interface{}, len(bundles))
936
992
for i, bundle := range bundles {
937
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
938
994
}
939
995
940
996
resp.json(response)
···
943
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
944
1000
resp := newResponse(w)
945
1001
946
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
947
-
if err != nil {
948
-
resp.error(err.Error(), http.StatusInternalServerError)
949
-
return
950
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
951
1008
952
1009
resp.json(map[string]interface{}{
953
-
"plc_bundle_count": count,
954
-
"last_bundle_number": lastBundle,
955
-
"total_compressed_size": compressedSize,
956
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
957
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
958
-
"total_uncompressed_size": uncompressedSize,
959
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
960
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
961
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
962
1015
})
963
1016
}
964
1017
···
966
1019
967
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
968
1021
resp := newResponse(w)
969
-
ctx := r.Context()
970
1022
971
-
count, err := s.db.GetMempoolCount(ctx)
972
-
if err != nil {
973
-
resp.error(err.Error(), http.StatusInternalServerError)
974
-
return
975
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
976
1025
977
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
978
-
if err != nil {
979
-
resp.error(err.Error(), http.StatusInternalServerError)
980
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
981
1030
}
982
1031
983
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
984
-
if err != nil {
985
-
resp.error(err.Error(), http.StatusInternalServerError)
986
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
987
1036
}
988
1037
989
-
result := map[string]interface{}{
990
-
"operation_count": count,
991
-
"unique_did_count": uniqueDIDCount,
992
-
"uncompressed_size": uncompressedSize,
993
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
994
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
995
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
996
1042
997
-
if count > 0 {
998
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
999
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
1000
1045
1001
-
if count < plc.BUNDLE_SIZE {
1002
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
1003
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
1004
1049
if timeSpan > 0 {
1005
1050
opsPerSecond := float64(count) / timeSpan
1006
1051
if opsPerSecond > 0 {
1007
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
1008
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
1009
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1057
+
result["current_rate_per_second"] = opsPerSecond
1010
1058
result["operations_needed"] = remainingOps
1011
-
result["current_rate_per_second"] = opsPerSecond
1012
1059
}
1013
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
1014
1066
}
1015
-
} else {
1016
-
result["estimated_next_bundle_time"] = time.Now()
1017
-
result["operations_needed"] = 0
1018
1067
}
1019
1068
}
1020
1069
} else {
1070
+
// Empty mempool
1021
1071
result["mempool_start_time"] = nil
1022
1072
result["estimated_next_bundle_time"] = nil
1023
1073
}
···
 
 // ===== VERIFICATION HANDLERS =====
 
-func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
-	resp := newResponse(w)
-	vars := mux.Vars(r)
-
-	bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
-	if err != nil {
-		resp.error("Invalid bundle number", http.StatusBadRequest)
-		return
-	}
-
-	bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
-	if err != nil {
-		resp.error("Bundle not found", http.StatusNotFound)
-		return
-	}
-
-	// Fetch from PLC and verify
-	remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
-	if err != nil {
-		resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
-		return
-	}
-
-	remoteHash := computeOperationsHash(remoteOps)
-	verified := bundle.Hash == remoteHash
-
-	resp.json(map[string]interface{}{
-		"bundle_number":      bundleNumber,
-		"verified":           verified,
-		"local_hash":         bundle.Hash,
-		"remote_hash":        remoteHash,
-		"local_op_count":     plc.BUNDLE_SIZE,
-		"remote_op_count":    len(remoteOps),
-		"boundary_cids_used": len(prevCIDs),
-	})
-}
-
-func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
-	var after string
-	var prevBoundaryCIDs map[string]bool
-
-	if bundleNum > 1 {
-		prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
-		if err != nil {
-			return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
-		}
-
-		after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
-
-		if len(prevBundle.BoundaryCIDs) > 0 {
-			prevBoundaryCIDs = make(map[string]bool)
-			for _, cid := range prevBundle.BoundaryCIDs {
-				prevBoundaryCIDs[cid] = true
-			}
-		}
-	}
-
-	var allRemoteOps []plc.PLCOperation
-	seenCIDs := make(map[string]bool)
-
-	for cid := range prevBoundaryCIDs {
-		seenCIDs[cid] = true
-	}
-
-	currentAfter := after
-	maxFetches := 20
-
-	for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
-		batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
-			Count: 1000,
-			After: currentAfter,
-		})
-		if err != nil || len(batch) == 0 {
-			break
-		}
-
-		for _, op := range batch {
-			if !seenCIDs[op.CID] {
-				seenCIDs[op.CID] = true
-				allRemoteOps = append(allRemoteOps, op)
-				if len(allRemoteOps) >= plc.BUNDLE_SIZE {
-					break
-				}
-			}
-		}
-
-		if len(batch) > 0 {
-			lastOp := batch[len(batch)-1]
-			currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
-		}
-
-		if len(batch) < 1000 {
-			break
-		}
-	}
-
-	if len(allRemoteOps) > plc.BUNDLE_SIZE {
-		allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
-	}
-
-	return allRemoteOps, prevBoundaryCIDs, nil
-}
-
 func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
 	resp := newResponse(w)
-	ctx := r.Context()
 
-	lastBundle, err := s.db.GetLastBundleNumber(ctx)
-	if err != nil {
-		resp.error(err.Error(), http.StatusInternalServerError)
-		return
-	}
-
+	lastBundle := s.bundleManager.GetLastBundleNumber()
 	if lastBundle == 0 {
 		resp.json(map[string]interface{}{
 			"status": "empty",
···
 	var errorMsg string
 
 	for i := 1; i <= lastBundle; i++ {
-		bundle, err := s.db.GetBundleByNumber(ctx, i)
+		bundle, err := s.bundleManager.GetBundleMetadata(i)
 		if err != nil {
 			valid = false
 			brokenAt = i
···
 		}
 
 		if i > 1 {
-			prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
+			prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
 			if err != nil {
 				valid = false
 				brokenAt = i
···
 				break
 			}
 
-			if bundle.PrevBundleHash != prevBundle.Hash {
+			if bundle.Parent != prevBundle.Hash {
 				valid = false
 				brokenAt = i
-				errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
+				errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
 				break
 			}
 		}
···
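The loop above is the whole integrity argument: every bundle records its parent's hash, so walking 1..lastBundle and comparing `Parent` against the previous bundle's `Hash` detects any edit or gap in the chain. A self-contained sketch of the same check — the `Meta` struct is illustrative; only the `Hash`/`Parent` field names mirror the diff:

```go
package main

import "fmt"

type Meta struct {
	Number int
	Hash   string
	Parent string
}

// verifyChain returns the first bundle number whose Parent does not match
// the previous bundle's Hash, or 0 if the chain is intact.
func verifyChain(bundles []Meta) int {
	for i := 1; i < len(bundles); i++ {
		if bundles[i].Parent != bundles[i-1].Hash {
			return bundles[i].Number
		}
	}
	return 0
}

func main() {
	chain := []Meta{
		{1, "aaa", ""},
		{2, "bbb", "aaa"},
		{3, "ccc", "XXX"}, // broken link
	}
	fmt.Println(verifyChain(chain)) // 3
}
```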
 
 func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
 	resp := newResponse(w)
-	ctx := r.Context()
 
-	lastBundle, err := s.db.GetLastBundleNumber(ctx)
-	if err != nil {
-		resp.error(err.Error(), http.StatusInternalServerError)
-		return
-	}
-
+	lastBundle := s.bundleManager.GetLastBundleNumber()
 	if lastBundle == 0 {
 		resp.json(map[string]interface{}{
 			"chain_length": 0,
···
 		return
 	}
 
-	firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
-	lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
-
-	// Updated to receive 5 values instead of 3
-	count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
-	if err != nil {
-		resp.error(err.Error(), http.StatusInternalServerError)
-		return
-	}
+	firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
+	lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
+	stats := s.bundleManager.GetBundleStats()
 
 	resp.json(map[string]interface{}{
-		"chain_length":               lastBundle,
-		"total_bundles":              count,
-		"total_compressed_size":      compressedSize,
-		"total_compressed_size_mb":   float64(compressedSize) / 1024 / 1024,
-		"total_uncompressed_size":    uncompressedSize,
-		"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
-		"compression_ratio":          float64(uncompressedSize) / float64(compressedSize),
-		"chain_start_time":           firstBundle.StartTime,
-		"chain_end_time":             lastBundleData.EndTime,
-		"chain_head_hash":            lastBundleData.Hash,
-		"first_prev_hash":            firstBundle.PrevBundleHash,
-		"last_prev_hash":             lastBundleData.PrevBundleHash,
+		"chain_length":             lastBundle,
+		"total_bundles":            stats["bundle_count"],
+		"total_compressed_size":    stats["total_size"],
+		"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
+		"chain_start_time":         firstBundle.StartTime,
+		"chain_end_time":           lastBundleData.EndTime,
+		"chain_head_hash":          lastBundleData.Hash,
+		"first_parent":             firstBundle.Parent,
+		"last_parent":              lastBundleData.Parent,
 	})
 }
 
···
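One caveat with the new stats map: `stats["total_size"].(int64)` is an unchecked type assertion, so a missing key or a differently typed value panics the handler. A defensive variant using the comma-ok form (a sketch, not part of the diff):

```go
package main

import "fmt"

// totalSizeMB reads "total_size" with a comma-ok assertion instead of the
// panic-prone direct assertion used in the handler above.
func totalSizeMB(stats map[string]interface{}) float64 {
	size, ok := stats["total_size"].(int64)
	if !ok {
		return 0 // or surface an error to the caller
	}
	return float64(size) / 1024 / 1024
}

func main() {
	fmt.Println(totalSizeMB(map[string]interface{}{"total_size": int64(5 << 20)})) // 5
}
```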
 		return
 	}
 
-	startBundle := s.findStartBundle(ctx, afterTime)
+	startBundle := s.findStartBundle(afterTime)
 	ops := s.collectOperations(ctx, startBundle, afterTime, count)
 
 	w.Header().Set("Content-Type", "application/jsonl")
···
 	return time.Time{}, fmt.Errorf("invalid timestamp format")
 }
 
-func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
+func (s *Server) findStartBundle(afterTime time.Time) int {
 	if afterTime.IsZero() {
 		return 1
 	}
 
-	foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
-	if err != nil {
-		return 1
-	}
-
+	foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
 	if foundBundle > 1 {
 		return foundBundle - 1
 	}
···
 	var allOps []plc.PLCOperation
 	seenCIDs := make(map[string]bool)
 
-	lastBundle, _ := s.db.GetLastBundleNumber(ctx)
+	lastBundle := s.bundleManager.GetLastBundleNumber()
 
 	for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
 		ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
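findStartBundle deliberately returns `foundBundle - 1`: operations sharing the boundary timestamp can straddle two bundles, so collection starts one bundle early and relies on the `seenCIDs` set to drop the duplicates. A self-contained sketch of that dedup-by-CID pattern:

```go
package main

import "fmt"

type Op struct{ CID string }

// collect walks bundles in order and keeps each CID once, up to limit.
func collect(bundles [][]Op, limit int) []Op {
	seen := make(map[string]bool)
	var out []Op
	for _, ops := range bundles {
		for _, op := range ops {
			if len(out) >= limit {
				return out
			}
			if !seen[op.CID] {
				seen[op.CID] = true
				out = append(out, op)
			}
		}
	}
	return out
}

func main() {
	bundles := [][]Op{{{"a"}, {"b"}}, {{"b"}, {"c"}}} // "b" straddles the boundary
	fmt.Println(len(collect(bundles, 10)))            // 3
}
```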
 	limit := getQueryInt(r, "limit", 0)
 	fromBundle := getQueryInt(r, "from", 1)
 
-	history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
+	// Use BundleManager instead of database
+	history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
 	if err != nil {
 		resp.error(err.Error(), http.StatusInternalServerError)
 		return
···
 	resp.json(result)
 }
 
-// ===== UTILITY FUNCTIONS =====
+// ===== DEBUG HANDLERS =====
+
+func (s *Server) handleGetDBSizes(w http.ResponseWriter, r *http.Request) {
+	resp := newResponse(w)
+	ctx := r.Context()
+	schema := "public" // Or make configurable if needed
 
-func computeOperationsHash(ops []plc.PLCOperation) string {
-	var jsonlData []byte
-	for _, op := range ops {
-		jsonlData = append(jsonlData, op.RawJSON...)
-		jsonlData = append(jsonlData, '\n')
+	tableSizes, err := s.db.GetTableSizes(ctx, schema)
+	if err != nil {
+		log.Error("Failed to get table sizes: %v", err)
+		resp.error("Failed to retrieve table sizes", http.StatusInternalServerError)
+		return
+	}
+
+	indexSizes, err := s.db.GetIndexSizes(ctx, schema)
+	if err != nil {
+		log.Error("Failed to get index sizes: %v", err)
+		resp.error("Failed to retrieve index sizes", http.StatusInternalServerError)
+		return
 	}
-	hash := sha256.Sum256(jsonlData)
-	return hex.EncodeToString(hash[:])
+
+	resp.json(map[string]interface{}{
+		"schema":      schema,
+		"tables":      tableSizes,
+		"indexes":     indexSizes,
+		"retrievedAt": time.Now().UTC(),
+	})
 }
+
+func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
+	resp := newResponse(w)
+
+	bundleNum, err := getBundleNumber(r)
+	if err != nil {
+		resp.error("invalid bundle number", http.StatusBadRequest)
+		return
+	}
+
+	labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
+	if err != nil {
+		resp.error(err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	resp.json(map[string]interface{}{
+		"bundle": bundleNum,
+		"count":  len(labels),
+		"labels": labels,
+	})
+}
+
+// ===== UTILITY FUNCTIONS =====
 
 func normalizeEndpoint(endpoint string) string {
 	endpoint = strings.TrimPrefix(endpoint, "https://")
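The storage methods behind the debug handler (GetTableSizes, GetIndexSizes) are not part of this diff. For reference, a Postgres query of the kind GetTableSizes might wrap — treat this as an assumption about the implementation, not the actual code:

```go
// Hypothetical query for per-table sizes in a given schema ($1);
// pg_total_relation_size includes indexes and TOAST data.
const tableSizesSQL = `
SELECT c.relname AS table_name,
       pg_total_relation_size(c.oid) AS total_bytes
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE n.nspname = $1 AND c.relkind = 'r'
ORDER BY total_bytes DESC;`
```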
+17 -13 internal/api/server.go
···
 	"net/http"
 	"time"
 
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
 	"github.com/gorilla/handlers"
 	"github.com/gorilla/mux"
 )
···
 	router *mux.Router
 	server *http.Server
 	db     storage.Database
-	plcClient     *plc.Client
 	plcBundleDir  string
 	bundleManager *plc.BundleManager
+	plcIndexDIDs  bool
 }
 
-func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
-	bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
-
+func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
 	s := &Server{
 		router:        mux.NewRouter(),
 		db:            db,
-		plcClient:     plc.NewClient(plcCfg.DirectoryURL),
 		plcBundleDir:  plcCfg.BundleDir,
-		bundleManager: bundleManager,
+		bundleManager: bundleManager, // Use provided shared instance
+		plcIndexDIDs:  plcCfg.IndexDIDs,
 	}
 
 	s.setupRoutes()
···
 	// Generic endpoints (keep as-is)
 	api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
 	api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
+	api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
 
 	// PDS-specific endpoints (virtual, created via JOINs)
 	api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
···
 	api.HandleFunc("/pds/{endpoint}/repos", s.handleGetPDSRepos).Methods("GET")
 	api.HandleFunc("/pds/{endpoint}/repos/stats", s.handleGetPDSRepoStats).Methods("GET")
 	api.HandleFunc("/pds/repos/{did}", s.handleGetDIDRepos).Methods("GET")
+
+	// Global DID routes
+	api.HandleFunc("/did/{did}", s.handleGetGlobalDID).Methods("GET")
+	api.HandleFunc("/handle/{handle}", s.handleGetDIDByHandle).Methods("GET") // NEW
 
 	// PLC Bundle routes
 	api.HandleFunc("/plc/bundles", s.handleGetPLCBundles).Methods("GET")
···
 	api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
 	api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
 	api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
-	api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
+	api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
 
 	// PLC history/metrics
 	api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
···
 	// DID routes
 	api.HandleFunc("/plc/did/{did}", s.handleGetDID).Methods("GET")
 	api.HandleFunc("/plc/did/{did}/history", s.handleGetDIDHistory).Methods("GET")
-	api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET") // NEW
+	api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET")
 
 	// Mempool routes
 	api.HandleFunc("/mempool/stats", s.handleGetMempoolStats).Methods("GET")
···
 	// Metrics routes
 	api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET")
 
-	// Job status endpoint
+	// Debug endpoints
+	api.HandleFunc("/debug/db/sizes", s.handleGetDBSizes).Methods("GET")
 	api.HandleFunc("/jobs", s.handleGetJobStatus).Methods("GET")
 
 	// Health check
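NewServer now receives the BundleManager instead of constructing its own, so the API and the PLC worker operate on a single shared instance (one mempool, one bundle index). A wiring sketch of how main presumably passes it in — the surrounding main.go changes are not shown here, so treat the variable names as illustrative:

```go
// In main(), after config and database setup (sketch, assuming the
// constructors visible elsewhere in this diff):
bm, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
if err != nil {
	log.Fatal("Failed to create bundle manager: %v", err)
}
defer bm.Close()

srv := api.NewServer(db, cfg.API, cfg.PLC, bm) // same instance goes to the worker
```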
+36 -13 internal/ipinfo/client.go
···
 	return ipInfo, nil
 }
 
-// ExtractIPFromEndpoint extracts IP from endpoint URL
-func ExtractIPFromEndpoint(endpoint string) (string, error) {
+// IPAddresses holds both IPv4 and IPv6 addresses
+type IPAddresses struct {
+	IPv4 string
+	IPv6 string
+}
+
+// ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL
+func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) {
 	// Parse URL
 	parsedURL, err := url.Parse(endpoint)
 	if err != nil {
-		return "", fmt.Errorf("failed to parse endpoint URL: %w", err)
+		return nil, fmt.Errorf("failed to parse endpoint URL: %w", err)
 	}
 
 	host := parsedURL.Hostname()
 	if host == "" {
-		return "", fmt.Errorf("no hostname in endpoint")
+		return nil, fmt.Errorf("no hostname in endpoint")
 	}
+
+	result := &IPAddresses{}
 
 	// Check if host is already an IP
-	if net.ParseIP(host) != nil {
-		return host, nil
+	if ip := net.ParseIP(host); ip != nil {
+		if ip.To4() != nil {
+			result.IPv4 = host
+		} else {
+			result.IPv6 = host
+		}
+		return result, nil
 	}
 
-	// Resolve hostname to IP
+	// Resolve hostname to IPs
 	ips, err := net.LookupIP(host)
 	if err != nil {
-		return "", fmt.Errorf("failed to resolve hostname: %w", err)
+		return nil, fmt.Errorf("failed to resolve hostname: %w", err)
 	}
 
 	if len(ips) == 0 {
-		return "", fmt.Errorf("no IPs found for hostname")
+		return nil, fmt.Errorf("no IPs found for hostname")
 	}
 
-	// Return first IPv4 address
+	// Extract both IPv4 and IPv6
 	for _, ip := range ips {
 		if ipv4 := ip.To4(); ipv4 != nil {
-			return ipv4.String(), nil
+			if result.IPv4 == "" {
+				result.IPv4 = ipv4.String()
+			}
+		} else {
+			if result.IPv6 == "" {
+				result.IPv6 = ip.String()
+			}
 		}
 	}
 
-	// Fallback to first IP (might be IPv6)
-	return ips[0].String(), nil
+	// Must have at least one IP
+	if result.IPv4 == "" && result.IPv6 == "" {
+		return nil, fmt.Errorf("no valid IPs found")
+	}
+
+	return result, nil
 }
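Callers that previously handled a single string now get a struct in which either field may be empty (v4-only or v6-only hosts). A usage sketch with an illustrative endpoint:

```go
package main

import (
	"fmt"

	"github.com/atscan/atscand/internal/ipinfo"
)

func main() {
	// Endpoint is illustrative; either field may come back empty.
	ips, err := ipinfo.ExtractIPsFromEndpoint("https://pds.example.com")
	if err != nil {
		fmt.Println("resolve failed:", err)
		return
	}
	fmt.Printf("IPv4=%q IPv6=%q\n", ips.IPv4, ips.IPv6)
}
```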
+6 -2 internal/log/log.go
···
 	errorLog = log.New(os.Stderr, "", 0)
 }
 
-// timestamp returns current time in ISO 8601 format
+// timestamp returns current time with milliseconds (local time, no timezone)
 func timestamp() string {
-	return time.Now().Format(time.RFC3339)
+	return time.Now().Format("2006-01-02T15:04:05.000")
 }
 
 func Verbose(format string, v ...interface{}) {
···
 
 func Info(format string, v ...interface{}) {
 	infoLog.Printf("%s [INFO] %s", timestamp(), fmt.Sprintf(format, v...))
 }
+
+func Warn(format string, v ...interface{}) {
+	infoLog.Printf("%s [WARN] %s", timestamp(), fmt.Sprintf(format, v...))
+}
 
 func Error(format string, v ...interface{}) {
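Go time layouts are written against the reference time Mon Jan 2 15:04:05 MST 2006, so the new layout string means an ISO-like local date-time with fixed three-digit milliseconds. A quick check:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.Date(2024, 5, 17, 9, 30, 0, 120_000_000, time.UTC)
	fmt.Println(t.Format("2006-01-02T15:04:05.000")) // 2024-05-17T09:30:00.120
}
```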
+37 -8 internal/pds/client.go
···
 	"context"
 	"encoding/json"
 	"fmt"
+	"net"
 	"net/http"
 	"time"
 )
···
 }
 
 // DescribeServer fetches com.atproto.server.describeServer
-func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
+// Returns: description, responseTime, usedIP, error
+func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
+	startTime := time.Now()
 	url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
 
-	//fmt.Println(url)
+	// Track which IP was used
+	var usedIP string
+	transport := &http.Transport{
+		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+			conn, err := (&net.Dialer{
+				Timeout:   30 * time.Second,
+				KeepAlive: 30 * time.Second,
+			}).DialContext(ctx, network, addr)
+
+			if err == nil && conn != nil {
+				if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
+					if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
+						usedIP = tcpAddr.IP.String()
+					}
+				}
+			}
+			return conn, err
+		},
+	}
+
+	client := &http.Client{
+		Timeout:   c.httpClient.Timeout,
+		Transport: transport,
+	}
 
 	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
 	if err != nil {
-		return nil, err
+		return nil, 0, "", err
 	}
 
-	resp, err := c.httpClient.Do(req)
+	resp, err := client.Do(req)
+	responseTime := time.Since(startTime)
+
 	if err != nil {
-		return nil, err
+		return nil, responseTime, usedIP, err
 	}
 	defer resp.Body.Close()
 
 	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+		return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
 	}
 
 	var desc ServerDescription
 	if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
-		return nil, err
+		return nil, responseTime, usedIP, err
 	}
 
-	return &desc, nil
+	return &desc, responseTime, usedIP, nil
 }
 
 // CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
+// Returns: available, responseTime, version, error
 func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
 	startTime := time.Now()
 
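The interesting part of the new DescribeServer is the DialContext hook: it records the remote address of the TCP connection the request actually used, which is how UsedIP later distinguishes v4 from v6 connectivity. Building a fresh Transport per call costs connection reuse, but it guarantees the captured IP belongs to this request rather than a pooled one. A standalone sketch of the same trick (URL illustrative):

```go
package main

import (
	"context"
	"fmt"
	"net"
	"net/http"
)

func main() {
	var usedIP string
	client := &http.Client{
		Transport: &http.Transport{
			// Capture the peer address of the dialed connection.
			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
				conn, err := (&net.Dialer{}).DialContext(ctx, network, addr)
				if err == nil {
					if tcpAddr, ok := conn.RemoteAddr().(*net.TCPAddr); ok {
						usedIP = tcpAddr.IP.String()
					}
				}
				return conn, err
			},
		},
	}

	resp, err := client.Get("https://example.com")
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("connected to:", usedIP)
}
```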
+53 -38 internal/pds/scanner.go
···
 	"sync/atomic"
 	"time"
 
-	"github.com/acarl005/stripansi"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/ipinfo"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/monitor"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/ipinfo"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/monitor"
+	"github.com/atscan/atscand/internal/storage"
 )
 
 type Scanner struct {
···
 	servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
 		Type:            "pds",
 		OnlyStale:       true,
+		OnlyValid:       true,
 		RecheckInterval: s.config.RecheckInterval,
 	})
 	if err != nil {
···
 }
 
 func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) {
-	// STEP 1: Resolve IP (before any network call)
-	ip, err := ipinfo.ExtractIPFromEndpoint(ep.Endpoint)
+	// STEP 1: Resolve IPs (both IPv4 and IPv6)
+	ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
 	if err != nil {
-		// Mark as offline due to DNS failure
 		s.saveScanResult(ctx, ep.ID, &ScanResult{
 			Status:       storage.EndpointStatusOffline,
 			ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
 		return
 	}
 
-	// Update IP immediately
-	s.db.UpdateEndpointIP(ctx, ep.ID, ip, time.Now().UTC())
+	// Update IPs immediately
+	s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC())
 
-	// STEP 2: Health check
-	available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
-	if err != nil || !available {
-		errMsg := "health check failed"
-		if err != nil {
-			errMsg = err.Error()
-		}
+	// STEP 1.5: Fetch IP info asynchronously for both IPs
+	if ips.IPv4 != "" {
+		go s.updateIPInfoIfNeeded(ctx, ips.IPv4)
+	}
+	if ips.IPv6 != "" {
+		go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
+	}
+
+	// STEP 2: Call describeServer (primary health check + metadata)
+	desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
+	if err != nil {
 		s.saveScanResult(ctx, ep.ID, &ScanResult{
 			Status:       storage.EndpointStatusOffline,
-			ResponseTime: responseTime,
-			ErrorMessage: errMsg,
+			ResponseTime: descResponseTime,
+			ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
+			UsedIP:       usedIP,
 		})
 		return
 	}
 
-	// STEP 3: Fetch PDS-specific data
-	desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
-	if err != nil {
-		log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
-	} else if desc != nil && desc.DID != "" {
+	// Update server DID immediately
+	if desc.DID != "" {
 		s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
 	}
 
-	// Fetch repos with full info
+	// STEP 3: Call _health to get version
+	available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
+	if err != nil || !available {
+		log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
+		// Server is online (describeServer worked) but _health failed
+		// Continue with empty version
+		version = ""
+	}
+
+	// Calculate average response time from both calls
+	avgResponseTime := descResponseTime
+	if available {
+		avgResponseTime = (descResponseTime + healthResponseTime) / 2
+	}
+
+	// STEP 4: Fetch repos
 	repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
 	if err != nil {
 		log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
 		repoList = []Repo{}
 	}
 
-	// Convert to DIDs for backward compatibility
+	// Convert to DIDs
 	dids := make([]string, len(repoList))
 	for i, repo := range repoList {
 		dids[i] = repo.DID
 	}
 
-	// STEP 4: SAVE scan result
+	// STEP 5: SAVE scan result
 	s.saveScanResult(ctx, ep.ID, &ScanResult{
 		Status:       storage.EndpointStatusOnline,
-		ResponseTime: responseTime,
+		ResponseTime: avgResponseTime,
 		Description:  desc,
 		DIDs:         dids,
 		Version:      version,
+		UsedIP:       usedIP, // Only from describeServer
 	})
 
-	// Save repos in batches (only tracks changes)
+	// STEP 6: Save repos in batches (only tracks changes)
 	if len(repoList) > 0 {
-		batchSize := 10000
+		batchSize := 100_000
 
 		log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
···
 
 		log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
 	}
-
-	// STEP 5: Fetch IP info if needed (async, with backoff)
-	go s.updateIPInfoIfNeeded(ctx, ip)
 }
 
 func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
···
 		Metadata: make(map[string]interface{}),
 	}
 
-	var userCount int64 // NEW: Declare user count
+	var userCount int64
 
 	// Add PDS-specific metadata
 	if result.Status == storage.EndpointStatusOnline {
-		userCount = int64(len(result.DIDs))         // NEW: Get user count
-		scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness
+		userCount = int64(len(result.DIDs))
+		scanData.Metadata["user_count"] = userCount
 		if result.Description != nil {
 			scanData.Metadata["server_info"] = result.Description
 		}
···
 		Status:       result.Status,
 		ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms
 		UserCount:    userCount,
-		Version:      result.Version, // NEW: Set the version field
+		Version:      result.Version,
+		UsedIP:       result.UsedIP, // NEW
 		ScanData:     scanData,
 		ScannedAt:    time.Now().UTC(),
 	}
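The repo-saving loop itself is collapsed in this diff; only the new batch size (100_000, using Go's digit-separator literal) is visible. The usual slice-batching pattern it implies, as a self-contained sketch:

```go
package main

import "fmt"

func main() {
	items := make([]int, 250_000)
	batchSize := 100_000

	// Walk the slice in fixed-size windows, clamping the final batch.
	for start := 0; start < len(items); start += batchSize {
		end := start + batchSize
		if end > len(items) {
			end = len(items)
		}
		fmt.Printf("batch %d..%d (%d items)\n", start, end, end-start)
	}
}
```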
+2 -1 internal/pds/types.go
-662 internal/plc/bundle.go
···
-package plc
-
-import (
-	"bufio"
-	"bytes"
-	"context"
-	"crypto/sha256"
-	"encoding/hex"
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"time"
-
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/storage"
-	"github.com/klauspost/compress/zstd"
-)
-
-const BUNDLE_SIZE = 10000
-
-type BundleManager struct {
-	dir       string
-	enabled   bool
-	encoder   *zstd.Encoder
-	decoder   *zstd.Decoder
-	db        storage.Database
-	indexDIDs bool
-}
-
-// ===== INITIALIZATION =====
-
-func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
-	if !enabled {
-		return &BundleManager{enabled: false}, nil
-	}
-
-	if err := os.MkdirAll(dir, 0755); err != nil {
-		return nil, fmt.Errorf("failed to create bundle dir: %w", err)
-	}
-
-	encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
-	if err != nil {
-		return nil, err
-	}
-
-	decoder, err := zstd.NewReader(nil)
-	if err != nil {
-		return nil, err
-	}
-
-	return &BundleManager{
-		dir:       dir,
-		enabled:   enabled,
-		encoder:   encoder,
-		decoder:   decoder,
-		db:        db,
-		indexDIDs: indexDIDs, // NEW
-	}, nil
-}
-
-func (bm *BundleManager) Close() {
-	if bm.encoder != nil {
-		bm.encoder.Close()
-	}
-	if bm.decoder != nil {
-		bm.decoder.Close()
-	}
-}
-
-// ===== BUNDLE FILE ABSTRACTION =====
-
-type bundleFile struct {
-	path             string
-	operations       []PLCOperation
-	uncompressedHash string
-	compressedHash   string
-}
-
-func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
-	return &bundleFile{
-		path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
-	}
-}
-
-func (bf *bundleFile) exists() bool {
-	_, err := os.Stat(bf.path)
-	return err == nil
-}
-
-func (bm *BundleManager) load(bf *bundleFile) error {
-	compressed, err := os.ReadFile(bf.path)
-	if err != nil {
-		return fmt.Errorf("read failed: %w", err)
-	}
-
-	decompressed, err := bm.decoder.DecodeAll(compressed, nil)
-	if err != nil {
-		return fmt.Errorf("decompress failed: %w", err)
-	}
-
-	bf.operations = bm.parseJSONL(decompressed)
-	return nil
-}
-
-func (bm *BundleManager) save(bf *bundleFile) error {
-	jsonlData := bm.serializeJSONL(bf.operations)
-	bf.uncompressedHash = bm.hash(jsonlData)
-
-	compressed := bm.encoder.EncodeAll(jsonlData, nil)
-	bf.compressedHash = bm.hash(compressed)
-
-	return os.WriteFile(bf.path, compressed, 0644)
-}
-
-func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
-	var ops []PLCOperation
-	scanner := bufio.NewScanner(bytes.NewReader(data))
-
-	for scanner.Scan() {
-		line := scanner.Bytes()
-		if len(line) == 0 {
-			continue
-		}
-
-		var op PLCOperation
-		if err := json.Unmarshal(line, &op); err == nil {
-			op.RawJSON = append([]byte(nil), line...)
-			ops = append(ops, op)
-		}
-	}
-
-	return ops
-}
-
-func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
-	var buf []byte
-	for _, op := range ops {
-		buf = append(buf, op.RawJSON...)
-		buf = append(buf, '\n')
-	}
-	return buf
-}
-
-// ===== BUNDLE FETCHING =====
-
-type bundleFetcher struct {
-	client       *Client
-	seenCIDs     map[string]bool
-	currentAfter string
-	fetchCount   int
-}
-
-func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
-	seen := make(map[string]bool)
-	for cid := range prevBoundaryCIDs {
-		seen[cid] = true
-	}
-
-	return &bundleFetcher{
-		client:       client,
-		seenCIDs:     seen,
-		currentAfter: afterTime,
-	}
-}
-
-func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
-	var ops []PLCOperation
-	maxFetches := (target / 900) + 5
-
-	for len(ops) < target && bf.fetchCount < maxFetches {
-		bf.fetchCount++
-		batchSize := bf.calculateBatchSize(target - len(ops))
-
-		log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)
-
-		batch, shouldContinue := bf.fetchBatch(ctx, batchSize)
-
-		for _, op := range batch {
-			if !bf.seenCIDs[op.CID] {
-				bf.seenCIDs[op.CID] = true
-				ops = append(ops, op)
-
-				if len(ops) >= target {
-					return ops[:target], true
-				}
-			}
-		}
-
-		if !shouldContinue {
-			break
-		}
-	}
-
-	return ops, len(ops) >= target
-}
-
-func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
-	if bf.fetchCount == 0 {
-		return 1000
-	}
-	if remaining < 100 {
-		return 50
-	}
-	if remaining < 500 {
-		return 200
-	}
-	return 1000
-}
-
-func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
-	ops, err := bf.client.Export(ctx, ExportOptions{
-		Count: size,
-		After: bf.currentAfter,
-	})
-
-	if err != nil || len(ops) == 0 {
-		return nil, false
-	}
-
-	if len(ops) > 0 {
-		bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
-	}
-
-	return ops, len(ops) >= size
-}
-
-// ===== MAIN BUNDLE LOADING =====
-
-func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
-	if !bm.enabled {
-		return nil, false, fmt.Errorf("bundle manager disabled")
-	}
-
-	bf := bm.newBundleFile(bundleNum)
-
-	// Try local file first
-	if bf.exists() {
-		return bm.loadFromFile(ctx, bundleNum, bf)
-	}
-
-	// Fetch from PLC
-	return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
-}
-
-func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
-	log.Verbose("→ Loading bundle %06d from local file", bundleNum)
-
-	// Verify hash if bundle is in DB
-	if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
-		if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
-			log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
-			os.Remove(bf.path)
-			return nil, false, fmt.Errorf("hash mismatch")
-		}
-		log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
-	}
-
-	if err := bm.load(bf); err != nil {
-		return nil, false, err
-	}
-
-	// Index if not in DB
-	if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
-		bf.compressedHash = bm.hashFile(bf.path)
-		bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
-
-		// Calculate cursor from previous bundle
-		cursor := bm.calculateCursor(ctx, bundleNum)
-
-		bm.indexBundle(ctx, bundleNum, bf, cursor)
-	}
-
-	return bf.operations, true, nil
-}
-
-func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
-	log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)
-
-	afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
-	fetcher := newBundleFetcher(client, afterTime, prevCIDs)
-
-	ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)
-
-	log.Info(" Collected %d unique operations after %d fetches (complete=%v)",
-		len(ops), fetcher.fetchCount, isComplete)
-
-	if isComplete {
-		bf.operations = ops
-		if err := bm.save(bf); err != nil {
-			log.Error("Warning: failed to save bundle: %v", err)
-		} else {
-			// The cursor is the afterTime that was used to fetch this bundle
-			cursor := afterTime
-			bm.indexBundle(ctx, bundleNum, bf, cursor)
-			log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
-				bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
-		}
-	}
-
-	return ops, isComplete, nil
-}
-
-func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
-	if bundleNum == 1 {
-		return "", nil
-	}
-
-	prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
-	if err != nil {
-		return "", nil
-	}
-
-	afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)
-
-	// Return stored boundary CIDs if available
-	if len(prevBundle.BoundaryCIDs) > 0 {
-		cids := make(map[string]bool)
-		for _, cid := range prevBundle.BoundaryCIDs {
-			cids[cid] = true
-		}
-		return afterTime, cids
-	}
-
-	// Fallback: compute from file
-	bf := bm.newBundleFile(bundleNum - 1)
-	if bf.exists() {
-		if err := bm.load(bf); err == nil {
-			_, cids := GetBoundaryCIDs(bf.operations)
-			return afterTime, cids
-		}
-	}
-
-	return afterTime, nil
-}
-
-// ===== BUNDLE INDEXING =====
-
-func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
-	prevHash := ""
-	if bundleNum > 1 {
-		if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
-			prevHash = prev.Hash
-		}
-	}
-
-	dids := bm.extractUniqueDIDs(bf.operations)
-	compressedFileSize := bm.getFileSize(bf.path)
-
-	// Calculate uncompressed size
-	uncompressedSize := int64(0)
-	for _, op := range bf.operations {
-		uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
-	}
-
-	// Get time range from operations
-	firstSeenAt := bf.operations[0].CreatedAt
-	lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt
-
-	bundle := &storage.PLCBundle{
-		BundleNumber:     bundleNum,
-		StartTime:        firstSeenAt,
-		EndTime:          lastSeenAt,
-		DIDs:             dids,
-		Hash:             bf.uncompressedHash,
-		CompressedHash:   bf.compressedHash,
-		CompressedSize:   compressedFileSize,
-		UncompressedSize: uncompressedSize,
-		Cursor:           cursor,
-		PrevBundleHash:   prevHash,
-		Compressed:       true,
-		CreatedAt:        time.Now().UTC(),
-	}
-
-	// Create bundle first
-	if err := bm.db.CreateBundle(ctx, bundle); err != nil {
-		return err
-	}
-
-	// NEW: Only index DIDs if enabled
-	if bm.indexDIDs {
-		start := time.Now()
-		if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil {
-			log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err)
-			// Don't return error - bundle is already created
-		} else {
-			elapsed := time.Since(start)
-			log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed)
-		}
-	} else {
-		log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
-	}
-
-	return nil
-}
-
-func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
-	didSet := make(map[string]bool)
-	for _, op := range ops {
-		didSet[op.DID] = true
-	}
-
-	dids := make([]string, 0, len(didSet))
-	for did := range didSet {
-		dids = append(dids, did)
-	}
-	return dids
-}
-
-// ===== MEMPOOL BUNDLE CREATION =====
-
-func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
-	if !bm.enabled {
-		return 0, fmt.Errorf("bundle manager disabled")
-	}
-
-	if len(operations) != BUNDLE_SIZE {
-		return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
-	}
-
-	lastBundle, err := bm.db.GetLastBundleNumber(ctx)
-	if err != nil {
-		return 0, err
-	}
-	bundleNum := lastBundle + 1
-
-	bf := bm.newBundleFile(bundleNum)
-	bf.operations = operations
-
-	if err := bm.save(bf); err != nil {
-		return 0, err
-	}
-
-	if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
-		return 0, err
-	}
-
-	log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
-		bundleNum, bf.uncompressedHash[:16])
-
-	return bundleNum, nil
-}
-
-// ===== VERIFICATION =====
-
-func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
-	if !bm.enabled {
-		return fmt.Errorf("bundle manager disabled")
-	}
-
-	log.Info("Verifying bundle chain from 1 to %06d...", endBundle)
-
-	for i := 1; i <= endBundle; i++ {
-		bundle, err := bm.db.GetBundleByNumber(ctx, i)
-		if err != nil {
-			return fmt.Errorf("bundle %06d not found: %w", i, err)
-		}
-
-		// Verify file hash
-		path := bm.newBundleFile(i).path
-		if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
-			return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
-		}
-
-		// Verify chain link
-		if i > 1 {
-			prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
-			if err != nil {
-				return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
-			}
-
-			if bundle.PrevBundleHash != prevBundle.Hash {
-				return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
-					i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
-			}
-		}
-
-		if i%100 == 0 {
-			log.Verbose(" ✓ Verified bundles 1-%06d", i)
-		}
-	}
-
-	log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
-	return nil
-}
-
-func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
-	if !bm.enabled {
-		return nil
-	}
-
-	for i := 1; i < targetBundle; i++ {
-		if !bm.newBundleFile(i).exists() {
-			if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
-				return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
-			}
-		}
-	}
-
-	return nil
-}
-
-// ===== UTILITY METHODS =====
-
-func (bm *BundleManager) hash(data []byte) string {
-	h := sha256.Sum256(data)
-	return hex.EncodeToString(h[:])
-}
-
-func (bm *BundleManager) hashFile(path string) string {
-	data, _ := os.ReadFile(path)
-	return bm.hash(data)
-}
-
-func (bm *BundleManager) verifyHash(path, expectedHash string) error {
-	if expectedHash == "" {
-		return nil
-	}
-
-	actualHash := bm.hashFile(path)
-	if actualHash != expectedHash {
-		return fmt.Errorf("hash mismatch")
-	}
-	return nil
-}
-
-func (bm *BundleManager) getFileSize(path string) int64 {
-	if info, err := os.Stat(path); err == nil {
-		return info.Size()
-	}
-	return 0
-}
-
-func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
-	if !bm.enabled {
-		return 0, 0, 0, 0, nil
-	}
-	return bm.db.GetBundleStats(ctx)
-}
-
-func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
-	lastBundle, err := bm.db.GetLastBundleNumber(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	if lastBundle == 0 {
-		return map[string]interface{}{
-			"chain_length": 0,
-			"status":       "empty",
-		}, nil
-	}
-
-	firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
-	lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)
-
-	return map[string]interface{}{
-		"chain_length":     lastBundle,
-		"first_bundle":     1,
-		"last_bundle":      lastBundle,
-		"chain_start_time": firstBundle.StartTime,
-		"chain_end_time":   lastBundleData.EndTime,
-		"chain_head_hash":  lastBundleData.Hash,
-	}, nil
-}
-
-// ===== EXPORTED HELPERS =====
-
-func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
-	if len(operations) == 0 {
-		return time.Time{}, nil
-	}
-
-	lastOp := operations[len(operations)-1]
-	boundaryTime := lastOp.CreatedAt
-	cidSet := make(map[string]bool)
-
-	for i := len(operations) - 1; i >= 0; i-- {
-		op := operations[i]
-		if op.CreatedAt.Equal(boundaryTime) {
-			cidSet[op.CID] = true
-		} else {
-			break
-		}
-	}
-
-	return boundaryTime, cidSet
-}
-
-func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
-	if len(operations) == 0 {
-		return operations
-	}
-
-	boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
-	if err != nil {
-		return operations
-	}
-
-	startIdx := 0
-	for startIdx < len(operations) {
-		op := operations[startIdx]
-
-		if op.CreatedAt.After(boundaryTime) {
-			break
-		}
-
-		if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
-			startIdx++
-			continue
-		}
-
-		break
-	}
-
-	return operations[startIdx:]
-}
-
-// LoadBundleOperations is a public method for external access (e.g., API handlers)
-func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
-	if !bm.enabled {
-		return nil, fmt.Errorf("bundle manager disabled")
-	}
-
-	bf := bm.newBundleFile(bundleNum)
-
-	if !bf.exists() {
-		return nil, fmt.Errorf("bundle %06d not found", bundleNum)
-	}
-
-	if err := bm.load(bf); err != nil {
-		return nil, err
-	}
-
-	return bf.operations, nil
-}
-
-// calculateCursor determines the cursor value for a given bundle
-// For bundle 1: returns empty string
-// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format
-func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
-	if bundleNum == 1 {
-		return ""
-	}
-
-	// Try to get cursor from previous bundle in DB
-	if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
-		return prevBundle.EndTime.Format(time.RFC3339Nano)
-	}
-
-	// If previous bundle not in DB, try to load it from file
-	prevBf := bm.newBundleFile(bundleNum - 1)
-	if prevBf.exists() {
-		if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
-			// Return the createdAt of the last operation in previous bundle
-			lastOp := prevBf.operations[len(prevBf.operations)-1]
-			return lastOp.CreatedAt.Format(time.RFC3339Nano)
-		}
-	}
-
-	return ""
-}
-237 internal/plc/client.go
···
-package plc
-
-import (
-	"bufio"
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"strconv"
-	"time"
-
-	"github.com/atscan/atscanner/internal/log"
-)
-
-type Client struct {
-	baseURL     string
-	httpClient  *http.Client
-	rateLimiter *RateLimiter
-}
-
-func NewClient(baseURL string) *Client {
-	// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
-	rateLimiter := NewRateLimiter(90, time.Minute)
-
-	return &Client{
-		baseURL: baseURL,
-		httpClient: &http.Client{
-			Timeout: 60 * time.Second,
-		},
-		rateLimiter: rateLimiter,
-	}
-}
-
-func (c *Client) Close() {
-	if c.rateLimiter != nil {
-		c.rateLimiter.Stop()
-	}
-}
-
-type ExportOptions struct {
-	Count int
-	After string // ISO 8601 datetime string
-}
-
-// Export fetches export data from PLC directory with rate limiting and retry
-func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
-	return c.exportWithRetry(ctx, opts, 5)
-}
-
-// exportWithRetry implements retry logic with exponential backoff for rate limits
-func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
-	var lastErr error
-	backoff := 1 * time.Second
-
-	for attempt := 1; attempt <= maxRetries; attempt++ {
-		// Wait for rate limiter token
-		if err := c.rateLimiter.Wait(ctx); err != nil {
-			return nil, err
-		}
-
-		operations, retryAfter, err := c.doExport(ctx, opts)
-
-		if err == nil {
-			return operations, nil
-		}
-
-		lastErr = err
-
-		// Check if it's a rate limit error (429)
-		if retryAfter > 0 {
-			log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
-				retryAfter, attempt, maxRetries)
-
-			select {
-			case <-time.After(retryAfter):
-				continue
-			case <-ctx.Done():
-				return nil, ctx.Err()
-			}
-		}
-
-		// Other errors - exponential backoff
-		if attempt < maxRetries {
-			log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
-				attempt, maxRetries, err, backoff)
-
-			select {
-			case <-time.After(backoff):
-				backoff *= 2 // Exponential backoff
-			case <-ctx.Done():
-				return nil, ctx.Err()
-			}
-		}
-	}
-
-	return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
-}
-
-// doExport performs the actual HTTP request
-func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
-	url := fmt.Sprintf("%s/export", c.baseURL)
-
-	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	// Add query parameters
-	q := req.URL.Query()
-	if opts.Count > 0 {
-		q.Add("count", fmt.Sprintf("%d", opts.Count))
-	}
-	if opts.After != "" {
-		q.Add("after", opts.After)
-	}
-	req.URL.RawQuery = q.Encode()
-
-	resp, err := c.httpClient.Do(req)
-	if err != nil {
-		return nil, 0, fmt.Errorf("request failed: %w", err)
-	}
-	defer resp.Body.Close()
-
-	// Handle rate limiting (429)
-	if resp.StatusCode == http.StatusTooManyRequests {
-		retryAfter := parseRetryAfter(resp)
-
-		// Also check x-ratelimit headers for info
-		if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
-			log.Verbose("Rate limit: %s", limit)
-		}
-
-		return nil, retryAfter, fmt.Errorf("rate limited (429)")
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
-	}
-
-	var operations []PLCOperation
-
-	// PLC export returns newline-delimited JSON
-	scanner := bufio.NewScanner(resp.Body)
-	buf := make([]byte, 0, 64*1024)
-	scanner.Buffer(buf, 1024*1024)
-
-	lineCount := 0
-	for scanner.Scan() {
-		lineCount++
-		line := scanner.Bytes()
-
-		if len(line) == 0 {
-			continue
-		}
-
-		var op PLCOperation
-		if err := json.Unmarshal(line, &op); err != nil {
-			log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
-			continue
-		}
-
-		// CRITICAL: Store the original raw JSON bytes
-		op.RawJSON = make([]byte, len(line))
-		copy(op.RawJSON, line)
-
-		operations = append(operations, op)
-	}
-
-	if err := scanner.Err(); err != nil {
-		return nil, 0, fmt.Errorf("error reading response: %w", err)
-	}
-
-	return operations, 0, nil
-
-}
-
-// parseRetryAfter parses the Retry-After header
-func parseRetryAfter(resp *http.Response) time.Duration {
-	retryAfter := resp.Header.Get("Retry-After")
-	if retryAfter == "" {
-		// Default to 5 minutes if no header
-		return 5 * time.Minute
-	}
-
-	// Try parsing as seconds
-	if seconds, err := strconv.Atoi(retryAfter); err == nil {
-		return time.Duration(seconds) * time.Second
-	}
-
-	// Try parsing as HTTP date
-	if t, err := http.ParseTime(retryAfter); err == nil {
-		return time.Until(t)
-	}
-
-	// Default
-	return 5 * time.Minute
-}
-
-// GetDID fetches a specific DID document from PLC
-func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
-	// Wait for rate limiter
-	if err := c.rateLimiter.Wait(ctx); err != nil {
-		return nil, err
-	}
-
-	url := fmt.Sprintf("%s/%s", c.baseURL, did)
-
-	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	resp, err := c.httpClient.Do(req)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode == http.StatusTooManyRequests {
-		retryAfter := parseRetryAfter(resp)
-		return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
-	}
-
-	var doc DIDDocument
-	if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
-		return nil, err
-	}
-
-	return &doc, nil
-}
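Both deleted files (the in-house bundle manager and this export client) are superseded by the external tangled.org/atscan.net/plcbundle library, wrapped by the new manager.go below. Constructing the replacement uses only the library calls visible in this diff; the directory URL is illustrative:

```go
// Sketch of the replacement wiring, per the constructors in manager.go below.
config := plcbundle.DefaultConfig("./plc-bundles")
client := plcbundle.NewPLCClient("https://plc.example.org") // PLC directory URL
mgr, err := plcbundle.NewManager(config, client)
if err != nil {
	return fmt.Errorf("failed to create library manager: %w", err)
}
defer mgr.Close()
```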
+112 internal/plc/helpers.go
···
+package plc
+
+import (
+	"regexp"
+	"strings"
+)
+
+// MaxHandleLength is the maximum allowed handle length for database storage
+const MaxHandleLength = 500
+
+// Handle validation regex per AT Protocol spec
+// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter
+var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
+
+// ExtractHandle safely extracts the handle from a PLC operation
+func ExtractHandle(op *PLCOperation) string {
+	if op == nil || op.Operation == nil {
+		return ""
+	}
+
+	// Get "alsoKnownAs"
+	aka, ok := op.Operation["alsoKnownAs"].([]interface{})
+	if !ok {
+		return ""
+	}
+
+	// Find the handle (e.g., "at://handle.bsky.social")
+	for _, item := range aka {
+		if handle, ok := item.(string); ok {
+			if strings.HasPrefix(handle, "at://") {
+				return strings.TrimPrefix(handle, "at://")
+			}
+		}
+	}
+	return ""
+}
+
+// ValidateHandle checks if a handle is valid for database storage
+// Returns empty string if handle is invalid (too long or wrong format)
+func ValidateHandle(handle string) string {
+	if handle == "" {
+		return ""
+	}
+
+	// Check length first (faster)
+	if len(handle) > MaxHandleLength {
+		return ""
+	}
+
+	// Validate format using regex
+	if !handleRegex.MatchString(handle) {
+		return ""
+	}
+
+	return handle
+}
+
+// ExtractPDS safely extracts the PDS endpoint from a PLC operation
+func ExtractPDS(op *PLCOperation) string {
+	if op == nil || op.Operation == nil {
+		return ""
+	}
+
+	// Get "services"
+	services, ok := op.Operation["services"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+
+	// Get "atproto_pds"
+	pdsService, ok := services["atproto_pds"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+
+	// Get "endpoint"
+	if endpoint, ok := pdsService["endpoint"].(string); ok {
+		return endpoint
+	}
+
+	return ""
+}
+
+// DIDInfo contains extracted metadata from a PLC operation
+type DIDInfo struct {
+	Handle string
+	PDS    string
+}
+
+// ExtractDIDInfo extracts both handle and PDS from an operation
+func ExtractDIDInfo(op *PLCOperation) DIDInfo {
+	return DIDInfo{
+		Handle: ExtractHandle(op),
+		PDS:    ExtractPDS(op),
+	}
+}
+
+// ExtractDIDInfoMap creates a map of DID -> info from operations
+// Processes in reverse order to get the latest state for each DID
+func ExtractDIDInfoMap(ops []PLCOperation) map[string]DIDInfo {
+	infoMap := make(map[string]DIDInfo)
+
+	// Process in reverse to get latest state
+	for i := len(ops) - 1; i >= 0; i-- {
+		op := ops[i]
+		if _, exists := infoMap[op.DID]; !exists {
+			infoMap[op.DID] = ExtractDIDInfo(&op)
+		}
+	}
+
+	return infoMap
+}
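A usage sketch for these helpers, assuming PLCOperation.Operation holds the raw operation document as a map (which is what the type assertions above imply); the payload values are illustrative:

```go
// Illustrative operation payload; only the shape matters here.
op := &PLCOperation{
	Operation: map[string]interface{}{
		"alsoKnownAs": []interface{}{"at://alice.example.com"},
		"services": map[string]interface{}{
			"atproto_pds": map[string]interface{}{
				"endpoint": "https://pds.example.com",
			},
		},
	},
}

info := ExtractDIDInfo(op)
fmt.Println(info.Handle)                 // alice.example.com
fmt.Println(info.PDS)                    // https://pds.example.com
fmt.Println(ValidateHandle(info.Handle)) // alice.example.com (passes length + regex)
```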
+522 internal/plc/manager.go
···
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/csv"
6
+
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
13
+
"time"
14
+
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
19
+
)
20
+
21
+
// BundleManager wraps the library's manager with database integration
22
+
type BundleManager struct {
23
+
libManager *plcbundle.Manager
24
+
db storage.Database
25
+
bundleDir string
26
+
indexDIDs bool
27
+
}
28
+
29
+
func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30
+
// Create library config
31
+
config := plcbundle.DefaultConfig(bundleDir)
32
+
33
+
// Create PLC client
34
+
var client *plcbundle.PLCClient
35
+
if plcURL != "" {
36
+
client = plcbundle.NewPLCClient(plcURL)
37
+
}
38
+
39
+
// Create library manager
40
+
libMgr, err := plcbundle.NewManager(config, client)
41
+
if err != nil {
42
+
return nil, fmt.Errorf("failed to create library manager: %w", err)
43
+
}
44
+
45
+
return &BundleManager{
46
+
libManager: libMgr,
47
+
db: db,
48
+
bundleDir: bundleDir,
49
+
indexDIDs: indexDIDs,
50
+
}, nil
51
+
}
52
+
53
+
func (bm *BundleManager) Close() {
54
+
if bm.libManager != nil {
55
+
bm.libManager.Close()
56
+
}
57
+
}
58
+
59
+
// LoadBundle loads a bundle (from library) and returns operations
60
+
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62
+
if err != nil {
63
+
return nil, err
64
+
}
65
+
return bundle.Operations, nil
66
+
}
67
+
68
+
// LoadBundle loads a full bundle with metadata
69
+
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70
+
return bm.libManager.LoadBundle(ctx, bundleNum)
71
+
}
72
+
73
+
// FetchAndSaveBundle fetches next bundle from PLC and saves
74
+
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75
+
// Fetch from PLC using library
76
+
bundle, err := bm.libManager.FetchNextBundle(ctx)
77
+
if err != nil {
78
+
return nil, err
79
+
}
80
+
81
+
// Save to disk (library handles this)
82
+
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83
+
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84
+
}
85
+
86
+
// Index DIDs if enabled (still use database for this)
87
+
if bm.indexDIDs && len(bundle.Operations) > 0 {
88
+
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89
+
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90
+
}
91
+
}
92
+
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
96
+
}
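A minimal sketch of driving this fetch/save/index pipeline from a caller, using the constructor and methods shown in this file. It assumes the repo's module path; a real caller would pass a live `storage.Database`, and `nil` is tolerable here only because `indexDIDs` is false, so `FetchAndSaveBundle` never reaches `indexBundleDIDs`.

```go
package main

import (
	"context"
	"log"

	"github.com/atscan/atscand/internal/plc"
)

func main() {
	ctx := context.Background()

	// nil db is acceptable only with indexDIDs=false (see lead-in).
	bm, err := plc.NewBundleManager("./bundles", "https://plc.directory", nil, false)
	if err != nil {
		log.Fatal(err)
	}
	defer bm.Close()

	if _, err := bm.FetchAndSaveBundle(ctx); err != nil {
		log.Printf("fetch stopped: %v", err) // e.g. insufficient operations
	}
}
```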
97
+
98
+
// indexBundleDIDs indexes DIDs from a bundle into the database
99
+
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100
+
start := time.Now()
101
+
log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102
+
103
+
// Extract DID info from operations
104
+
didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105
+
106
+
successCount := 0
107
+
errorCount := 0
108
+
invalidHandleCount := 0
109
+
110
+
// Upsert each DID
111
+
for did, info := range didInfoMap {
112
+
validHandle := ValidateHandle(info.Handle)
113
+
if info.Handle != "" && validHandle == "" {
114
+
invalidHandleCount++
115
+
}
116
+
117
+
if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118
+
log.Error("Failed to index DID %s: %v", did, err)
119
+
errorCount++
120
+
} else {
121
+
successCount++
122
+
}
123
+
}
124
+
125
+
elapsed := time.Since(start)
126
+
log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127
+
successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128
+
129
+
return nil
130
+
}
131
+
132
+
// VerifyChain verifies bundle chain integrity; the endBundle argument is currently ignored and the library verifies the whole chain
133
+
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134
+
result, err := bm.libManager.VerifyChain(ctx)
135
+
if err != nil {
136
+
return err
137
+
}
138
+
139
+
if !result.Valid {
140
+
return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141
+
}
142
+
143
+
return nil
144
+
}
145
+
146
+
// GetChainInfo returns chain information
147
+
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148
+
return bm.libManager.GetInfo(), nil
149
+
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = int64(0)
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp returns the bundle whose time range contains the timestamp, falling back to the closest earlier bundle
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
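Since the index returns bundle metadata ordered by bundle number, and therefore by time, both linear scans above could be collapsed into a single binary search. A self-contained sketch with a toy metadata type (the real `plcbundle.BundleMetadata` carries more fields):

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// Toy metadata mirroring only the fields used above.
type meta struct {
	BundleNumber       int
	StartTime, EndTime time.Time
}

func findBundle(bundles []meta, t time.Time) int {
	// First bundle whose EndTime is not before t.
	i := sort.Search(len(bundles), func(i int) bool {
		return !bundles[i].EndTime.Before(t)
	})
	if i < len(bundles) && !bundles[i].StartTime.After(t) {
		return bundles[i].BundleNumber // t falls inside this bundle
	}
	if i > 0 {
		return bundles[i-1].BundleNumber // closest bundle before t
	}
	return 1 // same default as the code above
}

func main() {
	day := 24 * time.Hour
	t0 := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)
	bundles := []meta{
		{1, t0, t0.Add(day)},
		{2, t0.Add(day), t0.Add(2 * day)},
	}
	fmt.Println(findBundle(bundles, t0.Add(36*time.Hour))) // 2
}
```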
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
type dailyStat struct {
306
+
lastBundle int
307
+
bundleCount int
308
+
totalUncompressed int64
309
+
totalCompressed int64
310
+
}
311
+
312
+
dailyStats := make(map[string]*dailyStat)
313
+
314
+
// Map to store the cumulative values at the end of each date
315
+
dateCumulatives := make(map[string]struct {
316
+
uncompressed int64
317
+
compressed int64
318
+
})
319
+
320
+
// Calculate cumulative totals as we iterate through sorted bundles
321
+
cumulativeUncompressed := int64(0)
322
+
cumulativeCompressed := int64(0)
323
+
324
+
for _, bundle := range filtered {
325
+
dateStr := bundle.StartTime.Format("2006-01-02")
326
+
327
+
// Update cumulative totals
328
+
cumulativeUncompressed += bundle.UncompressedSize
329
+
cumulativeCompressed += bundle.CompressedSize
330
+
331
+
if stat, exists := dailyStats[dateStr]; exists {
332
+
// Update existing day
333
+
if bundle.BundleNumber > stat.lastBundle {
334
+
stat.lastBundle = bundle.BundleNumber
335
+
}
336
+
stat.bundleCount++
337
+
stat.totalUncompressed += bundle.UncompressedSize
338
+
stat.totalCompressed += bundle.CompressedSize
339
+
} else {
340
+
// Create new day entry
341
+
dailyStats[dateStr] = &dailyStat{
342
+
lastBundle: bundle.BundleNumber,
343
+
bundleCount: 1,
344
+
totalUncompressed: bundle.UncompressedSize,
345
+
totalCompressed: bundle.CompressedSize,
346
+
}
347
+
}
348
+
349
+
// Store the cumulative values at the end of this date
350
+
// (will be overwritten if there are multiple bundles on the same day)
351
+
dateCumulatives[dateStr] = struct {
352
+
uncompressed int64
353
+
compressed int64
354
+
}{
355
+
uncompressed: cumulativeUncompressed,
356
+
compressed: cumulativeCompressed,
357
+
}
358
+
}
359
+
360
+
// Convert map to sorted slice by date
361
+
var dates []string
362
+
for date := range dailyStats {
363
+
dates = append(dates, date)
364
+
}
365
+
sort.Strings(dates)
366
+
367
+
// Build history points with cumulative operations
368
+
var history []*storage.PLCHistoryPoint
369
+
cumulativeOps := 0
370
+
371
+
for _, date := range dates {
372
+
stat := dailyStats[date]
373
+
cumulativeOps += stat.bundleCount * BUNDLE_SIZE // every completed bundle holds exactly BUNDLE_SIZE operations
374
+
cumulative := dateCumulatives[date]
375
+
376
+
history = append(history, &storage.PLCHistoryPoint{
377
+
Date: date,
378
+
BundleNumber: stat.lastBundle,
379
+
OperationCount: cumulativeOps,
380
+
UncompressedSize: stat.totalUncompressed,
381
+
CompressedSize: stat.totalCompressed,
382
+
CumulativeUncompressed: cumulative.uncompressed,
383
+
CumulativeCompressed: cumulative.compressed,
384
+
})
385
+
}
386
+
387
+
// Apply limit if specified
388
+
if limit > 0 && len(history) > limit {
389
+
history = history[:limit]
390
+
}
391
+
392
+
return history, nil
393
+
}
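The core of the function is a single pass that keeps both a per-date total and a running cumulative sum, overwriting the cumulative entry for a date until its last bundle has been seen. A toy version of that bookkeeping:

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	type b struct {
		date string
		size int64
	}
	// Bundles arrive sorted by bundle number, hence by date.
	bundles := []b{
		{"2024-01-01", 100}, {"2024-01-01", 150}, {"2024-01-02", 200},
	}

	daily := map[string]int64{}
	cumAt := map[string]int64{}
	var cum int64
	for _, x := range bundles {
		cum += x.size
		daily[x.date] += x.size
		cumAt[x.date] = cum // overwritten until the date's last bundle
	}

	dates := make([]string, 0, len(daily))
	for d := range daily {
		dates = append(dates, d)
	}
	sort.Strings(dates)
	for _, d := range dates {
		fmt.Printf("%s daily=%d cumulative=%d\n", d, daily[d], cumAt[d])
	}
	// 2024-01-01 daily=250 cumulative=250
	// 2024-01-02 daily=200 cumulative=450
}
```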
394
+
395
+
// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396
+
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397
+
// Define the path to the labels file
398
+
labelsDir := filepath.Join(bm.bundleDir, "labels")
399
+
labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400
+
401
+
// Check if file exists
402
+
if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403
+
log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404
+
// Return empty, not an error
405
+
return []*PLCOpLabel{}, nil
406
+
}
407
+
408
+
// Open the Zstd-compressed file
409
+
file, err := os.Open(labelsFile)
410
+
if err != nil {
411
+
return nil, fmt.Errorf("failed to open labels file: %w", err)
412
+
}
413
+
defer file.Close()
414
+
415
+
// Create a Zstd reader
416
+
zstdReader, err := zstd.NewReader(file)
417
+
if err != nil {
418
+
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419
+
}
420
+
defer zstdReader.Close()
421
+
422
+
// Create a CSV reader
423
+
csvReader := csv.NewReader(zstdReader)
424
+
// Label files carry no header row; parseLabelRecord validates the
425
+
// expected 6-field layout of each record.
427
+
428
+
var labels []*PLCOpLabel
429
+
430
+
// Read all records
431
+
for {
432
+
// Check for context cancellation
433
+
if err := ctx.Err(); err != nil {
434
+
return nil, err
435
+
}
436
+
437
+
record, err := csvReader.Read()
438
+
if err == io.EOF {
439
+
break // End of file
440
+
}
441
+
if err != nil {
442
+
log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443
+
continue // Skip bad line
444
+
}
445
+
446
+
// Parse the CSV record (which is []string)
447
+
label, err := parseLabelRecord(record)
448
+
if err != nil {
449
+
log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450
+
continue // Skip bad data
451
+
}
452
+
453
+
labels = append(labels, label)
454
+
}
455
+
456
+
return labels, nil
457
+
}
458
+
459
+
// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
460
+
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461
+
// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462
+
if len(record) != 6 {
463
+
err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464
+
log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
467
+
return nil, err
468
+
}
469
+
470
+
// 0:bundle
471
+
bundle, err := strconv.Atoi(record[0])
472
+
if err != nil {
473
+
log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
476
+
return nil, fmt.Errorf("parsing 'bundle': %w", err)
477
+
}
478
+
479
+
// 1:position
480
+
position, err := strconv.Atoi(record[1])
481
+
if err != nil {
482
+
log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
485
+
return nil, fmt.Errorf("parsing 'position': %w", err)
486
+
}
487
+
488
+
// 2:cid(short)
489
+
shortCID := record[2]
490
+
491
+
// 3:size
492
+
size, err := strconv.Atoi(record[3])
493
+
if err != nil {
494
+
log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
497
+
return nil, fmt.Errorf("parsing 'size': %w", err)
498
+
}
499
+
500
+
// 4:confidence
501
+
confidence, err := strconv.ParseFloat(record[4], 64)
502
+
if err != nil {
503
+
log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
506
+
return nil, fmt.Errorf("parsing 'confidence': %w", err)
507
+
}
508
+
509
+
// 5:labels
510
+
detectors := strings.Split(record[5], ";")
511
+
512
+
label := &PLCOpLabel{
513
+
Bundle: bundle,
514
+
Position: position,
515
+
CID: shortCID,
516
+
Size: size,
517
+
Confidence: confidence,
518
+
Detectors: detectors,
519
+
}
520
+
521
+
return label, nil
522
+
}
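A self-contained round-trip showing the file format `GetBundleLabels` consumes, as implied by `parseLabelRecord`: a zstd-compressed, headerless CSV whose rows are `bundle,position,cid,size,confidence,labels`. The sample values are made up; only the field layout is taken from the code above.

```go
package main

import (
	"bytes"
	"encoding/csv"
	"fmt"
	"io"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Build an in-memory .csv.zst with one record in the 6-field layout.
	var buf bytes.Buffer
	enc, _ := zstd.NewWriter(&buf)
	io.WriteString(enc, "42,17,bafyexample,512,0.93,spam;burst\n")
	enc.Close()

	dec, err := zstd.NewReader(bytes.NewReader(buf.Bytes()))
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	r := csv.NewReader(dec)
	r.FieldsPerRecord = 6 // reject rows with the wrong field count
	for {
		rec, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println("skipping malformed row:", err)
			continue
		}
		fmt.Println("bundle:", rec[0], "confidence:", rec[4], "labels:", rec[5])
	}
}
```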
-70
internal/plc/ratelimiter.go
···
1
-
package plc
2
-
3
-
import (
4
-
"context"
5
-
"time"
6
-
)
7
-
8
-
// RateLimiter implements a token bucket rate limiter
9
-
type RateLimiter struct {
10
-
tokens chan struct{}
11
-
refillRate time.Duration
12
-
maxTokens int
13
-
stopRefill chan struct{}
14
-
}
15
-
16
-
// NewRateLimiter creates a new rate limiter
17
-
// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
18
-
func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
19
-
rl := &RateLimiter{
20
-
tokens: make(chan struct{}, requestsPerPeriod),
21
-
refillRate: period / time.Duration(requestsPerPeriod),
22
-
maxTokens: requestsPerPeriod,
23
-
stopRefill: make(chan struct{}),
24
-
}
25
-
26
-
// Fill initially
27
-
for i := 0; i < requestsPerPeriod; i++ {
28
-
rl.tokens <- struct{}{}
29
-
}
30
-
31
-
// Start refill goroutine
32
-
go rl.refill()
33
-
34
-
return rl
35
-
}
36
-
37
-
// refill adds tokens at the specified rate
38
-
func (rl *RateLimiter) refill() {
39
-
ticker := time.NewTicker(rl.refillRate)
40
-
defer ticker.Stop()
41
-
42
-
for {
43
-
select {
44
-
case <-ticker.C:
45
-
select {
46
-
case rl.tokens <- struct{}{}:
47
-
// Token added
48
-
default:
49
-
// Buffer full, skip
50
-
}
51
-
case <-rl.stopRefill:
52
-
return
53
-
}
54
-
}
55
-
}
56
-
57
-
// Wait blocks until a token is available
58
-
func (rl *RateLimiter) Wait(ctx context.Context) error {
59
-
select {
60
-
case <-rl.tokens:
61
-
return nil
62
-
case <-ctx.Done():
63
-
return ctx.Err()
64
-
}
65
-
}
66
-
67
-
// Stop stops the rate limiter
68
-
func (rl *RateLimiter) Stop() {
69
-
close(rl.stopRefill)
70
-
}
+92
-415
internal/plc/scanner.go
···
2
2
3
3
import (
4
4
"context"
5
-
"encoding/json"
6
5
"fmt"
7
6
"strings"
8
7
"time"
9
8
10
-
"github.com/acarl005/stripansi"
11
-
"github.com/atscan/atscanner/internal/config"
12
-
"github.com/atscan/atscanner/internal/log"
13
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/storage"
14
12
)
15
13
16
14
type Scanner struct {
17
-
client *Client
15
+
bundleManager *BundleManager
18
16
db storage.Database
19
17
config config.PLCConfig
20
-
bundleManager *BundleManager
21
18
}
22
19
23
-
func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
24
-
bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
25
-
if err != nil {
26
-
log.Error("Warning: failed to initialize bundle manager: %v", err)
27
-
bundleManager = &BundleManager{enabled: false}
28
-
}
20
+
func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
21
+
log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
29
22
30
23
return &Scanner{
31
-
client: NewClient(cfg.DirectoryURL),
24
+
bundleManager: bundleManager, // Use provided instance
32
25
db: db,
33
26
config: cfg,
34
-
bundleManager: bundleManager,
35
27
}
36
28
}
37
29
38
30
func (s *Scanner) Close() {
39
-
if s.bundleManager != nil {
40
-
s.bundleManager.Close()
41
-
}
42
-
}
43
-
44
-
// ScanMetrics tracks scan progress
45
-
type ScanMetrics struct {
46
-
totalFetched int64 // Total ops fetched from PLC/bundles
47
-
totalProcessed int64 // Unique ops processed (after dedup)
48
-
newEndpoints int64 // New endpoints discovered
49
-
endpointCounts map[string]int64
50
-
currentBundle int
51
-
startTime time.Time
52
-
}
53
-
54
-
func newMetrics(startBundle int) *ScanMetrics {
55
-
return &ScanMetrics{
56
-
endpointCounts: make(map[string]int64),
57
-
currentBundle: startBundle,
58
-
startTime: time.Now(),
59
-
}
60
-
}
61
-
62
-
func (m *ScanMetrics) logSummary() {
63
-
summary := formatEndpointCounts(m.endpointCounts)
64
-
if m.newEndpoints > 0 {
65
-
log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
66
-
m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
67
-
} else {
68
-
log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
69
-
m.totalProcessed, m.totalFetched, time.Since(m.startTime))
70
-
}
31
+
// Don't close bundleManager here - it's shared
71
32
}
72
33
73
34
func (s *Scanner) Scan(ctx context.Context) error {
74
35
log.Info("Starting PLC directory scan...")
75
-
log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
76
36
77
37
cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
78
38
if err != nil {
79
39
return fmt.Errorf("failed to get scan cursor: %w", err)
80
40
}
81
41
82
-
startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
83
-
metrics := newMetrics(startBundle)
42
+
metrics := newMetrics(cursor.LastBundleNumber + 1)
84
43
85
-
if startBundle > 1 {
86
-
if err := s.ensureContinuity(ctx, startBundle); err != nil {
87
-
return err
88
-
}
89
-
}
90
-
91
-
// Handle existing mempool first
92
-
if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
93
-
return s.handleMempoolOnly(ctx, metrics)
94
-
}
95
-
96
-
// Process bundles until incomplete or error
44
+
// Main processing loop
97
45
for {
98
46
if err := ctx.Err(); err != nil {
99
47
return err
100
48
}
101
49
102
-
if err := s.processSingleBundle(ctx, metrics); err != nil {
103
-
if s.shouldRetry(err) {
104
-
continue
105
-
}
106
-
break
107
-
}
108
-
109
-
if err := s.updateCursor(ctx, cursor, metrics); err != nil {
110
-
log.Error("Warning: failed to update cursor: %v", err)
111
-
}
112
-
}
113
-
114
-
// Try to finalize mempool
115
-
s.finalizeMempool(ctx, metrics)
116
-
117
-
metrics.logSummary()
118
-
return nil
119
-
}
120
-
121
-
func (s *Scanner) calculateStartBundle(lastBundle int) int {
122
-
if lastBundle == 0 {
123
-
return 1
124
-
}
125
-
return lastBundle + 1
126
-
}
127
-
128
-
func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
129
-
log.Info("Checking bundle continuity...")
130
-
if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
131
-
return fmt.Errorf("bundle continuity check failed: %w", err)
132
-
}
133
-
return nil
134
-
}
135
-
136
-
func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
137
-
count, err := s.db.GetMempoolCount(ctx)
138
-
if err != nil {
139
-
return false, err
140
-
}
141
-
return count > 0, nil
142
-
}
143
-
144
-
func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
145
-
count, _ := s.db.GetMempoolCount(ctx)
146
-
log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
147
-
148
-
if err := s.fillMempool(ctx, m); err != nil {
149
-
return err
150
-
}
151
-
152
-
if err := s.processMempool(ctx, m); err != nil {
153
-
log.Error("Error processing mempool: %v", err)
154
-
}
155
-
156
-
m.logSummary()
157
-
return nil
158
-
}
159
-
160
-
func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
161
-
log.Verbose("→ Processing bundle %06d...", m.currentBundle)
162
-
163
-
ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
164
-
if err != nil {
165
-
return s.handleBundleError(err, m)
166
-
}
167
-
168
-
if isComplete {
169
-
return s.handleCompleteBundle(ctx, ops, m)
170
-
}
171
-
return s.handleIncompleteBundle(ctx, ops, m)
172
-
}
173
-
174
-
func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
175
-
log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
176
-
177
-
if strings.Contains(err.Error(), "rate limited") {
178
-
log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
179
-
time.Sleep(5 * time.Minute)
180
-
return fmt.Errorf("retry")
181
-
}
182
-
183
-
if m.currentBundle > 1 {
184
-
log.Info("→ Reached end of available data")
185
-
}
186
-
return err
187
-
}
188
-
189
-
func (s *Scanner) shouldRetry(err error) bool {
190
-
return err != nil && err.Error() == "retry"
191
-
}
192
-
193
-
func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
194
-
counts, err := s.processBatch(ctx, ops)
195
-
if err != nil {
196
-
return err
197
-
}
198
-
199
-
s.mergeCounts(m.endpointCounts, counts)
200
-
m.totalProcessed += int64(len(ops)) // Unique ops after dedup
201
-
m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
202
-
203
-
batchTotal := sumCounts(counts)
204
-
log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
205
-
m.currentBundle, len(ops), batchTotal)
206
-
207
-
m.currentBundle++
208
-
return nil
209
-
}
210
-
211
-
func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
212
-
log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
213
-
214
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
215
-
return err
216
-
}
217
-
218
-
s.finalizeMempool(ctx, m)
219
-
return fmt.Errorf("incomplete") // Signal end of processing
220
-
}
221
-
222
-
func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
223
-
if err := s.fillMempool(ctx, m); err != nil {
224
-
log.Error("Error filling mempool: %v", err)
225
-
}
226
-
if err := s.processMempool(ctx, m); err != nil {
227
-
log.Error("Error processing mempool: %v", err)
228
-
}
229
-
}
230
-
231
-
func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
232
-
const fetchLimit = 1000
233
-
234
-
for {
235
-
count, err := s.db.GetMempoolCount(ctx)
50
+
// Fetch and save bundle (library handles mempool internally)
51
+
bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
236
52
if err != nil {
237
-
return err
238
-
}
53
+
if isInsufficientOpsError(err) {
54
+
// Show mempool status
55
+
stats := s.bundleManager.GetMempoolStats()
56
+
mempoolCount, _ := stats["count"].(int) // comma-ok guards against a missing or retyped key
239
57
240
-
if count >= BUNDLE_SIZE {
241
-
log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
242
-
return nil
243
-
}
58
+
if mempoolCount > 0 {
59
+
log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
60
+
mempoolCount, BUNDLE_SIZE)
61
+
} else {
62
+
log.Info("→ Caught up! No operations available")
63
+
}
64
+
break
65
+
}
244
66
245
-
log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
67
+
if strings.Contains(err.Error(), "rate limited") {
68
+
log.Info("⚠ Rate limited, pausing for 5 minutes...")
69
+
time.Sleep(5 * time.Minute)
70
+
continue
71
+
}
246
72
247
-
// ✅ Fix: Don't capture unused 'ops' variable
248
-
shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
249
-
if err != nil {
250
-
return err
73
+
return fmt.Errorf("failed to fetch bundle: %w", err)
251
74
}
252
75
253
-
if !shouldContinue {
254
-
finalCount, _ := s.db.GetMempoolCount(ctx)
255
-
log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
256
-
return nil
257
-
}
258
-
}
259
-
}
260
-
261
-
func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
262
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
263
-
if err != nil {
264
-
return false, err
265
-
}
266
-
267
-
var after string
268
-
if lastOp != nil {
269
-
after = lastOp.CreatedAt.Format(time.RFC3339Nano)
270
-
log.Verbose(" Using cursor: %s", after)
271
-
}
272
-
273
-
ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
274
-
if err != nil {
275
-
return false, fmt.Errorf("failed to fetch from PLC: %w", err)
276
-
}
277
-
278
-
fetchedCount := len(ops)
279
-
m.totalFetched += int64(fetchedCount) // Track all fetched
280
-
log.Verbose(" Fetched %d operations from PLC", fetchedCount)
281
-
282
-
if fetchedCount == 0 {
283
-
count, _ := s.db.GetMempoolCount(ctx)
284
-
log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
285
-
return false, nil
286
-
}
287
-
288
-
beforeCount, err := s.db.GetMempoolCount(ctx)
289
-
if err != nil {
290
-
return false, err
291
-
}
292
-
293
-
endpointsBefore := sumCounts(m.endpointCounts)
294
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
295
-
return false, err
296
-
}
297
-
endpointsAfter := sumCounts(m.endpointCounts)
298
-
m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
299
-
300
-
afterCount, err := s.db.GetMempoolCount(ctx)
301
-
if err != nil {
302
-
return false, err
303
-
}
304
-
305
-
uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
306
-
m.totalProcessed += uniqueAdded // Track unique ops processed
307
-
308
-
log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
309
-
uniqueAdded, int64(fetchedCount)-uniqueAdded)
310
-
311
-
// Continue only if got full batch
312
-
shouldContinue := fetchedCount >= limit
313
-
if !shouldContinue {
314
-
log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
315
-
}
316
-
317
-
return shouldContinue, nil
318
-
}
319
-
320
-
func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
321
-
mempoolOps := make([]storage.MempoolOperation, len(ops))
322
-
for i, op := range ops {
323
-
mempoolOps[i] = storage.MempoolOperation{
324
-
DID: op.DID,
325
-
Operation: string(op.RawJSON),
326
-
CID: op.CID,
327
-
CreatedAt: op.CreatedAt,
328
-
}
329
-
}
330
-
331
-
if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
332
-
return err
333
-
}
334
-
335
-
// Process for endpoint discovery
336
-
batchCounts, err := s.processBatch(ctx, ops)
337
-
s.mergeCounts(counts, batchCounts)
338
-
return err
339
-
}
340
-
341
-
func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
342
-
for {
343
-
count, err := s.db.GetMempoolCount(ctx)
76
+
// Process operations for endpoint discovery
77
+
counts, err := s.processBatch(ctx, bundle.Operations)
344
78
if err != nil {
345
-
return err
79
+
log.Error("Failed to process batch: %v", err)
80
+
// Continue anyway
346
81
}
347
82
348
-
log.Verbose("Mempool contains %d operations", count)
349
-
350
-
if count < BUNDLE_SIZE {
351
-
log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
352
-
return nil
353
-
}
83
+
// Update metrics
84
+
s.mergeCounts(metrics.endpointCounts, counts)
85
+
metrics.totalProcessed += int64(len(bundle.Operations))
86
+
metrics.newEndpoints += sumCounts(counts)
87
+
metrics.currentBundle = bundle.BundleNumber
354
88
355
-
log.Info("→ Creating bundle from mempool (%d operations available)...", count)
89
+
log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
90
+
bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
356
91
357
-
// Updated to receive 4 values instead of 3
358
-
bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
359
-
if err != nil {
360
-
return err
361
-
}
362
-
363
-
// Process and update metrics
364
-
countsBefore := sumCounts(m.endpointCounts)
365
-
counts, _ := s.processBatch(ctx, ops)
366
-
s.mergeCounts(m.endpointCounts, counts)
367
-
newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
368
-
369
-
m.totalProcessed += int64(len(ops))
370
-
m.newEndpoints += newEndpointsFound
371
-
m.currentBundle = bundleNum
372
-
373
-
if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92
+
// Update cursor
93
+
if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
374
94
log.Error("Warning: failed to update cursor: %v", err)
375
95
}
376
-
377
-
log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
378
96
}
379
-
}
380
97
381
-
func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
382
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
383
-
if err != nil {
384
-
return 0, nil, "", err
98
+
// Show final mempool status
99
+
stats := s.bundleManager.GetMempoolStats()
100
+
if count, ok := stats["count"].(int); ok && count > 0 {
101
+
log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102
+
count, float64(count)/float64(BUNDLE_SIZE)*100)
385
103
}
386
104
387
-
ops, ids := s.deduplicateMempool(mempoolOps)
388
-
if len(ops) < BUNDLE_SIZE {
389
-
return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
390
-
}
391
-
392
-
// Determine cursor from last bundle
393
-
cursor := ""
394
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
395
-
if err == nil && lastBundle > 0 {
396
-
if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
397
-
cursor = bundle.EndTime.Format(time.RFC3339Nano)
398
-
}
399
-
}
400
-
401
-
bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
402
-
if err != nil {
403
-
return 0, nil, "", err
404
-
}
405
-
406
-
if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
407
-
return 0, nil, "", err
408
-
}
409
-
410
-
return bundleNum, ops, cursor, nil
105
+
metrics.logSummary()
106
+
return nil
411
107
}
412
108
413
-
func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
414
-
ops := make([]PLCOperation, 0, BUNDLE_SIZE)
415
-
ids := make([]int64, 0, BUNDLE_SIZE)
416
-
seenCIDs := make(map[string]bool)
417
-
418
-
for _, mop := range mempoolOps {
419
-
if seenCIDs[mop.CID] {
420
-
ids = append(ids, mop.ID)
421
-
continue
422
-
}
423
-
seenCIDs[mop.CID] = true
424
-
425
-
var op PLCOperation
426
-
json.Unmarshal([]byte(mop.Operation), &op)
427
-
op.RawJSON = []byte(mop.Operation)
428
-
429
-
ops = append(ops, op)
430
-
ids = append(ids, mop.ID)
431
-
432
-
if len(ops) >= BUNDLE_SIZE {
433
-
break
434
-
}
435
-
}
436
-
437
-
return ops, ids
438
-
}
439
-
109
+
// processBatch extracts endpoints from operations
440
110
func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
441
111
counts := make(map[string]int64)
442
112
seen := make(map[string]*PLCOperation)
443
113
444
114
// Collect unique endpoints
445
-
for _, op := range ops {
115
+
for i := range ops {
116
+
op := &ops[i]
117
+
446
118
if op.IsNullified() {
447
119
continue
448
120
}
449
-
for _, ep := range s.extractEndpointsFromOperation(op) {
121
+
122
+
for _, ep := range s.extractEndpointsFromOperation(*op) {
450
123
key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
451
124
if _, exists := seen[key]; !exists {
452
-
seen[key] = &op
125
+
seen[key] = op
453
126
}
454
127
}
455
128
}
···
465
138
}
466
139
467
140
if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
468
-
log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141
+
log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
469
142
continue
470
143
}
471
144
472
-
log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145
+
log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
473
146
counts[epType]++
474
147
}
475
148
476
149
return counts, nil
477
-
}
478
-
479
-
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
480
-
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
481
-
EndpointType: epType,
482
-
Endpoint: endpoint,
483
-
DiscoveredAt: discoveredAt,
484
-
LastChecked: time.Time{},
485
-
Status: storage.EndpointStatusUnknown,
486
-
})
487
150
}
488
151
489
152
func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
526
189
return nil
527
190
}
528
191
529
-
func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
530
-
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
531
-
Source: "plc_directory",
532
-
LastBundleNumber: m.currentBundle - 1,
533
-
LastScanTime: time.Now().UTC(),
534
-
RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192
+
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
194
+
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195
+
EndpointType: epType,
196
+
Endpoint: endpoint,
197
+
DiscoveredAt: discoveredAt,
198
+
LastChecked: time.Time{},
199
+
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
535
201
})
536
202
}
537
203
···
559
225
return total
560
226
}
561
227
562
-
func formatEndpointCounts(counts map[string]int64) string {
563
-
if len(counts) == 0 {
564
-
return "0 new endpoints"
565
-
}
228
+
func isInsufficientOpsError(err error) bool {
229
+
return err != nil && strings.Contains(err.Error(), "insufficient operations")
230
+
}
566
231
567
-
total := sumCounts(counts)
232
+
// ScanMetrics tracks scan progress
233
+
type ScanMetrics struct {
234
+
totalProcessed int64
235
+
newEndpoints int64
236
+
endpointCounts map[string]int64
237
+
currentBundle int
238
+
startTime time.Time
239
+
}
568
240
569
-
if len(counts) == 1 {
570
-
for typ, count := range counts {
571
-
return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
572
-
}
241
+
func newMetrics(startBundle int) *ScanMetrics {
242
+
return &ScanMetrics{
243
+
endpointCounts: make(map[string]int64),
244
+
currentBundle: startBundle,
245
+
startTime: time.Now(),
573
246
}
247
+
}
574
248
575
-
parts := make([]string, 0, len(counts))
576
-
for typ, count := range counts {
577
-
parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249
+
func (m *ScanMetrics) logSummary() {
250
+
if m.newEndpoints > 0 {
251
+
log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252
+
m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253
+
} else {
254
+
log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255
+
m.totalProcessed, time.Since(m.startTime))
578
256
}
579
-
return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
580
257
}
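Both `isInsufficientOpsError` and the rate-limit branch above classify errors by substring, which is fragile against message changes and wrapping. A hedged alternative, assuming a hypothetical sentinel (the plcbundle library is not shown exporting one, so the fallback keeps the current behaviour):

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

// Hypothetical sentinel; not an actual export of the plcbundle library.
// errors.Is survives wrapping and rewording where strings.Contains does not.
var errInsufficientOps = errors.New("insufficient operations")

func isInsufficientOps(err error) bool {
	if errors.Is(err, errInsufficientOps) {
		return true
	}
	// Fallback for errors that only carry the message text.
	return err != nil && strings.Contains(err.Error(), "insufficient operations")
}

func main() {
	wrapped := fmt.Errorf("fetch bundle: %w", errInsufficientOps)
	fmt.Println(isInsufficientOps(wrapped)) // true
}
```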
+68
-55
internal/plc/types.go
···
1
1
package plc
2
2
3
-
import "time"
4
-
5
-
type PLCOperation struct {
6
-
DID string `json:"did"`
7
-
Operation map[string]interface{} `json:"operation"`
8
-
CID string `json:"cid"`
9
-
Nullified interface{} `json:"nullified,omitempty"`
10
-
CreatedAt time.Time `json:"createdAt"`
11
-
12
-
RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13
-
}
3
+
import (
4
+
"net/url"
5
+
"strings"
14
6
15
-
// Helper method to check if nullified
16
-
func (op *PLCOperation) IsNullified() bool {
17
-
if op.Nullified == nil {
18
-
return false
19
-
}
20
-
21
-
switch v := op.Nullified.(type) {
22
-
case bool:
23
-
return v
24
-
case string:
25
-
return v != ""
26
-
default:
27
-
return false
28
-
}
29
-
}
30
-
31
-
// Get nullifying CID if available
32
-
func (op *PLCOperation) GetNullifyingCID() string {
33
-
if s, ok := op.Nullified.(string); ok {
34
-
return s
35
-
}
36
-
return ""
37
-
}
7
+
plclib "tangled.org/atscan.net/plcbundle/plc"
8
+
)
38
9
39
-
type DIDDocument struct {
40
-
Context []string `json:"@context"`
41
-
ID string `json:"id"`
42
-
AlsoKnownAs []string `json:"alsoKnownAs"`
43
-
VerificationMethod []VerificationMethod `json:"verificationMethod"`
44
-
Service []Service `json:"service"`
45
-
}
10
+
// Re-export library types
11
+
type PLCOperation = plclib.PLCOperation
12
+
type DIDDocument = plclib.DIDDocument
13
+
type Client = plclib.Client
14
+
type ExportOptions = plclib.ExportOptions
46
15
47
-
type VerificationMethod struct {
48
-
ID string `json:"id"`
49
-
Type string `json:"type"`
50
-
Controller string `json:"controller"`
51
-
PublicKeyMultibase string `json:"publicKeyMultibase"`
52
-
}
16
+
// Types and constants local to this package
17
+
const BUNDLE_SIZE = 10000
53
18
54
-
type Service struct {
55
-
ID string `json:"id"`
56
-
Type string `json:"type"`
57
-
ServiceEndpoint string `json:"serviceEndpoint"`
58
-
}
59
-
60
-
// DIDHistoryEntry represents a single operation in DID history
61
19
type DIDHistoryEntry struct {
62
20
Operation PLCOperation `json:"operation"`
63
21
PLCBundle string `json:"plc_bundle,omitempty"`
64
22
}
65
23
66
-
// DIDHistory represents the full history of a DID
67
24
type DIDHistory struct {
68
25
DID string `json:"did"`
69
26
Current *PLCOperation `json:"current"`
···
74
31
Type string
75
32
Endpoint string
76
33
}
34
+
35
+
// PLCOpLabel holds metadata from the label CSV file
36
+
type PLCOpLabel struct {
37
+
Bundle int `json:"bundle"`
38
+
Position int `json:"position"`
39
+
CID string `json:"cid"`
40
+
Size int `json:"size"`
41
+
Confidence float64 `json:"confidence"`
42
+
Detectors []string `json:"detectors"`
43
+
}
44
+
45
+
// validateEndpoint checks if endpoint is in correct format: https://<domain>
46
+
func validateEndpoint(endpoint string) bool {
47
+
// Must not be empty
48
+
if endpoint == "" {
49
+
return false
50
+
}
51
+
52
+
// Must not have trailing slash
53
+
if strings.HasSuffix(endpoint, "/") {
54
+
return false
55
+
}
56
+
57
+
// Parse URL
58
+
u, err := url.Parse(endpoint)
59
+
if err != nil {
60
+
return false
61
+
}
62
+
63
+
// Must use https scheme
64
+
if u.Scheme != "https" {
65
+
return false
66
+
}
67
+
68
+
// Must have a host
69
+
if u.Host == "" {
70
+
return false
71
+
}
72
+
73
+
// Must not have a path (an empty path or a bare "/" is acceptable)
74
+
if u.Path != "" && u.Path != "/" {
75
+
return false
76
+
}
77
+
78
+
// Must not have query parameters
79
+
if u.RawQuery != "" {
80
+
return false
81
+
}
82
+
83
+
// Must not have fragment
84
+
if u.Fragment != "" {
85
+
return false
86
+
}
87
+
88
+
return true
89
+
}
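A standalone copy of the same rules, runnable outside the package, with a small table of accepted and rejected endpoints:

```go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

// Same checks as validateEndpoint above, condensed for illustration.
func isValid(endpoint string) bool {
	if endpoint == "" || strings.HasSuffix(endpoint, "/") {
		return false
	}
	u, err := url.Parse(endpoint)
	if err != nil {
		return false
	}
	return u.Scheme == "https" && u.Host != "" &&
		(u.Path == "" || u.Path == "/") &&
		u.RawQuery == "" && u.Fragment == ""
}

func main() {
	for _, ep := range []string{
		"https://pds.example.com",      // true
		"https://pds.example.com/",     // false: trailing slash
		"http://pds.example.com",       // false: wrong scheme
		"https://pds.example.com/xrpc", // false: has a path
		"https://pds.example.com?x=1",  // false: query string
	} {
		fmt.Printf("%-31s %v\n", ep, isValid(ep))
	}
}
```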
+10
-23
internal/storage/db.go
···
27
27
EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error)
28
28
GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error)
29
29
GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error)
30
-
UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error
30
+
UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error
31
31
SaveEndpointScan(ctx context.Context, scan *EndpointScan) error
32
32
SetScanRetention(retention int)
33
33
UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
···
50
50
GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
51
51
UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
52
52
53
-
// Bundle operations
54
-
CreateBundle(ctx context.Context, bundle *PLCBundle) error
55
-
GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
56
-
GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
57
-
GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
58
-
GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
59
-
GetLastBundleNumber(ctx context.Context) (int, error)
60
-
GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
61
-
GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
62
-
63
-
// Mempool operations
64
-
AddToMempool(ctx context.Context, ops []MempoolOperation) error
65
-
GetMempoolCount(ctx context.Context) (int, error)
66
-
GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
67
-
DeleteFromMempool(ctx context.Context, ids []int64) error
68
-
GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
69
-
GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
70
-
GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
71
-
GetMempoolUncompressedSize(ctx context.Context) (int64, error)
72
-
73
53
// Metrics
74
54
StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
75
55
GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
76
56
GetEndpointStats(ctx context.Context) (*EndpointStats, error)
77
57
78
58
// DID operations
79
-
UpsertDID(ctx context.Context, did string, bundleNum int) error
59
+
UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error
60
+
UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error
80
61
GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error)
62
+
GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error)
63
+
GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error)
81
64
AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error
82
65
GetTotalDIDCount(ctx context.Context) (int64, error)
83
66
84
67
// PDS Repo operations
85
68
UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error
86
-
GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) // Updated
69
+
GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error)
87
70
GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error)
88
71
GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error)
72
+
73
+
// Internal
74
+
GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error)
75
+
GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error)
89
76
}
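One plausible shape of the statement behind the widened `UpsertDID` signature, given the `dids` schema above (`handle`, `pds`, JSONB `bundle_numbers`). This is a hypothetical sketch; the real statement lives in the Postgres implementation and may differ.

```go
package storage

// Hypothetical query for UpsertDID; the CASE guard keeps bundle_numbers
// free of duplicates when the same bundle is re-indexed.
const upsertDIDQuery = `
INSERT INTO dids (did, handle, pds, bundle_numbers)
VALUES ($1, $2, $3, jsonb_build_array($4::int))
ON CONFLICT (did) DO UPDATE SET
    handle = EXCLUDED.handle,
    pds    = EXCLUDED.pds,
    bundle_numbers = CASE
        WHEN dids.bundle_numbers @> jsonb_build_array($4::int)
            THEN dids.bundle_numbers
        ELSE dids.bundle_numbers || jsonb_build_array($4::int)
    END,
    updated_at = CURRENT_TIMESTAMP
`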
+466
-618
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
11
-
"github.com/atscan/atscanner/internal/log"
10
+
"github.com/atscan/atscand/internal/log"
12
11
"github.com/jackc/pgx/v5"
13
12
"github.com/jackc/pgx/v5/pgxpool"
14
13
_ "github.com/jackc/pgx/v5/stdlib"
···
73
72
log.Info("Running database migrations...")
74
73
75
74
schema := `
76
-
-- Endpoints table (NO user_count, NO ip_info)
75
+
-- Endpoints table (with IPv6 support)
77
76
CREATE TABLE IF NOT EXISTS endpoints (
78
77
id BIGSERIAL PRIMARY KEY,
79
78
endpoint_type TEXT NOT NULL DEFAULT 'pds',
···
83
82
last_checked TIMESTAMP,
84
83
status INTEGER DEFAULT 0,
85
84
ip TEXT,
85
+
ipv6 TEXT,
86
86
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
87
88
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
88
89
UNIQUE(endpoint_type, endpoint)
89
90
);
···
92
93
CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
93
94
CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
94
95
CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96
+
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
95
97
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
96
-
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
98
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
97
100
98
-
-- IP infos table (IP as PRIMARY KEY)
99
-
CREATE TABLE IF NOT EXISTS ip_infos (
100
-
ip TEXT PRIMARY KEY,
101
-
city TEXT,
102
-
country TEXT,
103
-
country_code TEXT,
104
-
asn INTEGER,
105
-
asn_org TEXT,
106
-
is_datacenter BOOLEAN,
107
-
is_vpn BOOLEAN,
108
-
latitude REAL,
109
-
longitude REAL,
110
-
raw_data JSONB,
111
-
fetched_at TIMESTAMP NOT NULL,
112
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
113
-
);
101
+
-- IP infos table (IP as PRIMARY KEY)
102
+
CREATE TABLE IF NOT EXISTS ip_infos (
103
+
ip TEXT PRIMARY KEY,
104
+
city TEXT,
105
+
country TEXT,
106
+
country_code TEXT,
107
+
asn INTEGER,
108
+
asn_org TEXT,
109
+
is_datacenter BOOLEAN,
110
+
is_vpn BOOLEAN,
111
+
is_crawler BOOLEAN,
112
+
is_tor BOOLEAN,
113
+
is_proxy BOOLEAN,
114
+
latitude REAL,
115
+
longitude REAL,
116
+
raw_data JSONB,
117
+
fetched_at TIMESTAMP NOT NULL,
118
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
119
+
);
114
120
115
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
116
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
121
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
122
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
117
123
118
-
-- Endpoint scans (renamed from pds_scans)
124
+
-- Endpoint scans
119
125
CREATE TABLE IF NOT EXISTS endpoint_scans (
120
126
id BIGSERIAL PRIMARY KEY,
121
127
endpoint_id BIGINT NOT NULL,
···
123
129
response_time DOUBLE PRECISION,
124
130
user_count BIGINT,
125
131
version TEXT,
132
+
used_ip TEXT,
126
133
scan_data JSONB,
127
134
scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
128
135
FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
···
131
138
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
132
139
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
133
140
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
141
+
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
142
+
134
143
135
144
CREATE TABLE IF NOT EXISTS plc_metrics (
136
145
id BIGSERIAL PRIMARY KEY,
···
149
158
records_processed BIGINT DEFAULT 0
150
159
);
151
160
152
-
CREATE TABLE IF NOT EXISTS plc_bundles (
153
-
bundle_number INTEGER PRIMARY KEY,
154
-
start_time TIMESTAMP NOT NULL,
155
-
end_time TIMESTAMP NOT NULL,
156
-
dids JSONB NOT NULL,
157
-
hash TEXT NOT NULL,
158
-
compressed_hash TEXT NOT NULL,
159
-
compressed_size BIGINT NOT NULL,
160
-
uncompressed_size BIGINT NOT NULL,
161
-
cumulative_compressed_size BIGINT NOT NULL,
162
-
cumulative_uncompressed_size BIGINT NOT NULL,
163
-
cursor TEXT,
164
-
prev_bundle_hash TEXT,
165
-
compressed BOOLEAN DEFAULT true,
166
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
167
-
);
168
-
169
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
170
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
171
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
172
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
173
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
174
-
175
-
CREATE TABLE IF NOT EXISTS plc_mempool (
176
-
id BIGSERIAL PRIMARY KEY,
177
-
did TEXT NOT NULL,
178
-
operation TEXT NOT NULL,
179
-
cid TEXT NOT NULL UNIQUE,
180
-
created_at TIMESTAMP NOT NULL,
181
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
182
-
);
183
-
184
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
185
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
186
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
187
-
188
-
-- Minimal dids table
189
-
CREATE TABLE IF NOT EXISTS dids (
190
-
did TEXT PRIMARY KEY,
191
-
bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
192
-
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
193
-
);
161
+
-- Minimal dids table
162
+
CREATE TABLE IF NOT EXISTS dids (
163
+
did TEXT PRIMARY KEY,
164
+
handle TEXT,
165
+
pds TEXT,
166
+
bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
167
+
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
168
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
169
+
);
194
170
195
-
CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
196
-
CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
171
+
CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
172
+
CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
173
+
CREATE INDEX IF NOT EXISTS idx_dids_handle ON dids(handle);
174
+
CREATE INDEX IF NOT EXISTS idx_dids_pds ON dids(pds);
197
175
198
176
-- PDS Repositories table
199
177
CREATE TABLE IF NOT EXISTS pds_repos (
···
232
210
233
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
234
212
query := `
235
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ip_resolved_at)
236
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
237
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
238
216
last_checked = EXCLUDED.last_checked,
239
217
status = EXCLUDED.status,
···
241
219
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip
242
220
ELSE endpoints.ip
243
221
END,
222
+
ipv6 = CASE
223
+
WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6
224
+
ELSE endpoints.ipv6
225
+
END,
244
226
ip_resolved_at = CASE
245
-
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip_resolved_at
227
+
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
246
228
ELSE endpoints.ip_resolved_at
247
229
END,
230
+
valid = EXCLUDED.valid,
248
231
updated_at = CURRENT_TIMESTAMP
249
232
RETURNING id
250
233
`
251
234
err := p.db.QueryRowContext(ctx, query,
252
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
253
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
254
237
return err
255
238
}
256
239
···
271
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
272
255
query := `
273
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
274
-
ip, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
275
258
FROM endpoints
276
259
WHERE endpoint = $1 AND endpoint_type = $2
277
260
`
278
261
279
262
var ep Endpoint
280
263
var lastChecked, ipResolvedAt sql.NullTime
281
-
var ip sql.NullString
264
+
var ip, ipv6 sql.NullString
282
265
283
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
284
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
285
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
286
269
)
287
270
if err != nil {
288
271
return nil, err
···
294
277
if ip.Valid {
295
278
ep.IP = ip.String
296
279
}
280
+
if ipv6.Valid {
281
+
ep.IPv6 = ipv6.String
282
+
}
297
283
if ipResolvedAt.Valid {
298
284
ep.IPResolvedAt = ipResolvedAt.Time
299
285
}
···
303
289
304
290
func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) {
305
291
query := `
306
-
SELECT DISTINCT ON (COALESCE(server_did, id::text))
307
-
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
308
-
ip, ip_resolved_at, updated_at
309
-
FROM endpoints
310
-
WHERE 1=1
292
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
293
+
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
295
+
FROM endpoints
296
+
WHERE 1=1
311
297
`
312
298
args := []interface{}{}
313
299
argIdx := 1
···
317
303
query += fmt.Sprintf(" AND endpoint_type = $%d", argIdx)
318
304
args = append(args, filter.Type)
319
305
argIdx++
306
+
}
307
+
308
+
// Filter by valid flag
309
+
if filter != nil && filter.OnlyValid {
310
+
query += " AND valid = true"
320
311
}
321
312
if filter.Status != "" {
322
313
statusInt := EndpointStatusUnknown
···
340
331
}
341
332
}
342
333
343
-
// NEW: Order by server_did and discovered_at to get primary endpoints
344
-
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
334
+
// Choose ordering strategy
335
+
if filter != nil && filter.Random {
336
+
// For random selection, we need to wrap in a subquery
337
+
query = fmt.Sprintf(`
338
+
WITH filtered_endpoints AS (
339
+
%s
340
+
)
341
+
SELECT * FROM filtered_endpoints
342
+
ORDER BY RANDOM()
343
+
`, query)
344
+
} else {
345
+
// Original ordering for non-random queries
346
+
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
347
+
}
345
348
346
349
if filter != nil && filter.Limit > 0 {
347
350
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
358
361
for rows.Next() {
359
362
var ep Endpoint
360
363
var lastChecked, ipResolvedAt sql.NullTime
361
-
var ip, serverDID sql.NullString
364
+
var ip, ipv6, serverDID sql.NullString
362
365
363
366
err := rows.Scan(
364
367
&ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked,
365
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
368
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt, // scan targets must cover every selected column, including valid
366
369
)
367
370
if err != nil {
368
371
return nil, err
···
377
380
if ip.Valid {
378
381
ep.IP = ip.String
379
382
}
383
+
if ipv6.Valid {
384
+
ep.IPv6 = ipv6.String
385
+
}
380
386
if ipResolvedAt.Valid {
381
387
ep.IPResolvedAt = ipResolvedAt.Time
382
388
}
···
397
403
return err
398
404
}
399
405
400
-
func (p *PostgresDB) UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error {
406
+
func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error {
401
407
query := `
402
408
UPDATE endpoints
403
-
SET ip = $1, ip_resolved_at = $2, updated_at = $3
404
-
WHERE id = $4
409
+
SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4
410
+
WHERE id = $5
405
411
`
406
-
_, err := p.db.ExecContext(ctx, query, ip, resolvedAt, time.Now().UTC(), endpointID)
412
+
_, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID)
407
413
return err
408
414
}
409
415
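// resolveAndStore is a usage sketch, not part of the storage API: it resolves A and
// AAAA records separately via the stdlib resolver (net import assumed) and persists
// both addresses with one UpdateEndpointIPs call. The real scan wiring lives elsewhere.
func resolveAndStore(ctx context.Context, db *PostgresDB, endpointID int64, host string) error {
	var v4, v6 string
	if addrs, err := net.DefaultResolver.LookupIP(ctx, "ip4", host); err == nil && len(addrs) > 0 {
		v4 = addrs[0].String()
	}
	if addrs, err := net.DefaultResolver.LookupIP(ctx, "ip6", host); err == nil && len(addrs) > 0 {
		v6 = addrs[0].String()
	}
	return db.UpdateEndpointIPs(ctx, endpointID, v4, v6, time.Now().UTC())
}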
···
470
476
defer tx.Rollback()
471
477
472
478
query := `
473
-
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at)
474
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
479
+
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
480
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
475
481
`
476
-
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt)
482
+
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
477
483
if err != nil {
478
484
return err
479
485
}
···
500
506
501
507
func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
502
508
query := `
503
-
SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at
509
+
SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
504
510
FROM endpoint_scans
505
511
WHERE endpoint_id = $1
506
512
ORDER BY scanned_at DESC
···
518
524
var scan EndpointScan
519
525
var responseTime sql.NullFloat64
520
526
var userCount sql.NullInt64
521
-
var version sql.NullString // NEW
527
+
var version, usedIP sql.NullString
522
528
var scanDataJSON []byte
523
529
524
-
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt)
530
+
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
525
531
if err != nil {
526
532
return nil, err
527
533
}
···
534
540
scan.UserCount = userCount.Int64
535
541
}
536
542
537
-
if version.Valid { // NEW
543
+
if version.Valid {
538
544
scan.Version = version.String
539
545
}
540
546
547
+
if usedIP.Valid {
548
+
scan.UsedIP = usedIP.String
549
+
}
550
+
541
551
if len(scanDataJSON) > 0 {
542
552
var scanData EndpointScanData
543
553
if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
···
563
573
discovered_at,
564
574
last_checked,
565
575
status,
566
-
ip
576
+
ip,
577
+
ipv6,
578
+
valid
567
579
FROM endpoints
568
580
WHERE endpoint_type = 'pds'
569
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
570
582
)
571
583
SELECT
572
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
573
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
574
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
575
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude
587
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
588
+
i.latitude, i.longitude
576
589
FROM unique_servers e
577
590
LEFT JOIN LATERAL (
578
591
SELECT
···
629
642
var items []*PDSListItem
630
643
for rows.Next() {
631
644
item := &PDSListItem{}
632
-
var ip, serverDID, city, country, countryCode, asnOrg sql.NullString
645
+
var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
633
646
var asn sql.NullInt32
634
-
var isDatacenter, isVPN sql.NullBool
647
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
635
648
var lat, lon sql.NullFloat64
636
649
var userCount sql.NullInt32
637
650
var responseTime sql.NullFloat64
···
639
652
var scannedAt sql.NullTime
640
653
641
654
err := rows.Scan(
642
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
643
656
&userCount, &responseTime, &version, &scannedAt,
644
657
&city, &country, &countryCode, &asn, &asnOrg,
645
-
&isDatacenter, &isVPN, &lat, &lon,
658
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
659
+
&lat, &lon,
646
660
)
647
661
if err != nil {
648
662
return nil, err
···
650
664
651
665
if ip.Valid {
652
666
item.IP = ip.String
667
+
}
668
+
if ipv6.Valid {
669
+
item.IPv6 = ipv6.String
653
670
}
654
671
if serverDID.Valid {
655
672
item.ServerDID = serverDID.String
···
681
698
ASNOrg: asnOrg.String,
682
699
IsDatacenter: isDatacenter.Bool,
683
700
IsVPN: isVPN.Bool,
701
+
IsCrawler: isCrawler.Bool,
702
+
IsTor: isTor.Bool,
703
+
IsProxy: isProxy.Bool,
684
704
Latitude: float32(lat.Float64),
685
705
Longitude: float32(lon.Float64),
686
706
}
···
694
714
695
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
696
716
query := `
697
-
WITH target_endpoint AS (
717
+
WITH target_endpoint AS MATERIALIZED (
698
718
SELECT
699
719
e.id,
700
720
e.endpoint,
···
702
722
e.discovered_at,
703
723
e.last_checked,
704
724
e.status,
705
-
e.ip
725
+
e.ip,
726
+
e.ipv6,
727
+
e.valid
706
728
FROM endpoints e
707
-
WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
708
-
),
709
-
aliases_agg AS (
710
-
SELECT
711
-
te.server_did,
712
-
array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
713
-
MIN(e.discovered_at) as first_discovered_at
714
-
FROM target_endpoint te
715
-
LEFT JOIN endpoints e ON te.server_did = e.server_did
716
-
AND e.endpoint_type = 'pds'
717
-
AND te.server_did IS NOT NULL
718
-
GROUP BY te.server_did
729
+
WHERE e.endpoint = $1
730
+
AND e.endpoint_type = 'pds'
731
+
LIMIT 1
719
732
)
720
733
SELECT
721
734
te.id,
···
725
738
te.last_checked,
726
739
te.status,
727
740
te.ip,
741
+
te.ipv6,
742
+
te.valid,
728
743
latest.user_count,
729
744
latest.response_time,
730
745
latest.version,
731
746
latest.scan_data->'metadata'->'server_info' as server_info,
732
747
latest.scanned_at,
733
748
i.city, i.country, i.country_code, i.asn, i.asn_org,
734
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude,
749
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
750
+
i.latitude, i.longitude,
735
751
i.raw_data,
736
-
COALESCE(aa.aliases, ARRAY[]::text[]) as aliases,
737
-
aa.first_discovered_at
752
+
COALESCE(
753
+
ARRAY(
754
+
SELECT e2.endpoint
755
+
FROM endpoints e2
756
+
WHERE e2.server_did = te.server_did
757
+
AND e2.endpoint_type = 'pds'
758
+
AND e2.endpoint != te.endpoint
759
+
AND te.server_did IS NOT NULL
760
+
ORDER BY e2.discovered_at
761
+
),
762
+
ARRAY[]::text[]
763
+
) as aliases,
764
+
CASE
765
+
WHEN te.server_did IS NOT NULL THEN (
766
+
SELECT MIN(e3.discovered_at)
767
+
FROM endpoints e3
768
+
WHERE e3.server_did = te.server_did
769
+
AND e3.endpoint_type = 'pds'
770
+
)
771
+
ELSE NULL
772
+
END as first_discovered_at
738
773
FROM target_endpoint te
739
-
LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did
740
774
LEFT JOIN LATERAL (
741
-
SELECT scan_data, response_time, version, scanned_at, user_count
742
-
FROM endpoint_scans
743
-
WHERE endpoint_id = te.id
744
-
ORDER BY scanned_at DESC
775
+
SELECT
776
+
es.scan_data,
777
+
es.response_time,
778
+
es.version,
779
+
es.scanned_at,
780
+
es.user_count
781
+
FROM endpoint_scans es
782
+
WHERE es.endpoint_id = te.id
783
+
ORDER BY es.scanned_at DESC
745
784
LIMIT 1
746
785
) latest ON true
747
-
LEFT JOIN ip_infos i ON te.ip = i.ip
786
+
LEFT JOIN ip_infos i ON te.ip = i.ip;
748
787
`
749
788
750
789
detail := &PDSDetail{}
751
-
var ip, city, country, countryCode, asnOrg, serverDID sql.NullString
790
+
var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
752
791
var asn sql.NullInt32
753
-
var isDatacenter, isVPN sql.NullBool
792
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
754
793
var lat, lon sql.NullFloat64
755
794
var userCount sql.NullInt32
756
795
var responseTime sql.NullFloat64
···
762
801
var firstDiscoveredAt sql.NullTime
763
802
764
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
765
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
766
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
767
806
&city, &country, &countryCode, &asn, &asnOrg,
768
-
&isDatacenter, &isVPN, &lat, &lon,
807
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
808
+
&lat, &lon,
769
809
&rawDataJSON,
770
810
pq.Array(&aliases),
771
811
&firstDiscoveredAt,
···
777
817
if ip.Valid {
778
818
detail.IP = ip.String
779
819
}
820
+
if ipv6.Valid {
821
+
detail.IPv6 = ipv6.String
822
+
}
780
823
781
824
if serverDID.Valid {
782
825
detail.ServerDID = serverDID.String
···
785
828
// Set aliases and is_primary
786
829
detail.Aliases = aliases
787
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
788
-
// Has server_did - check if this is the first discovered
789
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
790
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
791
833
} else {
792
-
// No server_did means unique server
793
834
detail.IsPrimary = true
794
835
}
795
836
···
815
856
}
816
857
}
817
858
818
-
// Parse IP info
859
+
// Parse IP info with all fields
819
860
if city.Valid || country.Valid {
820
861
detail.IPInfo = &IPInfo{
821
862
IP: ip.String,
···
826
867
ASNOrg: asnOrg.String,
827
868
IsDatacenter: isDatacenter.Bool,
828
869
IsVPN: isVPN.Bool,
870
+
IsCrawler: isCrawler.Bool,
871
+
IsTor: isTor.Bool,
872
+
IsProxy: isProxy.Bool,
829
873
Latitude: float32(lat.Float64),
830
874
Longitude: float32(lon.Float64),
831
875
}
···
961
1005
countryCode := extractString(ipInfo, "location", "country_code")
962
1006
asn := extractInt(ipInfo, "asn", "asn")
963
1007
asnOrg := extractString(ipInfo, "asn", "org")
964
-
isDatacenter := extractBool(ipInfo, "company", "type", "hosting")
965
-
isVPN := extractBool(ipInfo, "security", "vpn")
1008
+
1009
+
// Extract top-level boolean flags
1010
+
isDatacenter := false
1011
+
if val, ok := ipInfo["is_datacenter"].(bool); ok {
1012
+
isDatacenter = val
1013
+
}
1014
+
1015
+
isVPN := false
1016
+
if val, ok := ipInfo["is_vpn"].(bool); ok {
1017
+
isVPN = val
1018
+
}
1019
+
1020
+
isCrawler := false
1021
+
if val, ok := ipInfo["is_crawler"].(bool); ok {
1022
+
isCrawler = val
1023
+
}
1024
+
1025
+
isTor := false
1026
+
if val, ok := ipInfo["is_tor"].(bool); ok {
1027
+
isTor = val
1028
+
}
1029
+
1030
+
isProxy := false
1031
+
if val, ok := ipInfo["is_proxy"].(bool); ok {
1032
+
isProxy = val
1033
+
}
1034
+
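// A closure could collapse the five lookups above (hypothetical, behavior-identical,
// since a failed type assertion yields false):
//	flag := func(key string) bool { v, _ := ipInfo[key].(bool); return v }
//	isDatacenter, isVPN := flag("is_datacenter"), flag("is_vpn")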
966
1035
lat := extractFloat(ipInfo, "location", "latitude")
967
1036
lon := extractFloat(ipInfo, "location", "longitude")
968
1037
969
1038
query := `
970
-
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, latitude, longitude, raw_data, fetched_at)
971
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
1039
+
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at)
1040
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
972
1041
ON CONFLICT(ip) DO UPDATE SET
973
1042
city = EXCLUDED.city,
974
1043
country = EXCLUDED.country,
···
977
1046
asn_org = EXCLUDED.asn_org,
978
1047
is_datacenter = EXCLUDED.is_datacenter,
979
1048
is_vpn = EXCLUDED.is_vpn,
1049
+
is_crawler = EXCLUDED.is_crawler,
1050
+
is_tor = EXCLUDED.is_tor,
1051
+
is_proxy = EXCLUDED.is_proxy,
980
1052
latitude = EXCLUDED.latitude,
981
1053
longitude = EXCLUDED.longitude,
982
1054
raw_data = EXCLUDED.raw_data,
983
1055
fetched_at = EXCLUDED.fetched_at,
984
1056
updated_at = CURRENT_TIMESTAMP
985
1057
`
986
-
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, lat, lon, rawDataJSON, time.Now().UTC())
1058
+
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC())
987
1059
return err
988
1060
}
989
1061
990
1062
func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) {
991
1063
query := `
992
-
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn,
1064
+
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy,
993
1065
latitude, longitude, raw_data, fetched_at, updated_at
994
1066
FROM ip_infos
995
1067
WHERE ip = $1
···
1000
1072
1001
1073
err := p.db.QueryRowContext(ctx, query, ip).Scan(
1002
1074
&info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg,
1003
-
&info.IsDatacenter, &info.IsVPN, &info.Latitude, &info.Longitude,
1075
+
&info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy,
1076
+
&info.Latitude, &info.Longitude,
1004
1077
&rawDataJSON, &info.FetchedAt, &info.UpdatedAt,
1005
1078
)
1006
1079
if err != nil {
···
1090
1163
return 0
1091
1164
}
1092
1165
1093
-
func extractBool(data map[string]interface{}, keys ...string) bool {
1094
-
current := data
1095
-
for i, key := range keys {
1096
-
if i == len(keys)-1 {
1097
-
if val, ok := current[key].(bool); ok {
1098
-
return val
1099
-
}
1100
-
// Check if it's a string that matches (for type="hosting")
1101
-
if val, ok := current[key].(string); ok {
1102
-
// For cases like company.type == "hosting"
1103
-
expectedValue := keys[len(keys)-1]
1104
-
return val == expectedValue
1105
-
}
1106
-
return false
1107
-
}
1108
-
if nested, ok := current[key].(map[string]interface{}); ok {
1109
-
current = nested
1110
-
} else {
1111
-
return false
1112
-
}
1113
-
}
1114
-
return false
1115
-
}
1116
-
1117
-
// ===== BUNDLE OPERATIONS =====
1118
-
1119
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1120
-
didsJSON, err := json.Marshal(bundle.DIDs)
1121
-
if err != nil {
1122
-
return err
1123
-
}
1124
-
1125
-
// Calculate cumulative sizes from previous bundle
1126
-
if bundle.BundleNumber > 1 {
1127
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1128
-
if err == nil && prevBundle != nil {
1129
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1130
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1131
-
} else {
1132
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1133
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1134
-
}
1135
-
} else {
1136
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1137
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1138
-
}
1139
-
1140
-
query := `
1141
-
INSERT INTO plc_bundles (
1142
-
bundle_number, start_time, end_time, dids,
1143
-
hash, compressed_hash, compressed_size, uncompressed_size,
1144
-
cumulative_compressed_size, cumulative_uncompressed_size,
1145
-
cursor, prev_bundle_hash, compressed
1146
-
)
1147
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1148
-
ON CONFLICT(bundle_number) DO UPDATE SET
1149
-
start_time = EXCLUDED.start_time,
1150
-
end_time = EXCLUDED.end_time,
1151
-
dids = EXCLUDED.dids,
1152
-
hash = EXCLUDED.hash,
1153
-
compressed_hash = EXCLUDED.compressed_hash,
1154
-
compressed_size = EXCLUDED.compressed_size,
1155
-
uncompressed_size = EXCLUDED.uncompressed_size,
1156
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1157
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1158
-
cursor = EXCLUDED.cursor,
1159
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1160
-
compressed = EXCLUDED.compressed
1161
-
`
1162
-
_, err = p.db.ExecContext(ctx, query,
1163
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1164
-
didsJSON, bundle.Hash, bundle.CompressedHash,
1165
-
bundle.CompressedSize, bundle.UncompressedSize,
1166
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1167
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1168
-
)
1169
-
1170
-
return err
1171
-
}
1172
-
1173
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1174
-
query := `
1175
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1176
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1177
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1178
-
FROM plc_bundles
1179
-
WHERE bundle_number = $1
1180
-
`
1181
-
1182
-
var bundle PLCBundle
1183
-
var didsJSON []byte
1184
-
var prevHash sql.NullString
1185
-
var cursor sql.NullString
1186
-
1187
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1188
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1189
-
&didsJSON, &bundle.Hash, &bundle.CompressedHash,
1190
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1191
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1192
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1193
-
)
1194
-
if err != nil {
1195
-
return nil, err
1196
-
}
1197
-
1198
-
if prevHash.Valid {
1199
-
bundle.PrevBundleHash = prevHash.String
1200
-
}
1201
-
if cursor.Valid {
1202
-
bundle.Cursor = cursor.String
1203
-
}
1204
-
1205
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1206
-
return &bundle, nil
1207
-
}
1208
-
1209
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1210
-
query := `
1211
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1212
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1213
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1214
-
FROM plc_bundles
1215
-
ORDER BY bundle_number DESC
1216
-
LIMIT $1
1217
-
`
1218
-
1219
-
rows, err := p.db.QueryContext(ctx, query, limit)
1220
-
if err != nil {
1221
-
return nil, err
1222
-
}
1223
-
defer rows.Close()
1224
-
1225
-
return p.scanBundles(rows)
1226
-
}
1227
-
1228
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1229
-
query := `
1230
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1231
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1232
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1233
-
FROM plc_bundles
1234
-
WHERE dids ? $1
1235
-
ORDER BY bundle_number ASC
1236
-
`
1237
-
1238
-
rows, err := p.db.QueryContext(ctx, query, did)
1239
-
if err != nil {
1240
-
return nil, err
1241
-
}
1242
-
defer rows.Close()
1243
-
1244
-
return p.scanBundles(rows)
1245
-
}
1246
-
1247
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1248
-
var bundles []*PLCBundle
1249
-
1250
-
for rows.Next() {
1251
-
var bundle PLCBundle
1252
-
var didsJSON []byte
1253
-
var prevHash sql.NullString
1254
-
var cursor sql.NullString
1255
-
1256
-
if err := rows.Scan(
1257
-
&bundle.BundleNumber,
1258
-
&bundle.StartTime,
1259
-
&bundle.EndTime,
1260
-
&didsJSON,
1261
-
&bundle.Hash,
1262
-
&bundle.CompressedHash,
1263
-
&bundle.CompressedSize,
1264
-
&bundle.UncompressedSize,
1265
-
&bundle.CumulativeCompressedSize,
1266
-
&bundle.CumulativeUncompressedSize,
1267
-
&cursor,
1268
-
&prevHash,
1269
-
&bundle.Compressed,
1270
-
&bundle.CreatedAt,
1271
-
); err != nil {
1272
-
return nil, err
1273
-
}
1274
-
1275
-
if prevHash.Valid {
1276
-
bundle.PrevBundleHash = prevHash.String
1277
-
}
1278
-
if cursor.Valid {
1279
-
bundle.Cursor = cursor.String
1280
-
}
1281
-
1282
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1283
-
bundles = append(bundles, &bundle)
1284
-
}
1285
-
1286
-
return bundles, rows.Err()
1287
-
}
1288
-
1289
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1290
-
var count, lastBundleNum int64
1291
-
err := p.db.QueryRowContext(ctx, `
1292
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1293
-
FROM plc_bundles
1294
-
`).Scan(&count, &lastBundleNum)
1295
-
if err != nil {
1296
-
return 0, 0, 0, 0, err
1297
-
}
1298
-
1299
-
if lastBundleNum == 0 {
1300
-
return 0, 0, 0, 0, nil
1301
-
}
1302
-
1303
-
var compressedSize, uncompressedSize int64
1304
-
err = p.db.QueryRowContext(ctx, `
1305
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1306
-
FROM plc_bundles
1307
-
WHERE bundle_number = $1
1308
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1309
-
if err != nil {
1310
-
return 0, 0, 0, 0, err
1311
-
}
1312
-
1313
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1314
-
}
1315
-
1316
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1317
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1318
-
var num int
1319
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1320
-
return num, err
1321
-
}
1322
-
1323
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1324
-
query := `
1325
-
SELECT bundle_number
1326
-
FROM plc_bundles
1327
-
WHERE start_time <= $1 AND end_time >= $1
1328
-
ORDER BY bundle_number ASC
1329
-
LIMIT 1
1330
-
`
1331
-
1332
-
var bundleNum int
1333
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1334
-
if err == sql.ErrNoRows {
1335
-
query = `
1336
-
SELECT bundle_number
1337
-
FROM plc_bundles
1338
-
WHERE end_time < $1
1339
-
ORDER BY bundle_number DESC
1340
-
LIMIT 1
1341
-
`
1342
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1343
-
if err == sql.ErrNoRows {
1344
-
return 1, nil
1345
-
}
1346
-
if err != nil {
1347
-
return 0, err
1348
-
}
1349
-
return bundleNum, nil
1350
-
}
1351
-
if err != nil {
1352
-
return 0, err
1353
-
}
1354
-
1355
-
return bundleNum, nil
1356
-
}
1357
-
1358
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1359
-
query := `
1360
-
WITH daily_stats AS (
1361
-
SELECT
1362
-
DATE(start_time) as date,
1363
-
MAX(bundle_number) as last_bundle,
1364
-
COUNT(*) as bundle_count,
1365
-
SUM(uncompressed_size) as total_uncompressed,
1366
-
SUM(compressed_size) as total_compressed,
1367
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1368
-
MAX(cumulative_compressed_size) as cumulative_compressed
1369
-
FROM plc_bundles
1370
-
WHERE bundle_number >= $1
1371
-
GROUP BY DATE(start_time)
1372
-
)
1373
-
SELECT
1374
-
date::text,
1375
-
last_bundle,
1376
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1377
-
total_uncompressed,
1378
-
total_compressed,
1379
-
cumulative_uncompressed,
1380
-
cumulative_compressed
1381
-
FROM daily_stats
1382
-
ORDER BY date ASC
1383
-
`
1384
-
1385
-
if limit > 0 {
1386
-
query += fmt.Sprintf(" LIMIT %d", limit)
1387
-
}
1388
-
1389
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1390
-
if err != nil {
1391
-
return nil, err
1392
-
}
1393
-
defer rows.Close()
1394
-
1395
-
var history []*PLCHistoryPoint
1396
-
for rows.Next() {
1397
-
var point PLCHistoryPoint
1398
-
var cumulativeOps int64
1399
-
1400
-
err := rows.Scan(
1401
-
&point.Date,
1402
-
&point.BundleNumber,
1403
-
&cumulativeOps,
1404
-
&point.UncompressedSize,
1405
-
&point.CompressedSize,
1406
-
&point.CumulativeUncompressed,
1407
-
&point.CumulativeCompressed,
1408
-
)
1409
-
if err != nil {
1410
-
return nil, err
1411
-
}
1412
-
1413
-
point.OperationCount = int(cumulativeOps)
1414
-
1415
-
history = append(history, &point)
1416
-
}
1417
-
1418
-
return history, rows.Err()
1419
-
}
1420
-
1421
-
// ===== MEMPOOL OPERATIONS =====
1422
-
1423
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1424
-
if len(ops) == 0 {
1425
-
return nil
1426
-
}
1427
-
1428
-
tx, err := p.db.BeginTx(ctx, nil)
1429
-
if err != nil {
1430
-
return err
1431
-
}
1432
-
defer tx.Rollback()
1433
-
1434
-
stmt, err := tx.PrepareContext(ctx, `
1435
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1436
-
VALUES ($1, $2, $3, $4)
1437
-
ON CONFLICT(cid) DO NOTHING
1438
-
`)
1439
-
if err != nil {
1440
-
return err
1441
-
}
1442
-
defer stmt.Close()
1443
-
1444
-
for _, op := range ops {
1445
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1446
-
if err != nil {
1447
-
return err
1448
-
}
1449
-
}
1450
-
1451
-
return tx.Commit()
1452
-
}
1453
-
1454
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1455
-
query := "SELECT COUNT(*) FROM plc_mempool"
1456
-
var count int
1457
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1458
-
return count, err
1459
-
}
1460
-
1461
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1462
-
query := `
1463
-
SELECT id, did, operation, cid, created_at, added_at
1464
-
FROM plc_mempool
1465
-
ORDER BY created_at ASC
1466
-
LIMIT $1
1467
-
`
1468
-
1469
-
rows, err := p.db.QueryContext(ctx, query, limit)
1470
-
if err != nil {
1471
-
return nil, err
1472
-
}
1473
-
defer rows.Close()
1474
-
1475
-
var ops []MempoolOperation
1476
-
for rows.Next() {
1477
-
var op MempoolOperation
1478
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1479
-
if err != nil {
1480
-
return nil, err
1481
-
}
1482
-
ops = append(ops, op)
1483
-
}
1484
-
1485
-
return ops, rows.Err()
1486
-
}
1487
-
1488
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1489
-
if len(ids) == 0 {
1490
-
return nil
1491
-
}
1492
-
1493
-
placeholders := make([]string, len(ids))
1494
-
args := make([]interface{}, len(ids))
1495
-
for i, id := range ids {
1496
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1497
-
args[i] = id
1498
-
}
1499
-
1500
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1501
-
strings.Join(placeholders, ","))
1502
-
1503
-
_, err := p.db.ExecContext(ctx, query, args...)
1504
-
return err
1505
-
}
1506
-
1507
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1508
-
query := `
1509
-
SELECT id, did, operation, cid, created_at, added_at
1510
-
FROM plc_mempool
1511
-
ORDER BY created_at ASC, id ASC
1512
-
LIMIT 1
1513
-
`
1514
-
1515
-
var op MempoolOperation
1516
-
err := p.db.QueryRowContext(ctx, query).Scan(
1517
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1518
-
)
1519
-
if err == sql.ErrNoRows {
1520
-
return nil, nil
1521
-
}
1522
-
if err != nil {
1523
-
return nil, err
1524
-
}
1525
-
1526
-
return &op, nil
1527
-
}
1528
-
1529
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1530
-
query := `
1531
-
SELECT id, did, operation, cid, created_at, added_at
1532
-
FROM plc_mempool
1533
-
ORDER BY created_at DESC, id DESC
1534
-
LIMIT 1
1535
-
`
1536
-
1537
-
var op MempoolOperation
1538
-
err := p.db.QueryRowContext(ctx, query).Scan(
1539
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1540
-
)
1541
-
if err == sql.ErrNoRows {
1542
-
return nil, nil
1543
-
}
1544
-
if err != nil {
1545
-
return nil, err
1546
-
}
1547
-
1548
-
return &op, nil
1549
-
}
1550
-
1551
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1552
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1553
-
var count int
1554
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1555
-
return count, err
1556
-
}
1557
-
1558
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1559
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1560
-
var size int64
1561
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1562
-
return size, err
1563
-
}
1564
-
1565
1166
// ===== CURSOR OPERATIONS =====
1566
1167
1567
1168
func (p *PostgresDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) {
···
1634
1235
1635
1236
// ===== DID OPERATIONS =====
1636
1237
1637
-
func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int) error {
1238
+
func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error {
1638
1239
query := `
1639
-
INSERT INTO dids (did, bundle_numbers, created_at)
1640
-
VALUES ($1, jsonb_build_array($2), CURRENT_TIMESTAMP)
1240
+
INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
1241
+
VALUES ($1, $2, $3, jsonb_build_array($4::integer), CURRENT_TIMESTAMP)
1641
1242
ON CONFLICT(did) DO UPDATE SET
1243
+
handle = EXCLUDED.handle,
1244
+
pds = EXCLUDED.pds,
1642
1245
bundle_numbers = CASE
1643
-
WHEN dids.bundle_numbers ? $2::text THEN dids.bundle_numbers
1644
-
ELSE dids.bundle_numbers || jsonb_build_array($2)
1645
-
END
1246
+
WHEN dids.bundle_numbers @> jsonb_build_array($4::integer) THEN dids.bundle_numbers
1247
+
ELSE dids.bundle_numbers || jsonb_build_array($4::integer)
1248
+
END,
1249
+
updated_at = CURRENT_TIMESTAMP
1250
+
`
1251
+
_, err := p.db.ExecContext(ctx, query, did, handle, pds, bundleNum)
1252
+
return err
1253
+
}
1254
+
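// indexBundleDIDs is a usage sketch (the op field names are assumptions): it drives
// UpsertDID while indexing a bundle. The jsonb containment check (@>) above makes the
// call idempotent, so replaying a bundle never duplicates bundle numbers.
func indexBundleDIDs(ctx context.Context, db *PostgresDB, bundleNum int, ops []struct{ DID, Handle, PDS string }) error {
	for _, op := range ops {
		if err := db.UpsertDID(ctx, op.DID, bundleNum, op.Handle, op.PDS); err != nil {
			return fmt.Errorf("upsert %s: %w", op.DID, err)
		}
	}
	return nil
}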
1255
+
// UpsertDIDFromMempool creates/updates DID record without adding to bundle_numbers
1256
+
func (p *PostgresDB) UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error {
1257
+
query := `
1258
+
INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
1259
+
VALUES ($1, $2, $3, '[]'::jsonb, CURRENT_TIMESTAMP)
1260
+
ON CONFLICT(did) DO UPDATE SET
1261
+
handle = EXCLUDED.handle,
1262
+
pds = EXCLUDED.pds,
1263
+
updated_at = CURRENT_TIMESTAMP
1646
1264
`
1647
-
_, err := p.db.ExecContext(ctx, query, did, bundleNum)
1265
+
_, err := p.db.ExecContext(ctx, query, did, handle, pds)
1648
1266
return err
1649
1267
}
1650
1268
1651
1269
func (p *PostgresDB) GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) {
1652
1270
query := `
1653
-
SELECT did, bundle_numbers, created_at
1271
+
SELECT did, handle, pds, bundle_numbers, created_at
1654
1272
FROM dids
1655
1273
WHERE did = $1
1656
1274
`
1657
1275
1658
1276
var record DIDRecord
1659
1277
var bundleNumbersJSON []byte
1278
+
var handle, pds sql.NullString
1660
1279
1661
1280
err := p.db.QueryRowContext(ctx, query, did).Scan(
1662
1281
&record.DID,
1282
+
&handle,
1283
+
&pds,
1663
1284
&bundleNumbersJSON,
1664
1285
&record.CreatedAt,
1665
1286
)
···
1667
1288
return nil, err
1668
1289
}
1669
1290
1291
+
if handle.Valid {
1292
+
record.Handle = handle.String
1293
+
}
1294
+
if pds.Valid {
1295
+
record.CurrentPDS = pds.String
1296
+
}
1297
+
1670
1298
if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
1671
1299
return nil, err
1672
1300
}
1673
1301
1674
1302
return &record, nil
1303
+
}
1304
+
1305
+
func (p *PostgresDB) GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) {
1306
+
query := `
1307
+
SELECT did, handle, pds, bundle_numbers, created_at
1308
+
FROM dids
1309
+
WHERE handle = $1
1310
+
`
1311
+
1312
+
var record DIDRecord
1313
+
var bundleNumbersJSON []byte
1314
+
var recordHandle, pds sql.NullString
1315
+
1316
+
err := p.db.QueryRowContext(ctx, query, handle).Scan(
1317
+
&record.DID,
1318
+
&recordHandle,
1319
+
&pds,
1320
+
&bundleNumbersJSON,
1321
+
&record.CreatedAt,
1322
+
)
1323
+
if err != nil {
1324
+
return nil, err
1325
+
}
1326
+
1327
+
if recordHandle.Valid {
1328
+
record.Handle = recordHandle.String
1329
+
}
1330
+
if pds.Valid {
1331
+
record.CurrentPDS = pds.String
1332
+
}
1333
+
1334
+
if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
1335
+
return nil, err
1336
+
}
1337
+
1338
+
return &record, nil
1339
+
}
1340
+
1341
+
// GetGlobalDIDInfo retrieves consolidated DID info from 'dids' and 'pds_repos'
1342
+
func (p *PostgresDB) GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) {
1343
+
query := `
1344
+
WITH primary_endpoints AS (
1345
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
1346
+
id
1347
+
FROM endpoints
1348
+
WHERE endpoint_type = 'pds'
1349
+
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
1350
+
)
1351
+
SELECT
1352
+
d.did,
1353
+
d.handle,
1354
+
d.pds,
1355
+
d.bundle_numbers,
1356
+
d.created_at,
1357
+
COALESCE(
1358
+
jsonb_agg(
1359
+
jsonb_build_object(
1360
+
'id', pr.id,
1361
+
'endpoint_id', pr.endpoint_id,
1362
+
'endpoint', e.endpoint,
1363
+
'did', pr.did,
1364
+
'head', pr.head,
1365
+
'rev', pr.rev,
1366
+
'active', pr.active,
1367
+
'status', pr.status,
1368
+
'first_seen', pr.first_seen AT TIME ZONE 'UTC',
1369
+
'last_seen', pr.last_seen AT TIME ZONE 'UTC',
1370
+
'updated_at', pr.updated_at AT TIME ZONE 'UTC'
1371
+
)
1372
+
ORDER BY pr.last_seen DESC
1373
+
) FILTER (
1374
+
WHERE pr.id IS NOT NULL AND pe.id IS NOT NULL
1375
+
),
1376
+
'[]'::jsonb
1377
+
) AS hosting_on
1378
+
FROM
1379
+
dids d
1380
+
LEFT JOIN
1381
+
pds_repos pr ON d.did = pr.did
1382
+
LEFT JOIN
1383
+
endpoints e ON pr.endpoint_id = e.id
1384
+
LEFT JOIN
1385
+
primary_endpoints pe ON pr.endpoint_id = pe.id
1386
+
WHERE
1387
+
d.did = $1
1388
+
GROUP BY
1389
+
d.did, d.handle, d.pds, d.bundle_numbers, d.created_at
1390
+
`
1391
+
1392
+
var info GlobalDIDInfo
1393
+
var bundleNumbersJSON []byte
1394
+
var hostingOnJSON []byte
1395
+
var handle, pds sql.NullString
1396
+
1397
+
err := p.db.QueryRowContext(ctx, query, did).Scan(
1398
+
&info.DID,
1399
+
&handle,
1400
+
&pds,
1401
+
&bundleNumbersJSON,
1402
+
&info.CreatedAt,
1403
+
&hostingOnJSON,
1404
+
)
1405
+
if err != nil {
1406
+
return nil, err
1407
+
}
1408
+
1409
+
if handle.Valid {
1410
+
info.Handle = handle.String
1411
+
}
1412
+
if pds.Valid {
1413
+
info.CurrentPDS = pds.String
1414
+
}
1415
+
1416
+
if err := json.Unmarshal(bundleNumbersJSON, &info.BundleNumbers); err != nil {
1417
+
return nil, fmt.Errorf("failed to unmarshal bundle_numbers: %w", err)
1418
+
}
1419
+
1420
+
if err := json.Unmarshal(hostingOnJSON, &info.HostingOn); err != nil {
1421
+
return nil, fmt.Errorf("failed to unmarshal hosting_on: %w", err)
1422
+
}
1423
+
1424
+
return &info, nil
1675
1425
}
1676
1426
1677
1427
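// handleGlobalDID is a sketch of an HTTP consumer (net/http and encoding/json imports
// assumed; the actual route wiring in internal/api is not shown here). Since the query
// already returns hosting_on ordered by last_seen DESC, the struct can be encoded as-is.
func handleGlobalDID(db *PostgresDB) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		info, err := db.GetGlobalDIDInfo(r.Context(), r.URL.Query().Get("did"))
		if err != nil {
			http.Error(w, "did not found", http.StatusNotFound)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(info)
	}
}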
func (p *PostgresDB) AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error {
···
2254
2004
"recent_changes": recentChanges,
2255
2005
}, nil
2256
2006
}
2007
+
2008
+
// GetTableSizes fetches size information (in bytes) for all tables in the specified schema.
2009
+
func (p *PostgresDB) GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) {
2010
+
// Query now selects raw byte values directly
2011
+
query := `
2012
+
SELECT
2013
+
c.relname AS table_name,
2014
+
pg_total_relation_size(c.oid) AS total_bytes,
2015
+
pg_relation_size(c.oid) AS table_heap_bytes,
2016
+
pg_indexes_size(c.oid) AS indexes_bytes
2017
+
FROM
2018
+
pg_class c
2019
+
LEFT JOIN
2020
+
pg_namespace n ON n.oid = c.relnamespace
2021
+
WHERE
2022
+
c.relkind = 'r' -- 'r' = ordinary table
2023
+
AND n.nspname = $1
2024
+
ORDER BY
2025
+
total_bytes DESC;
2026
+
`
2027
+
rows, err := p.db.QueryContext(ctx, query, schema)
2028
+
if err != nil {
2029
+
return nil, fmt.Errorf("failed to query table sizes: %w", err)
2030
+
}
2031
+
defer rows.Close()
2032
+
2033
+
var results []TableSizeInfo
2034
+
for rows.Next() {
2035
+
var info TableSizeInfo
2036
+
// Scan directly into int64 fields
2037
+
if err := rows.Scan(
2038
+
&info.TableName,
2039
+
&info.TotalBytes,
2040
+
&info.TableHeapBytes,
2041
+
&info.IndexesBytes,
2042
+
); err != nil {
2043
+
return nil, fmt.Errorf("failed to scan table size row: %w", err)
2044
+
}
2045
+
results = append(results, info)
2046
+
}
2047
+
if err := rows.Err(); err != nil {
2048
+
return nil, fmt.Errorf("error iterating table size rows: %w", err)
2049
+
}
2050
+
2051
+
return results, nil
2052
+
}
2053
+
2054
+
// GetIndexSizes fetches size information (in bytes) for all indexes in the specified schema.
2055
+
func (p *PostgresDB) GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) {
2056
+
// Query now selects raw byte values directly
2057
+
query := `
2058
+
SELECT
2059
+
c.relname AS index_name,
2060
+
COALESCE(i.indrelid::regclass::text, 'N/A') AS table_name,
2061
+
pg_relation_size(c.oid) AS index_bytes
2062
+
FROM
2063
+
pg_class c
2064
+
LEFT JOIN
2065
+
pg_index i ON i.indexrelid = c.oid
2066
+
LEFT JOIN
2067
+
pg_namespace n ON n.oid = c.relnamespace
2068
+
WHERE
2069
+
c.relkind = 'i' -- 'i' = index
2070
+
AND n.nspname = $1
2071
+
ORDER BY
2072
+
index_bytes DESC;
2073
+
`
2074
+
rows, err := p.db.QueryContext(ctx, query, schema)
2075
+
if err != nil {
2076
+
return nil, fmt.Errorf("failed to query index sizes: %w", err)
2077
+
}
2078
+
defer rows.Close()
2079
+
2080
+
var results []IndexSizeInfo
2081
+
for rows.Next() {
2082
+
var info IndexSizeInfo
2083
+
var tableName sql.NullString
2084
+
// Scan directly into int64 field
2085
+
if err := rows.Scan(
2086
+
&info.IndexName,
2087
+
&tableName,
2088
+
&info.IndexBytes,
2089
+
); err != nil {
2090
+
return nil, fmt.Errorf("failed to scan index size row: %w", err)
2091
+
}
2092
+
if tableName.Valid {
2093
+
info.TableName = tableName.String
2094
+
} else {
2095
+
info.TableName = "N/A"
2096
+
}
2097
+
results = append(results, info)
2098
+
}
2099
+
if err := rows.Err(); err != nil {
2100
+
return nil, fmt.Errorf("error iterating index size rows: %w", err)
2101
+
}
2102
+
2103
+
return results, nil
2104
+
}
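// printTableSizes is a usage sketch for the two size helpers above; humanBytes is a
// local convenience for display, not part of the storage package API.
func printTableSizes(ctx context.Context, db *PostgresDB) error {
	sizes, err := db.GetTableSizes(ctx, "public")
	if err != nil {
		return err
	}
	for _, s := range sizes {
		fmt.Printf("%-30s total=%-10s heap=%-10s indexes=%s\n",
			s.TableName, humanBytes(s.TotalBytes), humanBytes(s.TableHeapBytes), humanBytes(s.IndexesBytes))
	}
	return nil
}

func humanBytes(n int64) string {
	const unit = 1024
	if n < unit {
		return fmt.Sprintf("%d B", n)
	}
	div, exp := int64(unit), 0
	for m := n / unit; m >= unit; m /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %ciB", float64(n)/float64(div), "KMGTPE"[exp])
}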
+60
-16
internal/storage/types.go
···
1
1
package storage
2
2
3
3
import (
4
+
"database/sql"
4
5
"fmt"
5
6
"path/filepath"
6
7
"time"
···
25
26
LastChecked time.Time
26
27
Status int
27
28
IP string
29
+
IPv6 string
28
30
IPResolvedAt time.Time
31
+
Valid bool
29
32
UpdatedAt time.Time
30
33
}
31
34
···
52
55
Status int
53
56
ResponseTime float64
54
57
UserCount int64
55
-
Version string // NEW: Add this field
58
+
Version string
59
+
UsedIP string // NEW: Track which IP was actually used
56
60
ScanData *EndpointScanData
57
61
ScannedAt time.Time
58
62
}
···
73
77
74
78
// EndpointFilter for querying endpoints
75
79
type EndpointFilter struct {
76
-
Type string // "pds", "labeler", etc.
80
+
Type string
77
81
Status string
78
82
MinUserCount int64
79
-
OnlyStale bool // NEW: Only return endpoints that need re-checking
80
-
RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale
83
+
OnlyStale bool
84
+
OnlyValid bool
85
+
RecheckInterval time.Duration
86
+
Random bool
81
87
Limit int
82
88
Offset int
83
89
}
···
116
122
StartTime time.Time
117
123
EndTime time.Time
118
124
BoundaryCIDs []string
119
-
DIDs []string
125
+
DIDCount int // Changed from DIDs []string
120
126
Hash string
121
127
CompressedHash string
122
128
CompressedSize int64
···
149
155
CumulativeCompressed int64 `json:"cumulative_compressed"`
150
156
}
151
157
152
-
// MempoolOperation represents an operation waiting to be bundled
153
-
type MempoolOperation struct {
154
-
ID int64
155
-
DID string
156
-
Operation string
157
-
CID string
158
-
CreatedAt time.Time
159
-
AddedAt time.Time
160
-
}
161
-
162
158
// ScanCursor stores scanning progress
163
159
type ScanCursor struct {
164
160
Source string
···
170
166
// DIDRecord represents a DID entry in the database
171
167
type DIDRecord struct {
172
168
DID string `json:"did"`
169
+
Handle string `json:"handle,omitempty"`
170
+
CurrentPDS string `json:"current_pds,omitempty"`
171
+
LastOpAt time.Time `json:"last_op_at,omitempty"`
173
172
BundleNumbers []int `json:"bundle_numbers"`
174
173
CreatedAt time.Time `json:"created_at"`
175
174
}
176
175
176
+
// GlobalDIDInfo consolidates DID data from PLC and PDS tables
177
+
type GlobalDIDInfo struct {
178
+
DIDRecord // Embeds all fields: DID, Handle, CurrentPDS, etc.
179
+
HostingOn []*PDSRepo `json:"hosting_on"`
180
+
}
181
+
177
182
// IPInfo represents IP information (stored with IP as primary key)
178
183
type IPInfo struct {
179
184
IP string `json:"ip"`
···
184
189
ASNOrg string `json:"asn_org,omitempty"`
185
190
IsDatacenter bool `json:"is_datacenter"`
186
191
IsVPN bool `json:"is_vpn"`
192
+
IsCrawler bool `json:"is_crawler"`
193
+
IsTor bool `json:"is_tor"`
194
+
IsProxy bool `json:"is_proxy"`
187
195
Latitude float32 `json:"latitude,omitempty"`
188
196
Longitude float32 `json:"longitude,omitempty"`
189
197
RawData map[string]interface{} `json:"raw_data,omitempty"`
···
191
199
UpdatedAt time.Time `json:"updated_at"`
192
200
}
193
201
202
+
// IsHome returns true if this is a residential/home IP
203
+
// (not crawler, datacenter, tor, proxy, or vpn)
204
+
func (i *IPInfo) IsHome() bool {
205
+
return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN
206
+
}
207
+
194
208
// PDSListItem is a virtual type created by JOIN for /pds endpoint
195
209
type PDSListItem struct {
196
210
// From endpoints table
197
211
ID int64
198
212
Endpoint string
199
-
ServerDID string // NEW: Add this
213
+
ServerDID string
200
214
DiscoveredAt time.Time
201
215
LastChecked time.Time
202
216
Status int
203
217
IP string
218
+
IPv6 string
219
+
Valid bool // NEW
204
220
205
221
// From latest endpoint_scans (via JOIN)
206
222
LatestScan *struct {
···
256
272
type PDSRepo struct {
257
273
ID int64 `json:"id"`
258
274
EndpointID int64 `json:"endpoint_id"`
275
+
Endpoint string `json:"endpoint,omitempty"`
259
276
DID string `json:"did"`
260
277
Head string `json:"head,omitempty"`
261
278
Rev string `json:"rev,omitempty"`
···
273
290
Active bool
274
291
Status string
275
292
}
293
+
294
+
type DIDBackfillInfo struct {
295
+
DID string
296
+
LastBundleNum int
297
+
}
298
+
299
+
type DIDStateUpdateData struct {
300
+
DID string
301
+
Handle sql.NullString // Use sql.NullString for potential NULLs
302
+
PDS sql.NullString
303
+
OpTime time.Time
304
+
}
305
+
306
+
// TableSizeInfo holds size information for a database table.
307
+
type TableSizeInfo struct {
308
+
TableName string `json:"table_name"`
309
+
TotalBytes int64 `json:"total_bytes"` // Raw bytes
310
+
TableHeapBytes int64 `json:"table_heap_bytes"` // Raw bytes
311
+
IndexesBytes int64 `json:"indexes_bytes"` // Raw bytes
312
+
}
313
+
314
+
// IndexSizeInfo holds size information for a database index.
315
+
type IndexSizeInfo struct {
316
+
IndexName string `json:"index_name"`
317
+
TableName string `json:"table_name"`
318
+
IndexBytes int64 `json:"index_bytes"` // Raw bytes
319
+
}
+2
-2
internal/worker/scheduler.go
+125
utils/db-sizes.sh
···
1
+
#!/bin/bash
2
+
3
+
# === Configuration ===
4
+
CONFIG_FILE="config.yaml" # Path to your config file
5
+
SCHEMA_NAME="public" # Replace if your schema is different
6
+
7
+
# Check if config file exists
8
+
if [ ! -f "$CONFIG_FILE" ]; then
9
+
echo "Error: Config file not found at '$CONFIG_FILE'"
10
+
exit 1
11
+
fi
12
+
13
+
# Check if yq is installed
14
+
if ! command -v yq &> /dev/null; then
15
+
echo "Error: 'yq' command not found. Please install yq (Go version by Mike Farah)."
16
+
echo "See: https://github.com/mikefarah/yq/"
17
+
exit 1
18
+
fi
19
+
20
+
echo "--- Reading connection info from '$CONFIG_FILE' ---"
21
+
22
+
# === Extract Database Config using yq ===
23
+
DB_TYPE=$(yq e '.database.type' "$CONFIG_FILE")
24
+
DB_CONN_STRING=$(yq e '.database.path' "$CONFIG_FILE") # This is likely a URI
25
+
26
+
if [ -z "$DB_TYPE" ] || [ -z "$DB_CONN_STRING" ]; then
27
+
echo "Error: Could not read database type or path from '$CONFIG_FILE'."
28
+
exit 1
29
+
fi
30
+
31
+
# === Parse the Connection String ===
32
+
DB_USER=""
33
+
DB_PASSWORD=""
34
+
DB_HOST="localhost" # Default
35
+
DB_PORT="5432" # Default
36
+
DB_NAME=""
37
+
38
+
# Use regex to parse the URI (handles postgres:// or postgresql://, optional password/port, and query parameters)
39
+
if [[ "$DB_CONN_STRING" =~ ^(postgres|postgresql)://([^:]+)(:([^@]+))?@([^:/]+)(:([0-9]+))?/([^?]+)(\?.+)?$ ]]; then
40
+
DB_USER="${BASH_REMATCH[2]}"
41
+
DB_PASSWORD="${BASH_REMATCH[4]}" # Optional group
42
+
DB_HOST="${BASH_REMATCH[5]}"
43
+
DB_PORT="${BASH_REMATCH[7]:-$DB_PORT}" # Use extracted port or default
44
+
DB_NAME="${BASH_REMATCH[8]}" # Database name before the '?'
45
+
else
46
+
echo "Error: Could not parse database connection string URI: $DB_CONN_STRING"
47
+
exit 1
48
+
fi
49
+
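# Example of what the regex extracts (illustrative values only):
#   postgres://atscan:s3cret@db.example.com:5433/atscan?sslmode=disable
# parses to USER=atscan PASSWORD=s3cret HOST=db.example.com PORT=5433 NAME=atscan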
50
+
# Set PGPASSWORD environment variable if password was found
51
+
if [ -n "$DB_PASSWORD" ]; then
52
+
export PGPASSWORD="$DB_PASSWORD"
53
+
else
54
+
echo "Warning: No password found in connection string. Relying on ~/.pgpass or password prompt."
55
+
unset PGPASSWORD
56
+
fi
57
+
58
+
echo "--- Database Size Investigation ---"
59
+
echo "Database: $DB_NAME"
60
+
echo "Schema: $SCHEMA_NAME"
61
+
echo "User: $DB_USER"
62
+
echo "Host: $DB_HOST:$DB_PORT"
63
+
echo "-----------------------------------"
64
+
65
+
# === Table Sizes ===
66
+
echo ""
67
+
echo "## Table Sizes (Schema: $SCHEMA_NAME) ##"
68
+
# Removed --tuples-only and --no-align, added -P footer=off
69
+
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
70
+
SELECT
71
+
c.relname AS "Table Name",
72
+
pg_size_pretty(pg_total_relation_size(c.oid)) AS "Total Size",
73
+
pg_size_pretty(pg_relation_size(c.oid)) AS "Table Heap Size",
74
+
pg_size_pretty(pg_indexes_size(c.oid)) AS "Indexes Size"
75
+
FROM
76
+
pg_class c
77
+
LEFT JOIN
78
+
pg_namespace n ON n.oid = c.relnamespace
79
+
WHERE
80
+
c.relkind = 'r' -- 'r' = ordinary table
81
+
AND n.nspname = '$SCHEMA_NAME'
82
+
ORDER BY
83
+
pg_total_relation_size(c.oid) DESC;
84
+
EOF
85
+
86
+
if [ $? -ne 0 ]; then
87
+
echo "Error querying table sizes. Check connection details, permissions, and password."
88
+
unset PGPASSWORD
89
+
exit 1
90
+
fi
91
+
92
+
# === Index Sizes ===
93
+
echo ""
94
+
echo "## Index Sizes (Schema: $SCHEMA_NAME) ##"
95
+
# Removed --tuples-only and --no-align, added -P footer=off
96
+
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
97
+
SELECT
98
+
c.relname AS "Index Name",
99
+
i.indrelid::regclass AS "Table Name", -- Show associated table
100
+
pg_size_pretty(pg_relation_size(c.oid)) AS "Index Size"
101
+
FROM
102
+
pg_class c
103
+
LEFT JOIN
104
+
pg_index i ON i.indexrelid = c.oid
105
+
LEFT JOIN
106
+
pg_namespace n ON n.oid = c.relnamespace
107
+
WHERE
108
+
c.relkind = 'i' -- 'i' = index
109
+
AND n.nspname = '$SCHEMA_NAME'
110
+
ORDER BY
111
+
pg_relation_size(c.oid) DESC;
112
+
EOF
113
+
114
+
if [ $? -ne 0 ]; then
115
+
echo "Error querying index sizes. Check connection details, permissions, and password."
116
+
unset PGPASSWORD
117
+
exit 1
118
+
fi
119
+
120
+
echo ""
121
+
echo "-----------------------------------"
122
+
echo "Investigation complete."
123
+
124
+
# Unset the password variable for security
125
+
unset PGPASSWORD
+113
utils/import-labels.js
···
1
+
import { file, write } from "bun";
2
+
import { join } from "path";
3
+
import { mkdir } from "fs/promises";
4
+
import { init, compress } from "@bokuweb/zstd-wasm";
5
+
6
+
// --- Configuration ---
7
+
const CSV_FILE = process.argv[2];
8
+
const CONFIG_FILE = "config.yaml";
9
+
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
10
+
// ---------------------
11
+
12
+
if (!CSV_FILE) {
13
+
console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
14
+
process.exit(1);
15
+
}
16
+
17
+
console.log("========================================");
18
+
console.log("PLC Operation Labels Import (Bun + WASM)");
19
+
console.log("========================================");
20
+
21
+
// 1. Read and parse config
22
+
console.log(`Loading config from ${CONFIG_FILE}...`);
23
+
const configFile = await file(CONFIG_FILE).text();
24
+
const config = Bun.YAML.parse(configFile);
25
+
const bundleDir = config?.plc?.bundle_dir;
26
+
27
+
if (!bundleDir) {
28
+
console.error("Error: Could not parse plc.bundle_dir from config.yaml");
29
+
process.exit(1);
30
+
}
31
+
32
+
const FINAL_LABELS_DIR = join(bundleDir, "labels");
33
+
await mkdir(FINAL_LABELS_DIR, { recursive: true });
34
+
35
+
console.log(`CSV File: ${CSV_FILE}`);
36
+
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
37
+
console.log("");
38
+
39
+
// 2. Initialize Zstd WASM module
40
+
await init();
41
+
42
+
// --- Pass 1: Read entire file into memory and group by bundle ---
43
+
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
44
+
console.warn("This will use a large amount of RAM!");
45
+
46
+
const startTime = Date.now();
47
+
const bundles = new Map(); // Map<string, string[]>
48
+
let lineCount = 0;
49
+
50
+
const inputFile = file(CSV_FILE);
51
+
const fileStream = inputFile.stream();
52
+
const decoder = new TextDecoder();
53
+
let remainder = "";
54
+
55
+
for await (const chunk of fileStream) {
56
+
const text = remainder + decoder.decode(chunk);
57
+
const lines = text.split("\n");
58
+
remainder = lines.pop() || "";
59
+
60
+
for (const line of lines) {
61
+
if (line === "") continue;
62
+
lineCount++;
63
+
64
+
if (lineCount === 1 && line.startsWith("bundle,")) {
65
+
continue; // Skip header
66
+
}
67
+
68
+
const firstCommaIndex = line.indexOf(",");
69
+
if (firstCommaIndex === -1) {
70
+
console.warn(`Skipping malformed line: ${line}`);
71
+
continue;
72
+
}
73
+
const bundleNumStr = line.substring(0, firstCommaIndex);
74
+
const bundleKey = bundleNumStr.padStart(6, "0");
75
+
76
+
// Add line to the correct bundle's array
77
+
if (!bundles.has(bundleKey)) {
78
+
bundles.set(bundleKey, []);
79
+
}
80
+
bundles.get(bundleKey).push(line);
81
+
}
82
+
}
83
+
// Note: We ignore any final `remainder` as it's likely an empty line
84
+
85
+
console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
86
+
console.log(`Found ${bundles.size} unique bundles.`);
87
+
88
+
// --- Pass 2: Compress and write each bundle ---
89
+
console.log("\nPass 2/2: Compressing and writing bundle files...");
90
+
let i = 0;
91
+
for (const [bundleKey, lines] of bundles.entries()) {
92
+
i++;
93
+
console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
94
+
95
+
// Join all lines for this bundle into one big string
96
+
const content = lines.join("\n");
97
+
98
+
// Compress the string
99
+
const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
100
+
101
+
// Write the compressed data to the file
102
+
const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
103
+
await write(outPath, compressedData);
104
+
}
105
+
106
+
// 3. Clean up
107
+
const totalTime = (Date.now() - startTime) / 1000;
108
+
console.log("\n========================================");
109
+
console.log("Import Summary");
110
+
console.log("========================================");
111
+
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
112
+
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
113
+
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
+91
utils/import-labels.sh
···
1
+
#!/bin/bash
2
+
# utils/import-labels.sh (single-pass sorted pipe)
3
+
4
+
set -e
5
+
6
+
if [ $# -lt 1 ]; then
7
+
echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>"
8
+
exit 1
9
+
fi
10
+
11
+
CSV_FILE="$1"
12
+
CONFIG_FILE="config.yaml"
13
+
14
+
[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
15
+
[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
16
+
17
+
# Extract bundle directory path
18
+
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
19
+
20
+
[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
21
+
22
+
FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
23
+
24
+
echo "========================================"
25
+
echo "PLC Operation Labels Import (Sorted Pipe)"
26
+
echo "========================================"
27
+
echo "CSV File: $CSV_FILE"
28
+
echo "Output Dir: $FINAL_LABELS_DIR"
29
+
echo ""
30
+
31
+
# Ensure the final directory exists
32
+
mkdir -p "$FINAL_LABELS_DIR"
33
+
34
+
echo "Streaming, sorting, and compressing on the fly..."
35
+
echo "This will take time. `pv` will show progress of the TAIL command."
36
+
echo "The `sort` command will run after `pv` is complete."
37
+
echo ""
38
+
39
+
# This is the single-pass pipeline
40
+
tail -n +2 "$CSV_FILE" | \
41
+
pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
42
+
sort -t, -k1,1n | \
43
+
awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
44
+
# This awk script EXPECTS input sorted by bundle number (col 1)
45
+
BEGIN {
46
+
# last_bundle_num tracks the bundle we are currently writing
47
+
last_bundle_num = -1
48
+
# cmd holds the current zstd pipe command
49
+
cmd = ""
50
+
}
51
+
{
52
+
current_bundle_num = $1
53
+
54
+
# Check if the bundle number has changed
55
+
if (current_bundle_num != last_bundle_num) {
56
+
57
+
# If it changed, and we have an old pipe open, close it
58
+
if (last_bundle_num != -1) {
59
+
close(cmd)
60
+
}
61
+
62
+
# Create the new pipe command, writing to the final .zst file
63
+
outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
64
+
cmd = "zstd -T0 -o " outfile
65
+
66
+
# Update the tracker
67
+
last_bundle_num = current_bundle_num
68
+
69
+
# Print progress to stderr
70
+
printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
71
+
}
72
+
73
+
# Print the current line ($0) to the open pipe
74
+
# The first time this runs for a bundle, it opens the pipe
75
+
# Subsequent times, it writes to the already-open pipe
76
+
print $0 | cmd
77
+
}
78
+
# END block: close the very last pipe
79
+
END {
80
+
if (last_bundle_num != -1) {
81
+
close(cmd)
82
+
}
83
+
printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
84
+
}'
85
+
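# Result: one zstd-compressed CSV per bundle, e.g. "$FINAL_LABELS_DIR/000042.csv.zst"
# (bundle number is illustrative).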
86
+
echo ""
87
+
echo "========================================"
88
+
echo "Import Summary"
89
+
echo "========================================"
90
+
echo "✓ Import completed successfully!"
91
+
echo "Label files are stored in: $FINAL_LABELS_DIR"
+2
-2
utils/migrate-ipinfo.sh
+199
utils/vuln-scanner-parallel.sh
···
1
+
#!/bin/bash
2
+
3
+
# Configuration
4
+
API_HOST="${API_HOST:-http://localhost:8080}"
5
+
TIMEOUT=5
6
+
PARALLEL_JOBS=20
7
+
OUTPUT_DIR="./pds_scan_results"
8
+
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
9
+
RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
10
+
FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"
11
+
12
+
# Paths to check
13
+
PATHS=(
14
+
"/info.php"
15
+
"/phpinfo.php"
16
+
"/test.php"
17
+
"/admin"
18
+
"/admin.php"
19
+
"/wp-admin"
20
+
"/robots.txt"
21
+
"/.env"
22
+
"/.git/config"
23
+
"/config.php"
24
+
"/backup"
25
+
"/db.sql"
26
+
"/.DS_Store"
27
+
"/server-status"
28
+
"/.well-known/security.txt"
29
+
)
30
+
31
+
# Colors
32
+
RED='\033[0;31m'
33
+
GREEN='\033[0;32m'
34
+
YELLOW='\033[1;33m'
35
+
BLUE='\033[0;34m'
36
+
NC='\033[0m'
37
+
38
+
# Check dependencies
39
+
if ! command -v jq &> /dev/null; then
40
+
echo -e "${RED}Error: jq is required${NC}"
41
+
echo "Install: sudo apt-get install jq"
42
+
exit 1
43
+
fi
44
+
45
+
if ! command -v parallel &> /dev/null; then
46
+
echo -e "${RED}Error: GNU parallel is required${NC}"
47
+
echo "Install: sudo apt-get install parallel (or brew install parallel)"
48
+
exit 1
49
+
fi
50
+
51
+
mkdir -p "$OUTPUT_DIR"
52
+
53
+
echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
54
+
echo -e "${BLUE}║ PDS Security Scanner (Parallel) ║${NC}"
55
+
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
56
+
echo ""
57
+
echo "API Host: $API_HOST"
58
+
echo "Timeout: ${TIMEOUT}s per request"
59
+
echo "Parallel jobs: ${PARALLEL_JOBS}"
60
+
echo "Paths to check: ${#PATHS[@]}"
61
+
echo ""
62
+
63
+
# Scan function - will be called by GNU parallel
64
+
scan_endpoint() {
65
+
local endpoint="$1"
66
+
local timeout="$2"
67
+
shift 2
68
+
local paths=("$@")
69
+
70
+
for path in "${paths[@]}"; do
71
+
url="${endpoint}${path}"
72
+
73
+
response=$(curl -s -o /dev/null -w "%{http_code}" \
74
+
--max-time "$timeout" \
75
+
--connect-timeout "$timeout" \
76
+
--retry 0 \
77
+
-A "Mozilla/5.0 (Security Scanner)" \
78
+
"$url" 2>/dev/null)
79
+
80
+
if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
81
+
if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
82
+
echo "FOUND|$endpoint|$path|$response"
83
+
elif [ "$response" != "403" ] && [ "$response" != "401" ]; then
84
+
echo "MAYBE|$endpoint|$path|$response"
85
+
fi
86
+
fi
87
+
done
88
+
}
89
+
90
+
export -f scan_endpoint
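# Exported so the subshells spawned by GNU parallel can see the function.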
91
+
92
+
# Fetch active PDS endpoints
93
+
echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
94
+
ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
95
+
jq -r '.[].endpoint' 2>/dev/null)
96
+
97
+
if [ -z "$ENDPOINTS" ]; then
98
+
echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
99
+
echo "Check that the API is running at: $API_HOST"
100
+
exit 1
101
+
fi
102
+
103
+
ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ')
104
+
echo -e "${GREEN}✓ Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
105
+
echo ""
106
+
107
+
# Write header to results file
108
+
{
109
+
echo "PDS Security Scan Results"
110
+
echo "========================="
111
+
echo "Scan started: $(date)"
112
+
echo "Endpoints scanned: ${ENDPOINT_COUNT}"
113
+
echo "Paths checked: ${#PATHS[@]}"
114
+
echo "Parallel jobs: ${PARALLEL_JOBS}"
115
+
echo ""
116
+
echo "Results:"
117
+
echo "--------"
118
+
} > "$RESULTS_FILE"
119
+
120
+
# Run parallel scan
121
+
echo -e "${YELLOW}Starting parallel scan...${NC}"
122
+
echo -e "${BLUE}(This may take a few minutes depending on endpoint count)${NC}"
123
+
echo ""
124
+
125
+
echo "$ENDPOINTS" | \
126
+
parallel \
127
+
-j "$PARALLEL_JOBS" \
128
+
--bar \
129
+
--joblog "${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" \
130
+
scan_endpoint {} "$TIMEOUT" "${PATHS[@]}" \
131
+
>> "$RESULTS_FILE"
132
+
133
+
echo ""
134
+
echo -e "${YELLOW}Processing results...${NC}"
135
+
136
+
# Count results
137
+
FOUND_COUNT=$(grep -c "^FOUND|" "$RESULTS_FILE" 2>/dev/null || true)
138
+
MAYBE_COUNT=$(grep -c "^MAYBE|" "$RESULTS_FILE" 2>/dev/null || true)
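# `grep -c` prints "0" *and* exits non-zero when nothing matches, so the
# common `|| echo 0` fallback would emit a second "0" line and break the
# -gt tests below; `|| true` keeps the single count grep already printed.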
139
+
140
+
# Extract found URLs to separate file
141
+
{
142
+
echo "Found URLs (HTTP 200/301/302)"
143
+
echo "=============================="
144
+
echo "Scan: $(date)"
145
+
echo ""
146
+
} > "$FOUND_FILE"
147
+
148
+
grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | while IFS='|' read -r status endpoint path code; do
149
+
echo "$endpoint$path [$code]"
150
+
done >> "$FOUND_FILE"
151
+
152
+
# Create summary at end of results file
153
+
{
154
+
echo ""
155
+
echo "Summary"
156
+
echo "======="
157
+
echo "Scan completed: $(date)"
158
+
echo "Total endpoints scanned: ${ENDPOINT_COUNT}"
159
+
echo "Total paths checked: $((ENDPOINT_COUNT * ${#PATHS[@]}))"
160
+
echo "Found (200/301/302): ${FOUND_COUNT}"
161
+
echo "Maybe (other codes): ${MAYBE_COUNT}"
162
+
} >> "$RESULTS_FILE"
163
+
164
+
# Display summary
165
+
echo ""
166
+
echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
167
+
echo -e "${BLUE}║ Scan Complete! ║${NC}"
168
+
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
169
+
echo ""
170
+
echo -e "Endpoints scanned: ${GREEN}${ENDPOINT_COUNT}${NC}"
171
+
echo -e "Paths checked per site: ${BLUE}${#PATHS[@]}${NC}"
172
+
echo -e "Total requests made: ${BLUE}$((ENDPOINT_COUNT * ${#PATHS[@]}))${NC}"
173
+
echo ""
174
+
echo -e "Results:"
175
+
echo -e " ${GREEN}✓ Found (200/301/302):${NC} ${FOUND_COUNT}"
176
+
echo -e " ${YELLOW}? Maybe (other):${NC} ${MAYBE_COUNT}"
177
+
echo ""
178
+
echo "Files created:"
179
+
echo " Full results: $RESULTS_FILE"
180
+
echo " Found URLs: $FOUND_FILE"
181
+
echo " Job log: ${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt"
182
+
183
+
# Show sample of found URLs if any
184
+
if [ "$FOUND_COUNT" -gt 0 ]; then
185
+
echo ""
186
+
echo -e "${RED}⚠ SECURITY ALERT: Found exposed paths!${NC}"
187
+
echo ""
188
+
echo "Sample findings (first 10):"
189
+
grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | head -10 | while IFS='|' read -r status endpoint path code; do
190
+
echo -e " ${RED}✗${NC} $endpoint${RED}$path${NC} [$code]"
191
+
done
192
+
193
+
if [ "$FOUND_COUNT" -gt 10 ]; then
194
+
echo ""
195
+
echo " ... and $((FOUND_COUNT - 10)) more (see $FOUND_FILE)"
196
+
fi
197
+
fi
198
+
199
+
echo ""
+117
utils/vuln-scanner.sh
···
1
+
#!/bin/bash
2
+
3
+
# Configuration
4
+
API_HOST="${API_HOST:-http://localhost:8080}"
5
+
TIMEOUT=5
6
+
OUTPUT_DIR="./pds_scan_results"
7
+
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
8
+
RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
9
+
FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"
10
+
11
+
# Paths to check (one per line for easier editing)
12
+
PATHS=(
13
+
"/info.php"
14
+
"/phpinfo.php"
15
+
"/test.php"
16
+
"/admin"
17
+
"/admin.php"
18
+
"/wp-admin"
19
+
"/robots.txt"
20
+
"/.env"
21
+
"/.git/config"
22
+
"/config.php"
23
+
"/backup"
24
+
"/db.sql"
25
+
"/.DS_Store"
26
+
"/server-status"
27
+
"/.well-known/security.txt"
28
+
)
29
+
30
+
# Colors
31
+
RED='\033[0;31m'
32
+
GREEN='\033[0;32m'
33
+
YELLOW='\033[1;33m'
34
+
BLUE='\033[0;34m'
35
+
NC='\033[0m'
36
+
37
+
mkdir -p "$OUTPUT_DIR"
38
+
39
+
echo -e "${BLUE}=== PDS Security Scanner ===${NC}"
40
+
echo "API Host: $API_HOST"
41
+
echo "Timeout: ${TIMEOUT}s"
42
+
echo "Scanning for ${#PATHS[@]} paths"
43
+
echo "Results: $RESULTS_FILE"
44
+
echo ""
45
+
46
+
# Fetch active PDS endpoints
47
+
echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
48
+
ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
49
+
jq -r '.[].endpoint' 2>/dev/null)
50
+
51
+
if [ -z "$ENDPOINTS" ]; then
52
+
echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
53
+
exit 1
54
+
fi
55
+
56
+
ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ')
57
+
echo -e "${GREEN}Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
58
+
echo ""
59
+
60
+
# Write header
61
+
echo "PDS Security Scan - $(date)" > "$RESULTS_FILE"
62
+
echo "========================================" >> "$RESULTS_FILE"
63
+
echo "" >> "$RESULTS_FILE"
64
+
65
+
# Counters
66
+
CURRENT=0
67
+
TOTAL_FOUND=0
68
+
TOTAL_MAYBE=0
69
+
70
+
# Scan each endpoint sequentially
71
+
while IFS= read -r endpoint; do
72
+
CURRENT=$((CURRENT + 1))
73
+
74
+
echo -e "${BLUE}[$CURRENT/$ENDPOINT_COUNT]${NC} Scanning: $endpoint"
75
+
76
+
# Scan each path
77
+
for path in "${PATHS[@]}"; do
78
+
url="${endpoint}${path}"
79
+
80
+
# Make request with timeout
81
+
response=$(curl -s -o /dev/null -w "%{http_code}" \
82
+
--max-time "$TIMEOUT" \
83
+
--connect-timeout "$TIMEOUT" \
84
+
--retry 0 \
85
+
-A "Mozilla/5.0 (Security Scanner)" \
86
+
"$url" 2>/dev/null)
87
+
88
+
# Check response
89
+
if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
90
+
if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
91
+
echo -e " ${GREEN}✓ FOUND${NC} $path ${YELLOW}[$response]${NC}"
92
+
echo "FOUND: $endpoint$path [$response]" >> "$RESULTS_FILE"
93
+
echo "$endpoint$path" >> "$FOUND_FILE"
94
+
TOTAL_FOUND=$((TOTAL_FOUND + 1))
95
+
elif [ "$response" != "403" ]; then
96
+
echo -e " ${YELLOW}? MAYBE${NC} $path ${YELLOW}[$response]${NC}"
97
+
echo "MAYBE: $endpoint$path [$response]" >> "$RESULTS_FILE"
98
+
TOTAL_MAYBE=$((TOTAL_MAYBE + 1))
99
+
fi
100
+
fi
101
+
done
102
+
103
+
echo "" >> "$RESULTS_FILE"
104
+
105
+
done <<< "$ENDPOINTS"
106
+
107
+
# Summary
108
+
echo ""
109
+
echo -e "${BLUE}========================================${NC}"
110
+
echo -e "${GREEN}Scan Complete!${NC}"
111
+
echo "Scanned: ${ENDPOINT_COUNT} endpoints"
112
+
echo "Paths checked per endpoint: ${#PATHS[@]}"
113
+
echo -e "${GREEN}Found (200/301/302): ${TOTAL_FOUND}${NC}"
114
+
echo -e "${YELLOW}Maybe (other codes): ${TOTAL_MAYBE}${NC}"
115
+
echo ""
116
+
echo "Full results: $RESULTS_FILE"
117
+
[ -f "$FOUND_FILE" ] && echo "Found URLs: $FOUND_FILE"