Makefile (+39, -5)
···
-all: run
+.PHONY: all build install run update-plcbundle test clean fmt lint help
+
+# Binary name
+BINARY_NAME=atscand
+INSTALL_PATH=$(GOPATH)/bin
+
+# Go commands
+GOCMD=go
+GOBUILD=$(GOCMD) build
+GOINSTALL=$(GOCMD) install
+GOCLEAN=$(GOCMD) clean
+GOTEST=$(GOCMD) test
+GOGET=$(GOCMD) get
+GOFMT=$(GOCMD) fmt
+GOMOD=$(GOCMD) mod
+GORUN=$(GOCMD) run
+
+# Default target
+all: build
+
+# Build the CLI tool
+build:
+	@echo "Building $(BINARY_NAME)..."
+	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+# Install the CLI tool globally
+install:
+	@echo "Installing $(BINARY_NAME)..."
+	$(GOINSTALL) ./cmd/atscand
 
 run:
-	go run cmd/atscanner.go -verbose
+	$(GORUN) cmd/atscand/main.go -verbose
 
-clean-db:
-	dropdb -U atscanner atscanner
-	createdb atscanner -O atscanner
+update-plcbundle:
+	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  make build   - Build the binary"
+	@echo "  make install - Install binary globally"
+	@echo "  make run     - Run app"
cmd/atscand/main.go (+159)
···
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/atscan/atscand/internal/api"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/pds"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
+	"github.com/atscan/atscand/internal/worker"
+)
+
+const VERSION = "1.0.0"
+
+func main() {
+	configPath := flag.String("config", "config.yaml", "path to config file")
+	verbose := flag.Bool("verbose", false, "enable verbose logging")
+	flag.Parse()
+
+	// Load configuration
+	cfg, err := config.Load(*configPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Override verbose setting if flag is provided
+	if *verbose {
+		cfg.API.Verbose = true
+	}
+
+	// Initialize logger
+	log.Init(cfg.API.Verbose)
+
+	// Print banner
+	log.Banner(VERSION)
+
+	// Print configuration summary
+	log.PrintConfig(map[string]string{
+		"Database Type":     cfg.Database.Type,
+		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+		"PLC Directory":     cfg.PLC.DirectoryURL,
+		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+		"PDS Timeout":       cfg.PDS.Timeout.String(),
+		"API Host":          cfg.API.Host,
+		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+	})
+
+	// Initialize database using factory pattern
+	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+	if err != nil {
+		log.Fatal("Failed to initialize database: %v", err)
+	}
+	defer func() {
+		log.Info("Closing database connection...")
+		db.Close()
+	}()
+
+	// Set scan retention from config
+	if cfg.PDS.ScanRetention > 0 {
+		db.SetScanRetention(cfg.PDS.ScanRetention)
+		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+	}
+
+	// Run migrations
+	if err := db.Migrate(); err != nil {
+		log.Fatal("Failed to run migrations: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Initialize workers
+	log.Info("Initializing scanners...")
+
+	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+	if err != nil {
+		log.Fatal("Failed to create bundle manager: %v", err)
+	}
+	defer bundleManager.Close()
+	log.Verbose("✓ Bundle manager initialized (shared)")
+
+	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+	defer plcScanner.Close()
+	log.Verbose("✓ PLC scanner initialized")
+
+	pdsScanner := pds.NewScanner(db, cfg.PDS)
+	log.Verbose("✓ PDS scanner initialized")
+
+	scheduler := worker.NewScheduler()
+
+	// Schedule PLC directory scan
+	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+		if err := plcScanner.Scan(ctx); err != nil {
+			log.Error("PLC scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+	// Schedule PDS availability checks
+	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+		if err := pdsScanner.ScanAll(ctx); err != nil {
+			log.Error("PDS scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+	// Start API server
+	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+	go func() {
+		if err := apiServer.Start(); err != nil {
+			log.Fatal("API server error: %v", err)
+		}
+	}()
+
+	// Give the API server a moment to start
+	time.Sleep(100 * time.Millisecond)
+	log.Info("✓ API server started successfully")
+	log.Info("")
+	log.Info("🚀 ATScanner is running!")
+	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+	log.Info("   Press Ctrl+C to stop")
+	log.Info("")
+
+	// Start scheduler
+	scheduler.Start(ctx)
+
+	// Wait for interrupt
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	<-sigChan
+
+	log.Info("")
+	log.Info("Shutting down gracefully...")
+	cancel()
+
+	log.Info("Stopping API server...")
+	apiServer.Shutdown(context.Background())
+
+	log.Info("Waiting for active tasks to complete...")
+	time.Sleep(2 * time.Second)
+
+	log.Info("✓ Shutdown complete. Goodbye!")
+}
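For context: main.go only relies on the scheduler exposing NewScheduler, AddJob(name, interval, fn), and Start(ctx). A minimal sketch of what such an interval scheduler could look like; this is an assumption about the shape of internal/worker, not the repository's actual implementation:

    package worker

    import (
    	"context"
    	"time"
    )

    // job pairs a name with a function to run on a fixed interval.
    type job struct {
    	name     string
    	interval time.Duration
    	fn       func()
    }

    // Scheduler runs registered jobs on their intervals until ctx is cancelled.
    type Scheduler struct {
    	jobs []job
    }

    func NewScheduler() *Scheduler { return &Scheduler{} }

    func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
    	s.jobs = append(s.jobs, job{name: name, interval: interval, fn: fn})
    }

    // Start launches one goroutine per job; each runs once immediately, then ticks.
    func (s *Scheduler) Start(ctx context.Context) {
    	for _, j := range s.jobs {
    		go func(j job) {
    			j.fn() // run once at startup
    			t := time.NewTicker(j.interval)
    			defer t.Stop()
    			for {
    				select {
    				case <-ctx.Done():
    					return
    				case <-t.C:
    					j.fn()
    				}
    			}
    		}(j)
    	}
    }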
cmd/atscanner.go (-152)
···
-package main
-
-import (
-	"context"
-	"flag"
-	"fmt"
-	"os"
-	"os/signal"
-	"syscall"
-	"time"
-
-	"github.com/atscan/atscanner/internal/api"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/pds"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
-	"github.com/atscan/atscanner/internal/worker"
-)
-
-const VERSION = "1.0.0"
-
-func main() {
-	configPath := flag.String("config", "config.yaml", "path to config file")
-	verbose := flag.Bool("verbose", false, "enable verbose logging")
-	flag.Parse()
-
-	// Load configuration
-	cfg, err := config.Load(*configPath)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
-		os.Exit(1)
-	}
-
-	// Override verbose setting if flag is provided
-	if *verbose {
-		cfg.API.Verbose = true
-	}
-
-	// Initialize logger
-	log.Init(cfg.API.Verbose)
-
-	// Print banner
-	log.Banner(VERSION)
-
-	// Print configuration summary
-	log.PrintConfig(map[string]string{
-		"Database Type":     cfg.Database.Type,
-		"Database Path":     cfg.Database.Path, // Will be auto-redacted
-		"PLC Directory":     cfg.PLC.DirectoryURL,
-		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
-		"PLC Bundle Dir":    cfg.PLC.BundleDir,
-		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
-		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
-		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
-		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
-		"PDS Timeout":       cfg.PDS.Timeout.String(),
-		"API Host":          cfg.API.Host,
-		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
-		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
-	})
-
-	// Initialize database using factory pattern
-	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
-	if err != nil {
-		log.Fatal("Failed to initialize database: %v", err)
-	}
-	defer func() {
-		log.Info("Closing database connection...")
-		db.Close()
-	}()
-
-	// Set scan retention from config
-	if cfg.PDS.ScanRetention > 0 {
-		db.SetScanRetention(cfg.PDS.ScanRetention)
-		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
-	}
-
-	// Run migrations
-	if err := db.Migrate(); err != nil {
-		log.Fatal("Failed to run migrations: %v", err)
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Initialize workers
-	log.Info("Initializing scanners...")
-
-	plcScanner := plc.NewScanner(db, cfg.PLC)
-	defer plcScanner.Close()
-	log.Verbose("✓ PLC scanner initialized")
-
-	pdsScanner := pds.NewScanner(db, cfg.PDS)
-	log.Verbose("✓ PDS scanner initialized")
-
-	scheduler := worker.NewScheduler()
-
-	// Schedule PLC directory scan
-	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
-		if err := plcScanner.Scan(ctx); err != nil {
-			log.Error("PLC scan error: %v", err)
-		}
-	})
-	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
-
-	// Schedule PDS availability checks
-	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
-		if err := pdsScanner.ScanAll(ctx); err != nil {
-			log.Error("PDS scan error: %v", err)
-		}
-	})
-	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
-
-	// Start API server
-	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
-	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
-	go func() {
-		if err := apiServer.Start(); err != nil {
-			log.Fatal("API server error: %v", err)
-		}
-	}()
-
-	// Give the API server a moment to start
-	time.Sleep(100 * time.Millisecond)
-	log.Info("✓ API server started successfully")
-	log.Info("")
-	log.Info("🚀 ATScanner is running!")
-	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
-	log.Info("   Press Ctrl+C to stop")
-	log.Info("")
-
-	// Start scheduler
-	scheduler.Start(ctx)
-
-	// Wait for interrupt
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
-
-	log.Info("")
-	log.Info("Shutting down gracefully...")
-	cancel()
-
-	log.Info("Stopping API server...")
-	apiServer.Shutdown(context.Background())
-
-	log.Info("Waiting for active tasks to complete...")
-	time.Sleep(2 * time.Second)
-
-	log.Info("✓ Shutdown complete. Goodbye!")
-}
cmd/import-labels/main.go (+168)
···
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	PLC struct {
+		BundleDir string `yaml:"bundle_dir"`
+	} `yaml:"plc"`
+}
+
+var CONFIG_FILE = "config.yaml"
+
+// ---------------------
+
+func main() {
+	// Define a new flag for changing the directory
+	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+	flag.Usage = func() { // Custom usage message
+		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+		flag.PrintDefaults()
+	}
+	flag.Parse() // Parse all defined flags
+
+	// Change directory if the flag was used
+	if *workDir != "." {
+		fmt.Printf("Changing working directory to %s...\n", *workDir)
+		if err := os.Chdir(*workDir); err != nil {
+			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+			os.Exit(1)
+		}
+	}
+
+	// --- REMOVED UNUSED CODE ---
+	// The csvFilePath variable and NArg check were removed
+	// as the script now reads from stdin.
+	// ---------------------------
+
+	fmt.Println("========================================")
+	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+	fmt.Println("========================================")
+
+	// 1. Read config (will now read from the new CWD)
+	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+	configData, err := os.ReadFile(CONFIG_FILE)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+		os.Exit(1)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(configData, &config); err != nil {
+		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+		os.Exit(1)
+	}
+
+	if config.PLC.BundleDir == "" {
+		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+		os.Exit(1)
+	}
+
+	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+	fmt.Println("Waiting for sorted data from stdin...")
+
+	// 2. Process sorted data from stdin
+	// This script *requires* the input to be sorted by bundle number.
+
+	var currentWriter *zstd.Encoder
+	var currentFile *os.File
+	var lastBundleKey string = ""
+
+	lineCount := 0
+	startTime := time.Now()
+
+	scanner := bufio.NewScanner(os.Stdin)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		lineCount++
+
+		parts := strings.SplitN(line, ",", 2)
+		if len(parts) < 1 || parts[0] == "" {
+			continue // Skip empty/bad lines
+		}
+
+		bundleNumStr := parts[0]
+		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+		// If the bundle key is new, close the old writer and open a new one.
+		if bundleKey != lastBundleKey {
+			// Close the previous writer/file
+			if currentWriter != nil {
+				if err := currentWriter.Close(); err != nil {
+					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+				}
+				currentFile.Close()
+			}
+
+			// Start the new one
+			fmt.Printf(" -> Writing bundle %s\n", bundleKey)
+			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+			file, err := os.Create(outPath)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+				os.Exit(1)
+			}
+			currentFile = file
+
+			writer, err := zstd.NewWriter(file)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+				os.Exit(1)
+			}
+			currentWriter = writer
+			lastBundleKey = bundleKey
+		}
+
+		// Write the line to the currently active writer
+		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+		}
+
+		// Progress update
+		if lineCount%100000 == 0 {
+			elapsed := time.Since(startTime).Seconds()
+			rate := float64(lineCount) / elapsed
+			fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+		}
+	}
+
+	// 3. Close the very last writer
+	if currentWriter != nil {
+		if err := currentWriter.Close(); err != nil {
+			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+		}
+		currentFile.Close()
+	}
+
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+	}
+
+	totalTime := time.Since(startTime)
+	fmt.Println("\n========================================")
+	fmt.Println("Import Summary")
+	fmt.Println("========================================")
+	fmt.Printf("✓ Import completed in %v\n", totalTime)
+	fmt.Printf("Total lines processed: %d\n", lineCount)
+}
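Because the importer requires stdin to be pre-sorted by the bundle-number column, a typical invocation looks like the following (the input file name labels.csv and the target directory are illustrative, not part of the changeset):

    # Numeric sort on the first (bundle number) CSV column, then pipe into the importer.
    sort -t, -k1,1n labels.csv | go run ./cmd/import-labels -C /path/to/atscand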
config.sample.yaml (+22)
···
+database:
+  type: "postgres" # or "sqlite"
+  path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
+  # For SQLite: path: "atscan.db"
+
+plc:
+  directory_url: "https://plc.directory"
+  scan_interval: "5s"
+  bundle_dir: "./plc_bundles"
+  use_cache: true
+  index_dids: true
+
+pds:
+  scan_interval: "30m"
+  timeout: "30s"
+  workers: 20
+  recheck_interval: "1.5h"
+  scan_retention: 20
+
+api:
+  host: "0.0.0.0"
+  port: 8080
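The sample config expresses intervals as strings ("5s", "30m", "1.5h"), and main.go calls cfg.PLC.ScanInterval.String(). Since gopkg.in/yaml.v3 does not decode time.Duration from strings out of the box, the internal config package presumably does something like the following; a minimal sketch under that assumption, not the repository's actual code:

    package config

    import (
    	"time"

    	"gopkg.in/yaml.v3"
    )

    // Duration wraps time.Duration so YAML strings like "30m" or "1.5h"
    // can be decoded via time.ParseDuration.
    type Duration struct {
    	time.Duration
    }

    func (d *Duration) UnmarshalYAML(value *yaml.Node) error {
    	var s string
    	if err := value.Decode(&s); err != nil {
    		return err
    	}
    	parsed, err := time.ParseDuration(s)
    	if err != nil {
    		return err
    	}
    	d.Duration = parsed
    	return nil
    }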
config.yaml (-22)
···
-database:
-  type: "postgres" # or "sqlite"
-  path: "postgres://atscanner:Noor1kooz5eeFai9leZagh5ua5eihai4@localhost:5432/atscanner?sslmode=disable"
-  # For SQLite: path: "atscan.db"
-
-plc:
-  directory_url: "https://plc.directory"
-  scan_interval: "5s"
-  bundle_dir: "./plc_bundles"
-  use_cache: true
-  index_dids: true
-
-pds:
-  scan_interval: "30m"
-  timeout: "30s"
-  workers: 20
-  recheck_interval: "1.5h"
-  scan_retention: 3
-
-api:
-  host: "0.0.0.0"
-  port: 8080
go.mod (+6, -5)
···
-module github.com/atscan/atscanner
+module github.com/atscan/atscand
 
 go 1.23.0
 
 require (
 	github.com/gorilla/mux v1.8.1
 	github.com/lib/pq v1.10.9
-	github.com/mattn/go-sqlite3 v1.14.18
 	gopkg.in/yaml.v3 v3.0.1
 )
 
-require github.com/klauspost/compress v1.18.0
+require github.com/klauspost/compress v1.18.1
 
 require (
-	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 	github.com/gorilla/handlers v1.5.2
+	github.com/jackc/pgx/v5 v5.7.6
+	tangled.org/atscan.net/plcbundle v0.3.6
 )
 
 require (
 	github.com/felixge/httpsnoop v1.0.3 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-	github.com/jackc/pgx/v5 v5.7.6 // indirect
 	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	golang.org/x/crypto v0.37.0 // indirect
 	golang.org/x/sync v0.13.0 // indirect
 	golang.org/x/text v0.24.0 // indirect
go.sum (+17, -7)
···
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
 github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
 github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
-github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
-github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
 golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
 golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
 golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
 golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+504
-456
internal/api/handlers.go
+504
-456
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
47
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
48
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
49
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
50
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
51
49
}
52
50
53
51
// ===== REQUEST HELPERS =====
···
77
75
78
76
// ===== FORMATTING HELPERS =====
79
77
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": len(bundle.DIDs),
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
-
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
100
80
"id": ep.ID,
···
103
83
"discovered_at": ep.DiscoveredAt,
104
84
"last_checked": ep.LastChecked,
105
85
"status": statusToString(ep.Status),
106
-
// REMOVED: "user_count": ep.UserCount, // No longer exists
107
86
}
108
87
109
-
// Add IP if available
88
+
// Add IPs if available
110
89
if ep.IP != "" {
111
90
response["ip"] = ep.IP
112
91
}
113
-
114
-
// REMOVED: IP info extraction - no longer in Endpoint struct
115
-
// IPInfo is now in separate table, joined only in PDS handlers
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
94
+
}
116
95
117
96
return response
118
97
}
···
165
144
resp.json(stats)
166
145
}
167
146
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
152
+
endpointType := r.URL.Query().Get("type")
153
+
if endpointType == "" {
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
156
+
}
157
+
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
170
+
if err != nil {
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
168
183
// ===== PDS HANDLERS =====
169
184
170
185
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
···
233
248
"endpoint": pds.Endpoint,
234
249
"discovered_at": pds.DiscoveredAt,
235
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
236
252
}
237
253
238
254
// Add server_did if available
···
257
273
}
258
274
}
259
275
260
-
// Add IP if available
276
+
// Add IPs if available
261
277
if pds.IP != "" {
262
278
response["ip"] = pds.IP
279
+
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
263
282
}
264
283
265
284
// Add IP info (from ip_infos table via JOIN)
···
276
295
if pds.IPInfo.ASN > 0 {
277
296
response["asn"] = pds.IPInfo.ASN
278
297
}
279
-
if pds.IPInfo.IsDatacenter {
280
-
response["is_datacenter"] = pds.IPInfo.IsDatacenter
281
-
}
298
+
299
+
// Add all network type flags
300
+
response["is_datacenter"] = pds.IPInfo.IsDatacenter
301
+
response["is_vpn"] = pds.IPInfo.IsVPN
302
+
response["is_crawler"] = pds.IPInfo.IsCrawler
303
+
response["is_tor"] = pds.IPInfo.IsTor
304
+
response["is_proxy"] = pds.IPInfo.IsProxy
305
+
306
+
// Add computed is_home field
307
+
response["is_home"] = pds.IPInfo.IsHome()
282
308
}
283
309
284
310
return response
···
316
342
}
317
343
}
318
344
319
-
// Add full IP info
345
+
// Add full IP info with computed is_home field
320
346
if pds.IPInfo != nil {
321
-
response["ip_info"] = pds.IPInfo
347
+
// Convert IPInfo to map
348
+
ipInfoMap := make(map[string]interface{})
349
+
ipInfoJSON, _ := json.Marshal(pds.IPInfo)
350
+
json.Unmarshal(ipInfoJSON, &ipInfoMap)
351
+
352
+
// Add computed is_home field
353
+
ipInfoMap["is_home"] = pds.IPInfo.IsHome()
354
+
355
+
response["ip_info"] = ipInfoMap
322
356
}
323
357
324
358
return response
···
333
367
"scanned_at": scan.ScannedAt,
334
368
}
335
369
370
+
if scan.Status != storage.EndpointStatusOnline && scan.ScanData != nil && scan.ScanData.Metadata != nil {
371
+
if errorMsg, ok := scan.ScanData.Metadata["error"].(string); ok && errorMsg != "" {
372
+
scanMap["error"] = errorMsg
373
+
}
374
+
}
375
+
336
376
if scan.ResponseTime > 0 {
337
377
scanMap["response_time"] = scan.ResponseTime
338
378
}
339
379
340
-
// NEW: Add version if available
341
380
if scan.Version != "" {
342
381
scanMap["version"] = scan.Version
343
382
}
344
383
384
+
if scan.UsedIP != "" {
385
+
scanMap["used_ip"] = scan.UsedIP
386
+
}
387
+
345
388
// Use the top-level UserCount field first
346
389
if scan.UserCount > 0 {
347
390
scanMap["user_count"] = scan.UserCount
···
366
409
return result
367
410
}
368
411
412
+
// Get repos for a specific PDS
413
+
func (s *Server) handleGetPDSRepos(w http.ResponseWriter, r *http.Request) {
414
+
resp := newResponse(w)
415
+
vars := mux.Vars(r)
416
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
417
+
418
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
419
+
if err != nil {
420
+
resp.error("PDS not found", http.StatusNotFound)
421
+
return
422
+
}
423
+
424
+
// Parse query parameters
425
+
activeOnly := r.URL.Query().Get("active") == "true"
426
+
limit := getQueryInt(r, "limit", 100)
427
+
offset := getQueryInt(r, "offset", 0)
428
+
429
+
// Cap limit at 1000
430
+
if limit > 1000 {
431
+
limit = 1000
432
+
}
433
+
434
+
repos, err := s.db.GetPDSRepos(r.Context(), pds.ID, activeOnly, limit, offset)
435
+
if err != nil {
436
+
resp.error(err.Error(), http.StatusInternalServerError)
437
+
return
438
+
}
439
+
440
+
// Get total from latest scan (same as user_count)
441
+
totalRepos := 0
442
+
if pds.LatestScan != nil {
443
+
totalRepos = pds.LatestScan.UserCount
444
+
}
445
+
446
+
resp.json(map[string]interface{}{
447
+
"endpoint": pds.Endpoint,
448
+
"total_repos": totalRepos,
449
+
"returned": len(repos),
450
+
"limit": limit,
451
+
"offset": offset,
452
+
"repos": repos,
453
+
})
454
+
}
455
+
456
+
// Find which PDS hosts a specific DID
457
+
func (s *Server) handleGetDIDRepos(w http.ResponseWriter, r *http.Request) {
458
+
resp := newResponse(w)
459
+
vars := mux.Vars(r)
460
+
did := vars["did"]
461
+
462
+
repos, err := s.db.GetReposByDID(r.Context(), did)
463
+
if err != nil {
464
+
resp.error(err.Error(), http.StatusInternalServerError)
465
+
return
466
+
}
467
+
468
+
resp.json(map[string]interface{}{
469
+
"did": did,
470
+
"pds_count": len(repos),
471
+
"hosting_on": repos,
472
+
})
473
+
}
474
+
475
+
// Add to internal/api/handlers.go
476
+
func (s *Server) handleGetPDSRepoStats(w http.ResponseWriter, r *http.Request) {
477
+
resp := newResponse(w)
478
+
vars := mux.Vars(r)
479
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
480
+
481
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
482
+
if err != nil {
483
+
resp.error("PDS not found", http.StatusNotFound)
484
+
return
485
+
}
486
+
487
+
stats, err := s.db.GetPDSRepoStats(r.Context(), pds.ID)
488
+
if err != nil {
489
+
resp.error(err.Error(), http.StatusInternalServerError)
490
+
return
491
+
}
492
+
493
+
resp.json(stats)
494
+
}
495
+
496
+
// ===== GLOBAL DID HANDLER =====
497
+
498
+
// handleGetGlobalDID provides a consolidated view of a DID
499
+
func (s *Server) handleGetGlobalDID(w http.ResponseWriter, r *http.Request) {
500
+
resp := newResponse(w)
501
+
vars := mux.Vars(r)
502
+
did := vars["did"]
503
+
ctx := r.Context()
504
+
505
+
// Get DID info (now includes handle and pds from database)
506
+
didInfo, err := s.db.GetGlobalDIDInfo(ctx, did)
507
+
if err != nil {
508
+
if err == sql.ErrNoRows {
509
+
if !s.plcIndexDIDs {
510
+
resp.error("DID not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
511
+
} else {
512
+
resp.error("DID not found in PLC index.", http.StatusNotFound)
513
+
}
514
+
} else {
515
+
resp.error(err.Error(), http.StatusInternalServerError)
516
+
}
517
+
return
518
+
}
519
+
520
+
// Optionally include latest operation details if requested
521
+
var latestOperation *plc.PLCOperation
522
+
if r.URL.Query().Get("include_operation") == "true" && len(didInfo.BundleNumbers) > 0 {
523
+
lastBundleNum := didInfo.BundleNumbers[len(didInfo.BundleNumbers)-1]
524
+
ops, err := s.bundleManager.LoadBundleOperations(ctx, lastBundleNum)
525
+
if err != nil {
526
+
log.Error("Failed to load bundle %d for DID %s: %v", lastBundleNum, did, err)
527
+
} else {
528
+
// Find latest operation for this DID (in reverse)
529
+
for i := len(ops) - 1; i >= 0; i-- {
530
+
if ops[i].DID == did {
531
+
latestOperation = &ops[i]
532
+
break
533
+
}
534
+
}
535
+
}
536
+
}
537
+
538
+
result := map[string]interface{}{
539
+
"did": didInfo.DID,
540
+
"handle": didInfo.Handle, // From database!
541
+
"current_pds": didInfo.CurrentPDS, // From database!
542
+
"plc_index_created_at": didInfo.CreatedAt,
543
+
"plc_bundle_history": didInfo.BundleNumbers,
544
+
"pds_hosting_on": didInfo.HostingOn,
545
+
}
546
+
547
+
// Only include operation if requested
548
+
if latestOperation != nil {
549
+
result["latest_plc_operation"] = latestOperation
550
+
}
551
+
552
+
resp.json(result)
553
+
}
554
+
555
+
// handleGetDIDByHandle resolves a handle to a DID
556
+
func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) {
557
+
resp := newResponse(w)
558
+
vars := mux.Vars(r)
559
+
handle := vars["handle"]
560
+
561
+
// Normalize handle (remove @ prefix if present)
562
+
handle = strings.TrimPrefix(handle, "@")
563
+
564
+
// Look up DID by handle
565
+
didRecord, err := s.db.GetDIDByHandle(r.Context(), handle)
566
+
if err != nil {
567
+
if err == sql.ErrNoRows {
568
+
if !s.plcIndexDIDs {
569
+
resp.error("Handle not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
570
+
} else {
571
+
resp.error("Handle not found.", http.StatusNotFound)
572
+
}
573
+
} else {
574
+
resp.error(err.Error(), http.StatusInternalServerError)
575
+
}
576
+
return
577
+
}
578
+
579
+
// Return just the handle and DID
580
+
resp.json(map[string]string{
581
+
"handle": handle,
582
+
"did": didRecord.DID,
583
+
})
584
+
}
585
+
369
586
// ===== DID HANDLERS =====
370
587
371
588
func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) {
···
467
684
return
468
685
}
469
686
470
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
471
-
if err != nil {
472
-
resp.error(err.Error(), http.StatusInternalServerError)
473
-
return
474
-
}
475
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
476
688
resp.json(map[string]interface{}{
477
689
"total_unique_dids": totalDIDs,
478
690
"last_bundle": lastBundle,
···
483
695
484
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
485
697
resp := newResponse(w)
486
-
487
698
bundleNum, err := getBundleNumber(r)
488
699
if err != nil {
489
700
resp.error("invalid bundle number", http.StatusBadRequest)
490
701
return
491
702
}
492
703
493
-
// Try to get existing bundle
494
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
495
-
if err == nil {
496
-
// Bundle exists, return it normally
497
-
resp.json(formatBundleResponse(bundle))
498
-
return
499
-
}
500
-
501
-
// Bundle not found - check if it's the next upcoming bundle
502
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
503
707
if err != nil {
504
-
resp.error("bundle not found", http.StatusNotFound)
505
-
return
506
-
}
507
-
508
-
if bundleNum == lastBundle+1 {
509
-
// This is the upcoming bundle - return preview based on mempool
510
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
511
-
if err != nil {
512
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
513
717
return
514
718
}
515
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
516
720
return
517
721
}
518
722
519
-
// Not an upcoming bundle, just not found
520
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
521
724
}
522
725
523
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
524
-
// Get mempool stats
525
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
526
-
if err != nil {
527
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
528
743
}
744
+
}
745
+
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
529
749
530
-
if mempoolCount == 0 {
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
531
752
return map[string]interface{}{
532
753
"plc_bundle_number": bundleNum,
533
754
"is_upcoming": true,
···
537
758
}, nil
538
759
}
539
760
540
-
// Get first and last operations for time range
541
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
542
-
if err != nil {
543
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
544
771
}
545
772
546
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
547
-
if err != nil {
548
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
549
775
}
550
776
551
-
// Get unique DID count
552
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
553
-
if err != nil {
554
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
555
780
}
556
-
557
-
// Get uncompressed size estimate
558
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
559
-
if err != nil {
560
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
561
783
}
562
784
563
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
564
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
565
-
566
-
// Calculate completion estimate
567
-
var estimatedCompletionTime *time.Time
568
-
var operationsNeeded int
569
-
var currentRate float64
570
-
571
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
572
-
573
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
574
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
575
-
if timeSpan > 0 {
576
-
currentRate = float64(mempoolCount) / timeSpan
577
-
if currentRate > 0 {
578
-
secondsNeeded := float64(operationsNeeded) / currentRate
579
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
580
-
estimatedCompletionTime = &completionTime
581
-
}
582
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
583
789
}
584
790
585
-
// Get previous bundle for cursor context
586
-
var prevBundleHash string
587
-
var cursor string
791
+
// Get previous bundle info
588
792
if bundleNum > 1 {
589
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
590
-
if err == nil {
591
-
prevBundleHash = prevBundle.Hash
592
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
593
-
}
594
-
}
595
-
596
-
// Determine bundle status
597
-
status := "filling"
598
-
if mempoolCount >= plc.BUNDLE_SIZE {
599
-
status = "ready"
600
-
}
601
-
602
-
// Build upcoming bundle response
603
-
result := map[string]interface{}{
604
-
"plc_bundle_number": bundleNum,
605
-
"is_upcoming": true,
606
-
"status": status,
607
-
"operation_count": mempoolCount,
608
-
"target_operation_count": plc.BUNDLE_SIZE,
609
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
610
-
"operations_needed": operationsNeeded,
611
-
"did_count": uniqueDIDCount,
612
-
"start_time": firstOp.CreatedAt, // This is FIXED once first op exists
613
-
"current_end_time": lastOp.CreatedAt, // This will change as more ops arrive
614
-
"uncompressed_size": uncompressedSize,
615
-
"estimated_compressed_size": estimatedCompressedSize,
616
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
617
-
"prev_bundle_hash": prevBundleHash,
618
-
"cursor": cursor,
619
-
}
620
-
621
-
if estimatedCompletionTime != nil {
622
-
result["estimated_completion_time"] = *estimatedCompletionTime
623
-
result["current_rate_per_second"] = currentRate
624
-
}
625
-
626
-
// Get actual mempool operations if requested
627
-
if r.URL.Query().Get("include_dids") == "true" {
628
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
629
-
if err == nil {
630
-
// Extract unique DIDs
631
-
didSet := make(map[string]bool)
632
-
for _, op := range ops {
633
-
didSet[op.DID] = true
634
-
}
635
-
dids := make([]string, 0, len(didSet))
636
-
for did := range didSet {
637
-
dids = append(dids, did)
638
-
}
639
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
640
796
}
641
797
}
642
798
···
652
808
return
653
809
}
654
810
655
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
656
813
if err != nil {
657
814
resp.error("bundle not found", http.StatusNotFound)
658
815
return
659
816
}
660
817
661
818
resp.json(map[string]interface{}{
662
-
"plc_bundle_number": bundle.BundleNumber,
663
-
"did_count": len(bundle.DIDs),
664
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
665
822
})
666
823
}
667
824
···
676
833
677
834
compressed := r.URL.Query().Get("compressed") != "false"
678
835
679
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
680
837
if err == nil {
681
838
// Bundle exists, serve it normally
682
839
resp.bundleHeaders(bundle)
···
690
847
}
691
848
692
849
// Bundle not found - check if it's the upcoming bundle
693
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
694
-
if err != nil {
695
-
resp.error("bundle not found", http.StatusNotFound)
696
-
return
697
-
}
698
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
699
851
if bundleNum == lastBundle+1 {
700
852
// This is the upcoming bundle - serve from mempool
701
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
702
854
return
703
855
}
704
856
···
706
858
resp.error("bundle not found", http.StatusNotFound)
707
859
}
708
860
709
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
710
-
ctx := r.Context()
711
-
712
-
// Get mempool count
713
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
714
-
if err != nil {
715
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
716
-
return
717
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
718
865
719
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
720
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
721
868
return
722
869
}
723
870
724
-
// Get mempool operations (up to BUNDLE_SIZE)
725
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
726
873
if err != nil {
727
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
728
875
return
729
876
}
730
877
731
-
if len(mempoolOps) == 0 {
732
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
733
880
return
734
881
}
735
882
736
-
// Get time range
737
-
firstOp := mempoolOps[0]
738
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
739
886
740
887
// Extract unique DIDs
741
888
didSet := make(map[string]bool)
742
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
743
890
didSet[op.DID] = true
744
891
}
745
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
746
899
// Get previous bundle hash
747
900
prevBundleHash := ""
748
901
if bundleNum > 1 {
749
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
750
903
prevBundleHash = prevBundle.Hash
751
904
}
752
905
}
753
-
754
-
// Serialize operations to JSONL
755
-
var buf []byte
756
-
for _, mop := range mempoolOps {
757
-
buf = append(buf, []byte(mop.Operation)...)
758
-
buf = append(buf, '\n')
759
-
}
760
-
761
-
// Calculate size
762
-
uncompressedSize := int64(len(buf))
763
906
764
907
// Set headers
765
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
···
767
910
w.Header().Set("X-Bundle-Status", "preview")
768
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
769
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
770
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
771
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
772
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
773
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
774
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
775
919
776
920
w.Header().Set("Content-Type", "application/jsonl")
777
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
778
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
779
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
780
922
923
+
// Stream operations as JSONL
781
924
w.WriteHeader(http.StatusOK)
782
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
783
937
}
784
938
785
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
786
940
resp := newResponse(w)
787
-
path := bundle.GetFilePath(s.plcBundleDir)
788
941
789
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
790
944
if err != nil {
791
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
792
946
return
793
947
}
794
-
defer file.Close()
795
-
796
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
797
949
798
950
w.Header().Set("Content-Type", "application/zstd")
799
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
800
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
801
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
802
954
803
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
804
958
}
805
959
806
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
807
961
resp := newResponse(w)
808
962
809
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
810
965
if err != nil {
811
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
812
967
return
813
968
}
814
-
815
-
// Serialize to JSONL
816
-
var buf []byte
817
-
for _, op := range ops {
818
-
buf = append(buf, op.RawJSON...)
819
-
buf = append(buf, '\n')
820
-
}
821
-
822
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
823
-
compressedSize := int64(0)
824
-
if fileInfo != nil {
825
-
compressedSize = fileInfo.Size()
826
-
}
969
+
defer reader.Close()
827
970
828
971
w.Header().Set("Content-Type", "application/jsonl")
829
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
830
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
831
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
832
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
833
-
if compressedSize > 0 {
834
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
835
978
}
836
979
980
+
// Stream the data directly to the response
837
981
w.WriteHeader(http.StatusOK)
838
-
w.Write(buf)
982
+
io.Copy(w, reader)
839
983
}
840
984
841
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
842
986
resp := newResponse(w)
843
987
limit := getQueryInt(r, "limit", 50)
844
988
845
-
bundles, err := s.db.GetBundles(r.Context(), limit)
846
-
if err != nil {
847
-
resp.error(err.Error(), http.StatusInternalServerError)
848
-
return
849
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
850
990
851
991
response := make([]map[string]interface{}, len(bundles))
852
992
for i, bundle := range bundles {
853
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
854
994
}
855
995
856
996
resp.json(response)
···
859
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
860
1000
resp := newResponse(w)
861
1001
862
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
863
-
if err != nil {
864
-
resp.error(err.Error(), http.StatusInternalServerError)
865
-
return
866
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
867
1008
868
1009
resp.json(map[string]interface{}{
869
-
"plc_bundle_count": count,
870
-
"last_bundle_number": lastBundle,
871
-
"total_compressed_size": compressedSize,
872
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
873
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
874
-
"total_uncompressed_size": uncompressedSize,
875
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
876
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
877
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
878
1015
})
879
1016
}
880
1017
···
882
1019
883
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
884
1021
resp := newResponse(w)
885
-
ctx := r.Context()
886
1022
887
-
count, err := s.db.GetMempoolCount(ctx)
888
-
if err != nil {
889
-
resp.error(err.Error(), http.StatusInternalServerError)
890
-
return
891
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
892
1025
893
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
894
-
if err != nil {
895
-
resp.error(err.Error(), http.StatusInternalServerError)
896
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
897
1030
}
898
1031
899
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
900
-
if err != nil {
901
-
resp.error(err.Error(), http.StatusInternalServerError)
902
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
903
1036
}
904
1037
905
-
result := map[string]interface{}{
906
-
"operation_count": count,
907
-
"unique_did_count": uniqueDIDCount,
908
-
"uncompressed_size": uncompressedSize,
909
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
910
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
911
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
912
1042
913
-
if count > 0 {
914
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
915
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
916
1045
917
-
if count < plc.BUNDLE_SIZE {
918
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
919
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 { // bundle size (formerly plc.BUNDLE_SIZE)
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
920
1049
if timeSpan > 0 {
921
1050
opsPerSecond := float64(count) / timeSpan
922
1051
if opsPerSecond > 0 {
923
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
924
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
925
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1057
+
result["current_rate_per_second"] = opsPerSecond
926
1058
result["operations_needed"] = remainingOps
927
-
result["current_rate_per_second"] = opsPerSecond
928
1059
}
929
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
930
1066
}
931
-
} else {
932
-
result["estimated_next_bundle_time"] = time.Now()
933
-
result["operations_needed"] = 0
934
1067
}
935
1068
}
936
1069
} else {
1070
+
// Empty mempool
937
1071
result["mempool_start_time"] = nil
938
1072
result["estimated_next_bundle_time"] = nil
939
1073
}
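The estimate above is plain rate extrapolation: observed ops/second since the first mempool entry, applied to the operations still missing from a 10,000-op bundle. A runnable toy example with made-up numbers:

	package main

	import "fmt"

	func main() {
		// Hypothetical: 6,000 mempool ops observed over 1,200 seconds.
		opsPerSecond := 6000.0 / 1200.0 // 5 ops/s
		remainingOps := 10000 - 6000    // 4,000 ops until the bundle is full
		secondsNeeded := float64(remainingOps) / opsPerSecond
		fmt.Printf("next bundle in ~%.0f seconds\n", secondsNeeded) // ~800
	}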
···
958
1092
959
1093
// ===== VERIFICATION HANDLERS =====
960
1094
961
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
962
-
resp := newResponse(w)
963
-
vars := mux.Vars(r)
964
-
965
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
966
-
if err != nil {
967
-
resp.error("Invalid bundle number", http.StatusBadRequest)
968
-
return
969
-
}
970
-
971
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
972
-
if err != nil {
973
-
resp.error("Bundle not found", http.StatusNotFound)
974
-
return
975
-
}
976
-
977
-
// Fetch from PLC and verify
978
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
979
-
if err != nil {
980
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
981
-
return
982
-
}
983
-
984
-
remoteHash := computeOperationsHash(remoteOps)
985
-
verified := bundle.Hash == remoteHash
986
-
987
-
resp.json(map[string]interface{}{
988
-
"bundle_number": bundleNumber,
989
-
"verified": verified,
990
-
"local_hash": bundle.Hash,
991
-
"remote_hash": remoteHash,
992
-
"local_op_count": plc.BUNDLE_SIZE,
993
-
"remote_op_count": len(remoteOps),
994
-
"boundary_cids_used": len(prevCIDs),
995
-
})
996
-
}
997
-
998
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
999
-
var after string
1000
-
var prevBoundaryCIDs map[string]bool
1001
-
1002
-
if bundleNum > 1 {
1003
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
1004
-
if err != nil {
1005
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
1006
-
}
1007
-
1008
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
1009
-
1010
-
if len(prevBundle.BoundaryCIDs) > 0 {
1011
-
prevBoundaryCIDs = make(map[string]bool)
1012
-
for _, cid := range prevBundle.BoundaryCIDs {
1013
-
prevBoundaryCIDs[cid] = true
1014
-
}
1015
-
}
1016
-
}
1017
-
1018
-
var allRemoteOps []plc.PLCOperation
1019
-
seenCIDs := make(map[string]bool)
1020
-
1021
-
for cid := range prevBoundaryCIDs {
1022
-
seenCIDs[cid] = true
1023
-
}
1024
-
1025
-
currentAfter := after
1026
-
maxFetches := 20
1027
-
1028
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1029
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1030
-
Count: 1000,
1031
-
After: currentAfter,
1032
-
})
1033
-
if err != nil || len(batch) == 0 {
1034
-
break
1035
-
}
1036
-
1037
-
for _, op := range batch {
1038
-
if !seenCIDs[op.CID] {
1039
-
seenCIDs[op.CID] = true
1040
-
allRemoteOps = append(allRemoteOps, op)
1041
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1042
-
break
1043
-
}
1044
-
}
1045
-
}
1046
-
1047
-
if len(batch) > 0 {
1048
-
lastOp := batch[len(batch)-1]
1049
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1050
-
}
1051
-
1052
-
if len(batch) < 1000 {
1053
-
break
1054
-
}
1055
-
}
1056
-
1057
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1058
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1059
-
}
1060
-
1061
-
return allRemoteOps, prevBoundaryCIDs, nil
1062
-
}
1063
-
1064
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1065
1096
resp := newResponse(w)
1066
-
ctx := r.Context()
1067
1097
1068
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1069
-
if err != nil {
1070
-
resp.error(err.Error(), http.StatusInternalServerError)
1071
-
return
1072
-
}
1073
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1074
1099
if lastBundle == 0 {
1075
1100
resp.json(map[string]interface{}{
1076
1101
"status": "empty",
···
1084
1109
var errorMsg string
1085
1110
1086
1111
for i := 1; i <= lastBundle; i++ {
1087
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1088
1113
if err != nil {
1089
1114
valid = false
1090
1115
brokenAt = i
···
1093
1118
}
1094
1119
1095
1120
if i > 1 {
1096
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1097
1122
if err != nil {
1098
1123
valid = false
1099
1124
brokenAt = i
···
1101
1126
break
1102
1127
}
1103
1128
1104
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1105
1130
valid = false
1106
1131
brokenAt = i
1107
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1108
1133
break
1109
1134
}
1110
1135
}
···
1125
1150
1126
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
1127
1152
resp := newResponse(w)
1128
-
ctx := r.Context()
1129
1153
1130
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1131
-
if err != nil {
1132
-
resp.error(err.Error(), http.StatusInternalServerError)
1133
-
return
1134
-
}
1135
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1136
1155
if lastBundle == 0 {
1137
1156
resp.json(map[string]interface{}{
1138
1157
"chain_length": 0,
···
1141
1160
return
1142
1161
}
1143
1162
1144
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
1145
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
1146
-
1147
-
// Updated to receive 5 values instead of 3
1148
-
count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
1149
-
if err != nil {
1150
-
resp.error(err.Error(), http.StatusInternalServerError)
1151
-
return
1152
-
}
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
1153
1166
1154
1167
resp.json(map[string]interface{}{
1155
-
"chain_length": lastBundle,
1156
-
"total_bundles": count,
1157
-
"total_compressed_size": compressedSize,
1158
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1159
-
"total_uncompressed_size": uncompressedSize,
1160
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1161
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1162
-
"chain_start_time": firstBundle.StartTime,
1163
-
"chain_end_time": lastBundleData.EndTime,
1164
-
"chain_head_hash": lastBundleData.Hash,
1165
-
"first_prev_hash": firstBundle.PrevBundleHash,
1166
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
1167
1177
})
1168
1178
}
1169
1179
···
1184
1194
return
1185
1195
}
1186
1196
1187
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
1188
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
1189
1199
1190
1200
w.Header().Set("Content-Type", "application/jsonl")
···
1224
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
1225
1235
}
1226
1236
1227
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
1228
1238
if afterTime.IsZero() {
1229
1239
return 1
1230
1240
}
1231
1241
1232
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
1233
-
if err != nil {
1234
-
return 1
1235
-
}
1236
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
1237
1243
if foundBundle > 1 {
1238
1244
return foundBundle - 1
1239
1245
}
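Note: returning foundBundle - 1 presumably errs on the side of starting one bundle early; operations sharing a boundary timestamp can straddle two bundles (see the boundary-CID handling removed from bundle.go below), so starting at the found bundle itself could drop records sitting exactly at afterTime.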
···
1244
1250
var allOps []plc.PLCOperation
1245
1251
seenCIDs := make(map[string]bool)
1246
1252
1247
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1248
1254
1249
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
1250
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
1404
1410
limit := getQueryInt(r, "limit", 0)
1405
1411
fromBundle := getQueryInt(r, "from", 1)
1406
1412
1407
-
history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1408
1415
if err != nil {
1409
1416
resp.error(err.Error(), http.StatusInternalServerError)
1410
1417
return
···
1447
1454
resp.json(result)
1448
1455
}
1449
1456
1450
-
// ===== UTILITY FUNCTIONS =====
1457
+
// ===== DEBUG HANDLERS =====
1458
+
1459
+
func (s *Server) handleGetDBSizes(w http.ResponseWriter, r *http.Request) {
1460
+
resp := newResponse(w)
1461
+
ctx := r.Context()
1462
+
schema := "public" // Or make configurable if needed
1463
+
1464
+
tableSizes, err := s.db.GetTableSizes(ctx, schema)
1465
+
if err != nil {
1466
+
log.Error("Failed to get table sizes: %v", err)
1467
+
resp.error("Failed to retrieve table sizes", http.StatusInternalServerError)
1468
+
return
1469
+
}
1451
1470
1452
-
func computeOperationsHash(ops []plc.PLCOperation) string {
1453
-
var jsonlData []byte
1454
-
for _, op := range ops {
1455
-
jsonlData = append(jsonlData, op.RawJSON...)
1456
-
jsonlData = append(jsonlData, '\n')
1471
+
indexSizes, err := s.db.GetIndexSizes(ctx, schema)
1472
+
if err != nil {
1473
+
log.Error("Failed to get index sizes: %v", err)
1474
+
resp.error("Failed to retrieve index sizes", http.StatusInternalServerError)
1475
+
return
1457
1476
}
1458
-
hash := sha256.Sum256(jsonlData)
1459
-
return hex.EncodeToString(hash[:])
1477
+
1478
+
resp.json(map[string]interface{}{
1479
+
"schema": schema,
1480
+
"tables": tableSizes,
1481
+
"indexes": indexSizes,
1482
+
"retrievedAt": time.Now().UTC(),
1483
+
})
1460
1484
}
1485
+
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1488
+
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1493
+
}
1494
+
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1499
+
}
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1506
+
}
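A quick smoke test for the two new endpoints (sketch only: the API subrouter's mount path and the listen port are not shown in this diff, so /api and localhost:8080 below are assumptions):

	package main

	import (
		"fmt"
		"io"
		"net/http"
	)

	func main() {
		for _, path := range []string{"/api/debug/db/sizes", "/api/plc/bundles/1/labels"} {
			resp, err := http.Get("http://localhost:8080" + path)
			if err != nil {
				fmt.Println(path, "error:", err)
				continue
			}
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			fmt.Println(path, resp.StatusCode, len(body), "bytes")
		}
	}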
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1461
1509
1462
1510
func normalizeEndpoint(endpoint string) string {
1463
1511
endpoint = strings.TrimPrefix(endpoint, "https://")
+23
-14
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
23
+
plcIndexDIDs bool
24
24
}
25
25
26
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
27
-
bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
28
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
29
27
s := &Server{
30
28
router: mux.NewRouter(),
31
29
db: db,
32
-
plcClient: plc.NewClient(plcCfg.DirectoryURL),
33
30
plcBundleDir: plcCfg.BundleDir,
34
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
32
+
plcIndexDIDs: plcCfg.IndexDIDs,
35
33
}
36
34
37
35
s.setupRoutes()
···
59
57
// Generic endpoints (keep as-is)
60
58
api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
61
59
api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
60
+
api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
62
61
63
-
// NEW: PDS-specific endpoints (virtual, created via JOINs)
62
+
// PDS-specific endpoints (virtual, created via JOINs)
64
63
api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
65
64
api.HandleFunc("/pds/stats", s.handleGetPDSStats).Methods("GET")
66
65
api.HandleFunc("/pds/countries", s.handleGetCountryLeaderboard).Methods("GET")
···
68
67
api.HandleFunc("/pds/duplicates", s.handleGetDuplicateEndpoints).Methods("GET")
69
68
api.HandleFunc("/pds/{endpoint}", s.handleGetPDSDetail).Methods("GET")
70
69
70
+
// PDS repos
71
+
api.HandleFunc("/pds/{endpoint}/repos", s.handleGetPDSRepos).Methods("GET")
72
+
api.HandleFunc("/pds/{endpoint}/repos/stats", s.handleGetPDSRepoStats).Methods("GET")
73
+
api.HandleFunc("/pds/repos/{did}", s.handleGetDIDRepos).Methods("GET")
74
+
75
+
// Global DID routes
76
+
api.HandleFunc("/did/{did}", s.handleGetGlobalDID).Methods("GET")
77
+
api.HandleFunc("/handle/{handle}", s.handleGetDIDByHandle).Methods("GET") // NEW
78
+
71
79
// PLC Bundle routes
72
80
api.HandleFunc("/plc/bundles", s.handleGetPLCBundles).Methods("GET")
73
81
api.HandleFunc("/plc/bundles/stats", s.handleGetPLCBundleStats).Methods("GET")
···
76
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
77
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
78
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
79
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
80
88
81
89
// PLC history/metrics
82
90
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
···
87
95
// DID routes
88
96
api.HandleFunc("/plc/did/{did}", s.handleGetDID).Methods("GET")
89
97
api.HandleFunc("/plc/did/{did}/history", s.handleGetDIDHistory).Methods("GET")
90
-
api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET") // NEW
98
+
api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET")
91
99
92
100
// Mempool routes
93
101
api.HandleFunc("/mempool/stats", s.handleGetMempoolStats).Methods("GET")
···
95
103
// Metrics routes
96
104
api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET")
97
105
98
-
// Job status endpoint
106
+
// Debug endpoints
107
+
api.HandleFunc("/debug/db/sizes", s.handleGetDBSizes).Methods("GET")
99
108
api.HandleFunc("/jobs", s.handleGetJobStatus).Methods("GET")
100
109
101
110
// Health check
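With the BundleManager now injected, NewServer no longer constructs its own instance (or its own PLC client); the caller builds one manager and shares it across the daemon. A wiring sketch for main.go — the constructor now comes from the external plcbundle library, whose exact signature this diff doesn't show, so the call below merely mirrors the old NewBundleManager as a placeholder:

	// Placeholder constructor call; adjust to the plcbundle library's real API.
	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.UseCache, db, cfg.PLC.IndexDIDs)
	if err != nil {
		log.Fatal("failed to init bundle manager: %v", err)
	}
	defer bundleManager.Close()

	srv := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)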
+36
-13
internal/ipinfo/client.go
···
99
99
return ipInfo, nil
100
100
}
101
101
102
-
// ExtractIPFromEndpoint extracts IP from endpoint URL
103
-
func ExtractIPFromEndpoint(endpoint string) (string, error) {
102
+
// IPAddresses holds both IPv4 and IPv6 addresses
103
+
type IPAddresses struct {
104
+
IPv4 string
105
+
IPv6 string
106
+
}
107
+
108
+
// ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL
109
+
func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) {
104
110
// Parse URL
105
111
parsedURL, err := url.Parse(endpoint)
106
112
if err != nil {
107
-
return "", fmt.Errorf("failed to parse endpoint URL: %w", err)
113
+
return nil, fmt.Errorf("failed to parse endpoint URL: %w", err)
108
114
}
109
115
110
116
host := parsedURL.Hostname()
111
117
if host == "" {
112
-
return "", fmt.Errorf("no hostname in endpoint")
118
+
return nil, fmt.Errorf("no hostname in endpoint")
113
119
}
120
+
121
+
result := &IPAddresses{}
114
122
115
123
// Check if host is already an IP
116
-
if net.ParseIP(host) != nil {
117
-
return host, nil
124
+
if ip := net.ParseIP(host); ip != nil {
125
+
if ip.To4() != nil {
126
+
result.IPv4 = host
127
+
} else {
128
+
result.IPv6 = host
129
+
}
130
+
return result, nil
118
131
}
119
132
120
-
// Resolve hostname to IP
133
+
// Resolve hostname to IPs
121
134
ips, err := net.LookupIP(host)
122
135
if err != nil {
123
-
return "", fmt.Errorf("failed to resolve hostname: %w", err)
136
+
return nil, fmt.Errorf("failed to resolve hostname: %w", err)
124
137
}
125
138
126
139
if len(ips) == 0 {
127
-
return "", fmt.Errorf("no IPs found for hostname")
140
+
return nil, fmt.Errorf("no IPs found for hostname")
128
141
}
129
142
130
-
// Return first IPv4 address
143
+
// Extract both IPv4 and IPv6
131
144
for _, ip := range ips {
132
145
if ipv4 := ip.To4(); ipv4 != nil {
133
-
return ipv4.String(), nil
146
+
if result.IPv4 == "" {
147
+
result.IPv4 = ipv4.String()
148
+
}
149
+
} else {
150
+
if result.IPv6 == "" {
151
+
result.IPv6 = ip.String()
152
+
}
134
153
}
135
154
}
136
155
137
-
// Fallback to first IP (might be IPv6)
138
-
return ips[0].String(), nil
156
+
// Must have at least one IP
157
+
if result.IPv4 == "" && result.IPv6 == "" {
158
+
return nil, fmt.Errorf("no valid IPs found")
159
+
}
160
+
161
+
return result, nil
139
162
}
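For reference, the IPv4/IPv6 split is all standard library; a self-contained demo of the same selection logic (example.com is just an arbitrary hostname):

	package main

	import (
		"fmt"
		"net"
	)

	func main() {
		ips, err := net.LookupIP("example.com")
		if err != nil {
			fmt.Println("lookup failed:", err)
			return
		}
		var v4, v6 string
		for _, ip := range ips {
			if ip4 := ip.To4(); ip4 != nil {
				if v4 == "" {
					v4 = ip4.String()
				}
			} else if v6 == "" {
				v6 = ip.String()
			}
		}
		fmt.Printf("IPv4=%q IPv6=%q\n", v4, v6)
	}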
+6
-2
internal/log/log.go
···
28
28
errorLog = log.New(os.Stderr, "", 0)
29
29
}
30
30
31
-
// timestamp returns current time in ISO 8601 format
31
+
// timestamp returns current time with milliseconds (local time, no timezone)
32
32
func timestamp() string {
33
-
return time.Now().Format(time.RFC3339)
33
+
return time.Now().Format("2006-01-02T15:04:05.000")
34
34
}
35
35
36
36
func Verbose(format string, v ...interface{}) {
···
39
39
40
40
func Info(format string, v ...interface{}) {
41
41
infoLog.Printf("%s [INFO] %s", timestamp(), fmt.Sprintf(format, v...))
42
+
}
43
+
44
+
func Warn(format string, v ...interface{}) {
45
+
infoLog.Printf("%s [WARN] %s", timestamp(), fmt.Sprintf(format, v...))
42
46
}
43
47
44
48
func Error(format string, v ...interface{}) {
+47
-18
internal/pds/client.go
···
4
4
"context"
5
5
"encoding/json"
6
6
"fmt"
7
+
"net"
7
8
"net/http"
8
9
"time"
9
10
)
···
28
29
29
30
// Repo represents a repository in the list
30
31
type Repo struct {
31
-
DID string `json:"did"`
32
-
Head string `json:"head,omitempty"`
33
-
Rev string `json:"rev,omitempty"`
32
+
DID string `json:"did"`
33
+
Head string `json:"head,omitempty"`
34
+
Rev string `json:"rev,omitempty"`
35
+
Active *bool `json:"active,omitempty"`
36
+
Status *string `json:"status,omitempty"`
34
37
}
35
38
36
39
// ListRepos fetches all repositories from a PDS with pagination
37
-
func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]string, error) {
38
-
var allDIDs []string
40
+
func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]Repo, error) {
41
+
var allRepos []Repo
39
42
var cursor *string
40
43
41
44
for {
···
67
70
}
68
71
resp.Body.Close()
69
72
70
-
// Collect DIDs
71
-
for _, repo := range result.Repos {
72
-
allDIDs = append(allDIDs, repo.DID)
73
-
}
73
+
// Collect repos
74
+
allRepos = append(allRepos, result.Repos...)
74
75
75
76
// Check if there are more pages
76
77
if result.Cursor == nil || *result.Cursor == "" {
···
79
80
cursor = result.Cursor
80
81
}
81
82
82
-
return allDIDs, nil
83
+
return allRepos, nil
83
84
}
84
85
85
86
// DescribeServer fetches com.atproto.server.describeServer
86
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
87
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
88
91
89
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
90
116
91
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
92
118
if err != nil {
93
-
return nil, err
119
+
return nil, 0, "", err
94
120
}
95
121
96
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
97
125
if err != nil {
98
-
return nil, err
126
+
return nil, responseTime, usedIP, err
99
127
}
100
128
defer resp.Body.Close()
101
129
102
130
if resp.StatusCode != http.StatusOK {
103
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
104
132
}
105
133
106
134
var desc ServerDescription
107
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
108
-
return nil, err
136
+
return nil, responseTime, usedIP, err
109
137
}
110
138
111
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
112
140
}
113
141
114
142
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
143
+
// Returns: available, responseTime, version, error
115
144
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
116
145
startTime := time.Now()
117
146
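DescribeServer now doubles as the primary liveness probe: alongside the parsed description it reports the elapsed time and, via the custom DialContext above, the concrete IP the TCP connection landed on. Call-site sketch (fragment; c is a *Client and the endpoint URL is a made-up example):

	desc, rt, usedIP, err := c.DescribeServer(ctx, "https://pds.example.com")
	if err != nil {
		// rt and usedIP can still carry partial information on failure.
		fmt.Printf("describeServer failed after %v via %q: %v\n", rt, usedIP, err)
		return
	}
	fmt.Printf("did=%s rt=%v ip=%s\n", desc.DID, rt, usedIP)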
+100
-36
internal/pds/scanner.go
···
8
8
"sync/atomic"
9
9
"time"
10
10
11
-
"github.com/acarl005/stripansi"
12
-
"github.com/atscan/atscanner/internal/config"
13
-
"github.com/atscan/atscanner/internal/ipinfo"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/monitor"
16
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
17
16
)
18
17
19
18
type Scanner struct {
···
40
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
41
40
Type: "pds",
42
41
OnlyStale: true,
42
+
OnlyValid: true,
43
43
RecheckInterval: s.config.RecheckInterval,
44
44
})
45
45
if err != nil {
···
124
124
}
125
125
126
126
func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) {
127
-
// STEP 1: Resolve IP (before any network call)
128
-
ip, err := ipinfo.ExtractIPFromEndpoint(ep.Endpoint)
127
+
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
+
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
129
129
if err != nil {
130
-
// Mark as offline due to DNS failure
131
130
s.saveScanResult(ctx, ep.ID, &ScanResult{
132
131
Status: storage.EndpointStatusOffline,
133
132
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
135
134
return
136
135
}
137
136
138
-
// Update IP immediately
139
-
s.db.UpdateEndpointIP(ctx, ep.ID, ip, time.Now().UTC())
137
+
// Update IPs immediately
138
+
s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC())
140
139
141
-
// STEP 2: Health check
142
-
available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
143
-
if err != nil || !available {
144
-
errMsg := "health check failed"
145
-
if err != nil {
146
-
errMsg = err.Error()
147
-
}
140
+
// STEP 1.5: Fetch IP info asynchronously for both IPs
141
+
if ips.IPv4 != "" {
142
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv4)
143
+
}
144
+
if ips.IPv6 != "" {
145
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
146
+
}
147
+
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
150
+
if err != nil {
148
151
s.saveScanResult(ctx, ep.ID, &ScanResult{
149
152
Status: storage.EndpointStatusOffline,
150
-
ResponseTime: responseTime,
151
-
ErrorMessage: errMsg,
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
152
156
})
153
157
return
154
158
}
155
159
156
-
// STEP 3: Fetch PDS-specific data
157
-
desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
158
-
if err != nil {
159
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
160
-
} else if desc != nil && desc.DID != "" {
161
-
// NEW: Update server DID
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
162
162
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
163
163
}
164
164
165
-
dids, err := s.client.ListRepos(ctx, ep.Endpoint)
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
172
+
}
173
+
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
181
+
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
166
182
if err != nil {
167
183
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
168
-
dids = []string{}
184
+
repoList = []Repo{}
185
+
}
186
+
187
+
// Convert to DIDs
188
+
dids := make([]string, len(repoList))
189
+
for i, repo := range repoList {
190
+
dids[i] = repo.DID
169
191
}
170
192
171
-
// STEP 4: SAVE IMMEDIATELY
193
+
// STEP 5: SAVE scan result
172
194
s.saveScanResult(ctx, ep.ID, &ScanResult{
173
195
Status: storage.EndpointStatusOnline,
174
-
ResponseTime: responseTime,
196
+
ResponseTime: avgResponseTime,
175
197
Description: desc,
176
198
DIDs: dids,
177
199
Version: version,
200
+
UsedIP: usedIP, // Only from describeServer
178
201
})
179
202
180
-
// STEP 5: Fetch IP info if needed (async, with backoff)
181
-
go s.updateIPInfoIfNeeded(ctx, ip)
203
+
// STEP 6: Save repos in batches (only tracks changes)
204
+
if len(repoList) > 0 {
205
+
batchSize := 100_000
206
+
207
+
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
208
+
209
+
for i := 0; i < len(repoList); i += batchSize {
210
+
end := i + batchSize
211
+
if end > len(repoList) {
212
+
end = len(repoList)
213
+
}
214
+
215
+
batch := repoList[i:end]
216
+
repoData := make([]storage.PDSRepoData, len(batch))
217
+
218
+
for j, repo := range batch {
219
+
active := true
220
+
if repo.Active != nil {
221
+
active = *repo.Active
222
+
}
223
+
224
+
status := ""
225
+
if repo.Status != nil {
226
+
status = *repo.Status
227
+
}
228
+
229
+
repoData[j] = storage.PDSRepoData{
230
+
DID: repo.DID,
231
+
Head: repo.Head,
232
+
Rev: repo.Rev,
233
+
Active: active,
234
+
Status: status,
235
+
}
236
+
}
237
+
238
+
if err := s.db.UpsertPDSRepos(ctx, ep.ID, repoData); err != nil {
239
+
log.Error("Failed to save repo batch for endpoint %d: %v", ep.ID, err)
240
+
}
241
+
}
242
+
243
+
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
244
+
}
182
245
}
183
246
184
247
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
···
188
251
Metadata: make(map[string]interface{}),
189
252
}
190
253
191
-
var userCount int64 // NEW: Declare user count
254
+
var userCount int64
192
255
193
256
// Add PDS-specific metadata
194
257
if result.Status == storage.EndpointStatusOnline {
195
-
userCount = int64(len(result.DIDs)) // NEW: Get user count
196
-
scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness
258
+
userCount = int64(len(result.DIDs))
259
+
scanData.Metadata["user_count"] = userCount
197
260
if result.Description != nil {
198
261
scanData.Metadata["server_info"] = result.Description
199
262
}
···
210
273
Status: result.Status,
211
274
ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms
212
275
UserCount: userCount,
213
-
Version: result.Version, // NEW: Set the version field
276
+
Version: result.Version,
277
+
UsedIP: result.UsedIP, // IP actually dialed by describeServer
214
278
ScanData: scanData,
215
279
ScannedAt: time.Now().UTC(),
216
280
}
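The repo-persistence loop above chunks the full listing into 100,000-row batches before calling UpsertPDSRepos. The slicing pattern itself, in a runnable miniature (batch size shrunk so the output is readable):

	package main

	import "fmt"

	func main() {
		items := make([]int, 10)
		batchSize := 4
		for i := 0; i < len(items); i += batchSize {
			end := i + batchSize
			if end > len(items) {
				end = len(items)
			}
			fmt.Printf("batch %d..%d (len %d)\n", i, end, end-i)
		}
	}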
+2
-1
internal/pds/types.go
-662
internal/plc/bundle.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"bytes"
6
-
"context"
7
-
"crypto/sha256"
8
-
"encoding/hex"
9
-
"encoding/json"
10
-
"fmt"
11
-
"os"
12
-
"path/filepath"
13
-
"time"
14
-
15
-
"github.com/atscan/atscanner/internal/log"
16
-
"github.com/atscan/atscanner/internal/storage"
17
-
"github.com/klauspost/compress/zstd"
18
-
)
19
-
20
-
const BUNDLE_SIZE = 10000
21
-
22
-
type BundleManager struct {
23
-
dir string
24
-
enabled bool
25
-
encoder *zstd.Encoder
26
-
decoder *zstd.Decoder
27
-
db storage.Database
28
-
indexDIDs bool
29
-
}
30
-
31
-
// ===== INITIALIZATION =====
32
-
33
-
func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
34
-
if !enabled {
35
-
return &BundleManager{enabled: false}, nil
36
-
}
37
-
38
-
if err := os.MkdirAll(dir, 0755); err != nil {
39
-
return nil, fmt.Errorf("failed to create bundle dir: %w", err)
40
-
}
41
-
42
-
encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
43
-
if err != nil {
44
-
return nil, err
45
-
}
46
-
47
-
decoder, err := zstd.NewReader(nil)
48
-
if err != nil {
49
-
return nil, err
50
-
}
51
-
52
-
return &BundleManager{
53
-
dir: dir,
54
-
enabled: enabled,
55
-
encoder: encoder,
56
-
decoder: decoder,
57
-
db: db,
58
-
indexDIDs: indexDIDs, // NEW
59
-
}, nil
60
-
}
61
-
62
-
func (bm *BundleManager) Close() {
63
-
if bm.encoder != nil {
64
-
bm.encoder.Close()
65
-
}
66
-
if bm.decoder != nil {
67
-
bm.decoder.Close()
68
-
}
69
-
}
70
-
71
-
// ===== BUNDLE FILE ABSTRACTION =====
72
-
73
-
type bundleFile struct {
74
-
path string
75
-
operations []PLCOperation
76
-
uncompressedHash string
77
-
compressedHash string
78
-
}
79
-
80
-
func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
81
-
return &bundleFile{
82
-
path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
83
-
}
84
-
}
85
-
86
-
func (bf *bundleFile) exists() bool {
87
-
_, err := os.Stat(bf.path)
88
-
return err == nil
89
-
}
90
-
91
-
func (bm *BundleManager) load(bf *bundleFile) error {
92
-
compressed, err := os.ReadFile(bf.path)
93
-
if err != nil {
94
-
return fmt.Errorf("read failed: %w", err)
95
-
}
96
-
97
-
decompressed, err := bm.decoder.DecodeAll(compressed, nil)
98
-
if err != nil {
99
-
return fmt.Errorf("decompress failed: %w", err)
100
-
}
101
-
102
-
bf.operations = bm.parseJSONL(decompressed)
103
-
return nil
104
-
}
105
-
106
-
func (bm *BundleManager) save(bf *bundleFile) error {
107
-
jsonlData := bm.serializeJSONL(bf.operations)
108
-
bf.uncompressedHash = bm.hash(jsonlData)
109
-
110
-
compressed := bm.encoder.EncodeAll(jsonlData, nil)
111
-
bf.compressedHash = bm.hash(compressed)
112
-
113
-
return os.WriteFile(bf.path, compressed, 0644)
114
-
}
115
-
116
-
func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
117
-
var ops []PLCOperation
118
-
scanner := bufio.NewScanner(bytes.NewReader(data))
119
-
120
-
for scanner.Scan() {
121
-
line := scanner.Bytes()
122
-
if len(line) == 0 {
123
-
continue
124
-
}
125
-
126
-
var op PLCOperation
127
-
if err := json.Unmarshal(line, &op); err == nil {
128
-
op.RawJSON = append([]byte(nil), line...)
129
-
ops = append(ops, op)
130
-
}
131
-
}
132
-
133
-
return ops
134
-
}
135
-
136
-
func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
137
-
var buf []byte
138
-
for _, op := range ops {
139
-
buf = append(buf, op.RawJSON...)
140
-
buf = append(buf, '\n')
141
-
}
142
-
return buf
143
-
}
144
-
145
-
// ===== BUNDLE FETCHING =====
146
-
147
-
type bundleFetcher struct {
148
-
client *Client
149
-
seenCIDs map[string]bool
150
-
currentAfter string
151
-
fetchCount int
152
-
}
153
-
154
-
func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
155
-
seen := make(map[string]bool)
156
-
for cid := range prevBoundaryCIDs {
157
-
seen[cid] = true
158
-
}
159
-
160
-
return &bundleFetcher{
161
-
client: client,
162
-
seenCIDs: seen,
163
-
currentAfter: afterTime,
164
-
}
165
-
}
166
-
167
-
func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
168
-
var ops []PLCOperation
169
-
maxFetches := (target / 900) + 5
170
-
171
-
for len(ops) < target && bf.fetchCount < maxFetches {
172
-
bf.fetchCount++
173
-
batchSize := bf.calculateBatchSize(target - len(ops))
174
-
175
-
log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)
176
-
177
-
batch, shouldContinue := bf.fetchBatch(ctx, batchSize)
178
-
179
-
for _, op := range batch {
180
-
if !bf.seenCIDs[op.CID] {
181
-
bf.seenCIDs[op.CID] = true
182
-
ops = append(ops, op)
183
-
184
-
if len(ops) >= target {
185
-
return ops[:target], true
186
-
}
187
-
}
188
-
}
189
-
190
-
if !shouldContinue {
191
-
break
192
-
}
193
-
}
194
-
195
-
return ops, len(ops) >= target
196
-
}
197
-
198
-
func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
199
-
if bf.fetchCount == 0 {
200
-
return 1000
201
-
}
202
-
if remaining < 100 {
203
-
return 50
204
-
}
205
-
if remaining < 500 {
206
-
return 200
207
-
}
208
-
return 1000
209
-
}
210
-
211
-
func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
212
-
ops, err := bf.client.Export(ctx, ExportOptions{
213
-
Count: size,
214
-
After: bf.currentAfter,
215
-
})
216
-
217
-
if err != nil || len(ops) == 0 {
218
-
return nil, false
219
-
}
220
-
221
-
if len(ops) > 0 {
222
-
bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
223
-
}
224
-
225
-
return ops, len(ops) >= size
226
-
}
227
-
228
-
// ===== MAIN BUNDLE LOADING =====
229
-
230
-
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
231
-
if !bm.enabled {
232
-
return nil, false, fmt.Errorf("bundle manager disabled")
233
-
}
234
-
235
-
bf := bm.newBundleFile(bundleNum)
236
-
237
-
// Try local file first
238
-
if bf.exists() {
239
-
return bm.loadFromFile(ctx, bundleNum, bf)
240
-
}
241
-
242
-
// Fetch from PLC
243
-
return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
244
-
}
245
-
246
-
func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
247
-
log.Verbose("→ Loading bundle %06d from local file", bundleNum)
248
-
249
-
// Verify hash if bundle is in DB
250
-
if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
251
-
if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
252
-
log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
253
-
os.Remove(bf.path)
254
-
return nil, false, fmt.Errorf("hash mismatch")
255
-
}
256
-
log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
257
-
}
258
-
259
-
if err := bm.load(bf); err != nil {
260
-
return nil, false, err
261
-
}
262
-
263
-
// Index if not in DB
264
-
if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
265
-
bf.compressedHash = bm.hashFile(bf.path)
266
-
bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
267
-
268
-
// Calculate cursor from previous bundle
269
-
cursor := bm.calculateCursor(ctx, bundleNum)
270
-
271
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
272
-
}
273
-
274
-
return bf.operations, true, nil
275
-
}
276
-
277
-
func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
278
-
log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)
279
-
280
-
afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
281
-
fetcher := newBundleFetcher(client, afterTime, prevCIDs)
282
-
283
-
ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)
284
-
285
-
log.Info(" Collected %d unique operations after %d fetches (complete=%v)",
286
-
len(ops), fetcher.fetchCount, isComplete)
287
-
288
-
if isComplete {
289
-
bf.operations = ops
290
-
if err := bm.save(bf); err != nil {
291
-
log.Error("Warning: failed to save bundle: %v", err)
292
-
} else {
293
-
// The cursor is the afterTime that was used to fetch this bundle
294
-
cursor := afterTime
295
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
296
-
log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
297
-
bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
298
-
}
299
-
}
300
-
301
-
return ops, isComplete, nil
302
-
}
303
-
304
-
func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
305
-
if bundleNum == 1 {
306
-
return "", nil
307
-
}
308
-
309
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
310
-
if err != nil {
311
-
return "", nil
312
-
}
313
-
314
-
afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)
315
-
316
-
// Return stored boundary CIDs if available
317
-
if len(prevBundle.BoundaryCIDs) > 0 {
318
-
cids := make(map[string]bool)
319
-
for _, cid := range prevBundle.BoundaryCIDs {
320
-
cids[cid] = true
321
-
}
322
-
return afterTime, cids
323
-
}
324
-
325
-
// Fallback: compute from file
326
-
bf := bm.newBundleFile(bundleNum - 1)
327
-
if bf.exists() {
328
-
if err := bm.load(bf); err == nil {
329
-
_, cids := GetBoundaryCIDs(bf.operations)
330
-
return afterTime, cids
331
-
}
332
-
}
333
-
334
-
return afterTime, nil
335
-
}
336
-
337
-
// ===== BUNDLE INDEXING =====
338
-
339
-
func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
340
-
prevHash := ""
341
-
if bundleNum > 1 {
342
-
if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
343
-
prevHash = prev.Hash
344
-
}
345
-
}
346
-
347
-
dids := bm.extractUniqueDIDs(bf.operations)
348
-
compressedFileSize := bm.getFileSize(bf.path)
349
-
350
-
// Calculate uncompressed size
351
-
uncompressedSize := int64(0)
352
-
for _, op := range bf.operations {
353
-
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
354
-
}
355
-
356
-
// Get time range from operations
357
-
firstSeenAt := bf.operations[0].CreatedAt
358
-
lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt
359
-
360
-
bundle := &storage.PLCBundle{
361
-
BundleNumber: bundleNum,
362
-
StartTime: firstSeenAt,
363
-
EndTime: lastSeenAt,
364
-
DIDs: dids,
365
-
Hash: bf.uncompressedHash,
366
-
CompressedHash: bf.compressedHash,
367
-
CompressedSize: compressedFileSize,
368
-
UncompressedSize: uncompressedSize,
369
-
Cursor: cursor,
370
-
PrevBundleHash: prevHash,
371
-
Compressed: true,
372
-
CreatedAt: time.Now().UTC(),
373
-
}
374
-
375
-
// Create bundle first
376
-
if err := bm.db.CreateBundle(ctx, bundle); err != nil {
377
-
return err
378
-
}
379
-
380
-
// NEW: Only index DIDs if enabled
381
-
if bm.indexDIDs {
382
-
start := time.Now()
383
-
if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil {
384
-
log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err)
385
-
// Don't return error - bundle is already created
386
-
} else {
387
-
elapsed := time.Since(start)
388
-
log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed)
389
-
}
390
-
} else {
391
-
log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
392
-
}
393
-
394
-
return nil
395
-
}
396
-
397
-
func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
398
-
didSet := make(map[string]bool)
399
-
for _, op := range ops {
400
-
didSet[op.DID] = true
401
-
}
402
-
403
-
dids := make([]string, 0, len(didSet))
404
-
for did := range didSet {
405
-
dids = append(dids, did)
406
-
}
407
-
return dids
408
-
}
409
-
410
-
// ===== MEMPOOL BUNDLE CREATION =====
411
-
412
-
func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
413
-
if !bm.enabled {
414
-
return 0, fmt.Errorf("bundle manager disabled")
415
-
}
416
-
417
-
if len(operations) != BUNDLE_SIZE {
418
-
return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
419
-
}
420
-
421
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
422
-
if err != nil {
423
-
return 0, err
424
-
}
425
-
bundleNum := lastBundle + 1
426
-
427
-
bf := bm.newBundleFile(bundleNum)
428
-
bf.operations = operations
429
-
430
-
if err := bm.save(bf); err != nil {
431
-
return 0, err
432
-
}
433
-
434
-
if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
435
-
return 0, err
436
-
}
437
-
438
-
log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
439
-
bundleNum, bf.uncompressedHash[:16])
440
-
441
-
return bundleNum, nil
442
-
}
443
-
444
-
// ===== VERIFICATION =====
445
-
446
-
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
447
-
if !bm.enabled {
448
-
return fmt.Errorf("bundle manager disabled")
449
-
}
450
-
451
-
log.Info("Verifying bundle chain from 1 to %06d...", endBundle)
452
-
453
-
for i := 1; i <= endBundle; i++ {
454
-
bundle, err := bm.db.GetBundleByNumber(ctx, i)
455
-
if err != nil {
456
-
return fmt.Errorf("bundle %06d not found: %w", i, err)
457
-
}
458
-
459
-
// Verify file hash
460
-
path := bm.newBundleFile(i).path
461
-
if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
462
-
return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
463
-
}
464
-
465
-
// Verify chain link
466
-
if i > 1 {
467
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
468
-
if err != nil {
469
-
return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
470
-
}
471
-
472
-
if bundle.PrevBundleHash != prevBundle.Hash {
473
-
return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
474
-
i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
475
-
}
476
-
}
477
-
478
-
if i%100 == 0 {
479
-
log.Verbose(" ✓ Verified bundles 1-%06d", i)
480
-
}
481
-
}
482
-
483
-
log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
484
-
return nil
485
-
}
486
-
487
-
func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
488
-
if !bm.enabled {
489
-
return nil
490
-
}
491
-
492
-
for i := 1; i < targetBundle; i++ {
493
-
if !bm.newBundleFile(i).exists() {
494
-
if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
495
-
return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
496
-
}
497
-
}
498
-
}
499
-
500
-
return nil
501
-
}
502
-
503
-
// ===== UTILITY METHODS =====
504
-
505
-
func (bm *BundleManager) hash(data []byte) string {
506
-
h := sha256.Sum256(data)
507
-
return hex.EncodeToString(h[:])
508
-
}
509
-
510
-
func (bm *BundleManager) hashFile(path string) string {
511
-
data, _ := os.ReadFile(path)
512
-
return bm.hash(data)
513
-
}
514
-
515
-
func (bm *BundleManager) verifyHash(path, expectedHash string) error {
516
-
if expectedHash == "" {
517
-
return nil
518
-
}
519
-
520
-
actualHash := bm.hashFile(path)
521
-
if actualHash != expectedHash {
522
-
return fmt.Errorf("hash mismatch")
523
-
}
524
-
return nil
525
-
}
526
-
527
-
func (bm *BundleManager) getFileSize(path string) int64 {
528
-
if info, err := os.Stat(path); err == nil {
529
-
return info.Size()
530
-
}
531
-
return 0
532
-
}
533
-
534
-
func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
535
-
if !bm.enabled {
536
-
return 0, 0, 0, 0, nil
537
-
}
538
-
return bm.db.GetBundleStats(ctx)
539
-
}
540
-
541
-
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
542
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
543
-
if err != nil {
544
-
return nil, err
545
-
}
546
-
547
-
if lastBundle == 0 {
548
-
return map[string]interface{}{
549
-
"chain_length": 0,
550
-
"status": "empty",
551
-
}, nil
552
-
}
553
-
554
-
firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
555
-
lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)
556
-
557
-
return map[string]interface{}{
558
-
"chain_length": lastBundle,
559
-
"first_bundle": 1,
560
-
"last_bundle": lastBundle,
561
-
"chain_start_time": firstBundle.StartTime,
562
-
"chain_end_time": lastBundleData.EndTime,
563
-
"chain_head_hash": lastBundleData.Hash,
564
-
}, nil
565
-
}
566
-
567
-
// ===== EXPORTED HELPERS =====
568
-
569
-
func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
570
-
if len(operations) == 0 {
571
-
return time.Time{}, nil
572
-
}
573
-
574
-
lastOp := operations[len(operations)-1]
575
-
boundaryTime := lastOp.CreatedAt
576
-
cidSet := make(map[string]bool)
577
-
578
-
for i := len(operations) - 1; i >= 0; i-- {
579
-
op := operations[i]
580
-
if op.CreatedAt.Equal(boundaryTime) {
581
-
cidSet[op.CID] = true
582
-
} else {
583
-
break
584
-
}
585
-
}
586
-
587
-
return boundaryTime, cidSet
588
-
}
589
-
590
-
func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
591
-
if len(operations) == 0 {
592
-
return operations
593
-
}
594
-
595
-
boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
596
-
if err != nil {
597
-
return operations
598
-
}
599
-
600
-
startIdx := 0
601
-
for startIdx < len(operations) {
602
-
op := operations[startIdx]
603
-
604
-
if op.CreatedAt.After(boundaryTime) {
605
-
break
606
-
}
607
-
608
-
if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
609
-
startIdx++
610
-
continue
611
-
}
612
-
613
-
break
614
-
}
615
-
616
-
return operations[startIdx:]
617
-
}
618
-
619
-
// LoadBundleOperations is a public method for external access (e.g., API handlers)
620
-
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
621
-
if !bm.enabled {
622
-
return nil, fmt.Errorf("bundle manager disabled")
623
-
}
624
-
625
-
bf := bm.newBundleFile(bundleNum)
626
-
627
-
if !bf.exists() {
628
-
return nil, fmt.Errorf("bundle %06d not found", bundleNum)
629
-
}
630
-
631
-
if err := bm.load(bf); err != nil {
632
-
return nil, err
633
-
}
634
-
635
-
return bf.operations, nil
636
-
}
637
-
638
-
// calculateCursor determines the cursor value for a given bundle
639
-
// For bundle 1: returns empty string
640
-
// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format
641
-
func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
642
-
if bundleNum == 1 {
643
-
return ""
644
-
}
645
-
646
-
// Try to get cursor from previous bundle in DB
647
-
if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
648
-
return prevBundle.EndTime.Format(time.RFC3339Nano)
649
-
}
650
-
651
-
// If previous bundle not in DB, try to load it from file
652
-
prevBf := bm.newBundleFile(bundleNum - 1)
653
-
if prevBf.exists() {
654
-
if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
655
-
// Return the createdAt of the last operation in previous bundle
656
-
lastOp := prevBf.operations[len(prevBf.operations)-1]
657
-
return lastOp.CreatedAt.Format(time.RFC3339Nano)
658
-
}
659
-
}
660
-
661
-
return ""
662
-
}
-237
internal/plc/client.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"context"
6
-
"encoding/json"
7
-
"fmt"
8
-
"io"
9
-
"net/http"
10
-
"strconv"
11
-
"time"
12
-
13
-
"github.com/atscan/atscanner/internal/log"
14
-
)
15
-
16
-
type Client struct {
17
-
baseURL string
18
-
httpClient *http.Client
19
-
rateLimiter *RateLimiter
20
-
}
21
-
22
-
func NewClient(baseURL string) *Client {
23
-
// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
24
-
rateLimiter := NewRateLimiter(90, time.Minute)
25
-
26
-
return &Client{
27
-
baseURL: baseURL,
28
-
httpClient: &http.Client{
29
-
Timeout: 60 * time.Second,
30
-
},
31
-
rateLimiter: rateLimiter,
32
-
}
33
-
}
34
-
35
-
func (c *Client) Close() {
36
-
if c.rateLimiter != nil {
37
-
c.rateLimiter.Stop()
38
-
}
39
-
}
40
-
41
-
type ExportOptions struct {
42
-
Count int
43
-
After string // ISO 8601 datetime string
44
-
}
45
-
46
-
// Export fetches export data from PLC directory with rate limiting and retry
47
-
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
48
-
return c.exportWithRetry(ctx, opts, 5)
49
-
}
50
-
51
-
// exportWithRetry implements retry logic with exponential backoff for rate limits
52
-
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
53
-
var lastErr error
54
-
backoff := 1 * time.Second
55
-
56
-
for attempt := 1; attempt <= maxRetries; attempt++ {
57
-
// Wait for rate limiter token
58
-
if err := c.rateLimiter.Wait(ctx); err != nil {
59
-
return nil, err
60
-
}
61
-
62
-
operations, retryAfter, err := c.doExport(ctx, opts)
63
-
64
-
if err == nil {
65
-
return operations, nil
66
-
}
67
-
68
-
lastErr = err
69
-
70
-
// Check if it's a rate limit error (429)
71
-
if retryAfter > 0 {
72
-
log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
73
-
retryAfter, attempt, maxRetries)
74
-
75
-
select {
76
-
case <-time.After(retryAfter):
77
-
continue
78
-
case <-ctx.Done():
79
-
return nil, ctx.Err()
80
-
}
81
-
}
82
-
83
-
// Other errors - exponential backoff
84
-
if attempt < maxRetries {
85
-
log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
86
-
attempt, maxRetries, err, backoff)
87
-
88
-
select {
89
-
case <-time.After(backoff):
90
-
backoff *= 2 // Exponential backoff
91
-
case <-ctx.Done():
92
-
return nil, ctx.Err()
93
-
}
94
-
}
95
-
}
96
-
97
-
return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
98
-
}
99
-
100
-
// doExport performs the actual HTTP request
101
-
-func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
-	url := fmt.Sprintf("%s/export", c.baseURL)
-
-	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	// Add query parameters
-	q := req.URL.Query()
-	if opts.Count > 0 {
-		q.Add("count", fmt.Sprintf("%d", opts.Count))
-	}
-	if opts.After != "" {
-		q.Add("after", opts.After)
-	}
-	req.URL.RawQuery = q.Encode()
-
-	resp, err := c.httpClient.Do(req)
-	if err != nil {
-		return nil, 0, fmt.Errorf("request failed: %w", err)
-	}
-	defer resp.Body.Close()
-
-	// Handle rate limiting (429)
-	if resp.StatusCode == http.StatusTooManyRequests {
-		retryAfter := parseRetryAfter(resp)
-
-		// Also check x-ratelimit headers for info
-		if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
-			log.Verbose("Rate limit: %s", limit)
-		}
-
-		return nil, retryAfter, fmt.Errorf("rate limited (429)")
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
-	}
-
-	var operations []PLCOperation
-
-	// PLC export returns newline-delimited JSON
-	scanner := bufio.NewScanner(resp.Body)
-	buf := make([]byte, 0, 64*1024)
-	scanner.Buffer(buf, 1024*1024)
-
-	lineCount := 0
-	for scanner.Scan() {
-		lineCount++
-		line := scanner.Bytes()
-
-		if len(line) == 0 {
-			continue
-		}
-
-		var op PLCOperation
-		if err := json.Unmarshal(line, &op); err != nil {
-			log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
-			continue
-		}
-
-		// CRITICAL: Store the original raw JSON bytes
-		op.RawJSON = make([]byte, len(line))
-		copy(op.RawJSON, line)
-
-		operations = append(operations, op)
-	}
-
-	if err := scanner.Err(); err != nil {
-		return nil, 0, fmt.Errorf("error reading response: %w", err)
-	}
-
-	return operations, 0, nil
-}
-
-// parseRetryAfter parses the Retry-After header
-func parseRetryAfter(resp *http.Response) time.Duration {
-	retryAfter := resp.Header.Get("Retry-After")
-	if retryAfter == "" {
-		// Default to 5 minutes if no header
-		return 5 * time.Minute
-	}
-
-	// Try parsing as seconds
-	if seconds, err := strconv.Atoi(retryAfter); err == nil {
-		return time.Duration(seconds) * time.Second
-	}
-
-	// Try parsing as HTTP date
-	if t, err := http.ParseTime(retryAfter); err == nil {
-		return time.Until(t)
-	}
-
-	// Default
-	return 5 * time.Minute
-}
-
-// GetDID fetches a specific DID document from PLC
-func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
-	// Wait for rate limiter
-	if err := c.rateLimiter.Wait(ctx); err != nil {
-		return nil, err
-	}
-
-	url := fmt.Sprintf("%s/%s", c.baseURL, did)
-
-	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	resp, err := c.httpClient.Do(req)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode == http.StatusTooManyRequests {
-		retryAfter := parseRetryAfter(resp)
-		return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
-	}
-
-	var doc DIDDocument
-	if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
-		return nil, err
-	}
-
-	return &doc, nil
-}
+112  internal/plc/helpers.go (new file)
···
+package plc
+
+import (
+	"regexp"
+	"strings"
+)
+
+// MaxHandleLength is the maximum allowed handle length for database storage
+const MaxHandleLength = 500
+
+// Handle validation regex per AT Protocol spec
+// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter
+var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
+
+// ExtractHandle safely extracts the handle from a PLC operation
+func ExtractHandle(op *PLCOperation) string {
+	if op == nil || op.Operation == nil {
+		return ""
+	}
+
+	// Get "alsoKnownAs"
+	aka, ok := op.Operation["alsoKnownAs"].([]interface{})
+	if !ok {
+		return ""
+	}
+
+	// Find the handle (e.g., "at://handle.bsky.social")
+	for _, item := range aka {
+		if handle, ok := item.(string); ok {
+			if strings.HasPrefix(handle, "at://") {
+				return strings.TrimPrefix(handle, "at://")
+			}
+		}
+	}
+	return ""
+}
+
+// ValidateHandle checks if a handle is valid for database storage
+// Returns empty string if handle is invalid (too long or wrong format)
+func ValidateHandle(handle string) string {
+	if handle == "" {
+		return ""
+	}
+
+	// Check length first (faster)
+	if len(handle) > MaxHandleLength {
+		return ""
+	}
+
+	// Validate format using regex
+	if !handleRegex.MatchString(handle) {
+		return ""
+	}
+
+	return handle
+}
+
+// ExtractPDS safely extracts the PDS endpoint from a PLC operation
+func ExtractPDS(op *PLCOperation) string {
+	if op == nil || op.Operation == nil {
+		return ""
+	}
+
+	// Get "services"
+	services, ok := op.Operation["services"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+
+	// Get "atproto_pds"
+	pdsService, ok := services["atproto_pds"].(map[string]interface{})
+	if !ok {
+		return ""
+	}
+
+	// Get "endpoint"
+	if endpoint, ok := pdsService["endpoint"].(string); ok {
+		return endpoint
+	}
+
+	return ""
+}
+
+// DIDInfo contains extracted metadata from a PLC operation
+type DIDInfo struct {
+	Handle string
+	PDS    string
+}
+
+// ExtractDIDInfo extracts both handle and PDS from an operation
+func ExtractDIDInfo(op *PLCOperation) DIDInfo {
+	return DIDInfo{
+		Handle: ExtractHandle(op),
+		PDS:    ExtractPDS(op),
+	}
+}
+
+// ExtractDIDInfoMap creates a map of DID -> info from operations
+// Processes in reverse order to get the latest state for each DID
+func ExtractDIDInfoMap(ops []PLCOperation) map[string]DIDInfo {
+	infoMap := make(map[string]DIDInfo)
+
+	// Process in reverse to get latest state
+	for i := len(ops) - 1; i >= 0; i-- {
+		op := ops[i]
+		if _, exists := infoMap[op.DID]; !exists {
+			infoMap[op.DID] = ExtractDIDInfo(&op)
+		}
+	}
+
+	return infoMap
+}
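The helpers above are pure functions over the operation map, so they are easy to exercise in isolation. A minimal sketch of how they compose (the DID, handle, and endpoint values are hypothetical sample data, not taken from the PLC directory):

package main

import (
	"fmt"

	"github.com/atscan/atscand/internal/plc"
)

func main() {
	op := plc.PLCOperation{
		DID: "did:plc:example123",
		Operation: map[string]interface{}{
			"alsoKnownAs": []interface{}{"at://alice.example.com"},
			"services": map[string]interface{}{
				"atproto_pds": map[string]interface{}{
					"type":     "AtprotoPersonalDataServer",
					"endpoint": "https://pds.example.com",
				},
			},
		},
	}

	info := plc.ExtractDIDInfo(&op)
	fmt.Println(info.Handle, info.PDS) // alice.example.com https://pds.example.com

	// ValidateHandle rejects anything that is not a plain domain name.
	fmt.Println(plc.ValidateHandle("alice.example.com")) // alice.example.com
	fmt.Println(plc.ValidateHandle("not a handle"))      // "" (invalid)
}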
+522  internal/plc/manager.go (new file)
···
+package plc
+
+import (
+	"context"
+	"encoding/csv"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/storage"
+	"github.com/klauspost/compress/zstd"
+	plcbundle "tangled.org/atscan.net/plcbundle"
+)
+
+// BundleManager wraps the library's manager with database integration
+type BundleManager struct {
+	libManager *plcbundle.Manager
+	db         storage.Database
+	bundleDir  string
+	indexDIDs  bool
+}
+
+func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
+	// Create library config
+	config := plcbundle.DefaultConfig(bundleDir)
+
+	// Create PLC client
+	var client *plcbundle.PLCClient
+	if plcURL != "" {
+		client = plcbundle.NewPLCClient(plcURL)
+	}
+
+	// Create library manager
+	libMgr, err := plcbundle.NewManager(config, client)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create library manager: %w", err)
+	}
+
+	return &BundleManager{
+		libManager: libMgr,
+		db:         db,
+		bundleDir:  bundleDir,
+		indexDIDs:  indexDIDs,
+	}, nil
+}
+
+func (bm *BundleManager) Close() {
+	if bm.libManager != nil {
+		bm.libManager.Close()
+	}
+}
+
+// LoadBundleOperations loads a bundle via the library and returns its operations
+func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
+	bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
+	if err != nil {
+		return nil, err
+	}
+	return bundle.Operations, nil
+}
+
+// LoadBundle loads a full bundle with metadata
+func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
+	return bm.libManager.LoadBundle(ctx, bundleNum)
+}
+
+// FetchAndSaveBundle fetches the next bundle from PLC and saves it
+func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
+	// Fetch from PLC using the library
+	bundle, err := bm.libManager.FetchNextBundle(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// Save to disk (library handles this)
+	if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
+		return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
+	}
+
+	// Index DIDs if enabled (still uses the database for this)
+	if bm.indexDIDs && len(bundle.Operations) > 0 {
+		if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
+			log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
+		}
+	}
+
+	log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
+
+	return bundle, nil
+}
+
+// indexBundleDIDs indexes DIDs from a bundle into the database
+func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
+	start := time.Now()
+	log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
+
+	// Extract DID info from operations
+	didInfoMap := ExtractDIDInfoMap(bundle.Operations)
+
+	successCount := 0
+	errorCount := 0
+	invalidHandleCount := 0
+
+	// Upsert each DID
+	for did, info := range didInfoMap {
+		validHandle := ValidateHandle(info.Handle)
+		if info.Handle != "" && validHandle == "" {
+			invalidHandleCount++
+		}
+
+		if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
+			log.Error("Failed to index DID %s: %v", did, err)
+			errorCount++
+		} else {
+			successCount++
+		}
+	}
+
+	elapsed := time.Since(start)
+	log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
+		successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
+
+	return nil
+}
+
+// VerifyChain verifies bundle chain integrity
+func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
+	result, err := bm.libManager.VerifyChain(ctx)
+	if err != nil {
+		return err
+	}
+
+	if !result.Valid {
+		return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
+	}
+
+	return nil
+}
+
+// GetChainInfo returns chain information
+func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
+	return bm.libManager.GetInfo(), nil
+}
+
+// GetMempoolStats returns mempool statistics from the library
+func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
+	return bm.libManager.GetMempoolStats()
+}
+
+// GetMempoolOperations returns all operations currently in mempool
+func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
+	return bm.libManager.GetMempoolOperations()
+}
+
+// GetIndex returns the library's bundle index
+func (bm *BundleManager) GetIndex() *plcbundle.Index {
+	return bm.libManager.GetIndex()
+}
+
+// GetLastBundleNumber returns the last bundle number
+func (bm *BundleManager) GetLastBundleNumber() int {
+	index := bm.libManager.GetIndex()
+	lastBundle := index.GetLastBundle()
+	if lastBundle == nil {
+		return 0
+	}
+	return lastBundle.BundleNumber
+}
+
+// GetBundleMetadata gets bundle metadata by number
+func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
+	index := bm.libManager.GetIndex()
+	return index.GetBundle(bundleNum)
+}
+
+// GetBundles returns the most recent bundles (newest first)
+func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
+	index := bm.libManager.GetIndex()
+	allBundles := index.GetBundles()
+
+	// Determine how many bundles to return
+	count := limit
+	if count <= 0 || count > len(allBundles) {
+		count = len(allBundles)
+	}
+
+	// Build result in reverse order (newest first)
+	result := make([]*plcbundle.BundleMetadata, count)
+	for i := 0; i < count; i++ {
+		result[i] = allBundles[len(allBundles)-1-i]
+	}
+
+	return result
+}
+
+// GetBundleStats returns bundle statistics
+func (bm *BundleManager) GetBundleStats() map[string]interface{} {
+	index := bm.libManager.GetIndex()
+	stats := index.GetStats()
+
+	// Convert to expected format; keep the fallback an int so the
+	// .(int) assertion below cannot panic on an empty index.
+	lastBundle := stats["last_bundle"]
+	if lastBundle == nil {
+		lastBundle = 0
+	}
+
+	// Calculate total uncompressed size by iterating through all bundles
+	totalUncompressedSize := int64(0)
+	allBundles := index.GetBundles()
+	for _, bundle := range allBundles {
+		totalUncompressedSize += bundle.UncompressedSize
+	}
+
+	return map[string]interface{}{
+		"bundle_count":            int64(stats["bundle_count"].(int)),
+		"total_size":              stats["total_size"].(int64),
+		"total_uncompressed_size": totalUncompressedSize,
+		"last_bundle":             int64(lastBundle.(int)),
+	}
+}
+
+// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
+func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
+	bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// Extract unique DIDs
+	didSet := make(map[string]bool)
+	for _, op := range bundle.Operations {
+		didSet[op.DID] = true
+	}
+
+	dids := make([]string, 0, len(didSet))
+	for did := range didSet {
+		dids = append(dids, did)
+	}
+
+	return dids, bundle.DIDCount, nil
+}
+
+// FindBundleForTimestamp finds the bundle containing a timestamp
+func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
+	index := bm.libManager.GetIndex()
+	bundles := index.GetBundles()
+
+	// Find bundle containing this time
+	for _, bundle := range bundles {
+		if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
+			(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
+			return bundle.BundleNumber
+		}
+	}
+
+	// Return the closest bundle before this time
+	for i := len(bundles) - 1; i >= 0; i-- {
+		if bundles[i].EndTime.Before(afterTime) {
+			return bundles[i].BundleNumber
+		}
+	}
+
+	return 1 // Default to first bundle
+}
+
+// StreamRaw streams raw compressed bundle data
+func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
+	return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
+}
+
+// StreamDecompressed streams decompressed bundle data
+func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
+	return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
+}
+
+// GetPLCHistory calculates historical statistics from the bundle index
+func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
+	index := bm.libManager.GetIndex()
+	allBundles := index.GetBundles()
+
+	// Filter bundles >= fromBundle
+	var filtered []*plcbundle.BundleMetadata
+	for _, b := range allBundles {
+		if b.BundleNumber >= fromBundle {
+			filtered = append(filtered, b)
+		}
+	}
+
+	if len(filtered) == 0 {
+		return []*storage.PLCHistoryPoint{}, nil
+	}
+
+	// Sort bundles by bundle number to ensure proper cumulative calculation
+	sort.Slice(filtered, func(i, j int) bool {
+		return filtered[i].BundleNumber < filtered[j].BundleNumber
+	})
+
+	// Group by date
+	type dailyStat struct {
+		lastBundle        int
+		bundleCount       int
+		totalUncompressed int64
+		totalCompressed   int64
+	}
+
+	dailyStats := make(map[string]*dailyStat)
+
+	// Map storing the cumulative values at the end of each date
+	dateCumulatives := make(map[string]struct {
+		uncompressed int64
+		compressed   int64
+	})
+
+	// Calculate cumulative totals while iterating through sorted bundles
+	cumulativeUncompressed := int64(0)
+	cumulativeCompressed := int64(0)
+
+	for _, bundle := range filtered {
+		dateStr := bundle.StartTime.Format("2006-01-02")
+
+		// Update cumulative totals
+		cumulativeUncompressed += bundle.UncompressedSize
+		cumulativeCompressed += bundle.CompressedSize
+
+		if stat, exists := dailyStats[dateStr]; exists {
+			// Update existing day
+			if bundle.BundleNumber > stat.lastBundle {
+				stat.lastBundle = bundle.BundleNumber
+			}
+			stat.bundleCount++
+			stat.totalUncompressed += bundle.UncompressedSize
+			stat.totalCompressed += bundle.CompressedSize
+		} else {
+			// Create new day entry
+			dailyStats[dateStr] = &dailyStat{
+				lastBundle:        bundle.BundleNumber,
+				bundleCount:       1,
+				totalUncompressed: bundle.UncompressedSize,
+				totalCompressed:   bundle.CompressedSize,
+			}
+		}
+
+		// Store the cumulative values at the end of this date
+		// (overwritten if there are multiple bundles on the same day)
+		dateCumulatives[dateStr] = struct {
+			uncompressed int64
+			compressed   int64
+		}{
+			uncompressed: cumulativeUncompressed,
+			compressed:   cumulativeCompressed,
+		}
+	}
+
+	// Convert the map to a slice sorted by date
+	var dates []string
+	for date := range dailyStats {
+		dates = append(dates, date)
+	}
+	sort.Strings(dates)
+
+	// Build history points with cumulative operations
+	var history []*storage.PLCHistoryPoint
+	cumulativeOps := 0
+
+	for _, date := range dates {
+		stat := dailyStats[date]
+		cumulativeOps += stat.bundleCount * BUNDLE_SIZE // every full bundle holds BUNDLE_SIZE operations
+		cumulative := dateCumulatives[date]
+
+		history = append(history, &storage.PLCHistoryPoint{
+			Date:                   date,
+			BundleNumber:           stat.lastBundle,
+			OperationCount:         cumulativeOps,
+			UncompressedSize:       stat.totalUncompressed,
+			CompressedSize:         stat.totalCompressed,
+			CumulativeUncompressed: cumulative.uncompressed,
+			CumulativeCompressed:   cumulative.compressed,
+		})
+	}
+
+	// Apply limit if specified
+	if limit > 0 && len(history) > limit {
+		history = history[:limit]
+	}
+
+	return history, nil
+}
+
+// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
+func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
+	// Define the path to the labels file
+	labelsDir := filepath.Join(bm.bundleDir, "labels")
+	labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
+
+	// Check if the file exists
+	if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
+		log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
+		// Return empty, not an error
+		return []*PLCOpLabel{}, nil
+	}
+
+	// Open the zstd-compressed file
+	file, err := os.Open(labelsFile)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open labels file: %w", err)
+	}
+	defer file.Close()
+
+	// Create a zstd reader
+	zstdReader, err := zstd.NewReader(file)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create zstd reader: %w", err)
+	}
+	defer zstdReader.Close()
+
+	// Create a CSV reader; the labels file has no header row.
+	// FieldsPerRecord could be set to 6 for validation, but
+	// parseLabelRecord already checks the field count itself.
+	csvReader := csv.NewReader(zstdReader)
+
+	var labels []*PLCOpLabel
+
+	// Read all records
+	for {
+		// Check for context cancellation
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+
+		record, err := csvReader.Read()
+		if err == io.EOF {
+			break // End of file
+		}
+		if err != nil {
+			log.Error("Error reading CSV record in %s: %v", labelsFile, err)
+			continue // Skip bad line
+		}
+
+		// Parse the CSV record (a []string)
+		label, err := parseLabelRecord(record)
+		if err != nil {
+			log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
+			continue // Skip bad data
+		}
+
+		labels = append(labels, label)
+	}
+
+	return labels, nil
+}
+
+// parseLabelRecord converts a label CSV record into a PLCOpLabel struct.
+// Malformed records are logged and rejected.
+func parseLabelRecord(record []string) (*PLCOpLabel, error) {
+	// Format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
+	if len(record) != 6 {
+		err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
+		log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, err
+	}
+
+	// 0:bundle
+	bundle, err := strconv.Atoi(record[0])
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'bundle': %w", err)
+	}
+
+	// 1:position
+	position, err := strconv.Atoi(record[1])
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'position': %w", err)
+	}
+
+	// 2:cid(short)
+	shortCID := record[2]
+
+	// 3:size
+	size, err := strconv.Atoi(record[3])
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'size': %w", err)
+	}
+
+	// 4:confidence
+	confidence, err := strconv.ParseFloat(record[4], 64)
+	if err != nil {
+		log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
+		return nil, fmt.Errorf("parsing 'confidence': %w", err)
+	}
+
+	// 5:labels
+	detectors := strings.Split(record[5], ";")
+
+	label := &PLCOpLabel{
+		Bundle:     bundle,
+		Position:   position,
+		CID:        shortCID,
+		Size:       size,
+		Confidence: confidence,
+		Detectors:  detectors,
+	}
+
+	return label, nil
+}
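BundleManager is the seam between the plcbundle library and the local database: the library owns bundle files, index, and mempool, while the database keeps derived indexes. A minimal wiring sketch, assuming a storage.Database already exists; the directory path and error handling are illustrative only:

// Sketch: fetch bundles until the library reports it cannot fill another one.
bm, err := plc.NewBundleManager("./bundles", "https://plc.directory", db, true)
if err != nil {
	log.Fatal("bundle manager: %v", err)
}
defer bm.Close()

for {
	if _, err := bm.FetchAndSaveBundle(ctx); err != nil {
		break // e.g. not enough operations yet for a full bundle
	}
}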
-70  internal/plc/ratelimiter.go (file removed)
···
-package plc
-
-import (
-	"context"
-	"time"
-)
-
-// RateLimiter implements a token bucket rate limiter
-type RateLimiter struct {
-	tokens     chan struct{}
-	refillRate time.Duration
-	maxTokens  int
-	stopRefill chan struct{}
-}
-
-// NewRateLimiter creates a new rate limiter
-// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
-func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
-	rl := &RateLimiter{
-		tokens:     make(chan struct{}, requestsPerPeriod),
-		refillRate: period / time.Duration(requestsPerPeriod),
-		maxTokens:  requestsPerPeriod,
-		stopRefill: make(chan struct{}),
-	}
-
-	// Fill initially
-	for i := 0; i < requestsPerPeriod; i++ {
-		rl.tokens <- struct{}{}
-	}
-
-	// Start refill goroutine
-	go rl.refill()
-
-	return rl
-}
-
-// refill adds tokens at the specified rate
-func (rl *RateLimiter) refill() {
-	ticker := time.NewTicker(rl.refillRate)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ticker.C:
-			select {
-			case rl.tokens <- struct{}{}:
-				// Token added
-			default:
-				// Buffer full, skip
-			}
-		case <-rl.stopRefill:
-			return
-		}
-	}
-}
-
-// Wait blocks until a token is available
-func (rl *RateLimiter) Wait(ctx context.Context) error {
-	select {
-	case <-rl.tokens:
-		return nil
-	case <-ctx.Done():
-		return ctx.Err()
-	}
-}
-
-// Stop stops the rate limiter
-func (rl *RateLimiter) Stop() {
-	close(rl.stopRefill)
-}
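The hand-rolled token bucket goes away together with the in-repo HTTP client. For reference only (not part of this change): should a local limiter ever be needed again, golang.org/x/time/rate gives the same semantics in a few lines:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// 90 requests per minute, mirroring the removed NewRateLimiter(90, time.Minute).
	limiter := rate.NewLimiter(rate.Every(time.Minute/90), 90)

	ctx := context.Background()
	for i := 0; i < 3; i++ {
		if err := limiter.Wait(ctx); err != nil { // blocks, like RateLimiter.Wait did
			fmt.Println("wait:", err)
			return
		}
		fmt.Println("request", i)
	}
}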
+92 -415  internal/plc/scanner.go
···
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
 
-	"github.com/acarl005/stripansi"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/storage"
 )
 
 type Scanner struct {
-	client        *Client
+	bundleManager *BundleManager
 	db            storage.Database
 	config        config.PLCConfig
-	bundleManager *BundleManager
 }
 
-func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
-	bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
-	if err != nil {
-		log.Error("Warning: failed to initialize bundle manager: %v", err)
-		bundleManager = &BundleManager{enabled: false}
-	}
+func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
+	log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
 
 	return &Scanner{
-		client:        NewClient(cfg.DirectoryURL),
+		bundleManager: bundleManager, // Use provided instance
 		db:            db,
 		config:        cfg,
-		bundleManager: bundleManager,
 	}
 }
 
 func (s *Scanner) Close() {
-	if s.bundleManager != nil {
-		s.bundleManager.Close()
-	}
-}
-
-// ScanMetrics tracks scan progress
-type ScanMetrics struct {
-	totalFetched   int64 // Total ops fetched from PLC/bundles
-	totalProcessed int64 // Unique ops processed (after dedup)
-	newEndpoints   int64 // New endpoints discovered
-	endpointCounts map[string]int64
-	currentBundle  int
-	startTime      time.Time
-}
-
-func newMetrics(startBundle int) *ScanMetrics {
-	return &ScanMetrics{
-		endpointCounts: make(map[string]int64),
-		currentBundle:  startBundle,
-		startTime:      time.Now(),
-	}
-}
-
-func (m *ScanMetrics) logSummary() {
-	summary := formatEndpointCounts(m.endpointCounts)
-	if m.newEndpoints > 0 {
-		log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
-			m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
-	} else {
-		log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
-			m.totalProcessed, m.totalFetched, time.Since(m.startTime))
-	}
+	// Don't close bundleManager here - it's shared
 }
 
 func (s *Scanner) Scan(ctx context.Context) error {
 	log.Info("Starting PLC directory scan...")
-	log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
 
 	cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
 	if err != nil {
 		return fmt.Errorf("failed to get scan cursor: %w", err)
 	}
 
-	startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
-	metrics := newMetrics(startBundle)
+	metrics := newMetrics(cursor.LastBundleNumber + 1)
 
-	if startBundle > 1 {
-		if err := s.ensureContinuity(ctx, startBundle); err != nil {
-			return err
-		}
-	}
-
-	// Handle existing mempool first
-	if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
-		return s.handleMempoolOnly(ctx, metrics)
-	}
-
-	// Process bundles until incomplete or error
+	// Main processing loop
 	for {
 		if err := ctx.Err(); err != nil {
 			return err
 		}
 
-		if err := s.processSingleBundle(ctx, metrics); err != nil {
-			if s.shouldRetry(err) {
-				continue
-			}
-			break
-		}
-
-		if err := s.updateCursor(ctx, cursor, metrics); err != nil {
-			log.Error("Warning: failed to update cursor: %v", err)
-		}
-	}
-
-	// Try to finalize mempool
-	s.finalizeMempool(ctx, metrics)
-
-	metrics.logSummary()
-	return nil
-}
-
-func (s *Scanner) calculateStartBundle(lastBundle int) int {
-	if lastBundle == 0 {
-		return 1
-	}
-	return lastBundle + 1
-}
-
-func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
-	log.Info("Checking bundle continuity...")
-	if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
-		return fmt.Errorf("bundle continuity check failed: %w", err)
-	}
-	return nil
-}
-
-func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
-	count, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-	return count > 0, nil
-}
-
-func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
-	count, _ := s.db.GetMempoolCount(ctx)
-	log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
-
-	if err := s.fillMempool(ctx, m); err != nil {
-		return err
-	}
-
-	if err := s.processMempool(ctx, m); err != nil {
-		log.Error("Error processing mempool: %v", err)
-	}
-
-	m.logSummary()
-	return nil
-}
-
-func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
-	log.Verbose("→ Processing bundle %06d...", m.currentBundle)
-
-	ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
-	if err != nil {
-		return s.handleBundleError(err, m)
-	}
-
-	if isComplete {
-		return s.handleCompleteBundle(ctx, ops, m)
-	}
-	return s.handleIncompleteBundle(ctx, ops, m)
-}
-
-func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
-	log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
-
-	if strings.Contains(err.Error(), "rate limited") {
-		log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
-		time.Sleep(5 * time.Minute)
-		return fmt.Errorf("retry")
-	}
-
-	if m.currentBundle > 1 {
-		log.Info("→ Reached end of available data")
-	}
-	return err
-}
-
-func (s *Scanner) shouldRetry(err error) bool {
-	return err != nil && err.Error() == "retry"
-}
-
-func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
-	counts, err := s.processBatch(ctx, ops)
-	if err != nil {
-		return err
-	}
-
-	s.mergeCounts(m.endpointCounts, counts)
-	m.totalProcessed += int64(len(ops)) // Unique ops after dedup
-	m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
-
-	batchTotal := sumCounts(counts)
-	log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
-		m.currentBundle, len(ops), batchTotal)
-
-	m.currentBundle++
-	return nil
-}
-
-func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
-	log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
-
-	if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
-		return err
-	}
-
-	s.finalizeMempool(ctx, m)
-	return fmt.Errorf("incomplete") // Signal end of processing
-}
-
-func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
-	if err := s.fillMempool(ctx, m); err != nil {
-		log.Error("Error filling mempool: %v", err)
-	}
-	if err := s.processMempool(ctx, m); err != nil {
-		log.Error("Error processing mempool: %v", err)
-	}
-}
-
-func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
-	const fetchLimit = 1000
-
-	for {
-		count, err := s.db.GetMempoolCount(ctx)
+		// Fetch and save bundle (library handles mempool internally)
+		bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
 		if err != nil {
-			return err
-		}
+			if isInsufficientOpsError(err) {
+				// Show mempool status
+				stats := s.bundleManager.libManager.GetMempoolStats()
+				mempoolCount := stats["count"].(int)
 
-		if count >= BUNDLE_SIZE {
-			log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
-			return nil
-		}
+				if mempoolCount > 0 {
+					log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
+						mempoolCount, BUNDLE_SIZE)
+				} else {
+					log.Info("→ Caught up! No operations available")
+				}
+				break
+			}
 
-		log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
+			if strings.Contains(err.Error(), "rate limited") {
+				log.Info("⚠ Rate limited, pausing for 5 minutes...")
+				time.Sleep(5 * time.Minute)
+				continue
+			}
 
-		// ✅ Fix: Don't capture unused 'ops' variable
-		shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
-		if err != nil {
-			return err
+			return fmt.Errorf("failed to fetch bundle: %w", err)
 		}
 
-		if !shouldContinue {
-			finalCount, _ := s.db.GetMempoolCount(ctx)
-			log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
-			return nil
-		}
-	}
-}
-
-func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
-	lastOp, err := s.db.GetLastMempoolOperation(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	var after string
-	if lastOp != nil {
-		after = lastOp.CreatedAt.Format(time.RFC3339Nano)
-		log.Verbose(" Using cursor: %s", after)
-	}
-
-	ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
-	if err != nil {
-		return false, fmt.Errorf("failed to fetch from PLC: %w", err)
-	}
-
-	fetchedCount := len(ops)
-	m.totalFetched += int64(fetchedCount) // Track all fetched
-	log.Verbose(" Fetched %d operations from PLC", fetchedCount)
-
-	if fetchedCount == 0 {
-		count, _ := s.db.GetMempoolCount(ctx)
-		log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
-		return false, nil
-	}
-
-	beforeCount, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	endpointsBefore := sumCounts(m.endpointCounts)
-	if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
-		return false, err
-	}
-	endpointsAfter := sumCounts(m.endpointCounts)
-	m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
-
-	afterCount, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
-	m.totalProcessed += uniqueAdded // Track unique ops processed
-
-	log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
-		uniqueAdded, int64(fetchedCount)-uniqueAdded)
-
-	// Continue only if got full batch
-	shouldContinue := fetchedCount >= limit
-	if !shouldContinue {
-		log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
-	}
-
-	return shouldContinue, nil
-}
-
-func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
-	mempoolOps := make([]storage.MempoolOperation, len(ops))
-	for i, op := range ops {
-		mempoolOps[i] = storage.MempoolOperation{
-			DID:       op.DID,
-			Operation: string(op.RawJSON),
-			CID:       op.CID,
-			CreatedAt: op.CreatedAt,
-		}
-	}
-
-	if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
-		return err
-	}
-
-	// Process for endpoint discovery
-	batchCounts, err := s.processBatch(ctx, ops)
-	s.mergeCounts(counts, batchCounts)
-	return err
-}
-
-func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
-	for {
-		count, err := s.db.GetMempoolCount(ctx)
+		// Process operations for endpoint discovery
+		counts, err := s.processBatch(ctx, bundle.Operations)
 		if err != nil {
-			return err
+			log.Error("Failed to process batch: %v", err)
+			// Continue anyway
 		}
 
-		log.Verbose("Mempool contains %d operations", count)
-
-		if count < BUNDLE_SIZE {
-			log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
-			return nil
-		}
+		// Update metrics
+		s.mergeCounts(metrics.endpointCounts, counts)
+		metrics.totalProcessed += int64(len(bundle.Operations))
+		metrics.newEndpoints += sumCounts(counts)
+		metrics.currentBundle = bundle.BundleNumber
 
-		log.Info("→ Creating bundle from mempool (%d operations available)...", count)
+		log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
+			bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
 
-		// Updated to receive 4 values instead of 3
-		bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
-		if err != nil {
-			return err
-		}
-
-		// Process and update metrics
-		countsBefore := sumCounts(m.endpointCounts)
-		counts, _ := s.processBatch(ctx, ops)
-		s.mergeCounts(m.endpointCounts, counts)
-		newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
-
-		m.totalProcessed += int64(len(ops))
-		m.newEndpoints += newEndpointsFound
-		m.currentBundle = bundleNum
-
-		if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
+		// Update cursor
+		if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
 			log.Error("Warning: failed to update cursor: %v", err)
 		}
-
-		log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
 	}
-}
 
-func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
-	mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
-	if err != nil {
-		return 0, nil, "", err
+	// Show final mempool status
+	stats := s.bundleManager.libManager.GetMempoolStats()
+	if count, ok := stats["count"].(int); ok && count > 0 {
+		log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
+			count, float64(count)/float64(BUNDLE_SIZE)*100)
 	}
 
-	ops, ids := s.deduplicateMempool(mempoolOps)
-	if len(ops) < BUNDLE_SIZE {
-		return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
-	}
-
-	// Determine cursor from last bundle
-	cursor := ""
-	lastBundle, err := s.db.GetLastBundleNumber(ctx)
-	if err == nil && lastBundle > 0 {
-		if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
-			cursor = bundle.EndTime.Format(time.RFC3339Nano)
-		}
-	}
-
-	bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
-	if err != nil {
-		return 0, nil, "", err
-	}
-
-	if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
-		return 0, nil, "", err
-	}
-
-	return bundleNum, ops, cursor, nil
+	metrics.logSummary()
+	return nil
 }
 
-func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
-	ops := make([]PLCOperation, 0, BUNDLE_SIZE)
-	ids := make([]int64, 0, BUNDLE_SIZE)
-	seenCIDs := make(map[string]bool)
-
-	for _, mop := range mempoolOps {
-		if seenCIDs[mop.CID] {
-			ids = append(ids, mop.ID)
-			continue
-		}
-		seenCIDs[mop.CID] = true
-
-		var op PLCOperation
-		json.Unmarshal([]byte(mop.Operation), &op)
-		op.RawJSON = []byte(mop.Operation)
-
-		ops = append(ops, op)
-		ids = append(ids, mop.ID)
-
-		if len(ops) >= BUNDLE_SIZE {
-			break
-		}
-	}
-
-	return ops, ids
-}
-
+// processBatch extracts endpoints from operations
 func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
 	counts := make(map[string]int64)
 	seen := make(map[string]*PLCOperation)
 
 	// Collect unique endpoints
-	for _, op := range ops {
+	for i := range ops {
+		op := &ops[i]
+
 		if op.IsNullified() {
 			continue
 		}
-		for _, ep := range s.extractEndpointsFromOperation(op) {
+
+		for _, ep := range s.extractEndpointsFromOperation(*op) {
 			key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
 			if _, exists := seen[key]; !exists {
-				seen[key] = &op
+				seen[key] = op
 			}
 		}
 	}
···
 	}
 
 		if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
-			log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
+			log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
 			continue
 		}
 
-		log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
+		log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
 		counts[epType]++
 	}
 
 	return counts, nil
-}
-
-func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
-	return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
-		EndpointType: epType,
-		Endpoint:     endpoint,
-		DiscoveredAt: discoveredAt,
-		LastChecked:  time.Time{},
-		Status:       storage.EndpointStatusUnknown,
-	})
 }
 
 func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
 	return nil
 }
 
-func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
-	return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
-		Source:           "plc_directory",
-		LastBundleNumber: m.currentBundle - 1,
-		LastScanTime:     time.Now().UTC(),
-		RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
+func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
+	valid := validateEndpoint(endpoint)
+	return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
+		EndpointType: epType,
+		Endpoint:     endpoint,
+		DiscoveredAt: discoveredAt,
+		LastChecked:  time.Time{},
+		Status:       storage.EndpointStatusUnknown,
+		Valid:        valid,
 	})
 }
···
 	return total
 }
 
-func formatEndpointCounts(counts map[string]int64) string {
-	if len(counts) == 0 {
-		return "0 new endpoints"
-	}
+func isInsufficientOpsError(err error) bool {
+	return err != nil && strings.Contains(err.Error(), "insufficient operations")
+}
 
-	total := sumCounts(counts)
+// ScanMetrics tracks scan progress
+type ScanMetrics struct {
+	totalProcessed int64
+	newEndpoints   int64
+	endpointCounts map[string]int64
+	currentBundle  int
+	startTime      time.Time
+}
 
-	if len(counts) == 1 {
-		for typ, count := range counts {
-			return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
-		}
-	}
+func newMetrics(startBundle int) *ScanMetrics {
+	return &ScanMetrics{
+		endpointCounts: make(map[string]int64),
+		currentBundle:  startBundle,
+		startTime:      time.Now(),
+	}
+}
 
-	parts := make([]string, 0, len(counts))
-	for typ, count := range counts {
-		parts = append(parts, fmt.Sprintf("%d %s", count, typ))
+func (m *ScanMetrics) logSummary() {
+	if m.newEndpoints > 0 {
+		log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
+			m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
+	} else {
+		log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
+			m.totalProcessed, time.Since(m.startTime))
 	}
-	return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
 }
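With mempool and bundle assembly moved into plcbundle, the scanner reduces to fetch, extract endpoints, update cursor. Wiring it up now looks roughly like this (a sketch; the shared BundleManager comes from wherever the daemon builds it, and error handling is abbreviated):

bm, _ := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
scanner := plc.NewScanner(db, cfg.PLC, bm) // scanner borrows bm, never owns it

if err := scanner.Scan(ctx); err != nil {
	log.Error("PLC scan failed: %v", err)
}
scanner.Close() // safe: the shared BundleManager stays open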
+68 -55  internal/plc/types.go
···
 package plc
 
-import "time"
+import (
+	"net/url"
+	"strings"
 
-type PLCOperation struct {
-	DID       string                 `json:"did"`
-	Operation map[string]interface{} `json:"operation"`
-	CID       string                 `json:"cid"`
-	Nullified interface{}            `json:"nullified,omitempty"`
-	CreatedAt time.Time              `json:"createdAt"`
-
-	RawJSON []byte `json:"-"` // ✅ Exported (capital R)
-}
-
-// Helper method to check if nullified
-func (op *PLCOperation) IsNullified() bool {
-	if op.Nullified == nil {
-		return false
-	}
-
-	switch v := op.Nullified.(type) {
-	case bool:
-		return v
-	case string:
-		return v != ""
-	default:
-		return false
-	}
-}
-
-// Get nullifying CID if available
-func (op *PLCOperation) GetNullifyingCID() string {
-	if s, ok := op.Nullified.(string); ok {
-		return s
-	}
-	return ""
-}
+	plclib "tangled.org/atscan.net/plcbundle/plc"
+)
 
-type DIDDocument struct {
-	Context            []string             `json:"@context"`
-	ID                 string               `json:"id"`
-	AlsoKnownAs        []string             `json:"alsoKnownAs"`
-	VerificationMethod []VerificationMethod `json:"verificationMethod"`
-	Service            []Service            `json:"service"`
-}
+// Re-export library types
+type PLCOperation = plclib.PLCOperation
+type DIDDocument = plclib.DIDDocument
+type Client = plclib.Client
+type ExportOptions = plclib.ExportOptions
 
-type VerificationMethod struct {
-	ID                 string `json:"id"`
-	Type               string `json:"type"`
-	Controller         string `json:"controller"`
-	PublicKeyMultibase string `json:"publicKeyMultibase"`
-}
+// Keep your custom types
+const BUNDLE_SIZE = 10000
 
-type Service struct {
-	ID              string `json:"id"`
-	Type            string `json:"type"`
-	ServiceEndpoint string `json:"serviceEndpoint"`
-}
-
-// DIDHistoryEntry represents a single operation in DID history
 type DIDHistoryEntry struct {
 	Operation PLCOperation `json:"operation"`
 	PLCBundle string       `json:"plc_bundle,omitempty"`
 }
 
-// DIDHistory represents the full history of a DID
 type DIDHistory struct {
 	DID     string        `json:"did"`
 	Current *PLCOperation `json:"current"`
···
 	Type     string
 	Endpoint string
 }
+
+// PLCOpLabel holds metadata from the label CSV file
+type PLCOpLabel struct {
+	Bundle     int      `json:"bundle"`
+	Position   int      `json:"position"`
+	CID        string   `json:"cid"`
+	Size       int      `json:"size"`
+	Confidence float64  `json:"confidence"`
+	Detectors  []string `json:"detectors"`
+}
+
+// validateEndpoint checks if an endpoint is in the correct format: https://<domain>
+func validateEndpoint(endpoint string) bool {
+	// Must not be empty
+	if endpoint == "" {
+		return false
+	}
+
+	// Must not have a trailing slash
+	if strings.HasSuffix(endpoint, "/") {
+		return false
+	}
+
+	// Parse URL
+	u, err := url.Parse(endpoint)
+	if err != nil {
+		return false
+	}
+
+	// Must use the https scheme
+	if u.Scheme != "https" {
+		return false
+	}
+
+	// Must have a host
+	if u.Host == "" {
+		return false
+	}
+
+	// Must not have a path (except empty)
+	if u.Path != "" && u.Path != "/" {
+		return false
+	}
+
+	// Must not have query parameters
+	if u.RawQuery != "" {
+		return false
+	}
+
+	// Must not have a fragment
+	if u.Fragment != "" {
+		return false
+	}
+
+	return true
+}
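validateEndpoint is deliberately strict: scheme, host, and nothing else. A few illustrative cases, with results that follow directly from the checks above (the hostnames are made up):

validateEndpoint("https://pds.example.com")      // true
validateEndpoint("https://pds.example.com/")     // false: trailing slash
validateEndpoint("http://pds.example.com")       // false: not https
validateEndpoint("https://pds.example.com/xrpc") // false: has a path
validateEndpoint("https://pds.example.com?x=1")  // false: has a query
validateEndpoint("")                             // false: empty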
+16 -24  internal/storage/db.go
···
 	EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error)
 	GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error)
 	GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error)
-	UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error
+	UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error
 	SaveEndpointScan(ctx context.Context, scan *EndpointScan) error
 	SetScanRetention(retention int)
 	UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
···
 	GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error)
 	GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error)
 	GetPDSStats(ctx context.Context) (*PDSStats, error)
+	GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error)
+	GetVersionStats(ctx context.Context) ([]*VersionStats, error)
 
 	// IP operations (IP as primary key)
 	UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error
···
 	GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
 	UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
 
-	// Bundle operations
-	CreateBundle(ctx context.Context, bundle *PLCBundle) error
-	GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
-	GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
-	GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
-	GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
-	GetLastBundleNumber(ctx context.Context) (int, error)
-	GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
-	GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
-
-	// Mempool operations
-	AddToMempool(ctx context.Context, ops []MempoolOperation) error
-	GetMempoolCount(ctx context.Context) (int, error)
-	GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
-	DeleteFromMempool(ctx context.Context, ids []int64) error
-	GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
-	GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
-	GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
-	GetMempoolUncompressedSize(ctx context.Context) (int64, error)
-
 	// Metrics
 	StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
 	GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
 	GetEndpointStats(ctx context.Context) (*EndpointStats, error)
 
 	// DID operations
-	UpsertDID(ctx context.Context, did string, bundleNum int) error
+	UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error
+	UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error
 	GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error)
+	GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) // NEW
+	GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error)
 	AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error
 	GetTotalDIDCount(ctx context.Context) (int64, error)
 
-	GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error)
-	GetVersionStats(ctx context.Context) ([]*VersionStats, error)
+	// PDS Repo operations
+	UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error
+	GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error)
+	GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error)
+	GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error)
+
+	// Internal
+	GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error)
+	GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error)
 }
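The interface now mirrors the split of responsibilities: bundle and mempool state live in plcbundle's files, while the database keeps only derived indexes (DIDs, endpoints, repos). A caller-side sketch of the extended DID methods; the DID, handle, and the DIDRecord field name are illustrative assumptions:

// Index a DID with its validated handle and PDS, then resolve it by handle.
handle := plc.ValidateHandle("alice.example.com") // "" if invalid
if err := db.UpsertDID(ctx, "did:plc:example123", 42, handle, "https://pds.example.com"); err != nil {
	log.Error("upsert DID: %v", err)
}

if rec, err := db.GetDIDByHandle(ctx, "alice.example.com"); err == nil {
	log.Info("resolved alice.example.com -> %s", rec.DID) // assumes DIDRecord has a DID field
}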
+799
-613
internal/storage/postgres.go
+799
-613
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
11
-
"github.com/atscan/atscanner/internal/log"
10
+
"github.com/atscan/atscand/internal/log"
12
11
"github.com/jackc/pgx/v5"
13
12
"github.com/jackc/pgx/v5/pgxpool"
14
13
_ "github.com/jackc/pgx/v5/stdlib"
···
73
72
log.Info("Running database migrations...")
74
73
75
74
schema := `
76
-
-- Endpoints table (NO user_count, NO ip_info)
75
+
-- Endpoints table (with IPv6 support)
77
76
CREATE TABLE IF NOT EXISTS endpoints (
78
77
id BIGSERIAL PRIMARY KEY,
79
78
endpoint_type TEXT NOT NULL DEFAULT 'pds',
···
83
82
last_checked TIMESTAMP,
84
83
status INTEGER DEFAULT 0,
85
84
ip TEXT,
85
+
ipv6 TEXT,
86
86
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
87
88
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
88
89
UNIQUE(endpoint_type, endpoint)
89
90
);
···
92
93
CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
93
94
CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
94
95
CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96
+
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
95
97
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
96
-
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
98
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
97
100
98
-
-- IP infos table (IP as PRIMARY KEY)
99
-
CREATE TABLE IF NOT EXISTS ip_infos (
100
-
ip TEXT PRIMARY KEY,
101
-
city TEXT,
102
-
country TEXT,
103
-
country_code TEXT,
104
-
asn INTEGER,
105
-
asn_org TEXT,
106
-
is_datacenter BOOLEAN,
107
-
is_vpn BOOLEAN,
108
-
latitude REAL,
109
-
longitude REAL,
110
-
raw_data JSONB,
111
-
fetched_at TIMESTAMP NOT NULL,
112
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
113
-
);
101
+
-- IP infos table (IP as PRIMARY KEY)
102
+
CREATE TABLE IF NOT EXISTS ip_infos (
103
+
ip TEXT PRIMARY KEY,
104
+
city TEXT,
105
+
country TEXT,
106
+
country_code TEXT,
107
+
asn INTEGER,
108
+
asn_org TEXT,
109
+
is_datacenter BOOLEAN,
110
+
is_vpn BOOLEAN,
111
+
is_crawler BOOLEAN,
112
+
is_tor BOOLEAN,
113
+
is_proxy BOOLEAN,
114
+
latitude REAL,
115
+
longitude REAL,
116
+
raw_data JSONB,
117
+
fetched_at TIMESTAMP NOT NULL,
118
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
119
+
);
114
120
115
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
116
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
121
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
122
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
117
123
118
-
-- Endpoint scans (renamed from pds_scans)
124
+
-- Endpoint scans
119
125
CREATE TABLE IF NOT EXISTS endpoint_scans (
120
126
id BIGSERIAL PRIMARY KEY,
121
127
endpoint_id BIGINT NOT NULL,
···
123
129
response_time DOUBLE PRECISION,
124
130
user_count BIGINT,
125
131
version TEXT,
132
+
used_ip TEXT,
126
133
scan_data JSONB,
127
134
scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
128
135
FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
···
131
138
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
132
139
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
133
140
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
141
+
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
142
+
134
143
135
144
CREATE TABLE IF NOT EXISTS plc_metrics (
136
145
id BIGSERIAL PRIMARY KEY,
···
149
158
records_processed BIGINT DEFAULT 0
150
159
);
151
160
152
-
CREATE TABLE IF NOT EXISTS plc_bundles (
153
-
bundle_number INTEGER PRIMARY KEY,
154
-
start_time TIMESTAMP NOT NULL,
155
-
end_time TIMESTAMP NOT NULL,
156
-
dids JSONB NOT NULL,
157
-
hash TEXT NOT NULL,
158
-
compressed_hash TEXT NOT NULL,
159
-
compressed_size BIGINT NOT NULL,
160
-
uncompressed_size BIGINT NOT NULL,
161
-
cumulative_compressed_size BIGINT NOT NULL,
162
-
cumulative_uncompressed_size BIGINT NOT NULL,
163
-
cursor TEXT,
164
-
prev_bundle_hash TEXT,
165
-
compressed BOOLEAN DEFAULT true,
166
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
167
-
);
161
+
-- Minimal dids table
162
+
CREATE TABLE IF NOT EXISTS dids (
163
+
did TEXT PRIMARY KEY,
164
+
handle TEXT,
165
+
pds TEXT,
166
+
bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
167
+
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
168
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
169
+
);
168
170
169
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
170
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
171
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
172
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
173
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
171
+
CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
172
+
CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
173
+
CREATE INDEX IF NOT EXISTS idx_dids_handle ON dids(handle);
174
+
CREATE INDEX IF NOT EXISTS idx_dids_pds ON dids(pds);
174
175
175
-
CREATE TABLE IF NOT EXISTS plc_mempool (
176
+
-- PDS Repositories table
177
+
CREATE TABLE IF NOT EXISTS pds_repos (
176
178
id BIGSERIAL PRIMARY KEY,
179
+
endpoint_id BIGINT NOT NULL,
177
180
did TEXT NOT NULL,
178
-
operation TEXT NOT NULL,
179
-
cid TEXT NOT NULL UNIQUE,
180
-
created_at TIMESTAMP NOT NULL,
181
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
182
-
);
183
-
184
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
185
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
186
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
187
-
188
-
-- Minimal dids table
189
-
CREATE TABLE IF NOT EXISTS dids (
190
-
did TEXT PRIMARY KEY,
191
-
bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
192
-
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
181
+
head TEXT,
182
+
rev TEXT,
183
+
active BOOLEAN DEFAULT true,
184
+
status TEXT,
185
+
first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
186
+
last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
187
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
188
+
FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE,
189
+
UNIQUE(endpoint_id, did)
193
190
);
194
191
195
-
CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
196
-
CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
192
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint ON pds_repos(endpoint_id);
193
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint_id_desc ON pds_repos(endpoint_id, id DESC);
194
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_did ON pds_repos(did);
195
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_active ON pds_repos(active);
196
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_status ON pds_repos(status);
197
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_last_seen ON pds_repos(last_seen DESC);
197
198
`
198
199
199
200
_, err := p.db.Exec(schema)
···
209
210
210
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
211
212
query := `
212
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ip_resolved_at)
213
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
214
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
215
216
last_checked = EXCLUDED.last_checked,
216
217
status = EXCLUDED.status,
217
218
ip = CASE
218
219
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip
219
220
ELSE endpoints.ip
221
+
END,
222
+
ipv6 = CASE
223
+
WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6
224
+
ELSE endpoints.ipv6
220
225
END,
221
226
ip_resolved_at = CASE
222
-
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip_resolved_at
227
+
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
223
228
ELSE endpoints.ip_resolved_at
224
229
END,
230
+
valid = EXCLUDED.valid,
225
231
updated_at = CURRENT_TIMESTAMP
226
232
RETURNING id
227
233
`
228
234
err := p.db.QueryRowContext(ctx, query,
229
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
230
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
231
237
return err
232
238
}
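The CASE arms above mean a rescan that resolves no addresses never clobbers a previously stored IP or IPv6. A minimal caller sketch (the field values are illustrative, not from this change):

	ep := &Endpoint{
		EndpointType: "pds",
		Endpoint:     "https://pds.example.com",
		DiscoveredAt: time.Now().UTC(),
		LastChecked:  time.Now().UTC(),
		IP:           "198.51.100.7",
		IPv6:         "", // empty: the stored ipv6 survives via the CASE arm
		IPResolvedAt: time.Now().UTC(),
		Valid:        true,
	}
	if err := db.UpsertEndpoint(ctx, ep); err != nil {
		return err
	}
	// ep.ID is populated via RETURNING id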
233
239
···
248
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
249
255
query := `
250
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
251
-
ip, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
252
258
FROM endpoints
253
259
WHERE endpoint = $1 AND endpoint_type = $2
254
260
`
255
261
256
262
var ep Endpoint
257
263
var lastChecked, ipResolvedAt sql.NullTime
258
-
var ip sql.NullString
264
+
var ip, ipv6 sql.NullString
259
265
260
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
261
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
262
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
263
269
)
264
270
if err != nil {
265
271
return nil, err
···
271
277
if ip.Valid {
272
278
ep.IP = ip.String
273
279
}
280
+
if ipv6.Valid {
281
+
ep.IPv6 = ipv6.String
282
+
}
274
283
if ipResolvedAt.Valid {
275
284
ep.IPResolvedAt = ipResolvedAt.Time
276
285
}
···
280
289
281
290
func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) {
282
291
query := `
283
-
SELECT DISTINCT ON (COALESCE(server_did, id::text))
284
-
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
285
-
ip, ip_resolved_at, updated_at
286
-
FROM endpoints
287
-
WHERE 1=1
292
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
293
+
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
295
+
FROM endpoints
296
+
WHERE 1=1
288
297
`
289
298
args := []interface{}{}
290
299
argIdx := 1
···
294
303
query += fmt.Sprintf(" AND endpoint_type = $%d", argIdx)
295
304
args = append(args, filter.Type)
296
305
argIdx++
306
+
}
307
+
308
+
// NEW: Filter by valid flag
309
+
if filter.OnlyValid {
310
+
query += fmt.Sprintf(" AND valid = true", argIdx)
297
311
}
298
312
if filter.Status != "" {
299
313
statusInt := EndpointStatusUnknown
···
317
331
}
318
332
}
319
333
320
-
// NEW: Order by server_did and discovered_at to get primary endpoints
321
-
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
334
+
// NEW: Choose ordering strategy
335
+
if filter != nil && filter.Random {
336
+
// For random selection, we need to wrap in a subquery
337
+
query = fmt.Sprintf(`
338
+
WITH filtered_endpoints AS (
339
+
%s
340
+
)
341
+
SELECT * FROM filtered_endpoints
342
+
ORDER BY RANDOM()
343
+
`, query)
344
+
} else {
345
+
// Original ordering for non-random queries
346
+
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
347
+
}
322
348
323
349
if filter != nil && filter.Limit > 0 {
324
350
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
335
361
for rows.Next() {
336
362
var ep Endpoint
337
363
var lastChecked, ipResolvedAt sql.NullTime
338
-
var ip, serverDID sql.NullString
364
+
var ip, ipv6, serverDID sql.NullString
339
365
340
366
err := rows.Scan(
341
367
&ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked,
342
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
368
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
343
369
)
344
370
if err != nil {
345
371
return nil, err
···
354
380
if ip.Valid {
355
381
ep.IP = ip.String
356
382
}
383
+
if ipv6.Valid {
384
+
ep.IPv6 = ipv6.String
385
+
}
357
386
if ipResolvedAt.Valid {
358
387
ep.IPResolvedAt = ipResolvedAt.Time
359
388
}
···
374
403
return err
375
404
}
376
405
377
-
func (p *PostgresDB) UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error {
406
+
func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error {
378
407
query := `
379
408
UPDATE endpoints
380
-
SET ip = $1, ip_resolved_at = $2, updated_at = $3
381
-
WHERE id = $4
409
+
SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4
410
+
WHERE id = $5
382
411
`
383
-
_, err := p.db.ExecContext(ctx, query, ip, resolvedAt, time.Now().UTC(), endpointID)
412
+
_, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID)
384
413
return err
385
414
}
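Unlike UpsertEndpoint, this update overwrites both address columns unconditionally, so callers should pass the current values for any family they did not re-resolve. A hedged usage sketch (the literal addresses are made up):

	if err := db.UpdateEndpointIPs(ctx, ep.ID, "198.51.100.7", "2001:db8::7", time.Now().UTC()); err != nil {
		return err
	}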
386
415
···
447
476
defer tx.Rollback()
448
477
449
478
query := `
450
-
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at)
451
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
479
+
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
480
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
452
481
`
453
-
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt)
482
+
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
454
483
if err != nil {
455
484
return err
456
485
}
···
477
506
478
507
func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
479
508
query := `
480
-
SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at
509
+
SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
481
510
FROM endpoint_scans
482
511
WHERE endpoint_id = $1
483
512
ORDER BY scanned_at DESC
···
495
524
var scan EndpointScan
496
525
var responseTime sql.NullFloat64
497
526
var userCount sql.NullInt64
498
-
var version sql.NullString // NEW
527
+
var version, usedIP sql.NullString
499
528
var scanDataJSON []byte
500
529
501
-
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt)
530
+
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
502
531
if err != nil {
503
532
return nil, err
504
533
}
···
511
540
scan.UserCount = userCount.Int64
512
541
}
513
542
514
-
if version.Valid { // NEW
543
+
if version.Valid {
515
544
scan.Version = version.String
516
545
}
517
546
547
+
if usedIP.Valid {
548
+
scan.UsedIP = usedIP.String
549
+
}
550
+
518
551
if len(scanDataJSON) > 0 {
519
552
var scanData EndpointScanData
520
553
if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
···
540
573
discovered_at,
541
574
last_checked,
542
575
status,
543
-
ip
576
+
ip,
577
+
ipv6,
578
+
valid
544
579
FROM endpoints
545
580
WHERE endpoint_type = 'pds'
546
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
547
582
)
548
583
SELECT
549
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
550
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
551
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
552
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude
587
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
588
+
i.latitude, i.longitude
553
589
FROM unique_servers e
554
590
LEFT JOIN LATERAL (
555
591
SELECT
···
606
642
var items []*PDSListItem
607
643
for rows.Next() {
608
644
item := &PDSListItem{}
609
-
var ip, serverDID, city, country, countryCode, asnOrg sql.NullString
645
+
var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
610
646
var asn sql.NullInt32
611
-
var isDatacenter, isVPN sql.NullBool
647
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
612
648
var lat, lon sql.NullFloat64
613
649
var userCount sql.NullInt32
614
650
var responseTime sql.NullFloat64
···
616
652
var scannedAt sql.NullTime
617
653
618
654
err := rows.Scan(
619
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
620
656
&userCount, &responseTime, &version, &scannedAt,
621
657
&city, &country, &countryCode, &asn, &asnOrg,
622
-
&isDatacenter, &isVPN, &lat, &lon,
658
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
659
+
&lat, &lon,
623
660
)
624
661
if err != nil {
625
662
return nil, err
···
627
664
628
665
if ip.Valid {
629
666
item.IP = ip.String
667
+
}
668
+
if ipv6.Valid {
669
+
item.IPv6 = ipv6.String
630
670
}
631
671
if serverDID.Valid {
632
672
item.ServerDID = serverDID.String
···
658
698
ASNOrg: asnOrg.String,
659
699
IsDatacenter: isDatacenter.Bool,
660
700
IsVPN: isVPN.Bool,
701
+
IsCrawler: isCrawler.Bool,
702
+
IsTor: isTor.Bool,
703
+
IsProxy: isProxy.Bool,
661
704
Latitude: float32(lat.Float64),
662
705
Longitude: float32(lon.Float64),
663
706
}
···
671
714
672
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
673
716
query := `
674
-
WITH target_endpoint AS (
717
+
WITH target_endpoint AS MATERIALIZED (
675
718
SELECT
676
719
e.id,
677
720
e.endpoint,
···
679
722
e.discovered_at,
680
723
e.last_checked,
681
724
e.status,
682
-
e.ip
725
+
e.ip,
726
+
e.ipv6,
727
+
e.valid
683
728
FROM endpoints e
684
-
WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
685
-
),
686
-
aliases_agg AS (
687
-
SELECT
688
-
te.server_did,
689
-
array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
690
-
MIN(e.discovered_at) as first_discovered_at
691
-
FROM target_endpoint te
692
-
LEFT JOIN endpoints e ON te.server_did = e.server_did
693
-
AND e.endpoint_type = 'pds'
694
-
AND te.server_did IS NOT NULL
695
-
GROUP BY te.server_did
729
+
WHERE e.endpoint = $1
730
+
AND e.endpoint_type = 'pds'
731
+
LIMIT 1
696
732
)
697
733
SELECT
698
734
te.id,
···
702
738
te.last_checked,
703
739
te.status,
704
740
te.ip,
741
+
te.ipv6,
742
+
te.valid,
705
743
latest.user_count,
706
744
latest.response_time,
707
745
latest.version,
708
746
latest.scan_data->'metadata'->'server_info' as server_info,
709
747
latest.scanned_at,
710
748
i.city, i.country, i.country_code, i.asn, i.asn_org,
711
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude,
749
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
750
+
i.latitude, i.longitude,
712
751
i.raw_data,
713
-
COALESCE(aa.aliases, ARRAY[]::text[]) as aliases,
714
-
aa.first_discovered_at
752
+
COALESCE(
753
+
ARRAY(
754
+
SELECT e2.endpoint
755
+
FROM endpoints e2
756
+
WHERE e2.server_did = te.server_did
757
+
AND e2.endpoint_type = 'pds'
758
+
AND e2.endpoint != te.endpoint
759
+
AND te.server_did IS NOT NULL
760
+
ORDER BY e2.discovered_at
761
+
),
762
+
ARRAY[]::text[]
763
+
) as aliases,
764
+
CASE
765
+
WHEN te.server_did IS NOT NULL THEN (
766
+
SELECT MIN(e3.discovered_at)
767
+
FROM endpoints e3
768
+
WHERE e3.server_did = te.server_did
769
+
AND e3.endpoint_type = 'pds'
770
+
)
771
+
ELSE NULL
772
+
END as first_discovered_at
715
773
FROM target_endpoint te
716
-
LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did
717
774
LEFT JOIN LATERAL (
718
-
SELECT scan_data, response_time, version, scanned_at, user_count
719
-
FROM endpoint_scans
720
-
WHERE endpoint_id = te.id
721
-
ORDER BY scanned_at DESC
775
+
SELECT
776
+
es.scan_data,
777
+
es.response_time,
778
+
es.version,
779
+
es.scanned_at,
780
+
es.user_count
781
+
FROM endpoint_scans es
782
+
WHERE es.endpoint_id = te.id
783
+
ORDER BY es.scanned_at DESC
722
784
LIMIT 1
723
785
) latest ON true
724
-
LEFT JOIN ip_infos i ON te.ip = i.ip
786
+
LEFT JOIN ip_infos i ON te.ip = i.ip;
725
787
`
726
788
727
789
detail := &PDSDetail{}
728
-
var ip, city, country, countryCode, asnOrg, serverDID sql.NullString
790
+
var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
729
791
var asn sql.NullInt32
730
-
var isDatacenter, isVPN sql.NullBool
792
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
731
793
var lat, lon sql.NullFloat64
732
794
var userCount sql.NullInt32
733
795
var responseTime sql.NullFloat64
···
739
801
var firstDiscoveredAt sql.NullTime
740
802
741
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
742
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
743
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
744
806
&city, &country, &countryCode, &asn, &asnOrg,
745
-
&isDatacenter, &isVPN, &lat, &lon,
807
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
808
+
&lat, &lon,
746
809
&rawDataJSON,
747
810
pq.Array(&aliases),
748
811
&firstDiscoveredAt,
···
754
817
if ip.Valid {
755
818
detail.IP = ip.String
756
819
}
820
+
if ipv6.Valid {
821
+
detail.IPv6 = ipv6.String
822
+
}
757
823
758
824
if serverDID.Valid {
759
825
detail.ServerDID = serverDID.String
···
762
828
// Set aliases and is_primary
763
829
detail.Aliases = aliases
764
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
765
-
// Has server_did - check if this is the first discovered
766
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
767
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
768
833
} else {
769
-
// No server_did means unique server
770
834
detail.IsPrimary = true
771
835
}
772
836
···
792
856
}
793
857
}
794
858
795
-
// Parse IP info
859
+
// Parse IP info with all fields
796
860
if city.Valid || country.Valid {
797
861
detail.IPInfo = &IPInfo{
798
862
IP: ip.String,
···
803
867
ASNOrg: asnOrg.String,
804
868
IsDatacenter: isDatacenter.Bool,
805
869
IsVPN: isVPN.Bool,
870
+
IsCrawler: isCrawler.Bool,
871
+
IsTor: isTor.Bool,
872
+
IsProxy: isProxy.Bool,
806
873
Latitude: float32(lat.Float64),
807
874
Longitude: float32(lon.Float64),
808
875
}
···
938
1005
countryCode := extractString(ipInfo, "location", "country_code")
939
1006
asn := extractInt(ipInfo, "asn", "asn")
940
1007
asnOrg := extractString(ipInfo, "asn", "org")
941
-
isDatacenter := extractBool(ipInfo, "company", "type", "hosting")
942
-
isVPN := extractBool(ipInfo, "security", "vpn")
1008
+
1009
+
// Extract top-level boolean flags
1010
+
isDatacenter := false
1011
+
if val, ok := ipInfo["is_datacenter"].(bool); ok {
1012
+
isDatacenter = val
1013
+
}
1014
+
1015
+
isVPN := false
1016
+
if val, ok := ipInfo["is_vpn"].(bool); ok {
1017
+
isVPN = val
1018
+
}
1019
+
1020
+
isCrawler := false
1021
+
if val, ok := ipInfo["is_crawler"].(bool); ok {
1022
+
isCrawler = val
1023
+
}
1024
+
1025
+
isTor := false
1026
+
if val, ok := ipInfo["is_tor"].(bool); ok {
1027
+
isTor = val
1028
+
}
1029
+
1030
+
isProxy := false
1031
+
if val, ok := ipInfo["is_proxy"].(bool); ok {
1032
+
isProxy = val
1033
+
}
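	// The five lookups above share one shape; a small helper could collapse
	// them (a sketch only, not part of this change):
	//   boolField := func(key string) bool { v, _ := ipInfo[key].(bool); return v }
	//   isDatacenter := boolField("is_datacenter")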
1034
+
943
1035
lat := extractFloat(ipInfo, "location", "latitude")
944
1036
lon := extractFloat(ipInfo, "location", "longitude")
945
1037
946
1038
query := `
947
-
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, latitude, longitude, raw_data, fetched_at)
948
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
1039
+
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at)
1040
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
949
1041
ON CONFLICT(ip) DO UPDATE SET
950
1042
city = EXCLUDED.city,
951
1043
country = EXCLUDED.country,
···
954
1046
asn_org = EXCLUDED.asn_org,
955
1047
is_datacenter = EXCLUDED.is_datacenter,
956
1048
is_vpn = EXCLUDED.is_vpn,
1049
+
is_crawler = EXCLUDED.is_crawler,
1050
+
is_tor = EXCLUDED.is_tor,
1051
+
is_proxy = EXCLUDED.is_proxy,
957
1052
latitude = EXCLUDED.latitude,
958
1053
longitude = EXCLUDED.longitude,
959
1054
raw_data = EXCLUDED.raw_data,
960
1055
fetched_at = EXCLUDED.fetched_at,
961
1056
updated_at = CURRENT_TIMESTAMP
962
1057
`
963
-
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, lat, lon, rawDataJSON, time.Now().UTC())
1058
+
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC())
964
1059
return err
965
1060
}
966
1061
967
1062
func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) {
968
1063
query := `
969
-
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn,
1064
+
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy,
970
1065
latitude, longitude, raw_data, fetched_at, updated_at
971
1066
FROM ip_infos
972
1067
WHERE ip = $1
···
977
1072
978
1073
err := p.db.QueryRowContext(ctx, query, ip).Scan(
979
1074
&info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg,
980
-
&info.IsDatacenter, &info.IsVPN, &info.Latitude, &info.Longitude,
1075
+
&info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy,
1076
+
&info.Latitude, &info.Longitude,
981
1077
&rawDataJSON, &info.FetchedAt, &info.UpdatedAt,
982
1078
)
983
1079
if err != nil {
···
1067
1163
return 0
1068
1164
}
1069
1165
1070
-
func extractBool(data map[string]interface{}, keys ...string) bool {
1071
-
current := data
1072
-
for i, key := range keys {
1073
-
if i == len(keys)-1 {
1074
-
if val, ok := current[key].(bool); ok {
1075
-
return val
1076
-
}
1077
-
// Check if it's a string that matches (for type="hosting")
1078
-
if val, ok := current[key].(string); ok {
1079
-
// For cases like company.type == "hosting"
1080
-
expectedValue := keys[len(keys)-1]
1081
-
return val == expectedValue
1082
-
}
1083
-
return false
1084
-
}
1085
-
if nested, ok := current[key].(map[string]interface{}); ok {
1086
-
current = nested
1087
-
} else {
1088
-
return false
1089
-
}
1090
-
}
1091
-
return false
1092
-
}
1093
-
1094
-
// ===== BUNDLE OPERATIONS =====
1095
-
1096
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1097
-
didsJSON, err := json.Marshal(bundle.DIDs)
1098
-
if err != nil {
1099
-
return err
1100
-
}
1101
-
1102
-
// Calculate cumulative sizes from previous bundle
1103
-
if bundle.BundleNumber > 1 {
1104
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1105
-
if err == nil && prevBundle != nil {
1106
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1107
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1108
-
} else {
1109
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1110
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1111
-
}
1112
-
} else {
1113
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1114
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1115
-
}
1116
-
1117
-
query := `
1118
-
INSERT INTO plc_bundles (
1119
-
bundle_number, start_time, end_time, dids,
1120
-
hash, compressed_hash, compressed_size, uncompressed_size,
1121
-
cumulative_compressed_size, cumulative_uncompressed_size,
1122
-
cursor, prev_bundle_hash, compressed
1123
-
)
1124
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1125
-
ON CONFLICT(bundle_number) DO UPDATE SET
1126
-
start_time = EXCLUDED.start_time,
1127
-
end_time = EXCLUDED.end_time,
1128
-
dids = EXCLUDED.dids,
1129
-
hash = EXCLUDED.hash,
1130
-
compressed_hash = EXCLUDED.compressed_hash,
1131
-
compressed_size = EXCLUDED.compressed_size,
1132
-
uncompressed_size = EXCLUDED.uncompressed_size,
1133
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1134
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1135
-
cursor = EXCLUDED.cursor,
1136
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1137
-
compressed = EXCLUDED.compressed
1138
-
`
1139
-
_, err = p.db.ExecContext(ctx, query,
1140
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1141
-
didsJSON, bundle.Hash, bundle.CompressedHash,
1142
-
bundle.CompressedSize, bundle.UncompressedSize,
1143
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1144
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1145
-
)
1146
-
1147
-
return err
1148
-
}
1149
-
1150
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1151
-
query := `
1152
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1153
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1154
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1155
-
FROM plc_bundles
1156
-
WHERE bundle_number = $1
1157
-
`
1158
-
1159
-
var bundle PLCBundle
1160
-
var didsJSON []byte
1161
-
var prevHash sql.NullString
1162
-
var cursor sql.NullString
1163
-
1164
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1165
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1166
-
&didsJSON, &bundle.Hash, &bundle.CompressedHash,
1167
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1168
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1169
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1170
-
)
1171
-
if err != nil {
1172
-
return nil, err
1173
-
}
1174
-
1175
-
if prevHash.Valid {
1176
-
bundle.PrevBundleHash = prevHash.String
1177
-
}
1178
-
if cursor.Valid {
1179
-
bundle.Cursor = cursor.String
1180
-
}
1181
-
1182
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1183
-
return &bundle, nil
1184
-
}
1185
-
1186
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1187
-
query := `
1188
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1189
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1190
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1191
-
FROM plc_bundles
1192
-
ORDER BY bundle_number DESC
1193
-
LIMIT $1
1194
-
`
1195
-
1196
-
rows, err := p.db.QueryContext(ctx, query, limit)
1197
-
if err != nil {
1198
-
return nil, err
1199
-
}
1200
-
defer rows.Close()
1201
-
1202
-
return p.scanBundles(rows)
1203
-
}
1204
-
1205
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1206
-
query := `
1207
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1208
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1209
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1210
-
FROM plc_bundles
1211
-
WHERE dids ? $1
1212
-
ORDER BY bundle_number ASC
1213
-
`
1214
-
1215
-
rows, err := p.db.QueryContext(ctx, query, did)
1216
-
if err != nil {
1217
-
return nil, err
1218
-
}
1219
-
defer rows.Close()
1220
-
1221
-
return p.scanBundles(rows)
1222
-
}
1223
-
1224
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1225
-
var bundles []*PLCBundle
1226
-
1227
-
for rows.Next() {
1228
-
var bundle PLCBundle
1229
-
var didsJSON []byte
1230
-
var prevHash sql.NullString
1231
-
var cursor sql.NullString
1232
-
1233
-
if err := rows.Scan(
1234
-
&bundle.BundleNumber,
1235
-
&bundle.StartTime,
1236
-
&bundle.EndTime,
1237
-
&didsJSON,
1238
-
&bundle.Hash,
1239
-
&bundle.CompressedHash,
1240
-
&bundle.CompressedSize,
1241
-
&bundle.UncompressedSize,
1242
-
&bundle.CumulativeCompressedSize,
1243
-
&bundle.CumulativeUncompressedSize,
1244
-
&cursor,
1245
-
&prevHash,
1246
-
&bundle.Compressed,
1247
-
&bundle.CreatedAt,
1248
-
); err != nil {
1249
-
return nil, err
1250
-
}
1251
-
1252
-
if prevHash.Valid {
1253
-
bundle.PrevBundleHash = prevHash.String
1254
-
}
1255
-
if cursor.Valid {
1256
-
bundle.Cursor = cursor.String
1257
-
}
1258
-
1259
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1260
-
bundles = append(bundles, &bundle)
1261
-
}
1262
-
1263
-
return bundles, rows.Err()
1264
-
}
1265
-
1266
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1267
-
var count, lastBundleNum int64
1268
-
err := p.db.QueryRowContext(ctx, `
1269
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1270
-
FROM plc_bundles
1271
-
`).Scan(&count, &lastBundleNum)
1272
-
if err != nil {
1273
-
return 0, 0, 0, 0, err
1274
-
}
1275
-
1276
-
if lastBundleNum == 0 {
1277
-
return 0, 0, 0, 0, nil
1278
-
}
1279
-
1280
-
var compressedSize, uncompressedSize int64
1281
-
err = p.db.QueryRowContext(ctx, `
1282
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1283
-
FROM plc_bundles
1284
-
WHERE bundle_number = $1
1285
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1286
-
if err != nil {
1287
-
return 0, 0, 0, 0, err
1288
-
}
1289
-
1290
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1291
-
}
1292
-
1293
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1294
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1295
-
var num int
1296
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1297
-
return num, err
1298
-
}
1299
-
1300
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1301
-
query := `
1302
-
SELECT bundle_number
1303
-
FROM plc_bundles
1304
-
WHERE start_time <= $1 AND end_time >= $1
1305
-
ORDER BY bundle_number ASC
1306
-
LIMIT 1
1307
-
`
1308
-
1309
-
var bundleNum int
1310
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1311
-
if err == sql.ErrNoRows {
1312
-
query = `
1313
-
SELECT bundle_number
1314
-
FROM plc_bundles
1315
-
WHERE end_time < $1
1316
-
ORDER BY bundle_number DESC
1317
-
LIMIT 1
1318
-
`
1319
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1320
-
if err == sql.ErrNoRows {
1321
-
return 1, nil
1322
-
}
1323
-
if err != nil {
1324
-
return 0, err
1325
-
}
1326
-
return bundleNum, nil
1327
-
}
1328
-
if err != nil {
1329
-
return 0, err
1330
-
}
1331
-
1332
-
return bundleNum, nil
1333
-
}
1334
-
1335
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1336
-
query := `
1337
-
WITH daily_stats AS (
1338
-
SELECT
1339
-
DATE(start_time) as date,
1340
-
MAX(bundle_number) as last_bundle,
1341
-
COUNT(*) as bundle_count,
1342
-
SUM(uncompressed_size) as total_uncompressed,
1343
-
SUM(compressed_size) as total_compressed,
1344
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1345
-
MAX(cumulative_compressed_size) as cumulative_compressed
1346
-
FROM plc_bundles
1347
-
WHERE bundle_number >= $1
1348
-
GROUP BY DATE(start_time)
1349
-
)
1350
-
SELECT
1351
-
date::text,
1352
-
last_bundle,
1353
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1354
-
total_uncompressed,
1355
-
total_compressed,
1356
-
cumulative_uncompressed,
1357
-
cumulative_compressed
1358
-
FROM daily_stats
1359
-
ORDER BY date ASC
1360
-
`
1361
-
1362
-
if limit > 0 {
1363
-
query += fmt.Sprintf(" LIMIT %d", limit)
1364
-
}
1365
-
1366
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1367
-
if err != nil {
1368
-
return nil, err
1369
-
}
1370
-
defer rows.Close()
1371
-
1372
-
var history []*PLCHistoryPoint
1373
-
for rows.Next() {
1374
-
var point PLCHistoryPoint
1375
-
var cumulativeOps int64
1376
-
1377
-
err := rows.Scan(
1378
-
&point.Date,
1379
-
&point.BundleNumber,
1380
-
&cumulativeOps,
1381
-
&point.UncompressedSize,
1382
-
&point.CompressedSize,
1383
-
&point.CumulativeUncompressed,
1384
-
&point.CumulativeCompressed,
1385
-
)
1386
-
if err != nil {
1387
-
return nil, err
1388
-
}
1389
-
1390
-
point.OperationCount = int(cumulativeOps)
1391
-
1392
-
history = append(history, &point)
1393
-
}
1394
-
1395
-
return history, rows.Err()
1396
-
}
1397
-
1398
-
// ===== MEMPOOL OPERATIONS =====
1399
-
1400
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1401
-
if len(ops) == 0 {
1402
-
return nil
1403
-
}
1404
-
1405
-
tx, err := p.db.BeginTx(ctx, nil)
1406
-
if err != nil {
1407
-
return err
1408
-
}
1409
-
defer tx.Rollback()
1410
-
1411
-
stmt, err := tx.PrepareContext(ctx, `
1412
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1413
-
VALUES ($1, $2, $3, $4)
1414
-
ON CONFLICT(cid) DO NOTHING
1415
-
`)
1416
-
if err != nil {
1417
-
return err
1418
-
}
1419
-
defer stmt.Close()
1420
-
1421
-
for _, op := range ops {
1422
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1423
-
if err != nil {
1424
-
return err
1425
-
}
1426
-
}
1427
-
1428
-
return tx.Commit()
1429
-
}
1430
-
1431
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1432
-
query := "SELECT COUNT(*) FROM plc_mempool"
1433
-
var count int
1434
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1435
-
return count, err
1436
-
}
1437
-
1438
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1439
-
query := `
1440
-
SELECT id, did, operation, cid, created_at, added_at
1441
-
FROM plc_mempool
1442
-
ORDER BY created_at ASC
1443
-
LIMIT $1
1444
-
`
1445
-
1446
-
rows, err := p.db.QueryContext(ctx, query, limit)
1447
-
if err != nil {
1448
-
return nil, err
1449
-
}
1450
-
defer rows.Close()
1451
-
1452
-
var ops []MempoolOperation
1453
-
for rows.Next() {
1454
-
var op MempoolOperation
1455
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1456
-
if err != nil {
1457
-
return nil, err
1458
-
}
1459
-
ops = append(ops, op)
1460
-
}
1461
-
1462
-
return ops, rows.Err()
1463
-
}
1464
-
1465
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1466
-
if len(ids) == 0 {
1467
-
return nil
1468
-
}
1469
-
1470
-
placeholders := make([]string, len(ids))
1471
-
args := make([]interface{}, len(ids))
1472
-
for i, id := range ids {
1473
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1474
-
args[i] = id
1475
-
}
1476
-
1477
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1478
-
strings.Join(placeholders, ","))
1479
-
1480
-
_, err := p.db.ExecContext(ctx, query, args...)
1481
-
return err
1482
-
}
1483
-
1484
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1485
-
query := `
1486
-
SELECT id, did, operation, cid, created_at, added_at
1487
-
FROM plc_mempool
1488
-
ORDER BY created_at ASC, id ASC
1489
-
LIMIT 1
1490
-
`
1491
-
1492
-
var op MempoolOperation
1493
-
err := p.db.QueryRowContext(ctx, query).Scan(
1494
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1495
-
)
1496
-
if err == sql.ErrNoRows {
1497
-
return nil, nil
1498
-
}
1499
-
if err != nil {
1500
-
return nil, err
1501
-
}
1502
-
1503
-
return &op, nil
1504
-
}
1505
-
1506
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1507
-
query := `
1508
-
SELECT id, did, operation, cid, created_at, added_at
1509
-
FROM plc_mempool
1510
-
ORDER BY created_at DESC, id DESC
1511
-
LIMIT 1
1512
-
`
1513
-
1514
-
var op MempoolOperation
1515
-
err := p.db.QueryRowContext(ctx, query).Scan(
1516
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1517
-
)
1518
-
if err == sql.ErrNoRows {
1519
-
return nil, nil
1520
-
}
1521
-
if err != nil {
1522
-
return nil, err
1523
-
}
1524
-
1525
-
return &op, nil
1526
-
}
1527
-
1528
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1529
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1530
-
var count int
1531
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1532
-
return count, err
1533
-
}
1534
-
1535
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1536
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1537
-
var size int64
1538
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1539
-
return size, err
1540
-
}
1541
-
1542
1166
// ===== CURSOR OPERATIONS =====
1543
1167
1544
1168
func (p *PostgresDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) {
···
1611
1235
1612
1236
// ===== DID OPERATIONS =====
1613
1237
1614
-
func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int) error {
1238
+
func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error {
1615
1239
query := `
1616
-
INSERT INTO dids (did, bundle_numbers, created_at)
1617
-
VALUES ($1, jsonb_build_array($2), CURRENT_TIMESTAMP)
1240
+
INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
1241
+
VALUES ($1, $2, $3, jsonb_build_array($4::integer), CURRENT_TIMESTAMP)
1618
1242
ON CONFLICT(did) DO UPDATE SET
1243
+
handle = EXCLUDED.handle,
1244
+
pds = EXCLUDED.pds,
1619
1245
bundle_numbers = CASE
1620
-
WHEN dids.bundle_numbers ? $2::text THEN dids.bundle_numbers
1621
-
ELSE dids.bundle_numbers || jsonb_build_array($2)
1622
-
END
1246
+
WHEN dids.bundle_numbers @> jsonb_build_array($4::integer) THEN dids.bundle_numbers
1247
+
ELSE dids.bundle_numbers || jsonb_build_array($4::integer)
1248
+
END,
1249
+
updated_at = CURRENT_TIMESTAMP
1623
1250
`
1624
-
_, err := p.db.ExecContext(ctx, query, did, bundleNum)
1251
+
_, err := p.db.ExecContext(ctx, query, did, handle, pds, bundleNum)
1252
+
return err
1253
+
}
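The @> containment guard is what keeps bundle_numbers duplicate-free. Worked on illustrative values:

	SELECT '[3,7]'::jsonb @> jsonb_build_array(7::integer);  -- true: bundle 7 already present, array left as-is
	SELECT '[3,7]'::jsonb || jsonb_build_array(9::integer);  -- [3, 7, 9]: bundle 9 appended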
1254
+
1255
+
// UpsertDIDFromMempool creates or updates a DID record without adding to bundle_numbers
1256
+
func (p *PostgresDB) UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error {
1257
+
query := `
1258
+
INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
1259
+
VALUES ($1, $2, $3, '[]'::jsonb, CURRENT_TIMESTAMP)
1260
+
ON CONFLICT(did) DO UPDATE SET
1261
+
handle = EXCLUDED.handle,
1262
+
pds = EXCLUDED.pds,
1263
+
updated_at = CURRENT_TIMESTAMP
1264
+
`
1265
+
_, err := p.db.ExecContext(ctx, query, did, handle, pds)
1625
1266
return err
1626
1267
}
1627
1268
1628
1269
func (p *PostgresDB) GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) {
1629
1270
query := `
1630
-
SELECT did, bundle_numbers, created_at
1271
+
SELECT did, handle, pds, bundle_numbers, created_at
1631
1272
FROM dids
1632
1273
WHERE did = $1
1633
1274
`
1634
1275
1635
1276
var record DIDRecord
1636
1277
var bundleNumbersJSON []byte
1278
+
var handle, pds sql.NullString
1637
1279
1638
1280
err := p.db.QueryRowContext(ctx, query, did).Scan(
1639
1281
&record.DID,
1282
+
&handle,
1283
+
&pds,
1640
1284
&bundleNumbersJSON,
1641
1285
&record.CreatedAt,
1642
1286
)
···
1644
1288
return nil, err
1645
1289
}
1646
1290
1291
+
if handle.Valid {
1292
+
record.Handle = handle.String
1293
+
}
1294
+
if pds.Valid {
1295
+
record.CurrentPDS = pds.String
1296
+
}
1297
+
1647
1298
if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
1648
1299
return nil, err
1649
1300
}
···
1651
1302
return &record, nil
1652
1303
}
1653
1304
1305
+
func (p *PostgresDB) GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) {
1306
+
query := `
1307
+
SELECT did, handle, pds, bundle_numbers, created_at
1308
+
FROM dids
1309
+
WHERE handle = $1
1310
+
`
1311
+
1312
+
var record DIDRecord
1313
+
var bundleNumbersJSON []byte
1314
+
var recordHandle, pds sql.NullString
1315
+
1316
+
err := p.db.QueryRowContext(ctx, query, handle).Scan(
1317
+
&record.DID,
1318
+
&recordHandle,
1319
+
&pds,
1320
+
&bundleNumbersJSON,
1321
+
&record.CreatedAt,
1322
+
)
1323
+
if err != nil {
1324
+
return nil, err
1325
+
}
1326
+
1327
+
if recordHandle.Valid {
1328
+
record.Handle = recordHandle.String
1329
+
}
1330
+
if pds.Valid {
1331
+
record.CurrentPDS = pds.String
1332
+
}
1333
+
1334
+
if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
1335
+
return nil, err
1336
+
}
1337
+
1338
+
return &record, nil
1339
+
}
1340
+
1341
+
// GetGlobalDIDInfo retrieves consolidated DID info from 'dids' and 'pds_repos'
1342
+
func (p *PostgresDB) GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) {
1343
+
query := `
1344
+
WITH primary_endpoints AS (
1345
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
1346
+
id
1347
+
FROM endpoints
1348
+
WHERE endpoint_type = 'pds'
1349
+
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
1350
+
)
1351
+
SELECT
1352
+
d.did,
1353
+
d.handle,
1354
+
d.pds,
1355
+
d.bundle_numbers,
1356
+
d.created_at,
1357
+
COALESCE(
1358
+
jsonb_agg(
1359
+
jsonb_build_object(
1360
+
'id', pr.id,
1361
+
'endpoint_id', pr.endpoint_id,
1362
+
'endpoint', e.endpoint,
1363
+
'did', pr.did,
1364
+
'head', pr.head,
1365
+
'rev', pr.rev,
1366
+
'active', pr.active,
1367
+
'status', pr.status,
1368
+
'first_seen', pr.first_seen AT TIME ZONE 'UTC',
1369
+
'last_seen', pr.last_seen AT TIME ZONE 'UTC',
1370
+
'updated_at', pr.updated_at AT TIME ZONE 'UTC'
1371
+
)
1372
+
ORDER BY pr.last_seen DESC
1373
+
) FILTER (
1374
+
WHERE pr.id IS NOT NULL AND pe.id IS NOT NULL
1375
+
),
1376
+
'[]'::jsonb
1377
+
) AS hosting_on
1378
+
FROM
1379
+
dids d
1380
+
LEFT JOIN
1381
+
pds_repos pr ON d.did = pr.did
1382
+
LEFT JOIN
1383
+
endpoints e ON pr.endpoint_id = e.id
1384
+
LEFT JOIN
1385
+
primary_endpoints pe ON pr.endpoint_id = pe.id
1386
+
WHERE
1387
+
d.did = $1
1388
+
GROUP BY
1389
+
d.did, d.handle, d.pds, d.bundle_numbers, d.created_at
1390
+
`
1391
+
1392
+
var info GlobalDIDInfo
1393
+
var bundleNumbersJSON []byte
1394
+
var hostingOnJSON []byte
1395
+
var handle, pds sql.NullString
1396
+
1397
+
err := p.db.QueryRowContext(ctx, query, did).Scan(
1398
+
&info.DID,
1399
+
&handle,
1400
+
&pds,
1401
+
&bundleNumbersJSON,
1402
+
&info.CreatedAt,
1403
+
&hostingOnJSON,
1404
+
)
1405
+
if err != nil {
1406
+
return nil, err
1407
+
}
1408
+
1409
+
if handle.Valid {
1410
+
info.Handle = handle.String
1411
+
}
1412
+
if pds.Valid {
1413
+
info.CurrentPDS = pds.String
1414
+
}
1415
+
1416
+
if err := json.Unmarshal(bundleNumbersJSON, &info.BundleNumbers); err != nil {
1417
+
return nil, fmt.Errorf("failed to unmarshal bundle_numbers: %w", err)
1418
+
}
1419
+
1420
+
if err := json.Unmarshal(hostingOnJSON, &info.HostingOn); err != nil {
1421
+
return nil, fmt.Errorf("failed to unmarshal hosting_on: %w", err)
1422
+
}
1423
+
1424
+
return &info, nil
1425
+
}
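A hedged consumer sketch (the DID literal is made up; hosting_on aggregates one entry per primary endpoint that reports the repo):

	info, err := db.GetGlobalDIDInfo(ctx, "did:plc:examplexyz")
	if err != nil {
		return err
	}
	fmt.Printf("%s (@%s) on %s: %d bundles, %d hosts\n",
		info.DID, info.Handle, info.CurrentPDS,
		len(info.BundleNumbers), len(info.HostingOn))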
1426
+
1654
1427
func (p *PostgresDB) AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error {
1655
1428
if len(dids) == 0 {
1656
1429
return nil
···
1916
1689
}
1917
1690
return "0%"
1918
1691
}
1692
+
1693
+
func (p *PostgresDB) UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error {
1694
+
if len(repos) == 0 {
1695
+
return nil
1696
+
}
1697
+
1698
+
// Step 1: Load all existing repos for this endpoint into memory
1699
+
query := `
1700
+
SELECT did, head, rev, active, status
1701
+
FROM pds_repos
1702
+
WHERE endpoint_id = $1
1703
+
`
1704
+
1705
+
rows, err := p.db.QueryContext(ctx, query, endpointID)
1706
+
if err != nil {
1707
+
return err
1708
+
}
1709
+
1710
+
existingRepos := make(map[string]*PDSRepo)
1711
+
for rows.Next() {
1712
+
var repo PDSRepo
1713
+
var head, rev, status sql.NullString
1714
+
1715
+
err := rows.Scan(&repo.DID, &head, &rev, &repo.Active, &status)
1716
+
if err != nil {
1717
+
rows.Close()
1718
+
return err
1719
+
}
1720
+
1721
+
if head.Valid {
1722
+
repo.Head = head.String
1723
+
}
1724
+
if rev.Valid {
1725
+
repo.Rev = rev.String
1726
+
}
1727
+
if status.Valid {
1728
+
repo.Status = status.String
1729
+
}
1730
+
1731
+
existingRepos[repo.DID] = &repo
1732
+
}
1733
+
rows.Close()
1734
+
1735
+
if err := rows.Err(); err != nil {
1736
+
return err
1737
+
}
1738
+
1739
+
// Step 2: Compare and collect changes
1740
+
var newRepos []PDSRepoData
1741
+
var changedRepos []PDSRepoData
1742
+
1743
+
for _, repo := range repos {
1744
+
existing, exists := existingRepos[repo.DID]
1745
+
if !exists {
1746
+
// New repo
1747
+
newRepos = append(newRepos, repo)
1748
+
} else if existing.Head != repo.Head ||
1749
+
existing.Rev != repo.Rev ||
1750
+
existing.Active != repo.Active ||
1751
+
existing.Status != repo.Status {
1752
+
// Repo changed
1753
+
changedRepos = append(changedRepos, repo)
1754
+
}
1755
+
}
1756
+
1757
+
// Log comparison results
1758
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, total=%d, existing=%d, new=%d, changed=%d, unchanged=%d",
1759
+
endpointID, len(repos), len(existingRepos), len(newRepos), len(changedRepos),
1760
+
len(repos)-len(newRepos)-len(changedRepos))
1761
+
1762
+
// If nothing changed, return early
1763
+
if len(newRepos) == 0 && len(changedRepos) == 0 {
1764
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, no changes detected, skipping database operations", endpointID)
1765
+
return nil
1766
+
}
1767
+
1768
+
// Step 3: Execute batched operations
1769
+
conn, err := p.pool.Acquire(ctx)
1770
+
if err != nil {
1771
+
return err
1772
+
}
1773
+
defer conn.Release()
1774
+
1775
+
tx, err := conn.Begin(ctx)
1776
+
if err != nil {
1777
+
return err
1778
+
}
1779
+
defer tx.Rollback(ctx)
1780
+
1781
+
// Insert new repos
1782
+
if len(newRepos) > 0 {
1783
+
_, err := tx.Exec(ctx, `
1784
+
CREATE TEMP TABLE temp_new_repos (
1785
+
did TEXT,
1786
+
head TEXT,
1787
+
rev TEXT,
1788
+
active BOOLEAN,
1789
+
status TEXT
1790
+
) ON COMMIT DROP
1791
+
`)
1792
+
if err != nil {
1793
+
return err
1794
+
}
1795
+
1796
+
_, err = tx.Conn().CopyFrom(
1797
+
ctx,
1798
+
pgx.Identifier{"temp_new_repos"},
1799
+
[]string{"did", "head", "rev", "active", "status"},
1800
+
pgx.CopyFromSlice(len(newRepos), func(i int) ([]interface{}, error) {
1801
+
repo := newRepos[i]
1802
+
return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil
1803
+
}),
1804
+
)
1805
+
if err != nil {
1806
+
return err
1807
+
}
1808
+
1809
+
result, err := tx.Exec(ctx, `
1810
+
INSERT INTO pds_repos (endpoint_id, did, head, rev, active, status, first_seen, last_seen)
1811
+
SELECT $1, did, head, rev, active, status,
1812
+
TIMEZONE('UTC', NOW()),
1813
+
TIMEZONE('UTC', NOW())
1814
+
FROM temp_new_repos
1815
+
`, endpointID)
1816
+
if err != nil {
1817
+
return err
1818
+
}
1819
+
1820
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, inserted %d new repos", endpointID, result.RowsAffected())
1821
+
}
1822
+
1823
+
// Update changed repos
1824
+
if len(changedRepos) > 0 {
1825
+
_, err := tx.Exec(ctx, `
1826
+
CREATE TEMP TABLE temp_changed_repos (
1827
+
did TEXT,
1828
+
head TEXT,
1829
+
rev TEXT,
1830
+
active BOOLEAN,
1831
+
status TEXT
1832
+
) ON COMMIT DROP
1833
+
`)
1834
+
if err != nil {
1835
+
return err
1836
+
}
1837
+
1838
+
_, err = tx.Conn().CopyFrom(
1839
+
ctx,
1840
+
pgx.Identifier{"temp_changed_repos"},
1841
+
[]string{"did", "head", "rev", "active", "status"},
1842
+
pgx.CopyFromSlice(len(changedRepos), func(i int) ([]interface{}, error) {
1843
+
repo := changedRepos[i]
1844
+
return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil
1845
+
}),
1846
+
)
1847
+
if err != nil {
1848
+
return err
1849
+
}
1850
+
1851
+
result, err := tx.Exec(ctx, `
1852
+
UPDATE pds_repos
1853
+
SET head = t.head,
1854
+
rev = t.rev,
1855
+
active = t.active,
1856
+
status = t.status,
1857
+
last_seen = TIMEZONE('UTC', NOW()),
1858
+
updated_at = TIMEZONE('UTC', NOW())
1859
+
FROM temp_changed_repos t
1860
+
WHERE pds_repos.endpoint_id = $1
1861
+
AND pds_repos.did = t.did
1862
+
`, endpointID)
1863
+
if err != nil {
1864
+
return err
1865
+
}
1866
+
1867
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, updated %d changed repos", endpointID, result.RowsAffected())
1868
+
}
1869
+
1870
+
if err := tx.Commit(ctx); err != nil {
1871
+
return err
1872
+
}
1873
+
1874
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, transaction committed successfully", endpointID)
1875
+
return nil
1876
+
}
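The diff-then-COPY design trades one read of the endpoint's existing repos for skipping per-row upserts entirely when nothing changed, which is the common case on rescans. Caller sketch (the values are illustrative):

	repos := []PDSRepoData{
		{DID: "did:plc:abc123", Head: "bafyreib...", Rev: "3kabc", Active: true},
	}
	if err := db.UpsertPDSRepos(ctx, endpointID, repos); err != nil {
		return err
	}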
1877
+
1878
+
func (p *PostgresDB) GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) {
1879
+
query := `
1880
+
SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at
1881
+
FROM pds_repos
1882
+
WHERE endpoint_id = $1
1883
+
`
1884
+
1885
+
args := []interface{}{endpointID}
1886
+
argIdx := 2
1887
+
1888
+
if activeOnly {
1889
+
query += " AND active = true"
1890
+
}
1891
+
1892
+
// Order by id (primary key) - fastest
1893
+
query += " ORDER BY id DESC"
1894
+
1895
+
if limit > 0 {
1896
+
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
1897
+
args = append(args, limit, offset)
1898
+
}
1899
+
1900
+
rows, err := p.db.QueryContext(ctx, query, args...)
1901
+
if err != nil {
1902
+
return nil, err
1903
+
}
1904
+
defer rows.Close()
1905
+
1906
+
var repos []*PDSRepo
1907
+
for rows.Next() {
1908
+
var repo PDSRepo
1909
+
var head, rev, status sql.NullString
1910
+
1911
+
err := rows.Scan(
1912
+
&repo.ID, &repo.EndpointID, &repo.DID, &head, &rev,
1913
+
&repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt,
1914
+
)
1915
+
if err != nil {
1916
+
return nil, err
1917
+
}
1918
+
1919
+
if head.Valid {
1920
+
repo.Head = head.String
1921
+
}
1922
+
if rev.Valid {
1923
+
repo.Rev = rev.String
1924
+
}
1925
+
if status.Valid {
1926
+
repo.Status = status.String
1927
+
}
1928
+
1929
+
repos = append(repos, &repo)
1930
+
}
1931
+
1932
+
return repos, rows.Err()
1933
+
}
1934
+
1935
+
func (p *PostgresDB) GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error) {
1936
+
query := `
1937
+
SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at
1938
+
FROM pds_repos
1939
+
WHERE did = $1
1940
+
ORDER BY last_seen DESC
1941
+
`
1942
+
1943
+
rows, err := p.db.QueryContext(ctx, query, did)
1944
+
if err != nil {
1945
+
return nil, err
1946
+
}
1947
+
defer rows.Close()
1948
+
1949
+
var repos []*PDSRepo
1950
+
for rows.Next() {
1951
+
var repo PDSRepo
1952
+
var head, rev, status sql.NullString
1953
+
1954
+
err := rows.Scan(
1955
+
&repo.ID, &repo.EndpointID, &repo.DID, &head, &rev,
1956
+
&repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt,
1957
+
)
1958
+
if err != nil {
1959
+
return nil, err
1960
+
}
1961
+
1962
+
if head.Valid {
1963
+
repo.Head = head.String
1964
+
}
1965
+
if rev.Valid {
1966
+
repo.Rev = rev.String
1967
+
}
1968
+
if status.Valid {
1969
+
repo.Status = status.String
1970
+
}
1971
+
1972
+
repos = append(repos, &repo)
1973
+
}
1974
+
1975
+
return repos, rows.Err()
1976
+
}
1977
+
1978
+
func (p *PostgresDB) GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error) {
1979
+
query := `
1980
+
SELECT
1981
+
COUNT(*) as total_repos,
1982
+
COUNT(*) FILTER (WHERE active = true) as active_repos,
1983
+
COUNT(*) FILTER (WHERE active = false) as inactive_repos,
1984
+
COUNT(*) FILTER (WHERE status IS NOT NULL AND status != '') as repos_with_status,
1985
+
COUNT(*) FILTER (WHERE updated_at > CURRENT_TIMESTAMP - INTERVAL '1 hour') as recent_changes
1986
+
FROM pds_repos
1987
+
WHERE endpoint_id = $1
1988
+
`
1989
+
1990
+
var totalRepos, activeRepos, inactiveRepos, reposWithStatus, recentChanges int64
1991
+
1992
+
err := p.db.QueryRowContext(ctx, query, endpointID).Scan(
1993
+
&totalRepos, &activeRepos, &inactiveRepos, &reposWithStatus, &recentChanges,
1994
+
)
1995
+
if err != nil {
1996
+
return nil, err
1997
+
}
1998
+
1999
+
return map[string]interface{}{
2000
+
"total_repos": totalRepos,
2001
+
"active_repos": activeRepos,
2002
+
"inactive_repos": inactiveRepos,
2003
+
"repos_with_status": reposWithStatus,
2004
+
"recent_changes": recentChanges,
2005
+
}, nil
2006
+
}
2007
+
2008
+
// GetTableSizes fetches size information (in bytes) for all tables in the specified schema.
2009
+
func (p *PostgresDB) GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) {
2010
+
// Query now selects raw byte values directly
2011
+
query := `
2012
+
SELECT
2013
+
c.relname AS table_name,
2014
+
pg_total_relation_size(c.oid) AS total_bytes,
2015
+
pg_relation_size(c.oid) AS table_heap_bytes,
2016
+
pg_indexes_size(c.oid) AS indexes_bytes
2017
+
FROM
2018
+
pg_class c
2019
+
LEFT JOIN
2020
+
pg_namespace n ON n.oid = c.relnamespace
2021
+
WHERE
2022
+
c.relkind = 'r' -- 'r' = ordinary table
2023
+
AND n.nspname = $1
2024
+
ORDER BY
2025
+
total_bytes DESC;
2026
+
`
2027
+
rows, err := p.db.QueryContext(ctx, query, schema)
2028
+
if err != nil {
2029
+
return nil, fmt.Errorf("failed to query table sizes: %w", err)
2030
+
}
2031
+
defer rows.Close()
2032
+
2033
+
var results []TableSizeInfo
2034
+
for rows.Next() {
2035
+
var info TableSizeInfo
2036
+
// Scan directly into int64 fields
2037
+
if err := rows.Scan(
2038
+
&info.TableName,
2039
+
&info.TotalBytes,
2040
+
&info.TableHeapBytes,
2041
+
&info.IndexesBytes,
2042
+
); err != nil {
2043
+
return nil, fmt.Errorf("failed to scan table size row: %w", err)
2044
+
}
2045
+
results = append(results, info)
2046
+
}
2047
+
if err := rows.Err(); err != nil {
2048
+
return nil, fmt.Errorf("error iterating table size rows: %w", err)
2049
+
}
2050
+
2051
+
return results, nil
2052
+
}
2053
+
2054
+
// GetIndexSizes fetches size information (in bytes) for all indexes in the specified schema.
2055
+
func (p *PostgresDB) GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) {
2056
+
// Query now selects raw byte values directly
2057
+
query := `
2058
+
SELECT
2059
+
c.relname AS index_name,
2060
+
COALESCE(i.indrelid::regclass::text, 'N/A') AS table_name,
2061
+
pg_relation_size(c.oid) AS index_bytes
2062
+
FROM
2063
+
pg_class c
2064
+
LEFT JOIN
2065
+
pg_index i ON i.indexrelid = c.oid
2066
+
LEFT JOIN
2067
+
pg_namespace n ON n.oid = c.relnamespace
2068
+
WHERE
2069
+
c.relkind = 'i' -- 'i' = index
2070
+
AND n.nspname = $1
2071
+
ORDER BY
2072
+
index_bytes DESC;
2073
+
`
2074
+
rows, err := p.db.QueryContext(ctx, query, schema)
2075
+
if err != nil {
2076
+
return nil, fmt.Errorf("failed to query index sizes: %w", err)
2077
+
}
2078
+
defer rows.Close()
2079
+
2080
+
var results []IndexSizeInfo
2081
+
for rows.Next() {
2082
+
var info IndexSizeInfo
2083
+
var tableName sql.NullString
2084
+
// Scan directly into int64 field
2085
+
if err := rows.Scan(
2086
+
&info.IndexName,
2087
+
&tableName,
2088
+
&info.IndexBytes,
2089
+
); err != nil {
2090
+
return nil, fmt.Errorf("failed to scan index size row: %w", err)
2091
+
}
2092
+
if tableName.Valid {
2093
+
info.TableName = tableName.String
2094
+
} else {
2095
+
info.TableName = "N/A"
2096
+
}
2097
+
results = append(results, info)
2098
+
}
2099
+
if err := rows.Err(); err != nil {
2100
+
return nil, fmt.Errorf("error iterating index size rows: %w", err)
2101
+
}
2102
+
2103
+
return results, nil
2104
+
}
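A minimal consumer sketch for the two size helpers (assuming the default 'public' schema):

	tables, err := db.GetTableSizes(ctx, "public")
	if err != nil {
		return err
	}
	for _, t := range tables {
		fmt.Printf("%-24s %8.1f MB\n", t.TableName, float64(t.TotalBytes)/(1024*1024))
	}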
+81
-16
internal/storage/types.go
···
1
1
package storage
2
2
3
3
import (
4
+
"database/sql"
4
5
"fmt"
5
6
"path/filepath"
6
7
"time"
···
25
26
LastChecked time.Time
26
27
Status int
27
28
IP string
29
+
IPv6 string
28
30
IPResolvedAt time.Time
31
+
Valid bool
29
32
UpdatedAt time.Time
30
33
}
31
34
···
52
55
Status int
53
56
ResponseTime float64
54
57
UserCount int64
55
-
Version string // NEW: Add this field
58
+
Version string
59
+
UsedIP string // NEW: Track which IP was actually used
56
60
ScanData *EndpointScanData
57
61
ScannedAt time.Time
58
62
}
···
73
77
74
78
// EndpointFilter for querying endpoints
75
79
type EndpointFilter struct {
76
-
Type string // "pds", "labeler", etc.
80
+
Type string
77
81
Status string
78
82
MinUserCount int64
79
-
OnlyStale bool // NEW: Only return endpoints that need re-checking
80
-
RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale
83
+
OnlyStale bool
84
+
OnlyValid bool
85
+
RecheckInterval time.Duration
86
+
Random bool
81
87
Limit int
82
88
Offset int
83
89
}
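The new flags combine in GetEndpoints above; for example, a scheduler could pull a random batch of validated PDS endpoints (illustrative values):

	filter := &EndpointFilter{
		Type:      "pds",
		OnlyValid: true,
		Random:    true,
		Limit:     50,
	}
	endpoints, err := db.GetEndpoints(ctx, filter)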
···
116
122
StartTime time.Time
117
123
EndTime time.Time
118
124
BoundaryCIDs []string
119
-
DIDs []string
125
+
DIDCount int // Changed from DIDs []string
120
126
Hash string
121
127
CompressedHash string
122
128
CompressedSize int64
···
149
155
CumulativeCompressed int64 `json:"cumulative_compressed"`
150
156
}
151
157
152
-
// MempoolOperation represents an operation waiting to be bundled
153
-
type MempoolOperation struct {
154
-
ID int64
155
-
DID string
156
-
Operation string
157
-
CID string
158
-
CreatedAt time.Time
159
-
AddedAt time.Time
160
-
}
161
-
162
158
// ScanCursor stores scanning progress
163
159
type ScanCursor struct {
164
160
Source string
···
170
166
// DIDRecord represents a DID entry in the database
171
167
type DIDRecord struct {
172
168
DID string `json:"did"`
169
+
Handle string `json:"handle,omitempty"`
170
+
CurrentPDS string `json:"current_pds,omitempty"`
171
+
LastOpAt time.Time `json:"last_op_at,omitempty"`
173
172
BundleNumbers []int `json:"bundle_numbers"`
174
173
CreatedAt time.Time `json:"created_at"`
175
174
}
176
175
176
+
// GlobalDIDInfo consolidates DID data from PLC and PDS tables
177
+
type GlobalDIDInfo struct {
178
+
DIDRecord // Embeds all fields: DID, Handle, CurrentPDS, etc.
179
+
HostingOn []*PDSRepo `json:"hosting_on"`
180
+
}
181
+
177
182
// IPInfo represents IP information (stored with IP as primary key)
178
183
type IPInfo struct {
179
184
IP string `json:"ip"`
···
184
189
ASNOrg string `json:"asn_org,omitempty"`
185
190
IsDatacenter bool `json:"is_datacenter"`
186
191
IsVPN bool `json:"is_vpn"`
192
+
IsCrawler bool `json:"is_crawler"`
193
+
IsTor bool `json:"is_tor"`
194
+
IsProxy bool `json:"is_proxy"`
187
195
Latitude float32 `json:"latitude,omitempty"`
188
196
Longitude float32 `json:"longitude,omitempty"`
189
197
RawData map[string]interface{} `json:"raw_data,omitempty"`
···
191
199
UpdatedAt time.Time `json:"updated_at"`
192
200
}
193
201
202
+
// IsHome returns true if this is a residential/home IP
203
+
// (not crawler, datacenter, tor, proxy, or vpn)
204
+
func (i *IPInfo) IsHome() bool {
205
+
return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN
206
+
}
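For instance (hypothetical values):

	info := &IPInfo{IsDatacenter: true}
	_ = info.IsHome() // false: datacenter addresses are excluded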
207
+
194
208
// PDSListItem is a virtual type created by JOIN for /pds endpoint
195
209
type PDSListItem struct {
196
210
// From endpoints table
197
211
ID int64
198
212
Endpoint string
199
-
ServerDID string // NEW: Add this
213
+
ServerDID string
200
214
DiscoveredAt time.Time
201
215
LastChecked time.Time
202
216
Status int
203
217
IP string
218
+
IPv6 string
219
+
Valid bool // NEW
204
220
205
221
// From latest endpoint_scans (via JOIN)
206
222
LatestScan *struct {
···
252
268
FirstSeen time.Time `json:"first_seen"`
253
269
LastSeen time.Time `json:"last_seen"`
254
270
}
271
+
272
+
type PDSRepo struct {
273
+
ID int64 `json:"id"`
274
+
EndpointID int64 `json:"endpoint_id"`
275
+
Endpoint string `json:"endpoint,omitempty"`
276
+
DID string `json:"did"`
277
+
Head string `json:"head,omitempty"`
278
+
Rev string `json:"rev,omitempty"`
279
+
Active bool `json:"active"`
280
+
Status string `json:"status,omitempty"`
281
+
FirstSeen time.Time `json:"first_seen"`
282
+
LastSeen time.Time `json:"last_seen"`
283
+
UpdatedAt time.Time `json:"updated_at"`
284
+
}
285
+
286
+
type PDSRepoData struct {
287
+
DID string
288
+
Head string
289
+
Rev string
290
+
Active bool
291
+
Status string
292
+
}
293
+
294
+
type DIDBackfillInfo struct {
295
+
DID string
296
+
LastBundleNum int
297
+
}
298
+
299
+
type DIDStateUpdateData struct {
300
+
DID string
301
+
Handle sql.NullString // Use sql.NullString for potential NULLs
302
+
PDS sql.NullString
303
+
OpTime time.Time
304
+
}
305
+
306
+
// TableSizeInfo holds size information for a database table.
307
+
type TableSizeInfo struct {
308
+
TableName string `json:"table_name"`
309
+
TotalBytes int64 `json:"total_bytes"` // Raw bytes
310
+
TableHeapBytes int64 `json:"table_heap_bytes"` // Raw bytes
311
+
IndexesBytes int64 `json:"indexes_bytes"` // Raw bytes
312
+
}
313
+
314
+
// IndexSizeInfo holds size information for a database index.
315
+
type IndexSizeInfo struct {
316
+
IndexName string `json:"index_name"`
317
+
TableName string `json:"table_name"`
318
+
IndexBytes int64 `json:"index_bytes"` // Raw bytes
319
+
}
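
A quick usage sketch of the new EndpointFilter fields and the IsHome helper. This is illustrative only: the filter values and the idea of passing it to a query method are assumptions, not part of this diff.

package main

import (
	"fmt"
	"time"

	"github.com/atscan/atscand/internal/storage"
)

func main() {
	// Select up to 100 random, valid PDS endpoints that are due for a
	// re-check. The query method that would consume this filter is not
	// shown in this diff, so we only construct and print it here.
	filter := storage.EndpointFilter{
		Type:            "pds",
		OnlyStale:       true,
		OnlyValid:       true,
		RecheckInterval: 15 * time.Minute,
		Random:          true,
		Limit:           100,
	}
	fmt.Printf("%+v\n", filter)

	// IsHome classifies by elimination: anything not flagged as
	// crawler/datacenter/tor/proxy/vpn counts as residential.
	ip := storage.IPInfo{IsDatacenter: true}
	fmt.Println(ip.IsHome()) // false
}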
+2 -2  internal/worker/scheduler.go
+125  utils/db-sizes.sh
···
#!/bin/bash

# === Configuration ===
CONFIG_FILE="config.yaml" # Path to your config file
SCHEMA_NAME="public"      # Replace if your schema is different

# Check if config file exists
if [ ! -f "$CONFIG_FILE" ]; then
    echo "Error: Config file not found at '$CONFIG_FILE'"
    exit 1
fi

# Check if yq is installed
if ! command -v yq &> /dev/null; then
    echo "Error: 'yq' command not found. Please install yq (Go version by Mike Farah)."
    echo "See: https://github.com/mikefarah/yq/"
    exit 1
fi

echo "--- Reading connection info from '$CONFIG_FILE' ---"

# === Extract Database Config using yq ===
DB_TYPE=$(yq e '.database.type' "$CONFIG_FILE")
DB_CONN_STRING=$(yq e '.database.path' "$CONFIG_FILE") # This is likely a URI

if [ -z "$DB_TYPE" ] || [ -z "$DB_CONN_STRING" ]; then
    echo "Error: Could not read database type or path from '$CONFIG_FILE'."
    exit 1
fi

# === Parse the Connection String ===
DB_USER=""
DB_PASSWORD=""
DB_HOST="localhost" # Default
DB_PORT="5432"      # Default
DB_NAME=""

# Use regex to parse the URI (handles postgres:// or postgresql://, optional password/port, and query parameters)
if [[ "$DB_CONN_STRING" =~ ^(postgres|postgresql)://([^:]+)(:([^@]+))?@([^:/]+)(:([0-9]+))?/([^?]+)(\?.+)?$ ]]; then
    DB_USER="${BASH_REMATCH[2]}"
    DB_PASSWORD="${BASH_REMATCH[4]}"       # Optional group
    DB_HOST="${BASH_REMATCH[5]}"
    DB_PORT="${BASH_REMATCH[7]:-$DB_PORT}" # Use extracted port or default
    DB_NAME="${BASH_REMATCH[8]}"           # Database name before the '?'
else
    echo "Error: Could not parse database connection string URI: $DB_CONN_STRING"
    exit 1
fi

# Set PGPASSWORD environment variable if password was found
if [ -n "$DB_PASSWORD" ]; then
    export PGPASSWORD="$DB_PASSWORD"
else
    echo "Warning: No password found in connection string. Relying on ~/.pgpass or password prompt."
    unset PGPASSWORD
fi

echo "--- Database Size Investigation ---"
echo "Database: $DB_NAME"
echo "Schema:   $SCHEMA_NAME"
echo "User:     $DB_USER"
echo "Host:     $DB_HOST:$DB_PORT"
echo "-----------------------------------"

# === Table Sizes ===
echo ""
echo "## Table Sizes (Schema: $SCHEMA_NAME) ##"
# Removed --tuples-only and --no-align, added -P footer=off
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
SELECT
    c.relname AS "Table Name",
    pg_size_pretty(pg_total_relation_size(c.oid)) AS "Total Size",
    pg_size_pretty(pg_relation_size(c.oid)) AS "Table Heap Size",
    pg_size_pretty(pg_indexes_size(c.oid)) AS "Indexes Size"
FROM
    pg_class c
LEFT JOIN
    pg_namespace n ON n.oid = c.relnamespace
WHERE
    c.relkind = 'r' -- 'r' = ordinary table
    AND n.nspname = '$SCHEMA_NAME'
ORDER BY
    pg_total_relation_size(c.oid) DESC;
EOF

if [ $? -ne 0 ]; then
    echo "Error querying table sizes. Check connection details, permissions, and password."
    unset PGPASSWORD
    exit 1
fi

# === Index Sizes ===
echo ""
echo "## Index Sizes (Schema: $SCHEMA_NAME) ##"
# Removed --tuples-only and --no-align, added -P footer=off
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
SELECT
    c.relname AS "Index Name",
    i.indrelid::regclass AS "Table Name", -- Show associated table
    pg_size_pretty(pg_relation_size(c.oid)) AS "Index Size"
FROM
    pg_class c
LEFT JOIN
    pg_index i ON i.indexrelid = c.oid
LEFT JOIN
    pg_namespace n ON n.oid = c.relnamespace
WHERE
    c.relkind = 'i' -- 'i' = index
    AND n.nspname = '$SCHEMA_NAME'
ORDER BY
    pg_relation_size(c.oid) DESC;
EOF

if [ $? -ne 0 ]; then
    echo "Error querying index sizes. Check connection details, permissions, and password."
    unset PGPASSWORD
    exit 1
fi

echo ""
echo "-----------------------------------"
echo "Investigation complete."

# Unset the password variable for security
unset PGPASSWORD
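
The bash regex is the fragile part of this script. For comparison, the same fields fall out of Go's standard library in a few lines; a minimal sketch, with a made-up DSN (not a real credential) standing in for database.path:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	// Hypothetical connection string for illustration only.
	dsn := "postgres://atscan:secret@localhost:5432/atscan?sslmode=disable"
	u, err := url.Parse(dsn)
	if err != nil {
		panic(err)
	}
	pass, _ := u.User.Password()            // optional, like the regex group
	db := strings.TrimPrefix(u.Path, "/")   // database name before the '?'
	fmt.Println(u.User.Username(), pass, u.Hostname(), u.Port(), db)
	// Output: atscan secret localhost 5432 atscan
}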
+113  utils/import-labels.js
···
import { file, write } from "bun";
import { join } from "path";
import { mkdir } from "fs/promises";
import { init, compress } from "@bokuweb/zstd-wasm";

// --- Configuration ---
const CSV_FILE = process.argv[2];
const CONFIG_FILE = "config.yaml";
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
// ---------------------

if (!CSV_FILE) {
  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
  process.exit(1);
}

console.log("========================================");
console.log("PLC Operation Labels Import (Bun + WASM)");
console.log("========================================");

// 1. Read and parse config
console.log(`Loading config from ${CONFIG_FILE}...`);
const configFile = await file(CONFIG_FILE).text();
const config = Bun.YAML.parse(configFile);
const bundleDir = config?.plc?.bundle_dir;

if (!bundleDir) {
  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
  process.exit(1);
}

const FINAL_LABELS_DIR = join(bundleDir, "labels");
await mkdir(FINAL_LABELS_DIR, { recursive: true });

console.log(`CSV File:   ${CSV_FILE}`);
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
console.log("");

// 2. Initialize Zstd WASM module
await init();

// --- Pass 1: Read entire file into memory and group by bundle ---
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
console.warn("This will use a large amount of RAM!");

const startTime = Date.now();
const bundles = new Map(); // Map<string, string[]>
let lineCount = 0;

const inputFile = file(CSV_FILE);
const fileStream = inputFile.stream();
const decoder = new TextDecoder();
let remainder = "";

for await (const chunk of fileStream) {
  // { stream: true } keeps multi-byte UTF-8 sequences intact across
  // chunk boundaries; without it, split characters would be mangled.
  const text = remainder + decoder.decode(chunk, { stream: true });
  const lines = text.split("\n");
  remainder = lines.pop() || "";

  for (const line of lines) {
    if (line === "") continue;
    lineCount++;

    if (lineCount === 1 && line.startsWith("bundle,")) {
      continue; // Skip header
    }

    const firstCommaIndex = line.indexOf(",");
    if (firstCommaIndex === -1) {
      console.warn(`Skipping malformed line: ${line}`);
      continue;
    }
    const bundleNumStr = line.substring(0, firstCommaIndex);
    const bundleKey = bundleNumStr.padStart(6, "0");

    // Add line to the correct bundle's array
    if (!bundles.has(bundleKey)) {
      bundles.set(bundleKey, []);
    }
    bundles.get(bundleKey).push(line);
  }
}
// Note: We ignore any final `remainder` as it's likely an empty line

console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
console.log(`Found ${bundles.size} unique bundles.`);

// --- Pass 2: Compress and write each bundle ---
console.log("\nPass 2/2: Compressing and writing bundle files...");
let i = 0;
for (const [bundleKey, lines] of bundles.entries()) {
  i++;
  console.log(`  (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);

  // Join all lines for this bundle into one big string
  const content = lines.join("\n");

  // Compress the string
  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);

  // Write the compressed data to the file
  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
  await write(outPath, compressedData);
}

// 3. Clean up
const totalTime = (Date.now() - startTime) / 1000;
console.log("\n========================================");
console.log("Import Summary");
console.log("========================================");
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
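
The only contract between this importer and the rest of the pipeline is the file-naming scheme: the first CSV column is the bundle number, zero-padded to six digits. A minimal Go sketch of that mapping (the sample row and the labelPath helper are invented for illustration):

package main

import (
	"fmt"
	"path/filepath"
	"strconv"
	"strings"
)

// labelPath maps a label CSV line to the compressed file the importer
// writes for its bundle, e.g. bundle 42 -> <dir>/000042.csv.zst.
func labelPath(dir, line string) (string, bool) {
	field, _, ok := strings.Cut(line, ",")
	if !ok {
		return "", false // malformed line; the importer skips these too
	}
	n, err := strconv.Atoi(field)
	if err != nil {
		return "", false
	}
	return filepath.Join(dir, fmt.Sprintf("%06d.csv.zst", n)), true
}

func main() {
	// Invented sample row: only the leading bundle number matters here.
	p, ok := labelPath("bundles/labels", "42,did:plc:example,label")
	fmt.Println(p, ok) // bundles/labels/000042.csv.zst true
}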
+91  utils/import-labels.sh
···
#!/bin/bash
# import-labels.sh (sorted-pipe variant)

set -e

if [ $# -lt 1 ]; then
    echo "Usage: ./utils/import-labels.sh <csv-file>"
    exit 1
fi

CSV_FILE="$1"
CONFIG_FILE="config.yaml"

[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1

# Extract bundle directory path
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)

[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1

FINAL_LABELS_DIR="$BUNDLE_DIR/labels"

echo "========================================"
echo "PLC Operation Labels Import (Sorted Pipe)"
echo "========================================"
echo "CSV File:   $CSV_FILE"
echo "Output Dir: $FINAL_LABELS_DIR"
echo ""

# Ensure the final directory exists
mkdir -p "$FINAL_LABELS_DIR"

echo "Streaming, sorting, and compressing on the fly..."
# Single quotes here: backticks inside double quotes would be executed
# as command substitutions.
echo 'This will take time. `pv` will show progress of the tail command.'
echo 'The `sort` command will run after `pv` is complete.'
echo ""

# This is the single-pass pipeline
tail -n +2 "$CSV_FILE" | \
pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
sort -t, -k1,1n | \
awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
# This awk script EXPECTS input sorted by bundle number (col 1)
BEGIN {
    # last_bundle_num tracks the bundle we are currently writing
    last_bundle_num = -1
    # cmd holds the current zstd pipe command
    cmd = ""
}
{
    current_bundle_num = $1

    # Check if the bundle number has changed
    if (current_bundle_num != last_bundle_num) {

        # If it changed, and we have an old pipe open, close it
        if (last_bundle_num != -1) {
            close(cmd)
        }

        # Create the new pipe command, writing to the final .zst file
        outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
        cmd = "zstd -T0 -o " outfile

        # Update the tracker
        last_bundle_num = current_bundle_num

        # Print progress to stderr
        printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
    }

    # Print the current line ($0) to the open pipe
    # The first time this runs for a bundle, it opens the pipe
    # Subsequent times, it writes to the already-open pipe
    print $0 | cmd
}
# END block: close the very last pipe
END {
    if (last_bundle_num != -1) {
        close(cmd)
    }
    printf "   Finished. Total lines: %d\n", NR > "/dev/stderr"
}'

echo ""
echo "========================================"
echo "Import Summary"
echo "========================================"
echo "✓ Import completed successfully!"
echo "Label files are stored in: $FINAL_LABELS_DIR"
+2 -2  utils/migrate-ipinfo.sh
+199  utils/vuln-scanner-parallel.sh
···
#!/bin/bash

# Configuration
API_HOST="${API_HOST:-http://localhost:8080}"
TIMEOUT=5
PARALLEL_JOBS=20
OUTPUT_DIR="./pds_scan_results"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"

# Paths to check
PATHS=(
    "/info.php"
    "/phpinfo.php"
    "/test.php"
    "/admin"
    "/admin.php"
    "/wp-admin"
    "/robots.txt"
    "/.env"
    "/.git/config"
    "/config.php"
    "/backup"
    "/db.sql"
    "/.DS_Store"
    "/server-status"
    "/.well-known/security.txt"
)

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Check dependencies
if ! command -v jq &> /dev/null; then
    echo -e "${RED}Error: jq is required${NC}"
    echo "Install: sudo apt-get install jq"
    exit 1
fi

if ! command -v parallel &> /dev/null; then
    echo -e "${RED}Error: GNU parallel is required${NC}"
    echo "Install: sudo apt-get install parallel (or brew install parallel)"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║     PDS Security Scanner (Parallel)    ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
echo ""
echo "API Host:       $API_HOST"
echo "Timeout:        ${TIMEOUT}s per request"
echo "Parallel jobs:  ${PARALLEL_JOBS}"
echo "Paths to check: ${#PATHS[@]}"
echo ""

# Scan function - will be called by GNU parallel
scan_endpoint() {
    local endpoint="$1"
    local timeout="$2"
    shift 2
    local paths=("$@")

    for path in "${paths[@]}"; do
        url="${endpoint}${path}"

        response=$(curl -s -o /dev/null -w "%{http_code}" \
            --max-time "$timeout" \
            --connect-timeout "$timeout" \
            --retry 0 \
            -A "Mozilla/5.0 (Security Scanner)" \
            "$url" 2>/dev/null)

        if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
            if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
                echo "FOUND|$endpoint|$path|$response"
            elif [ "$response" != "403" ] && [ "$response" != "401" ]; then
                echo "MAYBE|$endpoint|$path|$response"
            fi
        fi
    done
}

export -f scan_endpoint

# Fetch active PDS endpoints
echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
    jq -r '.[].endpoint' 2>/dev/null)

if [ -z "$ENDPOINTS" ]; then
    echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
    echo "Check that the API is running at: $API_HOST"
    exit 1
fi

ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ')
echo -e "${GREEN}✓ Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
echo ""

# Write header to results file
{
    echo "PDS Security Scan Results"
    echo "========================="
    echo "Scan started: $(date)"
    echo "Endpoints scanned: ${ENDPOINT_COUNT}"
    echo "Paths checked: ${#PATHS[@]}"
    echo "Parallel jobs: ${PARALLEL_JOBS}"
    echo ""
    echo "Results:"
    echo "--------"
} > "$RESULTS_FILE"

# Run parallel scan
echo -e "${YELLOW}Starting parallel scan...${NC}"
echo -e "${BLUE}(This may take a few minutes depending on endpoint count)${NC}"
echo ""

echo "$ENDPOINTS" | \
    parallel \
        -j "$PARALLEL_JOBS" \
        --bar \
        --joblog "${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" \
        scan_endpoint {} "$TIMEOUT" "${PATHS[@]}" \
    >> "$RESULTS_FILE"

echo ""
echo -e "${YELLOW}Processing results...${NC}"

# Count results. Note: grep -c prints 0 itself on no match (while exiting
# non-zero), so "|| echo 0" would produce "0\n0"; fall back only when grep
# actually emitted nothing (e.g. missing file).
FOUND_COUNT=$(grep -c "^FOUND|" "$RESULTS_FILE" 2>/dev/null || true)
FOUND_COUNT=${FOUND_COUNT:-0}
MAYBE_COUNT=$(grep -c "^MAYBE|" "$RESULTS_FILE" 2>/dev/null || true)
MAYBE_COUNT=${MAYBE_COUNT:-0}

# Extract found URLs to separate file
{
    echo "Found URLs (HTTP 200/301/302)"
    echo "=============================="
    echo "Scan: $(date)"
    echo ""
} > "$FOUND_FILE"

grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | while IFS='|' read -r status endpoint path code; do
    echo "$endpoint$path [$code]"
done >> "$FOUND_FILE"

# Create summary at end of results file
{
    echo ""
    echo "Summary"
    echo "======="
    echo "Scan completed: $(date)"
    echo "Total endpoints scanned: ${ENDPOINT_COUNT}"
    echo "Total paths checked: $((ENDPOINT_COUNT * ${#PATHS[@]}))"
    echo "Found (200/301/302): ${FOUND_COUNT}"
    echo "Maybe (other codes): ${MAYBE_COUNT}"
} >> "$RESULTS_FILE"

# Display summary
echo ""
echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║              Scan Complete!            ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
echo ""
echo -e "Endpoints scanned:      ${GREEN}${ENDPOINT_COUNT}${NC}"
echo -e "Paths checked per site: ${BLUE}${#PATHS[@]}${NC}"
echo -e "Total requests made:    ${BLUE}$((ENDPOINT_COUNT * ${#PATHS[@]}))${NC}"
echo ""
echo -e "Results:"
echo -e "  ${GREEN}✓ Found (200/301/302):${NC} ${FOUND_COUNT}"
echo -e "  ${YELLOW}? Maybe (other):${NC}       ${MAYBE_COUNT}"
echo ""
echo "Files created:"
echo "  Full results: $RESULTS_FILE"
echo "  Found URLs:   $FOUND_FILE"
echo "  Job log:      ${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt"

# Show sample of found URLs if any
if [ "$FOUND_COUNT" -gt 0 ]; then
    echo ""
    echo -e "${RED}⚠ SECURITY ALERT: Found exposed paths!${NC}"
    echo ""
    echo "Sample findings (first 10):"
    grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | head -10 | while IFS='|' read -r status endpoint path code; do
        echo -e "  ${RED}✗${NC} $endpoint${RED}$path${NC} [$code]"
    done

    if [ "$FOUND_COUNT" -gt 10 ]; then
        echo ""
        echo "  ... and $((FOUND_COUNT - 10)) more (see $FOUND_FILE)"
    fi
fi

echo ""
+117  utils/vuln-scanner.sh
···
#!/bin/bash

# Configuration
API_HOST="${API_HOST:-http://localhost:8080}"
TIMEOUT=5
OUTPUT_DIR="./pds_scan_results"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"

# Paths to check (one per line for easier editing)
PATHS=(
    "/info.php"
    "/phpinfo.php"
    "/test.php"
    "/admin"
    "/admin.php"
    "/wp-admin"
    "/robots.txt"
    "/.env"
    "/.git/config"
    "/config.php"
    "/backup"
    "/db.sql"
    "/.DS_Store"
    "/server-status"
    "/.well-known/security.txt"
)

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

mkdir -p "$OUTPUT_DIR"

echo -e "${BLUE}=== PDS Security Scanner ===${NC}"
echo "API Host: $API_HOST"
echo "Timeout:  ${TIMEOUT}s"
echo "Scanning for ${#PATHS[@]} paths"
echo "Results:  $RESULTS_FILE"
echo ""

# Fetch active PDS endpoints
echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
    jq -r '.[].endpoint' 2>/dev/null)

if [ -z "$ENDPOINTS" ]; then
    echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
    exit 1
fi

# tr strips the padding BSD wc adds, matching the parallel variant
ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ')
echo -e "${GREEN}Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
echo ""

# Write header
echo "PDS Security Scan - $(date)" > "$RESULTS_FILE"
echo "========================================" >> "$RESULTS_FILE"
echo "" >> "$RESULTS_FILE"

# Counters
CURRENT=0
TOTAL_FOUND=0
TOTAL_MAYBE=0

# Scan each endpoint sequentially
while IFS= read -r endpoint; do
    CURRENT=$((CURRENT + 1))

    echo -e "${BLUE}[$CURRENT/$ENDPOINT_COUNT]${NC} Scanning: $endpoint"

    # Scan each path
    for path in "${PATHS[@]}"; do
        url="${endpoint}${path}"

        # Make request with timeout (-L follows redirects, so the
        # recorded code is the final response)
        response=$(curl -s -o /dev/null -w "%{http_code}" \
            --max-time "$TIMEOUT" \
            --connect-timeout "$TIMEOUT" \
            -L \
            -A "Mozilla/5.0 (Security Scanner)" \
            "$url" 2>/dev/null)

        # Check response
        if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
            if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
                echo -e "  ${GREEN}✓ FOUND${NC} $path ${YELLOW}[$response]${NC}"
                echo "FOUND: $endpoint$path [$response]" >> "$RESULTS_FILE"
                echo "$endpoint$path" >> "$FOUND_FILE"
                TOTAL_FOUND=$((TOTAL_FOUND + 1))
            elif [ "$response" != "403" ]; then
                echo -e "  ${YELLOW}? MAYBE${NC} $path ${YELLOW}[$response]${NC}"
                echo "MAYBE: $endpoint$path [$response]" >> "$RESULTS_FILE"
                TOTAL_MAYBE=$((TOTAL_MAYBE + 1))
            fi
        fi
    done

    echo "" >> "$RESULTS_FILE"

done <<< "$ENDPOINTS"

# Summary
echo ""
echo -e "${BLUE}========================================${NC}"
echo -e "${GREEN}Scan Complete!${NC}"
echo "Scanned: ${ENDPOINT_COUNT} endpoints"
echo "Paths checked per endpoint: ${#PATHS[@]}"
echo -e "${GREEN}Found (200/301/302): ${TOTAL_FOUND}${NC}"
echo -e "${YELLOW}Maybe (other codes): ${TOTAL_MAYBE}${NC}"
echo ""
echo "Full results: $RESULTS_FILE"
[ -f "$FOUND_FILE" ] && echo "Found URLs: $FOUND_FILE"