Makefile (+42)
···
+.PHONY: all build install run update-plcbundle help
+
+# Binary name
+BINARY_NAME=atscand
+INSTALL_PATH=$(GOPATH)/bin
+
+# Go commands
+GOCMD=go
+GOBUILD=$(GOCMD) build
+GOINSTALL=$(GOCMD) install
+GOCLEAN=$(GOCMD) clean
+GOTEST=$(GOCMD) test
+GOGET=$(GOCMD) get
+GOFMT=$(GOCMD) fmt
+GOMOD=$(GOCMD) mod
+GORUN=$(GOCMD) run
+
+# Default target
+all: build
+
+# Build the CLI tool
+build:
+	@echo "Building $(BINARY_NAME)..."
+	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+# Install the CLI tool globally
+install:
+	@echo "Installing $(BINARY_NAME)..."
+	$(GOINSTALL) ./cmd/atscand
+
+run:
+	$(GORUN) cmd/atscand/main.go -verbose
+
+update-plcbundle:
+	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  make build   - Build the binary"
+	@echo "  make install - Install binary globally"
+	@echo "  make run     - Run app"
cmd/atscand/main.go (+159)
···
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/atscan/atscand/internal/api"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/pds"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
+	"github.com/atscan/atscand/internal/worker"
+)
+
+const VERSION = "1.0.0"
+
+func main() {
+	configPath := flag.String("config", "config.yaml", "path to config file")
+	verbose := flag.Bool("verbose", false, "enable verbose logging")
+	flag.Parse()
+
+	// Load configuration
+	cfg, err := config.Load(*configPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Override verbose setting if flag is provided
+	if *verbose {
+		cfg.API.Verbose = true
+	}
+
+	// Initialize logger
+	log.Init(cfg.API.Verbose)
+
+	// Print banner
+	log.Banner(VERSION)
+
+	// Print configuration summary
+	log.PrintConfig(map[string]string{
+		"Database Type":     cfg.Database.Type,
+		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+		"PLC Directory":     cfg.PLC.DirectoryURL,
+		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+		"PDS Timeout":       cfg.PDS.Timeout.String(),
+		"API Host":          cfg.API.Host,
+		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+	})
+
+	// Initialize database using factory pattern
+	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+	if err != nil {
+		log.Fatal("Failed to initialize database: %v", err)
+	}
+	defer func() {
+		log.Info("Closing database connection...")
+		db.Close()
+	}()
+
+	// Set scan retention from config
+	if cfg.PDS.ScanRetention > 0 {
+		db.SetScanRetention(cfg.PDS.ScanRetention)
+		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+	}
+
+	// Run migrations
+	if err := db.Migrate(); err != nil {
+		log.Fatal("Failed to run migrations: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Initialize workers
+	log.Info("Initializing scanners...")
+
+	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+	if err != nil {
+		log.Fatal("Failed to create bundle manager: %v", err)
+	}
+	defer bundleManager.Close()
+	log.Verbose("✓ Bundle manager initialized (shared)")
+
+	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+	defer plcScanner.Close()
+	log.Verbose("✓ PLC scanner initialized")
+
+	pdsScanner := pds.NewScanner(db, cfg.PDS)
+	log.Verbose("✓ PDS scanner initialized")
+
+	scheduler := worker.NewScheduler()
+
+	// Schedule PLC directory scan
+	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+		if err := plcScanner.Scan(ctx); err != nil {
+			log.Error("PLC scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+	// Schedule PDS availability checks
+	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+		if err := pdsScanner.ScanAll(ctx); err != nil {
+			log.Error("PDS scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+	// Start API server
+	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+	go func() {
+		if err := apiServer.Start(); err != nil {
+			log.Fatal("API server error: %v", err)
+		}
+	}()
+
+	// Give the API server a moment to start
+	time.Sleep(100 * time.Millisecond)
+	log.Info("✓ API server started successfully")
+	log.Info("")
+	log.Info("🚀 ATScanner is running!")
+	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+	log.Info("   Press Ctrl+C to stop")
+	log.Info("")
+
+	// Start scheduler
+	scheduler.Start(ctx)
+
+	// Wait for interrupt
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	<-sigChan
+
+	log.Info("")
+	log.Info("Shutting down gracefully...")
+	cancel()
+
+	log.Info("Stopping API server...")
+	apiServer.Shutdown(context.Background())
+
+	log.Info("Waiting for active tasks to complete...")
+	time.Sleep(2 * time.Second)
+
+	log.Info("✓ Shutdown complete. Goodbye!")
+}
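
The storage.NewDatabase call above is only visible from the caller's side; internal/storage itself is not part of this diff. As a rough, illustrative sketch of the factory pattern the comment refers to, assuming both drivers sit behind one interface (every name below other than NewDatabase, Migrate, SetScanRetention, and Close is hypothetical):

package storage

import (
	"database/sql"
	"fmt"
)

// Database is the minimal surface cmd/atscand/main.go relies on.
type Database interface {
	Migrate() error
	SetScanRetention(scans int)
	Close() error
}

// NewDatabase dispatches on the configured type string ("sqlite" or "postgres").
func NewDatabase(dbType, path string) (Database, error) {
	switch dbType {
	case "sqlite":
		return newSQLDatabase("sqlite3", path) // driver registration via blank import elsewhere
	case "postgres":
		return newSQLDatabase("postgres", path) // likewise for the Postgres driver
	default:
		return nil, fmt.Errorf("unsupported database type: %q", dbType)
	}
}

type sqlDatabase struct {
	db        *sql.DB
	retention int
}

func newSQLDatabase(driver, dsn string) (*sqlDatabase, error) {
	db, err := sql.Open(driver, dsn)
	if err != nil {
		return nil, err
	}
	return &sqlDatabase{db: db}, nil
}

func (s *sqlDatabase) Migrate() error             { return nil } // the real code runs schema migrations here
func (s *sqlDatabase) SetScanRetention(scans int) { s.retention = scans }
func (s *sqlDatabase) Close() error               { return s.db.Close() }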
cmd/atscanner.go (-96)
···
-package main
-
-import (
-	"context"
-	"flag"
-	"os"
-	"os/signal"
-	"syscall"
-	"time"
-
-	"github.com/atscan/atscanner/internal/api"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/pds"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
-	"github.com/atscan/atscanner/internal/worker"
-)
-
-func main() {
-	configPath := flag.String("config", "config.yaml", "path to config file")
-	verbose := flag.Bool("verbose", false, "enable verbose logging")
-	flag.Parse()
-
-	// Load configuration
-	cfg, err := config.Load(*configPath)
-	if err != nil {
-		log.Fatal("Failed to load config: %v", err)
-	}
-
-	// Override verbose setting if flag is provided
-	if *verbose {
-		cfg.API.Verbose = true
-	}
-
-	// Initialize logger
-	log.Init(cfg.API.Verbose)
-
-	// Initialize database
-	db, err := storage.NewSQLiteDB(cfg.Database.Path)
-	if err != nil {
-		log.Fatal("Failed to initialize database: %v", err)
-	}
-	defer db.Close()
-
-	// Run migrations
-	if err := db.Migrate(); err != nil {
-		log.Fatal("Failed to run migrations: %v", err)
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Initialize workers
-	plcScanner := plc.NewScanner(db, cfg.PLC)
-	defer plcScanner.Close() // Close scanner to cleanup cache
-
-	pdsScanner := pds.NewScanner(db, cfg.PDS)
-
-	scheduler := worker.NewScheduler()
-
-	// Schedule PLC directory scan
-	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
-		if err := plcScanner.Scan(ctx); err != nil {
-			log.Error("PLC scan error: %v", err)
-		}
-	})
-
-	// Schedule PDS availability checks
-	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
-		if err := pdsScanner.ScanAll(ctx); err != nil {
-			log.Error("PDS scan error: %v", err)
-		}
-	})
-
-	// Start API server
-	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
-	go func() {
-		if err := apiServer.Start(); err != nil {
-			log.Fatal("API server error: %v", err)
-		}
-	}()
-
-	// Start scheduler
-	scheduler.Start(ctx)
-
-	// Wait for interrupt
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
-
-	log.Info("Shutting down gracefully...")
-	cancel()
-	apiServer.Shutdown(context.Background())
-	time.Sleep(2 * time.Second)
-}
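
Both the old and the new entrypoint drive periodic scans through worker.NewScheduler, AddJob, and Start(ctx). internal/worker is not included in this diff, so the following is only a minimal ticker-based sketch consistent with that call pattern, not the project's actual implementation:

package worker

import (
	"context"
	"time"
)

type job struct {
	name     string
	interval time.Duration
	fn       func()
}

// Scheduler runs registered jobs at fixed intervals until the context ends.
type Scheduler struct{ jobs []job }

func NewScheduler() *Scheduler { return &Scheduler{} }

// AddJob registers fn to run every interval once Start is called.
func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
	s.jobs = append(s.jobs, job{name, interval, fn})
}

// Start launches one goroutine per job and returns immediately; jobs stop
// when ctx is cancelled (main calls cancel() during shutdown).
func (s *Scheduler) Start(ctx context.Context) {
	for _, j := range s.jobs {
		go func(j job) {
			ticker := time.NewTicker(j.interval)
			defer ticker.Stop()
			j.fn() // run once immediately, then on every tick
			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					j.fn()
				}
			}
		}(j)
	}
}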
cmd/import-labels/main.go (+168)
···
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	PLC struct {
+		BundleDir string `yaml:"bundle_dir"`
+	} `yaml:"plc"`
+}
+
+var CONFIG_FILE = "config.yaml"
+
+// ---------------------
+
+func main() {
+	// Define a new flag for changing the directory
+	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+	flag.Usage = func() { // Custom usage message
+		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+		flag.PrintDefaults()
+	}
+	flag.Parse() // Parse all defined flags
+
+	// Change directory if the flag was used
+	if *workDir != "." {
+		fmt.Printf("Changing working directory to %s...\n", *workDir)
+		if err := os.Chdir(*workDir); err != nil {
+			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+			os.Exit(1)
+		}
+	}
+
+	// --- REMOVED UNUSED CODE ---
+	// The csvFilePath variable and NArg check were removed
+	// as the script now reads from stdin.
+	// ---------------------------
+
+	fmt.Println("========================================")
+	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+	fmt.Println("========================================")
+
+	// 1. Read config (will now read from the new CWD)
+	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+	configData, err := os.ReadFile(CONFIG_FILE)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+		os.Exit(1)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(configData, &config); err != nil {
+		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+		os.Exit(1)
+	}
+
+	if config.PLC.BundleDir == "" {
+		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+		os.Exit(1)
+	}
+
+	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+	fmt.Println("Waiting for sorted data from stdin...")
+
+	// 2. Process sorted data from stdin
+	// This script *requires* the input to be sorted by bundle number.
+
+	var currentWriter *zstd.Encoder
+	var currentFile *os.File
+	var lastBundleKey string = ""
+
+	lineCount := 0
+	startTime := time.Now()
+
+	scanner := bufio.NewScanner(os.Stdin)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		lineCount++
+
+
parts := strings.SplitN(line, ",", 2)
100
+
if len(parts) < 1 {
101
+
continue // Skip empty/bad lines
102
+
}
103
+
104
+
bundleNumStr := parts[0]
105
+
bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
106
+
107
+
// If the bundle key is new, close the old writer and open a new one.
108
+
if bundleKey != lastBundleKey {
109
+
// Close the previous writer/file
110
+
if currentWriter != nil {
111
+
if err := currentWriter.Close(); err != nil {
112
+
fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
113
+
}
114
+
currentFile.Close()
115
+
}
116
+
117
+
// Start the new one
118
+
fmt.Printf(" -> Writing bundle %s\n", bundleKey)
119
+
outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
120
+
121
+
file, err := os.Create(outPath)
122
+
if err != nil {
123
+
fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
124
+
os.Exit(1)
125
+
}
126
+
currentFile = file
127
+
128
+
writer, err := zstd.NewWriter(file)
129
+
if err != nil {
130
+
fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
131
+
os.Exit(1)
132
+
}
133
+
currentWriter = writer
134
+
lastBundleKey = bundleKey
135
+
}
136
+
137
+
// Write the line to the currently active writer
138
+
if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
139
+
fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
140
+
}
141
+
142
+
// Progress update
143
+
if lineCount%100000 == 0 {
144
+
elapsed := time.Since(startTime).Seconds()
145
+
rate := float64(lineCount) / elapsed
146
+
fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
147
+
}
148
+
}
149
+
150
+
// 3. Close the very last writer
151
+
if currentWriter != nil {
152
+
if err := currentWriter.Close(); err != nil {
153
+
fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
154
+
}
155
+
currentFile.Close()
156
+
}
157
+
158
+
if err := scanner.Err(); err != nil {
159
+
fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
160
+
}
161
+
162
+
totalTime := time.Since(startTime)
163
+
fmt.Println("\n========================================")
164
+
fmt.Println("Import Summary")
165
+
fmt.Println("========================================")
166
+
fmt.Printf("✓ Import completed in %v\n", totalTime)
167
+
fmt.Printf("Total lines processed: %d\n", lineCount)
168
+
}
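
The importer writes one zstd-compressed CSV per bundle key. Reading a bundle back uses the decoder side of the same klauspost/compress dependency; a minimal companion sketch (the file path is only an example of what the importer produces):

package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	f, err := os.Open("plc_bundles/labels/000001.csv.zst")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()

	// zstd.NewReader returns a streaming Decoder that implements io.Reader.
	dec, err := zstd.NewReader(f)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer dec.Close()

	// Lines come back exactly as written: "bundle,label-data" CSV rows.
	scanner := bufio.NewScanner(dec)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		fmt.Println(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}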
config.sample.yaml (+22)
···
+database:
+  type: "postgres" # or "sqlite"
+  path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
+  # For SQLite: path: "atscan.db"
+
+plc:
+  directory_url: "https://plc.directory"
+  scan_interval: "5s"
+  bundle_dir: "./plc_bundles"
+  use_cache: true
+  index_dids: true
+
+pds:
+  scan_interval: "30m"
+  timeout: "30s"
+  workers: 20
+  recheck_interval: "1.5h"
+  scan_retention: 20
+
+api:
+  host: "0.0.0.0"
+  port: 8080
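
internal/config is not shown in this diff. A sketch of structs that could map this YAML, assuming durations are decoded from Go-style strings ("5s", "30m", "1.5h") via a small wrapper type; all type and field names here are illustrative, not the project's actual definitions:

package config

import (
	"os"
	"time"

	"gopkg.in/yaml.v3"
)

// Duration wraps time.Duration so yaml.v3 can decode "30m"-style strings.
type Duration time.Duration

func (d *Duration) UnmarshalYAML(value *yaml.Node) error {
	var s string
	if err := value.Decode(&s); err != nil {
		return err
	}
	parsed, err := time.ParseDuration(s)
	if err != nil {
		return err
	}
	*d = Duration(parsed)
	return nil
}

type Config struct {
	Database struct {
		Type string `yaml:"type"`
		Path string `yaml:"path"`
	} `yaml:"database"`
	PLC struct {
		DirectoryURL string   `yaml:"directory_url"`
		ScanInterval Duration `yaml:"scan_interval"`
		BundleDir    string   `yaml:"bundle_dir"`
		UseCache     bool     `yaml:"use_cache"`
		IndexDIDs    bool     `yaml:"index_dids"`
	} `yaml:"plc"`
	PDS struct {
		ScanInterval    Duration `yaml:"scan_interval"`
		Timeout         Duration `yaml:"timeout"`
		Workers         int      `yaml:"workers"`
		RecheckInterval Duration `yaml:"recheck_interval"`
		ScanRetention   int      `yaml:"scan_retention"`
	} `yaml:"pds"`
	API struct {
		Host    string `yaml:"host"`
		Port    int    `yaml:"port"`
		Verbose bool   `yaml:"verbose"`
	} `yaml:"api"`
}

// Load reads and parses the config file; the real package may differ.
func Load(path string) (*Config, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	var cfg Config
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}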
config.yaml (-19)
···
-database:
-  path: "atscan.db"
-  type: "sqlite"
-
-plc:
-  directory_url: "https://plc.directory"
-  scan_interval: "5s"
-  bundle_dir: "./plc_bundles"
-  use_cache: true
-
-pds:
-  scan_interval: "15m"
-  timeout: "30s"
-  workers: 10
-  recheck_interval: "5m"
-
-api:
-  host: "0.0.0.0"
-  port: 8080
go.mod (+17 -6)
···
-module github.com/atscan/atscanner
+module github.com/atscan/atscand
 
-go 1.22
+go 1.23.0
 
 require (
 	github.com/gorilla/mux v1.8.1
-	github.com/mattn/go-sqlite3 v1.14.18
+	github.com/lib/pq v1.10.9
 	gopkg.in/yaml.v3 v3.0.1
 )
 
-require github.com/klauspost/compress v1.18.0
+require github.com/klauspost/compress v1.18.1
 
 require (
-	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 	github.com/gorilla/handlers v1.5.2
+	github.com/jackc/pgx/v5 v5.7.6
+	tangled.org/atscan.net/plcbundle v0.3.6
 )
 
-require github.com/felixge/httpsnoop v1.0.3 // indirect
+require (
+	github.com/felixge/httpsnoop v1.0.3 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
+	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.14.1 // indirect
+	golang.org/x/crypto v0.37.0 // indirect
+	golang.org/x/sync v0.13.0 // indirect
+	golang.org/x/text v0.24.0 // indirect
+)
go.sum (+40 -7)
···
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
 github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
 github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w=
 github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
 github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
-github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
-github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk=
+github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
+github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
+github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
+golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
+golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
+golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
+golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
internal/api/handlers.go (+995 -299)
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
-
"encoding/hex"
5
+
"database/sql"
7
6
"encoding/json"
8
7
"fmt"
8
+
"io"
9
9
"net/http"
10
-
"os"
11
-
"path/filepath"
12
10
"strconv"
11
+
"strings"
13
12
"time"
14
13
15
-
"github.com/atscan/atscanner/internal/log"
16
-
"github.com/atscan/atscanner/internal/plc"
17
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
18
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
19
20
)
20
21
21
22
// ===== RESPONSE HELPERS =====
···
37
38
http.Error(r.w, msg, code)
38
39
}
39
40
40
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
41
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
42
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
43
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
44
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
45
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
46
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
47
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
48
49
}
49
50
50
51
// ===== REQUEST HELPERS =====
···
74
75
75
76
// ===== FORMATTING HELPERS =====
76
77
77
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
78
-
return map[string]interface{}{
79
-
"plc_bundle_number": bundle.BundleNumber,
80
-
"start_time": bundle.StartTime,
81
-
"end_time": bundle.EndTime,
82
-
"operation_count": plc.BUNDLE_SIZE,
83
-
"did_count": len(bundle.DIDs),
84
-
"hash": bundle.Hash,
85
-
"compressed_hash": bundle.CompressedHash,
86
-
"compressed_size": bundle.CompressedSize,
87
-
"uncompressed_size": bundle.UncompressedSize,
88
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
89
-
"cursor": bundle.Cursor,
90
-
"prev_bundle_hash": bundle.PrevBundleHash,
91
-
"created_at": bundle.CreatedAt,
92
-
}
93
-
}
94
-
95
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
96
-
return map[string]interface{}{
79
+
response := map[string]interface{}{
97
80
"id": ep.ID,
98
81
"endpoint_type": ep.EndpointType,
99
82
"endpoint": ep.Endpoint,
100
83
"discovered_at": ep.DiscoveredAt,
101
84
"last_checked": ep.LastChecked,
102
85
"status": statusToString(ep.Status),
103
-
"user_count": ep.UserCount,
86
+
}
87
+
88
+
// Add IPs if available
89
+
if ep.IP != "" {
90
+
response["ip"] = ep.IP
91
+
}
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
104
94
}
95
+
96
+
return response
105
97
}
106
98
107
99
func statusToString(status int) string {
···
124
116
Type: r.URL.Query().Get("type"),
125
117
Status: r.URL.Query().Get("status"),
126
118
MinUserCount: getQueryInt64(r, "min_user_count", 0),
127
-
Limit: getQueryInt(r, "limit", 0),
119
+
Limit: getQueryInt(r, "limit", 50),
128
120
Offset: getQueryInt(r, "offset", 0),
129
121
}
130
122
···
142
134
resp.json(response)
143
135
}
144
136
145
-
func (s *Server) handleGetEndpoint(w http.ResponseWriter, r *http.Request) {
137
+
func (s *Server) handleGetEndpointStats(w http.ResponseWriter, r *http.Request) {
146
138
resp := newResponse(w)
147
-
vars := mux.Vars(r)
148
-
endpoint := vars["endpoint"]
139
+
stats, err := s.db.GetEndpointStats(r.Context())
140
+
if err != nil {
141
+
resp.error(err.Error(), http.StatusInternalServerError)
142
+
return
143
+
}
144
+
resp.json(stats)
145
+
}
146
+
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
149
152
endpointType := r.URL.Query().Get("type")
150
153
if endpointType == "" {
151
-
endpointType = "pds"
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
152
156
}
153
157
154
-
ep, err := s.db.GetEndpoint(r.Context(), endpoint, endpointType)
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
155
170
if err != nil {
156
-
resp.error("Endpoint not found", http.StatusNotFound)
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
183
+
// ===== PDS HANDLERS =====
184
+
185
+
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
186
+
resp := newResponse(w)
187
+
188
+
filter := &storage.EndpointFilter{
189
+
Type: "pds",
190
+
Status: r.URL.Query().Get("status"),
191
+
MinUserCount: getQueryInt64(r, "min_user_count", 0),
192
+
Limit: getQueryInt(r, "limit", 50),
193
+
Offset: getQueryInt(r, "offset", 0),
194
+
}
195
+
196
+
pdsServers, err := s.db.GetPDSList(r.Context(), filter)
197
+
if err != nil {
198
+
resp.error(err.Error(), http.StatusInternalServerError)
199
+
return
200
+
}
201
+
202
+
response := make([]map[string]interface{}, len(pdsServers))
203
+
for i, pds := range pdsServers {
204
+
response[i] = formatPDSListItem(pds)
205
+
}
206
+
207
+
resp.json(response)
208
+
}
209
+
210
+
func (s *Server) handleGetPDSDetail(w http.ResponseWriter, r *http.Request) {
211
+
resp := newResponse(w)
212
+
vars := mux.Vars(r)
213
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
214
+
215
+
// FIX: Use r.Context() instead of ctx
216
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
217
+
if err != nil {
218
+
resp.error("PDS not found", http.StatusNotFound)
157
219
return
158
220
}
159
221
160
-
scans, _ := s.db.GetEndpointScans(r.Context(), ep.ID, 10)
222
+
// Get recent scans
223
+
scans, _ := s.db.GetEndpointScans(r.Context(), pds.ID, 10)
161
224
162
-
result := formatEndpointResponse(ep)
163
-
result["recent_scans"] = scans
225
+
result := formatPDSDetail(pds)
226
+
result["recent_scans"] = formatScans(scans)
164
227
165
228
resp.json(result)
166
229
}
167
230
168
-
func (s *Server) handleGetEndpointStats(w http.ResponseWriter, r *http.Request) {
231
+
func (s *Server) handleGetPDSStats(w http.ResponseWriter, r *http.Request) {
232
+
resp := newResponse(w)
233
+
ctx := r.Context()
234
+
235
+
// Get PDS-specific stats
236
+
stats, err := s.db.GetPDSStats(ctx)
237
+
if err != nil {
238
+
resp.error(err.Error(), http.StatusInternalServerError)
239
+
return
240
+
}
241
+
242
+
resp.json(stats)
243
+
}
244
+
245
+
func formatPDSListItem(pds *storage.PDSListItem) map[string]interface{} {
246
+
response := map[string]interface{}{
247
+
"id": pds.ID,
248
+
"endpoint": pds.Endpoint,
249
+
"discovered_at": pds.DiscoveredAt,
250
+
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
252
+
}
253
+
254
+
// Add server_did if available
255
+
if pds.ServerDID != "" {
256
+
response["server_did"] = pds.ServerDID
257
+
}
258
+
259
+
// Add last_checked if available
260
+
if !pds.LastChecked.IsZero() {
261
+
response["last_checked"] = pds.LastChecked
262
+
}
263
+
264
+
// Add data from latest scan (if available)
265
+
if pds.LatestScan != nil {
266
+
response["user_count"] = pds.LatestScan.UserCount
267
+
response["response_time"] = pds.LatestScan.ResponseTime
268
+
if pds.LatestScan.Version != "" {
269
+
response["version"] = pds.LatestScan.Version
270
+
}
271
+
if !pds.LatestScan.ScannedAt.IsZero() {
272
+
response["last_scan"] = pds.LatestScan.ScannedAt
273
+
}
274
+
}
275
+
276
+
// Add IPs if available
277
+
if pds.IP != "" {
278
+
response["ip"] = pds.IP
279
+
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
282
+
}
283
+
284
+
// Add IP info (from ip_infos table via JOIN)
285
+
if pds.IPInfo != nil {
286
+
if pds.IPInfo.City != "" {
287
+
response["city"] = pds.IPInfo.City
288
+
}
289
+
if pds.IPInfo.Country != "" {
290
+
response["country"] = pds.IPInfo.Country
291
+
}
292
+
if pds.IPInfo.CountryCode != "" {
293
+
response["country_code"] = pds.IPInfo.CountryCode
294
+
}
295
+
if pds.IPInfo.ASN > 0 {
296
+
response["asn"] = pds.IPInfo.ASN
297
+
}
298
+
299
+
// Add all network type flags
300
+
response["is_datacenter"] = pds.IPInfo.IsDatacenter
301
+
response["is_vpn"] = pds.IPInfo.IsVPN
302
+
response["is_crawler"] = pds.IPInfo.IsCrawler
303
+
response["is_tor"] = pds.IPInfo.IsTor
304
+
response["is_proxy"] = pds.IPInfo.IsProxy
305
+
306
+
// Add computed is_home field
307
+
response["is_home"] = pds.IPInfo.IsHome()
308
+
}
309
+
310
+
return response
311
+
}
312
+
313
+
func formatPDSDetail(pds *storage.PDSDetail) map[string]interface{} {
314
+
// Start with list item formatting (includes server_did)
315
+
response := formatPDSListItem(&pds.PDSListItem)
316
+
317
+
// Add is_primary flag
318
+
response["is_primary"] = pds.IsPrimary
319
+
320
+
// Add aliases if available
321
+
if len(pds.Aliases) > 0 {
322
+
response["aliases"] = pds.Aliases
323
+
response["alias_count"] = len(pds.Aliases)
324
+
}
325
+
326
+
// Add server_info and version from latest scan (PDSDetail's LatestScan takes precedence)
327
+
if pds.LatestScan != nil {
328
+
// Override with detail-specific scan data
329
+
response["user_count"] = pds.LatestScan.UserCount
330
+
response["response_time"] = pds.LatestScan.ResponseTime
331
+
332
+
if pds.LatestScan.Version != "" {
333
+
response["version"] = pds.LatestScan.Version
334
+
}
335
+
336
+
if !pds.LatestScan.ScannedAt.IsZero() {
337
+
response["last_scan"] = pds.LatestScan.ScannedAt
338
+
}
339
+
340
+
if pds.LatestScan.ServerInfo != nil {
341
+
response["server_info"] = pds.LatestScan.ServerInfo
342
+
}
343
+
}
344
+
345
+
// Add full IP info with computed is_home field
346
+
if pds.IPInfo != nil {
347
+
// Convert IPInfo to map
348
+
ipInfoMap := make(map[string]interface{})
349
+
ipInfoJSON, _ := json.Marshal(pds.IPInfo)
350
+
json.Unmarshal(ipInfoJSON, &ipInfoMap)
351
+
352
+
// Add computed is_home field
353
+
ipInfoMap["is_home"] = pds.IPInfo.IsHome()
354
+
355
+
response["ip_info"] = ipInfoMap
356
+
}
357
+
358
+
return response
359
+
}
360
+
361
+
func formatScans(scans []*storage.EndpointScan) []map[string]interface{} {
362
+
result := make([]map[string]interface{}, len(scans))
363
+
for i, scan := range scans {
364
+
scanMap := map[string]interface{}{
365
+
"id": scan.ID,
366
+
"status": statusToString(scan.Status),
367
+
"scanned_at": scan.ScannedAt,
368
+
}
369
+
370
+
if scan.Status != storage.EndpointStatusOnline && scan.ScanData != nil && scan.ScanData.Metadata != nil {
371
+
if errorMsg, ok := scan.ScanData.Metadata["error"].(string); ok && errorMsg != "" {
372
+
scanMap["error"] = errorMsg
373
+
}
374
+
}
375
+
376
+
if scan.ResponseTime > 0 {
377
+
scanMap["response_time"] = scan.ResponseTime
378
+
}
379
+
380
+
if scan.Version != "" {
381
+
scanMap["version"] = scan.Version
382
+
}
383
+
384
+
if scan.UsedIP != "" {
385
+
scanMap["used_ip"] = scan.UsedIP
386
+
}
387
+
388
+
// Use the top-level UserCount field first
389
+
if scan.UserCount > 0 {
390
+
scanMap["user_count"] = scan.UserCount
391
+
} else if scan.ScanData != nil && scan.ScanData.Metadata != nil {
392
+
// Fallback to metadata for older scans
393
+
if userCount, ok := scan.ScanData.Metadata["user_count"].(int); ok {
394
+
scanMap["user_count"] = userCount
395
+
} else if userCount, ok := scan.ScanData.Metadata["user_count"].(float64); ok {
396
+
scanMap["user_count"] = int(userCount)
397
+
}
398
+
}
399
+
400
+
if scan.ScanData != nil {
401
+
// Include DID count if available
402
+
if scan.ScanData.DIDCount > 0 {
403
+
scanMap["did_count"] = scan.ScanData.DIDCount
404
+
}
405
+
}
406
+
407
+
result[i] = scanMap
408
+
}
409
+
return result
410
+
}
411
+
412
+
// Get repos for a specific PDS
413
+
func (s *Server) handleGetPDSRepos(w http.ResponseWriter, r *http.Request) {
414
+
resp := newResponse(w)
415
+
vars := mux.Vars(r)
416
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
417
+
418
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
419
+
if err != nil {
420
+
resp.error("PDS not found", http.StatusNotFound)
421
+
return
422
+
}
423
+
424
+
// Parse query parameters
425
+
activeOnly := r.URL.Query().Get("active") == "true"
426
+
limit := getQueryInt(r, "limit", 100)
427
+
offset := getQueryInt(r, "offset", 0)
428
+
429
+
// Cap limit at 1000
430
+
if limit > 1000 {
431
+
limit = 1000
432
+
}
433
+
434
+
repos, err := s.db.GetPDSRepos(r.Context(), pds.ID, activeOnly, limit, offset)
435
+
if err != nil {
436
+
resp.error(err.Error(), http.StatusInternalServerError)
437
+
return
438
+
}
439
+
440
+
// Get total from latest scan (same as user_count)
441
+
totalRepos := 0
442
+
if pds.LatestScan != nil {
443
+
totalRepos = pds.LatestScan.UserCount
444
+
}
445
+
446
+
resp.json(map[string]interface{}{
447
+
"endpoint": pds.Endpoint,
448
+
"total_repos": totalRepos,
449
+
"returned": len(repos),
450
+
"limit": limit,
451
+
"offset": offset,
452
+
"repos": repos,
453
+
})
454
+
}
455
+
456
+
// Find which PDS hosts a specific DID
457
+
func (s *Server) handleGetDIDRepos(w http.ResponseWriter, r *http.Request) {
169
458
resp := newResponse(w)
170
-
stats, err := s.db.GetEndpointStats(r.Context())
459
+
vars := mux.Vars(r)
460
+
did := vars["did"]
461
+
462
+
repos, err := s.db.GetReposByDID(r.Context(), did)
171
463
if err != nil {
172
464
resp.error(err.Error(), http.StatusInternalServerError)
173
465
return
174
466
}
467
+
468
+
resp.json(map[string]interface{}{
469
+
"did": did,
470
+
"pds_count": len(repos),
471
+
"hosting_on": repos,
472
+
})
473
+
}
474
+
475
+
// Add to internal/api/handlers.go
476
+
func (s *Server) handleGetPDSRepoStats(w http.ResponseWriter, r *http.Request) {
477
+
resp := newResponse(w)
478
+
vars := mux.Vars(r)
479
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
480
+
481
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
482
+
if err != nil {
483
+
resp.error("PDS not found", http.StatusNotFound)
484
+
return
485
+
}
486
+
487
+
stats, err := s.db.GetPDSRepoStats(r.Context(), pds.ID)
488
+
if err != nil {
489
+
resp.error(err.Error(), http.StatusInternalServerError)
490
+
return
491
+
}
492
+
175
493
resp.json(stats)
176
494
}
177
495
496
+
// ===== GLOBAL DID HANDLER =====
497
+
498
+
// handleGetGlobalDID provides a consolidated view of a DID
499
+
func (s *Server) handleGetGlobalDID(w http.ResponseWriter, r *http.Request) {
500
+
resp := newResponse(w)
501
+
vars := mux.Vars(r)
502
+
did := vars["did"]
503
+
ctx := r.Context()
504
+
505
+
// Get DID info (now includes handle and pds from database)
506
+
didInfo, err := s.db.GetGlobalDIDInfo(ctx, did)
507
+
if err != nil {
508
+
if err == sql.ErrNoRows {
509
+
if !s.plcIndexDIDs {
510
+
resp.error("DID not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
511
+
} else {
512
+
resp.error("DID not found in PLC index.", http.StatusNotFound)
513
+
}
514
+
} else {
515
+
resp.error(err.Error(), http.StatusInternalServerError)
516
+
}
517
+
return
518
+
}
519
+
520
+
// Optionally include latest operation details if requested
521
+
var latestOperation *plc.PLCOperation
522
+
if r.URL.Query().Get("include_operation") == "true" && len(didInfo.BundleNumbers) > 0 {
523
+
lastBundleNum := didInfo.BundleNumbers[len(didInfo.BundleNumbers)-1]
524
+
ops, err := s.bundleManager.LoadBundleOperations(ctx, lastBundleNum)
525
+
if err != nil {
526
+
log.Error("Failed to load bundle %d for DID %s: %v", lastBundleNum, did, err)
527
+
} else {
528
+
// Find latest operation for this DID (in reverse)
529
+
for i := len(ops) - 1; i >= 0; i-- {
530
+
if ops[i].DID == did {
531
+
latestOperation = &ops[i]
532
+
break
533
+
}
534
+
}
535
+
}
536
+
}
537
+
538
+
result := map[string]interface{}{
539
+
"did": didInfo.DID,
540
+
"handle": didInfo.Handle, // From database!
541
+
"current_pds": didInfo.CurrentPDS, // From database!
542
+
"plc_index_created_at": didInfo.CreatedAt,
543
+
"plc_bundle_history": didInfo.BundleNumbers,
544
+
"pds_hosting_on": didInfo.HostingOn,
545
+
}
546
+
547
+
// Only include operation if requested
548
+
if latestOperation != nil {
549
+
result["latest_plc_operation"] = latestOperation
550
+
}
551
+
552
+
resp.json(result)
553
+
}
554
+
555
+
// handleGetDIDByHandle resolves a handle to a DID
556
+
func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) {
557
+
resp := newResponse(w)
558
+
vars := mux.Vars(r)
559
+
handle := vars["handle"]
560
+
561
+
// Normalize handle (remove @ prefix if present)
562
+
handle = strings.TrimPrefix(handle, "@")
563
+
564
+
// Look up DID by handle
565
+
didRecord, err := s.db.GetDIDByHandle(r.Context(), handle)
566
+
if err != nil {
567
+
if err == sql.ErrNoRows {
568
+
if !s.plcIndexDIDs {
569
+
resp.error("Handle not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
570
+
} else {
571
+
resp.error("Handle not found.", http.StatusNotFound)
572
+
}
573
+
} else {
574
+
resp.error(err.Error(), http.StatusInternalServerError)
575
+
}
576
+
return
577
+
}
578
+
579
+
// Return just the handle and DID
580
+
resp.json(map[string]string{
581
+
"handle": handle,
582
+
"did": didRecord.DID,
583
+
})
584
+
}
585
+
178
586
// ===== DID HANDLERS =====
179
587
180
588
func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) {
···
182
590
vars := mux.Vars(r)
183
591
did := vars["did"]
184
592
185
-
bundles, err := s.db.GetBundlesForDID(r.Context(), did)
593
+
// Fast lookup using dids table
594
+
didRecord, err := s.db.GetDIDRecord(r.Context(), did)
186
595
if err != nil {
187
-
resp.error(err.Error(), http.StatusInternalServerError)
596
+
if err == sql.ErrNoRows {
597
+
// NEW: Provide helpful message if indexing is disabled
598
+
resp.error("DID not found. Note: DID indexing may be disabled in configuration.", http.StatusNotFound)
599
+
} else {
600
+
resp.error(err.Error(), http.StatusInternalServerError)
601
+
}
188
602
return
189
603
}
190
604
191
-
if len(bundles) == 0 {
192
-
resp.error("DID not found in bundles", http.StatusNotFound)
605
+
// Get the last bundle number where this DID appeared
606
+
if len(didRecord.BundleNumbers) == 0 {
607
+
resp.error("DID has no bundle history", http.StatusInternalServerError)
193
608
return
194
609
}
195
610
196
-
lastBundle := bundles[len(bundles)-1]
197
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), lastBundle.BundleNumber)
611
+
lastBundleNum := didRecord.BundleNumbers[len(didRecord.BundleNumbers)-1]
612
+
613
+
// Load last bundle to get latest operation
614
+
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), lastBundleNum)
198
615
if err != nil {
199
616
resp.error(fmt.Sprintf("failed to load bundle: %v", err), http.StatusInternalServerError)
200
617
return
···
216
633
vars := mux.Vars(r)
217
634
did := vars["did"]
218
635
219
-
bundles, err := s.db.GetBundlesForDID(r.Context(), did)
636
+
// Fast lookup using dids table
637
+
didRecord, err := s.db.GetDIDRecord(r.Context(), did)
220
638
if err != nil {
221
-
resp.error(err.Error(), http.StatusInternalServerError)
222
-
return
223
-
}
224
-
225
-
if len(bundles) == 0 {
226
-
resp.error("DID not found in bundles", http.StatusNotFound)
639
+
if err == sql.ErrNoRows {
640
+
resp.error("DID not found", http.StatusNotFound)
641
+
} else {
642
+
resp.error(err.Error(), http.StatusInternalServerError)
643
+
}
227
644
return
228
645
}
229
646
230
647
var allOperations []plc.DIDHistoryEntry
231
648
var currentOp *plc.PLCOperation
232
649
233
-
for _, bundle := range bundles {
234
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
650
+
// Load operations from each bundle
651
+
for _, bundleNum := range didRecord.BundleNumbers {
652
+
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundleNum)
235
653
if err != nil {
236
-
log.Error("Warning: failed to load bundle: %v", err)
654
+
log.Error("Warning: failed to load bundle %d: %v", bundleNum, err)
237
655
continue
238
656
}
239
657
···
241
659
if op.DID == did {
242
660
entry := plc.DIDHistoryEntry{
243
661
Operation: op,
244
-
PLCBundle: fmt.Sprintf("%06d", bundle.BundleNumber),
662
+
PLCBundle: fmt.Sprintf("%06d", bundleNum),
245
663
}
246
664
allOperations = append(allOperations, entry)
247
665
currentOp = &op
···
256
674
})
257
675
}
258
676
677
+
func (s *Server) handleGetDIDStats(w http.ResponseWriter, r *http.Request) {
678
+
resp := newResponse(w)
679
+
ctx := r.Context()
680
+
681
+
totalDIDs, err := s.db.GetTotalDIDCount(ctx)
682
+
if err != nil {
683
+
resp.error(err.Error(), http.StatusInternalServerError)
684
+
return
685
+
}
686
+
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
688
+
resp.json(map[string]interface{}{
689
+
"total_unique_dids": totalDIDs,
690
+
"last_bundle": lastBundle,
691
+
})
692
+
}
693
+
259
694
// ===== PLC BUNDLE HANDLERS =====
260
695
261
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
262
697
resp := newResponse(w)
263
-
264
698
bundleNum, err := getBundleNumber(r)
265
699
if err != nil {
266
700
resp.error("invalid bundle number", http.StatusBadRequest)
267
701
return
268
702
}
269
703
270
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
271
707
if err != nil {
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
717
+
return
718
+
}
272
719
resp.error("bundle not found", http.StatusNotFound)
273
720
return
274
721
}
275
722
276
-
resp.json(formatBundleResponse(bundle))
723
+
resp.json(formatBundleMetadata(bundleMeta))
724
+
}
725
+
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
743
+
}
744
+
}
745
+
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
752
+
return map[string]interface{}{
753
+
"plc_bundle_number": bundleNum,
754
+
"is_upcoming": true,
755
+
"status": "empty",
756
+
"message": "No operations in mempool yet",
757
+
"operation_count": 0,
758
+
}, nil
759
+
}
760
+
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
771
+
}
772
+
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
775
+
}
776
+
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
780
+
}
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
783
+
}
784
+
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
789
+
}
790
+
791
+
// Get previous bundle info
792
+
if bundleNum > 1 {
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
796
+
}
797
+
}
798
+
799
+
return result, nil
277
800
}
278
801
279
802
func (s *Server) handleGetPLCBundleDIDs(w http.ResponseWriter, r *http.Request) {
···
285
808
return
286
809
}
287
810
288
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
289
813
if err != nil {
290
814
resp.error("bundle not found", http.StatusNotFound)
291
815
return
292
816
}
293
817
294
818
resp.json(map[string]interface{}{
295
-
"plc_bundle_number": bundle.BundleNumber,
296
-
"did_count": len(bundle.DIDs),
297
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
298
822
})
299
823
}
300
824
···
309
833
310
834
compressed := r.URL.Query().Get("compressed") != "false"
311
835
312
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
837
+
if err == nil {
838
+
// Bundle exists, serve it normally
839
+
resp.bundleHeaders(bundle)
840
+
841
+
if compressed {
842
+
s.serveCompressedBundle(w, r, bundle)
843
+
} else {
844
+
s.serveUncompressedBundle(w, r, bundle)
845
+
}
846
+
return
847
+
}
848
+
849
+
// Bundle not found - check if it's the upcoming bundle
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
851
+
if bundleNum == lastBundle+1 {
852
+
// This is the upcoming bundle - serve from mempool
853
+
s.serveUpcomingBundle(w, bundleNum)
854
+
return
855
+
}
856
+
857
+
// Not an upcoming bundle, just not found
858
+
resp.error("bundle not found", http.StatusNotFound)
859
+
}
860
+
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
865
+
866
+
if !ok || count == 0 {
867
+
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
868
+
return
869
+
}
870
+
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
313
873
if err != nil {
314
-
resp.error("bundle not found", http.StatusNotFound)
874
+
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
315
875
return
316
876
}
317
877
318
-
resp.bundleHeaders(bundle)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
880
+
return
881
+
}
319
882
320
-
if compressed {
321
-
s.serveCompressedBundle(w, r, bundle)
322
-
} else {
323
-
s.serveUncompressedBundle(w, r, bundle)
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
886
+
887
+
// Extract unique DIDs
888
+
didSet := make(map[string]bool)
889
+
for _, op := range ops {
890
+
didSet[op.DID] = true
891
+
}
892
+
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
899
+
// Get previous bundle hash
900
+
prevBundleHash := ""
901
+
if bundleNum > 1 {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
903
+
prevBundleHash = prevBundle.Hash
904
+
}
905
+
}
906
+
907
+
// Set headers
908
+
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
909
+
w.Header().Set("X-Bundle-Is-Upcoming", "true")
910
+
w.Header().Set("X-Bundle-Status", "preview")
911
+
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
912
+
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
916
+
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
917
+
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
919
+
920
+
w.Header().Set("Content-Type", "application/jsonl")
921
+
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
922
+
923
+
// Stream operations as JSONL
924
+
w.WriteHeader(http.StatusOK)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
324
936
}
325
937
}
326
938
327
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
328
940
resp := newResponse(w)
329
-
path := bundle.GetFilePath(s.plcBundleDir)
330
941
331
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
332
944
if err != nil {
333
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
334
946
return
335
947
}
336
-
defer file.Close()
337
-
338
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
339
949
340
950
w.Header().Set("Content-Type", "application/zstd")
341
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
342
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
343
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
344
954
345
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
346
958
}
347
959
348
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
349
961
resp := newResponse(w)
350
962
351
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
352
965
if err != nil {
353
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
354
967
return
355
968
}
356
-
357
-
// Serialize to JSONL
358
-
var buf []byte
359
-
for _, op := range ops {
360
-
buf = append(buf, op.RawJSON...)
361
-
buf = append(buf, '\n')
362
-
}
363
-
364
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
365
-
compressedSize := int64(0)
366
-
if fileInfo != nil {
367
-
compressedSize = fileInfo.Size()
368
-
}
969
+
defer reader.Close()
369
970
370
971
w.Header().Set("Content-Type", "application/jsonl")
371
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
372
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
373
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
374
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
375
-
if compressedSize > 0 {
376
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
377
978
}
378
979
980
+
// Stream the data directly to the response
379
981
w.WriteHeader(http.StatusOK)
380
-
w.Write(buf)
982
+
io.Copy(w, reader)
381
983
}
382
984
383
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
384
986
resp := newResponse(w)
385
987
limit := getQueryInt(r, "limit", 50)
386
988
387
-
bundles, err := s.db.GetBundles(r.Context(), limit)
388
-
if err != nil {
389
-
resp.error(err.Error(), http.StatusInternalServerError)
390
-
return
391
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
392
990
393
991
response := make([]map[string]interface{}, len(bundles))
394
992
for i, bundle := range bundles {
395
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
396
994
}
397
995
398
996
resp.json(response)
···
401
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
402
1000
resp := newResponse(w)
403
1001
404
-
count, size, err := s.db.GetBundleStats(r.Context())
405
-
if err != nil {
406
-
resp.error(err.Error(), http.StatusInternalServerError)
407
-
return
408
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
409
1008
410
1009
resp.json(map[string]interface{}{
411
-
"plc_bundle_count": count,
412
-
"total_size": size,
413
-
"total_size_mb": float64(size) / 1024 / 1024,
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
414
1015
})
415
1016
}
416
1017
···
418
1019
419
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
420
1021
resp := newResponse(w)
421
-
ctx := r.Context()
422
1022
423
-
count, err := s.db.GetMempoolCount(ctx)
424
-
if err != nil {
425
-
resp.error(err.Error(), http.StatusInternalServerError)
426
-
return
427
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
428
1025
429
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
430
-
if err != nil {
431
-
resp.error(err.Error(), http.StatusInternalServerError)
432
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
433
1030
}
434
1031
435
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
436
-
if err != nil {
437
-
resp.error(err.Error(), http.StatusInternalServerError)
438
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
439
1036
}
440
1037
441
-
result := map[string]interface{}{
442
-
"operation_count": count,
443
-
"unique_did_count": uniqueDIDCount,
444
-
"uncompressed_size": uncompressedSize,
445
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
446
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
447
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
448
1042
449
-
if count > 0 {
450
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
451
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
452
1045
453
-
if count < plc.BUNDLE_SIZE {
454
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
455
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 { // bundle size is fixed at 10,000 operations
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
456
1049
if timeSpan > 0 {
457
1050
opsPerSecond := float64(count) / timeSpan
458
1051
if opsPerSecond > 0 {
459
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
460
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
461
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
462
-
result["operations_needed"] = remainingOps
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
463
1057
result["current_rate_per_second"] = opsPerSecond
1058
+
result["operations_needed"] = remainingOps
464
1059
}
465
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
466
1066
}
467
-
} else {
468
-
result["estimated_next_bundle_time"] = time.Now()
469
-
result["operations_needed"] = 0
470
1067
}
471
1068
}
472
1069
} else {
1070
+
// Empty mempool
473
1071
result["mempool_start_time"] = nil
474
1072
result["estimated_next_bundle_time"] = nil
475
1073
}
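The estimate above is plain linear extrapolation: the observed rate is count/timeSpan, and the remaining time is (10000 - count)/rate. A worked sketch with hypothetical figures:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical: 7,500 operations observed over a 3,000-second window.
	count := 7500
	timeSpan := 3000.0 // seconds between first and last mempool operation

	opsPerSecond := float64(count) / timeSpan             // 2.5 ops/s
	remainingOps := 10000 - count                         // 2,500 operations still needed
	secondsNeeded := float64(remainingOps) / opsPerSecond // 1,000 s

	eta := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
	fmt.Printf("rate=%.1f ops/s progress=%.1f%% eta=%s\n",
		opsPerSecond, float64(count)/100.0, eta.Format(time.RFC3339))
}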
···
494
1092
495
1093
// ===== VERIFICATION HANDLERS =====
496
1094
497
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
498
-
resp := newResponse(w)
499
-
vars := mux.Vars(r)
500
-
501
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
502
-
if err != nil {
503
-
resp.error("Invalid bundle number", http.StatusBadRequest)
504
-
return
505
-
}
506
-
507
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
508
-
if err != nil {
509
-
resp.error("Bundle not found", http.StatusNotFound)
510
-
return
511
-
}
512
-
513
-
// Fetch from PLC and verify
514
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
515
-
if err != nil {
516
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
517
-
return
518
-
}
519
-
520
-
remoteHash := computeOperationsHash(remoteOps)
521
-
verified := bundle.Hash == remoteHash
522
-
523
-
resp.json(map[string]interface{}{
524
-
"bundle_number": bundleNumber,
525
-
"verified": verified,
526
-
"local_hash": bundle.Hash,
527
-
"remote_hash": remoteHash,
528
-
"local_op_count": plc.BUNDLE_SIZE,
529
-
"remote_op_count": len(remoteOps),
530
-
"boundary_cids_used": len(prevCIDs),
531
-
})
532
-
}
533
-
534
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
535
-
var after string
536
-
var prevBoundaryCIDs map[string]bool
537
-
538
-
if bundleNum > 1 {
539
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
540
-
if err != nil {
541
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
542
-
}
543
-
544
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
545
-
546
-
if len(prevBundle.BoundaryCIDs) > 0 {
547
-
prevBoundaryCIDs = make(map[string]bool)
548
-
for _, cid := range prevBundle.BoundaryCIDs {
549
-
prevBoundaryCIDs[cid] = true
550
-
}
551
-
}
552
-
}
553
-
554
-
var allRemoteOps []plc.PLCOperation
555
-
seenCIDs := make(map[string]bool)
556
-
557
-
for cid := range prevBoundaryCIDs {
558
-
seenCIDs[cid] = true
559
-
}
560
-
561
-
currentAfter := after
562
-
maxFetches := 20
563
-
564
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
565
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
566
-
Count: 1000,
567
-
After: currentAfter,
568
-
})
569
-
if err != nil || len(batch) == 0 {
570
-
break
571
-
}
572
-
573
-
for _, op := range batch {
574
-
if !seenCIDs[op.CID] {
575
-
seenCIDs[op.CID] = true
576
-
allRemoteOps = append(allRemoteOps, op)
577
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
578
-
break
579
-
}
580
-
}
581
-
}
582
-
583
-
if len(batch) > 0 {
584
-
lastOp := batch[len(batch)-1]
585
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
586
-
}
587
-
588
-
if len(batch) < 1000 {
589
-
break
590
-
}
591
-
}
592
-
593
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
594
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
595
-
}
596
-
597
-
return allRemoteOps, prevBoundaryCIDs, nil
598
-
}
599
-
600
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
601
1096
resp := newResponse(w)
602
-
ctx := r.Context()
603
1097
604
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
605
-
if err != nil {
606
-
resp.error(err.Error(), http.StatusInternalServerError)
607
-
return
608
-
}
609
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
610
1099
if lastBundle == 0 {
611
1100
resp.json(map[string]interface{}{
612
1101
"status": "empty",
···
620
1109
var errorMsg string
621
1110
622
1111
for i := 1; i <= lastBundle; i++ {
623
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
624
1113
if err != nil {
625
1114
valid = false
626
1115
brokenAt = i
···
629
1118
}
630
1119
631
1120
if i > 1 {
632
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
633
1122
if err != nil {
634
1123
valid = false
635
1124
brokenAt = i
···
637
1126
break
638
1127
}
639
1128
640
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
641
1130
valid = false
642
1131
brokenAt = i
643
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
644
1133
break
645
1134
}
646
1135
}
···
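The walk above enforces a single invariant: every bundle's Parent must equal the previous bundle's Hash. The check in isolation, with simplified stand-in types (the real metadata type lives in the bundle manager):

package main

import "fmt"

// meta is a simplified stand-in for bundle metadata.
type meta struct {
	Number int
	Hash   string
	Parent string
}

// verifyChain returns the number of the first bundle whose parent link is
// broken, or 0 if the chain is intact.
func verifyChain(bundles []meta) int {
	for i := 1; i < len(bundles); i++ {
		if bundles[i].Parent != bundles[i-1].Hash {
			return bundles[i].Number
		}
	}
	return 0
}

func main() {
	chain := []meta{
		{1, "aaa", ""},
		{2, "bbb", "aaa"},
		{3, "ccc", "xxx"}, // broken link
	}
	fmt.Println(verifyChain(chain)) // prints 3
}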
661
1150
662
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
663
1152
resp := newResponse(w)
664
-
ctx := r.Context()
665
1153
666
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
667
-
if err != nil {
668
-
resp.error(err.Error(), http.StatusInternalServerError)
669
-
return
670
-
}
671
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
672
1155
if lastBundle == 0 {
673
1156
resp.json(map[string]interface{}{
674
1157
"chain_length": 0,
···
677
1160
return
678
1161
}
679
1162
680
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
681
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
682
-
count, size, _ := s.db.GetBundleStats(ctx)
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
683
1166
684
1167
resp.json(map[string]interface{}{
685
-
"chain_length": lastBundle,
686
-
"total_bundles": count,
687
-
"total_size_mb": float64(size) / 1024 / 1024,
688
-
"chain_start_time": firstBundle.StartTime,
689
-
"chain_end_time": lastBundleData.EndTime,
690
-
"chain_head_hash": lastBundleData.Hash,
691
-
"first_prev_hash": firstBundle.PrevBundleHash,
692
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
693
1177
})
694
1178
}
695
1179
···
710
1194
return
711
1195
}
712
1196
713
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
714
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
715
1199
716
1200
w.Header().Set("Content-Type", "application/jsonl")
···
750
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
751
1235
}
752
1236
753
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
754
1238
if afterTime.IsZero() {
755
1239
return 1
756
1240
}
757
1241
758
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
759
-
if err != nil {
760
-
return 1
761
-
}
762
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
763
1243
if foundBundle > 1 {
764
1244
return foundBundle - 1
765
1245
}
···
770
1250
var allOps []plc.PLCOperation
771
1251
seenCIDs := make(map[string]bool)
772
1252
773
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
774
1254
775
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
776
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
800
1280
return allOps
801
1281
}
802
1282
1283
+
func (s *Server) handleGetCountryLeaderboard(w http.ResponseWriter, r *http.Request) {
1284
+
resp := newResponse(w)
1285
+
1286
+
stats, err := s.db.GetCountryLeaderboard(r.Context())
1287
+
if err != nil {
1288
+
resp.error(err.Error(), http.StatusInternalServerError)
1289
+
return
1290
+
}
1291
+
1292
+
resp.json(stats)
1293
+
}
1294
+
1295
+
func (s *Server) handleGetVersionStats(w http.ResponseWriter, r *http.Request) {
1296
+
resp := newResponse(w)
1297
+
1298
+
stats, err := s.db.GetVersionStats(r.Context())
1299
+
if err != nil {
1300
+
resp.error(err.Error(), http.StatusInternalServerError)
1301
+
return
1302
+
}
1303
+
1304
+
// Add summary totals
1305
+
var totalPDS int64
1306
+
var totalUsers int64
1307
+
for _, stat := range stats {
1308
+
totalPDS += stat.PDSCount
1309
+
totalUsers += stat.TotalUsers
1310
+
}
1311
+
1312
+
result := map[string]interface{}{
1313
+
"versions": stats,
1314
+
"summary": map[string]interface{}{
1315
+
"total_pds_with_version": totalPDS,
1316
+
"total_users": totalUsers,
1317
+
"version_count": len(stats),
1318
+
},
1319
+
}
1320
+
1321
+
resp.json(result)
1322
+
}
1323
+
803
1324
// ===== HEALTH HANDLER =====
804
1325
805
1326
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
806
1327
newResponse(w).json(map[string]string{"status": "ok"})
807
1328
}
808
1329
809
-
// ===== UTILITY FUNCTIONS =====
1330
+
func (s *Server) handleGetJobStatus(w http.ResponseWriter, r *http.Request) {
1331
+
resp := newResponse(w)
1332
+
tracker := monitor.GetTracker()
1333
+
1334
+
jobs := tracker.GetAllJobs()
1335
+
1336
+
result := make(map[string]interface{})
1337
+
for name, job := range jobs {
1338
+
jobData := map[string]interface{}{
1339
+
"name": job.Name,
1340
+
"status": job.Status,
1341
+
"run_count": job.RunCount,
1342
+
"success_count": job.SuccessCount,
1343
+
"error_count": job.ErrorCount,
1344
+
}
1345
+
1346
+
if !job.LastRun.IsZero() {
1347
+
jobData["last_run"] = job.LastRun
1348
+
jobData["last_duration"] = job.Duration.String()
1349
+
}
1350
+
1351
+
if !job.NextRun.IsZero() {
1352
+
jobData["next_run"] = job.NextRun
1353
+
jobData["next_run_in"] = time.Until(job.NextRun).Round(time.Second).String()
1354
+
}
1355
+
1356
+
if job.Status == "running" {
1357
+
jobData["running_for"] = job.Duration.Round(time.Second).String()
1358
+
1359
+
if job.Progress != nil {
1360
+
jobData["progress"] = job.Progress
1361
+
}
1362
+
1363
+
// Add worker status
1364
+
workers := tracker.GetWorkers(name)
1365
+
if len(workers) > 0 {
1366
+
jobData["workers"] = workers
1367
+
}
1368
+
}
1369
+
1370
+
if job.Error != "" {
1371
+
jobData["error"] = job.Error
1372
+
}
1373
+
1374
+
result[name] = jobData
1375
+
}
1376
+
1377
+
resp.json(result)
1378
+
}
1379
+
1380
+
func (s *Server) handleGetDuplicateEndpoints(w http.ResponseWriter, r *http.Request) {
1381
+
resp := newResponse(w)
1382
+
1383
+
duplicates, err := s.db.GetDuplicateEndpoints(r.Context())
1384
+
if err != nil {
1385
+
resp.error(err.Error(), http.StatusInternalServerError)
1386
+
return
1387
+
}
1388
+
1389
+
// Format response
1390
+
result := make([]map[string]interface{}, 0)
1391
+
for serverDID, endpoints := range duplicates {
1392
+
result = append(result, map[string]interface{}{
1393
+
"server_did": serverDID,
1394
+
"primary": endpoints[0], // First discovered
1395
+
"aliases": endpoints[1:], // Other domains
1396
+
"alias_count": len(endpoints) - 1,
1397
+
"total_domains": len(endpoints),
1398
+
})
1399
+
}
1400
+
1401
+
resp.json(map[string]interface{}{
1402
+
"duplicates": result,
1403
+
"total_duplicate_servers": len(duplicates),
1404
+
})
1405
+
}
1406
+
1407
+
func (s *Server) handleGetPLCHistory(w http.ResponseWriter, r *http.Request) {
1408
+
resp := newResponse(w)
1409
+
1410
+
limit := getQueryInt(r, "limit", 0)
1411
+
fromBundle := getQueryInt(r, "from", 1)
1412
+
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1415
+
if err != nil {
1416
+
resp.error(err.Error(), http.StatusInternalServerError)
1417
+
return
1418
+
}
1419
+
1420
+
var totalOps int64
1421
+
var totalUncompressed int64
1422
+
var totalCompressed int64
1423
+
1424
+
for _, point := range history {
1425
+
totalOps += int64(point.OperationCount)
1426
+
totalUncompressed += point.UncompressedSize
1427
+
totalCompressed += point.CompressedSize
1428
+
}
1429
+
1430
+
result := map[string]interface{}{
1431
+
"data": history,
1432
+
"summary": map[string]interface{}{
1433
+
"days": len(history),
1434
+
"total_operations": totalOps,
1435
+
"total_uncompressed": totalUncompressed,
1436
+
"total_compressed": totalCompressed,
1437
+
"compression_ratio": 0.0,
1438
+
},
1439
+
}
1440
+
1441
+
if len(history) > 0 {
1442
+
result["summary"].(map[string]interface{})["first_date"] = history[0].Date
1443
+
result["summary"].(map[string]interface{})["last_date"] = history[len(history)-1].Date
1444
+
result["summary"].(map[string]interface{})["time_span_days"] = len(history)
810
1445
811
-
func computeOperationsHash(ops []plc.PLCOperation) string {
812
-
var jsonlData []byte
813
-
for _, op := range ops {
814
-
jsonlData = append(jsonlData, op.RawJSON...)
815
-
jsonlData = append(jsonlData, '\n')
1446
+
if totalCompressed > 0 {
1447
+
result["summary"].(map[string]interface{})["compression_ratio"] = float64(totalUncompressed) / float64(totalCompressed)
1448
+
}
1449
+
1450
+
result["summary"].(map[string]interface{})["avg_operations_per_day"] = totalOps / int64(len(history))
1451
+
result["summary"].(map[string]interface{})["avg_size_per_day"] = totalUncompressed / int64(len(history))
816
1452
}
817
-
hash := sha256.Sum256(jsonlData)
818
-
return hex.EncodeToString(hash[:])
1453
+
1454
+
resp.json(result)
1455
+
}
1456
+
1457
+
// ===== DEBUG HANDLERS =====
1458
+
1459
+
func (s *Server) handleGetDBSizes(w http.ResponseWriter, r *http.Request) {
1460
+
resp := newResponse(w)
1461
+
ctx := r.Context()
1462
+
schema := "public" // Or make configurable if needed
1463
+
1464
+
tableSizes, err := s.db.GetTableSizes(ctx, schema)
1465
+
if err != nil {
1466
+
log.Error("Failed to get table sizes: %v", err)
1467
+
resp.error("Failed to retrieve table sizes", http.StatusInternalServerError)
1468
+
return
1469
+
}
1470
+
1471
+
indexSizes, err := s.db.GetIndexSizes(ctx, schema)
1472
+
if err != nil {
1473
+
log.Error("Failed to get index sizes: %v", err)
1474
+
resp.error("Failed to retrieve index sizes", http.StatusInternalServerError)
1475
+
return
1476
+
}
1477
+
1478
+
resp.json(map[string]interface{}{
1479
+
"schema": schema,
1480
+
"tables": tableSizes,
1481
+
"indexes": indexSizes,
1482
+
"retrievedAt": time.Now().UTC(),
1483
+
})
1484
+
}
1485
+
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1488
+
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1493
+
}
1494
+
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1499
+
}
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1506
+
}
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1509
+
1510
+
func normalizeEndpoint(endpoint string) string {
1511
+
endpoint = strings.TrimPrefix(endpoint, "https://")
1512
+
endpoint = strings.TrimPrefix(endpoint, "http://")
1513
+
endpoint = strings.TrimSuffix(endpoint, "/")
1514
+
return endpoint
819
1515
}
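For illustration, the normalization yields a bare hostname regardless of scheme or trailing slash (inputs hypothetical):

package main

import (
	"fmt"
	"strings"
)

func normalizeEndpoint(endpoint string) string {
	endpoint = strings.TrimPrefix(endpoint, "https://")
	endpoint = strings.TrimPrefix(endpoint, "http://")
	endpoint = strings.TrimSuffix(endpoint, "/")
	return endpoint
}

func main() {
	fmt.Println(normalizeEndpoint("https://pds.example.com/")) // pds.example.com
	fmt.Println(normalizeEndpoint("http://pds.example.com"))   // pds.example.com
}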
+36
-13
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
23
+
plcIndexDIDs bool
24
24
}
25
25
26
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
27
-
bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db)
28
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
29
27
s := &Server{
30
28
router: mux.NewRouter(),
31
29
db: db,
32
-
plcClient: plc.NewClient(plcCfg.DirectoryURL),
33
30
plcBundleDir: plcCfg.BundleDir,
34
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
32
+
plcIndexDIDs: plcCfg.IndexDIDs,
35
33
}
36
34
37
35
s.setupRoutes()
···
56
54
func (s *Server) setupRoutes() {
57
55
api := s.router.PathPrefix("/api/v1").Subrouter()
58
56
59
-
// Endpoint routes (replaces PDS routes)
57
+
// Generic endpoints (keep as-is)
60
58
api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
61
59
api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
62
-
api.HandleFunc("/endpoints/{endpoint}", s.handleGetEndpoint).Methods("GET")
60
+
api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
61
+
62
+
// PDS-specific endpoints (virtual, created via JOINs)
63
+
api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
64
+
api.HandleFunc("/pds/stats", s.handleGetPDSStats).Methods("GET")
65
+
api.HandleFunc("/pds/countries", s.handleGetCountryLeaderboard).Methods("GET")
66
+
api.HandleFunc("/pds/versions", s.handleGetVersionStats).Methods("GET")
67
+
api.HandleFunc("/pds/duplicates", s.handleGetDuplicateEndpoints).Methods("GET")
68
+
api.HandleFunc("/pds/{endpoint}", s.handleGetPDSDetail).Methods("GET")
69
+
70
+
// PDS repos
71
+
api.HandleFunc("/pds/{endpoint}/repos", s.handleGetPDSRepos).Methods("GET")
72
+
api.HandleFunc("/pds/{endpoint}/repos/stats", s.handleGetPDSRepoStats).Methods("GET")
73
+
api.HandleFunc("/pds/repos/{did}", s.handleGetDIDRepos).Methods("GET")
74
+
75
+
// Global DID routes
76
+
api.HandleFunc("/did/{did}", s.handleGetGlobalDID).Methods("GET")
77
+
api.HandleFunc("/handle/{handle}", s.handleGetDIDByHandle).Methods("GET") // NEW
63
78
64
79
// PLC Bundle routes
65
80
api.HandleFunc("/plc/bundles", s.handleGetPLCBundles).Methods("GET")
···
69
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
70
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
71
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
72
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
88
+
89
+
// PLC history/metrics
90
+
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
73
91
74
92
// PLC Export endpoint (simulates PLC directory)
75
93
api.HandleFunc("/plc/export", s.handlePLCExport).Methods("GET")
···
77
95
// DID routes
78
96
api.HandleFunc("/plc/did/{did}", s.handleGetDID).Methods("GET")
79
97
api.HandleFunc("/plc/did/{did}/history", s.handleGetDIDHistory).Methods("GET")
98
+
api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET")
80
99
81
100
// Mempool routes
82
101
api.HandleFunc("/mempool/stats", s.handleGetMempoolStats).Methods("GET")
83
102
84
103
// Metrics routes
85
104
api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET")
105
+
106
+
// Debug Endpoints
107
+
api.HandleFunc("/debug/db/sizes", s.handleGetDBSizes).Methods("GET")
108
+
api.HandleFunc("/jobs", s.handleGetJobStatus).Methods("GET")
86
109
87
110
// Health check
88
111
s.router.HandleFunc("/health", s.handleHealth).Methods("GET")
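For orientation, any of the GET routes above can be consumed as plain JSON; a minimal sketch against a hypothetical local instance on the default port:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Hypothetical local instance; the /api/v1 prefix comes from setupRoutes.
	resp, err := http.Get("http://localhost:8080/api/v1/pds/stats")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var stats map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", stats)
}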
+8
-3
internal/config/config.go
···
16
16
17
17
type DatabaseConfig struct {
18
18
Path string `yaml:"path"`
19
-
Type string `yaml:"type"` // sqlite, postgres
19
+
Type string `yaml:"type"` // postgres
20
20
}
21
21
22
22
type PLCConfig struct {
23
23
DirectoryURL string `yaml:"directory_url"`
24
24
ScanInterval time.Duration `yaml:"scan_interval"`
25
25
BatchSize int `yaml:"batch_size"`
26
-
BundleDir string `yaml:"bundles_dir"` // NEW: Cache directory
27
-
UseCache bool `yaml:"use_cache"` // NEW: Enable/disable cache
26
+
BundleDir string `yaml:"bundles_dir"`
27
+
UseCache bool `yaml:"use_cache"`
28
+
IndexDIDs bool `yaml:"index_dids"`
28
29
}
29
30
30
31
type PDSConfig struct {
···
32
33
Timeout time.Duration `yaml:"timeout"`
33
34
Workers int `yaml:"workers"`
34
35
RecheckInterval time.Duration `yaml:"recheck_interval"`
36
+
ScanRetention int `yaml:"scan_retention"`
35
37
}
36
38
37
39
type APIConfig struct {
···
72
74
}
73
75
if cfg.PDS.Workers == 0 {
74
76
cfg.PDS.Workers = 10
77
+
}
78
+
if cfg.PDS.ScanRetention == 0 {
79
+
cfg.PDS.ScanRetention = 3
75
80
}
76
81
if cfg.API.Port == 0 {
77
82
cfg.API.Port = 8080
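The zero-value checks above mean a minimal config file can omit these keys entirely; a sketch of relying on the defaults (the config path is hypothetical):

package main

import (
	"fmt"

	"github.com/atscan/atscand/internal/config"
)

func main() {
	// Assumes a config.yaml that omits workers, scan_retention, and port;
	// the defaulting in Load above fills in 10, 3, and 8080 respectively.
	cfg, err := config.Load("config.yaml")
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.PDS.Workers, cfg.PDS.ScanRetention, cfg.API.Port) // 10 3 8080
}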
+162
internal/ipinfo/client.go
···
1
+
package ipinfo
2
+
3
+
import (
4
+
"context"
5
+
"encoding/json"
6
+
"fmt"
7
+
"net"
8
+
"net/http"
9
+
"net/url"
10
+
"sync"
11
+
"time"
12
+
)
13
+
14
+
type Client struct {
15
+
httpClient *http.Client
16
+
baseURL string
17
+
mu sync.RWMutex
18
+
backoffUntil time.Time
19
+
backoffDuration time.Duration
20
+
}
21
+
22
+
func NewClient() *Client {
23
+
return &Client{
24
+
httpClient: &http.Client{
25
+
Timeout: 10 * time.Second,
26
+
},
27
+
baseURL: "https://api.ipapi.is",
28
+
backoffDuration: 5 * time.Minute,
29
+
}
30
+
}
31
+
32
+
// IsInBackoff checks if we're currently in backoff period
33
+
func (c *Client) IsInBackoff() bool {
34
+
c.mu.RLock()
35
+
defer c.mu.RUnlock()
36
+
return time.Now().Before(c.backoffUntil)
37
+
}
38
+
39
+
// SetBackoff sets the backoff period
40
+
func (c *Client) SetBackoff() {
41
+
c.mu.Lock()
42
+
defer c.mu.Unlock()
43
+
c.backoffUntil = time.Now().Add(c.backoffDuration)
44
+
}
45
+
46
+
// ClearBackoff clears the backoff (on successful request)
47
+
func (c *Client) ClearBackoff() {
48
+
c.mu.Lock()
49
+
defer c.mu.Unlock()
50
+
c.backoffUntil = time.Time{}
51
+
}
52
+
53
+
// GetIPInfo fetches IP information from ipapi.is
54
+
func (c *Client) GetIPInfo(ctx context.Context, ip string) (map[string]interface{}, error) {
55
+
// Check if we're in backoff period
56
+
if c.IsInBackoff() {
57
+
c.mu.RLock()
58
+
remaining := time.Until(c.backoffUntil)
59
+
c.mu.RUnlock()
60
+
return nil, fmt.Errorf("in backoff period, retry in %v", remaining.Round(time.Second))
61
+
}
62
+
63
+
// Build URL with IP parameter
64
+
reqURL := fmt.Sprintf("%s/?q=%s", c.baseURL, url.QueryEscape(ip))
65
+
66
+
req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
67
+
if err != nil {
68
+
return nil, fmt.Errorf("failed to create request: %w", err)
69
+
}
70
+
71
+
resp, err := c.httpClient.Do(req)
72
+
if err != nil {
73
+
// Set backoff on network errors (timeout, etc.)
74
+
c.SetBackoff()
75
+
return nil, fmt.Errorf("failed to fetch IP info: %w", err)
76
+
}
77
+
defer resp.Body.Close()
78
+
79
+
if resp.StatusCode == http.StatusTooManyRequests {
80
+
// Set backoff on rate limit
81
+
c.SetBackoff()
82
+
return nil, fmt.Errorf("rate limited (429), backing off for %v", c.backoffDuration)
83
+
}
84
+
85
+
if resp.StatusCode != http.StatusOK {
86
+
// Set backoff on other errors too
87
+
c.SetBackoff()
88
+
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
89
+
}
90
+
91
+
var ipInfo map[string]interface{}
92
+
if err := json.NewDecoder(resp.Body).Decode(&ipInfo); err != nil {
93
+
return nil, fmt.Errorf("failed to decode response: %w", err)
94
+
}
95
+
96
+
// Clear backoff on successful request
97
+
c.ClearBackoff()
98
+
99
+
return ipInfo, nil
100
+
}
101
+
102
+
// IPAddresses holds both IPv4 and IPv6 addresses
103
+
type IPAddresses struct {
104
+
IPv4 string
105
+
IPv6 string
106
+
}
107
+
108
+
// ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL
109
+
func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) {
110
+
// Parse URL
111
+
parsedURL, err := url.Parse(endpoint)
112
+
if err != nil {
113
+
return nil, fmt.Errorf("failed to parse endpoint URL: %w", err)
114
+
}
115
+
116
+
host := parsedURL.Hostname()
117
+
if host == "" {
118
+
return nil, fmt.Errorf("no hostname in endpoint")
119
+
}
120
+
121
+
result := &IPAddresses{}
122
+
123
+
// Check if host is already an IP
124
+
if ip := net.ParseIP(host); ip != nil {
125
+
if ip.To4() != nil {
126
+
result.IPv4 = host
127
+
} else {
128
+
result.IPv6 = host
129
+
}
130
+
return result, nil
131
+
}
132
+
133
+
// Resolve hostname to IPs
134
+
ips, err := net.LookupIP(host)
135
+
if err != nil {
136
+
return nil, fmt.Errorf("failed to resolve hostname: %w", err)
137
+
}
138
+
139
+
if len(ips) == 0 {
140
+
return nil, fmt.Errorf("no IPs found for hostname")
141
+
}
142
+
143
+
// Extract both IPv4 and IPv6
144
+
for _, ip := range ips {
145
+
if ipv4 := ip.To4(); ipv4 != nil {
146
+
if result.IPv4 == "" {
147
+
result.IPv4 = ipv4.String()
148
+
}
149
+
} else {
150
+
if result.IPv6 == "" {
151
+
result.IPv6 = ip.String()
152
+
}
153
+
}
154
+
}
155
+
156
+
// Must have at least one IP
157
+
if result.IPv4 == "" && result.IPv6 == "" {
158
+
return nil, fmt.Errorf("no valid IPs found")
159
+
}
160
+
161
+
return result, nil
162
+
}
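A usage sketch for the client; the address below is a reserved documentation IP, and the response keys depend on what ipapi.is returns:

package main

import (
	"context"
	"fmt"

	"github.com/atscan/atscand/internal/ipinfo"
)

func main() {
	client := ipinfo.NewClient()

	// 192.0.2.1 is a reserved documentation address (TEST-NET-1).
	info, err := client.GetIPInfo(context.Background(), "192.0.2.1")
	if err != nil {
		// After a rate limit or network error the client answers immediately
		// with a backoff error for the next five minutes.
		fmt.Println("lookup failed:", err)
		return
	}
	fmt.Printf("%+v\n", info) // keys depend on the ipapi.is response shape
}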
+115
-7
internal/log/log.go
···
1
1
package log
2
2
3
3
import (
4
+
"fmt"
4
5
"io"
5
6
"log"
6
7
"os"
8
+
"strings"
9
+
"time"
7
10
)
8
11
9
12
var (
···
19
22
verboseWriter = os.Stdout
20
23
}
21
24
22
-
infoLog = log.New(infoWriter, "INFO: ", log.Ldate|log.Ltime|log.Lshortfile)
23
-
verboseLog = log.New(verboseWriter, "VERBOSE: ", log.Ldate|log.Ltime|log.Lshortfile)
24
-
errorLog = log.New(os.Stderr, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile)
25
+
// Use no flags; we'll add our own ISO 8601-style timestamps
26
+
infoLog = log.New(infoWriter, "", 0)
27
+
verboseLog = log.New(verboseWriter, "", 0)
28
+
errorLog = log.New(os.Stderr, "", 0)
29
+
}
30
+
31
+
// timestamp returns current time with milliseconds (local time, no timezone)
32
+
func timestamp() string {
33
+
return time.Now().Format("2006-01-02T15:04:05.000")
25
34
}
26
35
27
36
func Verbose(format string, v ...interface{}) {
28
-
verboseLog.Printf(format, v...)
37
+
verboseLog.Printf("%s [VERBOSE] %s", timestamp(), fmt.Sprintf(format, v...))
29
38
}
30
39
31
40
func Info(format string, v ...interface{}) {
32
-
infoLog.Printf(format, v...)
41
+
infoLog.Printf("%s [INFO] %s", timestamp(), fmt.Sprintf(format, v...))
42
+
}
43
+
44
+
func Warn(format string, v ...interface{}) {
45
+
infoLog.Printf("%s [WARN] %s", timestamp(), fmt.Sprintf(format, v...))
33
46
}
34
47
35
48
func Error(format string, v ...interface{}) {
36
-
errorLog.Printf(format, v...)
49
+
errorLog.Printf("%s [ERROR] %s", timestamp(), fmt.Sprintf(format, v...))
37
50
}
38
51
39
52
func Fatal(format string, v ...interface{}) {
40
-
errorLog.Fatalf(format, v...)
53
+
errorLog.Fatalf("%s [FATAL] %s", timestamp(), fmt.Sprintf(format, v...))
54
+
}
55
+
56
+
// Banner prints a startup banner
57
+
func Banner(version string) {
58
+
banner := `
59
+
╔════════════════════════════════════════════════════════════╗
60
+
║ ║
61
+
║ █████╗ ████████╗███████╗ ██████╗ █████╗ ███╗ ██╗ ║
62
+
║ ██╔══██╗╚══██╔══╝██╔════╝██╔════╝██╔══██╗████╗ ██║ ║
63
+
║ ███████║ ██║ ███████╗██║ ███████║██╔██╗ ██║ ║
64
+
║ ██╔══██║ ██║ ╚════██║██║ ██╔══██║██║╚██╗██║ ║
65
+
║ ██║ ██║ ██║ ███████║╚██████╗██║ ██║██║ ╚████║ ║
66
+
║ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ║
67
+
║ ║
68
+
║ AT Protocol Network Scanner & Indexer ║
69
+
║ Version %s ║
70
+
║ ║
71
+
╚════════════════════════════════════════════════════════════╝
72
+
`
73
+
fmt.Printf(banner, padVersion(version))
74
+
}
75
+
76
+
// padVersion pads the version string to fit the banner
77
+
func padVersion(version string) string {
78
+
targetLen := 7
79
+
if len(version) < targetLen {
80
+
padding := strings.Repeat(" ", (targetLen-len(version))/2)
81
+
return padding + version + padding
82
+
}
83
+
return version
84
+
}
85
+
86
+
// RedactPassword redacts passwords from connection strings
87
+
func RedactPassword(connStr string) string {
88
+
// Handle PostgreSQL URI format: postgresql://user:password@host/db
89
+
// Pattern: find everything between :// and @ that contains a colon
90
+
if strings.Contains(connStr, "://") && strings.Contains(connStr, "@") {
91
+
// Find the credentials section
92
+
parts := strings.SplitN(connStr, "://", 2)
93
+
if len(parts) == 2 {
94
+
scheme := parts[0]
95
+
remainder := parts[1]
96
+
97
+
// Find the @ symbol
98
+
atIndex := strings.Index(remainder, "@")
99
+
if atIndex > 0 {
100
+
credentials := remainder[:atIndex]
101
+
hostAndDb := remainder[atIndex:]
102
+
103
+
// Check if there's a password (look for colon in credentials)
104
+
colonIndex := strings.Index(credentials, ":")
105
+
if colonIndex > 0 {
106
+
username := credentials[:colonIndex]
107
+
return fmt.Sprintf("%s://%s:***%s", scheme, username, hostAndDb)
108
+
}
109
+
}
110
+
}
111
+
}
112
+
113
+
// Handle key-value format: host=localhost password=secret user=myuser
114
+
if strings.Contains(connStr, "password=") {
115
+
parts := strings.Split(connStr, " ")
116
+
for i, part := range parts {
117
+
if strings.HasPrefix(part, "password=") {
118
+
parts[i] = "password=***"
119
+
}
120
+
}
121
+
return strings.Join(parts, " ")
122
+
}
123
+
124
+
return connStr
125
+
}
126
+
127
+
// PrintConfig prints configuration summary
128
+
func PrintConfig(items map[string]string) {
129
+
Info("=== Configuration ===")
130
+
maxKeyLen := 0
131
+
for key := range items {
132
+
if len(key) > maxKeyLen {
133
+
maxKeyLen = len(key)
134
+
}
135
+
}
136
+
137
+
for key, value := range items {
138
+
padding := strings.Repeat(" ", maxKeyLen-len(key))
139
+
140
+
// Redact database connection strings
141
+
displayValue := value
142
+
if strings.Contains(key, "Database Path") || strings.Contains(key, "Connection") || strings.Contains(strings.ToLower(key), "password") {
143
+
displayValue = RedactPassword(value)
144
+
}
145
+
146
+
fmt.Printf(" %s:%s %s\n", key, padding, displayValue)
147
+
}
148
+
Info("====================")
41
149
}
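To make the redaction rules concrete, here is what RedactPassword yields for the two supported connection-string formats (credentials hypothetical):

package main

import (
	"fmt"

	"github.com/atscan/atscand/internal/log"
)

func main() {
	// URI form: the password between ':' and '@' is masked.
	fmt.Println(log.RedactPassword("postgresql://atscan:hunter2@db.example.com/atscan"))
	// -> postgresql://atscan:***@db.example.com/atscan

	// Key-value form: the password= pair is masked.
	fmt.Println(log.RedactPassword("host=localhost password=hunter2 user=atscan"))
	// -> host=localhost password=*** user=atscan
}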
+226
internal/monitor/tracker.go
···
1
+
package monitor
2
+
3
+
import (
4
+
"sync"
5
+
"time"
6
+
)
7
+
8
+
type JobStatus struct {
9
+
Name string `json:"name"`
10
+
Status string `json:"status"` // "idle", "running", "completed", "error"
11
+
StartTime time.Time `json:"start_time,omitempty"`
12
+
LastRun time.Time `json:"last_run,omitempty"`
13
+
Duration time.Duration `json:"duration,omitempty"`
14
+
Progress *Progress `json:"progress,omitempty"`
15
+
Error string `json:"error,omitempty"`
16
+
NextRun time.Time `json:"next_run,omitempty"`
17
+
RunCount int64 `json:"run_count"`
18
+
SuccessCount int64 `json:"success_count"`
19
+
ErrorCount int64 `json:"error_count"`
20
+
}
21
+
22
+
type Progress struct {
23
+
Current int `json:"current"`
24
+
Total int `json:"total"`
25
+
Percent float64 `json:"percent"`
26
+
Message string `json:"message,omitempty"`
27
+
}
28
+
29
+
type WorkerStatus struct {
30
+
ID int `json:"id"`
31
+
Status string `json:"status"` // "idle", "working"
32
+
CurrentTask string `json:"current_task,omitempty"`
33
+
StartedAt time.Time `json:"started_at,omitempty"`
34
+
Duration time.Duration `json:"duration,omitempty"`
35
+
}
36
+
37
+
type Tracker struct {
38
+
mu sync.RWMutex
39
+
jobs map[string]*JobStatus
40
+
workers map[string][]WorkerStatus // key is job name
41
+
}
42
+
43
+
var globalTracker *Tracker
44
+
45
+
func init() {
46
+
globalTracker = &Tracker{
47
+
jobs: make(map[string]*JobStatus),
48
+
workers: make(map[string][]WorkerStatus),
49
+
}
50
+
}
51
+
52
+
func GetTracker() *Tracker {
53
+
return globalTracker
54
+
}
55
+
56
+
// Job status methods
57
+
func (t *Tracker) RegisterJob(name string) {
58
+
t.mu.Lock()
59
+
defer t.mu.Unlock()
60
+
61
+
t.jobs[name] = &JobStatus{
62
+
Name: name,
63
+
Status: "idle",
64
+
}
65
+
}
66
+
67
+
func (t *Tracker) StartJob(name string) {
68
+
t.mu.Lock()
69
+
defer t.mu.Unlock()
70
+
71
+
if job, exists := t.jobs[name]; exists {
72
+
job.Status = "running"
73
+
job.StartTime = time.Now()
74
+
job.Error = ""
75
+
job.RunCount++
76
+
}
77
+
}
78
+
79
+
func (t *Tracker) CompleteJob(name string, err error) {
80
+
t.mu.Lock()
81
+
defer t.mu.Unlock()
82
+
83
+
if job, exists := t.jobs[name]; exists {
84
+
job.LastRun = time.Now()
85
+
job.Duration = time.Since(job.StartTime)
86
+
87
+
if err != nil {
88
+
job.Status = "error"
89
+
job.Error = err.Error()
90
+
job.ErrorCount++
91
+
} else {
92
+
job.Status = "completed"
93
+
job.SuccessCount++
94
+
}
95
+
96
+
job.Progress = nil // Clear progress
97
+
}
98
+
}
99
+
100
+
func (t *Tracker) UpdateProgress(name string, current, total int, message string) {
101
+
t.mu.Lock()
102
+
defer t.mu.Unlock()
103
+
104
+
if job, exists := t.jobs[name]; exists {
105
+
var percent float64
106
+
if total > 0 {
107
+
percent = float64(current) / float64(total) * 100
108
+
}
109
+
110
+
job.Progress = &Progress{
111
+
Current: current,
112
+
Total: total,
113
+
Percent: percent,
114
+
Message: message,
115
+
}
116
+
}
117
+
}
118
+
119
+
func (t *Tracker) SetNextRun(name string, nextRun time.Time) {
120
+
t.mu.Lock()
121
+
defer t.mu.Unlock()
122
+
123
+
if job, exists := t.jobs[name]; exists {
124
+
job.NextRun = nextRun
125
+
}
126
+
}
127
+
128
+
func (t *Tracker) GetJobStatus(name string) *JobStatus {
129
+
t.mu.RLock()
130
+
defer t.mu.RUnlock()
131
+
132
+
if job, exists := t.jobs[name]; exists {
133
+
// Create a copy
134
+
jobCopy := *job
135
+
if job.Progress != nil {
136
+
progressCopy := *job.Progress
137
+
jobCopy.Progress = &progressCopy
138
+
}
139
+
140
+
// Calculate duration for running jobs
141
+
if jobCopy.Status == "running" {
142
+
jobCopy.Duration = time.Since(jobCopy.StartTime)
143
+
}
144
+
145
+
return &jobCopy
146
+
}
147
+
return nil
148
+
}
149
+
150
+
func (t *Tracker) GetAllJobs() map[string]*JobStatus {
151
+
t.mu.RLock()
152
+
defer t.mu.RUnlock()
153
+
154
+
result := make(map[string]*JobStatus)
155
+
for name, job := range t.jobs {
156
+
jobCopy := *job
157
+
if job.Progress != nil {
158
+
progressCopy := *job.Progress
159
+
jobCopy.Progress = &progressCopy
160
+
}
161
+
162
+
// Calculate duration for running jobs
163
+
if jobCopy.Status == "running" {
164
+
jobCopy.Duration = time.Since(jobCopy.StartTime)
165
+
}
166
+
167
+
result[name] = &jobCopy
168
+
}
169
+
return result
170
+
}
171
+
172
+
// Worker status methods
173
+
func (t *Tracker) InitWorkers(jobName string, count int) {
174
+
t.mu.Lock()
175
+
defer t.mu.Unlock()
176
+
177
+
workers := make([]WorkerStatus, count)
178
+
for i := 0; i < count; i++ {
179
+
workers[i] = WorkerStatus{
180
+
ID: i + 1,
181
+
Status: "idle",
182
+
}
183
+
}
184
+
t.workers[jobName] = workers
185
+
}
186
+
187
+
func (t *Tracker) StartWorker(jobName string, workerID int, task string) {
188
+
t.mu.Lock()
189
+
defer t.mu.Unlock()
190
+
191
+
if workers, exists := t.workers[jobName]; exists && workerID > 0 && workerID <= len(workers) {
192
+
workers[workerID-1].Status = "working"
193
+
workers[workerID-1].CurrentTask = task
194
+
workers[workerID-1].StartedAt = time.Now()
195
+
}
196
+
}
197
+
198
+
func (t *Tracker) CompleteWorker(jobName string, workerID int) {
199
+
t.mu.Lock()
200
+
defer t.mu.Unlock()
201
+
202
+
if workers, exists := t.workers[jobName]; exists && workerID > 0 && workerID <= len(workers) {
203
+
workers[workerID-1].Status = "idle"
204
+
workers[workerID-1].CurrentTask = ""
205
+
workers[workerID-1].Duration = time.Since(workers[workerID-1].StartedAt)
206
+
workers[workerID-1].StartedAt = time.Time{}
207
+
}
208
+
}
209
+
210
+
func (t *Tracker) GetWorkers(jobName string) []WorkerStatus {
211
+
t.mu.RLock()
212
+
defer t.mu.RUnlock()
213
+
214
+
if workers, exists := t.workers[jobName]; exists {
215
+
// Create a copy with calculated durations
216
+
result := make([]WorkerStatus, len(workers))
217
+
for i, w := range workers {
218
+
result[i] = w
219
+
if w.Status == "working" && !w.StartedAt.IsZero() {
220
+
result[i].Duration = time.Since(w.StartedAt)
221
+
}
222
+
}
223
+
return result
224
+
}
225
+
return nil
226
+
}
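The tracker is meant to be driven from the jobs themselves via the global instance; a minimal usage sketch with a hypothetical job name:

package main

import (
	"fmt"
	"time"

	"github.com/atscan/atscand/internal/monitor"
)

func main() {
	t := monitor.GetTracker()

	t.RegisterJob("example_scan") // hypothetical job name
	t.StartJob("example_scan")

	for i := 1; i <= 3; i++ {
		t.UpdateProgress("example_scan", i, 3, fmt.Sprintf("step %d/3", i))
		time.Sleep(10 * time.Millisecond)
	}

	t.CompleteJob("example_scan", nil) // nil error -> status "completed"
	t.SetNextRun("example_scan", time.Now().Add(time.Minute))

	fmt.Printf("%+v\n", t.GetJobStatus("example_scan"))
}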
+70
-23
internal/pds/client.go
···
4
4
"context"
5
5
"encoding/json"
6
6
"fmt"
7
+
"net"
7
8
"net/http"
8
9
"time"
9
10
)
···
28
29
29
30
// Repo represents a repository in the list
30
31
type Repo struct {
31
-
DID string `json:"did"`
32
-
Head string `json:"head,omitempty"`
33
-
Rev string `json:"rev,omitempty"`
32
+
DID string `json:"did"`
33
+
Head string `json:"head,omitempty"`
34
+
Rev string `json:"rev,omitempty"`
35
+
Active *bool `json:"active,omitempty"`
36
+
Status *string `json:"status,omitempty"`
34
37
}
35
38
36
39
// ListRepos fetches all repositories from a PDS with pagination
37
-
func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]string, error) {
38
-
var allDIDs []string
40
+
func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]Repo, error) {
41
+
var allRepos []Repo
39
42
var cursor *string
40
43
41
44
for {
···
67
70
}
68
71
resp.Body.Close()
69
72
70
-
// Collect DIDs
71
-
for _, repo := range result.Repos {
72
-
allDIDs = append(allDIDs, repo.DID)
73
-
}
73
+
// Collect repos
74
+
allRepos = append(allRepos, result.Repos...)
74
75
75
76
// Check if there are more pages
76
77
if result.Cursor == nil || *result.Cursor == "" {
···
79
80
cursor = result.Cursor
80
81
}
81
82
82
-
return allDIDs, nil
83
+
return allRepos, nil
83
84
}
84
85
85
86
// DescribeServer fetches com.atproto.server.describeServer
86
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
87
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
88
91
89
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
90
116
91
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
92
118
if err != nil {
93
-
return nil, err
119
+
return nil, 0, "", err
94
120
}
95
121
96
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
97
125
if err != nil {
98
-
return nil, err
126
+
return nil, responseTime, usedIP, err
99
127
}
100
128
defer resp.Body.Close()
101
129
102
130
if resp.StatusCode != http.StatusOK {
103
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
104
132
}
105
133
106
134
var desc ServerDescription
107
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
108
-
return nil, err
136
+
return nil, responseTime, usedIP, err
109
137
}
110
138
111
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
112
140
}
113
141
114
-
// CheckHealth performs a basic health check
115
-
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, error) {
142
+
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version" field
143
+
// Returns: available, responseTime, version, error
144
+
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
116
145
startTime := time.Now()
117
146
118
147
url := fmt.Sprintf("%s/xrpc/_health", endpoint)
119
148
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
120
149
if err != nil {
121
-
return false, 0, err
150
+
return false, 0, "", err
122
151
}
123
152
124
153
resp, err := c.httpClient.Do(req)
125
154
duration := time.Since(startTime)
126
155
127
156
if err != nil {
128
-
return false, duration, err
157
+
return false, duration, "", err
129
158
}
130
159
defer resp.Body.Close()
131
160
132
-
return resp.StatusCode == http.StatusOK, duration, nil
161
+
if resp.StatusCode != http.StatusOK {
162
+
return false, duration, "", fmt.Errorf("health check returned status %d", resp.StatusCode)
163
+
}
164
+
165
+
// Decode the JSON response and check for "version"
166
+
var healthResponse struct {
167
+
Version string `json:"version"`
168
+
}
169
+
170
+
if err := json.NewDecoder(resp.Body).Decode(&healthResponse); err != nil {
171
+
return false, duration, "", fmt.Errorf("failed to decode health JSON: %w", err)
172
+
}
173
+
174
+
if healthResponse.Version == "" {
175
+
return false, duration, "", fmt.Errorf("health JSON response missing 'version' field")
176
+
}
177
+
178
+
// All checks passed
179
+
return true, duration, healthResponse.Version, nil
133
180
}
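Together, the two calls yield latency, the dialed IP, and the self-reported version; a caller sketch (the endpoint is hypothetical, and the timeout mirrors how the scanner wires PDSConfig into NewClient):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/atscan/atscand/internal/pds"
)

func main() {
	client := pds.NewClient(10 * time.Second)
	ctx := context.Background()

	desc, rtt, ip, err := client.DescribeServer(ctx, "https://pds.example.com")
	if err != nil {
		fmt.Println("describeServer failed:", err)
		return
	}
	fmt.Printf("did=%s rtt=%v dialed=%s\n", desc.DID, rtt, ip)

	ok, _, version, err := client.CheckHealth(ctx, "https://pds.example.com")
	fmt.Println("healthy:", ok, "version:", version, "err:", err)
}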
+270
-99
internal/pds/scanner.go
···
2
2
3
3
import (
4
4
"context"
5
+
"fmt"
6
+
"math/rand"
5
7
"sync"
8
+
"sync/atomic"
6
9
"time"
7
10
8
-
"github.com/acarl005/stripansi"
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
12
16
)
13
17
14
18
type Scanner struct {
15
-
client *Client
16
-
db storage.Database
17
-
config config.PDSConfig
19
+
client *Client
20
+
db storage.Database
21
+
config config.PDSConfig
22
+
ipInfoClient *ipinfo.Client
18
23
}
19
24
20
25
func NewScanner(db storage.Database, cfg config.PDSConfig) *Scanner {
21
26
return &Scanner{
22
-
client: NewClient(cfg.Timeout),
23
-
db: db,
24
-
config: cfg,
27
+
client: NewClient(cfg.Timeout),
28
+
db: db,
29
+
config: cfg,
30
+
ipInfoClient: ipinfo.NewClient(),
25
31
}
26
32
}
27
33
···
29
35
startTime := time.Now()
30
36
log.Info("Starting PDS availability scan...")
31
37
32
-
// Get only PDS endpoints
38
+
// Get only PDS endpoints that need checking
33
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
34
-
Type: "pds",
40
+
Type: "pds",
41
+
OnlyStale: true,
42
+
OnlyValid: true,
43
+
RecheckInterval: s.config.RecheckInterval,
35
44
})
36
45
if err != nil {
37
46
return err
38
47
}
39
48
40
-
log.Info("Scanning %d PDS servers...", len(servers))
49
+
if len(servers) == 0 {
50
+
log.Info("No endpoints need scanning at this time")
51
+
monitor.GetTracker().UpdateProgress("pds_scan", 0, 0, "No endpoints need scanning")
52
+
return nil
53
+
}
54
+
55
+
log.Info("Found %d endpoints that need scanning", len(servers))
56
+
monitor.GetTracker().UpdateProgress("pds_scan", 0, len(servers), "Preparing to scan")
57
+
58
+
// Shuffle servers
59
+
if len(servers) > 0 {
60
+
r := rand.New(rand.NewSource(time.Now().UnixNano()))
61
+
r.Shuffle(len(servers), func(i, j int) {
62
+
servers[i], servers[j] = servers[j], servers[i]
63
+
})
64
+
}
41
65
42
-
// Worker pool
43
-
jobs := make(chan *storage.Endpoint, len(servers))
44
-
results := make(chan *PDSStatus, len(servers))
66
+
// Initialize workers in tracker
67
+
monitor.GetTracker().InitWorkers("pds_scan", s.config.Workers)
45
68
69
+
// Worker pool with progress tracking
70
+
jobs := make(chan *workerJob, len(servers))
46
71
var wg sync.WaitGroup
72
+
var completed int32
73
+
47
74
for i := 0; i < s.config.Workers; i++ {
48
75
wg.Add(1)
49
-
go func() {
76
+
workerID := i + 1
77
+
go func(id int) {
50
78
defer wg.Done()
51
-
s.worker(ctx, jobs, results)
52
-
}()
79
+
s.workerWithProgress(ctx, id, jobs, &completed, len(servers))
80
+
}(workerID)
53
81
}
54
82
55
-
go func() {
56
-
for _, server := range servers {
57
-
jobs <- server
58
-
}
59
-
close(jobs)
60
-
}()
83
+
// Send jobs
84
+
for _, server := range servers {
85
+
jobs <- &workerJob{endpoint: server}
86
+
}
87
+
close(jobs)
61
88
62
-
go func() {
63
-
wg.Wait()
64
-
close(results)
65
-
}()
89
+
// Wait for completion
90
+
wg.Wait()
66
91
67
-
// Process results
68
-
successCount := 0
69
-
failureCount := 0
70
-
totalUsers := int64(0)
92
+
log.Info("PDS scan completed in %v", time.Since(startTime))
93
+
monitor.GetTracker().UpdateProgress("pds_scan", len(servers), len(servers), "Completed")
71
94
72
-
for status := range results {
73
-
// Determine status code
74
-
statusCode := storage.PDSStatusOffline
75
-
if status.Available {
76
-
statusCode = storage.PDSStatusOnline
77
-
}
95
+
return nil
96
+
}
78
97
79
-
// Build scan data
80
-
scanData := &storage.EndpointScanData{
81
-
ServerInfo: status.Description,
82
-
DIDs: status.DIDs,
83
-
DIDCount: len(status.DIDs),
84
-
}
85
-
86
-
// Update using Endpoint ID
87
-
if err := s.db.UpdateEndpointStatus(ctx, status.EndpointID, &storage.EndpointUpdate{
88
-
Status: statusCode,
89
-
LastChecked: status.LastChecked,
90
-
ResponseTime: status.ResponseTime.Seconds() * 1000, // Convert to ms
91
-
ScanData: scanData,
92
-
}); err != nil {
93
-
log.Error("Error updating endpoint ID %d: %v", status.EndpointID, err)
94
-
}
95
-
96
-
if status.Available {
97
-
successCount++
98
-
totalUsers += int64(len(status.DIDs))
99
-
} else {
100
-
failureCount++
101
-
}
102
-
}
103
-
104
-
log.Info("PDS scan completed: %d available, %d unavailable, %d total users in %v",
105
-
successCount, failureCount, totalUsers, time.Since(startTime))
106
-
107
-
return nil
98
+
type workerJob struct {
99
+
endpoint *storage.Endpoint
108
100
}
109
101
110
-
func (s *Scanner) worker(ctx context.Context, jobs <-chan *storage.Endpoint, results chan<- *PDSStatus) {
111
-
for server := range jobs {
102
+
func (s *Scanner) workerWithProgress(ctx context.Context, workerID int, jobs <-chan *workerJob, completed *int32, total int) {
103
+
for job := range jobs {
112
104
select {
113
105
case <-ctx.Done():
114
106
return
115
107
default:
116
-
status := s.scanPDS(ctx, server.ID, server.Endpoint)
117
-
results <- status
108
+
// Update worker status
109
+
monitor.GetTracker().StartWorker("pds_scan", workerID, job.endpoint.Endpoint)
110
+
111
+
// Scan endpoint
112
+
s.scanAndSaveEndpoint(ctx, job.endpoint)
113
+
114
+
// Update progress
115
+
atomic.AddInt32(completed, 1)
116
+
current := atomic.LoadInt32(completed)
117
+
monitor.GetTracker().UpdateProgress("pds_scan", int(current), total,
118
+
fmt.Sprintf("Scanned %d/%d endpoints", current, total))
119
+
120
+
// Mark worker as idle
121
+
monitor.GetTracker().CompleteWorker("pds_scan", workerID)
118
122
}
119
123
}
120
124
}
121
125
122
-
func (s *Scanner) scanPDS(ctx context.Context, endpointID int64, endpoint string) *PDSStatus {
123
-
status := &PDSStatus{
124
-
EndpointID: endpointID, // Store Endpoint ID
125
-
Endpoint: endpoint,
126
-
LastChecked: time.Now(),
126
+
func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) {
127
+
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
+
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
129
+
if err != nil {
130
+
s.saveScanResult(ctx, ep.ID, &ScanResult{
131
+
Status: storage.EndpointStatusOffline,
132
+
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
133
+
})
134
+
return
127
135
}
128
136
129
-
// Health check
130
-
available, responseTime, err := s.client.CheckHealth(ctx, endpoint)
131
-
status.Available = available
132
-
status.ResponseTime = responseTime
137
+
// Update IPs immediately
138
+
s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC())
139
+
140
+
// STEP 1.5: Fetch IP info asynchronously for both IPs
141
+
if ips.IPv4 != "" {
142
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv4)
143
+
}
144
+
if ips.IPv6 != "" {
145
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
146
+
}
133
147
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
134
150
if err != nil {
135
-
status.ErrorMessage = err.Error()
136
-
return status
151
+
s.saveScanResult(ctx, ep.ID, &ScanResult{
152
+
Status: storage.EndpointStatusOffline,
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
156
+
})
157
+
return
137
158
}
138
159
139
-
if !available {
140
-
status.ErrorMessage = "health check failed"
141
-
return status
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
162
+
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
163
+
}
164
+
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
142
172
}
143
173
144
-
// Describe server
145
-
desc, err := s.client.DescribeServer(ctx, endpoint)
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
181
+
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
146
182
if err != nil {
147
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(endpoint), err)
183
+
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
184
+
repoList = []Repo{}
185
+
}
186
+
187
+
// Convert to DIDs
188
+
dids := make([]string, len(repoList))
189
+
for i, repo := range repoList {
190
+
dids[i] = repo.DID
191
+
}
192
+
193
+
// STEP 5: SAVE scan result
194
+
s.saveScanResult(ctx, ep.ID, &ScanResult{
195
+
Status: storage.EndpointStatusOnline,
196
+
ResponseTime: avgResponseTime,
197
+
Description: desc,
198
+
DIDs: dids,
199
+
Version: version,
200
+
UsedIP: usedIP, // Only from describeServer
201
+
})
202
+
203
+
// STEP 6: Save repos in batches (only tracks changes)
204
+
if len(repoList) > 0 {
205
+
batchSize := 100_000
206
+
207
+
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
208
+
209
+
for i := 0; i < len(repoList); i += batchSize {
210
+
end := i + batchSize
211
+
if end > len(repoList) {
212
+
end = len(repoList)
213
+
}
214
+
215
+
batch := repoList[i:end]
216
+
repoData := make([]storage.PDSRepoData, len(batch))
217
+
218
+
for j, repo := range batch {
219
+
active := true
220
+
if repo.Active != nil {
221
+
active = *repo.Active
222
+
}
223
+
224
+
status := ""
225
+
if repo.Status != nil {
226
+
status = *repo.Status
227
+
}
228
+
229
+
repoData[j] = storage.PDSRepoData{
230
+
DID: repo.DID,
231
+
Head: repo.Head,
232
+
Rev: repo.Rev,
233
+
Active: active,
234
+
Status: status,
235
+
}
236
+
}
237
+
238
+
if err := s.db.UpsertPDSRepos(ctx, ep.ID, repoData); err != nil {
239
+
log.Error("Failed to save repo batch for endpoint %d: %v", ep.ID, err)
240
+
}
241
+
}
242
+
243
+
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
244
+
}
245
+
}
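The batch loop above partitions very large repo lists so that a single endpoint with millions of repos never produces one giant upsert; the slicing logic in isolation, with sizes shrunk for illustration:

package main

import "fmt"

func main() {
	repos := make([]int, 10) // stand-in for repoList
	batchSize := 4           // the scanner uses 100_000

	for i := 0; i < len(repos); i += batchSize {
		end := i + batchSize
		if end > len(repos) {
			end = len(repos)
		}
		fmt.Printf("batch %d..%d (len %d)\n", i, end, end-i)
	}
	// Output:
	// batch 0..4 (len 4)
	// batch 4..8 (len 4)
	// batch 8..10 (len 2)
}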
246
+
247
+
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
248
+
// Build scan_data with PDS-specific info in Metadata
249
+
scanData := &storage.EndpointScanData{
250
+
DIDCount: len(result.DIDs),
251
+
Metadata: make(map[string]interface{}),
252
+
}
253
+
254
+
var userCount int64
255
+
256
+
// Add PDS-specific metadata
257
+
if result.Status == storage.EndpointStatusOnline {
258
+
userCount = int64(len(result.DIDs))
259
+
scanData.Metadata["user_count"] = userCount
260
+
if result.Description != nil {
261
+
scanData.Metadata["server_info"] = result.Description
262
+
}
148
263
} else {
149
-
status.Description = desc
264
+
// Include error message for offline status
265
+
if result.ErrorMessage != "" {
266
+
scanData.Metadata["error"] = result.ErrorMessage
267
+
}
150
268
}
151
269
152
-
// Optionally list repos (DIDs) - commented out by default for performance
153
-
/*dids, err := s.client.ListRepos(ctx, endpoint)
270
+
// Save scan record
271
+
scan := &storage.EndpointScan{
272
+
EndpointID: endpointID,
273
+
Status: result.Status,
274
+
ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms
275
+
UserCount: userCount,
276
+
Version: result.Version,
277
+
UsedIP: result.UsedIP, // NEW
278
+
ScanData: scanData,
279
+
ScannedAt: time.Now().UTC(),
280
+
}
281
+
282
+
if err := s.db.SaveEndpointScan(ctx, scan); err != nil {
283
+
log.Error("Failed to save scan for endpoint %d: %v", endpointID, err)
284
+
}
285
+
286
+
// Update endpoint status
287
+
update := &storage.EndpointUpdate{
288
+
Status: result.Status,
289
+
LastChecked: time.Now().UTC(),
290
+
ResponseTime: result.ResponseTime.Seconds() * 1000,
291
+
}
292
+
293
+
if err := s.db.UpdateEndpointStatus(ctx, endpointID, update); err != nil {
294
+
log.Error("Failed to update endpoint status for %d: %v", endpointID, err)
295
+
}
296
+
}
297
+
298
+
func (s *Scanner) updateIPInfoIfNeeded(ctx context.Context, ip string) {
299
+
// Check if IP info client is in backoff
300
+
if s.ipInfoClient.IsInBackoff() {
301
+
return
302
+
}
303
+
304
+
// Check if we need to update IP info
305
+
exists, needsUpdate, err := s.db.ShouldUpdateIPInfo(ctx, ip)
154
306
if err != nil {
155
-
log.Verbose("Warning: failed to list repos for %s: %v", endpoint, err)
156
-
status.DIDs = []string{}
157
-
} else {
158
-
status.DIDs = dids
159
-
log.Verbose(" → Found %d users on %s", len(dids), endpoint)
160
-
}*/
307
+
log.Verbose("Failed to check IP info status: %v", err)
308
+
return
309
+
}
161
310
162
-
return status
311
+
if exists && !needsUpdate {
312
+
return // IP info is fresh
313
+
}
314
+
315
+
// Fetch IP info from ipapi.is
316
+
log.Verbose("Fetching IP info for %s", ip)
317
+
ipInfo, err := s.ipInfoClient.GetIPInfo(ctx, ip)
318
+
if err != nil {
319
+
// Log only once when backoff starts
320
+
if s.ipInfoClient.IsInBackoff() {
321
+
log.Info("⚠ IP info API unavailable, pausing requests for 5 minutes")
322
+
} else {
323
+
log.Verbose("Failed to fetch IP info for %s: %v", ip, err)
324
+
}
325
+
return
326
+
}
327
+
328
+
// Update database
329
+
if err := s.db.UpsertIPInfo(ctx, ip, ipInfo); err != nil {
330
+
log.Error("Failed to update IP info for %s: %v", ip, err)
331
+
} else {
332
+
log.Verbose("✓ Updated IP info for %s", ip)
333
+
}
163
334
}
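The repo-upsert step above walks the repo list in fixed-size windows so a single database call never receives more than batchSize rows. A minimal, self-contained sketch of the same slicing pattern (the generic chunk helper and processChunk callback are illustrative, not part of the scanner; requires Go 1.18+):

package main

import "fmt"

// chunk iterates over items in windows of at most size elements,
// mirroring the batching loop used in the repo-upsert step above.
func chunk[T any](items []T, size int, process func([]T) error) error {
	for i := 0; i < len(items); i += size {
		end := i + size
		if end > len(items) {
			end = len(items)
		}
		if err := process(items[i:end]); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	ids := []int{1, 2, 3, 4, 5}
	_ = chunk(ids, 2, func(batch []int) error {
		fmt.Println(batch) // [1 2], [3 4], [5]
		return nil
	})
}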
+10  internal/pds/types.go
-604  internal/plc/bundle.go (file deleted)
···
package plc

import (
	"bufio"
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"time"

	"github.com/atscan/atscanner/internal/log"
	"github.com/atscan/atscanner/internal/storage"
	"github.com/klauspost/compress/zstd"
)

const BUNDLE_SIZE = 10000

type BundleManager struct {
	dir     string
	enabled bool
	encoder *zstd.Encoder
	decoder *zstd.Decoder
	db      storage.Database
}

// ===== INITIALIZATION =====

func NewBundleManager(dir string, enabled bool, db storage.Database) (*BundleManager, error) {
	if !enabled {
		return &BundleManager{enabled: false}, nil
	}

	if err := os.MkdirAll(dir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create bundle dir: %w", err)
	}

	encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
	if err != nil {
		return nil, err
	}

	decoder, err := zstd.NewReader(nil)
	if err != nil {
		return nil, err
	}

	return &BundleManager{
		dir:     dir,
		enabled: enabled,
		encoder: encoder,
		decoder: decoder,
		db:      db,
	}, nil
}

func (bm *BundleManager) Close() {
	if bm.encoder != nil {
		bm.encoder.Close()
	}
	if bm.decoder != nil {
		bm.decoder.Close()
	}
}

// ===== BUNDLE FILE ABSTRACTION =====

type bundleFile struct {
	path             string
	operations       []PLCOperation
	uncompressedHash string
	compressedHash   string
}

func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
	return &bundleFile{
		path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
	}
}

func (bf *bundleFile) exists() bool {
	_, err := os.Stat(bf.path)
	return err == nil
}

func (bm *BundleManager) load(bf *bundleFile) error {
	compressed, err := os.ReadFile(bf.path)
	if err != nil {
		return fmt.Errorf("read failed: %w", err)
	}

	decompressed, err := bm.decoder.DecodeAll(compressed, nil)
	if err != nil {
		return fmt.Errorf("decompress failed: %w", err)
	}

	bf.operations = bm.parseJSONL(decompressed)
	return nil
}

func (bm *BundleManager) save(bf *bundleFile) error {
	jsonlData := bm.serializeJSONL(bf.operations)
	bf.uncompressedHash = bm.hash(jsonlData)

	compressed := bm.encoder.EncodeAll(jsonlData, nil)
	bf.compressedHash = bm.hash(compressed)

	return os.WriteFile(bf.path, compressed, 0644)
}

func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
	var ops []PLCOperation
	scanner := bufio.NewScanner(bytes.NewReader(data))

	for scanner.Scan() {
		line := scanner.Bytes()
		if len(line) == 0 {
			continue
		}

		var op PLCOperation
		if err := json.Unmarshal(line, &op); err == nil {
			op.RawJSON = append([]byte(nil), line...)
			ops = append(ops, op)
		}
	}

	return ops
}

func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
	var buf []byte
	for _, op := range ops {
		buf = append(buf, op.RawJSON...)
		buf = append(buf, '\n')
	}
	return buf
}

// ===== BUNDLE FETCHING =====

type bundleFetcher struct {
	client       *Client
	seenCIDs     map[string]bool
	currentAfter string
	fetchCount   int
}

func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
	seen := make(map[string]bool)
	for cid := range prevBoundaryCIDs {
		seen[cid] = true
	}

	return &bundleFetcher{
		client:       client,
		seenCIDs:     seen,
		currentAfter: afterTime,
	}
}

func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
	var ops []PLCOperation
	maxFetches := (target / 900) + 5

	for len(ops) < target && bf.fetchCount < maxFetches {
		bf.fetchCount++
		batchSize := bf.calculateBatchSize(target - len(ops))

		log.Verbose("  Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)

		batch, shouldContinue := bf.fetchBatch(ctx, batchSize)

		for _, op := range batch {
			if !bf.seenCIDs[op.CID] {
				bf.seenCIDs[op.CID] = true
				ops = append(ops, op)

				if len(ops) >= target {
					return ops[:target], true
				}
			}
		}

		if !shouldContinue {
			break
		}
	}

	return ops, len(ops) >= target
}

func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
	if bf.fetchCount == 0 {
		return 1000
	}
	if remaining < 100 {
		return 50
	}
	if remaining < 500 {
		return 200
	}
	return 1000
}

func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
	ops, err := bf.client.Export(ctx, ExportOptions{
		Count: size,
		After: bf.currentAfter,
	})

	if err != nil || len(ops) == 0 {
		return nil, false
	}

	if len(ops) > 0 {
		bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
	}

	return ops, len(ops) >= size
}

// ===== MAIN BUNDLE LOADING =====

func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
	if !bm.enabled {
		return nil, false, fmt.Errorf("bundle manager disabled")
	}

	bf := bm.newBundleFile(bundleNum)

	// Try local file first
	if bf.exists() {
		return bm.loadFromFile(ctx, bundleNum, bf)
	}

	// Fetch from PLC
	return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
}

func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
	log.Verbose("→ Loading bundle %06d from local file", bundleNum)

	// Verify hash if bundle is in DB
	if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
		if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
			log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
			os.Remove(bf.path)
			return nil, false, fmt.Errorf("hash mismatch")
		}
		log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
	}

	if err := bm.load(bf); err != nil {
		return nil, false, err
	}

	// Index if not in DB
	if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
		bf.compressedHash = bm.hashFile(bf.path)
		bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
		cursor := "" // Unknown for existing files
		bm.indexBundle(ctx, bundleNum, bf, cursor)
	}

	return bf.operations, true, nil
}

func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
	log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)

	afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
	fetcher := newBundleFetcher(client, afterTime, prevCIDs)

	ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)

	if isComplete {
		bf.operations = ops
		if err := bm.save(bf); err != nil {
			log.Error("Warning: failed to save bundle: %v", err)
		} else {
			cursor := afterTime // Store the cursor used
			bm.indexBundle(ctx, bundleNum, bf, cursor)
			log.Info("✓ Bundle %06d saved [%d ops, hash: %s...]",
				bundleNum, len(ops), bf.uncompressedHash[:16])
		}
	}

	return ops, isComplete, nil
}

func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
	if bundleNum == 1 {
		return "", nil
	}

	prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
	if err != nil {
		return "", nil
	}

	afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)

	// Return stored boundary CIDs if available
	if len(prevBundle.BoundaryCIDs) > 0 {
		cids := make(map[string]bool)
		for _, cid := range prevBundle.BoundaryCIDs {
			cids[cid] = true
		}
		return afterTime, cids
	}

	// Fallback: compute from file
	bf := bm.newBundleFile(bundleNum - 1)
	if bf.exists() {
		if err := bm.load(bf); err == nil {
			_, cids := GetBoundaryCIDs(bf.operations)
			return afterTime, cids
		}
	}

	return afterTime, nil
}

// ===== BUNDLE INDEXING =====

func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
	prevHash := ""
	if bundleNum > 1 {
		if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
			prevHash = prev.Hash
		}
	}

	dids := bm.extractUniqueDIDs(bf.operations)
	compressedFileSize := bm.getFileSize(bf.path)

	// Calculate uncompressed size
	uncompressedSize := int64(0)
	for _, op := range bf.operations {
		uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
	}

	bundle := &storage.PLCBundle{
		BundleNumber:     bundleNum,
		StartTime:        bf.operations[0].CreatedAt,
		EndTime:          bf.operations[len(bf.operations)-1].CreatedAt,
		DIDs:             dids,
		Hash:             bf.uncompressedHash,
		CompressedHash:   bf.compressedHash,
		CompressedSize:   compressedFileSize,
		UncompressedSize: uncompressedSize,
		Cursor:           cursor,
		PrevBundleHash:   prevHash,
		Compressed:       true,
		CreatedAt:        time.Now(),
	}

	return bm.db.CreateBundle(ctx, bundle)
}

func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
	didSet := make(map[string]bool)
	for _, op := range ops {
		didSet[op.DID] = true
	}

	dids := make([]string, 0, len(didSet))
	for did := range didSet {
		dids = append(dids, did)
	}
	return dids
}

// ===== MEMPOOL BUNDLE CREATION =====

func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
	if !bm.enabled {
		return 0, fmt.Errorf("bundle manager disabled")
	}

	if len(operations) != BUNDLE_SIZE {
		return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
	}

	lastBundle, err := bm.db.GetLastBundleNumber(ctx)
	if err != nil {
		return 0, err
	}
	bundleNum := lastBundle + 1

	bf := bm.newBundleFile(bundleNum)
	bf.operations = operations

	if err := bm.save(bf); err != nil {
		return 0, err
	}

	if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
		return 0, err
	}

	log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
		bundleNum, bf.uncompressedHash[:16])

	return bundleNum, nil
}

// ===== VERIFICATION =====

func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
	if !bm.enabled {
		return fmt.Errorf("bundle manager disabled")
	}

	log.Info("Verifying bundle chain from 1 to %06d...", endBundle)

	for i := 1; i <= endBundle; i++ {
		bundle, err := bm.db.GetBundleByNumber(ctx, i)
		if err != nil {
			return fmt.Errorf("bundle %06d not found: %w", i, err)
		}

		// Verify file hash
		path := bm.newBundleFile(i).path
		if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
			return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
		}

		// Verify chain link
		if i > 1 {
			prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
			if err != nil {
				return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
			}

			if bundle.PrevBundleHash != prevBundle.Hash {
				return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
					i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
			}
		}

		if i%100 == 0 {
			log.Verbose("  ✓ Verified bundles 1-%06d", i)
		}
	}

	log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
	return nil
}

func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
	if !bm.enabled {
		return nil
	}

	for i := 1; i < targetBundle; i++ {
		if !bm.newBundleFile(i).exists() {
			if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
				return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
			}
		}
	}

	return nil
}

// ===== UTILITY METHODS =====

func (bm *BundleManager) hash(data []byte) string {
	h := sha256.Sum256(data)
	return hex.EncodeToString(h[:])
}

func (bm *BundleManager) hashFile(path string) string {
	data, _ := os.ReadFile(path)
	return bm.hash(data)
}

func (bm *BundleManager) verifyHash(path, expectedHash string) error {
	if expectedHash == "" {
		return nil
	}

	actualHash := bm.hashFile(path)
	if actualHash != expectedHash {
		return fmt.Errorf("hash mismatch")
	}
	return nil
}

func (bm *BundleManager) getFileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}

func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, error) {
	if !bm.enabled {
		return 0, 0, nil
	}
	return bm.db.GetBundleStats(ctx)
}

func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
	lastBundle, err := bm.db.GetLastBundleNumber(ctx)
	if err != nil {
		return nil, err
	}

	if lastBundle == 0 {
		return map[string]interface{}{
			"chain_length": 0,
			"status":       "empty",
		}, nil
	}

	firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
	lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)

	return map[string]interface{}{
		"chain_length":     lastBundle,
		"first_bundle":     1,
		"last_bundle":      lastBundle,
		"chain_start_time": firstBundle.StartTime,
		"chain_end_time":   lastBundleData.EndTime,
		"chain_head_hash":  lastBundleData.Hash,
	}, nil
}

// ===== EXPORTED HELPERS =====

func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
	if len(operations) == 0 {
		return time.Time{}, nil
	}

	lastOp := operations[len(operations)-1]
	boundaryTime := lastOp.CreatedAt
	cidSet := make(map[string]bool)

	for i := len(operations) - 1; i >= 0; i-- {
		op := operations[i]
		if op.CreatedAt.Equal(boundaryTime) {
			cidSet[op.CID] = true
		} else {
			break
		}
	}

	return boundaryTime, cidSet
}

func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
	if len(operations) == 0 {
		return operations
	}

	boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
	if err != nil {
		return operations
	}

	startIdx := 0
	for startIdx < len(operations) {
		op := operations[startIdx]

		if op.CreatedAt.After(boundaryTime) {
			break
		}

		if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
			startIdx++
			continue
		}

		break
	}

	return operations[startIdx:]
}

// LoadBundleOperations is a public method for external access (e.g., API handlers)
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
	if !bm.enabled {
		return nil, fmt.Errorf("bundle manager disabled")
	}

	bf := bm.newBundleFile(bundleNum)

	if !bf.exists() {
		return nil, fmt.Errorf("bundle %06d not found", bundleNum)
	}

	if err := bm.load(bf); err != nil {
		return nil, err
	}

	return bf.operations, nil
}
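The deleted bundle.go deduplicated operations across bundle boundaries by remembering the CIDs of all trailing operations that share the last timestamp: the next fetch resumes "after" that timestamp and skips those CIDs. A standalone sketch of that idea, using a pared-down stand-in for PLCOperation:

package main

import (
	"fmt"
	"time"
)

// op is a simplified stand-in for PLCOperation, just enough to show
// the boundary-CID logic from the deleted GetBoundaryCIDs.
type op struct {
	CID       string
	CreatedAt time.Time
}

// boundaryCIDs returns the final timestamp and the CIDs of every
// trailing operation that shares it; the subsequent export call drops
// any operation whose CID appears in this set.
func boundaryCIDs(ops []op) (time.Time, map[string]bool) {
	if len(ops) == 0 {
		return time.Time{}, nil
	}
	t := ops[len(ops)-1].CreatedAt
	set := make(map[string]bool)
	for i := len(ops) - 1; i >= 0 && ops[i].CreatedAt.Equal(t); i-- {
		set[ops[i].CID] = true
	}
	return t, set
}

func main() {
	t0 := time.Now()
	ops := []op{{"a", t0}, {"b", t0.Add(time.Second)}, {"c", t0.Add(time.Second)}}
	t, set := boundaryCIDs(ops)
	fmt.Println(t.Equal(t0.Add(time.Second)), set) // true map[b:true c:true]
}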
-237  internal/plc/client.go (file deleted)
···
package plc

import (
	"bufio"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"time"

	"github.com/atscan/atscanner/internal/log"
)

type Client struct {
	baseURL     string
	httpClient  *http.Client
	rateLimiter *RateLimiter
}

func NewClient(baseURL string) *Client {
	// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
	rateLimiter := NewRateLimiter(90, time.Minute)

	return &Client{
		baseURL: baseURL,
		httpClient: &http.Client{
			Timeout: 60 * time.Second,
		},
		rateLimiter: rateLimiter,
	}
}

func (c *Client) Close() {
	if c.rateLimiter != nil {
		c.rateLimiter.Stop()
	}
}

type ExportOptions struct {
	Count int
	After string // ISO 8601 datetime string
}

// Export fetches export data from PLC directory with rate limiting and retry
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
	return c.exportWithRetry(ctx, opts, 5)
}

// exportWithRetry implements retry logic with exponential backoff for rate limits
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
	var lastErr error
	backoff := 1 * time.Second

	for attempt := 1; attempt <= maxRetries; attempt++ {
		// Wait for rate limiter token
		if err := c.rateLimiter.Wait(ctx); err != nil {
			return nil, err
		}

		operations, retryAfter, err := c.doExport(ctx, opts)

		if err == nil {
			return operations, nil
		}

		lastErr = err

		// Check if it's a rate limit error (429)
		if retryAfter > 0 {
			log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
				retryAfter, attempt, maxRetries)

			select {
			case <-time.After(retryAfter):
				continue
			case <-ctx.Done():
				return nil, ctx.Err()
			}
		}

		// Other errors - exponential backoff
		if attempt < maxRetries {
			log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
				attempt, maxRetries, err, backoff)

			select {
			case <-time.After(backoff):
				backoff *= 2 // Exponential backoff
			case <-ctx.Done():
				return nil, ctx.Err()
			}
		}
	}

	return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
}

// doExport performs the actual HTTP request
func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
	url := fmt.Sprintf("%s/export", c.baseURL)

	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, 0, err
	}

	// Add query parameters
	q := req.URL.Query()
	if opts.Count > 0 {
		q.Add("count", fmt.Sprintf("%d", opts.Count))
	}
	if opts.After != "" {
		q.Add("after", opts.After)
	}
	req.URL.RawQuery = q.Encode()

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, 0, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	// Handle rate limiting (429)
	if resp.StatusCode == http.StatusTooManyRequests {
		retryAfter := parseRetryAfter(resp)

		// Also check x-ratelimit headers for info
		if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
			log.Verbose("Rate limit: %s", limit)
		}

		return nil, retryAfter, fmt.Errorf("rate limited (429)")
	}

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
	}

	var operations []PLCOperation

	// PLC export returns newline-delimited JSON
	scanner := bufio.NewScanner(resp.Body)
	buf := make([]byte, 0, 64*1024)
	scanner.Buffer(buf, 1024*1024)

	lineCount := 0
	for scanner.Scan() {
		lineCount++
		line := scanner.Bytes()

		if len(line) == 0 {
			continue
		}

		var op PLCOperation
		if err := json.Unmarshal(line, &op); err != nil {
			log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
			continue
		}

		// CRITICAL: Store the original raw JSON bytes
		op.RawJSON = make([]byte, len(line))
		copy(op.RawJSON, line)

		operations = append(operations, op)
	}

	if err := scanner.Err(); err != nil {
		return nil, 0, fmt.Errorf("error reading response: %w", err)
	}

	return operations, 0, nil
}

// parseRetryAfter parses the Retry-After header
func parseRetryAfter(resp *http.Response) time.Duration {
	retryAfter := resp.Header.Get("Retry-After")
	if retryAfter == "" {
		// Default to 5 minutes if no header
		return 5 * time.Minute
	}

	// Try parsing as seconds
	if seconds, err := strconv.Atoi(retryAfter); err == nil {
		return time.Duration(seconds) * time.Second
	}

	// Try parsing as HTTP date
	if t, err := http.ParseTime(retryAfter); err == nil {
		return time.Until(t)
	}

	// Default
	return 5 * time.Minute
}

// GetDID fetches a specific DID document from PLC
func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
	// Wait for rate limiter
	if err := c.rateLimiter.Wait(ctx); err != nil {
		return nil, err
	}

	url := fmt.Sprintf("%s/%s", c.baseURL, did)

	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, err
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusTooManyRequests {
		retryAfter := parseRetryAfter(resp)
		return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
	}

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
	}

	var doc DIDDocument
	if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
		return nil, err
	}

	return &doc, nil
}
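The deleted client retried on a doubling schedule starting at one second (1s, 2s, 4s, ...), with the rate-limited case handled separately via Retry-After. A generic, self-contained sketch of the same backoff shape; fn and the transient error in main are illustrative placeholders:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// withBackoff retries fn with the doubling schedule the deleted client
// used, respecting context cancellation between attempts.
func withBackoff(ctx context.Context, maxRetries int, fn func() error) error {
	backoff := time.Second
	var lastErr error
	for attempt := 1; attempt <= maxRetries; attempt++ {
		if lastErr = fn(); lastErr == nil {
			return nil
		}
		if attempt == maxRetries {
			break
		}
		select {
		case <-time.After(backoff):
			backoff *= 2 // 1s, 2s, 4s, ...
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
}

func main() {
	calls := 0
	err := withBackoff(context.Background(), 3, func() error {
		calls++
		if calls < 3 {
			return errors.New("transient")
		}
		return nil
	})
	fmt.Println(calls, err) // 3 <nil>
}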
+112  internal/plc/helpers.go (new file)
···
package plc

import (
	"regexp"
	"strings"
)

// MaxHandleLength is the maximum allowed handle length for database storage
const MaxHandleLength = 500

// Handle validation regex per AT Protocol spec.
// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter.
var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)

// ExtractHandle safely extracts the handle from a PLC operation
func ExtractHandle(op *PLCOperation) string {
	if op == nil || op.Operation == nil {
		return ""
	}

	// Get "alsoKnownAs"
	aka, ok := op.Operation["alsoKnownAs"].([]interface{})
	if !ok {
		return ""
	}

	// Find the handle (e.g., "at://handle.bsky.social")
	for _, item := range aka {
		if handle, ok := item.(string); ok {
			if strings.HasPrefix(handle, "at://") {
				return strings.TrimPrefix(handle, "at://")
			}
		}
	}
	return ""
}

// ValidateHandle checks if a handle is valid for database storage.
// Returns empty string if handle is invalid (too long or wrong format).
func ValidateHandle(handle string) string {
	if handle == "" {
		return ""
	}

	// Check length first (faster)
	if len(handle) > MaxHandleLength {
		return ""
	}

	// Validate format using regex
	if !handleRegex.MatchString(handle) {
		return ""
	}

	return handle
}

// ExtractPDS safely extracts the PDS endpoint from a PLC operation
func ExtractPDS(op *PLCOperation) string {
	if op == nil || op.Operation == nil {
		return ""
	}

	// Get "services"
	services, ok := op.Operation["services"].(map[string]interface{})
	if !ok {
		return ""
	}

	// Get "atproto_pds"
	pdsService, ok := services["atproto_pds"].(map[string]interface{})
	if !ok {
		return ""
	}

	// Get "endpoint"
	if endpoint, ok := pdsService["endpoint"].(string); ok {
		return endpoint
	}

	return ""
}

// DIDInfo contains extracted metadata from a PLC operation
type DIDInfo struct {
	Handle string
	PDS    string
}

// ExtractDIDInfo extracts both handle and PDS from an operation
func ExtractDIDInfo(op *PLCOperation) DIDInfo {
	return DIDInfo{
		Handle: ExtractHandle(op),
		PDS:    ExtractPDS(op),
	}
}

// ExtractDIDInfoMap creates a map of DID -> info from operations.
// Processes in reverse order to get the latest state for each DID.
func ExtractDIDInfoMap(ops []PLCOperation) map[string]DIDInfo {
	infoMap := make(map[string]DIDInfo)

	// Process in reverse to get latest state
	for i := len(ops) - 1; i >= 0; i-- {
		op := ops[i]
		if _, exists := infoMap[op.DID]; !exists {
			infoMap[op.DID] = ExtractDIDInfo(&op)
		}
	}

	return infoMap
}
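ValidateHandle combines a cheap length check with the stricter regex. A quick standalone illustration of what that regex accepts and rejects (the pattern is copied verbatim from the file above; the sample handles are made up):

package main

import (
	"fmt"
	"regexp"
)

// Copied verbatim from internal/plc/helpers.go.
var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)

func main() {
	for _, h := range []string{
		"alice.bsky.social", // valid: dotted labels, TLD starts with a letter
		"-bad.example.com",  // invalid: label starts with '-'
		"noperiods",         // invalid: at least one dot is required
		"example.123",       // invalid: TLD must start with a letter
	} {
		fmt.Printf("%-20s %v\n", h, handleRegex.MatchString(h))
	}
}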
+522  internal/plc/manager.go (new file)
···
package plc

import (
	"context"
	"encoding/csv"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/atscan/atscand/internal/log"
	"github.com/atscan/atscand/internal/storage"
	"github.com/klauspost/compress/zstd"
	plcbundle "tangled.org/atscan.net/plcbundle"
)

// BundleManager wraps the library's manager with database integration
type BundleManager struct {
	libManager *plcbundle.Manager
	db         storage.Database
	bundleDir  string
	indexDIDs  bool
}

func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
	// Create library config
	config := plcbundle.DefaultConfig(bundleDir)

	// Create PLC client
	var client *plcbundle.PLCClient
	if plcURL != "" {
		client = plcbundle.NewPLCClient(plcURL)
	}

	// Create library manager
	libMgr, err := plcbundle.NewManager(config, client)
	if err != nil {
		return nil, fmt.Errorf("failed to create library manager: %w", err)
	}

	return &BundleManager{
		libManager: libMgr,
		db:         db,
		bundleDir:  bundleDir,
		indexDIDs:  indexDIDs,
	}, nil
}

func (bm *BundleManager) Close() {
	if bm.libManager != nil {
		bm.libManager.Close()
	}
}

// LoadBundleOperations loads a bundle (from library) and returns operations
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
	bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
	if err != nil {
		return nil, err
	}
	return bundle.Operations, nil
}

// LoadBundle loads a full bundle with metadata
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
	return bm.libManager.LoadBundle(ctx, bundleNum)
}

// FetchAndSaveBundle fetches next bundle from PLC and saves
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
	// Fetch from PLC using library
	bundle, err := bm.libManager.FetchNextBundle(ctx)
	if err != nil {
		return nil, err
	}

	// Save to disk (library handles this)
	if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
		return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
	}

	// Index DIDs if enabled (still use database for this)
	if bm.indexDIDs && len(bundle.Operations) > 0 {
		if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
			log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
		}
	}

	log.Info("✓ Saved bundle %06d", bundle.BundleNumber)

	return bundle, nil
}

// indexBundleDIDs indexes DIDs from a bundle into the database
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
	start := time.Now()
	log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)

	// Extract DID info from operations
	didInfoMap := ExtractDIDInfoMap(bundle.Operations)

	successCount := 0
	errorCount := 0
	invalidHandleCount := 0

	// Upsert each DID
	for did, info := range didInfoMap {
		validHandle := ValidateHandle(info.Handle)
		if info.Handle != "" && validHandle == "" {
			invalidHandleCount++
		}

		if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
			log.Error("Failed to index DID %s: %v", did, err)
			errorCount++
		} else {
			successCount++
		}
	}

	elapsed := time.Since(start)
	log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
		successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)

	return nil
}

// VerifyChain verifies bundle chain integrity
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
	result, err := bm.libManager.VerifyChain(ctx)
	if err != nil {
		return err
	}

	if !result.Valid {
		return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
	}

	return nil
}

// GetChainInfo returns chain information
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
	return bm.libManager.GetInfo(), nil
}

// GetMempoolStats returns mempool statistics from the library
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
	return bm.libManager.GetMempoolStats()
}

// GetMempoolOperations returns all operations currently in mempool
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
	return bm.libManager.GetMempoolOperations()
}

// GetIndex returns the library's bundle index
func (bm *BundleManager) GetIndex() *plcbundle.Index {
	return bm.libManager.GetIndex()
}

// GetLastBundleNumber returns the last bundle number
func (bm *BundleManager) GetLastBundleNumber() int {
	index := bm.libManager.GetIndex()
	lastBundle := index.GetLastBundle()
	if lastBundle == nil {
		return 0
	}
	return lastBundle.BundleNumber
}

// GetBundleMetadata gets bundle metadata by number
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
	index := bm.libManager.GetIndex()
	return index.GetBundle(bundleNum)
}

// GetBundles returns the most recent bundles (newest first)
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
	index := bm.libManager.GetIndex()
	allBundles := index.GetBundles()

	// Determine how many bundles to return
	count := limit
	if count <= 0 || count > len(allBundles) {
		count = len(allBundles)
	}

	// Build result in reverse order (newest first)
	result := make([]*plcbundle.BundleMetadata, count)
	for i := 0; i < count; i++ {
		result[i] = allBundles[len(allBundles)-1-i]
	}

	return result
}

// GetBundleStats returns bundle statistics
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
	index := bm.libManager.GetIndex()
	stats := index.GetStats()

	// Convert to expected format
	lastBundle := stats["last_bundle"]
	if lastBundle == nil {
		lastBundle = int64(0)
	}

	// Calculate total uncompressed size by iterating through all bundles
	totalUncompressedSize := int64(0)
	allBundles := index.GetBundles()
	for _, bundle := range allBundles {
		totalUncompressedSize += bundle.UncompressedSize
	}

	return map[string]interface{}{
		"bundle_count":            int64(stats["bundle_count"].(int)),
		"total_size":              stats["total_size"].(int64),
		"total_uncompressed_size": totalUncompressedSize,
		"last_bundle":             int64(lastBundle.(int)),
	}
}

// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
	bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
	if err != nil {
		return nil, 0, err
	}

	// Extract unique DIDs
	didSet := make(map[string]bool)
	for _, op := range bundle.Operations {
		didSet[op.DID] = true
	}

	dids := make([]string, 0, len(didSet))
	for did := range didSet {
		dids = append(dids, did)
	}

	return dids, bundle.DIDCount, nil
}

// FindBundleForTimestamp finds bundle containing a timestamp
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
	index := bm.libManager.GetIndex()
	bundles := index.GetBundles()

	// Find bundle containing this time
	for _, bundle := range bundles {
		if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
			(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
			return bundle.BundleNumber
		}
	}

	// Return closest bundle before this time
	for i := len(bundles) - 1; i >= 0; i-- {
		if bundles[i].EndTime.Before(afterTime) {
			return bundles[i].BundleNumber
		}
	}

	return 1 // Default to first bundle
}

// StreamRaw streams raw compressed bundle data
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
	return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
}

// StreamDecompressed streams decompressed bundle data
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
	return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
}

// GetPLCHistory calculates historical statistics from the bundle index
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
	index := bm.libManager.GetIndex()
	allBundles := index.GetBundles()

	// Filter bundles >= fromBundle
	var filtered []*plcbundle.BundleMetadata
	for _, b := range allBundles {
		if b.BundleNumber >= fromBundle {
			filtered = append(filtered, b)
		}
	}

	if len(filtered) == 0 {
		return []*storage.PLCHistoryPoint{}, nil
	}

	// Sort bundles by bundle number to ensure proper cumulative calculation
	sort.Slice(filtered, func(i, j int) bool {
		return filtered[i].BundleNumber < filtered[j].BundleNumber
	})

	// Group by date
	type dailyStat struct {
		lastBundle        int
		bundleCount       int
		totalUncompressed int64
		totalCompressed   int64
	}

	dailyStats := make(map[string]*dailyStat)

	// Map to store the cumulative values at the end of each date
	dateCumulatives := make(map[string]struct {
		uncompressed int64
		compressed   int64
	})

	// Calculate cumulative totals as we iterate through sorted bundles
	cumulativeUncompressed := int64(0)
	cumulativeCompressed := int64(0)

	for _, bundle := range filtered {
		dateStr := bundle.StartTime.Format("2006-01-02")

		// Update cumulative totals
		cumulativeUncompressed += bundle.UncompressedSize
		cumulativeCompressed += bundle.CompressedSize

		if stat, exists := dailyStats[dateStr]; exists {
			// Update existing day
			if bundle.BundleNumber > stat.lastBundle {
				stat.lastBundle = bundle.BundleNumber
			}
			stat.bundleCount++
			stat.totalUncompressed += bundle.UncompressedSize
			stat.totalCompressed += bundle.CompressedSize
		} else {
			// Create new day entry
			dailyStats[dateStr] = &dailyStat{
				lastBundle:        bundle.BundleNumber,
				bundleCount:       1,
				totalUncompressed: bundle.UncompressedSize,
				totalCompressed:   bundle.CompressedSize,
			}
		}

		// Store the cumulative values at the end of this date
		// (will be overwritten if there are multiple bundles on the same day)
		dateCumulatives[dateStr] = struct {
			uncompressed int64
			compressed   int64
		}{
			uncompressed: cumulativeUncompressed,
			compressed:   cumulativeCompressed,
		}
	}

	// Convert map to sorted slice by date
	var dates []string
	for date := range dailyStats {
		dates = append(dates, date)
	}
	sort.Strings(dates)

	// Build history points with cumulative operations
	var history []*storage.PLCHistoryPoint
	cumulativeOps := 0

	for _, date := range dates {
		stat := dailyStats[date]
		cumulativeOps += stat.bundleCount * 10000
		cumulative := dateCumulatives[date]

		history = append(history, &storage.PLCHistoryPoint{
			Date:                   date,
			BundleNumber:           stat.lastBundle,
			OperationCount:         cumulativeOps,
			UncompressedSize:       stat.totalUncompressed,
			CompressedSize:         stat.totalCompressed,
			CumulativeUncompressed: cumulative.uncompressed,
			CumulativeCompressed:   cumulative.compressed,
		})
	}

	// Apply limit if specified
	if limit > 0 && len(history) > limit {
		history = history[:limit]
	}

	return history, nil
}

// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
	// Define the path to the labels file
	labelsDir := filepath.Join(bm.bundleDir, "labels")
	labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))

	// Check if file exists
	if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
		log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
		// Return empty, not an error
		return []*PLCOpLabel{}, nil
	}

	// Open the Zstd-compressed file
	file, err := os.Open(labelsFile)
	if err != nil {
		return nil, fmt.Errorf("failed to open labels file: %w", err)
	}
	defer file.Close()

	// Create a Zstd reader
	zstdReader, err := zstd.NewReader(file)
	if err != nil {
		return nil, fmt.Errorf("failed to create zstd reader: %w", err)
	}
	defer zstdReader.Close()

	// Create a CSV reader; the labels file has no header row, and
	// parseLabelRecord validates the field count per record
	csvReader := csv.NewReader(zstdReader)

	var labels []*PLCOpLabel

	// Read all records
	for {
		// Check for context cancellation
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		record, err := csvReader.Read()
		if err == io.EOF {
			break // End of file
		}
		if err != nil {
			log.Error("Error reading CSV record in %s: %v", labelsFile, err)
			continue // Skip bad line
		}

		// Parse the CSV record (which is []string)
		label, err := parseLabelRecord(record)
		if err != nil {
			log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
			continue // Skip bad data
		}

		labels = append(labels, label)
	}

	return labels, nil
}

// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
	// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
	if len(record) != 6 {
		err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
		log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
		return nil, err
	}

	// 0:bundle
	bundle, err := strconv.Atoi(record[0])
	if err != nil {
		log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
		return nil, fmt.Errorf("parsing 'bundle': %w", err)
	}

	// 1:position
	position, err := strconv.Atoi(record[1])
	if err != nil {
		log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
		return nil, fmt.Errorf("parsing 'position': %w", err)
	}

	// 2:cid(short)
	shortCID := record[2]

	// 3:size
	size, err := strconv.Atoi(record[3])
	if err != nil {
		log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
		return nil, fmt.Errorf("parsing 'size': %w", err)
	}

	// 4:confidence
	confidence, err := strconv.ParseFloat(record[4], 64)
	if err != nil {
		log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
		return nil, fmt.Errorf("parsing 'confidence': %w", err)
	}

	// 5:labels
	detectors := strings.Split(record[5], ";")

	label := &PLCOpLabel{
		Bundle:     bundle,
		Position:   position,
		CID:        shortCID,
		Size:       size,
		Confidence: confidence,
		Detectors:  detectors,
	}

	return label, nil
}
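GetPLCHistory folds the sorted bundle index into per-day totals plus a running cumulative that is overwritten until the day's last bundle. A standalone sketch of that aggregation with a simplified metadata type (the meta struct and field names are illustrative, not the library's):

package main

import (
	"fmt"
	"sort"
)

// meta is a pared-down stand-in for a bundle's index entry.
type meta struct {
	num  int
	date string // "2006-01-02"
	size int64
}

// history mirrors GetPLCHistory's core: sort by bundle number, sum
// sizes per day, and record the running total at each day's end.
func history(bundles []meta) []string {
	sort.Slice(bundles, func(i, j int) bool { return bundles[i].num < bundles[j].num })

	perDay := map[string]int64{}
	cumAtDay := map[string]int64{}
	cum := int64(0)
	var dates []string
	for _, b := range bundles {
		if _, ok := perDay[b.date]; !ok {
			dates = append(dates, b.date)
		}
		perDay[b.date] += b.size
		cum += b.size
		cumAtDay[b.date] = cum // overwritten until the day's last bundle
	}
	sort.Strings(dates)

	out := make([]string, 0, len(dates))
	for _, d := range dates {
		out = append(out, fmt.Sprintf("%s day=%d cumulative=%d", d, perDay[d], cumAtDay[d]))
	}
	return out
}

func main() {
	fmt.Println(history([]meta{
		{1, "2024-01-01", 10},
		{2, "2024-01-01", 20},
		{3, "2024-01-02", 5},
	}))
	// [2024-01-01 day=30 cumulative=30 2024-01-02 day=5 cumulative=35]
}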
-70  internal/plc/ratelimiter.go (file deleted)
···
package plc

import (
	"context"
	"time"
)

// RateLimiter implements a token bucket rate limiter
type RateLimiter struct {
	tokens     chan struct{}
	refillRate time.Duration
	maxTokens  int
	stopRefill chan struct{}
}

// NewRateLimiter creates a new rate limiter.
// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute.
func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
	rl := &RateLimiter{
		tokens:     make(chan struct{}, requestsPerPeriod),
		refillRate: period / time.Duration(requestsPerPeriod),
		maxTokens:  requestsPerPeriod,
		stopRefill: make(chan struct{}),
	}

	// Fill initially
	for i := 0; i < requestsPerPeriod; i++ {
		rl.tokens <- struct{}{}
	}

	// Start refill goroutine
	go rl.refill()

	return rl
}

// refill adds tokens at the specified rate
func (rl *RateLimiter) refill() {
	ticker := time.NewTicker(rl.refillRate)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			select {
			case rl.tokens <- struct{}{}:
				// Token added
			default:
				// Buffer full, skip
			}
		case <-rl.stopRefill:
			return
		}
	}
}

// Wait blocks until a token is available
func (rl *RateLimiter) Wait(ctx context.Context) error {
	select {
	case <-rl.tokens:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// Stop stops the rate limiter
func (rl *RateLimiter) Stop() {
	close(rl.stopRefill)
}
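The hand-rolled token bucket removed here could equally be expressed with golang.org/x/time/rate; whether the plcbundle library that replaces it does this internally is not visible in this diff. A minimal sketch with the same 90-per-minute budget:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// 90 requests per minute, the budget the deleted limiter enforced,
	// with a burst equal to the initially filled bucket.
	limiter := rate.NewLimiter(rate.Every(time.Minute/90), 90)

	ctx := context.Background()
	for i := 0; i < 3; i++ {
		if err := limiter.Wait(ctx); err != nil { // blocks until a token is free
			fmt.Println("wait:", err)
			return
		}
		fmt.Println("request", i)
	}
}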
+93 -416  internal/plc/scanner.go
···
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
 
-	"github.com/acarl005/stripansi"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/storage"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/storage"
 )
 
 type Scanner struct {
-	client        *Client
+	bundleManager *BundleManager
 	db            storage.Database
 	config        config.PLCConfig
-	bundleManager *BundleManager
 }
 
-func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
-	bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db)
-	if err != nil {
-		log.Error("Warning: failed to initialize bundle manager: %v", err)
-		bundleManager = &BundleManager{enabled: false}
-	}
+func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
+	log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
 
 	return &Scanner{
-		client:        NewClient(cfg.DirectoryURL),
+		bundleManager: bundleManager, // Use provided instance
 		db:            db,
 		config:        cfg,
-		bundleManager: bundleManager,
 	}
 }
 
 func (s *Scanner) Close() {
-	if s.bundleManager != nil {
-		s.bundleManager.Close()
-	}
-}
-
-// ScanMetrics tracks scan progress
-type ScanMetrics struct {
-	totalFetched   int64 // Total ops fetched from PLC/bundles
-	totalProcessed int64 // Unique ops processed (after dedup)
-	newEndpoints   int64 // New endpoints discovered
-	endpointCounts map[string]int64
-	currentBundle  int
-	startTime      time.Time
-}
-
-func newMetrics(startBundle int) *ScanMetrics {
-	return &ScanMetrics{
-		endpointCounts: make(map[string]int64),
-		currentBundle:  startBundle,
-		startTime:      time.Now(),
-	}
-}
-
-func (m *ScanMetrics) logSummary() {
-	summary := formatEndpointCounts(m.endpointCounts)
-	if m.newEndpoints > 0 {
-		log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
-			m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
-	} else {
-		log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
-			m.totalProcessed, m.totalFetched, time.Since(m.startTime))
-	}
+	// Don't close bundleManager here - it's shared
 }
 
 func (s *Scanner) Scan(ctx context.Context) error {
 	log.Info("Starting PLC directory scan...")
-	log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
 
 	cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
 	if err != nil {
 		return fmt.Errorf("failed to get scan cursor: %w", err)
 	}
 
-	startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
-	metrics := newMetrics(startBundle)
-
-	if startBundle > 1 {
-		if err := s.ensureContinuity(ctx, startBundle); err != nil {
-			return err
-		}
-	}
-
-	// Handle existing mempool first
-	if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
-		return s.handleMempoolOnly(ctx, metrics)
-	}
+	metrics := newMetrics(cursor.LastBundleNumber + 1)
 
-	// Process bundles until incomplete or error
+	// Main processing loop
 	for {
 		if err := ctx.Err(); err != nil {
 			return err
 		}
 
-		if err := s.processSingleBundle(ctx, metrics); err != nil {
-			if s.shouldRetry(err) {
-				continue
-			}
-			break
-		}
-
-		if err := s.updateCursor(ctx, cursor, metrics); err != nil {
-			log.Error("Warning: failed to update cursor: %v", err)
-		}
-	}
-
-	// Try to finalize mempool
-	s.finalizeMempool(ctx, metrics)
-
-	metrics.logSummary()
-	return nil
-}
-
-func (s *Scanner) calculateStartBundle(lastBundle int) int {
-	if lastBundle == 0 {
-		return 1
-	}
-	return lastBundle + 1
-}
-
-func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
-	log.Info("Checking bundle continuity...")
-	if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
-		return fmt.Errorf("bundle continuity check failed: %w", err)
-	}
-	return nil
-}
-
-func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
-	count, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-	return count > 0, nil
-}
-
-func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
-	count, _ := s.db.GetMempoolCount(ctx)
-	log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
-
-	if err := s.fillMempool(ctx, m); err != nil {
-		return err
-	}
-
-	if err := s.processMempool(ctx, m); err != nil {
-		log.Error("Error processing mempool: %v", err)
-	}
-
-	m.logSummary()
-	return nil
-}
-
-func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
-	log.Verbose("→ Processing bundle %06d...", m.currentBundle)
-
-	ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
-	if err != nil {
-		return s.handleBundleError(err, m)
-	}
-
-	if isComplete {
-		return s.handleCompleteBundle(ctx, ops, m)
-	}
-	return s.handleIncompleteBundle(ctx, ops, m)
-}
-
-func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
-	log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
-
-	if strings.Contains(err.Error(), "rate limited") {
-		log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
-		time.Sleep(5 * time.Minute)
-		return fmt.Errorf("retry")
-	}
-
-	if m.currentBundle > 1 {
-		log.Info("→ Reached end of available data")
-	}
-	return err
-}
-
-func (s *Scanner) shouldRetry(err error) bool {
-	return err != nil && err.Error() == "retry"
-}
-
-func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
-	counts, err := s.processBatch(ctx, ops)
-	if err != nil {
-		return err
-	}
-
-	s.mergeCounts(m.endpointCounts, counts)
-	m.totalProcessed += int64(len(ops)) // Unique ops after dedup
-	m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
-
-	batchTotal := sumCounts(counts)
-	log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
-		m.currentBundle, len(ops), batchTotal)
-
-	m.currentBundle++
-	return nil
-}
-
-func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
-	log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
-
-	if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
-		return err
-	}
-
-	s.finalizeMempool(ctx, m)
-	return fmt.Errorf("incomplete") // Signal end of processing
-}
-
-func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
-	if err := s.fillMempool(ctx, m); err != nil {
-		log.Error("Error filling mempool: %v", err)
-	}
-	if err := s.processMempool(ctx, m); err != nil {
-		log.Error("Error processing mempool: %v", err)
-	}
-}
-
-func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
-	const fetchLimit = 1000
-
-	for {
-		count, err := s.db.GetMempoolCount(ctx)
+		// Fetch and save bundle (library handles mempool internally)
+		bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
 		if err != nil {
-			return err
-		}
+			if isInsufficientOpsError(err) {
+				// Show mempool status
+				stats := s.bundleManager.libManager.GetMempoolStats()
+				mempoolCount := stats["count"].(int)
 
-		if count >= BUNDLE_SIZE {
-			log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
-			return nil
-		}
+				if mempoolCount > 0 {
+					log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
+						mempoolCount, BUNDLE_SIZE)
+				} else {
+					log.Info("→ Caught up! No operations available")
+				}
+				break
+			}
 
-		log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
+			if strings.Contains(err.Error(), "rate limited") {
+				log.Info("⚠ Rate limited, pausing for 5 minutes...")
+				time.Sleep(5 * time.Minute)
+				continue
+			}
 
-		// ✅ Fix: Don't capture unused 'ops' variable
-		shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
-		if err != nil {
-			return err
+			return fmt.Errorf("failed to fetch bundle: %w", err)
 		}
 
-		if !shouldContinue {
-			finalCount, _ := s.db.GetMempoolCount(ctx)
-			log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
-			return nil
-		}
-	}
-}
-
-func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
-	lastOp, err := s.db.GetLastMempoolOperation(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	var after string
-	if lastOp != nil {
-		after = lastOp.CreatedAt.Format(time.RFC3339Nano)
-		log.Verbose("  Using cursor: %s", after)
-	}
-
-	ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
-	if err != nil {
-		return false, fmt.Errorf("failed to fetch from PLC: %w", err)
-	}
-
-	fetchedCount := len(ops)
-	m.totalFetched += int64(fetchedCount) // Track all fetched
-	log.Verbose("  Fetched %d operations from PLC", fetchedCount)
-
-	if fetchedCount == 0 {
-		count, _ := s.db.GetMempoolCount(ctx)
-		log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
-		return false, nil
-	}
-
-	beforeCount, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	endpointsBefore := sumCounts(m.endpointCounts)
-	if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
-		return false, err
-	}
-	endpointsAfter := sumCounts(m.endpointCounts)
-	m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
-
-	afterCount, err := s.db.GetMempoolCount(ctx)
-	if err != nil {
-		return false, err
-	}
-
-	uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
-	m.totalProcessed += uniqueAdded // Track unique ops processed
-
-	log.Verbose("  Added %d new unique operations to mempool (%d were duplicates)",
-		uniqueAdded, int64(fetchedCount)-uniqueAdded)
-
-	// Continue only if got full batch
-	shouldContinue := fetchedCount >= limit
-	if !shouldContinue {
-		log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
-	}
-
-	return shouldContinue, nil
-}
-
-func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
-	mempoolOps := make([]storage.MempoolOperation, len(ops))
-	for i, op := range ops {
-		mempoolOps[i] = storage.MempoolOperation{
-			DID:       op.DID,
-			Operation: string(op.RawJSON),
-			CID:       op.CID,
-			CreatedAt: op.CreatedAt,
-		}
-	}
-
-	if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
-		return err
-	}
-
-	// Process for endpoint discovery
-	batchCounts, err := s.processBatch(ctx, ops)
-	s.mergeCounts(counts, batchCounts)
-	return err
-}
-
-func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
-	for {
-		count, err := s.db.GetMempoolCount(ctx)
+		// Process operations for endpoint discovery
+		counts, err := s.processBatch(ctx, bundle.Operations)
 		if err != nil {
-			return err
+			log.Error("Failed to process batch: %v", err)
+			// Continue anyway
 		}
 
-		log.Verbose("Mempool contains %d operations", count)
-
-		if count < BUNDLE_SIZE {
-			log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
-			return nil
-		}
+		// Update metrics
+		s.mergeCounts(metrics.endpointCounts, counts)
+		metrics.totalProcessed += int64(len(bundle.Operations))
+		metrics.newEndpoints += sumCounts(counts)
+		metrics.currentBundle = bundle.BundleNumber
 
-		log.Info("→ Creating bundle from mempool (%d operations available)...", count)
+		log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
+			bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
 
-		// Updated to receive 4 values instead of 3
-		bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
-		if err != nil {
-			return err
-		}
-
-		// Process and update metrics
-		countsBefore := sumCounts(m.endpointCounts)
-		counts, _ := s.processBatch(ctx, ops)
-		s.mergeCounts(m.endpointCounts, counts)
-		newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
368
-
369
-
m.totalProcessed += int64(len(ops))
370
-
m.newEndpoints += newEndpointsFound
371
-
m.currentBundle = bundleNum
372
-
373
-
if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92
+
// Update cursor
93
+
if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
374
94
log.Error("Warning: failed to update cursor: %v", err)
375
95
}
376
-
377
-
log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
378
-
}
379
-
}
380
-
381
-
func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
382
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
383
-
if err != nil {
384
-
return 0, nil, "", err
385
96
}
386
97
387
-
ops, ids := s.deduplicateMempool(mempoolOps)
388
-
if len(ops) < BUNDLE_SIZE {
389
-
return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
390
-
}
391
-
392
-
// Determine cursor from last bundle
393
-
cursor := ""
394
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
395
-
if err == nil && lastBundle > 0 {
396
-
if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
397
-
cursor = bundle.EndTime.Format(time.RFC3339Nano)
398
-
}
399
-
}
400
-
401
-
bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
402
-
if err != nil {
403
-
return 0, nil, "", err
404
-
}
405
-
406
-
if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
407
-
return 0, nil, "", err
408
-
}
409
-
410
-
return bundleNum, ops, cursor, nil
411
-
}
412
-
413
-
func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
414
-
ops := make([]PLCOperation, 0, BUNDLE_SIZE)
415
-
ids := make([]int64, 0, BUNDLE_SIZE)
416
-
seenCIDs := make(map[string]bool)
417
-
418
-
for _, mop := range mempoolOps {
419
-
if seenCIDs[mop.CID] {
420
-
ids = append(ids, mop.ID)
421
-
continue
422
-
}
423
-
seenCIDs[mop.CID] = true
424
-
425
-
var op PLCOperation
426
-
json.Unmarshal([]byte(mop.Operation), &op)
427
-
op.RawJSON = []byte(mop.Operation)
428
-
429
-
ops = append(ops, op)
430
-
ids = append(ids, mop.ID)
431
-
432
-
if len(ops) >= BUNDLE_SIZE {
433
-
break
434
-
}
98
+
// Show final mempool status
99
+
stats := s.bundleManager.libManager.GetMempoolStats()
100
+
if count, ok := stats["count"].(int); ok && count > 0 {
101
+
log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102
+
count, float64(count)/float64(BUNDLE_SIZE)*100)
435
103
}
436
104
437
-
return ops, ids
105
+
metrics.logSummary()
106
+
return nil
438
107
}
439
108
109
+
// processBatch extracts endpoints from operations
440
110
func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
441
111
counts := make(map[string]int64)
442
112
seen := make(map[string]*PLCOperation)
443
113
444
114
// Collect unique endpoints
445
-
for _, op := range ops {
115
+
for i := range ops {
116
+
op := &ops[i]
117
+
446
118
if op.IsNullified() {
447
119
continue
448
120
}
449
-
for _, ep := range s.extractEndpointsFromOperation(op) {
121
+
122
+
for _, ep := range s.extractEndpointsFromOperation(*op) {
450
123
key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
451
124
if _, exists := seen[key]; !exists {
452
-
seen[key] = &op
125
+
seen[key] = op
453
126
}
454
127
}
455
128
}
···
465
138
}
466
139
467
140
if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
468
-
log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141
+
log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
469
142
continue
470
143
}
471
144
472
-
log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145
+
log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
473
146
counts[epType]++
474
147
}
475
148
476
149
return counts, nil
477
150
}
478
151
479
-
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
480
-
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
481
-
EndpointType: epType,
482
-
Endpoint: endpoint,
483
-
DiscoveredAt: discoveredAt,
484
-
LastChecked: time.Time{},
485
-
Status: storage.EndpointStatusUnknown,
486
-
})
487
-
}
488
-
489
152
func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
490
153
var endpoints []EndpointInfo
491
154
···
526
189
return nil
527
190
}
528
191
529
-
func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
530
-
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
531
-
Source: "plc_directory",
532
-
LastBundleNumber: m.currentBundle - 1,
533
-
LastScanTime: time.Now(),
534
-
RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192
+
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
194
+
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195
+
EndpointType: epType,
196
+
Endpoint: endpoint,
197
+
DiscoveredAt: discoveredAt,
198
+
LastChecked: time.Time{},
199
+
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
535
201
})
536
202
}
537
203
···
539
205
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
540
206
Source: "plc_directory",
541
207
LastBundleNumber: bundle,
542
-
LastScanTime: time.Now(),
208
+
LastScanTime: time.Now().UTC(),
543
209
RecordsProcessed: totalProcessed,
544
210
})
545
211
}
···
559
225
return total
560
226
}
561
227
562
-
func formatEndpointCounts(counts map[string]int64) string {
563
-
if len(counts) == 0 {
564
-
return "0 new endpoints"
565
-
}
228
+
func isInsufficientOpsError(err error) bool {
229
+
return err != nil && strings.Contains(err.Error(), "insufficient operations")
230
+
}
566
231
567
-
total := sumCounts(counts)
232
+
// ScanMetrics tracks scan progress
233
+
type ScanMetrics struct {
234
+
totalProcessed int64
235
+
newEndpoints int64
236
+
endpointCounts map[string]int64
237
+
currentBundle int
238
+
startTime time.Time
239
+
}
568
240
569
-
if len(counts) == 1 {
570
-
for typ, count := range counts {
571
-
return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
572
-
}
241
+
func newMetrics(startBundle int) *ScanMetrics {
242
+
return &ScanMetrics{
243
+
endpointCounts: make(map[string]int64),
244
+
currentBundle: startBundle,
245
+
startTime: time.Now(),
573
246
}
247
+
}
574
248
575
-
parts := make([]string, 0, len(counts))
576
-
for typ, count := range counts {
577
-
parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249
+
func (m *ScanMetrics) logSummary() {
250
+
if m.newEndpoints > 0 {
251
+
log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252
+
m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253
+
} else {
254
+
log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255
+
m.totalProcessed, time.Since(m.startTime))
578
256
}
579
-
return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
580
257
}
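Note on the processBatch change above: switching from `for _, op := range ops` with `seen[key] = &op` to indexing is not just style. Before Go 1.22 the range variable is a single reused variable, so every stored pointer ends up aliasing the last element; `&ops[i]` points into the slice instead (and also avoids copying each operation). A minimal standalone sketch of the difference:

// A minimal sketch of the pitfall the processBatch change avoids. Before
// Go 1.22 the range variable is one reused variable, so storing &v makes
// every map entry alias the final element; indexing into the slice does not.
package main

import "fmt"

func main() {
    ops := []string{"a", "b", "c"}

    byPtrBuggy := make(map[int]*string)
    for i, v := range ops {
        byPtrBuggy[i] = &v // pre-1.22: all three entries point at "c"
    }

    byPtrFixed := make(map[int]*string)
    for i := range ops {
        byPtrFixed[i] = &ops[i] // always points at the i-th element
    }

    fmt.Println(*byPtrBuggy[0], *byPtrFixed[0]) // pre-1.22: "c a"; Go 1.22+: "a a"
}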
internal/plc/types.go (+68 -55)
···
 package plc

-import "time"
-
-type PLCOperation struct {
-    DID       string                 `json:"did"`
-    Operation map[string]interface{} `json:"operation"`
-    CID       string                 `json:"cid"`
-    Nullified interface{}            `json:"nullified,omitempty"`
-    CreatedAt time.Time              `json:"createdAt"`
-
-    RawJSON []byte `json:"-"` // Exported (capital R)
-}
+import (
+    "net/url"
+    "strings"

-// Helper method to check if nullified
-func (op *PLCOperation) IsNullified() bool {
-    if op.Nullified == nil {
-        return false
-    }
-
-    switch v := op.Nullified.(type) {
-    case bool:
-        return v
-    case string:
-        return v != ""
-    default:
-        return false
-    }
-}
-
-// Get nullifying CID if available
-func (op *PLCOperation) GetNullifyingCID() string {
-    if s, ok := op.Nullified.(string); ok {
-        return s
-    }
-    return ""
-}
+    plclib "tangled.org/atscan.net/plcbundle/plc"
+)

-type DIDDocument struct {
-    Context            []string             `json:"@context"`
-    ID                 string               `json:"id"`
-    AlsoKnownAs        []string             `json:"alsoKnownAs"`
-    VerificationMethod []VerificationMethod `json:"verificationMethod"`
-    Service            []Service            `json:"service"`
-}
+// Re-export library types
+type PLCOperation = plclib.PLCOperation
+type DIDDocument = plclib.DIDDocument
+type Client = plclib.Client
+type ExportOptions = plclib.ExportOptions

-type VerificationMethod struct {
-    ID                 string `json:"id"`
-    Type               string `json:"type"`
-    Controller         string `json:"controller"`
-    PublicKeyMultibase string `json:"publicKeyMultibase"`
-}
+// Keep project-specific types
+const BUNDLE_SIZE = 10000

-type Service struct {
-    ID              string `json:"id"`
-    Type            string `json:"type"`
-    ServiceEndpoint string `json:"serviceEndpoint"`
-}
-
-// DIDHistoryEntry represents a single operation in DID history
 type DIDHistoryEntry struct {
     Operation PLCOperation `json:"operation"`
     PLCBundle string       `json:"plc_bundle,omitempty"`
 }

-// DIDHistory represents the full history of a DID
 type DIDHistory struct {
     DID     string        `json:"did"`
     Current *PLCOperation `json:"current"`
···
     Type     string
     Endpoint string
 }
+
+// PLCOpLabel holds metadata from the label CSV file
+type PLCOpLabel struct {
+    Bundle     int      `json:"bundle"`
+    Position   int      `json:"position"`
+    CID        string   `json:"cid"`
+    Size       int      `json:"size"`
+    Confidence float64  `json:"confidence"`
+    Detectors  []string `json:"detectors"`
+}
+
+// validateEndpoint checks if endpoint is in correct format: https://<domain>
+func validateEndpoint(endpoint string) bool {
+    // Must not be empty
+    if endpoint == "" {
+        return false
+    }
+
+    // Must not have trailing slash
+    if strings.HasSuffix(endpoint, "/") {
+        return false
+    }
+
+    // Parse URL
+    u, err := url.Parse(endpoint)
+    if err != nil {
+        return false
+    }
+
+    // Must use https scheme
+    if u.Scheme != "https" {
+        return false
+    }
+
+    // Must have a host
+    if u.Host == "" {
+        return false
+    }
+
+    // Must not have path (except empty)
+    if u.Path != "" && u.Path != "/" {
+        return false
+    }
+
+    // Must not have query parameters
+    if u.RawQuery != "" {
+        return false
+    }
+
+    // Must not have fragment
+    if u.Fragment != "" {
+        return false
+    }
+
+    return true
+}
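The validation rules above are easy to pin down with a table-driven test. A hypothetical test file for package plc (not part of this diff) might look like:

// Table-driven check of validateEndpoint: only bare https origins should pass.
package plc

import "testing"

func TestValidateEndpoint(t *testing.T) {
    cases := []struct {
        in   string
        want bool
    }{
        {"https://pds.example.com", true},
        {"https://pds.example.com:2583", true},   // port is part of the host
        {"", false},                              // empty
        {"http://pds.example.com", false},        // wrong scheme
        {"https://pds.example.com/", false},      // trailing slash
        {"https://pds.example.com/xrpc", false},  // path
        {"https://pds.example.com?x=1", false},   // query
        {"https://pds.example.com#frag", false},  // fragment
    }
    for _, c := range cases {
        if got := validateEndpoint(c.in); got != c.want {
            t.Errorf("validateEndpoint(%q) = %v, want %v", c.in, got, c.want)
        }
    }
}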
internal/storage/db.go (+49 -22)
···
 import (
     "context"
+    "fmt"
     "time"
 )

+// NewDatabase creates a database connection based on type
+func NewDatabase(dbType, connectionString string) (Database, error) {
+    switch dbType {
+    case "postgres", "postgresql":
+        return NewPostgresDB(connectionString)
+    default:
+        return nil, fmt.Errorf("unsupported database type: %s (supported: sqlite, postgres)", dbType)
+    }
+}
+
 type Database interface {
     Close() error
     Migrate() error

-    // Endpoint operations (renamed from PDS)
+    // Endpoint operations
     UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error
     GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error)
-    GetEndpointByID(ctx context.Context, id int64) (*Endpoint, error)
     GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error)
-    UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
     EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error)
     GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error)
     GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error)
+    UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error
+    SaveEndpointScan(ctx context.Context, scan *EndpointScan) error
+    SetScanRetention(retention int)
+    UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
+    UpdateEndpointServerDID(ctx context.Context, endpointID int64, serverDID string) error
+    GetDuplicateEndpoints(ctx context.Context) (map[string][]string, error)
+
+    // PDS virtual endpoints (created via JOINs)
+    GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error)
+    GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error)
+    GetPDSStats(ctx context.Context) (*PDSStats, error)
+    GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error)
+    GetVersionStats(ctx context.Context) ([]*VersionStats, error)
+
+    // IP operations (IP as primary key)
+    UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error
+    GetIPInfo(ctx context.Context, ip string) (*IPInfo, error)
+    ShouldUpdateIPInfo(ctx context.Context, ip string) (exists bool, needsUpdate bool, err error)

     // Cursor operations
     GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
     UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error

-    // Bundle operations
-    CreateBundle(ctx context.Context, bundle *PLCBundle) error
-    GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
-    GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
-    GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
-    GetBundleStats(ctx context.Context) (int64, int64, error)
-    GetLastBundleNumber(ctx context.Context) (int, error)
-    GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
-
-    // Mempool operations
-    AddToMempool(ctx context.Context, ops []MempoolOperation) error
-    GetMempoolCount(ctx context.Context) (int, error)
-    GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
-    DeleteFromMempool(ctx context.Context, ids []int64) error
-    GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
-    GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
-    GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
-    GetMempoolUncompressedSize(ctx context.Context) (int64, error)
-
     // Metrics
     StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
     GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
     GetEndpointStats(ctx context.Context) (*EndpointStats, error)
+
+    // DID operations
+    UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error
+    UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error
+    GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error)
+    GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) // NEW
+    GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error)
+    AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error
+    GetTotalDIDCount(ctx context.Context) (int64, error)
+
+    // PDS Repo operations
+    UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error
+    GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error)
+    GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error)
+    GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error)
+
+    // Internal
+    GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error)
+    GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error)
 }
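For context, a minimal sketch of how the new factory is meant to be wired at startup; the DSN is a placeholder, and note that this revision only wires up "postgres"/"postgresql" even though the error text still mentions sqlite:

// Sketch of factory usage, assuming a reachable Postgres (placeholder DSN).
package main

import (
    "context"
    "log"

    "github.com/atscan/atscand/internal/storage"
)

func main() {
    db, err := storage.NewDatabase("postgres", "postgres://atscan:secret@localhost:5432/atscan?sslmode=disable")
    if err != nil {
        log.Fatalf("open database: %v", err)
    }
    defer db.Close()

    if err := db.Migrate(); err != nil {
        log.Fatalf("migrate: %v", err)
    }

    // The Database interface is now the only seam the rest of the code sees.
    exists, err := db.EndpointExists(context.Background(), "https://pds.example.com", "pds")
    if err != nil {
        log.Fatalf("query: %v", err)
    }
    log.Printf("endpoint known: %v", exists)
}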
internal/storage/postgres.go (+2104)
···
+package storage
+
+import (
+    "context"
+    "database/sql"
+    "encoding/json"
+    "fmt"
+    "time"
+
+    "github.com/atscan/atscand/internal/log"
+    "github.com/jackc/pgx/v5"
+    "github.com/jackc/pgx/v5/pgxpool"
+    _ "github.com/jackc/pgx/v5/stdlib"
+    "github.com/lib/pq"
+)
+
+type PostgresDB struct {
+    db            *sql.DB
+    pool          *pgxpool.Pool
+    scanRetention int
+}
+
+func NewPostgresDB(connString string) (*PostgresDB, error) {
+    log.Info("Connecting to PostgreSQL database...")
+
+    // Open standard sql.DB (for compatibility)
+    db, err := sql.Open("pgx", connString)
+    if err != nil {
+        return nil, fmt.Errorf("failed to open database: %w", err)
+    }
+
+    // Connection pool settings
+    db.SetMaxOpenConns(50)
+    db.SetMaxIdleConns(25)
+    db.SetConnMaxLifetime(5 * time.Minute)
+    db.SetConnMaxIdleTime(2 * time.Minute)
+
+    log.Verbose("  Max open connections: 50")
+    log.Verbose("  Max idle connections: 25")
+    log.Verbose("  Connection max lifetime: 5m")
+
+    // Test connection
+    log.Info("Testing database connection...")
+    if err := db.Ping(); err != nil {
+        return nil, fmt.Errorf("failed to ping database: %w", err)
+    }
+    log.Info("✓ Database connection successful")
+
+    // Also create pgx pool for COPY operations
+    log.Verbose("Creating pgx connection pool...")
+    pool, err := pgxpool.New(context.Background(), connString)
+    if err != nil {
+        return nil, fmt.Errorf("failed to create pgx pool: %w", err)
+    }
+    log.Verbose("✓ Connection pool created")
+
+    return &PostgresDB{
+        db:            db,
+        pool:          pool,
+        scanRetention: 3, // Default
+    }, nil
+}
+
+func (p *PostgresDB) Close() error {
+    if p.pool != nil {
+        p.pool.Close()
+    }
+    return p.db.Close()
+}
+
+func (p *PostgresDB) Migrate() error {
+    log.Info("Running database migrations...")
+
+    schema := `
+    -- Endpoints table (with IPv6 support)
+    CREATE TABLE IF NOT EXISTS endpoints (
+        id BIGSERIAL PRIMARY KEY,
+        endpoint_type TEXT NOT NULL DEFAULT 'pds',
+        endpoint TEXT NOT NULL,
+        server_did TEXT,
+        discovered_at TIMESTAMP NOT NULL,
+        last_checked TIMESTAMP,
+        status INTEGER DEFAULT 0,
+        ip TEXT,
+        ipv6 TEXT,
+        ip_resolved_at TIMESTAMP,
+        valid BOOLEAN DEFAULT true,
+        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        UNIQUE(endpoint_type, endpoint)
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
+    CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
+
+    -- IP infos table (IP as PRIMARY KEY)
+    CREATE TABLE IF NOT EXISTS ip_infos (
+        ip TEXT PRIMARY KEY,
+        city TEXT,
+        country TEXT,
+        country_code TEXT,
+        asn INTEGER,
+        asn_org TEXT,
+        is_datacenter BOOLEAN,
+        is_vpn BOOLEAN,
+        is_crawler BOOLEAN,
+        is_tor BOOLEAN,
+        is_proxy BOOLEAN,
+        latitude REAL,
+        longitude REAL,
+        raw_data JSONB,
+        fetched_at TIMESTAMP NOT NULL,
+        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
+    CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
+
+    -- Endpoint scans
+    CREATE TABLE IF NOT EXISTS endpoint_scans (
+        id BIGSERIAL PRIMARY KEY,
+        endpoint_id BIGINT NOT NULL,
+        status INTEGER NOT NULL,
+        response_time DOUBLE PRECISION,
+        user_count BIGINT,
+        version TEXT,
+        used_ip TEXT,
+        scan_data JSONB,
+        scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
+    CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
+    CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
+    CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
+
+    CREATE TABLE IF NOT EXISTS plc_metrics (
+        id BIGSERIAL PRIMARY KEY,
+        total_dids BIGINT,
+        total_pds BIGINT,
+        unique_pds BIGINT,
+        scan_duration_ms BIGINT,
+        error_count INTEGER,
+        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+    );
+
+    CREATE TABLE IF NOT EXISTS scan_cursors (
+        source TEXT PRIMARY KEY,
+        last_bundle_number INTEGER DEFAULT 0,
+        last_scan_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        records_processed BIGINT DEFAULT 0
+    );
+
+    -- Minimal dids table
+    CREATE TABLE IF NOT EXISTS dids (
+        did TEXT PRIMARY KEY,
+        handle TEXT,
+        pds TEXT,
+        bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
+        created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
+    CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
+    CREATE INDEX IF NOT EXISTS idx_dids_handle ON dids(handle);
+    CREATE INDEX IF NOT EXISTS idx_dids_pds ON dids(pds);
+
+    -- PDS Repositories table
+    CREATE TABLE IF NOT EXISTS pds_repos (
+        id BIGSERIAL PRIMARY KEY,
+        endpoint_id BIGINT NOT NULL,
+        did TEXT NOT NULL,
+        head TEXT,
+        rev TEXT,
+        active BOOLEAN DEFAULT true,
+        status TEXT,
+        first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+        last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+        FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE,
+        UNIQUE(endpoint_id, did)
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint ON pds_repos(endpoint_id);
+    CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint_id_desc ON pds_repos(endpoint_id, id DESC);
+    CREATE INDEX IF NOT EXISTS idx_pds_repos_did ON pds_repos(did);
+    CREATE INDEX IF NOT EXISTS idx_pds_repos_active ON pds_repos(active);
+    CREATE INDEX IF NOT EXISTS idx_pds_repos_status ON pds_repos(status);
+    CREATE INDEX IF NOT EXISTS idx_pds_repos_last_seen ON pds_repos(last_seen DESC);
+    `
+
+    _, err := p.db.Exec(schema)
+    if err != nil {
+        return err
+    }
+
+    log.Info("✓ Database migrations completed successfully")
+    return nil
+}
+
+// ===== ENDPOINT OPERATIONS =====
+
+func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
+    query := `
+        INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+        ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
+            last_checked = EXCLUDED.last_checked,
+            status = EXCLUDED.status,
+            ip = CASE
+                WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip
+                ELSE endpoints.ip
+            END,
+            ipv6 = CASE
+                WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6
+                ELSE endpoints.ipv6
+            END,
+            ip_resolved_at = CASE
+                WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
+                ELSE endpoints.ip_resolved_at
+            END,
+            valid = EXCLUDED.valid,
+            updated_at = CURRENT_TIMESTAMP
+        RETURNING id
+    `
+    err := p.db.QueryRowContext(ctx, query,
+        endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
+        endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
+    return err
+}
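The CASE guards in this upsert deserve a note: a re-upsert that carries no IP must not erase an IP resolved earlier. A sketch of that behavior, assuming a reachable Postgres and placeholder values:

// Sketch (would live in package storage, against a live database) of the
// upsert's merge semantics: re-upserting with an empty IP keeps the stored IP.
func demoUpsertMerge(ctx context.Context, db *PostgresDB) error {
    ep := &Endpoint{
        EndpointType: "pds",
        Endpoint:     "https://pds.example.com", // placeholder endpoint
        DiscoveredAt: time.Now().UTC(),
        Status:       EndpointStatusUnknown,
        IP:           "192.0.2.10", // first write carries a resolved IP
        Valid:        true,
    }
    if err := db.UpsertEndpoint(ctx, ep); err != nil {
        return err
    }

    ep.IP = "" // later rediscovery without DNS resolution
    if err := db.UpsertEndpoint(ctx, ep); err != nil {
        return err
    }

    got, err := db.GetEndpoint(ctx, ep.Endpoint, ep.EndpointType)
    if err != nil {
        return err
    }
    fmt.Println(got.IP) // still "192.0.2.10": the CASE guard keeps the stored IP
    return nil
}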
+
+func (p *PostgresDB) EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error) {
+    query := "SELECT EXISTS(SELECT 1 FROM endpoints WHERE endpoint = $1 AND endpoint_type = $2)"
+    var exists bool
+    err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(&exists)
+    return exists, err
+}
+
+func (p *PostgresDB) GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error) {
+    query := "SELECT id FROM endpoints WHERE endpoint = $1 AND endpoint_type = $2"
+    var id int64
+    err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(&id)
+    return id, err
+}
+
+func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
+    query := `
+        SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
+               ip, ipv6, ip_resolved_at, valid, updated_at
+        FROM endpoints
+        WHERE endpoint = $1 AND endpoint_type = $2
+    `
+
+    var ep Endpoint
+    var lastChecked, ipResolvedAt sql.NullTime
+    var ip, ipv6 sql.NullString
+
+    err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
+        &ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
+        &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
+    )
+    if err != nil {
+        return nil, err
+    }
+
+    if lastChecked.Valid {
+        ep.LastChecked = lastChecked.Time
+    }
+    if ip.Valid {
+        ep.IP = ip.String
+    }
+    if ipv6.Valid {
+        ep.IPv6 = ipv6.String
+    }
+    if ipResolvedAt.Valid {
+        ep.IPResolvedAt = ipResolvedAt.Time
+    }
+
+    return &ep, nil
+}
+
+func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) {
+    query := `
+        SELECT DISTINCT ON (COALESCE(server_did, id::text))
+               id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
+               ip, ipv6, ip_resolved_at, valid, updated_at
+        FROM endpoints
+        WHERE 1=1
+    `
+    args := []interface{}{}
+    argIdx := 1
+
+    if filter != nil {
+        if filter.Type != "" {
+            query += fmt.Sprintf(" AND endpoint_type = $%d", argIdx)
+            args = append(args, filter.Type)
+            argIdx++
+        }
+
+        // NEW: Filter by valid flag
+        if filter.OnlyValid {
+            query += " AND valid = true"
+        }
+        if filter.Status != "" {
+            statusInt := EndpointStatusUnknown
+            switch filter.Status {
+            case "online":
+                statusInt = EndpointStatusOnline
+            case "offline":
+                statusInt = EndpointStatusOffline
+            }
+            query += fmt.Sprintf(" AND status = $%d", argIdx)
+            args = append(args, statusInt)
+            argIdx++
+        }
+
+        // Filter for stale endpoints only
+        if filter.OnlyStale && filter.RecheckInterval > 0 {
+            cutoffTime := time.Now().UTC().Add(-filter.RecheckInterval)
+            query += fmt.Sprintf(" AND (last_checked IS NULL OR last_checked < $%d)", argIdx)
+            args = append(args, cutoffTime)
+            argIdx++
+        }
+    }
+
+    // NEW: Choose ordering strategy
+    if filter != nil && filter.Random {
+        // For random selection, we need to wrap in a subquery
+        query = fmt.Sprintf(`
+            WITH filtered_endpoints AS (
+                %s
+            )
+            SELECT * FROM filtered_endpoints
+            ORDER BY RANDOM()
+        `, query)
+    } else {
+        // Original ordering for non-random queries
+        query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
+    }
+
+    if filter != nil && filter.Limit > 0 {
+        query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
+        args = append(args, filter.Limit, filter.Offset)
+    }
+
+    rows, err := p.db.QueryContext(ctx, query, args...)
+    if err != nil {
+        return nil, err
+    }
+    defer rows.Close()
+
+    var endpoints []*Endpoint
+    for rows.Next() {
+        var ep Endpoint
+        var lastChecked, ipResolvedAt sql.NullTime
+        var ip, ipv6, serverDID sql.NullString
+
+        err := rows.Scan(
+            &ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked,
+            &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
+        )
+        if err != nil {
+            return nil, err
+        }
+
+        if serverDID.Valid {
+            ep.ServerDID = serverDID.String
+        }
+        if lastChecked.Valid {
+            ep.LastChecked = lastChecked.Time
+        }
+        if ip.Valid {
+            ep.IP = ip.String
+        }
+        if ipv6.Valid {
+            ep.IPv6 = ipv6.String
+        }
+        if ipResolvedAt.Valid {
+            ep.IPResolvedAt = ipResolvedAt.Time
+        }
+
+        endpoints = append(endpoints, &ep)
+    }
+
+    return endpoints, rows.Err()
+}
+
+func (p *PostgresDB) UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error {
+    query := `
+        UPDATE endpoints
+        SET status = $1, last_checked = $2, updated_at = $3
+        WHERE id = $4
+    `
+    _, err := p.db.ExecContext(ctx, query, update.Status, update.LastChecked, time.Now().UTC(), endpointID)
+    return err
+}
+
+func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error {
+    query := `
+        UPDATE endpoints
+        SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4
+        WHERE id = $5
+    `
+    _, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID)
+    return err
+}
+
+func (p *PostgresDB) UpdateEndpointServerDID(ctx context.Context, endpointID int64, serverDID string) error {
+    query := `
+        UPDATE endpoints
+        SET server_did = $1, updated_at = $2
+        WHERE id = $3
+    `
+    _, err := p.db.ExecContext(ctx, query, serverDID, time.Now().UTC(), endpointID)
+    return err
+}
+
+func (p *PostgresDB) GetDuplicateEndpoints(ctx context.Context) (map[string][]string, error) {
+    query := `
+        SELECT server_did, array_agg(endpoint ORDER BY discovered_at ASC) as endpoints
+        FROM endpoints
+        WHERE server_did IS NOT NULL
+          AND server_did != ''
+          AND endpoint_type = 'pds'
+        GROUP BY server_did
+        HAVING COUNT(*) > 1
+        ORDER BY COUNT(*) DESC
+    `
+
+    rows, err := p.db.QueryContext(ctx, query)
+    if err != nil {
+        return nil, err
+    }
+    defer rows.Close()
+
+    duplicates := make(map[string][]string)
+    for rows.Next() {
+        var serverDID string
+        var endpoints []string
+
+        err := rows.Scan(&serverDID, pq.Array(&endpoints))
+        if err != nil {
+            return nil, err
+        }
+
+        duplicates[serverDID] = endpoints
+    }
+
+    return duplicates, rows.Err()
+}
+
+// ===== SCAN OPERATIONS =====
+
+func (p *PostgresDB) SetScanRetention(retention int) {
+    p.scanRetention = retention
+}
+
+func (p *PostgresDB) SaveEndpointScan(ctx context.Context, scan *EndpointScan) error {
+    var scanDataJSON []byte
+    if scan.ScanData != nil {
+        scanDataJSON, _ = json.Marshal(scan.ScanData)
+    }
+
+    tx, err := p.db.BeginTx(ctx, nil)
+    if err != nil {
+        return err
+    }
+    defer tx.Rollback()
+
+    query := `
+        INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+    `
+    _, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
+    if err != nil {
+        return err
+    }
+
+    // Use configured retention value
+    cleanupQuery := `
+        DELETE FROM endpoint_scans
+        WHERE endpoint_id = $1
+          AND id NOT IN (
+              SELECT id
+              FROM endpoint_scans
+              WHERE endpoint_id = $1
+              ORDER BY scanned_at DESC
+              LIMIT $2
+          )
+    `
+    _, err = tx.ExecContext(ctx, cleanupQuery, scan.EndpointID, p.scanRetention)
+    if err != nil {
+        return err
+    }
+
+    return tx.Commit()
+}
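SaveEndpointScan prunes history in the same transaction as the insert, so an endpoint never holds more than scanRetention rows. A sketch, assuming a live database and an existing endpoint ID:

// Sketch of the retention behavior: with retention 3, the fourth save for an
// endpoint deletes the oldest scan in the same transaction that inserts the new one.
func demoRetention(ctx context.Context, db Database, endpointID int64) error {
    db.SetScanRetention(3)
    for i := 0; i < 4; i++ {
        scan := &EndpointScan{
            EndpointID: endpointID,
            Status:     EndpointStatusOnline,
            ScannedAt:  time.Now().UTC(),
        }
        if err := db.SaveEndpointScan(ctx, scan); err != nil {
            return err
        }
    }
    scans, err := db.GetEndpointScans(ctx, endpointID, 10)
    if err != nil {
        return err
    }
    fmt.Println(len(scans)) // 3: scans beyond the retention window are pruned
    return nil
}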
+
+func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
+    query := `
+        SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
+        FROM endpoint_scans
+        WHERE endpoint_id = $1
+        ORDER BY scanned_at DESC
+        LIMIT $2
+    `
+
+    rows, err := p.db.QueryContext(ctx, query, endpointID, limit)
+    if err != nil {
+        return nil, err
+    }
+    defer rows.Close()
+
+    var scans []*EndpointScan
+    for rows.Next() {
+        var scan EndpointScan
+        var responseTime sql.NullFloat64
+        var userCount sql.NullInt64
+        var version, usedIP sql.NullString
+        var scanDataJSON []byte
+
+        err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
+        if err != nil {
+            return nil, err
+        }
+
+        if responseTime.Valid {
+            scan.ResponseTime = responseTime.Float64
+        }
+
+        if userCount.Valid {
+            scan.UserCount = userCount.Int64
+        }
+
+        if version.Valid {
+            scan.Version = version.String
+        }
+
+        if usedIP.Valid {
+            scan.UsedIP = usedIP.String
+        }
+
+        if len(scanDataJSON) > 0 {
+            var scanData EndpointScanData
+            if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
+                scan.ScanData = &scanData
+            }
+        }
+
+        scans = append(scans, &scan)
+    }
+
+    return scans, rows.Err()
+}
+
+// ===== PDS VIRTUAL ENDPOINTS =====
+
+func (p *PostgresDB) GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error) {
+    query := `
+        WITH unique_servers AS (
+            SELECT DISTINCT ON (COALESCE(server_did, id::text))
+                id,
+                endpoint,
+                server_did,
+                discovered_at,
+                last_checked,
+                status,
+                ip,
+                ipv6,
+                valid
+            FROM endpoints
+            WHERE endpoint_type = 'pds'
+            ORDER BY COALESCE(server_did, id::text), discovered_at ASC
+        )
+        SELECT
+            e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
+            latest.user_count, latest.response_time, latest.version, latest.scanned_at,
+            i.city, i.country, i.country_code, i.asn, i.asn_org,
+            i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
+            i.latitude, i.longitude
+        FROM unique_servers e
+        LEFT JOIN LATERAL (
+            SELECT
+                user_count,
+                response_time,
+                version,
+                scanned_at
+            FROM endpoint_scans
+            WHERE endpoint_id = e.id AND status = 1
+            ORDER BY scanned_at DESC
+            LIMIT 1
+        ) latest ON true
+        LEFT JOIN ip_infos i ON e.ip = i.ip
+        WHERE 1=1
+    `
+
+    args := []interface{}{}
+    argIdx := 1
+
+    if filter != nil {
+        if filter.Status != "" {
+            statusInt := EndpointStatusUnknown
+            switch filter.Status {
+            case "online":
+                statusInt = EndpointStatusOnline
+            case "offline":
+                statusInt = EndpointStatusOffline
+            }
+            query += fmt.Sprintf(" AND e.status = $%d", argIdx)
+            args = append(args, statusInt)
+            argIdx++
+        }
+
+        if filter.MinUserCount > 0 {
+            query += fmt.Sprintf(" AND latest.user_count >= $%d", argIdx)
+            args = append(args, filter.MinUserCount)
+            argIdx++
+        }
+    }
+
+    query += " ORDER BY latest.user_count DESC NULLS LAST"
+
+    if filter != nil && filter.Limit > 0 {
+        query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
+        args = append(args, filter.Limit, filter.Offset)
+    }
+
+    rows, err := p.db.QueryContext(ctx, query, args...)
+    if err != nil {
+        return nil, err
+    }
+    defer rows.Close()
+
+    var items []*PDSListItem
+    for rows.Next() {
+        item := &PDSListItem{}
+        var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
+        var asn sql.NullInt32
+        var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
+        var lat, lon sql.NullFloat64
+        var userCount sql.NullInt32
+        var responseTime sql.NullFloat64
+        var version sql.NullString
+        var scannedAt sql.NullTime
+
+        err := rows.Scan(
+            &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
+            &userCount, &responseTime, &version, &scannedAt,
+            &city, &country, &countryCode, &asn, &asnOrg,
+            &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
+            &lat, &lon,
+        )
+        if err != nil {
+            return nil, err
+        }
+
+        if ip.Valid {
+            item.IP = ip.String
+        }
+        if ipv6.Valid {
+            item.IPv6 = ipv6.String
+        }
+        if serverDID.Valid {
+            item.ServerDID = serverDID.String
+        }
+
+        // Add latest scan data if available
+        if userCount.Valid {
+            item.LatestScan = &struct {
+                UserCount    int
+                ResponseTime float64
+                Version      string
+                ScannedAt    time.Time
+            }{
+                UserCount:    int(userCount.Int32),
+                ResponseTime: responseTime.Float64,
+                Version:      version.String,
+                ScannedAt:    scannedAt.Time,
+            }
+        }
+
+        // Add IP info if available
+        if city.Valid || country.Valid {
+            item.IPInfo = &IPInfo{
+                IP:           ip.String,
+                City:         city.String,
+                Country:      country.String,
+                CountryCode:  countryCode.String,
+                ASN:          int(asn.Int32),
+                ASNOrg:       asnOrg.String,
+                IsDatacenter: isDatacenter.Bool,
+                IsVPN:        isVPN.Bool,
+                IsCrawler:    isCrawler.Bool,
+                IsTor:        isTor.Bool,
+                IsProxy:      isProxy.Bool,
+                Latitude:     float32(lat.Float64),
+                Longitude:    float32(lon.Float64),
+            }
+        }
+
+        items = append(items, item)
+    }
+
+    return items, rows.Err()
+}
+
+func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
+    query := `
+        WITH target_endpoint AS MATERIALIZED (
+            SELECT
+                e.id,
+                e.endpoint,
+                e.server_did,
+                e.discovered_at,
+                e.last_checked,
+                e.status,
+                e.ip,
+                e.ipv6,
+                e.valid
+            FROM endpoints e
+            WHERE e.endpoint = $1
+              AND e.endpoint_type = 'pds'
+            LIMIT 1
+        )
+        SELECT
+            te.id,
+            te.endpoint,
+            te.server_did,
+            te.discovered_at,
+            te.last_checked,
+            te.status,
+            te.ip,
+            te.ipv6,
+            te.valid,
+            latest.user_count,
+            latest.response_time,
+            latest.version,
+            latest.scan_data->'metadata'->'server_info' as server_info,
+            latest.scanned_at,
+            i.city, i.country, i.country_code, i.asn, i.asn_org,
+            i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
+            i.latitude, i.longitude,
+            i.raw_data,
+            COALESCE(
+                ARRAY(
+                    SELECT e2.endpoint
+                    FROM endpoints e2
+                    WHERE e2.server_did = te.server_did
+                      AND e2.endpoint_type = 'pds'
+                      AND e2.endpoint != te.endpoint
+                      AND te.server_did IS NOT NULL
+                    ORDER BY e2.discovered_at
+                ),
+                ARRAY[]::text[]
+            ) as aliases,
+            CASE
+                WHEN te.server_did IS NOT NULL THEN (
+                    SELECT MIN(e3.discovered_at)
+                    FROM endpoints e3
+                    WHERE e3.server_did = te.server_did
+                      AND e3.endpoint_type = 'pds'
+                )
+                ELSE NULL
+            END as first_discovered_at
+        FROM target_endpoint te
+        LEFT JOIN LATERAL (
+            SELECT
+                es.scan_data,
+                es.response_time,
+                es.version,
+                es.scanned_at,
+                es.user_count
+            FROM endpoint_scans es
+            WHERE es.endpoint_id = te.id
+            ORDER BY es.scanned_at DESC
+            LIMIT 1
+        ) latest ON true
+        LEFT JOIN ip_infos i ON te.ip = i.ip;
+    `
+
+    detail := &PDSDetail{}
+    var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
+    var asn sql.NullInt32
+    var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
+    var lat, lon sql.NullFloat64
+    var userCount sql.NullInt32
+    var responseTime sql.NullFloat64
+    var version sql.NullString
+    var serverInfoJSON []byte
+    var scannedAt sql.NullTime
+    var rawDataJSON []byte
+    var aliases []string
+    var firstDiscoveredAt sql.NullTime
+
+    err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
+        &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
+        &userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
+        &city, &country, &countryCode, &asn, &asnOrg,
+        &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
+        &lat, &lon,
+        &rawDataJSON,
+        pq.Array(&aliases),
+        &firstDiscoveredAt,
+    )
+    if err != nil {
+        return nil, err
+    }
+
+    if ip.Valid {
+        detail.IP = ip.String
+    }
+    if ipv6.Valid {
+        detail.IPv6 = ipv6.String
+    }
+
+    if serverDID.Valid {
+        detail.ServerDID = serverDID.String
+    }
+
+    // Set aliases and is_primary
+    detail.Aliases = aliases
+    if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
+        detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
+            detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
+    } else {
+        detail.IsPrimary = true
+    }
+
+    // Parse latest scan data
+    if userCount.Valid {
+        var serverInfo interface{}
+        if len(serverInfoJSON) > 0 {
+            json.Unmarshal(serverInfoJSON, &serverInfo)
+        }
+
+        detail.LatestScan = &struct {
+            UserCount    int
+            ResponseTime float64
+            Version      string
+            ServerInfo   interface{}
+            ScannedAt    time.Time
+        }{
+            UserCount:    int(userCount.Int32),
+            ResponseTime: responseTime.Float64,
+            Version:      version.String,
+            ServerInfo:   serverInfo,
+            ScannedAt:    scannedAt.Time,
+        }
+    }
+
+    // Parse IP info with all fields
+    if city.Valid || country.Valid {
+        detail.IPInfo = &IPInfo{
+            IP:           ip.String,
+            City:         city.String,
+            Country:      country.String,
+            CountryCode:  countryCode.String,
+            ASN:          int(asn.Int32),
+            ASNOrg:       asnOrg.String,
+            IsDatacenter: isDatacenter.Bool,
+            IsVPN:        isVPN.Bool,
+            IsCrawler:    isCrawler.Bool,
+            IsTor:        isTor.Bool,
+            IsProxy:      isProxy.Bool,
+            Latitude:     float32(lat.Float64),
+            Longitude:    float32(lon.Float64),
+        }
+
+        if len(rawDataJSON) > 0 {
+            json.Unmarshal(rawDataJSON, &detail.IPInfo.RawData)
+        }
+    }
+
+    return detail, nil
+}
+
+func (p *PostgresDB) GetPDSStats(ctx context.Context) (*PDSStats, error) {
+    query := `
+        WITH unique_servers AS (
+            SELECT DISTINCT ON (COALESCE(server_did, id::text))
+                id,
+                COALESCE(server_did, id::text) as server_identity,
+                status
+            FROM endpoints
+            WHERE endpoint_type = 'pds'
+            ORDER BY COALESCE(server_did, id::text), discovered_at ASC
+        ),
+        latest_scans AS (
+            SELECT DISTINCT ON (us.id)
+                us.id,
+                es.user_count,
+                us.status
+            FROM unique_servers us
+            LEFT JOIN endpoint_scans es ON us.id = es.endpoint_id
+            ORDER BY us.id, es.scanned_at DESC
+        )
+        SELECT
+            COUNT(*) as total,
+            SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online,
+            SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline,
+            SUM(COALESCE(user_count, 0)) as total_users
+        FROM latest_scans
+    `
+
+    stats := &PDSStats{}
+    err := p.db.QueryRowContext(ctx, query).Scan(
+        &stats.TotalEndpoints, &stats.OnlineEndpoints, &stats.OfflineEndpoints, &stats.TotalDIDs,
+    )
+
+    return stats, err
+}
+
+func (p *PostgresDB) GetEndpointStats(ctx context.Context) (*EndpointStats, error) {
+    query := `
+        SELECT
+            COUNT(*) as total_endpoints,
+            SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online_endpoints,
+            SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline_endpoints
+        FROM endpoints
+    `
+
+    var stats EndpointStats
+    err := p.db.QueryRowContext(ctx, query).Scan(
+        &stats.TotalEndpoints, &stats.OnlineEndpoints, &stats.OfflineEndpoints,
+    )
+    if err != nil {
+        return nil, err
+    }
+
+    // Get average response time from recent scans
+    avgQuery := `
+        SELECT AVG(response_time)
+        FROM endpoint_scans
+        WHERE response_time > 0 AND scanned_at > NOW() - INTERVAL '1 hour'
+    `
+    var avgResponseTime sql.NullFloat64
+    _ = p.db.QueryRowContext(ctx, avgQuery).Scan(&avgResponseTime)
+    if avgResponseTime.Valid {
+        stats.AvgResponseTime = avgResponseTime.Float64
+    }
+
+    // Get counts by type
+    typeQuery := `
+        SELECT endpoint_type, COUNT(*)
+        FROM endpoints
+        GROUP BY endpoint_type
+    `
+    rows, err := p.db.QueryContext(ctx, typeQuery)
+    if err == nil {
+        defer rows.Close()
+        stats.ByType = make(map[string]int64)
+        for rows.Next() {
+            var typ string
+            var count int64
+            if err := rows.Scan(&typ, &count); err == nil {
+                stats.ByType[typ] = count
+            }
+        }
+    }
+
+    // Get total DIDs from latest PDS scans
+    didQuery := `
+        WITH unique_servers AS (
+            SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text))
+                e.id
+            FROM endpoints e
+            WHERE e.endpoint_type = 'pds'
+            ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC
+        ),
+        latest_pds_scans AS (
+            SELECT DISTINCT ON (us.id)
+                us.id,
+                es.user_count
+            FROM unique_servers us
+            LEFT JOIN endpoint_scans es ON us.id = es.endpoint_id
+            ORDER BY us.id, es.scanned_at DESC
+        )
+        SELECT SUM(user_count) FROM latest_pds_scans
+    `
+    var totalDIDs sql.NullInt64
+    _ = p.db.QueryRowContext(ctx, didQuery).Scan(&totalDIDs)
+    if totalDIDs.Valid {
+        stats.TotalDIDs = totalDIDs.Int64
+    }
+
+    return &stats, err
+}
+
+// ===== IP INFO OPERATIONS =====
+
+func (p *PostgresDB) UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error {
+    rawDataJSON, _ := json.Marshal(ipInfo)
+
+    // Extract fields from ipInfo map
+    city := extractString(ipInfo, "location", "city")
+    country := extractString(ipInfo, "location", "country")
+    countryCode := extractString(ipInfo, "location", "country_code")
+    asn := extractInt(ipInfo, "asn", "asn")
+    asnOrg := extractString(ipInfo, "asn", "org")
+
+    // Extract top-level boolean flags
+    isDatacenter := false
+    if val, ok := ipInfo["is_datacenter"].(bool); ok {
+        isDatacenter = val
+    }
+
+    isVPN := false
+    if val, ok := ipInfo["is_vpn"].(bool); ok {
+        isVPN = val
+    }
+
+    isCrawler := false
+    if val, ok := ipInfo["is_crawler"].(bool); ok {
+        isCrawler = val
+    }
+
+    isTor := false
+    if val, ok := ipInfo["is_tor"].(bool); ok {
+        isTor = val
+    }
+
+    isProxy := false
+    if val, ok := ipInfo["is_proxy"].(bool); ok {
+        isProxy = val
+    }
+
+    lat := extractFloat(ipInfo, "location", "latitude")
+    lon := extractFloat(ipInfo, "location", "longitude")
+
+    query := `
+        INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
+        ON CONFLICT(ip) DO UPDATE SET
+            city = EXCLUDED.city,
+            country = EXCLUDED.country,
+            country_code = EXCLUDED.country_code,
+            asn = EXCLUDED.asn,
+            asn_org = EXCLUDED.asn_org,
+            is_datacenter = EXCLUDED.is_datacenter,
+            is_vpn = EXCLUDED.is_vpn,
+            is_crawler = EXCLUDED.is_crawler,
+            is_tor = EXCLUDED.is_tor,
+            is_proxy = EXCLUDED.is_proxy,
+            latitude = EXCLUDED.latitude,
+            longitude = EXCLUDED.longitude,
+            raw_data = EXCLUDED.raw_data,
+            fetched_at = EXCLUDED.fetched_at,
+            updated_at = CURRENT_TIMESTAMP
+    `
+    _, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC())
+    return err
+}
+
+func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) {
+    query := `
+        SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy,
+               latitude, longitude, raw_data, fetched_at, updated_at
+        FROM ip_infos
+        WHERE ip = $1
+    `
+
+    info := &IPInfo{}
+    var rawDataJSON []byte
+
+    err := p.db.QueryRowContext(ctx, query, ip).Scan(
+        &info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg,
+        &info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy,
+        &info.Latitude, &info.Longitude,
+        &rawDataJSON, &info.FetchedAt, &info.UpdatedAt,
+    )
+    if err != nil {
+        return nil, err
+    }
+
+    if len(rawDataJSON) > 0 {
+        json.Unmarshal(rawDataJSON, &info.RawData)
+    }
+
+    return info, nil
+}
+
+func (p *PostgresDB) ShouldUpdateIPInfo(ctx context.Context, ip string) (bool, bool, error) {
+    query := `SELECT fetched_at FROM ip_infos WHERE ip = $1`
+
+    var fetchedAt time.Time
+    err := p.db.QueryRowContext(ctx, query, ip).Scan(&fetchedAt)
+    if err == sql.ErrNoRows {
+        return false, true, nil // Doesn't exist, needs update
+    }
+    if err != nil {
+        return false, false, err
+    }
+
+    // Check if older than 30 days
+    needsUpdate := time.Since(fetchedAt) > 30*24*time.Hour
+    return true, needsUpdate, nil
+}
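ShouldUpdateIPInfo is the gate for external lookups: fetch only when the IP is unknown or the cached record is older than the 30-day window. A sketch of the intended flow; fetchIPInfo is a hypothetical stand-in for whatever enrichment client the worker uses:

// Sketch of the refresh flow (fetchIPInfo is hypothetical and assumed to
// return the map[string]interface{} payload UpsertIPInfo expects).
func maybeRefreshIP(ctx context.Context, db Database, ip string) error {
    exists, needsUpdate, err := db.ShouldUpdateIPInfo(ctx, ip)
    if err != nil {
        return err
    }
    if exists && !needsUpdate {
        return nil // cached record is fresh enough, skip the external lookup
    }

    info, err := fetchIPInfo(ctx, ip) // hypothetical enrichment call
    if err != nil {
        return err
    }
    return db.UpsertIPInfo(ctx, ip, info)
}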
+
+// ===== HELPER FUNCTIONS =====
+
+func extractString(data map[string]interface{}, keys ...string) string {
+    current := data
+    for i, key := range keys {
+        if i == len(keys)-1 {
+            if val, ok := current[key].(string); ok {
+                return val
+            }
+            return ""
+        }
+        if nested, ok := current[key].(map[string]interface{}); ok {
+            current = nested
+        } else {
+            return ""
+        }
+    }
+    return ""
+}
+
+func extractInt(data map[string]interface{}, keys ...string) int {
+    current := data
+    for i, key := range keys {
+        if i == len(keys)-1 {
+            if val, ok := current[key].(float64); ok {
+                return int(val)
+            }
+            if val, ok := current[key].(int); ok {
+                return val
+            }
+            return 0
+        }
+        if nested, ok := current[key].(map[string]interface{}); ok {
+            current = nested
+        } else {
+            return 0
+        }
+    }
+    return 0
+}
+
+func extractFloat(data map[string]interface{}, keys ...string) float32 {
+    current := data
+    for i, key := range keys {
+        if i == len(keys)-1 {
+            if val, ok := current[key].(float64); ok {
+                return float32(val)
+            }
+            return 0
+        }
+        if nested, ok := current[key].(map[string]interface{}); ok {
+            current = nested
+        } else {
+            return 0
+        }
+    }
+    return 0
+}
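The extract helpers walk nested maps defensively, returning zero values on any missing or mistyped key. A quick sketch against a payload shaped like the fields UpsertIPInfo reads (keys illustrative); since the helpers are unexported it would live in package storage:

// Sketch exercising the nested-map helpers with an illustrative payload.
func demoExtract() {
    data := map[string]interface{}{
        "is_datacenter": true,
        "location": map[string]interface{}{
            "city":     "Prague",
            "latitude": 50.08,
        },
        "asn": map[string]interface{}{"asn": float64(64500), "org": "Example AS"},
    }

    fmt.Println(extractString(data, "location", "city"))    // "Prague"
    fmt.Println(extractInt(data, "asn", "asn"))             // 64500
    fmt.Println(extractFloat(data, "location", "latitude")) // 50.08
    fmt.Println(extractString(data, "missing", "key"))      // "" (safe on absent paths)
}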
1165
+
1166
+
// ===== CURSOR OPERATIONS =====
1167
+
1168
+
func (p *PostgresDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) {
1169
+
query := "SELECT source, last_bundle_number, last_scan_time, records_processed FROM scan_cursors WHERE source = $1"
1170
+
1171
+
var cursor ScanCursor
1172
+
err := p.db.QueryRowContext(ctx, query, source).Scan(
1173
+
&cursor.Source, &cursor.LastBundleNumber, &cursor.LastScanTime, &cursor.RecordsProcessed,
1174
+
)
1175
+
if err == sql.ErrNoRows {
1176
+
return &ScanCursor{
1177
+
Source: source,
1178
+
LastBundleNumber: 0,
1179
+
LastScanTime: time.Time{},
1180
+
}, nil
1181
+
}
1182
+
return &cursor, err
1183
+
}
1184
+
1185
+
func (p *PostgresDB) UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error {
1186
+
query := `
1187
+
INSERT INTO scan_cursors (source, last_bundle_number, last_scan_time, records_processed)
1188
+
VALUES ($1, $2, $3, $4)
1189
+
ON CONFLICT(source) DO UPDATE SET
1190
+
last_bundle_number = EXCLUDED.last_bundle_number,
1191
+
last_scan_time = EXCLUDED.last_scan_time,
1192
+
records_processed = EXCLUDED.records_processed
1193
+
`
1194
+
_, err := p.db.ExecContext(ctx, query, cursor.Source, cursor.LastBundleNumber, cursor.LastScanTime, cursor.RecordsProcessed)
1195
+
return err
1196
+
}
1197
+
1198
+
// ===== METRICS OPERATIONS =====

func (p *PostgresDB) StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error {
	query := `
		INSERT INTO plc_metrics (total_dids, total_pds, unique_pds, scan_duration_ms, error_count)
		VALUES ($1, $2, $3, $4, $5)
	`
	_, err := p.db.ExecContext(ctx, query, metrics.TotalDIDs, metrics.TotalPDS,
		metrics.UniquePDS, metrics.ScanDuration, metrics.ErrorCount)
	return err
}

func (p *PostgresDB) GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error) {
	query := `
		SELECT total_dids, total_pds, unique_pds, scan_duration_ms, error_count, created_at
		FROM plc_metrics
		ORDER BY created_at DESC
		LIMIT $1
	`

	rows, err := p.db.QueryContext(ctx, query, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var metrics []*PLCMetrics
	for rows.Next() {
		var m PLCMetrics
		if err := rows.Scan(&m.TotalDIDs, &m.TotalPDS, &m.UniquePDS, &m.ScanDuration, &m.ErrorCount, &m.LastScanTime); err != nil {
			return nil, err
		}
		metrics = append(metrics, &m)
	}

	return metrics, rows.Err()
}

// ===== DID OPERATIONS =====

func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error {
	query := `
		INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
		VALUES ($1, $2, $3, jsonb_build_array($4::integer), CURRENT_TIMESTAMP)
		ON CONFLICT(did) DO UPDATE SET
			handle = EXCLUDED.handle,
			pds = EXCLUDED.pds,
			-- append bundleNum only if it is not already recorded
			bundle_numbers = CASE
				WHEN dids.bundle_numbers @> jsonb_build_array($4::integer) THEN dids.bundle_numbers
				ELSE dids.bundle_numbers || jsonb_build_array($4::integer)
			END,
			updated_at = CURRENT_TIMESTAMP
	`
	_, err := p.db.ExecContext(ctx, query, did, handle, pds, bundleNum)
	return err
}

// UpsertDIDFromMempool creates/updates a DID record without adding to bundle_numbers
func (p *PostgresDB) UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error {
	query := `
		INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
		VALUES ($1, $2, $3, '[]'::jsonb, CURRENT_TIMESTAMP)
		ON CONFLICT(did) DO UPDATE SET
			handle = EXCLUDED.handle,
			pds = EXCLUDED.pds,
			updated_at = CURRENT_TIMESTAMP
	`
	_, err := p.db.ExecContext(ctx, query, did, handle, pds)
	return err
}

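// --- Editor's note: illustrative sketch, not part of the original commit ---
// The CASE/@> combination above keeps bundle_numbers duplicate-free: jsonb's
// containment operator skips the append when the bundle is already recorded.
// Hypothetical walk-through (DID and hostnames invented):
func exampleUpsertDIDDedup(ctx context.Context, p *PostgresDB) error {
	did := "did:plc:ex4mple"
	if err := p.UpsertDID(ctx, did, 12, "alice.example.com", "https://pds.example.com"); err != nil {
		return err
	}
	// Same bundle again: bundle_numbers stays [12], not [12,12].
	if err := p.UpsertDID(ctx, did, 12, "alice.example.com", "https://pds.example.com"); err != nil {
		return err
	}
	// New bundle: bundle_numbers becomes [12,13].
	return p.UpsertDID(ctx, did, 13, "alice.example.com", "https://pds.example.com")
}
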
func (p *PostgresDB) GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) {
	query := `
		SELECT did, handle, pds, bundle_numbers, created_at
		FROM dids
		WHERE did = $1
	`

	var record DIDRecord
	var bundleNumbersJSON []byte
	var handle, pds sql.NullString

	err := p.db.QueryRowContext(ctx, query, did).Scan(
		&record.DID,
		&handle,
		&pds,
		&bundleNumbersJSON,
		&record.CreatedAt,
	)
	if err != nil {
		return nil, err
	}

	if handle.Valid {
		record.Handle = handle.String
	}
	if pds.Valid {
		record.CurrentPDS = pds.String
	}

	if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
		return nil, err
	}

	return &record, nil
}

func (p *PostgresDB) GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) {
	query := `
		SELECT did, handle, pds, bundle_numbers, created_at
		FROM dids
		WHERE handle = $1
	`

	var record DIDRecord
	var bundleNumbersJSON []byte
	var recordHandle, pds sql.NullString

	err := p.db.QueryRowContext(ctx, query, handle).Scan(
		&record.DID,
		&recordHandle,
		&pds,
		&bundleNumbersJSON,
		&record.CreatedAt,
	)
	if err != nil {
		return nil, err
	}

	if recordHandle.Valid {
		record.Handle = recordHandle.String
	}
	if pds.Valid {
		record.CurrentPDS = pds.String
	}

	if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
		return nil, err
	}

	return &record, nil
}

// GetGlobalDIDInfo retrieves consolidated DID info from 'dids' and 'pds_repos'
func (p *PostgresDB) GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) {
	query := `
		WITH primary_endpoints AS (
			-- collapse mirrored endpoints sharing a server DID down to the
			-- earliest-discovered row
			SELECT DISTINCT ON (COALESCE(server_did, id::text))
				id
			FROM endpoints
			WHERE endpoint_type = 'pds'
			ORDER BY COALESCE(server_did, id::text), discovered_at ASC
		)
		SELECT
			d.did,
			d.handle,
			d.pds,
			d.bundle_numbers,
			d.created_at,
			COALESCE(
				jsonb_agg(
					jsonb_build_object(
						'id', pr.id,
						'endpoint_id', pr.endpoint_id,
						'endpoint', e.endpoint,
						'did', pr.did,
						'head', pr.head,
						'rev', pr.rev,
						'active', pr.active,
						'status', pr.status,
						'first_seen', pr.first_seen AT TIME ZONE 'UTC',
						'last_seen', pr.last_seen AT TIME ZONE 'UTC',
						'updated_at', pr.updated_at AT TIME ZONE 'UTC'
					)
					ORDER BY pr.last_seen DESC
				) FILTER (
					WHERE pr.id IS NOT NULL AND pe.id IS NOT NULL
				),
				'[]'::jsonb
			) AS hosting_on
		FROM
			dids d
		LEFT JOIN
			pds_repos pr ON d.did = pr.did
		LEFT JOIN
			endpoints e ON pr.endpoint_id = e.id
		LEFT JOIN
			primary_endpoints pe ON pr.endpoint_id = pe.id
		WHERE
			d.did = $1
		GROUP BY
			d.did, d.handle, d.pds, d.bundle_numbers, d.created_at
	`

	var info GlobalDIDInfo
	var bundleNumbersJSON []byte
	var hostingOnJSON []byte
	var handle, pds sql.NullString

	err := p.db.QueryRowContext(ctx, query, did).Scan(
		&info.DID,
		&handle,
		&pds,
		&bundleNumbersJSON,
		&info.CreatedAt,
		&hostingOnJSON,
	)
	if err != nil {
		return nil, err
	}

	if handle.Valid {
		info.Handle = handle.String
	}
	if pds.Valid {
		info.CurrentPDS = pds.String
	}

	if err := json.Unmarshal(bundleNumbersJSON, &info.BundleNumbers); err != nil {
		return nil, fmt.Errorf("failed to unmarshal bundle_numbers: %w", err)
	}

	if err := json.Unmarshal(hostingOnJSON, &info.HostingOn); err != nil {
		return nil, fmt.Errorf("failed to unmarshal hosting_on: %w", err)
	}

	return &info, nil
}

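// --- Editor's note: illustrative sketch, not part of the original commit ---
// Serializing the consolidated record is a one-liner; the JSON shape mirrors
// GlobalDIDInfo (the embedded DIDRecord fields plus hosting_on). All sample
// values below are invented.
func exampleGlobalDIDInfoJSON(ctx context.Context, p *PostgresDB) ([]byte, error) {
	info, err := p.GetGlobalDIDInfo(ctx, "did:plc:ex4mple") // hypothetical DID
	if err != nil {
		return nil, err
	}
	// Example output shape (values invented):
	// {"did":"did:plc:ex4mple","handle":"alice.example.com",
	//  "bundle_numbers":[12,13],"created_at":"...","hosting_on":[{...}]}
	return json.Marshal(info)
}
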
func (p *PostgresDB) AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error {
	if len(dids) == 0 {
		return nil
	}

	// Acquire a connection from the pool
	conn, err := p.pool.Acquire(ctx)
	if err != nil {
		return err
	}
	defer conn.Release()

	// Start transaction
	tx, err := conn.Begin(ctx)
	if err != nil {
		return err
	}
	defer tx.Rollback(ctx)

	// Create temporary table
	_, err = tx.Exec(ctx, `
		CREATE TEMP TABLE temp_dids (did TEXT PRIMARY KEY) ON COMMIT DROP
	`)
	if err != nil {
		return err
	}

	// Use COPY for blazing fast bulk insert
	_, err = tx.Conn().CopyFrom(
		ctx,
		pgx.Identifier{"temp_dids"},
		[]string{"did"},
		pgx.CopyFromSlice(len(dids), func(i int) ([]interface{}, error) {
			return []interface{}{dids[i]}, nil
		}),
	)
	if err != nil {
		return err
	}

	// Step 1: Insert new DIDs
	_, err = tx.Exec(ctx, `
		INSERT INTO dids (did, bundle_numbers, created_at)
		SELECT td.did, $1::jsonb, CURRENT_TIMESTAMP
		FROM temp_dids td
		WHERE NOT EXISTS (SELECT 1 FROM dids WHERE dids.did = td.did)
	`, fmt.Sprintf("[%d]", bundleNum))

	if err != nil {
		return err
	}

	// Step 2: Update existing DIDs
	_, err = tx.Exec(ctx, `
		UPDATE dids
		SET bundle_numbers = bundle_numbers || $1::jsonb
		FROM temp_dids
		WHERE dids.did = temp_dids.did
		  AND NOT (bundle_numbers @> $1::jsonb)
	`, fmt.Sprintf("[%d]", bundleNum))

	if err != nil {
		return err
	}

	return tx.Commit(ctx)
}

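// --- Editor's note: illustrative, not part of the original commit ---
// Design note: the temp-table + COPY + two set-based statements above avoid
// one round trip per DID. A single INSERT ... ON CONFLICT could fold the two
// steps together, roughly as below (an untested sketch; the split version
// keeps the "append bundle number" predicate easier to read):
const exampleBundleUpsertSQL = `
INSERT INTO dids (did, bundle_numbers, created_at)
SELECT td.did, $1::jsonb, CURRENT_TIMESTAMP
FROM temp_dids td
ON CONFLICT (did) DO UPDATE SET
    bundle_numbers = CASE
        WHEN dids.bundle_numbers @> EXCLUDED.bundle_numbers THEN dids.bundle_numbers
        ELSE dids.bundle_numbers || EXCLUDED.bundle_numbers
    END`
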
func (p *PostgresDB) GetTotalDIDCount(ctx context.Context) (int64, error) {
	query := "SELECT COUNT(*) FROM dids"
	var count int64
	err := p.db.QueryRowContext(ctx, query).Scan(&count)
	return count, err
}

func (p *PostgresDB) GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error) {
	query := `
		WITH unique_servers AS (
			SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text))
				e.id,
				e.ip,
				e.status
			FROM endpoints e
			WHERE e.endpoint_type = 'pds'
			ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC
		),
		pds_by_country AS (
			SELECT
				i.country,
				i.country_code,
				COUNT(DISTINCT us.id) as active_pds_count,
				SUM(latest.user_count) as total_users,
				AVG(latest.response_time) as avg_response_time
			FROM unique_servers us
			JOIN ip_infos i ON us.ip = i.ip
			LEFT JOIN LATERAL (
				SELECT user_count, response_time
				FROM endpoint_scans
				WHERE endpoint_id = us.id
				ORDER BY scanned_at DESC
				LIMIT 1
			) latest ON true
			WHERE us.status = 1
			  AND i.country IS NOT NULL
			  AND i.country != ''
			GROUP BY i.country, i.country_code
		),
		totals AS (
			SELECT
				SUM(active_pds_count) as total_pds,
				SUM(total_users) as total_users_global
			FROM pds_by_country
		)
		SELECT
			pbc.country,
			pbc.country_code,
			pbc.active_pds_count,
			ROUND((pbc.active_pds_count * 100.0 / NULLIF(t.total_pds, 0))::numeric, 2) as pds_percentage,
			COALESCE(pbc.total_users, 0) as total_users,
			ROUND((COALESCE(pbc.total_users, 0) * 100.0 / NULLIF(t.total_users_global, 0))::numeric, 2) as users_percentage,
			ROUND(COALESCE(pbc.avg_response_time, 0)::numeric, 2) as avg_response_time_ms
		FROM pds_by_country pbc
		CROSS JOIN totals t
		ORDER BY pbc.active_pds_count DESC
	`

	rows, err := p.db.QueryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var stats []*CountryStats
	for rows.Next() {
		var s CountryStats
		var pdsPercentage, usersPercentage sql.NullFloat64

		err := rows.Scan(
			&s.Country,
			&s.CountryCode,
			&s.ActivePDSCount,
			&pdsPercentage,
			&s.TotalUsers,
			&usersPercentage,
			&s.AvgResponseTimeMS,
		)
		if err != nil {
			return nil, err
		}

		if pdsPercentage.Valid {
			s.PDSPercentage = pdsPercentage.Float64
		}
		if usersPercentage.Valid {
			s.UsersPercentage = usersPercentage.Float64
		}

		stats = append(stats, &s)
	}

	return stats, rows.Err()
}

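// --- Editor's note: illustrative, not part of the original commit ---
// The leaderboard above and GetVersionStats below both lean on Postgres's
// DISTINCT ON to collapse mirrored endpoints sharing a server DID down to
// the earliest-discovered row. Minimal standalone form (hypothetical data):
//
//	SELECT DISTINCT ON (COALESCE(server_did, id::text)) id
//	FROM endpoints
//	ORDER BY COALESCE(server_did, id::text), discovered_at ASC;
//
// Within each server_did group, the ORDER BY makes the oldest discovery the
// single row that DISTINCT ON keeps.
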
func (p *PostgresDB) GetVersionStats(ctx context.Context) ([]*VersionStats, error) {
	query := `
		WITH unique_servers AS (
			SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text))
				e.id
			FROM endpoints e
			WHERE e.endpoint_type = 'pds'
			  AND e.status = 1
			ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC
		),
		latest_scans AS (
			SELECT DISTINCT ON (us.id)
				us.id,
				es.version,
				es.user_count,
				es.scanned_at
			FROM unique_servers us
			JOIN endpoint_scans es ON us.id = es.endpoint_id
			WHERE es.version IS NOT NULL
			  AND es.version != ''
			ORDER BY us.id, es.scanned_at DESC
		),
		version_groups AS (
			SELECT
				version,
				COUNT(*) as pds_count,
				SUM(user_count) as total_users,
				MIN(scanned_at) as first_seen,
				MAX(scanned_at) as last_seen
			FROM latest_scans
			GROUP BY version
		),
		totals AS (
			SELECT
				SUM(pds_count) as total_pds,
				SUM(total_users) as total_users_global
			FROM version_groups
		)
		SELECT
			vg.version,
			vg.pds_count,
			(vg.pds_count * 100.0 / NULLIF(t.total_pds, 0))::numeric as percentage,
			COALESCE(vg.total_users, 0) as total_users,
			(COALESCE(vg.total_users, 0) * 100.0 / NULLIF(t.total_users_global, 0))::numeric as users_percentage,
			vg.first_seen,
			vg.last_seen
		FROM version_groups vg
		CROSS JOIN totals t
		ORDER BY vg.pds_count DESC
	`

	rows, err := p.db.QueryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var stats []*VersionStats
	for rows.Next() {
		var s VersionStats
		var percentage, usersPercentage sql.NullFloat64

		err := rows.Scan(
			&s.Version,
			&s.PDSCount,
			&percentage,
			&s.TotalUsers,
			&usersPercentage,
			&s.FirstSeen,
			&s.LastSeen,
		)
		if err != nil {
			return nil, err
		}

		if percentage.Valid {
			s.Percentage = percentage.Float64
			s.PercentageText = formatPercentage(percentage.Float64)
		}
		if usersPercentage.Valid {
			s.UsersPercentage = usersPercentage.Float64
		}

		stats = append(stats, &s)
	}

	return stats, rows.Err()
}

// formatPercentage renders pct with precision that widens as the value
// shrinks, so small shares remain distinguishable.
func formatPercentage(pct float64) string {
	if pct >= 10 {
		return fmt.Sprintf("%.2f%%", pct)
	} else if pct >= 1 {
		return fmt.Sprintf("%.3f%%", pct)
	} else if pct >= 0.01 {
		return fmt.Sprintf("%.4f%%", pct)
	} else if pct > 0 {
		return fmt.Sprintf("%.6f%%", pct)
	}
	return "0%"
}

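// --- Editor's note: illustrative sketch, not part of the original commit ---
// Expected formatPercentage outputs across the magnitude bands:
func exampleFormatPercentage() {
	fmt.Println(formatPercentage(23.456))  // 23.46%
	fmt.Println(formatPercentage(3.14159)) // 3.142%
	fmt.Println(formatPercentage(0.05))    // 0.0500%
	fmt.Println(formatPercentage(0.0042))  // 0.004200%
	fmt.Println(formatPercentage(0))       // 0%
}
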
func (p *PostgresDB) UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error {
	if len(repos) == 0 {
		return nil
	}

	// Step 1: Load all existing repos for this endpoint into memory
	query := `
		SELECT did, head, rev, active, status
		FROM pds_repos
		WHERE endpoint_id = $1
	`

	rows, err := p.db.QueryContext(ctx, query, endpointID)
	if err != nil {
		return err
	}

	existingRepos := make(map[string]*PDSRepo)
	for rows.Next() {
		var repo PDSRepo
		var head, rev, status sql.NullString

		err := rows.Scan(&repo.DID, &head, &rev, &repo.Active, &status)
		if err != nil {
			rows.Close()
			return err
		}

		if head.Valid {
			repo.Head = head.String
		}
		if rev.Valid {
			repo.Rev = rev.String
		}
		if status.Valid {
			repo.Status = status.String
		}

		existingRepos[repo.DID] = &repo
	}
	rows.Close()

	if err := rows.Err(); err != nil {
		return err
	}

	// Step 2: Compare and collect changes
	var newRepos []PDSRepoData
	var changedRepos []PDSRepoData

	for _, repo := range repos {
		existing, exists := existingRepos[repo.DID]
		if !exists {
			// New repo
			newRepos = append(newRepos, repo)
		} else if existing.Head != repo.Head ||
			existing.Rev != repo.Rev ||
			existing.Active != repo.Active ||
			existing.Status != repo.Status {
			// Repo changed
			changedRepos = append(changedRepos, repo)
		}
	}

	// Log comparison results
	log.Verbose("UpsertPDSRepos: endpoint_id=%d, total=%d, existing=%d, new=%d, changed=%d, unchanged=%d",
		endpointID, len(repos), len(existingRepos), len(newRepos), len(changedRepos),
		len(repos)-len(newRepos)-len(changedRepos))

	// If nothing changed, return early
	if len(newRepos) == 0 && len(changedRepos) == 0 {
		log.Verbose("UpsertPDSRepos: endpoint_id=%d, no changes detected, skipping database operations", endpointID)
		return nil
	}

	// Step 3: Execute batched operations
	conn, err := p.pool.Acquire(ctx)
	if err != nil {
		return err
	}
	defer conn.Release()

	tx, err := conn.Begin(ctx)
	if err != nil {
		return err
	}
	defer tx.Rollback(ctx)

	// Insert new repos
	if len(newRepos) > 0 {
		_, err := tx.Exec(ctx, `
			CREATE TEMP TABLE temp_new_repos (
				did TEXT,
				head TEXT,
				rev TEXT,
				active BOOLEAN,
				status TEXT
			) ON COMMIT DROP
		`)
		if err != nil {
			return err
		}

		_, err = tx.Conn().CopyFrom(
			ctx,
			pgx.Identifier{"temp_new_repos"},
			[]string{"did", "head", "rev", "active", "status"},
			pgx.CopyFromSlice(len(newRepos), func(i int) ([]interface{}, error) {
				repo := newRepos[i]
				return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil
			}),
		)
		if err != nil {
			return err
		}

		result, err := tx.Exec(ctx, `
			INSERT INTO pds_repos (endpoint_id, did, head, rev, active, status, first_seen, last_seen)
			SELECT $1, did, head, rev, active, status,
			       TIMEZONE('UTC', NOW()),
			       TIMEZONE('UTC', NOW())
			FROM temp_new_repos
		`, endpointID)
		if err != nil {
			return err
		}

		log.Verbose("UpsertPDSRepos: endpoint_id=%d, inserted %d new repos", endpointID, result.RowsAffected())
	}

	// Update changed repos
	if len(changedRepos) > 0 {
		_, err := tx.Exec(ctx, `
			CREATE TEMP TABLE temp_changed_repos (
				did TEXT,
				head TEXT,
				rev TEXT,
				active BOOLEAN,
				status TEXT
			) ON COMMIT DROP
		`)
		if err != nil {
			return err
		}

		_, err = tx.Conn().CopyFrom(
			ctx,
			pgx.Identifier{"temp_changed_repos"},
			[]string{"did", "head", "rev", "active", "status"},
			pgx.CopyFromSlice(len(changedRepos), func(i int) ([]interface{}, error) {
				repo := changedRepos[i]
				return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil
			}),
		)
		if err != nil {
			return err
		}

		result, err := tx.Exec(ctx, `
			UPDATE pds_repos
			SET head = t.head,
			    rev = t.rev,
			    active = t.active,
			    status = t.status,
			    last_seen = TIMEZONE('UTC', NOW()),
			    updated_at = TIMEZONE('UTC', NOW())
			FROM temp_changed_repos t
			WHERE pds_repos.endpoint_id = $1
			  AND pds_repos.did = t.did
		`, endpointID)
		if err != nil {
			return err
		}

		log.Verbose("UpsertPDSRepos: endpoint_id=%d, updated %d changed repos", endpointID, result.RowsAffected())
	}

	if err := tx.Commit(ctx); err != nil {
		return err
	}

	log.Verbose("UpsertPDSRepos: endpoint_id=%d, transaction committed successfully", endpointID)
	return nil
}

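// --- Editor's note: illustrative, not part of the original commit ---
// Design note: UpsertPDSRepos reads the endpoint's current rows first and
// diffs in memory, so an unchanged crawl costs one SELECT and zero writes;
// only genuinely new or changed repos reach the COPY + temp-table path. The
// comparison reduces to this predicate, restated here for clarity:
func repoChangedExample(existing *PDSRepo, incoming PDSRepoData) bool {
	return existing.Head != incoming.Head ||
		existing.Rev != incoming.Rev ||
		existing.Active != incoming.Active ||
		existing.Status != incoming.Status
}
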
func (p *PostgresDB) GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) {
	query := `
		SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at
		FROM pds_repos
		WHERE endpoint_id = $1
	`

	args := []interface{}{endpointID}
	argIdx := 2

	if activeOnly {
		query += " AND active = true"
	}

	// Order by id (primary key) - fastest
	query += " ORDER BY id DESC"

	if limit > 0 {
		query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
		args = append(args, limit, offset)
	}

	rows, err := p.db.QueryContext(ctx, query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var repos []*PDSRepo
	for rows.Next() {
		var repo PDSRepo
		var head, rev, status sql.NullString

		err := rows.Scan(
			&repo.ID, &repo.EndpointID, &repo.DID, &head, &rev,
			&repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt,
		)
		if err != nil {
			return nil, err
		}

		if head.Valid {
			repo.Head = head.String
		}
		if rev.Valid {
			repo.Rev = rev.String
		}
		if status.Valid {
			repo.Status = status.String
		}

		repos = append(repos, &repo)
	}

	return repos, rows.Err()
}

func (p *PostgresDB) GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error) {
	query := `
		SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at
		FROM pds_repos
		WHERE did = $1
		ORDER BY last_seen DESC
	`

	rows, err := p.db.QueryContext(ctx, query, did)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var repos []*PDSRepo
	for rows.Next() {
		var repo PDSRepo
		var head, rev, status sql.NullString

		err := rows.Scan(
			&repo.ID, &repo.EndpointID, &repo.DID, &head, &rev,
			&repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt,
		)
		if err != nil {
			return nil, err
		}

		if head.Valid {
			repo.Head = head.String
		}
		if rev.Valid {
			repo.Rev = rev.String
		}
		if status.Valid {
			repo.Status = status.String
		}

		repos = append(repos, &repo)
	}

	return repos, rows.Err()
}

func (p *PostgresDB) GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error) {
	query := `
		SELECT
			COUNT(*) as total_repos,
			COUNT(*) FILTER (WHERE active = true) as active_repos,
			COUNT(*) FILTER (WHERE active = false) as inactive_repos,
			COUNT(*) FILTER (WHERE status IS NOT NULL AND status != '') as repos_with_status,
			COUNT(*) FILTER (WHERE updated_at > CURRENT_TIMESTAMP - INTERVAL '1 hour') as recent_changes
		FROM pds_repos
		WHERE endpoint_id = $1
	`

	var totalRepos, activeRepos, inactiveRepos, reposWithStatus, recentChanges int64

	err := p.db.QueryRowContext(ctx, query, endpointID).Scan(
		&totalRepos, &activeRepos, &inactiveRepos, &reposWithStatus, &recentChanges,
	)
	if err != nil {
		return nil, err
	}

	return map[string]interface{}{
		"total_repos":       totalRepos,
		"active_repos":      activeRepos,
		"inactive_repos":    inactiveRepos,
		"repos_with_status": reposWithStatus,
		"recent_changes":    recentChanges,
	}, nil
}

// GetTableSizes fetches size information (in bytes) for all tables in the specified schema.
func (p *PostgresDB) GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) {
	// Query selects raw byte values directly
	query := `
		SELECT
			c.relname AS table_name,
			pg_total_relation_size(c.oid) AS total_bytes,
			pg_relation_size(c.oid) AS table_heap_bytes,
			pg_indexes_size(c.oid) AS indexes_bytes
		FROM
			pg_class c
		LEFT JOIN
			pg_namespace n ON n.oid = c.relnamespace
		WHERE
			c.relkind = 'r' -- 'r' = ordinary table
			AND n.nspname = $1
		ORDER BY
			total_bytes DESC;
	`
	rows, err := p.db.QueryContext(ctx, query, schema)
	if err != nil {
		return nil, fmt.Errorf("failed to query table sizes: %w", err)
	}
	defer rows.Close()

	var results []TableSizeInfo
	for rows.Next() {
		var info TableSizeInfo
		// Scan directly into int64 fields
		if err := rows.Scan(
			&info.TableName,
			&info.TotalBytes,
			&info.TableHeapBytes,
			&info.IndexesBytes,
		); err != nil {
			return nil, fmt.Errorf("failed to scan table size row: %w", err)
		}
		results = append(results, info)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating table size rows: %w", err)
	}

	return results, nil
}

// GetIndexSizes fetches size information (in bytes) for all indexes in the specified schema.
func (p *PostgresDB) GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) {
	// Query selects raw byte values directly
	query := `
		SELECT
			c.relname AS index_name,
			COALESCE(i.indrelid::regclass::text, 'N/A') AS table_name,
			pg_relation_size(c.oid) AS index_bytes
		FROM
			pg_class c
		LEFT JOIN
			pg_index i ON i.indexrelid = c.oid
		LEFT JOIN
			pg_namespace n ON n.oid = c.relnamespace
		WHERE
			c.relkind = 'i' -- 'i' = index
			AND n.nspname = $1
		ORDER BY
			index_bytes DESC;
	`
	rows, err := p.db.QueryContext(ctx, query, schema)
	if err != nil {
		return nil, fmt.Errorf("failed to query index sizes: %w", err)
	}
	defer rows.Close()

	var results []IndexSizeInfo
	for rows.Next() {
		var info IndexSizeInfo
		var tableName sql.NullString
		// Scan directly into int64 field
		if err := rows.Scan(
			&info.IndexName,
			&tableName,
			&info.IndexBytes,
		); err != nil {
			return nil, fmt.Errorf("failed to scan index size row: %w", err)
		}
		if tableName.Valid {
			info.TableName = tableName.String
		} else {
			info.TableName = "N/A"
		}
		results = append(results, info)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating index size rows: %w", err)
	}

	return results, nil
}
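
// --- Editor's note: illustrative sketch, not part of the original commit ---
// The size queries return raw bytes so callers choose the formatting; a
// minimal pretty-printer one might pair with GetTableSizes (helper invented):
func exampleHumanBytes(b int64) string {
	const unit = 1024
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}
	div, exp := int64(unit), 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	// e.g. exampleHumanBytes(5_000_000) == "4.8 MiB"
	return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp])
}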
-891
internal/storage/sqlite.go
···
(The whole file is removed in this change; its previous contents were:)

package storage

import (
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	_ "github.com/mattn/go-sqlite3"
)

type SQLiteDB struct {
	db *sql.DB
}

func NewSQLiteDB(path string) (*SQLiteDB, error) {
	db, err := sql.Open("sqlite3", path)
	if err != nil {
		return nil, err
	}

	// Enable WAL mode for better concurrency
	if _, err := db.Exec("PRAGMA journal_mode=WAL"); err != nil {
		return nil, err
	}

	return &SQLiteDB{db: db}, nil
}

func (s *SQLiteDB) Close() error {
	return s.db.Close()
}

func (s *SQLiteDB) Migrate() error {
	schema := `
	-- Endpoints table (replaces pds_servers)
	CREATE TABLE IF NOT EXISTS endpoints (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		endpoint_type TEXT NOT NULL DEFAULT 'pds',
		endpoint TEXT NOT NULL,
		discovered_at TIMESTAMP NOT NULL,
		last_checked TIMESTAMP,
		status INTEGER DEFAULT 0,
		user_count INTEGER DEFAULT 0,
		updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
		UNIQUE(endpoint_type, endpoint)
	);

	CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
	CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
	CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
	CREATE INDEX IF NOT EXISTS idx_endpoints_user_count ON endpoints(user_count);

	-- Keep pds_scans table (or rename to endpoint_scans later)
	CREATE TABLE IF NOT EXISTS pds_scans (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		pds_id INTEGER NOT NULL,
		status INTEGER NOT NULL,
		response_time REAL,
		scan_data TEXT,
		scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
		FOREIGN KEY (pds_id) REFERENCES endpoints(id) ON DELETE CASCADE
	);

	CREATE INDEX IF NOT EXISTS idx_pds_scans_pds_id ON pds_scans(pds_id);
	CREATE INDEX IF NOT EXISTS idx_pds_scans_scanned_at ON pds_scans(scanned_at);

	-- Metrics
	CREATE TABLE IF NOT EXISTS plc_metrics (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		total_dids INTEGER,
		total_pds INTEGER,
		unique_pds INTEGER,
		scan_duration_ms INTEGER,
		error_count INTEGER,
		created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	);

	-- Scan cursors with bundle number
	CREATE TABLE IF NOT EXISTS scan_cursors (
		source TEXT PRIMARY KEY,
		last_bundle_number INTEGER DEFAULT 0,
		last_scan_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
		records_processed INTEGER DEFAULT 0
	);

	-- Bundles with dual hashing
	CREATE TABLE IF NOT EXISTS plc_bundles (
		bundle_number INTEGER PRIMARY KEY,
		start_time TIMESTAMP NOT NULL,
		end_time TIMESTAMP NOT NULL,
		dids TEXT NOT NULL,
		hash TEXT NOT NULL,
		compressed_hash TEXT NOT NULL,
		compressed_size INTEGER NOT NULL,
		uncompressed_size INTEGER NOT NULL, -- NEW
		cursor TEXT, -- NEW
		prev_bundle_hash TEXT,
		compressed BOOLEAN DEFAULT 1,
		created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	);

	CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
	CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
	CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);

	-- NEW: Mempool for pending operations
	CREATE TABLE IF NOT EXISTS plc_mempool (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		did TEXT NOT NULL,
		operation TEXT NOT NULL,
		cid TEXT NOT NULL UNIQUE, -- ✅ Add UNIQUE constraint
		created_at TIMESTAMP NOT NULL,
		added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	);

	CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
	CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
	CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
	`

	_, err := s.db.Exec(schema)
	return err
}

// GetBundleByNumber
func (s *SQLiteDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
	query := `
		SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
		       compressed_size, uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
		FROM plc_bundles
		WHERE bundle_number = ?
	`

	var bundle PLCBundle
	var didsJSON string
	var prevHash sql.NullString
	var cursor sql.NullString

	err := s.db.QueryRowContext(ctx, query, bundleNumber).Scan(
		&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
		&didsJSON, &bundle.Hash, &bundle.CompressedHash,
		&bundle.CompressedSize, &bundle.UncompressedSize, &cursor,
		&prevHash, &bundle.Compressed, &bundle.CreatedAt,
	)
	if err != nil {
		return nil, err
	}

	if prevHash.Valid {
		bundle.PrevBundleHash = prevHash.String
	}
	if cursor.Valid {
		bundle.Cursor = cursor.String
	}

	json.Unmarshal([]byte(didsJSON), &bundle.DIDs)
	return &bundle, nil
}

// GetBundleForTimestamp finds the bundle that should contain operations at or after the given time
func (s *SQLiteDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
	query := `
		SELECT bundle_number
		FROM plc_bundles
		WHERE start_time <= ? AND end_time >= ?
		ORDER BY bundle_number ASC
		LIMIT 1
	`

	var bundleNum int
	err := s.db.QueryRowContext(ctx, query, afterTime, afterTime).Scan(&bundleNum)
	if err == sql.ErrNoRows {
		// No exact match, find the closest bundle before this time
		query = `
			SELECT bundle_number
			FROM plc_bundles
			WHERE end_time < ?
			ORDER BY bundle_number DESC
			LIMIT 1
		`
		err = s.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
		if err == sql.ErrNoRows {
			return 1, nil // Start from first bundle
		}
		if err != nil {
			return 0, err
		}
		return bundleNum, nil // Return the bundle just before
	}
	if err != nil {
		return 0, err
	}

	return bundleNum, nil
}

// GetLastBundleNumber gets the highest bundle number
func (s *SQLiteDB) GetLastBundleNumber(ctx context.Context) (int, error) {
	query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
	var num int
	err := s.db.QueryRowContext(ctx, query).Scan(&num)
	return num, err
}

// AddToMempool adds operations to the mempool
func (s *SQLiteDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
	if len(ops) == 0 {
		return nil
	}

	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// ✅ Use ON CONFLICT to skip duplicates
	stmt, err := tx.PrepareContext(ctx, `
		INSERT INTO plc_mempool (did, operation, cid, created_at)
		VALUES (?, ?, ?, ?)
		ON CONFLICT(cid) DO NOTHING
	`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	for _, op := range ops {
		_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
		if err != nil {
			return err
		}
	}

	return tx.Commit()
}

// GetMempoolCount returns number of operations in mempool
func (s *SQLiteDB) GetMempoolCount(ctx context.Context) (int, error) {
	query := "SELECT COUNT(*) FROM plc_mempool"
	var count int
	err := s.db.QueryRowContext(ctx, query).Scan(&count)
	return count, err
}

// GetMempoolOperations retrieves operations from mempool ordered by timestamp
func (s *SQLiteDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
	query := `
		SELECT id, did, operation, cid, created_at, added_at
		FROM plc_mempool
		ORDER BY created_at ASC
		LIMIT ?
	`

	rows, err := s.db.QueryContext(ctx, query, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var ops []MempoolOperation
	for rows.Next() {
		var op MempoolOperation
		err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
		if err != nil {
			return nil, err
		}
		ops = append(ops, op)
	}

	return ops, rows.Err()
}

// DeleteFromMempool removes operations from mempool
func (s *SQLiteDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
	if len(ids) == 0 {
		return nil
	}

	placeholders := make([]string, len(ids))
	args := make([]interface{}, len(ids))
	for i, id := range ids {
		placeholders[i] = "?"
		args[i] = id
	}

	query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
		strings.Join(placeholders, ","))

	_, err := s.db.ExecContext(ctx, query, args...)
	return err
}

// GetFirstMempoolOperation retrieves the oldest operation from mempool
func (s *SQLiteDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
	query := `
		SELECT id, did, operation, cid, created_at, added_at
		FROM plc_mempool
		ORDER BY created_at ASC, id ASC
		LIMIT 1
	`

	var op MempoolOperation
	err := s.db.QueryRowContext(ctx, query).Scan(
		&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
	)
	if err == sql.ErrNoRows {
		return nil, nil // No operations in mempool
	}
	if err != nil {
		return nil, err
	}

	return &op, nil
}

// GetLastMempoolOperation retrieves the most recent operation from mempool
func (s *SQLiteDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
	query := `
		SELECT id, did, operation, cid, created_at, added_at
		FROM plc_mempool
		ORDER BY created_at DESC, id DESC
		LIMIT 1
	`

	var op MempoolOperation
	err := s.db.QueryRowContext(ctx, query).Scan(
		&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
	)
	if err == sql.ErrNoRows {
		return nil, nil // No operations in mempool
	}
	if err != nil {
		return nil, err
	}

	return &op, nil
}

func (s *SQLiteDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
	didsJSON, err := json.Marshal(bundle.DIDs)
	if err != nil {
		return err
	}

	query := `
		INSERT INTO plc_bundles (
			bundle_number, start_time, end_time, dids,
			hash, compressed_hash, compressed_size, uncompressed_size, cursor, prev_bundle_hash, compressed
		)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
		ON CONFLICT(bundle_number) DO UPDATE SET
			start_time = excluded.start_time,
			end_time = excluded.end_time,
			dids = excluded.dids,
			hash = excluded.hash,
			compressed_hash = excluded.compressed_hash,
			compressed_size = excluded.compressed_size,
			uncompressed_size = excluded.uncompressed_size,
			cursor = excluded.cursor,
			prev_bundle_hash = excluded.prev_bundle_hash,
			compressed = excluded.compressed
	`
	_, err = s.db.ExecContext(ctx, query,
		bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
		string(didsJSON), bundle.Hash, bundle.CompressedHash,
		bundle.CompressedSize, bundle.UncompressedSize, bundle.Cursor,
		bundle.PrevBundleHash, bundle.Compressed,
	)

	return err
}

// GetMempoolUniqueDIDCount returns the number of unique DIDs in mempool
func (s *SQLiteDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
	query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
	var count int
	err := s.db.QueryRowContext(ctx, query).Scan(&count)
	return count, err
}

// GetMempoolUncompressedSize returns total uncompressed size of all operations
func (s *SQLiteDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
	query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
	var size int64
	err := s.db.QueryRowContext(ctx, query).Scan(&size)
	return size, err
}

// GetBundles
func (s *SQLiteDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
	query := `
		SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at
		FROM plc_bundles
		ORDER BY bundle_number DESC
		LIMIT ?
	`

	rows, err := s.db.QueryContext(ctx, query, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	return s.scanBundles(rows)
}

// GetBundlesForDID
func (s *SQLiteDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
	query := `
		SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at
		FROM plc_bundles
		WHERE EXISTS (
			SELECT 1 FROM json_each(dids)
			WHERE json_each.value = ?
		)
		ORDER BY bundle_number ASC
	`

	rows, err := s.db.QueryContext(ctx, query, did)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	return s.scanBundles(rows)
}

// GetBundle retrieves bundle by time (if needed, otherwise can be removed)
func (s *SQLiteDB) GetBundle(ctx context.Context, afterTime time.Time) (*PLCBundle, error) {
	var query string
	var args []interface{}

	if afterTime.IsZero() {
		query = `
			SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at
			FROM plc_bundles
			ORDER BY start_time ASC
			LIMIT 1
		`
		args = []interface{}{}
	} else {
		query = `
			SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at
			FROM plc_bundles
			WHERE start_time >= ?
			ORDER BY start_time ASC
			LIMIT 1
		`
		args = []interface{}{afterTime}
	}

	var bundle PLCBundle
	var didsJSON string
	var prevHash sql.NullString

	err := s.db.QueryRowContext(ctx, query, args...).Scan(
		&bundle.BundleNumber,
		&bundle.StartTime,
		&bundle.EndTime,
		&didsJSON,
		&bundle.Hash,           // Uncompressed hash
		&bundle.CompressedHash, // Compressed hash
		&bundle.CompressedSize, // Compressed size (not FileSize!)
		&prevHash,              // Previous bundle hash
		&bundle.Compressed,
		&bundle.CreatedAt,
	)
	if err == sql.ErrNoRows {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}

	if prevHash.Valid {
		bundle.PrevBundleHash = prevHash.String
	}

	json.Unmarshal([]byte(didsJSON), &bundle.DIDs)
	return &bundle, nil
}

func (s *SQLiteDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
	var bundles []*PLCBundle

	for rows.Next() {
		var bundle PLCBundle
		var didsJSON string
		var prevHash sql.NullString
		var cursor sql.NullString

		if err := rows.Scan(
			&bundle.BundleNumber,
			&bundle.StartTime,
			&bundle.EndTime,
			&didsJSON,
			&bundle.Hash,
			&bundle.CompressedHash,
			&bundle.CompressedSize,
			&bundle.UncompressedSize,
			&cursor,
			&prevHash,
			&bundle.Compressed,
			&bundle.CreatedAt,
		); err != nil {
			return nil, err
		}

		if prevHash.Valid {
			bundle.PrevBundleHash = prevHash.String
		}
		if cursor.Valid {
			bundle.Cursor = cursor.String
		}

		json.Unmarshal([]byte(didsJSON), &bundle.DIDs)
		bundles = append(bundles, &bundle)
	}

	return bundles, rows.Err()
}

// GetBundleStats - update to use compressed_size
func (s *SQLiteDB) GetBundleStats(ctx context.Context) (int64, int64, error) {
	query := `
		SELECT COUNT(*), COALESCE(SUM(compressed_size), 0)
		FROM plc_bundles
	`

	var count, totalSize int64
	err := s.db.QueryRowContext(ctx, query).Scan(&count, &totalSize)
	return count, totalSize, err
}

// UpsertEndpoint inserts or updates an endpoint
func (s *SQLiteDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
	query := `
		INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status)
		VALUES (?, ?, ?, ?, ?)
		ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
			last_checked = excluded.last_checked
		RETURNING id
	`
	err := s.db.QueryRowContext(ctx, query,
		endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
		endpoint.LastChecked, endpoint.Status).Scan(&endpoint.ID)
	return err
}

// EndpointExists checks if an endpoint already exists
func (s *SQLiteDB) EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error) {
	query := "SELECT EXISTS(SELECT 1 FROM endpoints WHERE endpoint = ? AND endpoint_type = ?)"
	var exists bool
	err := s.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(&exists)
	return exists, err
}

// GetEndpointIDByEndpoint gets the ID for an endpoint
func (s *SQLiteDB) GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error) {
	query := "SELECT id FROM endpoints WHERE endpoint = ? AND endpoint_type = ?"
	var id int64
	err := s.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(&id)
	return id, err
}

// GetEndpoint retrieves an endpoint by endpoint string and type
func (s *SQLiteDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
	query := `
		SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status, user_count, updated_at
		FROM endpoints
		WHERE endpoint = ? AND endpoint_type = ?
	`

	var ep Endpoint
	var lastChecked sql.NullTime

	err := s.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
		&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
		&ep.Status, &ep.UserCount, &ep.UpdatedAt,
	)
	if err != nil {
		return nil, err
	}

	if lastChecked.Valid {
		ep.LastChecked = lastChecked.Time
	}

	return &ep, nil
}

// GetEndpointByID retrieves an endpoint by ID
func (s *SQLiteDB) GetEndpointByID(ctx context.Context, id int64) (*Endpoint, error) {
	query := `
		SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status, user_count, updated_at
		FROM endpoints
		WHERE id = ?
	`

	var ep Endpoint
	var lastChecked sql.NullTime

	err := s.db.QueryRowContext(ctx, query, id).Scan(
		&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
		&ep.Status, &ep.UserCount, &ep.UpdatedAt,
	)
	if err != nil {
		return nil, err
	}

	if lastChecked.Valid {
		ep.LastChecked = lastChecked.Time
	}

	return &ep, nil
}

// GetEndpoints retrieves multiple endpoints
func (s *SQLiteDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) {
	query := `
		SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status, user_count, updated_at
		FROM endpoints
		WHERE 1=1
	`
	args := []interface{}{}

	if filter != nil {
		if filter.Type != "" {
			query += " AND endpoint_type = ?"
			args = append(args, filter.Type)
		}
		if filter.Status != "" {
			statusInt := EndpointStatusUnknown
			switch filter.Status {
			case "online":
				statusInt = EndpointStatusOnline
			case "offline":
				statusInt = EndpointStatusOffline
			}
			query += " AND status = ?"
			args = append(args, statusInt)
		}
		if filter.MinUserCount > 0 {
			query += " AND user_count >= ?"
			args = append(args, filter.MinUserCount)
		}
	}

	query += " ORDER BY user_count DESC"

	if filter != nil && filter.Limit > 0 {
		query += fmt.Sprintf(" LIMIT %d OFFSET %d", filter.Limit, filter.Offset)
	}

	rows, err := s.db.QueryContext(ctx, query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var endpoints []*Endpoint
	for rows.Next() {
		var ep Endpoint
		var lastChecked sql.NullTime

		err := rows.Scan(
			&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
			&ep.Status, &ep.UserCount, &ep.UpdatedAt,
		)
		if err != nil {
			return nil, err
		}

		if lastChecked.Valid {
			ep.LastChecked = lastChecked.Time
		}

		endpoints = append(endpoints, &ep)
	}

	return endpoints, rows.Err()
}

// UpdateEndpointStatus updates the status and creates a scan record
func (s *SQLiteDB) UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error {
	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	defer tx.Rollback()

	// Calculate user count from scan data
	userCount := 0
	if update.ScanData != nil {
		userCount = update.ScanData.DIDCount
	}

	// Update main endpoints record
	query := `
		UPDATE endpoints
		SET status = ?, last_checked = ?, user_count = ?, updated_at = ?
		WHERE id = ?
	`
	_, err = tx.ExecContext(ctx, query, update.Status, update.LastChecked, userCount, time.Now(), endpointID)
	if err != nil {
		return err
	}

	// Marshal scan data
	var scanDataJSON []byte
	if update.ScanData != nil {
		scanDataJSON, _ = json.Marshal(update.ScanData)
	}

	// Insert scan history (reuse pds_scans table or rename it to endpoint_scans)
	scanQuery := `
		INSERT INTO pds_scans (pds_id, status, response_time, scan_data)
		VALUES (?, ?, ?, ?)
	`
	_, err = tx.ExecContext(ctx, scanQuery, endpointID, update.Status, update.ResponseTime, string(scanDataJSON))
	if err != nil {
		return err
	}

	return tx.Commit()
}

// GetEndpointScans retrieves scan history for an endpoint
func (s *SQLiteDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
	query := `
		SELECT id, pds_id, status, response_time, scan_data, scanned_at
		FROM pds_scans
		WHERE pds_id = ?
		ORDER BY scanned_at DESC
		LIMIT ?
	`

	rows, err := s.db.QueryContext(ctx, query, endpointID, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var scans []*EndpointScan
	for rows.Next() {
		var scan EndpointScan
		var responseTime sql.NullFloat64
		var scanDataJSON sql.NullString

		err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &scanDataJSON, &scan.ScannedAt)
		if err != nil {
			return nil, err
		}

		if responseTime.Valid {
			scan.ResponseTime = responseTime.Float64
		}

		if scanDataJSON.Valid && scanDataJSON.String != "" {
			var scanData EndpointScanData
			if err := json.Unmarshal([]byte(scanDataJSON.String), &scanData); err == nil {
				scan.ScanData = &scanData
			}
		}

		scans = append(scans, &scan)
	}

	return scans, rows.Err()
}

// GetEndpointStats returns aggregate statistics about all endpoints
func (s *SQLiteDB) GetEndpointStats(ctx context.Context) (*EndpointStats, error) {
	query := `
		SELECT
			COUNT(*) as total_endpoints,
			SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online_endpoints,
			SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline_endpoints,
			(SELECT AVG(response_time) FROM pds_scans WHERE response_time > 0
			 AND scanned_at > datetime('now', '-1 hour')) as avg_response_time,
			SUM(user_count) as total_dids
		FROM endpoints
	`

	var stats EndpointStats
	var avgResponseTime sql.NullFloat64

	err := s.db.QueryRowContext(ctx, query).Scan(
		&stats.TotalEndpoints, &stats.OnlineEndpoints, &stats.OfflineEndpoints,
		&avgResponseTime, &stats.TotalDIDs,
	)

	if avgResponseTime.Valid {
		stats.AvgResponseTime = avgResponseTime.Float64
	}

	// Get counts by type
	typeQuery := `
		SELECT endpoint_type, COUNT(*)
		FROM endpoints
		GROUP BY endpoint_type
	`
	rows, err := s.db.QueryContext(ctx, typeQuery)
	if err == nil {
		defer rows.Close()
		stats.ByType = make(map[string]int64)
		for rows.Next() {
			var typ string
			var count int64
			if err := rows.Scan(&typ, &count); err == nil {
				stats.ByType[typ] = count
			}
		}
	}

	return &stats, err
}

// GetScanCursor retrieves cursor with bundle number
func (s *SQLiteDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) {
	query := "SELECT source, last_bundle_number, last_scan_time, records_processed FROM scan_cursors WHERE source = ?"

	var cursor ScanCursor
	err := s.db.QueryRowContext(ctx, query, source).Scan(
		&cursor.Source, &cursor.LastBundleNumber, &cursor.LastScanTime, &cursor.RecordsProcessed,
	)
	if err == sql.ErrNoRows {
		return &ScanCursor{
			Source:           source,
			LastBundleNumber: 0,
			LastScanTime:     time.Time{},
		}, nil
	}
	return &cursor, err
}

// UpdateScanCursor updates cursor with bundle number
func (s *SQLiteDB) UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error {
	query := `
		INSERT INTO scan_cursors (source, last_bundle_number, last_scan_time, records_processed)
		VALUES (?, ?, ?, ?)
		ON CONFLICT(source) DO UPDATE SET
			last_bundle_number = excluded.last_bundle_number,
			last_scan_time = excluded.last_scan_time,
			records_processed = excluded.records_processed
	`
	_, err := s.db.ExecContext(ctx, query, cursor.Source, cursor.LastBundleNumber, cursor.LastScanTime, cursor.RecordsProcessed)
	return err
}

// StorePLCMetrics stores PLC scan metrics
func (s *SQLiteDB) StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error {
	query := `
		INSERT INTO plc_metrics (total_dids, total_pds, unique_pds, scan_duration_ms, error_count)
		VALUES (?, ?, ?, ?, ?)
	`
	_, err := s.db.ExecContext(ctx, query, metrics.TotalDIDs, metrics.TotalPDS,
		metrics.UniquePDS, metrics.ScanDuration, metrics.ErrorCount)
	return err
}

// GetPLCMetrics retrieves recent PLC metrics
func (s *SQLiteDB) GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error) {
	query := `
		SELECT total_dids, total_pds, unique_pds, scan_duration_ms, error_count, created_at
		FROM plc_metrics
		ORDER BY created_at DESC
		LIMIT ?
	`

	rows, err := s.db.QueryContext(ctx, query, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var metrics []*PLCMetrics
	for rows.Next() {
		var m PLCMetrics
		if err := rows.Scan(&m.TotalDIDs, &m.TotalPDS, &m.UniquePDS, &m.ScanDuration, &m.ErrorCount, &m.LastScanTime); err != nil {
			return nil, err
		}
		metrics = append(metrics, &m)
	}

	return metrics, rows.Err()
}
+201 -32
internal/storage/types.go
···
 package storage
 
 import (
+	"database/sql"
 	"fmt"
 	"path/filepath"
 	"time"
···
 // Endpoint represents any AT Protocol service endpoint
 type Endpoint struct {
 	ID           int64
-	EndpointType string // "pds", "labeler", etc.
+	EndpointType string
 	Endpoint     string
+	ServerDID    string
 	DiscoveredAt time.Time
 	LastChecked  time.Time
 	Status       int
-	UserCount    int64
+	IP           string
+	IPv6         string
+	IPResolvedAt time.Time
+	Valid        bool
 	UpdatedAt    time.Time
 }
 
···
 
 // EndpointScanData contains data from an endpoint scan
 type EndpointScanData struct {
-	ServerInfo interface{} `json:"server_info,omitempty"`
-	DIDs       []string    `json:"dids,omitempty"`
-	DIDCount   int         `json:"did_count"`
-	Metadata   interface{} `json:"metadata,omitempty"` // Type-specific metadata
+	ServerInfo interface{}            `json:"server_info,omitempty"`
+	DIDs       []string               `json:"dids,omitempty"`
+	DIDCount   int                    `json:"did_count"`
+	Metadata   map[string]interface{} `json:"metadata,omitempty"`
 }
 
 // EndpointScan represents a historical endpoint scan
···
 	EndpointID   int64
 	Status       int
 	ResponseTime float64
+	UserCount    int64
+	Version      string
+	UsedIP       string // NEW: Track which IP was actually used
 	ScanData     *EndpointScanData
 	ScannedAt    time.Time
 }
···
 
 // EndpointFilter for querying endpoints
 type EndpointFilter struct {
-	Type         string // "pds", "labeler", etc.
-	Status       string
-	MinUserCount int64
-	Limit        int
-	Offset       int
+	Type            string
+	Status          string
+	MinUserCount    int64
+	OnlyStale       bool
+	OnlyValid       bool
+	RecheckInterval time.Duration
+	Random          bool
+	Limit           int
+	Offset          int
 }
 
 // EndpointStats contains aggregate statistics about endpoints
···
 
 // PLCBundle represents a cached bundle of PLC operations
 type PLCBundle struct {
-	BundleNumber     int
-	StartTime        time.Time
-	EndTime          time.Time
-	BoundaryCIDs     []string
-	DIDs             []string
-	Hash             string
-	CompressedHash   string
-	CompressedSize   int64
-	UncompressedSize int64 // NEW: uncompressed size
-	Cursor           string // NEW: PLC cursor used to create this bundle
-	PrevBundleHash   string
-	Compressed       bool
-	CreatedAt        time.Time
+	BundleNumber               int
+	StartTime                  time.Time
+	EndTime                    time.Time
+	BoundaryCIDs               []string
+	DIDCount                   int // Changed from DIDs []string
+	Hash                       string
+	CompressedHash             string
+	CompressedSize             int64
+	UncompressedSize           int64
+	CumulativeCompressedSize   int64
+	CumulativeUncompressedSize int64
+	Cursor                     string
+	PrevBundleHash             string
+	Compressed                 bool
+	CreatedAt                  time.Time
 }
 
 // GetFilePath returns the computed file path for this bundle
···
 	return 10000
 }
 
-// MempoolOperation represents an operation waiting to be bundled
-type MempoolOperation struct {
-	ID        int64
-	DID       string
-	Operation string
-	CID       string
-	CreatedAt time.Time
-	AddedAt   time.Time
+type PLCHistoryPoint struct {
+	Date                   string `json:"date"`
+	BundleNumber           int    `json:"last_bundle_number"`
+	OperationCount         int    `json:"operations"`
+	UncompressedSize       int64  `json:"size_uncompressed"`
+	CompressedSize         int64  `json:"size_compressed"`
+	CumulativeUncompressed int64  `json:"cumulative_uncompressed"`
+	CumulativeCompressed   int64  `json:"cumulative_compressed"`
 }
 
 // ScanCursor stores scanning progress
···
 	LastScanTime     time.Time
 	RecordsProcessed int64
 }
+
+// DIDRecord represents a DID entry in the database
+type DIDRecord struct {
+	DID           string    `json:"did"`
+	Handle        string    `json:"handle,omitempty"`
+	CurrentPDS    string    `json:"current_pds,omitempty"`
+	LastOpAt      time.Time `json:"last_op_at,omitempty"`
+	BundleNumbers []int     `json:"bundle_numbers"`
+	CreatedAt     time.Time `json:"created_at"`
+}
+
+// GlobalDIDInfo consolidates DID data from PLC and PDS tables
+type GlobalDIDInfo struct {
+	DIDRecord // Embeds all fields: DID, Handle, CurrentPDS, etc.
+	HostingOn []*PDSRepo `json:"hosting_on"`
+}
+
+// IPInfo represents IP information (stored with IP as primary key)
+type IPInfo struct {
+	IP           string                 `json:"ip"`
+	City         string                 `json:"city,omitempty"`
+	Country      string                 `json:"country,omitempty"`
+	CountryCode  string                 `json:"country_code,omitempty"`
+	ASN          int                    `json:"asn,omitempty"`
+	ASNOrg       string                 `json:"asn_org,omitempty"`
+	IsDatacenter bool                   `json:"is_datacenter"`
+	IsVPN        bool                   `json:"is_vpn"`
+	IsCrawler    bool                   `json:"is_crawler"`
+	IsTor        bool                   `json:"is_tor"`
+	IsProxy      bool                   `json:"is_proxy"`
+	Latitude     float32                `json:"latitude,omitempty"`
+	Longitude    float32                `json:"longitude,omitempty"`
+	RawData      map[string]interface{} `json:"raw_data,omitempty"`
+	FetchedAt    time.Time              `json:"fetched_at"`
+	UpdatedAt    time.Time              `json:"updated_at"`
+}
+
+// IsHome returns true if this is a residential/home IP
+// (not crawler, datacenter, tor, proxy, or vpn)
+func (i *IPInfo) IsHome() bool {
+	return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN
+}
+
+// PDSListItem is a virtual type created by JOIN for /pds endpoint
+type PDSListItem struct {
+	// From endpoints table
+	ID           int64
+	Endpoint     string
+	ServerDID    string
+	DiscoveredAt time.Time
+	LastChecked  time.Time
+	Status       int
+	IP           string
+	IPv6         string
+	Valid        bool // NEW
+
+	// From latest endpoint_scans (via JOIN)
+	LatestScan *struct {
+		UserCount    int
+		ResponseTime float64
+		Version      string
+		ScannedAt    time.Time
+	}
+
+	// From ip_infos table (via JOIN on endpoints.ip)
+	IPInfo *IPInfo
+}
+
+// PDSDetail is extended version for /pds/{endpoint}
+type PDSDetail struct {
+	PDSListItem
+
+	// Additional data from latest scan
+	LatestScan *struct {
+		UserCount    int
+		ResponseTime float64
+		Version      string
+		ServerInfo   interface{} // Full server description
+		ScannedAt    time.Time
+	}
+
+	// NEW: Aliases (other domains pointing to same server)
+	Aliases   []string `json:"aliases,omitempty"`
+	IsPrimary bool     `json:"is_primary"`
+}
+
+type CountryStats struct {
+	Country           string  `json:"country"`
+	CountryCode       string  `json:"country_code"`
+	ActivePDSCount    int64   `json:"active_pds_count"`
+	PDSPercentage     float64 `json:"pds_percentage"`
+	TotalUsers        int64   `json:"total_users"`
+	UsersPercentage   float64 `json:"users_percentage"`
+	AvgResponseTimeMS float64 `json:"avg_response_time_ms"`
+}
+
+type VersionStats struct {
+	Version         string    `json:"version"`
+	PDSCount        int64     `json:"pds_count"`
+	Percentage      float64   `json:"percentage"`
+	PercentageText  string    `json:"percentage_text"`
+	TotalUsers      int64     `json:"total_users"`
+	UsersPercentage float64   `json:"users_percentage"`
+	FirstSeen       time.Time `json:"first_seen"`
+	LastSeen        time.Time `json:"last_seen"`
+}
+
+type PDSRepo struct {
+	ID         int64     `json:"id"`
+	EndpointID int64     `json:"endpoint_id"`
+	Endpoint   string    `json:"endpoint,omitempty"`
+	DID        string    `json:"did"`
+	Head       string    `json:"head,omitempty"`
+	Rev        string    `json:"rev,omitempty"`
+	Active     bool      `json:"active"`
+	Status     string    `json:"status,omitempty"`
+	FirstSeen  time.Time `json:"first_seen"`
+	LastSeen   time.Time `json:"last_seen"`
+	UpdatedAt  time.Time `json:"updated_at"`
+}
+
+type PDSRepoData struct {
+	DID    string
+	Head   string
+	Rev    string
+	Active bool
+	Status string
+}
+
+type DIDBackfillInfo struct {
+	DID           string
+	LastBundleNum int
+}
+
+type DIDStateUpdateData struct {
+	DID    string
+	Handle sql.NullString // Use sql.NullString for potential NULLs
+	PDS    sql.NullString
+	OpTime time.Time
+}
+
+// TableSizeInfo holds size information for a database table.
+type TableSizeInfo struct {
+	TableName      string `json:"table_name"`
+	TotalBytes     int64  `json:"total_bytes"`      // Raw bytes
+	TableHeapBytes int64  `json:"table_heap_bytes"` // Raw bytes
+	IndexesBytes   int64  `json:"indexes_bytes"`    // Raw bytes
+}
+
+// IndexSizeInfo holds size information for a database index.
+type IndexSizeInfo struct {
+	IndexName  string `json:"index_name"`
+	TableName  string `json:"table_name"`
+	IndexBytes int64  `json:"index_bytes"` // Raw bytes
+}
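
For a sense of how the new filter fields and the IsHome helper compose, a minimal standalone sketch; the trimmed types below are copies made for illustration, not the repo's code:

package main

import (
	"fmt"
	"time"
)

// Trimmed, illustrative copies of the types added in the diff above.
type EndpointFilter struct {
	Type            string
	OnlyValid       bool
	RecheckInterval time.Duration
	Limit           int
}

type IPInfo struct {
	IsCrawler, IsDatacenter, IsTor, IsProxy, IsVPN bool
}

// IsHome mirrors the method above: residential means none of the flags are set.
func (i *IPInfo) IsHome() bool {
	return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN
}

func main() {
	f := EndpointFilter{Type: "pds", OnlyValid: true, RecheckInterval: 15 * time.Minute, Limit: 100}
	home := IPInfo{}                 // no datacenter/vpn/proxy flags set
	dc := IPInfo{IsDatacenter: true} // typical hosting-provider IP
	fmt.Println(f.Type, home.IsHome(), dc.IsHome()) // pds true false
}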
+27 -3 internal/worker/scheduler.go
···
 	"sync"
 	"time"
 
-	"github.com/atscan/atscanner/internal/log"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/monitor"
 )
 
 type Job struct {
···
 		Interval: interval,
 		Fn:       fn,
 	})
+
+	// Register job with tracker
+	monitor.GetTracker().RegisterJob(name)
 }
 
 func (s *Scheduler) Start(ctx context.Context) {
···
 
 	// Run immediately
 	log.Info("Starting job: %s", job.Name)
-	job.Fn()
+	s.executeJob(job)
 
 	for {
+		// Set next run time
+		monitor.GetTracker().SetNextRun(job.Name, time.Now().Add(job.Interval))
+
 		select {
 		case <-ctx.Done():
 			log.Info("Stopping job: %s", job.Name)
 			return
 		case <-ticker.C:
 			log.Info("Running job: %s", job.Name)
-			job.Fn()
+			s.executeJob(job)
 		}
 	}
 }
+
+func (s *Scheduler) executeJob(job *Job) {
+	monitor.GetTracker().StartJob(job.Name)
+
+	// Run job and capture any panic
+	func() {
+		defer func() {
+			if r := recover(); r != nil {
+				log.Error("Job %s panicked: %v", job.Name, r)
+				monitor.GetTracker().CompleteJob(job.Name, nil)
+			}
+		}()
+
+		job.Fn()
+		monitor.GetTracker().CompleteJob(job.Name, nil)
+	}()
+}
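
The executeJob wrapper relies on Go's recover-in-deferred-closure idiom. A minimal standalone sketch of the same pattern, with the monitor calls omitted:

package main

import (
	"fmt"
	"log"
)

// safeRun executes fn and converts a panic into a logged error,
// so one misbehaving job cannot take down the scheduler loop.
func safeRun(name string, fn func()) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("job %s panicked: %v", name, r)
		}
	}()
	fn()
}

func main() {
	safeRun("ok", func() { fmt.Println("job ran") })
	safeRun("boom", func() { panic("simulated failure") })
	fmt.Println("scheduler still alive")
}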
+125 utils/db-sizes.sh
···
+#!/bin/bash
+
+# === Configuration ===
+CONFIG_FILE="config.yaml"  # Path to your config file
+SCHEMA_NAME="public"       # Replace if your schema is different
+
+# Check if config file exists
+if [ ! -f "$CONFIG_FILE" ]; then
+    echo "Error: Config file not found at '$CONFIG_FILE'"
+    exit 1
+fi
+
+# Check if yq is installed
+if ! command -v yq &> /dev/null; then
+    echo "Error: 'yq' command not found. Please install yq (Go version by Mike Farah)."
+    echo "See: https://github.com/mikefarah/yq/"
+    exit 1
+fi
+
+echo "--- Reading connection info from '$CONFIG_FILE' ---"
+
+# === Extract Database Config using yq ===
+DB_TYPE=$(yq e '.database.type' "$CONFIG_FILE")
+DB_CONN_STRING=$(yq e '.database.path' "$CONFIG_FILE")  # This is likely a URI
+
+if [ -z "$DB_TYPE" ] || [ -z "$DB_CONN_STRING" ]; then
+    echo "Error: Could not read database type or path from '$CONFIG_FILE'."
+    exit 1
+fi
+
+# === Parse the Connection String ===
+DB_USER=""
+DB_PASSWORD=""
+DB_HOST="localhost"  # Default
+DB_PORT="5432"       # Default
+DB_NAME=""
+
+# Use regex to parse the URI (handles postgres:// or postgresql://, optional password/port, and query parameters)
+if [[ "$DB_CONN_STRING" =~ ^(postgres|postgresql)://([^:]+)(:([^@]+))?@([^:/]+)(:([0-9]+))?/([^?]+)(\?.+)?$ ]]; then
+    DB_USER="${BASH_REMATCH[2]}"
+    DB_PASSWORD="${BASH_REMATCH[4]}"        # Optional group
+    DB_HOST="${BASH_REMATCH[5]}"
+    DB_PORT="${BASH_REMATCH[7]:-$DB_PORT}"  # Use extracted port or default
+    DB_NAME="${BASH_REMATCH[8]}"            # Database name before the '?'
+else
+    echo "Error: Could not parse database connection string URI: $DB_CONN_STRING"
+    exit 1
+fi
+
+# Set PGPASSWORD environment variable if password was found
+if [ -n "$DB_PASSWORD" ]; then
+    export PGPASSWORD="$DB_PASSWORD"
+else
+    echo "Warning: No password found in connection string. Relying on ~/.pgpass or password prompt."
+    unset PGPASSWORD
+fi
+
+echo "--- Database Size Investigation ---"
+echo "Database: $DB_NAME"
+echo "Schema:   $SCHEMA_NAME"
+echo "User:     $DB_USER"
+echo "Host:     $DB_HOST:$DB_PORT"
+echo "-----------------------------------"
+
+# === Table Sizes ===
+echo ""
+echo "## Table Sizes (Schema: $SCHEMA_NAME) ##"
+# Removed --tuples-only and --no-align, added -P footer=off
+psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
+SELECT
+    c.relname AS "Table Name",
+    pg_size_pretty(pg_total_relation_size(c.oid)) AS "Total Size",
+    pg_size_pretty(pg_relation_size(c.oid)) AS "Table Heap Size",
+    pg_size_pretty(pg_indexes_size(c.oid)) AS "Indexes Size"
+FROM
+    pg_class c
+LEFT JOIN
+    pg_namespace n ON n.oid = c.relnamespace
+WHERE
+    c.relkind = 'r'  -- 'r' = ordinary table
+    AND n.nspname = '$SCHEMA_NAME'
+ORDER BY
+    pg_total_relation_size(c.oid) DESC;
+EOF
+
+if [ $? -ne 0 ]; then
+    echo "Error querying table sizes. Check connection details, permissions, and password."
+    unset PGPASSWORD
+    exit 1
+fi
+
+# === Index Sizes ===
+echo ""
+echo "## Index Sizes (Schema: $SCHEMA_NAME) ##"
+# Removed --tuples-only and --no-align, added -P footer=off
+psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
+SELECT
+    c.relname AS "Index Name",
+    i.indrelid::regclass AS "Table Name",  -- Show associated table
+    pg_size_pretty(pg_relation_size(c.oid)) AS "Index Size"
+FROM
+    pg_class c
+LEFT JOIN
+    pg_index i ON i.indexrelid = c.oid
+LEFT JOIN
+    pg_namespace n ON n.oid = c.relnamespace
+WHERE
+    c.relkind = 'i'  -- 'i' = index
+    AND n.nspname = '$SCHEMA_NAME'
+ORDER BY
+    pg_relation_size(c.oid) DESC;
+EOF
+
+if [ $? -ne 0 ]; then
+    echo "Error querying index sizes. Check connection details, permissions, and password."
+    unset PGPASSWORD
+    exit 1
+fi
+
+echo ""
+echo "-----------------------------------"
+echo "Investigation complete."
+
+# Unset the password variable for security
+unset PGPASSWORD
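
The bash regex above is the fragile part of this script. For comparison, a hedged Go sketch of the same URI breakdown using the standard net/url package (illustrative; not code from this repo):

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Same shape the script's regex targets: user:pass@host:port/db?params
	dsn := "postgres://atscand:secret@localhost:5432/atscand?sslmode=disable"

	u, err := url.Parse(dsn)
	if err != nil {
		panic(err)
	}
	pass, _ := u.User.Password()
	fmt.Println("user:", u.User.Username()) // atscand
	fmt.Println("pass set:", pass != "")    // true
	fmt.Println("host:", u.Hostname())      // localhost
	fmt.Println("port:", u.Port())          // 5432
	fmt.Println("db:", u.Path[1:])          // atscand (Path is "/atscand")
}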
+113 utils/import-labels.js
···
+import { file, write } from "bun";
+import { join } from "path";
+import { mkdir } from "fs/promises";
+import { init, compress } from "@bokuweb/zstd-wasm";
+
+// --- Configuration ---
+const CSV_FILE = process.argv[2];
+const CONFIG_FILE = "config.yaml";
+const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
+// ---------------------
+
+if (!CSV_FILE) {
+  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
+  process.exit(1);
+}
+
+console.log("========================================");
+console.log("PLC Operation Labels Import (Bun + WASM)");
+console.log("========================================");
+
+// 1. Read and parse config
+console.log(`Loading config from ${CONFIG_FILE}...`);
+const configFile = await file(CONFIG_FILE).text();
+const config = Bun.YAML.parse(configFile);
+const bundleDir = config?.plc?.bundle_dir;
+
+if (!bundleDir) {
+  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
+  process.exit(1);
+}
+
+const FINAL_LABELS_DIR = join(bundleDir, "labels");
+await mkdir(FINAL_LABELS_DIR, { recursive: true });
+
+console.log(`CSV File:   ${CSV_FILE}`);
+console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
+console.log("");
+
+// 2. Initialize Zstd WASM module
+await init();
+
+// --- Pass 1: Read entire file into memory and group by bundle ---
+console.log("Pass 1/2: Reading and grouping all lines by bundle...");
+console.warn("This will use a large amount of RAM!");
+
+const startTime = Date.now();
+const bundles = new Map(); // Map<string, string[]>
+let lineCount = 0;
+
+const inputFile = file(CSV_FILE);
+const fileStream = inputFile.stream();
+const decoder = new TextDecoder();
+let remainder = "";
+
+for await (const chunk of fileStream) {
+  const text = remainder + decoder.decode(chunk);
+  const lines = text.split("\n");
+  remainder = lines.pop() || "";
+
+  for (const line of lines) {
+    if (line === "") continue;
+    lineCount++;
+
+    if (lineCount === 1 && line.startsWith("bundle,")) {
+      continue; // Skip header
+    }
+
+    const firstCommaIndex = line.indexOf(",");
+    if (firstCommaIndex === -1) {
+      console.warn(`Skipping malformed line: ${line}`);
+      continue;
+    }
+    const bundleNumStr = line.substring(0, firstCommaIndex);
+    const bundleKey = bundleNumStr.padStart(6, "0");
+
+    // Add line to the correct bundle's array
+    if (!bundles.has(bundleKey)) {
+      bundles.set(bundleKey, []);
+    }
+    bundles.get(bundleKey).push(line);
+  }
+}
+// Note: We ignore any final `remainder` as it's likely an empty line
+
+console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
+console.log(`Found ${bundles.size} unique bundles.`);
+
+// --- Pass 2: Compress and write each bundle ---
+console.log("\nPass 2/2: Compressing and writing bundle files...");
+let i = 0;
+for (const [bundleKey, lines] of bundles.entries()) {
+  i++;
+  console.log(`  (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
+
+  // Join all lines for this bundle into one big string
+  const content = lines.join("\n");
+
+  // Compress the string
+  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
+
+  // Write the compressed data to the file
+  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
+  await write(outPath, compressedData);
+}
+
+// 3. Clean up
+const totalTime = (Date.now() - startTime) / 1000;
+console.log("\n========================================");
+console.log("Import Summary");
+console.log("========================================");
+console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
+console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
+console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
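
A rough Go equivalent of the group-and-compress step, assuming the github.com/klauspost/compress/zstd package (an assumption for illustration; the repo's own Go code may use a different zstd binding):

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"github.com/klauspost/compress/zstd"
)

// writeBundle compresses one bundle's CSV lines to <dir>/<key>.csv.zst,
// mirroring what the Bun script does with zstd-wasm.
func writeBundle(dir, key string, lines []string) error {
	f, err := os.Create(filepath.Join(dir, key+".csv.zst"))
	if err != nil {
		return err
	}
	defer f.Close()

	enc, err := zstd.NewWriter(f, zstd.WithEncoderLevel(zstd.SpeedDefault))
	if err != nil {
		return err
	}
	if _, err := enc.Write([]byte(strings.Join(lines, "\n"))); err != nil {
		enc.Close()
		return err
	}
	return enc.Close() // flushes and finalizes the zstd frame
}

func main() {
	lines := []string{"12,did:plc:abc,spam", "12,did:plc:def,ok"}
	if err := writeBundle(os.TempDir(), fmt.Sprintf("%06d", 12), lines); err != nil {
		panic(err)
	}
	fmt.Println("wrote bundle 000012")
}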
+91 utils/import-labels.sh
···
+#!/bin/bash
+# import-labels.sh - stream, sort, and compress PLC operation labels
+
+set -e
+
+if [ $# -lt 1 ]; then
+    echo "Usage: ./utils/import-labels.sh <csv-file>"
+    exit 1
+fi
+
+CSV_FILE="$1"
+CONFIG_FILE="config.yaml"
+
+[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
+[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
+
+# Extract bundle directory path
+BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
+
+[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
+
+FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
+
+echo "========================================"
+echo "PLC Operation Labels Import (Sorted Pipe)"
+echo "========================================"
+echo "CSV File:   $CSV_FILE"
+echo "Output Dir: $FINAL_LABELS_DIR"
+echo ""
+
+# Ensure the final directory exists
+mkdir -p "$FINAL_LABELS_DIR"
+
+echo "Streaming, sorting, and compressing on the fly..."
+# Single quotes here: backticks inside double quotes would be executed as commands
+echo 'This will take time. pv will show progress of the tail stage.'
+echo 'The sort command will run after pv is complete.'
+echo ""
+
+# This is the single-pass pipeline
+tail -n +2 "$CSV_FILE" | \
+pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
+sort -t, -k1,1n | \
+awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
+# This awk script EXPECTS input sorted by bundle number (col 1)
+BEGIN {
+    # last_bundle_num tracks the bundle we are currently writing
+    last_bundle_num = -1
+    # cmd holds the current zstd pipe command
+    cmd = ""
+}
+{
+    current_bundle_num = $1
+
+    # Check if the bundle number has changed
+    if (current_bundle_num != last_bundle_num) {
+
+        # If it changed, and we have an old pipe open, close it
+        if (last_bundle_num != -1) {
+            close(cmd)
+        }
+
+        # Create the new pipe command, writing to the final .zst file
+        # (-f overwrites existing files so re-runs do not hang on a prompt)
+        outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
+        cmd = "zstd -f -T0 -o " outfile
+
+        # Update the tracker
+        last_bundle_num = current_bundle_num
+
+        # Print progress to stderr
+        printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
+    }
+
+    # Print the current line ($0) to the open pipe
+    # The first time this runs for a bundle, it opens the pipe
+    # Subsequent times, it writes to the already-open pipe
+    print $0 | cmd
+}
+# END block: close the very last pipe
+END {
+    if (last_bundle_num != -1) {
+        close(cmd)
+    }
+    printf "  Finished. Total lines: %d\n", NR > "/dev/stderr"
+}'
+
+echo ""
+echo "========================================"
+echo "Import Summary"
+echo "========================================"
+echo "✓ Import completed successfully!"
+echo "Label files are stored in: $FINAL_LABELS_DIR"
+160 utils/migrate-ipinfo.sh
···
+#!/bin/bash
+# migrate-ipinfo.sh - Migrate IP info from endpoints to ip_infos table
+
+# Configuration (edit these)
+DB_HOST="localhost"
+DB_PORT="5432"
+DB_NAME="atscand"
+DB_USER="atscand"
+DB_PASSWORD="CHANGE_ME"  # placeholder - set locally, never commit real credentials
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo -e "${GREEN}=== IP Info Migration Script ===${NC}"
+echo ""
+
+# Export password for psql
+export PGPASSWORD="$DB_PASSWORD"
+
+# Check if we can connect
+echo -e "${YELLOW}Testing database connection...${NC}"
+if ! psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "SELECT 1;" > /dev/null 2>&1; then
+    echo -e "${RED}Error: Cannot connect to database${NC}"
+    exit 1
+fi
+echo -e "${GREEN}✓ Connected to database${NC}"
+echo ""
+
+# Create ip_infos table if it doesn't exist
+echo -e "${YELLOW}Creating ip_infos table...${NC}"
+psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" << 'SQL'
+CREATE TABLE IF NOT EXISTS ip_infos (
+    ip TEXT PRIMARY KEY,
+    city TEXT,
+    country TEXT,
+    country_code TEXT,
+    asn INTEGER,
+    asn_org TEXT,
+    is_datacenter BOOLEAN,
+    is_vpn BOOLEAN,
+    latitude REAL,
+    longitude REAL,
+    raw_data JSONB,
+    fetched_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
+CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
+SQL
+
+if [ $? -eq 0 ]; then
+    echo -e "${GREEN}✓ ip_infos table ready${NC}"
+else
+    echo -e "${RED}✗ Failed to create table${NC}"
+    exit 1
+fi
+echo ""
+
+# Count how many endpoints have IP info
+echo -e "${YELLOW}Checking existing data...${NC}"
+ENDPOINT_COUNT=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c \
+    "SELECT COUNT(*) FROM endpoints WHERE ip IS NOT NULL AND ip != '' AND ip_info IS NOT NULL;")
+echo -e "Endpoints with IP info: ${GREEN}${ENDPOINT_COUNT}${NC}"
+
+EXISTING_IP_COUNT=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c \
+    "SELECT COUNT(*) FROM ip_infos;")
+echo -e "Existing IPs in ip_infos table: ${GREEN}${EXISTING_IP_COUNT}${NC}"
+echo ""
+
+# Migrate data
+echo -e "${YELLOW}Migrating IP info data...${NC}"
+psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" << 'SQL'
+-- Migrate IP info from endpoints to ip_infos
+-- Only insert IPs that don't already exist in ip_infos
+INSERT INTO ip_infos (
+    ip,
+    city,
+    country,
+    country_code,
+    asn,
+    asn_org,
+    is_datacenter,
+    is_vpn,
+    latitude,
+    longitude,
+    raw_data,
+    fetched_at,
+    updated_at
+)
+SELECT DISTINCT ON (e.ip)
+    e.ip,
+    e.ip_info->'location'->>'city' AS city,
+    e.ip_info->'location'->>'country' AS country,
+    e.ip_info->'location'->>'country_code' AS country_code,
+    (e.ip_info->'asn'->>'asn')::INTEGER AS asn,
+    e.ip_info->'asn'->>'org' AS asn_org,
+    -- Check if company type is "hosting" for datacenter detection
+    CASE
+        WHEN e.ip_info->'company'->>'type' = 'hosting' THEN true
+        ELSE false
+    END AS is_datacenter,
+    -- Check VPN from security field
+    COALESCE((e.ip_info->'security'->>'vpn')::BOOLEAN, false) AS is_vpn,
+    -- Latitude and longitude
+    (e.ip_info->'location'->>'latitude')::REAL AS latitude,
+    (e.ip_info->'location'->>'longitude')::REAL AS longitude,
+    -- Store full raw data
+    e.ip_info AS raw_data,
+    COALESCE(e.updated_at, CURRENT_TIMESTAMP) AS fetched_at,
+    CURRENT_TIMESTAMP AS updated_at
+FROM endpoints e
+WHERE
+    e.ip IS NOT NULL
+    AND e.ip != ''
+    AND e.ip_info IS NOT NULL
+    AND NOT EXISTS (
+        SELECT 1 FROM ip_infos WHERE ip_infos.ip = e.ip
+    )
+ORDER BY e.ip, e.updated_at DESC NULLS LAST;
+SQL
+
+if [ $? -eq 0 ]; then
+    echo -e "${GREEN}✓ Data migration completed${NC}"
+else
+    echo -e "${RED}✗ Migration failed${NC}"
+    exit 1
+fi
+echo ""
+
+# Show results
+echo -e "${YELLOW}Migration summary:${NC}"
+NEW_IP_COUNT=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c \
+    "SELECT COUNT(*) FROM ip_infos;")
+MIGRATED=$((NEW_IP_COUNT - EXISTING_IP_COUNT))
+echo -e "Total IPs now in ip_infos: ${GREEN}${NEW_IP_COUNT}${NC}"
+echo -e "Newly migrated: ${GREEN}${MIGRATED}${NC}"
+echo ""
+
+# Show sample data
+echo -e "${YELLOW}Sample migrated data:${NC}"
+psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c \
+    "SELECT ip, city, country, country_code, asn, is_datacenter, is_vpn FROM ip_infos LIMIT 5;"
+echo ""
+
+# Optional: Drop old columns (commented out for safety)
+echo -e "${YELLOW}Cleanup options:${NC}"
+echo -e "To remove old ip_info column from endpoints table, run:"
+echo -e "${RED}  ALTER TABLE endpoints DROP COLUMN IF EXISTS ip_info;${NC}"
+echo -e "To remove old user_count column from endpoints table, run:"
+echo -e "${RED}  ALTER TABLE endpoints DROP COLUMN IF EXISTS user_count;${NC}"
+echo ""
+
+echo -e "${GREEN}=== Migration Complete ===${NC}"
+
+# Unset password
+unset PGPASSWORD
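
The SELECT above assumes an ipapi.is-style JSON layout with location/asn/company/security sub-objects. The same extraction, sketched in Go (the payload shape is an assumption, not verified against the scanner's actual provider):

package main

import (
	"encoding/json"
	"fmt"
)

// rawIPInfo mirrors the JSON shape the migration SQL assumes.
type rawIPInfo struct {
	Location struct {
		City        string  `json:"city"`
		Country     string  `json:"country"`
		CountryCode string  `json:"country_code"`
		Latitude    float32 `json:"latitude"`
		Longitude   float32 `json:"longitude"`
	} `json:"location"`
	ASN struct {
		ASN int    `json:"asn"`
		Org string `json:"org"`
	} `json:"asn"`
	Company struct {
		Type string `json:"type"`
	} `json:"company"`
	Security struct {
		VPN bool `json:"vpn"`
	} `json:"security"`
}

func main() {
	blob := []byte(`{"location":{"city":"Prague","country":"Czechia","country_code":"CZ"},
		"asn":{"asn":64496,"org":"Example"},"company":{"type":"hosting"},"security":{"vpn":false}}`)

	var raw rawIPInfo
	if err := json.Unmarshal(blob, &raw); err != nil {
		panic(err)
	}
	isDatacenter := raw.Company.Type == "hosting" // same rule as the CASE expression
	fmt.Println(raw.Location.City, raw.ASN.ASN, isDatacenter, raw.Security.VPN)
}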
+199 utils/vuln-scanner-parallel.sh
···
+#!/bin/bash
+
+# Configuration
+API_HOST="${API_HOST:-http://localhost:8080}"
+TIMEOUT=5
+PARALLEL_JOBS=20
+OUTPUT_DIR="./pds_scan_results"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
+FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"
+
+# Paths to check
+PATHS=(
+    "/info.php"
+    "/phpinfo.php"
+    "/test.php"
+    "/admin"
+    "/admin.php"
+    "/wp-admin"
+    "/robots.txt"
+    "/.env"
+    "/.git/config"
+    "/config.php"
+    "/backup"
+    "/db.sql"
+    "/.DS_Store"
+    "/server-status"
+    "/.well-known/security.txt"
+)
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+# Check dependencies
+if ! command -v jq &> /dev/null; then
+    echo -e "${RED}Error: jq is required${NC}"
+    echo "Install: sudo apt-get install jq"
+    exit 1
+fi
+
+if ! command -v parallel &> /dev/null; then
+    echo -e "${RED}Error: GNU parallel is required${NC}"
+    echo "Install: sudo apt-get install parallel (or brew install parallel)"
+    exit 1
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
+echo -e "${BLUE}║     PDS Security Scanner (Parallel)    ║${NC}"
+echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
+echo ""
+echo "API Host:       $API_HOST"
+echo "Timeout:        ${TIMEOUT}s per request"
+echo "Parallel jobs:  ${PARALLEL_JOBS}"
+echo "Paths to check: ${#PATHS[@]}"
+echo ""
+
+# Scan function - will be called by GNU parallel
+scan_endpoint() {
+    local endpoint="$1"
+    local timeout="$2"
+    shift 2
+    local paths=("$@")
+
+    for path in "${paths[@]}"; do
+        url="${endpoint}${path}"
+
+        response=$(curl -s -o /dev/null -w "%{http_code}" \
+            --max-time "$timeout" \
+            --connect-timeout "$timeout" \
+            --retry 0 \
+            -A "Mozilla/5.0 (Security Scanner)" \
+            "$url" 2>/dev/null)
+
+        if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
+            if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
+                echo "FOUND|$endpoint|$path|$response"
+            elif [ "$response" != "403" ] && [ "$response" != "401" ]; then
+                echo "MAYBE|$endpoint|$path|$response"
+            fi
+        fi
+    done
+}
+
+export -f scan_endpoint
+
+# Fetch active PDS endpoints
+echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
+ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
+    jq -r '.[].endpoint' 2>/dev/null)
+
+if [ -z "$ENDPOINTS" ]; then
+    echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
+    echo "Check that the API is running at: $API_HOST"
+    exit 1
+fi
+
+ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ')
+echo -e "${GREEN}✓ Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
+echo ""
+
+# Write header to results file
+{
+    echo "PDS Security Scan Results"
+    echo "========================="
+    echo "Scan started: $(date)"
+    echo "Endpoints scanned: ${ENDPOINT_COUNT}"
+    echo "Paths checked: ${#PATHS[@]}"
+    echo "Parallel jobs: ${PARALLEL_JOBS}"
+    echo ""
+    echo "Results:"
+    echo "--------"
+} > "$RESULTS_FILE"
+
+# Run parallel scan
+echo -e "${YELLOW}Starting parallel scan...${NC}"
+echo -e "${BLUE}(This may take a few minutes depending on endpoint count)${NC}"
+echo ""
+
+echo "$ENDPOINTS" | \
+    parallel \
+        -j "$PARALLEL_JOBS" \
+        --bar \
+        --joblog "${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" \
+        scan_endpoint {} "$TIMEOUT" "${PATHS[@]}" \
+    >> "$RESULTS_FILE"
+
+echo ""
+echo -e "${YELLOW}Processing results...${NC}"
+
+# Count results. Note: grep -c already prints 0 when nothing matches (it just
+# exits non-zero), so appending "|| echo 0" would produce "0\n0" here.
+FOUND_COUNT=$(grep -c "^FOUND|" "$RESULTS_FILE" 2>/dev/null || true)
+FOUND_COUNT=${FOUND_COUNT:-0}
+MAYBE_COUNT=$(grep -c "^MAYBE|" "$RESULTS_FILE" 2>/dev/null || true)
+MAYBE_COUNT=${MAYBE_COUNT:-0}
+
+# Extract found URLs to separate file
+{
+    echo "Found URLs (HTTP 200/301/302)"
+    echo "=============================="
+    echo "Scan: $(date)"
+    echo ""
+} > "$FOUND_FILE"
+
+grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | while IFS='|' read -r status endpoint path code; do
+    echo "$endpoint$path [$code]"
+done >> "$FOUND_FILE"
+
+# Create summary at end of results file
+{
+    echo ""
+    echo "Summary"
+    echo "======="
+    echo "Scan completed: $(date)"
+    echo "Total endpoints scanned: ${ENDPOINT_COUNT}"
+    echo "Total paths checked: $((ENDPOINT_COUNT * ${#PATHS[@]}))"
+    echo "Found (200/301/302): ${FOUND_COUNT}"
+    echo "Maybe (other codes): ${MAYBE_COUNT}"
+} >> "$RESULTS_FILE"
+
+# Display summary
+echo ""
+echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
+echo -e "${BLUE}║            Scan Complete!              ║${NC}"
+echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
+echo ""
+echo -e "Endpoints scanned:      ${GREEN}${ENDPOINT_COUNT}${NC}"
+echo -e "Paths checked per site: ${BLUE}${#PATHS[@]}${NC}"
+echo -e "Total requests made:    ${BLUE}$((ENDPOINT_COUNT * ${#PATHS[@]}))${NC}"
+echo ""
+echo -e "Results:"
+echo -e "  ${GREEN}✓ Found (200/301/302):${NC} ${FOUND_COUNT}"
+echo -e "  ${YELLOW}? Maybe (other):${NC}       ${MAYBE_COUNT}"
+echo ""
+echo "Files created:"
+echo "  Full results: $RESULTS_FILE"
+echo "  Found URLs:   $FOUND_FILE"
+echo "  Job log:      ${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt"
+
+# Show sample of found URLs if any
+if [ "$FOUND_COUNT" -gt 0 ]; then
+    echo ""
+    echo -e "${RED}⚠ SECURITY ALERT: Found exposed paths!${NC}"
+    echo ""
+    echo "Sample findings (first 10):"
+    grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | head -10 | while IFS='|' read -r status endpoint path code; do
+        echo -e "  ${RED}✗${NC} $endpoint${RED}$path${NC} [$code]"
+    done
+
+    if [ "$FOUND_COUNT" -gt 10 ]; then
+        echo ""
+        echo "  ... and $((FOUND_COUNT - 10)) more (see $FOUND_FILE)"
+    fi
+fi
+
+echo ""
+117 utils/vuln-scanner.sh
···
+#!/bin/bash
+
+# Configuration
+API_HOST="${API_HOST:-http://localhost:8080}"
+TIMEOUT=5
+OUTPUT_DIR="./pds_scan_results"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
+FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"
+
+# Paths to check (one per line for easier editing)
+PATHS=(
+    "/info.php"
+    "/phpinfo.php"
+    "/test.php"
+    "/admin"
+    "/admin.php"
+    "/wp-admin"
+    "/robots.txt"
+    "/.env"
+    "/.git/config"
+    "/config.php"
+    "/backup"
+    "/db.sql"
+    "/.DS_Store"
+    "/server-status"
+    "/.well-known/security.txt"
+)
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+mkdir -p "$OUTPUT_DIR"
+
+echo -e "${BLUE}=== PDS Security Scanner ===${NC}"
+echo "API Host: $API_HOST"
+echo "Timeout:  ${TIMEOUT}s"
+echo "Scanning for ${#PATHS[@]} paths"
+echo "Results:  $RESULTS_FILE"
+echo ""
+
+# Fetch active PDS endpoints
+echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
+ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
+    jq -r '.[].endpoint' 2>/dev/null)
+
+if [ -z "$ENDPOINTS" ]; then
+    echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
+    exit 1
+fi
+
+ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l)
+echo -e "${GREEN}Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
+echo ""
+
+# Write header
+echo "PDS Security Scan - $(date)" > "$RESULTS_FILE"
+echo "========================================" >> "$RESULTS_FILE"
+echo "" >> "$RESULTS_FILE"
+
+# Counters
+CURRENT=0
+TOTAL_FOUND=0
+TOTAL_MAYBE=0
+
+# Scan each endpoint sequentially
+while IFS= read -r endpoint; do
+    CURRENT=$((CURRENT + 1))
+
+    echo -e "${BLUE}[$CURRENT/$ENDPOINT_COUNT]${NC} Scanning: $endpoint"
+
+    # Scan each path
+    for path in "${PATHS[@]}"; do
+        url="${endpoint}${path}"
+
+        # Make request with timeout
+        response=$(curl -s -o /dev/null -w "%{http_code}" \
+            --max-time "$TIMEOUT" \
+            --connect-timeout "$TIMEOUT" \
+            -L \
+            -A "Mozilla/5.0 (Security Scanner)" \
+            "$url" 2>/dev/null)
+
+        # Check response
+        if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
+            if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
+                echo -e "  ${GREEN}✓ FOUND${NC} $path ${YELLOW}[$response]${NC}"
+                echo "FOUND: $endpoint$path [$response]" >> "$RESULTS_FILE"
+                echo "$endpoint$path" >> "$FOUND_FILE"
+                TOTAL_FOUND=$((TOTAL_FOUND + 1))
+            elif [ "$response" != "403" ]; then
+                echo -e "  ${YELLOW}? MAYBE${NC} $path ${YELLOW}[$response]${NC}"
+                echo "MAYBE: $endpoint$path [$response]" >> "$RESULTS_FILE"
+                TOTAL_MAYBE=$((TOTAL_MAYBE + 1))
+            fi
+        fi
+    done
+
+    echo "" >> "$RESULTS_FILE"
+
+done <<< "$ENDPOINTS"
+
+# Summary
+echo ""
+echo -e "${BLUE}========================================${NC}"
+echo -e "${GREEN}Scan Complete!${NC}"
+echo "Scanned: ${ENDPOINT_COUNT} endpoints"
+echo "Paths checked per endpoint: ${#PATHS[@]}"
+echo -e "${GREEN}Found (200/301/302): ${TOTAL_FOUND}${NC}"
+echo -e "${YELLOW}Maybe (other codes): ${TOTAL_MAYBE}${NC}"
+echo ""
+echo "Full results: $RESULTS_FILE"
+[ -f "$FOUND_FILE" ] && echo "Found URLs: $FOUND_FILE"