Makefile  +39 -5
···
-all: run
+.PHONY: all build install test clean fmt lint help
+
+# Binary name
+BINARY_NAME=atscand
+INSTALL_PATH=$(GOPATH)/bin
+
+# Go commands
+GOCMD=go
+GOBUILD=$(GOCMD) build
+GOINSTALL=$(GOCMD) install
+GOCLEAN=$(GOCMD) clean
+GOTEST=$(GOCMD) test
+GOGET=$(GOCMD) get
+GOFMT=$(GOCMD) fmt
+GOMOD=$(GOCMD) mod
+GORUN=$(GOCMD) run
+
+# Default target
+all: build
+
+# Build the CLI tool
+build:
+	@echo "Building $(BINARY_NAME)..."
+	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+# Install the CLI tool globally
+install:
+	@echo "Installing $(BINARY_NAME)..."
+	$(GOINSTALL) ./cmd/atscand
 
 run:
-	go run cmd/atscanner.go -verbose
+	$(GORUN) cmd/atscand/main.go -verbose
 
-clean-db:
-	dropdb -U atscanner atscanner
-	createdb atscanner -O atscanner
+update-plcbundle:
+	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo " make build - Build the binary"
+	@echo " make install - Install binary globally"
+	@echo " make run - Run app"
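Usage note (illustrative): make build compiles the atscand binary from ./cmd/atscand, make install installs it via go install, make run starts the daemon directly with -verbose, and make update-plcbundle pulls the latest tangled.org/atscan.net/plcbundle release with GOPROXY=direct.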
cmd/atscand/main.go  +159 (new file)
···
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"github.com/atscan/atscand/internal/api"
+	"github.com/atscan/atscand/internal/config"
+	"github.com/atscan/atscand/internal/log"
+	"github.com/atscan/atscand/internal/pds"
+	"github.com/atscan/atscand/internal/plc"
+	"github.com/atscan/atscand/internal/storage"
+	"github.com/atscan/atscand/internal/worker"
+)
+
+const VERSION = "1.0.0"
+
+func main() {
+	configPath := flag.String("config", "config.yaml", "path to config file")
+	verbose := flag.Bool("verbose", false, "enable verbose logging")
+	flag.Parse()
+
+	// Load configuration
+	cfg, err := config.Load(*configPath)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Override verbose setting if flag is provided
+	if *verbose {
+		cfg.API.Verbose = true
+	}
+
+	// Initialize logger
+	log.Init(cfg.API.Verbose)
+
+	// Print banner
+	log.Banner(VERSION)
+
+	// Print configuration summary
+	log.PrintConfig(map[string]string{
+		"Database Type": cfg.Database.Type,
+		"Database Path": cfg.Database.Path, // Will be auto-redacted
+		"PLC Directory": cfg.PLC.DirectoryURL,
+		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+		"PLC Bundle Dir": cfg.PLC.BundleDir,
+		"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
+		"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+		"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
+		"PDS Timeout": cfg.PDS.Timeout.String(),
+		"API Host": cfg.API.Host,
+		"API Port": fmt.Sprintf("%d", cfg.API.Port),
+		"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
+	})
+
+	// Initialize database using factory pattern
+	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+	if err != nil {
+		log.Fatal("Failed to initialize database: %v", err)
+	}
+	defer func() {
+		log.Info("Closing database connection...")
+		db.Close()
+	}()
+
+	// Set scan retention from config
+	if cfg.PDS.ScanRetention > 0 {
+		db.SetScanRetention(cfg.PDS.ScanRetention)
+		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+	}
+
+	// Run migrations
+	if err := db.Migrate(); err != nil {
+		log.Fatal("Failed to run migrations: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Initialize workers
+	log.Info("Initializing scanners...")
+
+	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+	if err != nil {
+		log.Fatal("Failed to create bundle manager: %v", err)
+	}
+	defer bundleManager.Close()
+	log.Verbose("✓ Bundle manager initialized (shared)")
+
+	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+	defer plcScanner.Close()
+	log.Verbose("✓ PLC scanner initialized")
+
+	pdsScanner := pds.NewScanner(db, cfg.PDS)
+	log.Verbose("✓ PDS scanner initialized")
+
+	scheduler := worker.NewScheduler()
+
+	// Schedule PLC directory scan
+	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+		if err := plcScanner.Scan(ctx); err != nil {
+			log.Error("PLC scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+	// Schedule PDS availability checks
+	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+		if err := pdsScanner.ScanAll(ctx); err != nil {
+			log.Error("PDS scan error: %v", err)
+		}
+	})
+	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+	// Start API server
+	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+	go func() {
+		if err := apiServer.Start(); err != nil {
+			log.Fatal("API server error: %v", err)
+		}
+	}()
+
+	// Give the API server a moment to start
+	time.Sleep(100 * time.Millisecond)
+	log.Info("✓ API server started successfully")
+	log.Info("")
+	log.Info("🚀 ATScanner is running!")
+	log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+	log.Info(" Press Ctrl+C to stop")
+	log.Info("")
+
+	// Start scheduler
+	scheduler.Start(ctx)
+
+	// Wait for interrupt
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+	<-sigChan
+
+	log.Info("")
+	log.Info("Shutting down gracefully...")
+	cancel()
+
+	log.Info("Stopping API server...")
+	apiServer.Shutdown(context.Background())
+
+	log.Info("Waiting for active tasks to complete...")
+	time.Sleep(2 * time.Second)
+
+	log.Info("✓ Shutdown complete. Goodbye!")
+}
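Note on the wiring above: the daemon registers its periodic work through the internal worker.Scheduler (AddJob plus Start(ctx)) and relies on context cancellation for shutdown. A minimal sketch of that pattern, assuming ticker-driven jobs; the actual internal/worker implementation in this repo may differ:

	package worker

	import (
		"context"
		"time"
	)

	type job struct {
		name     string
		interval time.Duration
		run      func()
	}

	// Scheduler runs registered jobs at fixed intervals until the context is cancelled.
	type Scheduler struct{ jobs []job }

	func NewScheduler() *Scheduler { return &Scheduler{} }

	func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
		s.jobs = append(s.jobs, job{name: name, interval: interval, run: fn})
	}

	func (s *Scheduler) Start(ctx context.Context) {
		for _, j := range s.jobs {
			go func(j job) {
				ticker := time.NewTicker(j.interval)
				defer ticker.Stop()
				j.run() // run once immediately, then on every tick
				for {
					select {
					case <-ctx.Done():
						return
					case <-ticker.C:
						j.run()
					}
				}
			}(j)
		}
	}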
cmd/atscanner.go  -99 (deleted)
···
-package main
-
-import (
-	"context"
-	"flag"
-	"os"
-	"os/signal"
-	"syscall"
-	"time"
-
-	"github.com/atscan/atscanner/internal/api"
-	"github.com/atscan/atscanner/internal/config"
-	"github.com/atscan/atscanner/internal/log"
-	"github.com/atscan/atscanner/internal/pds"
-	"github.com/atscan/atscanner/internal/plc"
-	"github.com/atscan/atscanner/internal/storage"
-	"github.com/atscan/atscanner/internal/worker"
-)
-
-func main() {
-	configPath := flag.String("config", "config.yaml", "path to config file")
-	verbose := flag.Bool("verbose", false, "enable verbose logging")
-	flag.Parse()
-
-	// Load configuration
-	cfg, err := config.Load(*configPath)
-	if err != nil {
-		log.Fatal("Failed to load config: %v", err)
-	}
-
-	// Override verbose setting if flag is provided
-	if *verbose {
-		cfg.API.Verbose = true
-	}
-
-	// Initialize logger
-	log.Init(cfg.API.Verbose)
-
-	// Initialize database using factory pattern
-	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
-	if err != nil {
-		log.Fatal("Failed to initialize database: %v", err)
-	}
-	defer db.Close()
-
-	// Set scan retention from config
-	db.SetScanRetention(cfg.PDS.ScanRetention)
-
-	// Run migrations
-	if err := db.Migrate(); err != nil {
-		log.Fatal("Failed to run migrations: %v", err)
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Initialize workers
-	plcScanner := plc.NewScanner(db, cfg.PLC)
-	defer plcScanner.Close()
-
-	pdsScanner := pds.NewScanner(db, cfg.PDS)
-
-	scheduler := worker.NewScheduler()
-
-	// Schedule PLC directory scan
-	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
-		if err := plcScanner.Scan(ctx); err != nil {
-			log.Error("PLC scan error: %v", err)
-		}
-	})
-
-	// Schedule PDS availability checks
-	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
-		if err := pdsScanner.ScanAll(ctx); err != nil {
-			log.Error("PDS scan error: %v", err)
-		}
-	})
-
-	// Start API server
-	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
-	go func() {
-		if err := apiServer.Start(); err != nil {
-			log.Fatal("API server error: %v", err)
-		}
-	}()
-
-	// Start scheduler
-	scheduler.Start(ctx)
-
-	// Wait for interrupt
-	sigChan := make(chan os.Signal, 1)
-	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
-	<-sigChan
-
-	log.Info("Shutting down gracefully...")
-	cancel()
-	apiServer.Shutdown(context.Background())
-	time.Sleep(2 * time.Second)
-}
cmd/import-labels/main.go  +168 (new file)
···
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	PLC struct {
+		BundleDir string `yaml:"bundle_dir"`
+	} `yaml:"plc"`
+}
+
+var CONFIG_FILE = "config.yaml"
+
+// ---------------------
+
+func main() {
+	// Define a new flag for changing the directory
+	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+	flag.Usage = func() { // Custom usage message
+		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+		flag.PrintDefaults()
+	}
+	flag.Parse() // Parse all defined flags
+
+	// Change directory if the flag was used
+	if *workDir != "." {
+		fmt.Printf("Changing working directory to %s...\n", *workDir)
+		if err := os.Chdir(*workDir); err != nil {
+			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+			os.Exit(1)
+		}
+	}
+
+	// --- REMOVED UNUSED CODE ---
+	// The csvFilePath variable and NArg check were removed
+	// as the script now reads from stdin.
+	// ---------------------------
+
+	fmt.Println("========================================")
+	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+	fmt.Println("========================================")
+
+	// 1. Read config (will now read from the new CWD)
+	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+	configData, err := os.ReadFile(CONFIG_FILE)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+		os.Exit(1)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(configData, &config); err != nil {
+		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+		os.Exit(1)
+	}
+
+	if config.PLC.BundleDir == "" {
+		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+		os.Exit(1)
+	}
+
+	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+	fmt.Println("Waiting for sorted data from stdin...")
+
+	// 2. Process sorted data from stdin
+	// This script *requires* the input to be sorted by bundle number.
+
+	var currentWriter *zstd.Encoder
+	var currentFile *os.File
+	var lastBundleKey string = ""
+
+	lineCount := 0
+	startTime := time.Now()
+
+	scanner := bufio.NewScanner(os.Stdin)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		lineCount++
+
+		parts := strings.SplitN(line, ",", 2)
+		if len(parts) < 1 {
+			continue // Skip empty/bad lines
+		}
+
+		bundleNumStr := parts[0]
+		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+		// If the bundle key is new, close the old writer and open a new one.
+		if bundleKey != lastBundleKey {
+			// Close the previous writer/file
+			if currentWriter != nil {
+				if err := currentWriter.Close(); err != nil {
+					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+				}
+				currentFile.Close()
+			}
+
+			// Start the new one
+			fmt.Printf(" -> Writing bundle %s\n", bundleKey)
+			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+			file, err := os.Create(outPath)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+				os.Exit(1)
+			}
+			currentFile = file
+
+			writer, err := zstd.NewWriter(file)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+				os.Exit(1)
+			}
+			currentWriter = writer
+			lastBundleKey = bundleKey
+		}
+
+		// Write the line to the currently active writer
+		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+		}
+
+		// Progress update
+		if lineCount%100000 == 0 {
+			elapsed := time.Since(startTime).Seconds()
+			rate := float64(lineCount) / elapsed
+			fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+		}
+	}
+
+	// 3. Close the very last writer
+	if currentWriter != nil {
+		if err := currentWriter.Close(); err != nil {
+			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+		}
+		currentFile.Close()
+	}
+
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+	}
+
+	totalTime := time.Since(startTime)
+	fmt.Println("\n========================================")
+	fmt.Println("Import Summary")
+	fmt.Println("========================================")
+	fmt.Printf("✓ Import completed in %v\n", totalTime)
+	fmt.Printf("Total lines processed: %d\n", lineCount)
+}
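To illustrate the expected data flow (hypothetical values): a stdin line such as 123,did:plc:exampleabc,somelabel is routed by its leading bundle number into <bundle_dir>/labels/000123.csv.zst. Because os.Create truncates existing files, the input must arrive sorted and grouped by bundle number, otherwise a bundle file written earlier in the stream would be reopened and overwritten.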
config.sample.yaml  +22 (new file)
···
+database:
+  type: "postgres" # or "sqlite"
+  path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
+  # For SQLite: path: "atscan.db"
+
+plc:
+  directory_url: "https://plc.directory"
+  scan_interval: "5s"
+  bundle_dir: "./plc_bundles"
+  use_cache: true
+  index_dids: true
+
+pds:
+  scan_interval: "30m"
+  timeout: "30s"
+  workers: 20
+  recheck_interval: "1.5h"
+  scan_retention: 20
+
+api:
+  host: "0.0.0.0"
+  port: 8080
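A side note on the interval strings above: values such as "30m" and "1.5h" are Go duration syntax, and fractional units are accepted by time.ParseDuration. A quick check (standalone snippet, assuming the config loader relies on standard duration parsing):

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		for _, s := range []string{"5s", "30m", "1.5h"} {
			d, err := time.ParseDuration(s)
			if err != nil {
				panic(err)
			}
			fmt.Printf("%s parses to %v\n", s, d) // "1.5h" parses to 1h30m0s
		}
	}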
config.yaml  -22 (deleted)
···
-database:
-  type: "postgres" # or "sqlite"
-  path: "postgres://atscanner:Noor1kooz5eeFai9leZagh5ua5eihai4@localhost:5432/atscanner?sslmode=disable"
-  # For SQLite: path: "atscan.db"
-
-plc:
-  directory_url: "https://plc.directory"
-  scan_interval: "5s"
-  bundle_dir: "./plc_bundles"
-  use_cache: true
-  index_dids: true
-
-pds:
-  scan_interval: "15m"
-  timeout: "30s"
-  workers: 20
-  recheck_interval: "5m"
-  scan_retention: 3
-
-api:
-  host: "0.0.0.0"
-  port: 8080
go.mod  +6 -5
···
-module github.com/atscan/atscanner
+module github.com/atscan/atscand
 
 go 1.23.0
 
 require (
 	github.com/gorilla/mux v1.8.1
 	github.com/lib/pq v1.10.9
-	github.com/mattn/go-sqlite3 v1.14.18
 	gopkg.in/yaml.v3 v3.0.1
 )
 
-require github.com/klauspost/compress v1.18.0
+require github.com/klauspost/compress v1.18.1
 
 require (
-	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 	github.com/gorilla/handlers v1.5.2
+	github.com/jackc/pgx/v5 v5.7.6
+	tangled.org/atscan.net/plcbundle v0.3.6
 )
 
 require (
 	github.com/felixge/httpsnoop v1.0.3 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-	github.com/jackc/pgx/v5 v5.7.6 // indirect
 	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	golang.org/x/crypto v0.37.0 // indirect
 	golang.org/x/sync v0.13.0 // indirect
 	golang.org/x/text v0.24.0 // indirect
go.sum  +17 -7
···
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
-github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
 github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
 github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
-github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
-github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
 golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
 golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
 golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
 golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
 golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+686
-453
internal/api/handlers.go
+686
-453
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/plc"
19
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
20
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
21
20
)
22
21
23
22
// ===== RESPONSE HELPERS =====
···
39
38
http.Error(r.w, msg, code)
40
39
}
41
40
42
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
43
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
44
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
45
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
46
45
r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
47
46
r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
48
47
r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
49
-
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
48
+
r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
50
49
}
51
50
52
51
// ===== REQUEST HELPERS =====
···
76
75
77
76
// ===== FORMATTING HELPERS =====
78
77
79
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
80
-
return map[string]interface{}{
81
-
"plc_bundle_number": bundle.BundleNumber,
82
-
"start_time": bundle.StartTime,
83
-
"end_time": bundle.EndTime,
84
-
"operation_count": plc.BUNDLE_SIZE,
85
-
"did_count": len(bundle.DIDs),
86
-
"hash": bundle.Hash,
87
-
"compressed_hash": bundle.CompressedHash,
88
-
"compressed_size": bundle.CompressedSize,
89
-
"uncompressed_size": bundle.UncompressedSize,
90
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
91
-
"cursor": bundle.Cursor,
92
-
"prev_bundle_hash": bundle.PrevBundleHash,
93
-
"created_at": bundle.CreatedAt,
94
-
}
95
-
}
96
-
97
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
98
79
response := map[string]interface{}{
99
80
"id": ep.ID,
···
102
83
"discovered_at": ep.DiscoveredAt,
103
84
"last_checked": ep.LastChecked,
104
85
"status": statusToString(ep.Status),
105
-
// REMOVED: "user_count": ep.UserCount, // No longer exists
106
86
}
107
87
108
-
// Add IP if available
88
+
// Add IPs if available
109
89
if ep.IP != "" {
110
90
response["ip"] = ep.IP
111
91
}
112
-
113
-
// REMOVED: IP info extraction - no longer in Endpoint struct
114
-
// IPInfo is now in separate table, joined only in PDS handlers
92
+
if ep.IPv6 != "" {
93
+
response["ipv6"] = ep.IPv6
94
+
}
115
95
116
96
return response
117
97
}
···
164
144
resp.json(stats)
165
145
}
166
146
147
+
// handleGetRandomEndpoint returns a random endpoint of specified type
148
+
func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
149
+
resp := newResponse(w)
150
+
151
+
// Get required type parameter
152
+
endpointType := r.URL.Query().Get("type")
153
+
if endpointType == "" {
154
+
resp.error("type parameter is required", http.StatusBadRequest)
155
+
return
156
+
}
157
+
158
+
// Get optional status parameter
159
+
status := r.URL.Query().Get("status")
160
+
161
+
filter := &storage.EndpointFilter{
162
+
Type: endpointType,
163
+
Status: status,
164
+
Random: true,
165
+
Limit: 1,
166
+
Offset: 0,
167
+
}
168
+
169
+
endpoints, err := s.db.GetEndpoints(r.Context(), filter)
170
+
if err != nil {
171
+
resp.error(err.Error(), http.StatusInternalServerError)
172
+
return
173
+
}
174
+
175
+
if len(endpoints) == 0 {
176
+
resp.error("no endpoints found matching criteria", http.StatusNotFound)
177
+
return
178
+
}
179
+
180
+
resp.json(formatEndpointResponse(endpoints[0]))
181
+
}
182
+
167
183
// ===== PDS HANDLERS =====
168
184
169
185
func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
···
232
248
"endpoint": pds.Endpoint,
233
249
"discovered_at": pds.DiscoveredAt,
234
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
252
+
}
253
+
254
+
// Add server_did if available
255
+
if pds.ServerDID != "" {
256
+
response["server_did"] = pds.ServerDID
235
257
}
236
258
237
259
// Add last_checked if available
···
243
265
if pds.LatestScan != nil {
244
266
response["user_count"] = pds.LatestScan.UserCount
245
267
response["response_time"] = pds.LatestScan.ResponseTime
246
-
if pds.LatestScan.Version != "" { // NEW: Add this block
268
+
if pds.LatestScan.Version != "" {
247
269
response["version"] = pds.LatestScan.Version
248
270
}
249
271
if !pds.LatestScan.ScannedAt.IsZero() {
···
251
273
}
252
274
}
253
275
254
-
// Add IP if available
276
+
// Add IPs if available
255
277
if pds.IP != "" {
256
278
response["ip"] = pds.IP
257
279
}
280
+
if pds.IPv6 != "" {
281
+
response["ipv6"] = pds.IPv6
282
+
}
258
283
259
284
// Add IP info (from ip_infos table via JOIN)
260
285
if pds.IPInfo != nil {
···
270
295
if pds.IPInfo.ASN > 0 {
271
296
response["asn"] = pds.IPInfo.ASN
272
297
}
298
+
299
+
// Add all network type flags
300
+
response["is_datacenter"] = pds.IPInfo.IsDatacenter
301
+
response["is_vpn"] = pds.IPInfo.IsVPN
302
+
response["is_crawler"] = pds.IPInfo.IsCrawler
303
+
response["is_tor"] = pds.IPInfo.IsTor
304
+
response["is_proxy"] = pds.IPInfo.IsProxy
305
+
306
+
// Add computed is_home field
307
+
response["is_home"] = pds.IPInfo.IsHome()
273
308
}
274
309
275
310
return response
276
311
}
277
312
278
313
func formatPDSDetail(pds *storage.PDSDetail) map[string]interface{} {
279
-
// Start with list item formatting
314
+
// Start with list item formatting (includes server_did)
280
315
response := formatPDSListItem(&pds.PDSListItem)
281
316
317
+
// Add is_primary flag
318
+
response["is_primary"] = pds.IsPrimary
319
+
320
+
// Add aliases if available
321
+
if len(pds.Aliases) > 0 {
322
+
response["aliases"] = pds.Aliases
323
+
response["alias_count"] = len(pds.Aliases)
324
+
}
325
+
282
326
// Add server_info and version from latest scan (PDSDetail's LatestScan takes precedence)
283
327
if pds.LatestScan != nil {
284
328
// Override with detail-specific scan data
···
298
342
}
299
343
}
300
344
301
-
// Add full IP info
345
+
// Add full IP info with computed is_home field
302
346
if pds.IPInfo != nil {
303
-
response["ip_info"] = pds.IPInfo
347
+
// Convert IPInfo to map
348
+
ipInfoMap := make(map[string]interface{})
349
+
ipInfoJSON, _ := json.Marshal(pds.IPInfo)
350
+
json.Unmarshal(ipInfoJSON, &ipInfoMap)
351
+
352
+
// Add computed is_home field
353
+
ipInfoMap["is_home"] = pds.IPInfo.IsHome()
354
+
355
+
response["ip_info"] = ipInfoMap
304
356
}
305
357
306
358
return response
···
315
367
"scanned_at": scan.ScannedAt,
316
368
}
317
369
370
+
if scan.Status != storage.EndpointStatusOnline && scan.ScanData != nil && scan.ScanData.Metadata != nil {
371
+
if errorMsg, ok := scan.ScanData.Metadata["error"].(string); ok && errorMsg != "" {
372
+
scanMap["error"] = errorMsg
373
+
}
374
+
}
375
+
318
376
if scan.ResponseTime > 0 {
319
377
scanMap["response_time"] = scan.ResponseTime
320
378
}
321
379
322
-
// NEW: Add version if available
323
380
if scan.Version != "" {
324
381
scanMap["version"] = scan.Version
382
+
}
383
+
384
+
if scan.UsedIP != "" {
385
+
scanMap["used_ip"] = scan.UsedIP
325
386
}
326
387
327
388
// Use the top-level UserCount field first
···
348
409
return result
349
410
}
350
411
412
+
// Get repos for a specific PDS
413
+
func (s *Server) handleGetPDSRepos(w http.ResponseWriter, r *http.Request) {
414
+
resp := newResponse(w)
415
+
vars := mux.Vars(r)
416
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
417
+
418
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
419
+
if err != nil {
420
+
resp.error("PDS not found", http.StatusNotFound)
421
+
return
422
+
}
423
+
424
+
// Parse query parameters
425
+
activeOnly := r.URL.Query().Get("active") == "true"
426
+
limit := getQueryInt(r, "limit", 100)
427
+
offset := getQueryInt(r, "offset", 0)
428
+
429
+
// Cap limit at 1000
430
+
if limit > 1000 {
431
+
limit = 1000
432
+
}
433
+
434
+
repos, err := s.db.GetPDSRepos(r.Context(), pds.ID, activeOnly, limit, offset)
435
+
if err != nil {
436
+
resp.error(err.Error(), http.StatusInternalServerError)
437
+
return
438
+
}
439
+
440
+
// Get total from latest scan (same as user_count)
441
+
totalRepos := 0
442
+
if pds.LatestScan != nil {
443
+
totalRepos = pds.LatestScan.UserCount
444
+
}
445
+
446
+
resp.json(map[string]interface{}{
447
+
"endpoint": pds.Endpoint,
448
+
"total_repos": totalRepos,
449
+
"returned": len(repos),
450
+
"limit": limit,
451
+
"offset": offset,
452
+
"repos": repos,
453
+
})
454
+
}
455
+
456
+
// Find which PDS hosts a specific DID
457
+
func (s *Server) handleGetDIDRepos(w http.ResponseWriter, r *http.Request) {
458
+
resp := newResponse(w)
459
+
vars := mux.Vars(r)
460
+
did := vars["did"]
461
+
462
+
repos, err := s.db.GetReposByDID(r.Context(), did)
463
+
if err != nil {
464
+
resp.error(err.Error(), http.StatusInternalServerError)
465
+
return
466
+
}
467
+
468
+
resp.json(map[string]interface{}{
469
+
"did": did,
470
+
"pds_count": len(repos),
471
+
"hosting_on": repos,
472
+
})
473
+
}
474
+
475
+
// Add to internal/api/handlers.go
476
+
func (s *Server) handleGetPDSRepoStats(w http.ResponseWriter, r *http.Request) {
477
+
resp := newResponse(w)
478
+
vars := mux.Vars(r)
479
+
endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
480
+
481
+
pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
482
+
if err != nil {
483
+
resp.error("PDS not found", http.StatusNotFound)
484
+
return
485
+
}
486
+
487
+
stats, err := s.db.GetPDSRepoStats(r.Context(), pds.ID)
488
+
if err != nil {
489
+
resp.error(err.Error(), http.StatusInternalServerError)
490
+
return
491
+
}
492
+
493
+
resp.json(stats)
494
+
}
495
+
496
+
// ===== GLOBAL DID HANDLER =====
497
+
498
+
// handleGetGlobalDID provides a consolidated view of a DID
499
+
func (s *Server) handleGetGlobalDID(w http.ResponseWriter, r *http.Request) {
500
+
resp := newResponse(w)
501
+
vars := mux.Vars(r)
502
+
did := vars["did"]
503
+
ctx := r.Context()
504
+
505
+
// Get DID info (now includes handle and pds from database)
506
+
didInfo, err := s.db.GetGlobalDIDInfo(ctx, did)
507
+
if err != nil {
508
+
if err == sql.ErrNoRows {
509
+
if !s.plcIndexDIDs {
510
+
resp.error("DID not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
511
+
} else {
512
+
resp.error("DID not found in PLC index.", http.StatusNotFound)
513
+
}
514
+
} else {
515
+
resp.error(err.Error(), http.StatusInternalServerError)
516
+
}
517
+
return
518
+
}
519
+
520
+
// Optionally include latest operation details if requested
521
+
var latestOperation *plc.PLCOperation
522
+
if r.URL.Query().Get("include_operation") == "true" && len(didInfo.BundleNumbers) > 0 {
523
+
lastBundleNum := didInfo.BundleNumbers[len(didInfo.BundleNumbers)-1]
524
+
ops, err := s.bundleManager.LoadBundleOperations(ctx, lastBundleNum)
525
+
if err != nil {
526
+
log.Error("Failed to load bundle %d for DID %s: %v", lastBundleNum, did, err)
527
+
} else {
528
+
// Find latest operation for this DID (in reverse)
529
+
for i := len(ops) - 1; i >= 0; i-- {
530
+
if ops[i].DID == did {
531
+
latestOperation = &ops[i]
532
+
break
533
+
}
534
+
}
535
+
}
536
+
}
537
+
538
+
result := map[string]interface{}{
539
+
"did": didInfo.DID,
540
+
"handle": didInfo.Handle, // From database!
541
+
"current_pds": didInfo.CurrentPDS, // From database!
542
+
"plc_index_created_at": didInfo.CreatedAt,
543
+
"plc_bundle_history": didInfo.BundleNumbers,
544
+
"pds_hosting_on": didInfo.HostingOn,
545
+
}
546
+
547
+
// Only include operation if requested
548
+
if latestOperation != nil {
549
+
result["latest_plc_operation"] = latestOperation
550
+
}
551
+
552
+
resp.json(result)
553
+
}
554
+
555
+
// handleGetDIDByHandle resolves a handle to a DID
556
+
func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) {
557
+
resp := newResponse(w)
558
+
vars := mux.Vars(r)
559
+
handle := vars["handle"]
560
+
561
+
// Normalize handle (remove @ prefix if present)
562
+
handle = strings.TrimPrefix(handle, "@")
563
+
564
+
// Look up DID by handle
565
+
didRecord, err := s.db.GetDIDByHandle(r.Context(), handle)
566
+
if err != nil {
567
+
if err == sql.ErrNoRows {
568
+
if !s.plcIndexDIDs {
569
+
resp.error("Handle not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
570
+
} else {
571
+
resp.error("Handle not found.", http.StatusNotFound)
572
+
}
573
+
} else {
574
+
resp.error(err.Error(), http.StatusInternalServerError)
575
+
}
576
+
return
577
+
}
578
+
579
+
// Return just the handle and DID
580
+
resp.json(map[string]string{
581
+
"handle": handle,
582
+
"did": didRecord.DID,
583
+
})
584
+
}
585
+
351
586
// ===== DID HANDLERS =====
352
587
353
588
func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) {
···
449
684
return
450
685
}
451
686
452
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
453
-
if err != nil {
454
-
resp.error(err.Error(), http.StatusInternalServerError)
455
-
return
456
-
}
457
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
458
688
resp.json(map[string]interface{}{
459
689
"total_unique_dids": totalDIDs,
460
690
"last_bundle": lastBundle,
···
465
695
466
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
467
697
resp := newResponse(w)
468
-
469
698
bundleNum, err := getBundleNumber(r)
470
699
if err != nil {
471
700
resp.error("invalid bundle number", http.StatusBadRequest)
472
701
return
473
702
}
474
703
475
-
// Try to get existing bundle
476
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
477
-
if err == nil {
478
-
// Bundle exists, return it normally
479
-
resp.json(formatBundleResponse(bundle))
480
-
return
481
-
}
482
-
483
-
// Bundle not found - check if it's the next upcoming bundle
484
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
485
707
if err != nil {
486
-
resp.error("bundle not found", http.StatusNotFound)
487
-
return
488
-
}
489
-
490
-
if bundleNum == lastBundle+1 {
491
-
// This is the upcoming bundle - return preview based on mempool
492
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
493
-
if err != nil {
494
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
495
717
return
496
718
}
497
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
498
720
return
499
721
}
500
722
501
-
// Not an upcoming bundle, just not found
502
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
503
724
}
504
725
505
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
506
-
// Get mempool stats
507
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
508
-
if err != nil {
509
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
510
743
}
744
+
}
511
745
512
-
if mempoolCount == 0 {
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
513
752
return map[string]interface{}{
514
753
"plc_bundle_number": bundleNum,
515
754
"is_upcoming": true,
···
519
758
}, nil
520
759
}
521
760
522
-
// Get first and last operations for time range
523
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
524
-
if err != nil {
525
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
526
771
}
527
772
528
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
529
-
if err != nil {
530
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
531
775
}
532
776
533
-
// Get unique DID count
534
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
535
-
if err != nil {
536
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
537
780
}
538
-
539
-
// Get uncompressed size estimate
540
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
541
-
if err != nil {
542
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
543
783
}
544
784
545
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
546
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
547
-
548
-
// Calculate completion estimate
549
-
var estimatedCompletionTime *time.Time
550
-
var operationsNeeded int
551
-
var currentRate float64
552
-
553
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
554
-
555
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
556
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
557
-
if timeSpan > 0 {
558
-
currentRate = float64(mempoolCount) / timeSpan
559
-
if currentRate > 0 {
560
-
secondsNeeded := float64(operationsNeeded) / currentRate
561
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
562
-
estimatedCompletionTime = &completionTime
563
-
}
564
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
565
789
}
566
790
567
-
// Get previous bundle for cursor context
568
-
var prevBundleHash string
569
-
var cursor string
791
+
// Get previous bundle info
570
792
if bundleNum > 1 {
571
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
572
-
if err == nil {
573
-
prevBundleHash = prevBundle.Hash
574
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
575
-
}
576
-
}
577
-
578
-
// Determine bundle status
579
-
status := "filling"
580
-
if mempoolCount >= plc.BUNDLE_SIZE {
581
-
status = "ready"
582
-
}
583
-
584
-
// Build upcoming bundle response
585
-
result := map[string]interface{}{
586
-
"plc_bundle_number": bundleNum,
587
-
"is_upcoming": true,
588
-
"status": status,
589
-
"operation_count": mempoolCount,
590
-
"target_operation_count": plc.BUNDLE_SIZE,
591
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
592
-
"operations_needed": operationsNeeded,
593
-
"did_count": uniqueDIDCount,
594
-
"start_time": firstOp.CreatedAt, // This is FIXED once first op exists
595
-
"current_end_time": lastOp.CreatedAt, // This will change as more ops arrive
596
-
"uncompressed_size": uncompressedSize,
597
-
"estimated_compressed_size": estimatedCompressedSize,
598
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
599
-
"prev_bundle_hash": prevBundleHash,
600
-
"cursor": cursor,
601
-
}
602
-
603
-
if estimatedCompletionTime != nil {
604
-
result["estimated_completion_time"] = *estimatedCompletionTime
605
-
result["current_rate_per_second"] = currentRate
606
-
}
607
-
608
-
// Get actual mempool operations if requested
609
-
if r.URL.Query().Get("include_dids") == "true" {
610
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
611
-
if err == nil {
612
-
// Extract unique DIDs
613
-
didSet := make(map[string]bool)
614
-
for _, op := range ops {
615
-
didSet[op.DID] = true
616
-
}
617
-
dids := make([]string, 0, len(didSet))
618
-
for did := range didSet {
619
-
dids = append(dids, did)
620
-
}
621
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
622
796
}
623
797
}
624
798
···
634
808
return
635
809
}
636
810
637
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
638
813
if err != nil {
639
814
resp.error("bundle not found", http.StatusNotFound)
640
815
return
641
816
}
642
817
643
818
resp.json(map[string]interface{}{
644
-
"plc_bundle_number": bundle.BundleNumber,
645
-
"did_count": len(bundle.DIDs),
646
-
"dids": bundle.DIDs,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
821
+
"dids": dids,
647
822
})
648
823
}
649
824
···
658
833
659
834
compressed := r.URL.Query().Get("compressed") != "false"
660
835
661
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
662
837
if err == nil {
663
838
// Bundle exists, serve it normally
664
839
resp.bundleHeaders(bundle)
···
672
847
}
673
848
674
849
// Bundle not found - check if it's the upcoming bundle
675
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
676
-
if err != nil {
677
-
resp.error("bundle not found", http.StatusNotFound)
678
-
return
679
-
}
680
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
681
851
if bundleNum == lastBundle+1 {
682
852
// This is the upcoming bundle - serve from mempool
683
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
684
854
return
685
855
}
686
856
···
688
858
resp.error("bundle not found", http.StatusNotFound)
689
859
}
690
860
691
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
692
-
ctx := r.Context()
693
-
694
-
// Get mempool count
695
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
696
-
if err != nil {
697
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
698
-
return
699
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
700
865
701
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
702
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
703
868
return
704
869
}
705
870
706
-
// Get mempool operations (up to BUNDLE_SIZE)
707
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
708
873
if err != nil {
709
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
710
875
return
711
876
}
712
877
713
-
if len(mempoolOps) == 0 {
714
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
715
880
return
716
881
}
717
882
718
-
// Get time range
719
-
firstOp := mempoolOps[0]
720
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
721
886
722
887
// Extract unique DIDs
723
888
didSet := make(map[string]bool)
724
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
725
890
didSet[op.DID] = true
726
891
}
727
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
728
899
// Get previous bundle hash
729
900
prevBundleHash := ""
730
901
if bundleNum > 1 {
731
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
732
903
prevBundleHash = prevBundle.Hash
733
904
}
734
905
}
735
906
736
-
// Serialize operations to JSONL
737
-
var buf []byte
738
-
for _, mop := range mempoolOps {
739
-
buf = append(buf, []byte(mop.Operation)...)
740
-
buf = append(buf, '\n')
741
-
}
742
-
743
-
// Calculate size
744
-
uncompressedSize := int64(len(buf))
745
-
746
907
// Set headers
747
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
748
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
749
910
w.Header().Set("X-Bundle-Status", "preview")
750
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
751
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
752
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
753
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
754
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
755
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
756
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
757
919
758
920
w.Header().Set("Content-Type", "application/jsonl")
759
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
760
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
761
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
762
922
923
+
// Stream operations as JSONL
763
924
w.WriteHeader(http.StatusOK)
764
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
765
937
}
766
938
767
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
768
940
resp := newResponse(w)
769
-
path := bundle.GetFilePath(s.plcBundleDir)
770
941
771
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
772
944
if err != nil {
773
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
774
946
return
775
947
}
776
-
defer file.Close()
777
-
778
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
779
949
780
950
w.Header().Set("Content-Type", "application/zstd")
781
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
782
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
783
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
784
954
785
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
786
958
}
787
959
788
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
789
961
resp := newResponse(w)
790
962
791
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
792
965
if err != nil {
793
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
794
967
return
795
968
}
796
-
797
-
// Serialize to JSONL
798
-
var buf []byte
799
-
for _, op := range ops {
800
-
buf = append(buf, op.RawJSON...)
801
-
buf = append(buf, '\n')
802
-
}
803
-
804
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
805
-
compressedSize := int64(0)
806
-
if fileInfo != nil {
807
-
compressedSize = fileInfo.Size()
808
-
}
969
+
defer reader.Close()
809
970
810
971
w.Header().Set("Content-Type", "application/jsonl")
811
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
812
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
813
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
814
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
815
-
if compressedSize > 0 {
816
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
817
978
}
818
979
980
+
// Stream the data directly to the response
819
981
w.WriteHeader(http.StatusOK)
820
-
w.Write(buf)
982
+
io.Copy(w, reader)
821
983
}
822
984
823
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
824
986
resp := newResponse(w)
825
987
limit := getQueryInt(r, "limit", 50)
826
988
827
-
bundles, err := s.db.GetBundles(r.Context(), limit)
828
-
if err != nil {
829
-
resp.error(err.Error(), http.StatusInternalServerError)
830
-
return
831
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
832
990
833
991
response := make([]map[string]interface{}, len(bundles))
834
992
for i, bundle := range bundles {
835
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
836
994
}
837
995
838
996
resp.json(response)
···
841
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
842
1000
resp := newResponse(w)
843
1001
844
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
845
-
if err != nil {
846
-
resp.error(err.Error(), http.StatusInternalServerError)
847
-
return
848
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
849
1008
850
1009
resp.json(map[string]interface{}{
851
-
"plc_bundle_count": count,
852
-
"last_bundle_number": lastBundle,
853
-
"total_compressed_size": compressedSize,
854
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
855
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
856
-
"total_uncompressed_size": uncompressedSize,
857
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
858
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
859
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
860
1015
})
861
1016
}
862
1017
···
864
1019
865
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
866
1021
resp := newResponse(w)
867
-
ctx := r.Context()
868
1022
869
-
count, err := s.db.GetMempoolCount(ctx)
870
-
if err != nil {
871
-
resp.error(err.Error(), http.StatusInternalServerError)
872
-
return
873
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
874
1025
875
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
876
-
if err != nil {
877
-
resp.error(err.Error(), http.StatusInternalServerError)
878
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
879
1030
}
880
1031
881
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
882
-
if err != nil {
883
-
resp.error(err.Error(), http.StatusInternalServerError)
884
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
885
1036
}
886
1037
887
-
result := map[string]interface{}{
888
-
"operation_count": count,
889
-
"unique_did_count": uniqueDIDCount,
890
-
"uncompressed_size": uncompressedSize,
891
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
892
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
893
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
894
1042
895
-
if count > 0 {
896
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
897
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
898
1045
899
-
if count < plc.BUNDLE_SIZE {
900
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
901
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
902
1049
if timeSpan > 0 {
903
1050
opsPerSecond := float64(count) / timeSpan
904
1051
if opsPerSecond > 0 {
905
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
906
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
907
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
908
-
result["operations_needed"] = remainingOps
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
909
1057
result["current_rate_per_second"] = opsPerSecond
1058
+
result["operations_needed"] = remainingOps
910
1059
}
911
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
912
1066
}
913
-
} else {
914
-
result["estimated_next_bundle_time"] = time.Now()
915
-
result["operations_needed"] = 0
916
1067
}
917
1068
}
918
1069
} else {
1070
+
// Empty mempool
919
1071
result["mempool_start_time"] = nil
920
1072
result["estimated_next_bundle_time"] = nil
921
1073
}
···
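The estimate in the mempool handler is plain rate extrapolation: the operations seen so far are divided by the time they span, and the remaining operations up to the 10,000-op bundle are divided by that rate. A condensed sketch of the same arithmetic, reusing the handler's names (count, firstTime, lastTime):

// rate = ops observed / seconds spanned; eta = now + remaining / rate
elapsed := lastTime.Sub(firstTime).Seconds()
if elapsed > 0 && count < 10000 {
    rate := float64(count) / elapsed            // operations per second
    remaining := float64(10000 - count)         // operations still needed
    eta := time.Now().Add(time.Duration(remaining/rate) * time.Second)
    _ = eta // reported as estimated_next_bundle_time
}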
940
1092
941
1093
// ===== VERIFICATION HANDLERS =====
942
1094
943
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
944
-
resp := newResponse(w)
945
-
vars := mux.Vars(r)
946
-
947
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
948
-
if err != nil {
949
-
resp.error("Invalid bundle number", http.StatusBadRequest)
950
-
return
951
-
}
952
-
953
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
954
-
if err != nil {
955
-
resp.error("Bundle not found", http.StatusNotFound)
956
-
return
957
-
}
958
-
959
-
// Fetch from PLC and verify
960
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
961
-
if err != nil {
962
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
963
-
return
964
-
}
965
-
966
-
remoteHash := computeOperationsHash(remoteOps)
967
-
verified := bundle.Hash == remoteHash
968
-
969
-
resp.json(map[string]interface{}{
970
-
"bundle_number": bundleNumber,
971
-
"verified": verified,
972
-
"local_hash": bundle.Hash,
973
-
"remote_hash": remoteHash,
974
-
"local_op_count": plc.BUNDLE_SIZE,
975
-
"remote_op_count": len(remoteOps),
976
-
"boundary_cids_used": len(prevCIDs),
977
-
})
978
-
}
979
-
980
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
981
-
var after string
982
-
var prevBoundaryCIDs map[string]bool
983
-
984
-
if bundleNum > 1 {
985
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
986
-
if err != nil {
987
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
988
-
}
989
-
990
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
991
-
992
-
if len(prevBundle.BoundaryCIDs) > 0 {
993
-
prevBoundaryCIDs = make(map[string]bool)
994
-
for _, cid := range prevBundle.BoundaryCIDs {
995
-
prevBoundaryCIDs[cid] = true
996
-
}
997
-
}
998
-
}
999
-
1000
-
var allRemoteOps []plc.PLCOperation
1001
-
seenCIDs := make(map[string]bool)
1002
-
1003
-
for cid := range prevBoundaryCIDs {
1004
-
seenCIDs[cid] = true
1005
-
}
1006
-
1007
-
currentAfter := after
1008
-
maxFetches := 20
1009
-
1010
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1011
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1012
-
Count: 1000,
1013
-
After: currentAfter,
1014
-
})
1015
-
if err != nil || len(batch) == 0 {
1016
-
break
1017
-
}
1018
-
1019
-
for _, op := range batch {
1020
-
if !seenCIDs[op.CID] {
1021
-
seenCIDs[op.CID] = true
1022
-
allRemoteOps = append(allRemoteOps, op)
1023
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1024
-
break
1025
-
}
1026
-
}
1027
-
}
1028
-
1029
-
if len(batch) > 0 {
1030
-
lastOp := batch[len(batch)-1]
1031
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1032
-
}
1033
-
1034
-
if len(batch) < 1000 {
1035
-
break
1036
-
}
1037
-
}
1038
-
1039
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1040
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1041
-
}
1042
-
1043
-
return allRemoteOps, prevBoundaryCIDs, nil
1044
-
}
1045
-
1046
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1047
1096
resp := newResponse(w)
1048
-
ctx := r.Context()
1049
1097
1050
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1051
-
if err != nil {
1052
-
resp.error(err.Error(), http.StatusInternalServerError)
1053
-
return
1054
-
}
1055
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1056
1099
if lastBundle == 0 {
1057
1100
resp.json(map[string]interface{}{
1058
1101
"status": "empty",
···
1066
1109
var errorMsg string
1067
1110
1068
1111
for i := 1; i <= lastBundle; i++ {
1069
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1070
1113
if err != nil {
1071
1114
valid = false
1072
1115
brokenAt = i
···
1075
1118
}
1076
1119
1077
1120
if i > 1 {
1078
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1079
1122
if err != nil {
1080
1123
valid = false
1081
1124
brokenAt = i
···
1083
1126
break
1084
1127
}
1085
1128
1086
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1087
1130
valid = false
1088
1131
brokenAt = i
1089
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1090
1133
break
1091
1134
}
1092
1135
}
···
1107
1150
1108
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
1109
1152
resp := newResponse(w)
1110
-
ctx := r.Context()
1111
1153
1112
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1113
-
if err != nil {
1114
-
resp.error(err.Error(), http.StatusInternalServerError)
1115
-
return
1116
-
}
1117
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1118
1155
if lastBundle == 0 {
1119
1156
resp.json(map[string]interface{}{
1120
1157
"chain_length": 0,
···
1123
1160
return
1124
1161
}
1125
1162
1126
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
1127
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
1128
-
1129
-
// Updated to receive 5 values instead of 3
1130
-
count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
1131
-
if err != nil {
1132
-
resp.error(err.Error(), http.StatusInternalServerError)
1133
-
return
1134
-
}
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
1135
1166
1136
1167
resp.json(map[string]interface{}{
1137
-
"chain_length": lastBundle,
1138
-
"total_bundles": count,
1139
-
"total_compressed_size": compressedSize,
1140
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1141
-
"total_uncompressed_size": uncompressedSize,
1142
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1143
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1144
-
"chain_start_time": firstBundle.StartTime,
1145
-
"chain_end_time": lastBundleData.EndTime,
1146
-
"chain_head_hash": lastBundleData.Hash,
1147
-
"first_prev_hash": firstBundle.PrevBundleHash,
1148
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
1149
1177
})
1150
1178
}
1151
1179
···
1166
1194
return
1167
1195
}
1168
1196
1169
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
1170
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
1171
1199
1172
1200
w.Header().Set("Content-Type", "application/jsonl")
···
1206
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
1207
1235
}
1208
1236
1209
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
1210
1238
if afterTime.IsZero() {
1211
1239
return 1
1212
1240
}
1213
1241
1214
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
1215
-
if err != nil {
1216
-
return 1
1217
-
}
1218
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
1219
1243
if foundBundle > 1 {
1220
1244
return foundBundle - 1
1221
1245
}
···
1226
1250
var allOps []plc.PLCOperation
1227
1251
seenCIDs := make(map[string]bool)
1228
1252
1229
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1230
1254
1231
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
1232
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
1256
1280
return allOps
1257
1281
}
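handlePLCExport above streams JSONL in the same shape as the upstream PLC directory export, so a consumer can page through it line by line. The sketch below is a hypothetical client; the base URL, the /api prefix, and the count/after query parameters are assumptions based on the directory API this endpoint simulates, not confirmed by this diff:

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    // Assumed local address and parameters; adjust to the configured API host/port.
    resp, err := http.Get("http://localhost:8080/api/plc/export?count=1000&after=2024-01-01T00:00:00.000Z")
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    sc := bufio.NewScanner(resp.Body)
    for sc.Scan() {
        var op map[string]interface{}
        if err := json.Unmarshal(sc.Bytes(), &op); err == nil {
            fmt.Println(op["did"]) // one PLC operation per JSONL line
        }
    }
}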
1258
1282
1283
+
func (s *Server) handleGetCountryLeaderboard(w http.ResponseWriter, r *http.Request) {
1284
+
resp := newResponse(w)
1285
+
1286
+
stats, err := s.db.GetCountryLeaderboard(r.Context())
1287
+
if err != nil {
1288
+
resp.error(err.Error(), http.StatusInternalServerError)
1289
+
return
1290
+
}
1291
+
1292
+
resp.json(stats)
1293
+
}
1294
+
1295
+
func (s *Server) handleGetVersionStats(w http.ResponseWriter, r *http.Request) {
1296
+
resp := newResponse(w)
1297
+
1298
+
stats, err := s.db.GetVersionStats(r.Context())
1299
+
if err != nil {
1300
+
resp.error(err.Error(), http.StatusInternalServerError)
1301
+
return
1302
+
}
1303
+
1304
+
// Add summary totals
1305
+
var totalPDS int64
1306
+
var totalUsers int64
1307
+
for _, stat := range stats {
1308
+
totalPDS += stat.PDSCount
1309
+
totalUsers += stat.TotalUsers
1310
+
}
1311
+
1312
+
result := map[string]interface{}{
1313
+
"versions": stats,
1314
+
"summary": map[string]interface{}{
1315
+
"total_pds_with_version": totalPDS,
1316
+
"total_users": totalUsers,
1317
+
"version_count": len(stats),
1318
+
},
1319
+
}
1320
+
1321
+
resp.json(result)
1322
+
}
1323
+
1259
1324
// ===== HEALTH HANDLER =====
1260
1325
1261
1326
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
1262
1327
newResponse(w).json(map[string]string{"status": "ok"})
1263
1328
}
1264
1329
1265
-
// ===== UTILITY FUNCTIONS =====
1330
+
func (s *Server) handleGetJobStatus(w http.ResponseWriter, r *http.Request) {
1331
+
resp := newResponse(w)
1332
+
tracker := monitor.GetTracker()
1333
+
1334
+
jobs := tracker.GetAllJobs()
1335
+
1336
+
result := make(map[string]interface{})
1337
+
for name, job := range jobs {
1338
+
jobData := map[string]interface{}{
1339
+
"name": job.Name,
1340
+
"status": job.Status,
1341
+
"run_count": job.RunCount,
1342
+
"success_count": job.SuccessCount,
1343
+
"error_count": job.ErrorCount,
1344
+
}
1345
+
1346
+
if !job.LastRun.IsZero() {
1347
+
jobData["last_run"] = job.LastRun
1348
+
jobData["last_duration"] = job.Duration.String()
1349
+
}
1350
+
1351
+
if !job.NextRun.IsZero() {
1352
+
jobData["next_run"] = job.NextRun
1353
+
jobData["next_run_in"] = time.Until(job.NextRun).Round(time.Second).String()
1354
+
}
1355
+
1356
+
if job.Status == "running" {
1357
+
jobData["running_for"] = job.Duration.Round(time.Second).String()
1358
+
1359
+
if job.Progress != nil {
1360
+
jobData["progress"] = job.Progress
1361
+
}
1266
1362
1267
-
func computeOperationsHash(ops []plc.PLCOperation) string {
1268
-
var jsonlData []byte
1269
-
for _, op := range ops {
1270
-
jsonlData = append(jsonlData, op.RawJSON...)
1271
-
jsonlData = append(jsonlData, '\n')
1363
+
// Add worker status
1364
+
workers := tracker.GetWorkers(name)
1365
+
if len(workers) > 0 {
1366
+
jobData["workers"] = workers
1367
+
}
1368
+
}
1369
+
1370
+
if job.Error != "" {
1371
+
jobData["error"] = job.Error
1372
+
}
1373
+
1374
+
result[name] = jobData
1272
1375
}
1273
-
hash := sha256.Sum256(jsonlData)
1274
-
return hex.EncodeToString(hash[:])
1376
+
1377
+
resp.json(result)
1378
+
}
1379
+
1380
+
func (s *Server) handleGetDuplicateEndpoints(w http.ResponseWriter, r *http.Request) {
1381
+
resp := newResponse(w)
1382
+
1383
+
duplicates, err := s.db.GetDuplicateEndpoints(r.Context())
1384
+
if err != nil {
1385
+
resp.error(err.Error(), http.StatusInternalServerError)
1386
+
return
1387
+
}
1388
+
1389
+
// Format response
1390
+
result := make([]map[string]interface{}, 0)
1391
+
for serverDID, endpoints := range duplicates {
1392
+
result = append(result, map[string]interface{}{
1393
+
"server_did": serverDID,
1394
+
"primary": endpoints[0], // First discovered
1395
+
"aliases": endpoints[1:], // Other domains
1396
+
"alias_count": len(endpoints) - 1,
1397
+
"total_domains": len(endpoints),
1398
+
})
1399
+
}
1400
+
1401
+
resp.json(map[string]interface{}{
1402
+
"duplicates": result,
1403
+
"total_duplicate_servers": len(duplicates),
1404
+
})
1275
1405
}
1406
+
1407
+
func (s *Server) handleGetPLCHistory(w http.ResponseWriter, r *http.Request) {
1408
+
resp := newResponse(w)
1409
+
1410
+
limit := getQueryInt(r, "limit", 0)
1411
+
fromBundle := getQueryInt(r, "from", 1)
1412
+
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1415
+
if err != nil {
1416
+
resp.error(err.Error(), http.StatusInternalServerError)
1417
+
return
1418
+
}
1419
+
1420
+
var totalOps int64
1421
+
var totalUncompressed int64
1422
+
var totalCompressed int64
1423
+
1424
+
for _, point := range history {
1425
+
totalOps += int64(point.OperationCount)
1426
+
totalUncompressed += point.UncompressedSize
1427
+
totalCompressed += point.CompressedSize
1428
+
}
1429
+
1430
+
result := map[string]interface{}{
1431
+
"data": history,
1432
+
"summary": map[string]interface{}{
1433
+
"days": len(history),
1434
+
"total_operations": totalOps,
1435
+
"total_uncompressed": totalUncompressed,
1436
+
"total_compressed": totalCompressed,
1437
+
"compression_ratio": 0.0,
1438
+
},
1439
+
}
1440
+
1441
+
if len(history) > 0 {
1442
+
result["summary"].(map[string]interface{})["first_date"] = history[0].Date
1443
+
result["summary"].(map[string]interface{})["last_date"] = history[len(history)-1].Date
1444
+
result["summary"].(map[string]interface{})["time_span_days"] = len(history)
1445
+
1446
+
if totalCompressed > 0 {
1447
+
result["summary"].(map[string]interface{})["compression_ratio"] = float64(totalUncompressed) / float64(totalCompressed)
1448
+
}
1449
+
1450
+
result["summary"].(map[string]interface{})["avg_operations_per_day"] = totalOps / int64(len(history))
1451
+
result["summary"].(map[string]interface{})["avg_size_per_day"] = totalUncompressed / int64(len(history))
1452
+
}
1453
+
1454
+
resp.json(result)
1455
+
}
1456
+
1457
+
// ===== DEBUG HANDLERS =====
1458
+
1459
+
func (s *Server) handleGetDBSizes(w http.ResponseWriter, r *http.Request) {
1460
+
resp := newResponse(w)
1461
+
ctx := r.Context()
1462
+
schema := "public" // Or make configurable if needed
1463
+
1464
+
tableSizes, err := s.db.GetTableSizes(ctx, schema)
1465
+
if err != nil {
1466
+
log.Error("Failed to get table sizes: %v", err)
1467
+
resp.error("Failed to retrieve table sizes", http.StatusInternalServerError)
1468
+
return
1469
+
}
1470
+
1471
+
indexSizes, err := s.db.GetIndexSizes(ctx, schema)
1472
+
if err != nil {
1473
+
log.Error("Failed to get index sizes: %v", err)
1474
+
resp.error("Failed to retrieve index sizes", http.StatusInternalServerError)
1475
+
return
1476
+
}
1477
+
1478
+
resp.json(map[string]interface{}{
1479
+
"schema": schema,
1480
+
"tables": tableSizes,
1481
+
"indexes": indexSizes,
1482
+
"retrievedAt": time.Now().UTC(),
1483
+
})
1484
+
}
1485
+
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1488
+
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1493
+
}
1494
+
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1499
+
}
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1506
+
}
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1276
1509
1277
1510
func normalizeEndpoint(endpoint string) string {
1278
1511
endpoint = strings.TrimPrefix(endpoint, "https://")
+31
-13
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
23
+
plcIndexDIDs bool
24
24
}
25
25
26
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
27
-
bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
28
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
29
27
s := &Server{
30
28
router: mux.NewRouter(),
31
29
db: db,
32
-
plcClient: plc.NewClient(plcCfg.DirectoryURL),
33
30
plcBundleDir: plcCfg.BundleDir,
34
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
32
+
plcIndexDIDs: plcCfg.IndexDIDs,
35
33
}
36
34
37
35
s.setupRoutes()
···
59
57
// Generic endpoints (keep as-is)
60
58
api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
61
59
api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
60
+
api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
62
61
63
-
// NEW: PDS-specific endpoints (virtual, created via JOINs)
62
+
// PDS-specific endpoints (virtual, created via JOINs)
64
63
api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
65
64
api.HandleFunc("/pds/stats", s.handleGetPDSStats).Methods("GET")
65
+
api.HandleFunc("/pds/countries", s.handleGetCountryLeaderboard).Methods("GET")
66
+
api.HandleFunc("/pds/versions", s.handleGetVersionStats).Methods("GET")
67
+
api.HandleFunc("/pds/duplicates", s.handleGetDuplicateEndpoints).Methods("GET")
66
68
api.HandleFunc("/pds/{endpoint}", s.handleGetPDSDetail).Methods("GET")
69
+
70
+
// PDS repos
71
+
api.HandleFunc("/pds/{endpoint}/repos", s.handleGetPDSRepos).Methods("GET")
72
+
api.HandleFunc("/pds/{endpoint}/repos/stats", s.handleGetPDSRepoStats).Methods("GET")
73
+
api.HandleFunc("/pds/repos/{did}", s.handleGetDIDRepos).Methods("GET")
74
+
75
+
// Global DID routes
76
+
api.HandleFunc("/did/{did}", s.handleGetGlobalDID).Methods("GET")
77
+
api.HandleFunc("/handle/{handle}", s.handleGetDIDByHandle).Methods("GET") // NEW
67
78
68
79
// PLC Bundle routes
69
80
api.HandleFunc("/plc/bundles", s.handleGetPLCBundles).Methods("GET")
···
73
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
74
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
75
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
76
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
88
+
89
+
// PLC history/metrics
90
+
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
77
91
78
92
// PLC Export endpoint (simulates PLC directory)
79
93
api.HandleFunc("/plc/export", s.handlePLCExport).Methods("GET")
···
81
95
// DID routes
82
96
api.HandleFunc("/plc/did/{did}", s.handleGetDID).Methods("GET")
83
97
api.HandleFunc("/plc/did/{did}/history", s.handleGetDIDHistory).Methods("GET")
84
-
api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET") // NEW
98
+
api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET")
85
99
86
100
// Mempool routes
87
101
api.HandleFunc("/mempool/stats", s.handleGetMempoolStats).Methods("GET")
88
102
89
103
// Metrics routes
90
104
api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET")
105
+
106
+
// Debug Endpoints
107
+
api.HandleFunc("/debug/db/sizes", s.handleGetDBSizes).Methods("GET")
108
+
api.HandleFunc("/jobs", s.handleGetJobStatus).Methods("GET")
91
109
92
110
// Health check
93
111
s.router.HandleFunc("/health", s.handleHealth).Methods("GET")
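NewServer now receives the shared *plc.BundleManager instead of constructing its own, so the caller builds it once and hands it to every consumer. A wiring sketch for cmd/atscand; the constructor call mirrors the one removed from NewServer above, and after the move to the plcbundle library its exact signature may differ, so treat the arguments as assumptions:

// One BundleManager shared by the API server and the PLC worker.
bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.UseCache, db, cfg.PLC.IndexDIDs)
if err != nil {
    log.Fatal("Failed to init bundle manager: %v", err)
}
defer bundleManager.Close()

srv := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
// remaining startup (listen/serve, workers) is unchanged by this sketch
_ = srv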
+36
-13
internal/ipinfo/client.go
···
99
99
return ipInfo, nil
100
100
}
101
101
102
-
// ExtractIPFromEndpoint extracts IP from endpoint URL
103
-
func ExtractIPFromEndpoint(endpoint string) (string, error) {
102
+
// IPAddresses holds both IPv4 and IPv6 addresses
103
+
type IPAddresses struct {
104
+
IPv4 string
105
+
IPv6 string
106
+
}
107
+
108
+
// ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL
109
+
func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) {
104
110
// Parse URL
105
111
parsedURL, err := url.Parse(endpoint)
106
112
if err != nil {
107
-
return "", fmt.Errorf("failed to parse endpoint URL: %w", err)
113
+
return nil, fmt.Errorf("failed to parse endpoint URL: %w", err)
108
114
}
109
115
110
116
host := parsedURL.Hostname()
111
117
if host == "" {
112
-
return "", fmt.Errorf("no hostname in endpoint")
118
+
return nil, fmt.Errorf("no hostname in endpoint")
113
119
}
120
+
121
+
result := &IPAddresses{}
114
122
115
123
// Check if host is already an IP
116
-
if net.ParseIP(host) != nil {
117
-
return host, nil
124
+
if ip := net.ParseIP(host); ip != nil {
125
+
if ip.To4() != nil {
126
+
result.IPv4 = host
127
+
} else {
128
+
result.IPv6 = host
129
+
}
130
+
return result, nil
118
131
}
119
132
120
-
// Resolve hostname to IP
133
+
// Resolve hostname to IPs
121
134
ips, err := net.LookupIP(host)
122
135
if err != nil {
123
-
return "", fmt.Errorf("failed to resolve hostname: %w", err)
136
+
return nil, fmt.Errorf("failed to resolve hostname: %w", err)
124
137
}
125
138
126
139
if len(ips) == 0 {
127
-
return "", fmt.Errorf("no IPs found for hostname")
140
+
return nil, fmt.Errorf("no IPs found for hostname")
128
141
}
129
142
130
-
// Return first IPv4 address
143
+
// Extract both IPv4 and IPv6
131
144
for _, ip := range ips {
132
145
if ipv4 := ip.To4(); ipv4 != nil {
133
-
return ipv4.String(), nil
146
+
if result.IPv4 == "" {
147
+
result.IPv4 = ipv4.String()
148
+
}
149
+
} else {
150
+
if result.IPv6 == "" {
151
+
result.IPv6 = ip.String()
152
+
}
134
153
}
135
154
}
136
155
137
-
// Fallback to first IP (might be IPv6)
138
-
return ips[0].String(), nil
156
+
// Must have at least one IP
157
+
if result.IPv4 == "" && result.IPv6 == "" {
158
+
return nil, fmt.Errorf("no valid IPs found")
159
+
}
160
+
161
+
return result, nil
139
162
}
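Quick illustration of the new dual-stack helper; the endpoint is a placeholder:

ips, err := ipinfo.ExtractIPsFromEndpoint("https://pds.example.com")
if err != nil {
    log.Warn("resolution failed: %v", err)
} else {
    // Either field may be empty, but never both (the function errors in that case).
    log.Verbose("IPv4=%q IPv6=%q", ips.IPv4, ips.IPv6)
}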
+115
-7
internal/log/log.go
···
1
1
package log
2
2
3
3
import (
4
+
"fmt"
4
5
"io"
5
6
"log"
6
7
"os"
8
+
"strings"
9
+
"time"
7
10
)
8
11
9
12
var (
···
19
22
verboseWriter = os.Stdout
20
23
}
21
24
22
-
infoLog = log.New(infoWriter, "INFO: ", log.Ldate|log.Ltime|log.Lshortfile)
23
-
verboseLog = log.New(verboseWriter, "VERBOSE: ", log.Ldate|log.Ltime|log.Lshortfile)
24
-
errorLog = log.New(os.Stderr, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile)
25
+
// Use no flags, we'll add our own ISO 8601 timestamps
26
+
infoLog = log.New(infoWriter, "", 0)
27
+
verboseLog = log.New(verboseWriter, "", 0)
28
+
errorLog = log.New(os.Stderr, "", 0)
29
+
}
30
+
31
+
// timestamp returns current time with milliseconds (local time, no timezone)
32
+
func timestamp() string {
33
+
return time.Now().Format("2006-01-02T15:04:05.000")
25
34
}
26
35
27
36
func Verbose(format string, v ...interface{}) {
28
-
verboseLog.Printf(format, v...)
37
+
verboseLog.Printf("%s [VERBOSE] %s", timestamp(), fmt.Sprintf(format, v...))
29
38
}
30
39
31
40
func Info(format string, v ...interface{}) {
32
-
infoLog.Printf(format, v...)
41
+
infoLog.Printf("%s [INFO] %s", timestamp(), fmt.Sprintf(format, v...))
42
+
}
43
+
44
+
func Warn(format string, v ...interface{}) {
45
+
infoLog.Printf("%s [WARN] %s", timestamp(), fmt.Sprintf(format, v...))
33
46
}
34
47
35
48
func Error(format string, v ...interface{}) {
36
-
errorLog.Printf(format, v...)
49
+
errorLog.Printf("%s [ERROR] %s", timestamp(), fmt.Sprintf(format, v...))
37
50
}
38
51
39
52
func Fatal(format string, v ...interface{}) {
40
-
errorLog.Fatalf(format, v...)
53
+
errorLog.Fatalf("%s [FATAL] %s", timestamp(), fmt.Sprintf(format, v...))
54
+
}
55
+
56
+
// Banner prints a startup banner
57
+
func Banner(version string) {
58
+
banner := `
59
+
╔════════════════════════════════════════════════════════════╗
60
+
║ ║
61
+
║ █████╗ ████████╗███████╗ ██████╗ █████╗ ███╗ ██╗ ║
62
+
║ ██╔══██╗╚══██╔══╝██╔════╝██╔════╝██╔══██╗████╗ ██║ ║
63
+
║ ███████║ ██║ ███████╗██║ ███████║██╔██╗ ██║ ║
64
+
║ ██╔══██║ ██║ ╚════██║██║ ██╔══██║██║╚██╗██║ ║
65
+
║ ██║ ██║ ██║ ███████║╚██████╗██║ ██║██║ ╚████║ ║
66
+
║ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ║
67
+
║ ║
68
+
║ AT Protocol Network Scanner & Indexer ║
69
+
║ Version %s ║
70
+
║ ║
71
+
╚════════════════════════════════════════════════════════════╝
72
+
`
73
+
fmt.Printf(banner, padVersion(version))
74
+
}
75
+
76
+
// padVersion pads the version string to fit the banner
77
+
func padVersion(version string) string {
78
+
targetLen := 7
79
+
if len(version) < targetLen {
80
+
padding := strings.Repeat(" ", (targetLen-len(version))/2)
81
+
return padding + version + padding
82
+
}
83
+
return version
84
+
}
85
+
86
+
// RedactPassword redacts passwords from connection strings
87
+
func RedactPassword(connStr string) string {
88
+
// Handle PostgreSQL URI format: postgresql://user:password@host/db
89
+
// Pattern: find everything between :// and @ that contains a colon
90
+
if strings.Contains(connStr, "://") && strings.Contains(connStr, "@") {
91
+
// Find the credentials section
92
+
parts := strings.SplitN(connStr, "://", 2)
93
+
if len(parts) == 2 {
94
+
scheme := parts[0]
95
+
remainder := parts[1]
96
+
97
+
// Find the @ symbol
98
+
atIndex := strings.Index(remainder, "@")
99
+
if atIndex > 0 {
100
+
credentials := remainder[:atIndex]
101
+
hostAndDb := remainder[atIndex:]
102
+
103
+
// Check if there's a password (look for colon in credentials)
104
+
colonIndex := strings.Index(credentials, ":")
105
+
if colonIndex > 0 {
106
+
username := credentials[:colonIndex]
107
+
return fmt.Sprintf("%s://%s:***%s", scheme, username, hostAndDb)
108
+
}
109
+
}
110
+
}
111
+
}
112
+
113
+
// Handle key-value format: host=localhost password=secret user=myuser
114
+
if strings.Contains(connStr, "password=") {
115
+
parts := strings.Split(connStr, " ")
116
+
for i, part := range parts {
117
+
if strings.HasPrefix(part, "password=") {
118
+
parts[i] = "password=***"
119
+
}
120
+
}
121
+
return strings.Join(parts, " ")
122
+
}
123
+
124
+
return connStr
125
+
}
126
+
127
+
// PrintConfig prints configuration summary
128
+
func PrintConfig(items map[string]string) {
129
+
Info("=== Configuration ===")
130
+
maxKeyLen := 0
131
+
for key := range items {
132
+
if len(key) > maxKeyLen {
133
+
maxKeyLen = len(key)
134
+
}
135
+
}
136
+
137
+
for key, value := range items {
138
+
padding := strings.Repeat(" ", maxKeyLen-len(key))
139
+
140
+
// Redact database connection strings
141
+
displayValue := value
142
+
if strings.Contains(key, "Database Path") || strings.Contains(key, "Connection") || strings.Contains(strings.ToLower(key), "password") {
143
+
displayValue = RedactPassword(value)
144
+
}
145
+
146
+
fmt.Printf(" %s:%s %s\n", key, padding, displayValue)
147
+
}
148
+
Info("====================")
41
149
}
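RedactPassword covers both connection-string styles handled above; expected results for two illustrative inputs:

log.RedactPassword("postgresql://atscan:s3cret@db.local:5432/atscan")
// -> "postgresql://atscan:***@db.local:5432/atscan"

log.RedactPassword("host=localhost user=atscan password=s3cret dbname=atscan")
// -> "host=localhost user=atscan password=*** dbname=atscan"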
+226
internal/monitor/tracker.go
···
1
+
package monitor
2
+
3
+
import (
4
+
"sync"
5
+
"time"
6
+
)
7
+
8
+
type JobStatus struct {
9
+
Name string `json:"name"`
10
+
Status string `json:"status"` // "idle", "running", "completed", "error"
11
+
StartTime time.Time `json:"start_time,omitempty"`
12
+
LastRun time.Time `json:"last_run,omitempty"`
13
+
Duration time.Duration `json:"duration,omitempty"`
14
+
Progress *Progress `json:"progress,omitempty"`
15
+
Error string `json:"error,omitempty"`
16
+
NextRun time.Time `json:"next_run,omitempty"`
17
+
RunCount int64 `json:"run_count"`
18
+
SuccessCount int64 `json:"success_count"`
19
+
ErrorCount int64 `json:"error_count"`
20
+
}
21
+
22
+
type Progress struct {
23
+
Current int `json:"current"`
24
+
Total int `json:"total"`
25
+
Percent float64 `json:"percent"`
26
+
Message string `json:"message,omitempty"`
27
+
}
28
+
29
+
type WorkerStatus struct {
30
+
ID int `json:"id"`
31
+
Status string `json:"status"` // "idle", "working"
32
+
CurrentTask string `json:"current_task,omitempty"`
33
+
StartedAt time.Time `json:"started_at,omitempty"`
34
+
Duration time.Duration `json:"duration,omitempty"`
35
+
}
36
+
37
+
type Tracker struct {
38
+
mu sync.RWMutex
39
+
jobs map[string]*JobStatus
40
+
workers map[string][]WorkerStatus // key is job name
41
+
}
42
+
43
+
var globalTracker *Tracker
44
+
45
+
func init() {
46
+
globalTracker = &Tracker{
47
+
jobs: make(map[string]*JobStatus),
48
+
workers: make(map[string][]WorkerStatus),
49
+
}
50
+
}
51
+
52
+
func GetTracker() *Tracker {
53
+
return globalTracker
54
+
}
55
+
56
+
// Job status methods
57
+
func (t *Tracker) RegisterJob(name string) {
58
+
t.mu.Lock()
59
+
defer t.mu.Unlock()
60
+
61
+
t.jobs[name] = &JobStatus{
62
+
Name: name,
63
+
Status: "idle",
64
+
}
65
+
}
66
+
67
+
func (t *Tracker) StartJob(name string) {
68
+
t.mu.Lock()
69
+
defer t.mu.Unlock()
70
+
71
+
if job, exists := t.jobs[name]; exists {
72
+
job.Status = "running"
73
+
job.StartTime = time.Now()
74
+
job.Error = ""
75
+
job.RunCount++
76
+
}
77
+
}
78
+
79
+
func (t *Tracker) CompleteJob(name string, err error) {
80
+
t.mu.Lock()
81
+
defer t.mu.Unlock()
82
+
83
+
if job, exists := t.jobs[name]; exists {
84
+
job.LastRun = time.Now()
85
+
job.Duration = time.Since(job.StartTime)
86
+
87
+
if err != nil {
88
+
job.Status = "error"
89
+
job.Error = err.Error()
90
+
job.ErrorCount++
91
+
} else {
92
+
job.Status = "completed"
93
+
job.SuccessCount++
94
+
}
95
+
96
+
job.Progress = nil // Clear progress
97
+
}
98
+
}
99
+
100
+
func (t *Tracker) UpdateProgress(name string, current, total int, message string) {
101
+
t.mu.Lock()
102
+
defer t.mu.Unlock()
103
+
104
+
if job, exists := t.jobs[name]; exists {
105
+
var percent float64
106
+
if total > 0 {
107
+
percent = float64(current) / float64(total) * 100
108
+
}
109
+
110
+
job.Progress = &Progress{
111
+
Current: current,
112
+
Total: total,
113
+
Percent: percent,
114
+
Message: message,
115
+
}
116
+
}
117
+
}
118
+
119
+
func (t *Tracker) SetNextRun(name string, nextRun time.Time) {
120
+
t.mu.Lock()
121
+
defer t.mu.Unlock()
122
+
123
+
if job, exists := t.jobs[name]; exists {
124
+
job.NextRun = nextRun
125
+
}
126
+
}
127
+
128
+
func (t *Tracker) GetJobStatus(name string) *JobStatus {
129
+
t.mu.RLock()
130
+
defer t.mu.RUnlock()
131
+
132
+
if job, exists := t.jobs[name]; exists {
133
+
// Create a copy
134
+
jobCopy := *job
135
+
if job.Progress != nil {
136
+
progressCopy := *job.Progress
137
+
jobCopy.Progress = &progressCopy
138
+
}
139
+
140
+
// Calculate duration for running jobs
141
+
if jobCopy.Status == "running" {
142
+
jobCopy.Duration = time.Since(jobCopy.StartTime)
143
+
}
144
+
145
+
return &jobCopy
146
+
}
147
+
return nil
148
+
}
149
+
150
+
func (t *Tracker) GetAllJobs() map[string]*JobStatus {
151
+
t.mu.RLock()
152
+
defer t.mu.RUnlock()
153
+
154
+
result := make(map[string]*JobStatus)
155
+
for name, job := range t.jobs {
156
+
jobCopy := *job
157
+
if job.Progress != nil {
158
+
progressCopy := *job.Progress
159
+
jobCopy.Progress = &progressCopy
160
+
}
161
+
162
+
// Calculate duration for running jobs
163
+
if jobCopy.Status == "running" {
164
+
jobCopy.Duration = time.Since(jobCopy.StartTime)
165
+
}
166
+
167
+
result[name] = &jobCopy
168
+
}
169
+
return result
170
+
}
171
+
172
+
// Worker status methods
173
+
func (t *Tracker) InitWorkers(jobName string, count int) {
174
+
t.mu.Lock()
175
+
defer t.mu.Unlock()
176
+
177
+
workers := make([]WorkerStatus, count)
178
+
for i := 0; i < count; i++ {
179
+
workers[i] = WorkerStatus{
180
+
ID: i + 1,
181
+
Status: "idle",
182
+
}
183
+
}
184
+
t.workers[jobName] = workers
185
+
}
186
+
187
+
func (t *Tracker) StartWorker(jobName string, workerID int, task string) {
188
+
t.mu.Lock()
189
+
defer t.mu.Unlock()
190
+
191
+
if workers, exists := t.workers[jobName]; exists && workerID > 0 && workerID <= len(workers) {
192
+
workers[workerID-1].Status = "working"
193
+
workers[workerID-1].CurrentTask = task
194
+
workers[workerID-1].StartedAt = time.Now()
195
+
}
196
+
}
197
+
198
+
func (t *Tracker) CompleteWorker(jobName string, workerID int) {
199
+
t.mu.Lock()
200
+
defer t.mu.Unlock()
201
+
202
+
if workers, exists := t.workers[jobName]; exists && workerID > 0 && workerID <= len(workers) {
203
+
workers[workerID-1].Status = "idle"
204
+
workers[workerID-1].CurrentTask = ""
205
+
workers[workerID-1].Duration = time.Since(workers[workerID-1].StartedAt)
206
+
workers[workerID-1].StartedAt = time.Time{}
207
+
}
208
+
}
209
+
210
+
func (t *Tracker) GetWorkers(jobName string) []WorkerStatus {
211
+
t.mu.RLock()
212
+
defer t.mu.RUnlock()
213
+
214
+
if workers, exists := t.workers[jobName]; exists {
215
+
// Create a copy with calculated durations
216
+
result := make([]WorkerStatus, len(workers))
217
+
for i, w := range workers {
218
+
result[i] = w
219
+
if w.Status == "working" && !w.StartedAt.IsZero() {
220
+
result[i].Duration = time.Since(w.StartedAt)
221
+
}
222
+
}
223
+
return result
224
+
}
225
+
return nil
226
+
}
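Typical lifecycle from a job's point of view; the job name and interval here are illustrative, not taken from this change:

tracker := monitor.GetTracker()
tracker.RegisterJob("pds_scan")

for {
    tracker.StartJob("pds_scan")
    err := runOnce(ctx) // placeholder for the actual scan
    tracker.CompleteJob("pds_scan", err)
    tracker.SetNextRun("pds_scan", time.Now().Add(30*time.Minute))
    time.Sleep(30 * time.Minute)
}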
+47
-18
internal/pds/client.go
···
4
4
"context"
5
5
"encoding/json"
6
6
"fmt"
7
+
"net"
7
8
"net/http"
8
9
"time"
9
10
)
···
28
29
29
30
// Repo represents a repository in the list
30
31
type Repo struct {
31
-
DID string `json:"did"`
32
-
Head string `json:"head,omitempty"`
33
-
Rev string `json:"rev,omitempty"`
32
+
DID string `json:"did"`
33
+
Head string `json:"head,omitempty"`
34
+
Rev string `json:"rev,omitempty"`
35
+
Active *bool `json:"active,omitempty"`
36
+
Status *string `json:"status,omitempty"`
34
37
}
35
38
36
39
// ListRepos fetches all repositories from a PDS with pagination
37
-
func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]string, error) {
38
-
var allDIDs []string
40
+
func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]Repo, error) {
41
+
var allRepos []Repo
39
42
var cursor *string
40
43
41
44
for {
···
67
70
}
68
71
resp.Body.Close()
69
72
70
-
// Collect DIDs
71
-
for _, repo := range result.Repos {
72
-
allDIDs = append(allDIDs, repo.DID)
73
-
}
73
+
// Collect repos
74
+
allRepos = append(allRepos, result.Repos...)
74
75
75
76
// Check if there are more pages
76
77
if result.Cursor == nil || *result.Cursor == "" {
···
79
80
cursor = result.Cursor
80
81
}
81
82
82
-
return allDIDs, nil
83
+
return allRepos, nil
83
84
}
84
85
85
86
// DescribeServer fetches com.atproto.server.describeServer
86
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
87
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
88
91
89
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
90
116
91
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
92
118
if err != nil {
93
-
return nil, err
119
+
return nil, 0, "", err
94
120
}
95
121
96
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
97
125
if err != nil {
98
-
return nil, err
126
+
return nil, responseTime, usedIP, err
99
127
}
100
128
defer resp.Body.Close()
101
129
102
130
if resp.StatusCode != http.StatusOK {
103
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
104
132
}
105
133
106
134
var desc ServerDescription
107
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
108
-
return nil, err
136
+
return nil, responseTime, usedIP, err
109
137
}
110
138
111
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
112
140
}
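DescribeServer now also reports the round-trip time and the concrete IP the dialer connected to, which the scanner stores as UsedIP. A usage sketch with error handling shortened (c is a *Client, endpoint is a placeholder):

desc, rtt, usedIP, err := c.DescribeServer(ctx, "https://pds.example.com")
if err != nil {
    log.Verbose("describeServer failed: %v", err)
    return
}
log.Verbose("answered from %s in %s (server DID %s)", usedIP, rtt, desc.DID)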
113
141
114
142
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
143
+
// Returns: available, responseTime, version, error
115
144
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
116
145
startTime := time.Now()
117
146
+149
-52
internal/pds/scanner.go
···
5
5
"fmt"
6
6
"math/rand"
7
7
"sync"
8
+
"sync/atomic"
8
9
"time"
9
10
10
-
"github.com/acarl005/stripansi"
11
-
"github.com/atscan/atscanner/internal/config"
12
-
"github.com/atscan/atscanner/internal/ipinfo"
13
-
"github.com/atscan/atscanner/internal/log"
14
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
15
16
)
16
17
17
18
type Scanner struct {
···
34
35
startTime := time.Now()
35
36
log.Info("Starting PDS availability scan...")
36
37
37
-
// Get only PDS endpoints
38
+
// Get only PDS endpoints that need checking
38
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
39
-
Type: "pds",
40
+
Type: "pds",
41
+
OnlyStale: true,
42
+
OnlyValid: true,
43
+
RecheckInterval: s.config.RecheckInterval,
40
44
})
41
45
if err != nil {
42
46
return err
43
47
}
44
48
45
-
// 2. ADD THIS BLOCK TO SHUFFLE THE LIST
49
+
if len(servers) == 0 {
50
+
log.Info("No endpoints need scanning at this time")
51
+
monitor.GetTracker().UpdateProgress("pds_scan", 0, 0, "No endpoints need scanning")
52
+
return nil
53
+
}
54
+
55
+
log.Info("Found %d endpoints that need scanning", len(servers))
56
+
monitor.GetTracker().UpdateProgress("pds_scan", 0, len(servers), "Preparing to scan")
57
+
58
+
// Shuffle servers
46
59
if len(servers) > 0 {
47
-
// Create a new random source to avoid using the global one
48
60
r := rand.New(rand.NewSource(time.Now().UnixNano()))
49
-
// Shuffle the servers slice in place
50
61
r.Shuffle(len(servers), func(i, j int) {
51
62
servers[i], servers[j] = servers[j], servers[i]
52
63
})
53
-
log.Info("Randomized scan order for %d PDS servers...", len(servers))
54
-
} else {
55
-
log.Info("Scanning 0 PDS servers...")
56
-
return nil // No need to continue if there are no servers
57
64
}
58
65
59
-
// Worker pool
60
-
jobs := make(chan *storage.Endpoint, len(servers))
66
+
// Initialize workers in tracker
67
+
monitor.GetTracker().InitWorkers("pds_scan", s.config.Workers)
68
+
69
+
// Worker pool with progress tracking
70
+
jobs := make(chan *workerJob, len(servers))
61
71
var wg sync.WaitGroup
72
+
var completed int32
62
73
63
74
for i := 0; i < s.config.Workers; i++ {
64
75
wg.Add(1)
65
-
go func() {
76
+
workerID := i + 1
77
+
go func(id int) {
66
78
defer wg.Done()
67
-
s.worker(ctx, jobs)
68
-
}()
79
+
s.workerWithProgress(ctx, id, jobs, &completed, len(servers))
80
+
}(workerID)
69
81
}
70
82
71
83
// Send jobs
72
84
for _, server := range servers {
73
-
jobs <- server
85
+
jobs <- &workerJob{endpoint: server}
74
86
}
75
87
close(jobs)
76
88
···
78
90
wg.Wait()
79
91
80
92
log.Info("PDS scan completed in %v", time.Since(startTime))
93
+
monitor.GetTracker().UpdateProgress("pds_scan", len(servers), len(servers), "Completed")
81
94
82
95
return nil
83
96
}
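Because every worker writes into the shared tracker, scan progress can be read from anywhere while the scan runs and is the same data the /jobs endpoint exposes. A periodic status log would be one illustrative reader (not part of this change):

if job := monitor.GetTracker().GetJobStatus("pds_scan"); job != nil && job.Progress != nil {
    log.Verbose("pds_scan: %d/%d (%.1f%%) %s",
        job.Progress.Current, job.Progress.Total, job.Progress.Percent, job.Progress.Message)
}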
84
97
85
-
func (s *Scanner) worker(ctx context.Context, jobs <-chan *storage.Endpoint) {
86
-
for server := range jobs {
98
+
type workerJob struct {
99
+
endpoint *storage.Endpoint
100
+
}
101
+
102
+
func (s *Scanner) workerWithProgress(ctx context.Context, workerID int, jobs <-chan *workerJob, completed *int32, total int) {
103
+
for job := range jobs {
87
104
select {
88
105
case <-ctx.Done():
89
106
return
90
107
default:
91
-
s.scanAndSaveEndpoint(ctx, server)
108
+
// Update worker status
109
+
monitor.GetTracker().StartWorker("pds_scan", workerID, job.endpoint.Endpoint)
110
+
111
+
// Scan endpoint
112
+
s.scanAndSaveEndpoint(ctx, job.endpoint)
113
+
114
+
// Update progress
115
+
atomic.AddInt32(completed, 1)
116
+
current := atomic.LoadInt32(completed)
117
+
monitor.GetTracker().UpdateProgress("pds_scan", int(current), total,
118
+
fmt.Sprintf("Scanned %d/%d endpoints", current, total))
119
+
120
+
// Mark worker as idle
121
+
monitor.GetTracker().CompleteWorker("pds_scan", workerID)
92
122
}
93
123
}
94
124
}
95
125
96
126
func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) {
97
-
// STEP 1: Resolve IP (before any network call)
98
-
ip, err := ipinfo.ExtractIPFromEndpoint(ep.Endpoint)
127
+
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
+
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
99
129
if err != nil {
100
-
// Mark as offline due to DNS failure
101
130
s.saveScanResult(ctx, ep.ID, &ScanResult{
102
131
Status: storage.EndpointStatusOffline,
103
132
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
105
134
return
106
135
}
107
136
108
-
// Update IP immediately
109
-
s.db.UpdateEndpointIP(ctx, ep.ID, ip, time.Now().UTC())
137
+
// Update IPs immediately
138
+
s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC())
139
+
140
+
// STEP 1.5: Fetch IP info asynchronously for both IPs
141
+
if ips.IPv4 != "" {
142
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv4)
143
+
}
144
+
if ips.IPv6 != "" {
145
+
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
146
+
}
110
147
111
-
// STEP 2: Health check
112
-
available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) // CHANGED: receive version
113
-
if err != nil || !available {
114
-
errMsg := "health check failed"
115
-
if err != nil {
116
-
errMsg = err.Error()
117
-
}
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
150
+
if err != nil {
118
151
s.saveScanResult(ctx, ep.ID, &ScanResult{
119
152
Status: storage.EndpointStatusOffline,
120
-
ResponseTime: responseTime,
121
-
ErrorMessage: errMsg,
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
122
156
})
123
157
return
124
158
}
125
159
126
-
// STEP 3: Fetch PDS-specific data
127
-
desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
128
-
if err != nil {
129
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
162
+
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
163
+
}
164
+
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
130
172
}
131
173
132
-
dids, err := s.client.ListRepos(ctx, ep.Endpoint)
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
181
+
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
133
182
if err != nil {
134
183
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
135
-
dids = []string{}
184
+
repoList = []Repo{}
185
+
}
186
+
187
+
// Convert to DIDs
188
+
dids := make([]string, len(repoList))
189
+
for i, repo := range repoList {
190
+
dids[i] = repo.DID
136
191
}
137
192
138
-
// STEP 4: SAVE IMMEDIATELY
193
+
// STEP 5: SAVE scan result
139
194
s.saveScanResult(ctx, ep.ID, &ScanResult{
140
195
Status: storage.EndpointStatusOnline,
141
-
ResponseTime: responseTime,
196
+
ResponseTime: avgResponseTime,
142
197
Description: desc,
143
198
DIDs: dids,
144
-
Version: version, // CHANGED: Pass version
199
+
Version: version,
200
+
UsedIP: usedIP, // Only from describeServer
145
201
})
146
202
147
-
// STEP 5: Fetch IP info if needed (async, with backoff)
148
-
go s.updateIPInfoIfNeeded(ctx, ip)
203
+
// STEP 6: Save repos in batches (only tracks changes)
204
+
if len(repoList) > 0 {
205
+
batchSize := 100_000
206
+
207
+
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
208
+
209
+
for i := 0; i < len(repoList); i += batchSize {
210
+
end := i + batchSize
211
+
if end > len(repoList) {
212
+
end = len(repoList)
213
+
}
214
+
215
+
batch := repoList[i:end]
216
+
repoData := make([]storage.PDSRepoData, len(batch))
217
+
218
+
for j, repo := range batch {
219
+
active := true
220
+
if repo.Active != nil {
221
+
active = *repo.Active
222
+
}
223
+
224
+
status := ""
225
+
if repo.Status != nil {
226
+
status = *repo.Status
227
+
}
228
+
229
+
repoData[j] = storage.PDSRepoData{
230
+
DID: repo.DID,
231
+
Head: repo.Head,
232
+
Rev: repo.Rev,
233
+
Active: active,
234
+
Status: status,
235
+
}
236
+
}
237
+
238
+
if err := s.db.UpsertPDSRepos(ctx, ep.ID, repoData); err != nil {
239
+
log.Error("Failed to save repo batch for endpoint %d: %v", ep.ID, err)
240
+
}
241
+
}
242
+
243
+
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
244
+
}
149
245
}
150
246
151
247
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
···
155
251
Metadata: make(map[string]interface{}),
156
252
}
157
253
158
-
var userCount int64 // NEW: Declare user count
254
+
var userCount int64
159
255
160
256
// Add PDS-specific metadata
161
257
if result.Status == storage.EndpointStatusOnline {
162
-
userCount = int64(len(result.DIDs)) // NEW: Get user count
163
-
scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness
258
+
userCount = int64(len(result.DIDs))
259
+
scanData.Metadata["user_count"] = userCount
164
260
if result.Description != nil {
165
261
scanData.Metadata["server_info"] = result.Description
166
262
}
···
177
273
Status: result.Status,
178
274
ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms
179
275
UserCount: userCount,
180
-
Version: result.Version, // NEW: Set the version field
276
+
Version: result.Version,
277
+
UsedIP: result.UsedIP, // NEW
181
278
ScanData: scanData,
182
279
ScannedAt: time.Now().UTC(),
183
280
}
+2
-1
internal/pds/types.go
-662
internal/plc/bundle.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"bytes"
6
-
"context"
7
-
"crypto/sha256"
8
-
"encoding/hex"
9
-
"encoding/json"
10
-
"fmt"
11
-
"os"
12
-
"path/filepath"
13
-
"time"
14
-
15
-
"github.com/atscan/atscanner/internal/log"
16
-
"github.com/atscan/atscanner/internal/storage"
17
-
"github.com/klauspost/compress/zstd"
18
-
)
19
-
20
-
const BUNDLE_SIZE = 10000
21
-
22
-
type BundleManager struct {
23
-
dir string
24
-
enabled bool
25
-
encoder *zstd.Encoder
26
-
decoder *zstd.Decoder
27
-
db storage.Database
28
-
indexDIDs bool
29
-
}
30
-
31
-
// ===== INITIALIZATION =====
32
-
33
-
func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) {
34
-
if !enabled {
35
-
return &BundleManager{enabled: false}, nil
36
-
}
37
-
38
-
if err := os.MkdirAll(dir, 0755); err != nil {
39
-
return nil, fmt.Errorf("failed to create bundle dir: %w", err)
40
-
}
41
-
42
-
encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
43
-
if err != nil {
44
-
return nil, err
45
-
}
46
-
47
-
decoder, err := zstd.NewReader(nil)
48
-
if err != nil {
49
-
return nil, err
50
-
}
51
-
52
-
return &BundleManager{
53
-
dir: dir,
54
-
enabled: enabled,
55
-
encoder: encoder,
56
-
decoder: decoder,
57
-
db: db,
58
-
indexDIDs: indexDIDs, // NEW
59
-
}, nil
60
-
}
61
-
62
-
func (bm *BundleManager) Close() {
63
-
if bm.encoder != nil {
64
-
bm.encoder.Close()
65
-
}
66
-
if bm.decoder != nil {
67
-
bm.decoder.Close()
68
-
}
69
-
}
70
-
71
-
// ===== BUNDLE FILE ABSTRACTION =====
72
-
73
-
type bundleFile struct {
74
-
path string
75
-
operations []PLCOperation
76
-
uncompressedHash string
77
-
compressedHash string
78
-
}
79
-
80
-
func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile {
81
-
return &bundleFile{
82
-
path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)),
83
-
}
84
-
}
85
-
86
-
func (bf *bundleFile) exists() bool {
87
-
_, err := os.Stat(bf.path)
88
-
return err == nil
89
-
}
90
-
91
-
func (bm *BundleManager) load(bf *bundleFile) error {
92
-
compressed, err := os.ReadFile(bf.path)
93
-
if err != nil {
94
-
return fmt.Errorf("read failed: %w", err)
95
-
}
96
-
97
-
decompressed, err := bm.decoder.DecodeAll(compressed, nil)
98
-
if err != nil {
99
-
return fmt.Errorf("decompress failed: %w", err)
100
-
}
101
-
102
-
bf.operations = bm.parseJSONL(decompressed)
103
-
return nil
104
-
}
105
-
106
-
func (bm *BundleManager) save(bf *bundleFile) error {
107
-
jsonlData := bm.serializeJSONL(bf.operations)
108
-
bf.uncompressedHash = bm.hash(jsonlData)
109
-
110
-
compressed := bm.encoder.EncodeAll(jsonlData, nil)
111
-
bf.compressedHash = bm.hash(compressed)
112
-
113
-
return os.WriteFile(bf.path, compressed, 0644)
114
-
}
115
-
116
-
func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation {
117
-
var ops []PLCOperation
118
-
scanner := bufio.NewScanner(bytes.NewReader(data))
119
-
120
-
for scanner.Scan() {
121
-
line := scanner.Bytes()
122
-
if len(line) == 0 {
123
-
continue
124
-
}
125
-
126
-
var op PLCOperation
127
-
if err := json.Unmarshal(line, &op); err == nil {
128
-
op.RawJSON = append([]byte(nil), line...)
129
-
ops = append(ops, op)
130
-
}
131
-
}
132
-
133
-
return ops
134
-
}
135
-
136
-
func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte {
137
-
var buf []byte
138
-
for _, op := range ops {
139
-
buf = append(buf, op.RawJSON...)
140
-
buf = append(buf, '\n')
141
-
}
142
-
return buf
143
-
}
144
-
145
-
// ===== BUNDLE FETCHING =====
146
-
147
-
type bundleFetcher struct {
148
-
client *Client
149
-
seenCIDs map[string]bool
150
-
currentAfter string
151
-
fetchCount int
152
-
}
153
-
154
-
func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher {
155
-
seen := make(map[string]bool)
156
-
for cid := range prevBoundaryCIDs {
157
-
seen[cid] = true
158
-
}
159
-
160
-
return &bundleFetcher{
161
-
client: client,
162
-
seenCIDs: seen,
163
-
currentAfter: afterTime,
164
-
}
165
-
}
166
-
167
-
func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) {
168
-
var ops []PLCOperation
169
-
maxFetches := (target / 900) + 5
170
-
171
-
for len(ops) < target && bf.fetchCount < maxFetches {
172
-
bf.fetchCount++
173
-
batchSize := bf.calculateBatchSize(target - len(ops))
174
-
175
-
log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize)
176
-
177
-
batch, shouldContinue := bf.fetchBatch(ctx, batchSize)
178
-
179
-
for _, op := range batch {
180
-
if !bf.seenCIDs[op.CID] {
181
-
bf.seenCIDs[op.CID] = true
182
-
ops = append(ops, op)
183
-
184
-
if len(ops) >= target {
185
-
return ops[:target], true
186
-
}
187
-
}
188
-
}
189
-
190
-
if !shouldContinue {
191
-
break
192
-
}
193
-
}
194
-
195
-
return ops, len(ops) >= target
196
-
}
197
-
198
-
func (bf *bundleFetcher) calculateBatchSize(remaining int) int {
199
-
if bf.fetchCount == 0 {
200
-
return 1000
201
-
}
202
-
if remaining < 100 {
203
-
return 50
204
-
}
205
-
if remaining < 500 {
206
-
return 200
207
-
}
208
-
return 1000
209
-
}
210
-
211
-
func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) {
212
-
ops, err := bf.client.Export(ctx, ExportOptions{
213
-
Count: size,
214
-
After: bf.currentAfter,
215
-
})
216
-
217
-
if err != nil || len(ops) == 0 {
218
-
return nil, false
219
-
}
220
-
221
-
if len(ops) > 0 {
222
-
bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano)
223
-
}
224
-
225
-
return ops, len(ops) >= size
226
-
}
227
-
228
-
// ===== MAIN BUNDLE LOADING =====
229
-
230
-
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) {
231
-
if !bm.enabled {
232
-
return nil, false, fmt.Errorf("bundle manager disabled")
233
-
}
234
-
235
-
bf := bm.newBundleFile(bundleNum)
236
-
237
-
// Try local file first
238
-
if bf.exists() {
239
-
return bm.loadFromFile(ctx, bundleNum, bf)
240
-
}
241
-
242
-
// Fetch from PLC
243
-
return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient)
244
-
}
245
-
246
-
func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) {
247
-
log.Verbose("→ Loading bundle %06d from local file", bundleNum)
248
-
249
-
// Verify hash if bundle is in DB
250
-
if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil {
251
-
if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil {
252
-
log.Error("⚠ Hash mismatch for bundle %06d! Re-fetching...", bundleNum)
253
-
os.Remove(bf.path)
254
-
return nil, false, fmt.Errorf("hash mismatch")
255
-
}
256
-
log.Verbose("✓ Hash verified for bundle %06d", bundleNum)
257
-
}
258
-
259
-
if err := bm.load(bf); err != nil {
260
-
return nil, false, err
261
-
}
262
-
263
-
// Index if not in DB
264
-
if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil {
265
-
bf.compressedHash = bm.hashFile(bf.path)
266
-
bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations))
267
-
268
-
// Calculate cursor from previous bundle
269
-
cursor := bm.calculateCursor(ctx, bundleNum)
270
-
271
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
272
-
}
273
-
274
-
return bf.operations, true, nil
275
-
}
276
-
277
-
func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) {
278
-
log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum)
279
-
280
-
afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum)
281
-
fetcher := newBundleFetcher(client, afterTime, prevCIDs)
282
-
283
-
ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE)
284
-
285
-
log.Info(" Collected %d unique operations after %d fetches (complete=%v)",
286
-
len(ops), fetcher.fetchCount, isComplete)
287
-
288
-
if isComplete {
289
-
bf.operations = ops
290
-
if err := bm.save(bf); err != nil {
291
-
log.Error("Warning: failed to save bundle: %v", err)
292
-
} else {
293
-
// The cursor is the afterTime that was used to fetch this bundle
294
-
cursor := afterTime
295
-
bm.indexBundle(ctx, bundleNum, bf, cursor)
296
-
log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]",
297
-
bundleNum, len(ops), bf.uncompressedHash[:16], cursor)
298
-
}
299
-
}
300
-
301
-
return ops, isComplete, nil
302
-
}
303
-
304
-
func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) {
305
-
if bundleNum == 1 {
306
-
return "", nil
307
-
}
308
-
309
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1)
310
-
if err != nil {
311
-
return "", nil
312
-
}
313
-
314
-
afterTime := prevBundle.EndTime.Format(time.RFC3339Nano)
315
-
316
-
// Return stored boundary CIDs if available
317
-
if len(prevBundle.BoundaryCIDs) > 0 {
318
-
cids := make(map[string]bool)
319
-
for _, cid := range prevBundle.BoundaryCIDs {
320
-
cids[cid] = true
321
-
}
322
-
return afterTime, cids
323
-
}
324
-
325
-
// Fallback: compute from file
326
-
bf := bm.newBundleFile(bundleNum - 1)
327
-
if bf.exists() {
328
-
if err := bm.load(bf); err == nil {
329
-
_, cids := GetBoundaryCIDs(bf.operations)
330
-
return afterTime, cids
331
-
}
332
-
}
333
-
334
-
return afterTime, nil
335
-
}
336
-
337
-
// ===== BUNDLE INDEXING =====
338
-
339
-
func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error {
340
-
prevHash := ""
341
-
if bundleNum > 1 {
342
-
if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
343
-
prevHash = prev.Hash
344
-
}
345
-
}
346
-
347
-
dids := bm.extractUniqueDIDs(bf.operations)
348
-
compressedFileSize := bm.getFileSize(bf.path)
349
-
350
-
// Calculate uncompressed size
351
-
uncompressedSize := int64(0)
352
-
for _, op := range bf.operations {
353
-
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
354
-
}
355
-
356
-
// Get time range from operations
357
-
firstSeenAt := bf.operations[0].CreatedAt
358
-
lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt
359
-
360
-
bundle := &storage.PLCBundle{
361
-
BundleNumber: bundleNum,
362
-
StartTime: firstSeenAt,
363
-
EndTime: lastSeenAt,
364
-
DIDs: dids,
365
-
Hash: bf.uncompressedHash,
366
-
CompressedHash: bf.compressedHash,
367
-
CompressedSize: compressedFileSize,
368
-
UncompressedSize: uncompressedSize,
369
-
Cursor: cursor,
370
-
PrevBundleHash: prevHash,
371
-
Compressed: true,
372
-
CreatedAt: time.Now().UTC(),
373
-
}
374
-
375
-
// Create bundle first
376
-
if err := bm.db.CreateBundle(ctx, bundle); err != nil {
377
-
return err
378
-
}
379
-
380
-
// NEW: Only index DIDs if enabled
381
-
if bm.indexDIDs {
382
-
start := time.Now()
383
-
if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil {
384
-
log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err)
385
-
// Don't return error - bundle is already created
386
-
} else {
387
-
elapsed := time.Since(start)
388
-
log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed)
389
-
}
390
-
} else {
391
-
log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum)
392
-
}
393
-
394
-
return nil
395
-
}
396
-
397
-
func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string {
398
-
didSet := make(map[string]bool)
399
-
for _, op := range ops {
400
-
didSet[op.DID] = true
401
-
}
402
-
403
-
dids := make([]string, 0, len(didSet))
404
-
for did := range didSet {
405
-
dids = append(dids, did)
406
-
}
407
-
return dids
408
-
}
409
-
410
-
// ===== MEMPOOL BUNDLE CREATION =====
411
-
412
-
func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) {
413
-
if !bm.enabled {
414
-
return 0, fmt.Errorf("bundle manager disabled")
415
-
}
416
-
417
-
if len(operations) != BUNDLE_SIZE {
418
-
return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations))
419
-
}
420
-
421
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
422
-
if err != nil {
423
-
return 0, err
424
-
}
425
-
bundleNum := lastBundle + 1
426
-
427
-
bf := bm.newBundleFile(bundleNum)
428
-
bf.operations = operations
429
-
430
-
if err := bm.save(bf); err != nil {
431
-
return 0, err
432
-
}
433
-
434
-
if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil {
435
-
return 0, err
436
-
}
437
-
438
-
log.Info("✓ Created bundle %06d from mempool (hash: %s...)",
439
-
bundleNum, bf.uncompressedHash[:16])
440
-
441
-
return bundleNum, nil
442
-
}
443
-
444
-
// ===== VERIFICATION =====
445
-
446
-
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
447
-
if !bm.enabled {
448
-
return fmt.Errorf("bundle manager disabled")
449
-
}
450
-
451
-
log.Info("Verifying bundle chain from 1 to %06d...", endBundle)
452
-
453
-
for i := 1; i <= endBundle; i++ {
454
-
bundle, err := bm.db.GetBundleByNumber(ctx, i)
455
-
if err != nil {
456
-
return fmt.Errorf("bundle %06d not found: %w", i, err)
457
-
}
458
-
459
-
// Verify file hash
460
-
path := bm.newBundleFile(i).path
461
-
if err := bm.verifyHash(path, bundle.CompressedHash); err != nil {
462
-
return fmt.Errorf("bundle %06d hash verification failed: %w", i, err)
463
-
}
464
-
465
-
// Verify chain link
466
-
if i > 1 {
467
-
prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1)
468
-
if err != nil {
469
-
return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i)
470
-
}
471
-
472
-
if bundle.PrevBundleHash != prevBundle.Hash {
473
-
return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s",
474
-
i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16])
475
-
}
476
-
}
477
-
478
-
if i%100 == 0 {
479
-
log.Verbose(" ✓ Verified bundles 1-%06d", i)
480
-
}
481
-
}
482
-
483
-
log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle)
484
-
return nil
485
-
}
486
-
487
-
func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error {
488
-
if !bm.enabled {
489
-
return nil
490
-
}
491
-
492
-
for i := 1; i < targetBundle; i++ {
493
-
if !bm.newBundleFile(i).exists() {
494
-
if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil {
495
-
return fmt.Errorf("bundle %06d is missing (required for continuity)", i)
496
-
}
497
-
}
498
-
}
499
-
500
-
return nil
501
-
}
502
-
503
-
// ===== UTILITY METHODS =====
504
-
505
-
func (bm *BundleManager) hash(data []byte) string {
506
-
h := sha256.Sum256(data)
507
-
return hex.EncodeToString(h[:])
508
-
}
509
-
510
-
func (bm *BundleManager) hashFile(path string) string {
511
-
data, _ := os.ReadFile(path)
512
-
return bm.hash(data)
513
-
}
514
-
515
-
func (bm *BundleManager) verifyHash(path, expectedHash string) error {
516
-
if expectedHash == "" {
517
-
return nil
518
-
}
519
-
520
-
actualHash := bm.hashFile(path)
521
-
if actualHash != expectedHash {
522
-
return fmt.Errorf("hash mismatch")
523
-
}
524
-
return nil
525
-
}
526
-
527
-
func (bm *BundleManager) getFileSize(path string) int64 {
528
-
if info, err := os.Stat(path); err == nil {
529
-
return info.Size()
530
-
}
531
-
return 0
532
-
}
533
-
534
-
func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) {
535
-
if !bm.enabled {
536
-
return 0, 0, 0, 0, nil
537
-
}
538
-
return bm.db.GetBundleStats(ctx)
539
-
}
540
-
541
-
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
542
-
lastBundle, err := bm.db.GetLastBundleNumber(ctx)
543
-
if err != nil {
544
-
return nil, err
545
-
}
546
-
547
-
if lastBundle == 0 {
548
-
return map[string]interface{}{
549
-
"chain_length": 0,
550
-
"status": "empty",
551
-
}, nil
552
-
}
553
-
554
-
firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1)
555
-
lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle)
556
-
557
-
return map[string]interface{}{
558
-
"chain_length": lastBundle,
559
-
"first_bundle": 1,
560
-
"last_bundle": lastBundle,
561
-
"chain_start_time": firstBundle.StartTime,
562
-
"chain_end_time": lastBundleData.EndTime,
563
-
"chain_head_hash": lastBundleData.Hash,
564
-
}, nil
565
-
}
566
-
567
-
// ===== EXPORTED HELPERS =====
568
-
569
-
func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) {
570
-
if len(operations) == 0 {
571
-
return time.Time{}, nil
572
-
}
573
-
574
-
lastOp := operations[len(operations)-1]
575
-
boundaryTime := lastOp.CreatedAt
576
-
cidSet := make(map[string]bool)
577
-
578
-
for i := len(operations) - 1; i >= 0; i-- {
579
-
op := operations[i]
580
-
if op.CreatedAt.Equal(boundaryTime) {
581
-
cidSet[op.CID] = true
582
-
} else {
583
-
break
584
-
}
585
-
}
586
-
587
-
return boundaryTime, cidSet
588
-
}
589
-
590
-
func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation {
591
-
if len(operations) == 0 {
592
-
return operations
593
-
}
594
-
595
-
boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp)
596
-
if err != nil {
597
-
return operations
598
-
}
599
-
600
-
startIdx := 0
601
-
for startIdx < len(operations) {
602
-
op := operations[startIdx]
603
-
604
-
if op.CreatedAt.After(boundaryTime) {
605
-
break
606
-
}
607
-
608
-
if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] {
609
-
startIdx++
610
-
continue
611
-
}
612
-
613
-
break
614
-
}
615
-
616
-
return operations[startIdx:]
617
-
}
618
-
619
-
// LoadBundleOperations is a public method for external access (e.g., API handlers)
620
-
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
621
-
if !bm.enabled {
622
-
return nil, fmt.Errorf("bundle manager disabled")
623
-
}
624
-
625
-
bf := bm.newBundleFile(bundleNum)
626
-
627
-
if !bf.exists() {
628
-
return nil, fmt.Errorf("bundle %06d not found", bundleNum)
629
-
}
630
-
631
-
if err := bm.load(bf); err != nil {
632
-
return nil, err
633
-
}
634
-
635
-
return bf.operations, nil
636
-
}
637
-
638
-
// calculateCursor determines the cursor value for a given bundle
639
-
// For bundle 1: returns empty string
640
-
// For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format
641
-
func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string {
642
-
if bundleNum == 1 {
643
-
return ""
644
-
}
645
-
646
-
// Try to get cursor from previous bundle in DB
647
-
if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
648
-
return prevBundle.EndTime.Format(time.RFC3339Nano)
649
-
}
650
-
651
-
// If previous bundle not in DB, try to load it from file
652
-
prevBf := bm.newBundleFile(bundleNum - 1)
653
-
if prevBf.exists() {
654
-
if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 {
655
-
// Return the createdAt of the last operation in previous bundle
656
-
lastOp := prevBf.operations[len(prevBf.operations)-1]
657
-
return lastOp.CreatedAt.Format(time.RFC3339Nano)
658
-
}
659
-
}
660
-
661
-
return ""
662
-
}
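The chain rule enforced by the removed VerifyChain above is simple: each bundle records the hash of its predecessor's uncompressed payload, so verification is one walk comparing stored prev-hashes against recomputed hashes. A minimal, self-contained sketch of that idea (the bundle struct and field names here are invented for illustration, not the project's storage types):

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// bundle is a simplified stand-in for indexed bundle metadata.
type bundle struct {
	number   int
	payload  []byte // uncompressed JSONL content
	prevHash string // hex SHA-256 of the previous bundle's payload
}

func hashPayload(p []byte) string {
	sum := sha256.Sum256(p)
	return hex.EncodeToString(sum[:])
}

// verifyChain checks that every bundle links to the hash of its predecessor.
func verifyChain(bundles []bundle) error {
	for i := 1; i < len(bundles); i++ {
		want := hashPayload(bundles[i-1].payload)
		if bundles[i].prevHash != want {
			return fmt.Errorf("bundle %06d chain broken: expected prev_hash=%s, got=%s",
				bundles[i].number, want, bundles[i].prevHash)
		}
	}
	return nil
}

func main() {
	b1 := bundle{number: 1, payload: []byte(`{"op":1}` + "\n")}
	b2 := bundle{number: 2, payload: []byte(`{"op":2}` + "\n"), prevHash: hashPayload(b1.payload)}
	fmt.Println(verifyChain([]bundle{b1, b2})) // <nil>
}
```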
-237
internal/plc/client.go
···
1
-
package plc
2
-
3
-
import (
4
-
"bufio"
5
-
"context"
6
-
"encoding/json"
7
-
"fmt"
8
-
"io"
9
-
"net/http"
10
-
"strconv"
11
-
"time"
12
-
13
-
"github.com/atscan/atscanner/internal/log"
14
-
)
15
-
16
-
type Client struct {
17
-
baseURL string
18
-
httpClient *http.Client
19
-
rateLimiter *RateLimiter
20
-
}
21
-
22
-
func NewClient(baseURL string) *Client {
23
-
// Rate limit: 90 requests per minute (leaving buffer below 100/min limit)
24
-
rateLimiter := NewRateLimiter(90, time.Minute)
25
-
26
-
return &Client{
27
-
baseURL: baseURL,
28
-
httpClient: &http.Client{
29
-
Timeout: 60 * time.Second,
30
-
},
31
-
rateLimiter: rateLimiter,
32
-
}
33
-
}
34
-
35
-
func (c *Client) Close() {
36
-
if c.rateLimiter != nil {
37
-
c.rateLimiter.Stop()
38
-
}
39
-
}
40
-
41
-
type ExportOptions struct {
42
-
Count int
43
-
After string // ISO 8601 datetime string
44
-
}
45
-
46
-
// Export fetches export data from PLC directory with rate limiting and retry
47
-
func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) {
48
-
return c.exportWithRetry(ctx, opts, 5)
49
-
}
50
-
51
-
// exportWithRetry implements retry logic with exponential backoff for rate limits
52
-
func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) {
53
-
var lastErr error
54
-
backoff := 1 * time.Second
55
-
56
-
for attempt := 1; attempt <= maxRetries; attempt++ {
57
-
// Wait for rate limiter token
58
-
if err := c.rateLimiter.Wait(ctx); err != nil {
59
-
return nil, err
60
-
}
61
-
62
-
operations, retryAfter, err := c.doExport(ctx, opts)
63
-
64
-
if err == nil {
65
-
return operations, nil
66
-
}
67
-
68
-
lastErr = err
69
-
70
-
// Check if it's a rate limit error (429)
71
-
if retryAfter > 0 {
72
-
log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d",
73
-
retryAfter, attempt, maxRetries)
74
-
75
-
select {
76
-
case <-time.After(retryAfter):
77
-
continue
78
-
case <-ctx.Done():
79
-
return nil, ctx.Err()
80
-
}
81
-
}
82
-
83
-
// Other errors - exponential backoff
84
-
if attempt < maxRetries {
85
-
log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v",
86
-
attempt, maxRetries, err, backoff)
87
-
88
-
select {
89
-
case <-time.After(backoff):
90
-
backoff *= 2 // Exponential backoff
91
-
case <-ctx.Done():
92
-
return nil, ctx.Err()
93
-
}
94
-
}
95
-
}
96
-
97
-
return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
98
-
}
99
-
100
-
// doExport performs the actual HTTP request
101
-
func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) {
102
-
url := fmt.Sprintf("%s/export", c.baseURL)
103
-
104
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
105
-
if err != nil {
106
-
return nil, 0, err
107
-
}
108
-
109
-
// Add query parameters
110
-
q := req.URL.Query()
111
-
if opts.Count > 0 {
112
-
q.Add("count", fmt.Sprintf("%d", opts.Count))
113
-
}
114
-
if opts.After != "" {
115
-
q.Add("after", opts.After)
116
-
}
117
-
req.URL.RawQuery = q.Encode()
118
-
119
-
resp, err := c.httpClient.Do(req)
120
-
if err != nil {
121
-
return nil, 0, fmt.Errorf("request failed: %w", err)
122
-
}
123
-
defer resp.Body.Close()
124
-
125
-
// Handle rate limiting (429)
126
-
if resp.StatusCode == http.StatusTooManyRequests {
127
-
retryAfter := parseRetryAfter(resp)
128
-
129
-
// Also check x-ratelimit headers for info
130
-
if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" {
131
-
log.Verbose("Rate limit: %s", limit)
132
-
}
133
-
134
-
return nil, retryAfter, fmt.Errorf("rate limited (429)")
135
-
}
136
-
137
-
if resp.StatusCode != http.StatusOK {
138
-
body, _ := io.ReadAll(resp.Body)
139
-
return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
140
-
}
141
-
142
-
var operations []PLCOperation
143
-
144
-
// PLC export returns newline-delimited JSON
145
-
scanner := bufio.NewScanner(resp.Body)
146
-
buf := make([]byte, 0, 64*1024)
147
-
scanner.Buffer(buf, 1024*1024)
148
-
149
-
lineCount := 0
150
-
for scanner.Scan() {
151
-
lineCount++
152
-
line := scanner.Bytes()
153
-
154
-
if len(line) == 0 {
155
-
continue
156
-
}
157
-
158
-
var op PLCOperation
159
-
if err := json.Unmarshal(line, &op); err != nil {
160
-
log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err)
161
-
continue
162
-
}
163
-
164
-
// CRITICAL: Store the original raw JSON bytes
165
-
op.RawJSON = make([]byte, len(line))
166
-
copy(op.RawJSON, line)
167
-
168
-
operations = append(operations, op)
169
-
}
170
-
171
-
if err := scanner.Err(); err != nil {
172
-
return nil, 0, fmt.Errorf("error reading response: %w", err)
173
-
}
174
-
175
-
return operations, 0, nil
176
-
177
-
}
178
-
179
-
// parseRetryAfter parses the Retry-After header
180
-
func parseRetryAfter(resp *http.Response) time.Duration {
181
-
retryAfter := resp.Header.Get("Retry-After")
182
-
if retryAfter == "" {
183
-
// Default to 5 minutes if no header
184
-
return 5 * time.Minute
185
-
}
186
-
187
-
// Try parsing as seconds
188
-
if seconds, err := strconv.Atoi(retryAfter); err == nil {
189
-
return time.Duration(seconds) * time.Second
190
-
}
191
-
192
-
// Try parsing as HTTP date
193
-
if t, err := http.ParseTime(retryAfter); err == nil {
194
-
return time.Until(t)
195
-
}
196
-
197
-
// Default
198
-
return 5 * time.Minute
199
-
}
200
-
201
-
// GetDID fetches a specific DID document from PLC
202
-
func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) {
203
-
// Wait for rate limiter
204
-
if err := c.rateLimiter.Wait(ctx); err != nil {
205
-
return nil, err
206
-
}
207
-
208
-
url := fmt.Sprintf("%s/%s", c.baseURL, did)
209
-
210
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
211
-
if err != nil {
212
-
return nil, err
213
-
}
214
-
215
-
resp, err := c.httpClient.Do(req)
216
-
if err != nil {
217
-
return nil, err
218
-
}
219
-
defer resp.Body.Close()
220
-
221
-
if resp.StatusCode == http.StatusTooManyRequests {
222
-
retryAfter := parseRetryAfter(resp)
223
-
return nil, fmt.Errorf("rate limited, retry after %v", retryAfter)
224
-
}
225
-
226
-
if resp.StatusCode != http.StatusOK {
227
-
body, _ := io.ReadAll(resp.Body)
228
-
return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
229
-
}
230
-
231
-
var doc DIDDocument
232
-
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
233
-
return nil, err
234
-
}
235
-
236
-
return &doc, nil
237
-
}
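The export pagination the deleted client implemented (a count plus an after cursor over newline-delimited JSON) now lives inside the plcbundle library, but the model is easy to illustrate with plain net/http. This sketch targets the public plc.directory endpoint and is not part of the codebase:

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// fetchPage pulls one page of PLC export records and returns the createdAt of
// the last record, which becomes the "after" cursor for the next page.
func fetchPage(base, after string, count int) (last string, n int, err error) {
	q := url.Values{}
	q.Set("count", fmt.Sprint(count))
	if after != "" {
		q.Set("after", after)
	}
	resp, err := http.Get(base + "/export?" + q.Encode())
	if err != nil {
		return "", 0, err
	}
	defer resp.Body.Close()

	sc := bufio.NewScanner(resp.Body)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		var rec struct {
			CreatedAt string `json:"createdAt"`
		}
		if err := json.Unmarshal(sc.Bytes(), &rec); err != nil {
			continue // skip malformed lines, as the old client did
		}
		last, n = rec.CreatedAt, n+1
	}
	return last, n, sc.Err()
}

func main() {
	cursor, n, err := fetchPage("https://plc.directory", "", 10)
	fmt.Println(n, cursor, err)
}
```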
+112
internal/plc/helpers.go
···
1
+
package plc
2
+
3
+
import (
4
+
"regexp"
5
+
"strings"
6
+
)
7
+
8
+
// MaxHandleLength is the maximum allowed handle length for database storage
9
+
const MaxHandleLength = 500
10
+
11
+
// Handle validation regex per AT Protocol spec
12
+
// Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter
13
+
var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)
14
+
15
+
// ExtractHandle safely extracts the handle from a PLC operation
16
+
func ExtractHandle(op *PLCOperation) string {
17
+
if op == nil || op.Operation == nil {
18
+
return ""
19
+
}
20
+
21
+
// Get "alsoKnownAs"
22
+
aka, ok := op.Operation["alsoKnownAs"].([]interface{})
23
+
if !ok {
24
+
return ""
25
+
}
26
+
27
+
// Find the handle (e.g., "at://handle.bsky.social")
28
+
for _, item := range aka {
29
+
if handle, ok := item.(string); ok {
30
+
if strings.HasPrefix(handle, "at://") {
31
+
return strings.TrimPrefix(handle, "at://")
32
+
}
33
+
}
34
+
}
35
+
return ""
36
+
}
37
+
38
+
// ValidateHandle checks if a handle is valid for database storage
39
+
// Returns empty string if handle is invalid (too long or wrong format)
40
+
func ValidateHandle(handle string) string {
41
+
if handle == "" {
42
+
return ""
43
+
}
44
+
45
+
// Check length first (faster)
46
+
if len(handle) > MaxHandleLength {
47
+
return ""
48
+
}
49
+
50
+
// Validate format using regex
51
+
if !handleRegex.MatchString(handle) {
52
+
return ""
53
+
}
54
+
55
+
return handle
56
+
}
57
+
58
+
// ExtractPDS safely extracts the PDS endpoint from a PLC operation
59
+
func ExtractPDS(op *PLCOperation) string {
60
+
if op == nil || op.Operation == nil {
61
+
return ""
62
+
}
63
+
64
+
// Get "services"
65
+
services, ok := op.Operation["services"].(map[string]interface{})
66
+
if !ok {
67
+
return ""
68
+
}
69
+
70
+
// Get "atproto_pds"
71
+
pdsService, ok := services["atproto_pds"].(map[string]interface{})
72
+
if !ok {
73
+
return ""
74
+
}
75
+
76
+
// Get "endpoint"
77
+
if endpoint, ok := pdsService["endpoint"].(string); ok {
78
+
return endpoint
79
+
}
80
+
81
+
return ""
82
+
}
83
+
84
+
// DIDInfo contains extracted metadata from a PLC operation
85
+
type DIDInfo struct {
86
+
Handle string
87
+
PDS string
88
+
}
89
+
90
+
// ExtractDIDInfo extracts both handle and PDS from an operation
91
+
func ExtractDIDInfo(op *PLCOperation) DIDInfo {
92
+
return DIDInfo{
93
+
Handle: ExtractHandle(op),
94
+
PDS: ExtractPDS(op),
95
+
}
96
+
}
97
+
98
+
// ExtractDIDInfoMap creates a map of DID -> info from operations
99
+
// Processes in reverse order to get the latest state for each DID
100
+
func ExtractDIDInfoMap(ops []PLCOperation) map[string]DIDInfo {
101
+
infoMap := make(map[string]DIDInfo)
102
+
103
+
// Process in reverse to get latest state
104
+
for i := len(ops) - 1; i >= 0; i-- {
105
+
op := ops[i]
106
+
if _, exists := infoMap[op.DID]; !exists {
107
+
infoMap[op.DID] = ExtractDIDInfo(&op)
108
+
}
109
+
}
110
+
111
+
return infoMap
112
+
}
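A godoc-style example of how these helpers compose; it would live in a _test.go file, the operation literal is fabricated, and it assumes PLCOperation exposes the DID and Operation fields used above:

```go
package plc

import "fmt"

// ExampleExtractDIDInfoMap builds one fake operation and extracts its
// handle and PDS endpoint via the helpers in this package.
func ExampleExtractDIDInfoMap() {
	ops := []PLCOperation{{
		DID: "did:plc:example",
		Operation: map[string]interface{}{
			"alsoKnownAs": []interface{}{"at://alice.example.com"},
			"services": map[string]interface{}{
				"atproto_pds": map[string]interface{}{
					"endpoint": "https://pds.example.com",
				},
			},
		},
	}}

	info := ExtractDIDInfoMap(ops)["did:plc:example"]
	fmt.Println(ValidateHandle(info.Handle), info.PDS)
	// Output: alice.example.com https://pds.example.com
}
```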
+522
internal/plc/manager.go
···
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/csv"
6
+
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
13
+
"time"
14
+
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
19
+
)
20
+
21
+
// BundleManager wraps the library's manager with database integration
22
+
type BundleManager struct {
23
+
libManager *plcbundle.Manager
24
+
db storage.Database
25
+
bundleDir string
26
+
indexDIDs bool
27
+
}
28
+
29
+
func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30
+
// Create library config
31
+
config := plcbundle.DefaultConfig(bundleDir)
32
+
33
+
// Create PLC client
34
+
var client *plcbundle.PLCClient
35
+
if plcURL != "" {
36
+
client = plcbundle.NewPLCClient(plcURL)
37
+
}
38
+
39
+
// Create library manager
40
+
libMgr, err := plcbundle.NewManager(config, client)
41
+
if err != nil {
42
+
return nil, fmt.Errorf("failed to create library manager: %w", err)
43
+
}
44
+
45
+
return &BundleManager{
46
+
libManager: libMgr,
47
+
db: db,
48
+
bundleDir: bundleDir,
49
+
indexDIDs: indexDIDs,
50
+
}, nil
51
+
}
52
+
53
+
func (bm *BundleManager) Close() {
54
+
if bm.libManager != nil {
55
+
bm.libManager.Close()
56
+
}
57
+
}
58
+
59
+
// LoadBundle loads a bundle (from library) and returns operations
60
+
func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62
+
if err != nil {
63
+
return nil, err
64
+
}
65
+
return bundle.Operations, nil
66
+
}
67
+
68
+
// LoadBundle loads a full bundle with metadata
69
+
func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70
+
return bm.libManager.LoadBundle(ctx, bundleNum)
71
+
}
72
+
73
+
// FetchAndSaveBundle fetches next bundle from PLC and saves
74
+
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75
+
// Fetch from PLC using library
76
+
bundle, err := bm.libManager.FetchNextBundle(ctx)
77
+
if err != nil {
78
+
return nil, err
79
+
}
80
+
81
+
// Save to disk (library handles this)
82
+
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83
+
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84
+
}
85
+
86
+
// Index DIDs if enabled (still use database for this)
87
+
if bm.indexDIDs && len(bundle.Operations) > 0 {
88
+
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89
+
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90
+
}
91
+
}
92
+
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
96
+
}
97
+
98
+
// indexBundleDIDs indexes DIDs from a bundle into the database
99
+
func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100
+
start := time.Now()
101
+
log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102
+
103
+
// Extract DID info from operations
104
+
didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105
+
106
+
successCount := 0
107
+
errorCount := 0
108
+
invalidHandleCount := 0
109
+
110
+
// Upsert each DID
111
+
for did, info := range didInfoMap {
112
+
validHandle := ValidateHandle(info.Handle)
113
+
if info.Handle != "" && validHandle == "" {
114
+
invalidHandleCount++
115
+
}
116
+
117
+
if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118
+
log.Error("Failed to index DID %s: %v", did, err)
119
+
errorCount++
120
+
} else {
121
+
successCount++
122
+
}
123
+
}
124
+
125
+
elapsed := time.Since(start)
126
+
log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127
+
successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128
+
129
+
return nil
130
+
}
131
+
132
+
// VerifyChain verifies bundle chain integrity (endBundle is currently unused; the library call checks the chain it manages)
133
+
func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134
+
result, err := bm.libManager.VerifyChain(ctx)
135
+
if err != nil {
136
+
return err
137
+
}
138
+
139
+
if !result.Valid {
140
+
return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141
+
}
142
+
143
+
return nil
144
+
}
145
+
146
+
// GetChainInfo returns chain information
147
+
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148
+
return bm.libManager.GetInfo(), nil
149
+
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = 0 // keep as int so the type assertion below cannot panic
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp finds bundle containing a timestamp
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
type dailyStat struct {
306
+
lastBundle int
307
+
bundleCount int
308
+
totalUncompressed int64
309
+
totalCompressed int64
310
+
}
311
+
312
+
dailyStats := make(map[string]*dailyStat)
313
+
314
+
// Map to store the cumulative values at the end of each date
315
+
dateCumulatives := make(map[string]struct {
316
+
uncompressed int64
317
+
compressed int64
318
+
})
319
+
320
+
// Calculate cumulative totals as we iterate through sorted bundles
321
+
cumulativeUncompressed := int64(0)
322
+
cumulativeCompressed := int64(0)
323
+
324
+
for _, bundle := range filtered {
325
+
dateStr := bundle.StartTime.Format("2006-01-02")
326
+
327
+
// Update cumulative totals
328
+
cumulativeUncompressed += bundle.UncompressedSize
329
+
cumulativeCompressed += bundle.CompressedSize
330
+
331
+
if stat, exists := dailyStats[dateStr]; exists {
332
+
// Update existing day
333
+
if bundle.BundleNumber > stat.lastBundle {
334
+
stat.lastBundle = bundle.BundleNumber
335
+
}
336
+
stat.bundleCount++
337
+
stat.totalUncompressed += bundle.UncompressedSize
338
+
stat.totalCompressed += bundle.CompressedSize
339
+
} else {
340
+
// Create new day entry
341
+
dailyStats[dateStr] = &dailyStat{
342
+
lastBundle: bundle.BundleNumber,
343
+
bundleCount: 1,
344
+
totalUncompressed: bundle.UncompressedSize,
345
+
totalCompressed: bundle.CompressedSize,
346
+
}
347
+
}
348
+
349
+
// Store the cumulative values at the end of this date
350
+
// (will be overwritten if there are multiple bundles on the same day)
351
+
dateCumulatives[dateStr] = struct {
352
+
uncompressed int64
353
+
compressed int64
354
+
}{
355
+
uncompressed: cumulativeUncompressed,
356
+
compressed: cumulativeCompressed,
357
+
}
358
+
}
359
+
360
+
// Convert map to sorted slice by date
361
+
var dates []string
362
+
for date := range dailyStats {
363
+
dates = append(dates, date)
364
+
}
365
+
sort.Strings(dates)
366
+
367
+
// Build history points with cumulative operations
368
+
var history []*storage.PLCHistoryPoint
369
+
cumulativeOps := 0
370
+
371
+
for _, date := range dates {
372
+
stat := dailyStats[date]
373
+
cumulativeOps += stat.bundleCount * 10000 // assumes each complete bundle contains 10,000 operations
374
+
cumulative := dateCumulatives[date]
375
+
376
+
history = append(history, &storage.PLCHistoryPoint{
377
+
Date: date,
378
+
BundleNumber: stat.lastBundle,
379
+
OperationCount: cumulativeOps,
380
+
UncompressedSize: stat.totalUncompressed,
381
+
CompressedSize: stat.totalCompressed,
382
+
CumulativeUncompressed: cumulative.uncompressed,
383
+
CumulativeCompressed: cumulative.compressed,
384
+
})
385
+
}
386
+
387
+
// Apply limit if specified
388
+
if limit > 0 && len(history) > limit {
389
+
history = history[:limit]
390
+
}
391
+
392
+
return history, nil
393
+
}
394
+
395
+
// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396
+
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397
+
// Define the path to the labels file
398
+
labelsDir := filepath.Join(bm.bundleDir, "labels")
399
+
labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400
+
401
+
// Check if file exists
402
+
if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403
+
log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404
+
// Return empty, not an error
405
+
return []*PLCOpLabel{}, nil
406
+
}
407
+
408
+
// Open the Zstd-compressed file
409
+
file, err := os.Open(labelsFile)
410
+
if err != nil {
411
+
return nil, fmt.Errorf("failed to open labels file: %w", err)
412
+
}
413
+
defer file.Close()
414
+
415
+
// Create a Zstd reader
416
+
zstdReader, err := zstd.NewReader(file)
417
+
if err != nil {
418
+
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419
+
}
420
+
defer zstdReader.Close()
421
+
422
+
// Create a CSV reader
423
+
csvReader := csv.NewReader(zstdReader)
424
+
// The labels file has no header row, so records are read directly.
425
+
// Field count (6 per record) is validated in parseLabelRecord, so
426
+
// csvReader.FieldsPerRecord is intentionally left unset here.
427
+
428
+
var labels []*PLCOpLabel
429
+
430
+
// Read all records
431
+
for {
432
+
// Check for context cancellation
433
+
if err := ctx.Err(); err != nil {
434
+
return nil, err
435
+
}
436
+
437
+
record, err := csvReader.Read()
438
+
if err == io.EOF {
439
+
break // End of file
440
+
}
441
+
if err != nil {
442
+
log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443
+
continue // Skip bad line
444
+
}
445
+
446
+
// Parse the CSV record (which is []string)
447
+
label, err := parseLabelRecord(record)
448
+
if err != nil {
449
+
log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450
+
continue // Skip bad data
451
+
}
452
+
453
+
labels = append(labels, label)
454
+
}
455
+
456
+
return labels, nil
457
+
}
458
+
459
+
// parseLabelRecord converts a CSV record in the new 6-field label format into a PLCOpLabel struct
460
+
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461
+
// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462
+
if len(record) != 6 {
463
+
err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464
+
// --- ADDED LOG ---
465
+
log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466
+
// ---
467
+
return nil, err
468
+
}
469
+
470
+
// 0:bundle
471
+
bundle, err := strconv.Atoi(record[0])
472
+
if err != nil {
473
+
// --- ADDED LOG ---
474
+
log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475
+
// ---
476
+
return nil, fmt.Errorf("parsing 'bundle': %w", err)
477
+
}
478
+
479
+
// 1:position
480
+
position, err := strconv.Atoi(record[1])
481
+
if err != nil {
482
+
// --- ADDED LOG ---
483
+
log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484
+
// ---
485
+
return nil, fmt.Errorf("parsing 'position': %w", err)
486
+
}
487
+
488
+
// 2:cid(short)
489
+
shortCID := record[2]
490
+
491
+
// 3:size
492
+
size, err := strconv.Atoi(record[3])
493
+
if err != nil {
494
+
// --- ADDED LOG ---
495
+
log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496
+
// ---
497
+
return nil, fmt.Errorf("parsing 'size': %w", err)
498
+
}
499
+
500
+
// 4:confidence
501
+
confidence, err := strconv.ParseFloat(record[4], 64)
502
+
if err != nil {
503
+
// --- ADDED LOG ---
504
+
log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505
+
// ---
506
+
return nil, fmt.Errorf("parsing 'confidence': %w", err)
507
+
}
508
+
509
+
// 5:labels
510
+
detectors := strings.Split(record[5], ";")
511
+
512
+
label := &PLCOpLabel{
513
+
Bundle: bundle,
514
+
Position: position,
515
+
CID: shortCID,
516
+
Size: size,
517
+
Confidence: confidence,
518
+
Detectors: detectors,
519
+
}
520
+
521
+
return label, nil
522
+
}
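For reference, a labels record in this 6-field layout looks like `42,17,bafkreihdx,182,0.93,spam;impersonation` (values invented). A small in-package test sketch of parseLabelRecord on such a line:

```go
package plc

import (
	"encoding/csv"
	"strings"
	"testing"
)

// TestParseLabelRecord_Format feeds parseLabelRecord one invented record in
// the documented bundle,position,cid,size,confidence,labels layout.
func TestParseLabelRecord_Format(t *testing.T) {
	r := csv.NewReader(strings.NewReader("42,17,bafkreihdx,182,0.93,spam;impersonation\n"))
	rec, err := r.Read()
	if err != nil {
		t.Fatal(err)
	}

	label, err := parseLabelRecord(rec)
	if err != nil {
		t.Fatal(err)
	}
	if label.Bundle != 42 || label.Position != 17 || label.Confidence != 0.93 {
		t.Fatalf("unexpected parse result: %+v", label)
	}
	if len(label.Detectors) != 2 || label.Detectors[0] != "spam" {
		t.Fatalf("unexpected detectors: %v", label.Detectors)
	}
}
```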
-70
internal/plc/ratelimiter.go
···
1
-
package plc
2
-
3
-
import (
4
-
"context"
5
-
"time"
6
-
)
7
-
8
-
// RateLimiter implements a token bucket rate limiter
9
-
type RateLimiter struct {
10
-
tokens chan struct{}
11
-
refillRate time.Duration
12
-
maxTokens int
13
-
stopRefill chan struct{}
14
-
}
15
-
16
-
// NewRateLimiter creates a new rate limiter
17
-
// Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
18
-
func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
19
-
rl := &RateLimiter{
20
-
tokens: make(chan struct{}, requestsPerPeriod),
21
-
refillRate: period / time.Duration(requestsPerPeriod),
22
-
maxTokens: requestsPerPeriod,
23
-
stopRefill: make(chan struct{}),
24
-
}
25
-
26
-
// Fill initially
27
-
for i := 0; i < requestsPerPeriod; i++ {
28
-
rl.tokens <- struct{}{}
29
-
}
30
-
31
-
// Start refill goroutine
32
-
go rl.refill()
33
-
34
-
return rl
35
-
}
36
-
37
-
// refill adds tokens at the specified rate
38
-
func (rl *RateLimiter) refill() {
39
-
ticker := time.NewTicker(rl.refillRate)
40
-
defer ticker.Stop()
41
-
42
-
for {
43
-
select {
44
-
case <-ticker.C:
45
-
select {
46
-
case rl.tokens <- struct{}{}:
47
-
// Token added
48
-
default:
49
-
// Buffer full, skip
50
-
}
51
-
case <-rl.stopRefill:
52
-
return
53
-
}
54
-
}
55
-
}
56
-
57
-
// Wait blocks until a token is available
58
-
func (rl *RateLimiter) Wait(ctx context.Context) error {
59
-
select {
60
-
case <-rl.tokens:
61
-
return nil
62
-
case <-ctx.Done():
63
-
return ctx.Err()
64
-
}
65
-
}
66
-
67
-
// Stop stops the rate limiter
68
-
func (rl *RateLimiter) Stop() {
69
-
close(rl.stopRefill)
70
-
}
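With the hand-rolled client gone, this token bucket goes with it; throttling against plc.directory is presumably handled inside the plcbundle client now. If a local limiter is ever needed again, the same 90-requests-per-minute budget could be expressed with golang.org/x/time/rate, shown here only as a possible alternative, not something this change adds:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// One token roughly every 667ms with a burst of 90 — the same budget as
	// NewRateLimiter(90, time.Minute) in the removed code.
	limiter := rate.NewLimiter(rate.Every(time.Minute/90), 90)

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	for i := 0; i < 3; i++ {
		if err := limiter.Wait(ctx); err != nil { // blocks until a token is available
			fmt.Println("stopped:", err)
			return
		}
		fmt.Println("request", i, "allowed")
	}
}
```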
+92
-415
internal/plc/scanner.go
···
2
2
3
3
import (
4
4
"context"
5
-
"encoding/json"
6
5
"fmt"
7
6
"strings"
8
7
"time"
9
8
10
-
"github.com/acarl005/stripansi"
11
-
"github.com/atscan/atscanner/internal/config"
12
-
"github.com/atscan/atscanner/internal/log"
13
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/storage"
14
12
)
15
13
16
14
type Scanner struct {
17
-
client *Client
15
+
bundleManager *BundleManager
18
16
db storage.Database
19
17
config config.PLCConfig
20
-
bundleManager *BundleManager
21
18
}
22
19
23
-
func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
24
-
bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
25
-
if err != nil {
26
-
log.Error("Warning: failed to initialize bundle manager: %v", err)
27
-
bundleManager = &BundleManager{enabled: false}
28
-
}
20
+
func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
21
+
log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
29
22
30
23
return &Scanner{
31
-
client: NewClient(cfg.DirectoryURL),
24
+
bundleManager: bundleManager, // Use provided instance
32
25
db: db,
33
26
config: cfg,
34
-
bundleManager: bundleManager,
35
27
}
36
28
}
37
29
38
30
func (s *Scanner) Close() {
39
-
if s.bundleManager != nil {
40
-
s.bundleManager.Close()
41
-
}
42
-
}
43
-
44
-
// ScanMetrics tracks scan progress
45
-
type ScanMetrics struct {
46
-
totalFetched int64 // Total ops fetched from PLC/bundles
47
-
totalProcessed int64 // Unique ops processed (after dedup)
48
-
newEndpoints int64 // New endpoints discovered
49
-
endpointCounts map[string]int64
50
-
currentBundle int
51
-
startTime time.Time
52
-
}
53
-
54
-
func newMetrics(startBundle int) *ScanMetrics {
55
-
return &ScanMetrics{
56
-
endpointCounts: make(map[string]int64),
57
-
currentBundle: startBundle,
58
-
startTime: time.Now(),
59
-
}
60
-
}
61
-
62
-
func (m *ScanMetrics) logSummary() {
63
-
summary := formatEndpointCounts(m.endpointCounts)
64
-
if m.newEndpoints > 0 {
65
-
log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
66
-
m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
67
-
} else {
68
-
log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
69
-
m.totalProcessed, m.totalFetched, time.Since(m.startTime))
70
-
}
31
+
// Don't close bundleManager here - it's shared
71
32
}
72
33
73
34
func (s *Scanner) Scan(ctx context.Context) error {
74
35
log.Info("Starting PLC directory scan...")
75
-
log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
76
36
77
37
cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
78
38
if err != nil {
79
39
return fmt.Errorf("failed to get scan cursor: %w", err)
80
40
}
81
41
82
-
startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
83
-
metrics := newMetrics(startBundle)
42
+
metrics := newMetrics(cursor.LastBundleNumber + 1)
84
43
85
-
if startBundle > 1 {
86
-
if err := s.ensureContinuity(ctx, startBundle); err != nil {
87
-
return err
88
-
}
89
-
}
90
-
91
-
// Handle existing mempool first
92
-
if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
93
-
return s.handleMempoolOnly(ctx, metrics)
94
-
}
95
-
96
-
// Process bundles until incomplete or error
44
+
// Main processing loop
97
45
for {
98
46
if err := ctx.Err(); err != nil {
99
47
return err
100
48
}
101
49
102
-
if err := s.processSingleBundle(ctx, metrics); err != nil {
103
-
if s.shouldRetry(err) {
104
-
continue
105
-
}
106
-
break
107
-
}
108
-
109
-
if err := s.updateCursor(ctx, cursor, metrics); err != nil {
110
-
log.Error("Warning: failed to update cursor: %v", err)
111
-
}
112
-
}
113
-
114
-
// Try to finalize mempool
115
-
s.finalizeMempool(ctx, metrics)
116
-
117
-
metrics.logSummary()
118
-
return nil
119
-
}
120
-
121
-
func (s *Scanner) calculateStartBundle(lastBundle int) int {
122
-
if lastBundle == 0 {
123
-
return 1
124
-
}
125
-
return lastBundle + 1
126
-
}
127
-
128
-
func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
129
-
log.Info("Checking bundle continuity...")
130
-
if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
131
-
return fmt.Errorf("bundle continuity check failed: %w", err)
132
-
}
133
-
return nil
134
-
}
135
-
136
-
func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
137
-
count, err := s.db.GetMempoolCount(ctx)
138
-
if err != nil {
139
-
return false, err
140
-
}
141
-
return count > 0, nil
142
-
}
143
-
144
-
func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
145
-
count, _ := s.db.GetMempoolCount(ctx)
146
-
log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
147
-
148
-
if err := s.fillMempool(ctx, m); err != nil {
149
-
return err
150
-
}
151
-
152
-
if err := s.processMempool(ctx, m); err != nil {
153
-
log.Error("Error processing mempool: %v", err)
154
-
}
155
-
156
-
m.logSummary()
157
-
return nil
158
-
}
159
-
160
-
func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
161
-
log.Verbose("→ Processing bundle %06d...", m.currentBundle)
162
-
163
-
ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
164
-
if err != nil {
165
-
return s.handleBundleError(err, m)
166
-
}
167
-
168
-
if isComplete {
169
-
return s.handleCompleteBundle(ctx, ops, m)
170
-
}
171
-
return s.handleIncompleteBundle(ctx, ops, m)
172
-
}
173
-
174
-
func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
175
-
log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
176
-
177
-
if strings.Contains(err.Error(), "rate limited") {
178
-
log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
179
-
time.Sleep(5 * time.Minute)
180
-
return fmt.Errorf("retry")
181
-
}
182
-
183
-
if m.currentBundle > 1 {
184
-
log.Info("→ Reached end of available data")
185
-
}
186
-
return err
187
-
}
188
-
189
-
func (s *Scanner) shouldRetry(err error) bool {
190
-
return err != nil && err.Error() == "retry"
191
-
}
192
-
193
-
func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
194
-
counts, err := s.processBatch(ctx, ops)
195
-
if err != nil {
196
-
return err
197
-
}
198
-
199
-
s.mergeCounts(m.endpointCounts, counts)
200
-
m.totalProcessed += int64(len(ops)) // Unique ops after dedup
201
-
m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
202
-
203
-
batchTotal := sumCounts(counts)
204
-
log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
205
-
m.currentBundle, len(ops), batchTotal)
206
-
207
-
m.currentBundle++
208
-
return nil
209
-
}
210
-
211
-
func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
212
-
log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
213
-
214
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
215
-
return err
216
-
}
217
-
218
-
s.finalizeMempool(ctx, m)
219
-
return fmt.Errorf("incomplete") // Signal end of processing
220
-
}
221
-
222
-
func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
223
-
if err := s.fillMempool(ctx, m); err != nil {
224
-
log.Error("Error filling mempool: %v", err)
225
-
}
226
-
if err := s.processMempool(ctx, m); err != nil {
227
-
log.Error("Error processing mempool: %v", err)
228
-
}
229
-
}
230
-
231
-
func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
232
-
const fetchLimit = 1000
233
-
234
-
for {
235
-
count, err := s.db.GetMempoolCount(ctx)
50
+
// Fetch and save bundle (library handles mempool internally)
51
+
bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
236
52
if err != nil {
237
-
return err
238
-
}
53
+
if isInsufficientOpsError(err) {
54
+
// Show mempool status
55
+
stats := s.bundleManager.libManager.GetMempoolStats()
56
+
mempoolCount, _ := stats["count"].(int) // safe assertion: zero if the key is missing or not an int
239
57
240
-
if count >= BUNDLE_SIZE {
241
-
log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
242
-
return nil
243
-
}
58
+
if mempoolCount > 0 {
59
+
log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
60
+
mempoolCount, BUNDLE_SIZE)
61
+
} else {
62
+
log.Info("→ Caught up! No operations available")
63
+
}
64
+
break
65
+
}
244
66
245
-
log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
67
+
if strings.Contains(err.Error(), "rate limited") {
68
+
log.Info("⚠ Rate limited, pausing for 5 minutes...")
69
+
time.Sleep(5 * time.Minute)
70
+
continue
71
+
}
246
72
247
-
// ✅ Fix: Don't capture unused 'ops' variable
248
-
shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
249
-
if err != nil {
250
-
return err
73
+
return fmt.Errorf("failed to fetch bundle: %w", err)
251
74
}
252
75
253
-
if !shouldContinue {
254
-
finalCount, _ := s.db.GetMempoolCount(ctx)
255
-
log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
256
-
return nil
257
-
}
258
-
}
259
-
}
260
-
261
-
func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
262
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
263
-
if err != nil {
264
-
return false, err
265
-
}
266
-
267
-
var after string
268
-
if lastOp != nil {
269
-
after = lastOp.CreatedAt.Format(time.RFC3339Nano)
270
-
log.Verbose(" Using cursor: %s", after)
271
-
}
272
-
273
-
ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
274
-
if err != nil {
275
-
return false, fmt.Errorf("failed to fetch from PLC: %w", err)
276
-
}
277
-
278
-
fetchedCount := len(ops)
279
-
m.totalFetched += int64(fetchedCount) // Track all fetched
280
-
log.Verbose(" Fetched %d operations from PLC", fetchedCount)
281
-
282
-
if fetchedCount == 0 {
283
-
count, _ := s.db.GetMempoolCount(ctx)
284
-
log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
285
-
return false, nil
286
-
}
287
-
288
-
beforeCount, err := s.db.GetMempoolCount(ctx)
289
-
if err != nil {
290
-
return false, err
291
-
}
292
-
293
-
endpointsBefore := sumCounts(m.endpointCounts)
294
-
if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
295
-
return false, err
296
-
}
297
-
endpointsAfter := sumCounts(m.endpointCounts)
298
-
m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
299
-
300
-
afterCount, err := s.db.GetMempoolCount(ctx)
301
-
if err != nil {
302
-
return false, err
303
-
}
304
-
305
-
uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
306
-
m.totalProcessed += uniqueAdded // Track unique ops processed
307
-
308
-
log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)",
309
-
uniqueAdded, int64(fetchedCount)-uniqueAdded)
310
-
311
-
// Continue only if got full batch
312
-
shouldContinue := fetchedCount >= limit
313
-
if !shouldContinue {
314
-
log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
315
-
}
316
-
317
-
return shouldContinue, nil
318
-
}
319
-
320
-
func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
321
-
mempoolOps := make([]storage.MempoolOperation, len(ops))
322
-
for i, op := range ops {
323
-
mempoolOps[i] = storage.MempoolOperation{
324
-
DID: op.DID,
325
-
Operation: string(op.RawJSON),
326
-
CID: op.CID,
327
-
CreatedAt: op.CreatedAt,
328
-
}
329
-
}
330
-
331
-
if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
332
-
return err
333
-
}
334
-
335
-
// Process for endpoint discovery
336
-
batchCounts, err := s.processBatch(ctx, ops)
337
-
s.mergeCounts(counts, batchCounts)
338
-
return err
339
-
}
340
-
341
-
func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
342
-
for {
343
-
count, err := s.db.GetMempoolCount(ctx)
76
+
// Process operations for endpoint discovery
77
+
counts, err := s.processBatch(ctx, bundle.Operations)
344
78
if err != nil {
345
-
return err
79
+
log.Error("Failed to process batch: %v", err)
80
+
// Continue anyway
346
81
}
347
82
348
-
log.Verbose("Mempool contains %d operations", count)
349
-
350
-
if count < BUNDLE_SIZE {
351
-
log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
352
-
return nil
353
-
}
83
+
// Update metrics
84
+
s.mergeCounts(metrics.endpointCounts, counts)
85
+
metrics.totalProcessed += int64(len(bundle.Operations))
86
+
metrics.newEndpoints += sumCounts(counts)
87
+
metrics.currentBundle = bundle.BundleNumber
354
88
355
-
log.Info("→ Creating bundle from mempool (%d operations available)...", count)
89
+
log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
90
+
bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
356
91
357
-
// Updated to receive 4 values instead of 3
358
-
bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
359
-
if err != nil {
360
-
return err
361
-
}
362
-
363
-
// Process and update metrics
364
-
countsBefore := sumCounts(m.endpointCounts)
365
-
counts, _ := s.processBatch(ctx, ops)
366
-
s.mergeCounts(m.endpointCounts, counts)
367
-
newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
368
-
369
-
m.totalProcessed += int64(len(ops))
370
-
m.newEndpoints += newEndpointsFound
371
-
m.currentBundle = bundleNum
372
-
373
-
if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92
+
// Update cursor
93
+
if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
374
94
log.Error("Warning: failed to update cursor: %v", err)
375
95
}
376
-
377
-
log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
378
96
}
379
-
}
380
97
381
-
func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
382
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
383
-
if err != nil {
384
-
return 0, nil, "", err
98
+
// Show final mempool status
99
+
stats := s.bundleManager.libManager.GetMempoolStats()
100
+
if count, ok := stats["count"].(int); ok && count > 0 {
101
+
log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102
+
count, float64(count)/float64(BUNDLE_SIZE)*100)
385
103
}
386
104
387
-
ops, ids := s.deduplicateMempool(mempoolOps)
388
-
if len(ops) < BUNDLE_SIZE {
389
-
return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
390
-
}
391
-
392
-
// Determine cursor from last bundle
393
-
cursor := ""
394
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
395
-
if err == nil && lastBundle > 0 {
396
-
if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
397
-
cursor = bundle.EndTime.Format(time.RFC3339Nano)
398
-
}
399
-
}
400
-
401
-
bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
402
-
if err != nil {
403
-
return 0, nil, "", err
404
-
}
405
-
406
-
if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
407
-
return 0, nil, "", err
408
-
}
409
-
410
-
return bundleNum, ops, cursor, nil
105
+
metrics.logSummary()
106
+
return nil
411
107
}
412
108
413
-
func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
414
-
ops := make([]PLCOperation, 0, BUNDLE_SIZE)
415
-
ids := make([]int64, 0, BUNDLE_SIZE)
416
-
seenCIDs := make(map[string]bool)
417
-
418
-
for _, mop := range mempoolOps {
419
-
if seenCIDs[mop.CID] {
420
-
ids = append(ids, mop.ID)
421
-
continue
422
-
}
423
-
seenCIDs[mop.CID] = true
424
-
425
-
var op PLCOperation
426
-
json.Unmarshal([]byte(mop.Operation), &op)
427
-
op.RawJSON = []byte(mop.Operation)
428
-
429
-
ops = append(ops, op)
430
-
ids = append(ids, mop.ID)
431
-
432
-
if len(ops) >= BUNDLE_SIZE {
433
-
break
434
-
}
435
-
}
436
-
437
-
return ops, ids
438
-
}
439
-
109
+
// processBatch extracts endpoints from operations
440
110
func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
441
111
counts := make(map[string]int64)
442
112
seen := make(map[string]*PLCOperation)
443
113
444
114
// Collect unique endpoints
445
-
for _, op := range ops {
115
+
for i := range ops {
116
+
op := &ops[i]
117
+
446
118
if op.IsNullified() {
447
119
continue
448
120
}
449
-
for _, ep := range s.extractEndpointsFromOperation(op) {
121
+
122
+
for _, ep := range s.extractEndpointsFromOperation(*op) {
450
123
key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
451
124
if _, exists := seen[key]; !exists {
452
-
seen[key] = &op
125
+
seen[key] = op
453
126
}
454
127
}
455
128
}
···
465
138
}
466
139
467
140
if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
468
-
log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141
+
log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
469
142
continue
470
143
}
471
144
472
-
log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145
+
log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
473
146
counts[epType]++
474
147
}
475
148
476
149
return counts, nil
477
-
}
478
-
479
-
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
480
-
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
481
-
EndpointType: epType,
482
-
Endpoint: endpoint,
483
-
DiscoveredAt: discoveredAt,
484
-
LastChecked: time.Time{},
485
-
Status: storage.EndpointStatusUnknown,
486
-
})
487
150
}
488
151
489
152
func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
526
189
return nil
527
190
}
528
191
529
-
func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
530
-
return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
531
-
Source: "plc_directory",
532
-
LastBundleNumber: m.currentBundle - 1,
533
-
LastScanTime: time.Now().UTC(),
534
-
RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192
+
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
194
+
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195
+
EndpointType: epType,
196
+
Endpoint: endpoint,
197
+
DiscoveredAt: discoveredAt,
198
+
LastChecked: time.Time{},
199
+
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
535
201
})
536
202
}
537
203
···
559
225
return total
560
226
}
561
227
562
-
func formatEndpointCounts(counts map[string]int64) string {
563
-
if len(counts) == 0 {
564
-
return "0 new endpoints"
565
-
}
228
+
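// isInsufficientOpsError reports whether err looks like the "insufficient operations"
// error, presumably returned by the bundle layer when the mempool does not yet hold a
// full bundle's worth of operations; matching on the error text is an assumption here.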
func isInsufficientOpsError(err error) bool {
229
+
return err != nil && strings.Contains(err.Error(), "insufficient operations")
230
+
}
566
231
567
-
total := sumCounts(counts)
232
+
// ScanMetrics tracks scan progress
233
+
type ScanMetrics struct {
234
+
totalProcessed int64
235
+
newEndpoints int64
236
+
endpointCounts map[string]int64
237
+
currentBundle int
238
+
startTime time.Time
239
+
}
568
240
569
-
if len(counts) == 1 {
570
-
for typ, count := range counts {
571
-
return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
572
-
}
241
+
func newMetrics(startBundle int) *ScanMetrics {
242
+
return &ScanMetrics{
243
+
endpointCounts: make(map[string]int64),
244
+
currentBundle: startBundle,
245
+
startTime: time.Now(),
573
246
}
247
+
}
574
248
575
-
parts := make([]string, 0, len(counts))
576
-
for typ, count := range counts {
577
-
parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249
+
func (m *ScanMetrics) logSummary() {
250
+
if m.newEndpoints > 0 {
251
+
log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252
+
m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253
+
} else {
254
+
log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255
+
m.totalProcessed, time.Since(m.startTime))
578
256
}
579
-
return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
580
257
}
+68
-55
internal/plc/types.go
···
1
1
package plc
2
2
3
-
import "time"
4
-
5
-
type PLCOperation struct {
6
-
DID string `json:"did"`
7
-
Operation map[string]interface{} `json:"operation"`
8
-
CID string `json:"cid"`
9
-
Nullified interface{} `json:"nullified,omitempty"`
10
-
CreatedAt time.Time `json:"createdAt"`
11
-
12
-
RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13
-
}
3
+
import (
4
+
"net/url"
5
+
"strings"
14
6
15
-
// Helper method to check if nullified
16
-
func (op *PLCOperation) IsNullified() bool {
17
-
if op.Nullified == nil {
18
-
return false
19
-
}
20
-
21
-
switch v := op.Nullified.(type) {
22
-
case bool:
23
-
return v
24
-
case string:
25
-
return v != ""
26
-
default:
27
-
return false
28
-
}
29
-
}
30
-
31
-
// Get nullifying CID if available
32
-
func (op *PLCOperation) GetNullifyingCID() string {
33
-
if s, ok := op.Nullified.(string); ok {
34
-
return s
35
-
}
36
-
return ""
37
-
}
7
+
plclib "tangled.org/atscan.net/plcbundle/plc"
8
+
)
38
9
39
-
type DIDDocument struct {
40
-
Context []string `json:"@context"`
41
-
ID string `json:"id"`
42
-
AlsoKnownAs []string `json:"alsoKnownAs"`
43
-
VerificationMethod []VerificationMethod `json:"verificationMethod"`
44
-
Service []Service `json:"service"`
45
-
}
10
+
// Re-export library types
11
+
type PLCOperation = plclib.PLCOperation
12
+
type DIDDocument = plclib.DIDDocument
13
+
type Client = plclib.Client
14
+
type ExportOptions = plclib.ExportOptions
46
15
47
-
type VerificationMethod struct {
48
-
ID string `json:"id"`
49
-
Type string `json:"type"`
50
-
Controller string `json:"controller"`
51
-
PublicKeyMultibase string `json:"publicKeyMultibase"`
52
-
}
16
+
// Project-specific constants and types
17
+
const BUNDLE_SIZE = 10000
53
18
54
-
type Service struct {
55
-
ID string `json:"id"`
56
-
Type string `json:"type"`
57
-
ServiceEndpoint string `json:"serviceEndpoint"`
58
-
}
59
-
60
-
// DIDHistoryEntry represents a single operation in DID history
61
19
type DIDHistoryEntry struct {
62
20
Operation PLCOperation `json:"operation"`
63
21
PLCBundle string `json:"plc_bundle,omitempty"`
64
22
}
65
23
66
-
// DIDHistory represents the full history of a DID
67
24
type DIDHistory struct {
68
25
DID string `json:"did"`
69
26
Current *PLCOperation `json:"current"`
···
74
31
Type string
75
32
Endpoint string
76
33
}
34
+
35
+
// PLCOpLabel holds metadata from the label CSV file
36
+
type PLCOpLabel struct {
37
+
Bundle int `json:"bundle"`
38
+
Position int `json:"position"`
39
+
CID string `json:"cid"`
40
+
Size int `json:"size"`
41
+
Confidence float64 `json:"confidence"`
42
+
Detectors []string `json:"detectors"`
43
+
}
44
+
45
+
// validateEndpoint checks that the endpoint is in the expected format: https://<domain> (no path, query, or fragment)
46
+
func validateEndpoint(endpoint string) bool {
47
+
// Must not be empty
48
+
if endpoint == "" {
49
+
return false
50
+
}
51
+
52
+
// Must not have trailing slash
53
+
if strings.HasSuffix(endpoint, "/") {
54
+
return false
55
+
}
56
+
57
+
// Parse URL
58
+
u, err := url.Parse(endpoint)
59
+
if err != nil {
60
+
return false
61
+
}
62
+
63
+
// Must use https scheme
64
+
if u.Scheme != "https" {
65
+
return false
66
+
}
67
+
68
+
// Must have a host
69
+
if u.Host == "" {
70
+
return false
71
+
}
72
+
73
+
// Must not have path (except empty)
74
+
if u.Path != "" && u.Path != "/" {
75
+
return false
76
+
}
77
+
78
+
// Must not have query parameters
79
+
if u.RawQuery != "" {
80
+
return false
81
+
}
82
+
83
+
// Must not have fragment
84
+
if u.Fragment != "" {
85
+
return false
86
+
}
87
+
88
+
return true
89
+
}
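// Illustrative examples (not part of the diff): "https://pds.example.com" is accepted,
// while "http://pds.example.com", "https://pds.example.com/" and
// "https://pds.example.com/xrpc" are all rejected by the checks above.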
+19
-21
internal/storage/db.go
···
27
27
EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error)
28
28
GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error)
29
29
GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error)
30
-
UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error
30
+
UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error
31
31
SaveEndpointScan(ctx context.Context, scan *EndpointScan) error
32
32
SetScanRetention(retention int)
33
33
UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
34
+
UpdateEndpointServerDID(ctx context.Context, endpointID int64, serverDID string) error
35
+
GetDuplicateEndpoints(ctx context.Context) (map[string][]string, error)
34
36
35
37
// PDS virtual endpoints (created via JOINs)
36
38
GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error)
37
39
GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error)
38
40
GetPDSStats(ctx context.Context) (*PDSStats, error)
41
+
GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error)
42
+
GetVersionStats(ctx context.Context) ([]*VersionStats, error)
39
43
40
44
// IP operations (IP as primary key)
41
45
UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error
···
46
50
GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
47
51
UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
48
52
49
-
// Bundle operations
50
-
CreateBundle(ctx context.Context, bundle *PLCBundle) error
51
-
GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
52
-
GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
53
-
GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
54
-
GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
55
-
GetLastBundleNumber(ctx context.Context) (int, error)
56
-
GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
57
-
58
-
// Mempool operations
59
-
AddToMempool(ctx context.Context, ops []MempoolOperation) error
60
-
GetMempoolCount(ctx context.Context) (int, error)
61
-
GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
62
-
DeleteFromMempool(ctx context.Context, ids []int64) error
63
-
GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
64
-
GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
65
-
GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
66
-
GetMempoolUncompressedSize(ctx context.Context) (int64, error)
67
-
68
53
// Metrics
69
54
StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
70
55
GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
71
56
GetEndpointStats(ctx context.Context) (*EndpointStats, error)
72
57
73
58
// DID operations
74
-
UpsertDID(ctx context.Context, did string, bundleNum int) error
59
+
UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error
60
+
UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error
75
61
GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error)
62
+
GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) // NEW
63
+
GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error)
76
64
AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error
77
65
GetTotalDIDCount(ctx context.Context) (int64, error)
66
+
67
+
// PDS Repo operations
68
+
UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error
69
+
GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error)
70
+
GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error)
71
+
GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error)
72
+
73
+
// Internal
74
+
GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error)
75
+
GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error)
78
76
}
+1199
-601
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
10
+
"github.com/atscan/atscand/internal/log"
11
11
"github.com/jackc/pgx/v5"
12
12
"github.com/jackc/pgx/v5/pgxpool"
13
13
_ "github.com/jackc/pgx/v5/stdlib"
14
+
"github.com/lib/pq"
14
15
)
15
16
16
17
type PostgresDB struct {
···
20
21
}
21
22
22
23
func NewPostgresDB(connString string) (*PostgresDB, error) {
24
+
log.Info("Connecting to PostgreSQL database...")
25
+
23
26
// Open standard sql.DB (for compatibility)
24
27
db, err := sql.Open("pgx", connString)
25
28
if err != nil {
26
-
return nil, err
29
+
return nil, fmt.Errorf("failed to open database: %w", err)
27
30
}
28
31
29
32
// Connection pool settings
···
32
35
db.SetConnMaxLifetime(5 * time.Minute)
33
36
db.SetConnMaxIdleTime(2 * time.Minute)
34
37
38
+
log.Verbose(" Max open connections: 50")
39
+
log.Verbose(" Max idle connections: 25")
40
+
log.Verbose(" Connection max lifetime: 5m")
41
+
35
42
// Test connection
43
+
log.Info("Testing database connection...")
36
44
if err := db.Ping(); err != nil {
37
45
return nil, fmt.Errorf("failed to ping database: %w", err)
38
46
}
47
+
log.Info("✓ Database connection successful")
39
48
40
49
// Also create pgx pool for COPY operations
50
+
log.Verbose("Creating pgx connection pool...")
41
51
pool, err := pgxpool.New(context.Background(), connString)
42
52
if err != nil {
43
53
return nil, fmt.Errorf("failed to create pgx pool: %w", err)
44
54
}
55
+
log.Verbose("✓ Connection pool created")
45
56
46
57
return &PostgresDB{
47
58
db: db,
48
59
pool: pool,
49
-
scanRetention: 3,
60
+
scanRetention: 3, // Default
50
61
}, nil
51
62
}
52
63
···
58
69
}
59
70
60
71
func (p *PostgresDB) Migrate() error {
72
+
log.Info("Running database migrations...")
73
+
61
74
schema := `
62
-
-- Endpoints table (NO user_count, NO ip_info)
75
+
-- Endpoints table (with IPv6 support)
63
76
CREATE TABLE IF NOT EXISTS endpoints (
64
77
id BIGSERIAL PRIMARY KEY,
65
78
endpoint_type TEXT NOT NULL DEFAULT 'pds',
66
79
endpoint TEXT NOT NULL,
80
+
server_did TEXT,
67
81
discovered_at TIMESTAMP NOT NULL,
68
82
last_checked TIMESTAMP,
69
83
status INTEGER DEFAULT 0,
70
84
ip TEXT,
85
+
ipv6 TEXT,
71
86
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
72
88
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
73
89
UNIQUE(endpoint_type, endpoint)
74
90
);
···
77
93
CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
78
94
CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
79
95
CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96
+
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98
+
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
80
100
81
-
-- IP infos table (IP as PRIMARY KEY)
82
-
CREATE TABLE IF NOT EXISTS ip_infos (
83
-
ip TEXT PRIMARY KEY,
84
-
city TEXT,
85
-
country TEXT,
86
-
country_code TEXT,
87
-
asn INTEGER,
88
-
asn_org TEXT,
89
-
is_datacenter BOOLEAN,
90
-
is_vpn BOOLEAN,
91
-
latitude REAL,
92
-
longitude REAL,
93
-
raw_data JSONB,
94
-
fetched_at TIMESTAMP NOT NULL,
95
-
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
96
-
);
101
+
-- IP infos table (IP as PRIMARY KEY)
102
+
CREATE TABLE IF NOT EXISTS ip_infos (
103
+
ip TEXT PRIMARY KEY,
104
+
city TEXT,
105
+
country TEXT,
106
+
country_code TEXT,
107
+
asn INTEGER,
108
+
asn_org TEXT,
109
+
is_datacenter BOOLEAN,
110
+
is_vpn BOOLEAN,
111
+
is_crawler BOOLEAN,
112
+
is_tor BOOLEAN,
113
+
is_proxy BOOLEAN,
114
+
latitude REAL,
115
+
longitude REAL,
116
+
raw_data JSONB,
117
+
fetched_at TIMESTAMP NOT NULL,
118
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
119
+
);
97
120
98
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
99
-
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
121
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
122
+
CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
100
123
101
-
-- Endpoint scans (renamed from pds_scans)
124
+
-- Endpoint scans
102
125
CREATE TABLE IF NOT EXISTS endpoint_scans (
103
126
id BIGSERIAL PRIMARY KEY,
104
127
endpoint_id BIGINT NOT NULL,
···
106
129
response_time DOUBLE PRECISION,
107
130
user_count BIGINT,
108
131
version TEXT,
132
+
used_ip TEXT,
109
133
scan_data JSONB,
110
134
scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
111
135
FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
···
114
138
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
115
139
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
116
140
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
141
+
CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
142
+
117
143
118
144
CREATE TABLE IF NOT EXISTS plc_metrics (
119
145
id BIGSERIAL PRIMARY KEY,
···
132
158
records_processed BIGINT DEFAULT 0
133
159
);
134
160
135
-
CREATE TABLE IF NOT EXISTS plc_bundles (
136
-
bundle_number INTEGER PRIMARY KEY,
137
-
start_time TIMESTAMP NOT NULL,
138
-
end_time TIMESTAMP NOT NULL,
139
-
dids JSONB NOT NULL,
140
-
hash TEXT NOT NULL,
141
-
compressed_hash TEXT NOT NULL,
142
-
compressed_size BIGINT NOT NULL,
143
-
uncompressed_size BIGINT NOT NULL,
144
-
cumulative_compressed_size BIGINT NOT NULL,
145
-
cumulative_uncompressed_size BIGINT NOT NULL,
146
-
cursor TEXT,
147
-
prev_bundle_hash TEXT,
148
-
compressed BOOLEAN DEFAULT true,
149
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
150
-
);
161
+
-- Minimal dids table
162
+
CREATE TABLE IF NOT EXISTS dids (
163
+
did TEXT PRIMARY KEY,
164
+
handle TEXT,
165
+
pds TEXT,
166
+
bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
167
+
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
168
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
169
+
);
151
170
152
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
153
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
154
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
155
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
156
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
171
+
CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
172
+
CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
173
+
CREATE INDEX IF NOT EXISTS idx_dids_handle ON dids(handle);
174
+
CREATE INDEX IF NOT EXISTS idx_dids_pds ON dids(pds);
157
175
158
-
CREATE TABLE IF NOT EXISTS plc_mempool (
176
+
-- PDS Repositories table
177
+
CREATE TABLE IF NOT EXISTS pds_repos (
159
178
id BIGSERIAL PRIMARY KEY,
179
+
endpoint_id BIGINT NOT NULL,
160
180
did TEXT NOT NULL,
161
-
operation TEXT NOT NULL,
162
-
cid TEXT NOT NULL UNIQUE,
163
-
created_at TIMESTAMP NOT NULL,
164
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
165
-
);
166
-
167
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
168
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
169
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
170
-
171
-
-- Minimal dids table
172
-
CREATE TABLE IF NOT EXISTS dids (
173
-
did TEXT PRIMARY KEY,
174
-
bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb,
175
-
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
181
+
head TEXT,
182
+
rev TEXT,
183
+
active BOOLEAN DEFAULT true,
184
+
status TEXT,
185
+
first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
186
+
last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
187
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
188
+
FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE,
189
+
UNIQUE(endpoint_id, did)
176
190
);
177
191
178
-
CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers);
179
-
CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at);
192
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint ON pds_repos(endpoint_id);
193
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint_id_desc ON pds_repos(endpoint_id, id DESC);
194
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_did ON pds_repos(did);
195
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_active ON pds_repos(active);
196
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_status ON pds_repos(status);
197
+
CREATE INDEX IF NOT EXISTS idx_pds_repos_last_seen ON pds_repos(last_seen DESC);
180
198
`
181
199
182
200
_, err := p.db.Exec(schema)
183
-
return err
201
+
if err != nil {
202
+
return err
203
+
}
204
+
205
+
log.Info("✓ Database migrations completed successfully")
206
+
return nil
184
207
}
185
208
186
209
// ===== ENDPOINT OPERATIONS =====
187
210
188
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
189
212
query := `
190
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ip_resolved_at)
191
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
192
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
193
216
last_checked = EXCLUDED.last_checked,
194
217
status = EXCLUDED.status,
···
196
219
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip
197
220
ELSE endpoints.ip
198
221
END,
222
+
ipv6 = CASE
223
+
WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6
224
+
ELSE endpoints.ipv6
225
+
END,
199
226
ip_resolved_at = CASE
200
-
WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip_resolved_at
227
+
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
201
228
ELSE endpoints.ip_resolved_at
202
229
END,
230
+
valid = EXCLUDED.valid,
203
231
updated_at = CURRENT_TIMESTAMP
204
232
RETURNING id
205
233
`
206
234
err := p.db.QueryRowContext(ctx, query,
207
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
208
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
209
237
return err
210
238
}
211
239
···
226
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
227
255
query := `
228
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
229
-
ip, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
230
258
FROM endpoints
231
259
WHERE endpoint = $1 AND endpoint_type = $2
232
260
`
233
261
234
262
var ep Endpoint
235
263
var lastChecked, ipResolvedAt sql.NullTime
236
-
var ip sql.NullString
264
+
var ip, ipv6 sql.NullString
237
265
238
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
239
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
240
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
241
269
)
242
270
if err != nil {
243
271
return nil, err
···
249
277
if ip.Valid {
250
278
ep.IP = ip.String
251
279
}
280
+
if ipv6.Valid {
281
+
ep.IPv6 = ipv6.String
282
+
}
252
283
if ipResolvedAt.Valid {
253
284
ep.IPResolvedAt = ipResolvedAt.Time
254
285
}
···
258
289
259
290
func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) {
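// Deduplicate by server identity: endpoints reporting the same server_did collapse to a
// single row (the earliest-discovered one when the deterministic ordering below is used).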
260
291
query := `
261
-
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
262
-
ip, ip_resolved_at, updated_at
263
-
FROM endpoints
264
-
WHERE 1=1
265
-
`
292
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
293
+
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
295
+
FROM endpoints
296
+
WHERE 1=1
297
+
`
266
298
args := []interface{}{}
267
299
argIdx := 1
268
300
···
272
304
args = append(args, filter.Type)
273
305
argIdx++
274
306
}
307
+
308
+
// NEW: Filter by valid flag
309
+
if filter.OnlyValid {
310
+
query += fmt.Sprintf(" AND valid = true", argIdx)
311
+
}
275
312
if filter.Status != "" {
276
313
statusInt := EndpointStatusUnknown
277
314
switch filter.Status {
···
284
321
args = append(args, statusInt)
285
322
argIdx++
286
323
}
324
+
325
+
// Filter for stale endpoints only
326
+
if filter.OnlyStale && filter.RecheckInterval > 0 {
327
+
cutoffTime := time.Now().UTC().Add(-filter.RecheckInterval)
328
+
query += fmt.Sprintf(" AND (last_checked IS NULL OR last_checked < $%d)", argIdx)
329
+
args = append(args, cutoffTime)
330
+
argIdx++
331
+
}
287
332
}
288
333
289
-
query += " ORDER BY id DESC"
334
+
// NEW: Choose ordering strategy
335
+
if filter != nil && filter.Random {
336
+
// For random selection, we need to wrap in a subquery
337
+
query = fmt.Sprintf(`
338
+
WITH filtered_endpoints AS (
339
+
%s
340
+
)
341
+
SELECT * FROM filtered_endpoints
342
+
ORDER BY RANDOM()
343
+
`, query)
344
+
} else {
345
+
// Original ordering for non-random queries
346
+
query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
347
+
}
290
348
291
349
if filter != nil && filter.Limit > 0 {
292
350
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
303
361
for rows.Next() {
304
362
var ep Endpoint
305
363
var lastChecked, ipResolvedAt sql.NullTime
306
-
var ip sql.NullString
364
+
var ip, ipv6, serverDID sql.NullString
307
365
308
366
err := rows.Scan(
309
-
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
310
-
&ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt,
367
+
&ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked,
368
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
311
369
)
312
370
if err != nil {
313
371
return nil, err
314
372
}
315
373
374
+
if serverDID.Valid {
375
+
ep.ServerDID = serverDID.String
376
+
}
316
377
if lastChecked.Valid {
317
378
ep.LastChecked = lastChecked.Time
318
379
}
319
380
if ip.Valid {
320
381
ep.IP = ip.String
382
+
}
383
+
if ipv6.Valid {
384
+
ep.IPv6 = ipv6.String
321
385
}
322
386
if ipResolvedAt.Valid {
323
387
ep.IPResolvedAt = ipResolvedAt.Time
···
339
403
return err
340
404
}
341
405
342
-
func (p *PostgresDB) UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error {
406
+
func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error {
343
407
query := `
344
408
UPDATE endpoints
345
-
SET ip = $1, ip_resolved_at = $2, updated_at = $3
346
-
WHERE id = $4
409
+
SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4
410
+
WHERE id = $5
347
411
`
348
-
_, err := p.db.ExecContext(ctx, query, ip, resolvedAt, time.Now().UTC(), endpointID)
412
+
_, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID)
413
+
return err
414
+
}
415
+
416
+
func (p *PostgresDB) UpdateEndpointServerDID(ctx context.Context, endpointID int64, serverDID string) error {
417
+
query := `
418
+
UPDATE endpoints
419
+
SET server_did = $1, updated_at = $2
420
+
WHERE id = $3
421
+
`
422
+
_, err := p.db.ExecContext(ctx, query, serverDID, time.Now().UTC(), endpointID)
349
423
return err
350
424
}
351
425
426
+
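// GetDuplicateEndpoints returns, for every server DID claimed by more than one PDS
// endpoint, the list of those endpoints ordered by discovery time.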
func (p *PostgresDB) GetDuplicateEndpoints(ctx context.Context) (map[string][]string, error) {
427
+
query := `
428
+
SELECT server_did, array_agg(endpoint ORDER BY discovered_at ASC) as endpoints
429
+
FROM endpoints
430
+
WHERE server_did IS NOT NULL
431
+
AND server_did != ''
432
+
AND endpoint_type = 'pds'
433
+
GROUP BY server_did
434
+
HAVING COUNT(*) > 1
435
+
ORDER BY COUNT(*) DESC
436
+
`
437
+
438
+
rows, err := p.db.QueryContext(ctx, query)
439
+
if err != nil {
440
+
return nil, err
441
+
}
442
+
defer rows.Close()
443
+
444
+
duplicates := make(map[string][]string)
445
+
for rows.Next() {
446
+
var serverDID string
447
+
var endpoints []string
448
+
449
+
err := rows.Scan(&serverDID, pq.Array(&endpoints))
450
+
if err != nil {
451
+
return nil, err
452
+
}
453
+
454
+
duplicates[serverDID] = endpoints
455
+
}
456
+
457
+
return duplicates, rows.Err()
458
+
}
459
+
352
460
// ===== SCAN OPERATIONS =====
353
461
354
462
func (p *PostgresDB) SetScanRetention(retention int) {
···
368
476
defer tx.Rollback()
369
477
370
478
query := `
371
-
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at)
372
-
VALUES ($1, $2, $3, $4, $5, $6, $7)
479
+
INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
480
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
373
481
`
374
-
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt)
482
+
_, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
375
483
if err != nil {
376
484
return err
377
485
}
···
398
506
399
507
func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
400
508
query := `
401
-
SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at
509
+
SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
402
510
FROM endpoint_scans
403
511
WHERE endpoint_id = $1
404
512
ORDER BY scanned_at DESC
···
416
524
var scan EndpointScan
417
525
var responseTime sql.NullFloat64
418
526
var userCount sql.NullInt64
419
-
var version sql.NullString // NEW
527
+
var version, usedIP sql.NullString
420
528
var scanDataJSON []byte
421
529
422
-
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt)
530
+
err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
423
531
if err != nil {
424
532
return nil, err
425
533
}
···
432
540
scan.UserCount = userCount.Int64
433
541
}
434
542
435
-
if version.Valid { // NEW
543
+
if version.Valid {
436
544
scan.Version = version.String
437
545
}
438
546
547
+
if usedIP.Valid {
548
+
scan.UsedIP = usedIP.String
549
+
}
550
+
439
551
if len(scanDataJSON) > 0 {
440
552
var scanData EndpointScanData
441
553
if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
···
453
565
454
566
func (p *PostgresDB) GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error) {
455
567
query := `
456
-
SELECT
457
-
e.id, e.endpoint, e.discovered_at, e.last_checked, e.status, e.ip,
458
-
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
459
-
i.city, i.country, i.country_code, i.asn, i.asn_org,
460
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude
461
-
FROM endpoints e
462
-
LEFT JOIN LATERAL (
463
-
SELECT
464
-
user_count,
465
-
response_time,
466
-
version,
467
-
scanned_at
468
-
FROM endpoint_scans
469
-
WHERE endpoint_id = e.id AND status = 1
470
-
ORDER BY scanned_at DESC
471
-
LIMIT 1
472
-
) latest ON true
473
-
LEFT JOIN ip_infos i ON e.ip = i.ip
474
-
WHERE e.endpoint_type = 'pds'
475
-
`
568
+
WITH unique_servers AS (
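-- One row per physical server: endpoints sharing a server_did collapse to the
-- earliest-discovered entry (COALESCE falls back to the endpoint id when server_did is NULL)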
569
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
570
+
id,
571
+
endpoint,
572
+
server_did,
573
+
discovered_at,
574
+
last_checked,
575
+
status,
576
+
ip,
577
+
ipv6,
578
+
valid
579
+
FROM endpoints
580
+
WHERE endpoint_type = 'pds'
581
+
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
582
+
)
583
+
SELECT
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
585
+
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
586
+
i.city, i.country, i.country_code, i.asn, i.asn_org,
587
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
588
+
i.latitude, i.longitude
589
+
FROM unique_servers e
590
+
LEFT JOIN LATERAL (
591
+
SELECT
592
+
user_count,
593
+
response_time,
594
+
version,
595
+
scanned_at
596
+
FROM endpoint_scans
597
+
WHERE endpoint_id = e.id AND status = 1
598
+
ORDER BY scanned_at DESC
599
+
LIMIT 1
600
+
) latest ON true
601
+
LEFT JOIN ip_infos i ON e.ip = i.ip
602
+
WHERE 1=1
603
+
`
476
604
477
605
args := []interface{}{}
478
606
argIdx := 1
···
514
642
var items []*PDSListItem
515
643
for rows.Next() {
516
644
item := &PDSListItem{}
517
-
var ip, city, country, countryCode, asnOrg sql.NullString
645
+
var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
518
646
var asn sql.NullInt32
519
-
var isDatacenter, isVPN sql.NullBool
647
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
520
648
var lat, lon sql.NullFloat64
521
649
var userCount sql.NullInt32
522
650
var responseTime sql.NullFloat64
523
-
var version sql.NullString // ADD THIS LINE
651
+
var version sql.NullString
524
652
var scannedAt sql.NullTime
525
653
526
654
err := rows.Scan(
527
-
&item.ID, &item.Endpoint, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
528
-
&userCount, &responseTime, &version, &scannedAt, // ADD &version HERE
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
656
+
&userCount, &responseTime, &version, &scannedAt,
529
657
&city, &country, &countryCode, &asn, &asnOrg,
530
-
&isDatacenter, &isVPN, &lat, &lon,
658
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
659
+
&lat, &lon,
531
660
)
532
661
if err != nil {
533
662
return nil, err
···
536
665
if ip.Valid {
537
666
item.IP = ip.String
538
667
}
668
+
if ipv6.Valid {
669
+
item.IPv6 = ipv6.String
670
+
}
671
+
if serverDID.Valid {
672
+
item.ServerDID = serverDID.String
673
+
}
539
674
540
675
// Add latest scan data if available
541
676
if userCount.Valid {
···
563
698
ASNOrg: asnOrg.String,
564
699
IsDatacenter: isDatacenter.Bool,
565
700
IsVPN: isVPN.Bool,
701
+
IsCrawler: isCrawler.Bool,
702
+
IsTor: isTor.Bool,
703
+
IsProxy: isProxy.Bool,
566
704
Latitude: float32(lat.Float64),
567
705
Longitude: float32(lon.Float64),
568
706
}
···
576
714
577
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
578
716
query := `
579
-
SELECT
580
-
e.id, e.endpoint, e.discovered_at, e.last_checked, e.status, e.ip,
581
-
latest.user_count,
582
-
latest.response_time,
583
-
latest.version, -- ADD THIS LINE
584
-
latest.scan_data->'metadata'->'server_info' as server_info,
585
-
latest.scanned_at,
586
-
i.city, i.country, i.country_code, i.asn, i.asn_org,
587
-
i.is_datacenter, i.is_vpn, i.latitude, i.longitude,
588
-
i.raw_data
589
-
FROM endpoints e
590
-
LEFT JOIN LATERAL (
591
-
SELECT scan_data, response_time, version, scanned_at, user_count -- ADD version HERE
592
-
FROM endpoint_scans
593
-
WHERE endpoint_id = e.id
594
-
ORDER BY scanned_at DESC
595
-
LIMIT 1
596
-
) latest ON true
597
-
LEFT JOIN ip_infos i ON e.ip = i.ip
598
-
WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
599
-
`
717
+
WITH target_endpoint AS MATERIALIZED (
718
+
SELECT
719
+
e.id,
720
+
e.endpoint,
721
+
e.server_did,
722
+
e.discovered_at,
723
+
e.last_checked,
724
+
e.status,
725
+
e.ip,
726
+
e.ipv6,
727
+
e.valid
728
+
FROM endpoints e
729
+
WHERE e.endpoint = $1
730
+
AND e.endpoint_type = 'pds'
731
+
LIMIT 1
732
+
)
733
+
SELECT
734
+
te.id,
735
+
te.endpoint,
736
+
te.server_did,
737
+
te.discovered_at,
738
+
te.last_checked,
739
+
te.status,
740
+
te.ip,
741
+
te.ipv6,
742
+
te.valid,
743
+
latest.user_count,
744
+
latest.response_time,
745
+
latest.version,
746
+
latest.scan_data->'metadata'->'server_info' as server_info,
747
+
latest.scanned_at,
748
+
i.city, i.country, i.country_code, i.asn, i.asn_org,
749
+
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
750
+
i.latitude, i.longitude,
751
+
i.raw_data,
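-- aliases: sibling endpoints that report the same server_did, ordered by discovery time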
752
+
COALESCE(
753
+
ARRAY(
754
+
SELECT e2.endpoint
755
+
FROM endpoints e2
756
+
WHERE e2.server_did = te.server_did
757
+
AND e2.endpoint_type = 'pds'
758
+
AND e2.endpoint != te.endpoint
759
+
AND te.server_did IS NOT NULL
760
+
ORDER BY e2.discovered_at
761
+
),
762
+
ARRAY[]::text[]
763
+
) as aliases,
764
+
CASE
765
+
WHEN te.server_did IS NOT NULL THEN (
766
+
SELECT MIN(e3.discovered_at)
767
+
FROM endpoints e3
768
+
WHERE e3.server_did = te.server_did
769
+
AND e3.endpoint_type = 'pds'
770
+
)
771
+
ELSE NULL
772
+
END as first_discovered_at
773
+
FROM target_endpoint te
774
+
LEFT JOIN LATERAL (
775
+
SELECT
776
+
es.scan_data,
777
+
es.response_time,
778
+
es.version,
779
+
es.scanned_at,
780
+
es.user_count
781
+
FROM endpoint_scans es
782
+
WHERE es.endpoint_id = te.id
783
+
ORDER BY es.scanned_at DESC
784
+
LIMIT 1
785
+
) latest ON true
786
+
LEFT JOIN ip_infos i ON te.ip = i.ip;
787
+
`
600
788
601
789
detail := &PDSDetail{}
602
-
var ip, city, country, countryCode, asnOrg sql.NullString
790
+
var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
603
791
var asn sql.NullInt32
604
-
var isDatacenter, isVPN sql.NullBool
792
+
var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
605
793
var lat, lon sql.NullFloat64
606
794
var userCount sql.NullInt32
607
795
var responseTime sql.NullFloat64
608
-
var version sql.NullString // ADD THIS LINE
796
+
var version sql.NullString
609
797
var serverInfoJSON []byte
610
798
var scannedAt sql.NullTime
611
799
var rawDataJSON []byte
800
+
var aliases []string
801
+
var firstDiscoveredAt sql.NullTime
612
802
613
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
614
-
&detail.ID, &detail.Endpoint, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
615
-
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt, // ADD &version HERE
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
805
+
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
616
806
&city, &country, &countryCode, &asn, &asnOrg,
617
-
&isDatacenter, &isVPN, &lat, &lon,
807
+
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
808
+
&lat, &lon,
618
809
&rawDataJSON,
810
+
pq.Array(&aliases),
811
+
&firstDiscoveredAt,
619
812
)
620
813
if err != nil {
621
814
return nil, err
···
624
817
if ip.Valid {
625
818
detail.IP = ip.String
626
819
}
820
+
if ipv6.Valid {
821
+
detail.IPv6 = ipv6.String
822
+
}
823
+
824
+
if serverDID.Valid {
825
+
detail.ServerDID = serverDID.String
826
+
}
827
+
828
+
// Set aliases and is_primary
829
+
detail.Aliases = aliases
830
+
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
831
+
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
832
+
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
833
+
} else {
834
+
detail.IsPrimary = true
835
+
}
627
836
628
837
// Parse latest scan data
629
838
if userCount.Valid {
···
635
844
detail.LatestScan = &struct {
636
845
UserCount int
637
846
ResponseTime float64
638
-
Version string // ADD THIS LINE
847
+
Version string
639
848
ServerInfo interface{}
640
849
ScannedAt time.Time
641
850
}{
642
851
UserCount: int(userCount.Int32),
643
852
ResponseTime: responseTime.Float64,
644
-
Version: version.String, // ADD THIS LINE
853
+
Version: version.String,
645
854
ServerInfo: serverInfo,
646
855
ScannedAt: scannedAt.Time,
647
856
}
648
857
}
649
858
650
-
// Parse IP info
859
+
// Parse IP info with all fields
651
860
if city.Valid || country.Valid {
652
861
detail.IPInfo = &IPInfo{
653
862
IP: ip.String,
···
658
867
ASNOrg: asnOrg.String,
659
868
IsDatacenter: isDatacenter.Bool,
660
869
IsVPN: isVPN.Bool,
870
+
IsCrawler: isCrawler.Bool,
871
+
IsTor: isTor.Bool,
872
+
IsProxy: isProxy.Bool,
661
873
Latitude: float32(lat.Float64),
662
874
Longitude: float32(lon.Float64),
663
-
// RawData is unmarshaled below
664
875
}
665
876
666
-
// NEW: Unmarshal the raw_data JSON
667
877
if len(rawDataJSON) > 0 {
668
-
if err := json.Unmarshal(rawDataJSON, &detail.IPInfo.RawData); err != nil {
669
-
// Log the error but don't fail the request
670
-
fmt.Printf("Warning: failed to unmarshal raw_data for IP %s: %v\n", ip.String, err)
671
-
}
878
+
json.Unmarshal(rawDataJSON, &detail.IPInfo.RawData)
672
879
}
673
880
}
674
881
···
676
883
}
677
884
678
885
func (p *PostgresDB) GetPDSStats(ctx context.Context) (*PDSStats, error) {
679
-
// PDS stats - aggregate from latest scans
680
886
query := `
681
-
WITH latest_scans AS (
682
-
SELECT DISTINCT ON (endpoint_id)
683
-
endpoint_id,
684
-
user_count,
887
+
WITH unique_servers AS (
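-- Count each physical server once, even when multiple endpoints share a server_did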
888
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
889
+
id,
890
+
COALESCE(server_did, id::text) as server_identity,
685
891
status
686
-
FROM endpoint_scans
687
-
WHERE endpoint_id IN (SELECT id FROM endpoints WHERE endpoint_type = 'pds')
688
-
ORDER BY endpoint_id, scanned_at DESC
689
-
)
690
-
SELECT
691
-
COUNT(*) as total,
692
-
SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online,
693
-
SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline,
694
-
SUM(user_count) as total_users
695
-
FROM latest_scans
696
-
`
892
+
FROM endpoints
893
+
WHERE endpoint_type = 'pds'
894
+
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
895
+
),
896
+
latest_scans AS (
897
+
SELECT DISTINCT ON (us.id)
898
+
us.id,
899
+
es.user_count,
900
+
us.status
901
+
FROM unique_servers us
902
+
LEFT JOIN endpoint_scans es ON us.id = es.endpoint_id
903
+
ORDER BY us.id, es.scanned_at DESC
904
+
)
905
+
SELECT
906
+
COUNT(*) as total,
907
+
SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online,
908
+
SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline,
909
+
SUM(COALESCE(user_count, 0)) as total_users
910
+
FROM latest_scans
911
+
`
697
912
698
913
stats := &PDSStats{}
699
914
err := p.db.QueryRowContext(ctx, query).Scan(
···
716
931
err := p.db.QueryRowContext(ctx, query).Scan(
717
932
&stats.TotalEndpoints, &stats.OnlineEndpoints, &stats.OfflineEndpoints,
718
933
)
934
+
if err != nil {
935
+
return nil, err
936
+
}
719
937
720
938
// Get average response time from recent scans
721
939
avgQuery := `
···
724
942
WHERE response_time > 0 AND scanned_at > NOW() - INTERVAL '1 hour'
725
943
`
726
944
var avgResponseTime sql.NullFloat64
727
-
p.db.QueryRowContext(ctx, avgQuery).Scan(&avgResponseTime)
945
+
_ = p.db.QueryRowContext(ctx, avgQuery).Scan(&avgResponseTime)
728
946
if avgResponseTime.Valid {
729
947
stats.AvgResponseTime = avgResponseTime.Float64
730
948
}
···
750
968
751
969
// Get total DIDs from latest PDS scans
752
970
didQuery := `
753
-
WITH latest_pds_scans AS (
754
-
SELECT DISTINCT ON (endpoint_id)
755
-
endpoint_id,
756
-
user_count
757
-
FROM endpoint_scans
758
-
WHERE endpoint_id IN (SELECT id FROM endpoints WHERE endpoint_type = 'pds')
759
-
ORDER BY endpoint_id, scanned_at DESC
971
+
WITH unique_servers AS (
972
+
SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text))
973
+
e.id
974
+
FROM endpoints e
975
+
WHERE e.endpoint_type = 'pds'
976
+
ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC
977
+
),
978
+
latest_pds_scans AS (
979
+
SELECT DISTINCT ON (us.id)
980
+
us.id,
981
+
es.user_count
982
+
FROM unique_servers us
983
+
LEFT JOIN endpoint_scans es ON us.id = es.endpoint_id
984
+
ORDER BY us.id, es.scanned_at DESC
760
985
)
761
986
SELECT SUM(user_count) FROM latest_pds_scans
762
987
`
763
988
var totalDIDs sql.NullInt64
764
-
p.db.QueryRowContext(ctx, didQuery).Scan(&totalDIDs)
989
+
_ = p.db.QueryRowContext(ctx, didQuery).Scan(&totalDIDs)
765
990
if totalDIDs.Valid {
766
991
stats.TotalDIDs = totalDIDs.Int64
767
992
}
···
780
1005
countryCode := extractString(ipInfo, "location", "country_code")
781
1006
asn := extractInt(ipInfo, "asn", "asn")
782
1007
asnOrg := extractString(ipInfo, "asn", "org")
783
-
isDatacenter := extractBool(ipInfo, "company", "type", "hosting")
784
-
isVPN := extractBool(ipInfo, "security", "vpn")
1008
+
1009
+
// Extract top-level boolean flags
1010
+
isDatacenter := false
1011
+
if val, ok := ipInfo["is_datacenter"].(bool); ok {
1012
+
isDatacenter = val
1013
+
}
1014
+
1015
+
isVPN := false
1016
+
if val, ok := ipInfo["is_vpn"].(bool); ok {
1017
+
isVPN = val
1018
+
}
1019
+
1020
+
isCrawler := false
1021
+
if val, ok := ipInfo["is_crawler"].(bool); ok {
1022
+
isCrawler = val
1023
+
}
1024
+
1025
+
isTor := false
1026
+
if val, ok := ipInfo["is_tor"].(bool); ok {
1027
+
isTor = val
1028
+
}
1029
+
1030
+
isProxy := false
1031
+
if val, ok := ipInfo["is_proxy"].(bool); ok {
1032
+
isProxy = val
1033
+
}
1034
+
785
1035
lat := extractFloat(ipInfo, "location", "latitude")
786
1036
lon := extractFloat(ipInfo, "location", "longitude")
787
1037
788
1038
query := `
789
-
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, latitude, longitude, raw_data, fetched_at)
790
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
1039
+
INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at)
1040
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
791
1041
ON CONFLICT(ip) DO UPDATE SET
792
1042
city = EXCLUDED.city,
793
1043
country = EXCLUDED.country,
···
796
1046
asn_org = EXCLUDED.asn_org,
797
1047
is_datacenter = EXCLUDED.is_datacenter,
798
1048
is_vpn = EXCLUDED.is_vpn,
1049
+
is_crawler = EXCLUDED.is_crawler,
1050
+
is_tor = EXCLUDED.is_tor,
1051
+
is_proxy = EXCLUDED.is_proxy,
799
1052
latitude = EXCLUDED.latitude,
800
1053
longitude = EXCLUDED.longitude,
801
1054
raw_data = EXCLUDED.raw_data,
802
1055
fetched_at = EXCLUDED.fetched_at,
803
1056
updated_at = CURRENT_TIMESTAMP
804
1057
`
805
-
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, lat, lon, rawDataJSON, time.Now().UTC())
1058
+
_, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC())
806
1059
return err
807
1060
}
808
1061
809
1062
func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) {
810
1063
query := `
811
-
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn,
1064
+
SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy,
812
1065
latitude, longitude, raw_data, fetched_at, updated_at
813
1066
FROM ip_infos
814
1067
WHERE ip = $1
···
819
1072
820
1073
err := p.db.QueryRowContext(ctx, query, ip).Scan(
821
1074
&info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg,
822
-
&info.IsDatacenter, &info.IsVPN, &info.Latitude, &info.Longitude,
1075
+
&info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy,
1076
+
&info.Latitude, &info.Longitude,
823
1077
&rawDataJSON, &info.FetchedAt, &info.UpdatedAt,
824
1078
)
825
1079
if err != nil {
···
909
1163
return 0
910
1164
}
911
1165
912
-
func extractBool(data map[string]interface{}, keys ...string) bool {
913
-
current := data
914
-
for i, key := range keys {
915
-
if i == len(keys)-1 {
916
-
if val, ok := current[key].(bool); ok {
917
-
return val
918
-
}
919
-
// Check if it's a string that matches (for type="hosting")
920
-
if val, ok := current[key].(string); ok {
921
-
// For cases like company.type == "hosting"
922
-
expectedValue := keys[len(keys)-1]
923
-
return val == expectedValue
924
-
}
925
-
return false
926
-
}
927
-
if nested, ok := current[key].(map[string]interface{}); ok {
928
-
current = nested
929
-
} else {
930
-
return false
931
-
}
932
-
}
933
-
return false
934
-
}
935
-
936
-
// ===== BUNDLE OPERATIONS =====
937
-
938
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
939
-
didsJSON, err := json.Marshal(bundle.DIDs)
940
-
if err != nil {
941
-
return err
942
-
}
943
-
944
-
// Calculate cumulative sizes from previous bundle
945
-
if bundle.BundleNumber > 1 {
946
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
947
-
if err == nil && prevBundle != nil {
948
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
949
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
950
-
} else {
951
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
952
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
953
-
}
954
-
} else {
955
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
956
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
957
-
}
958
-
959
-
query := `
960
-
INSERT INTO plc_bundles (
961
-
bundle_number, start_time, end_time, dids,
962
-
hash, compressed_hash, compressed_size, uncompressed_size,
963
-
cumulative_compressed_size, cumulative_uncompressed_size,
964
-
cursor, prev_bundle_hash, compressed
965
-
)
966
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
967
-
ON CONFLICT(bundle_number) DO UPDATE SET
968
-
start_time = EXCLUDED.start_time,
969
-
end_time = EXCLUDED.end_time,
970
-
dids = EXCLUDED.dids,
971
-
hash = EXCLUDED.hash,
972
-
compressed_hash = EXCLUDED.compressed_hash,
973
-
compressed_size = EXCLUDED.compressed_size,
974
-
uncompressed_size = EXCLUDED.uncompressed_size,
975
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
976
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
977
-
cursor = EXCLUDED.cursor,
978
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
979
-
compressed = EXCLUDED.compressed
980
-
`
981
-
_, err = p.db.ExecContext(ctx, query,
982
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
983
-
didsJSON, bundle.Hash, bundle.CompressedHash,
984
-
bundle.CompressedSize, bundle.UncompressedSize,
985
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
986
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
987
-
)
988
-
989
-
return err
990
-
}
991
-
992
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
993
-
query := `
994
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
995
-
compressed_size, uncompressed_size, cumulative_compressed_size,
996
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
997
-
FROM plc_bundles
998
-
WHERE bundle_number = $1
999
-
`
1000
-
1001
-
var bundle PLCBundle
1002
-
var didsJSON []byte
1003
-
var prevHash sql.NullString
1004
-
var cursor sql.NullString
1005
-
1006
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1007
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1008
-
&didsJSON, &bundle.Hash, &bundle.CompressedHash,
1009
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1010
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1011
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1012
-
)
1013
-
if err != nil {
1014
-
return nil, err
1015
-
}
1016
-
1017
-
if prevHash.Valid {
1018
-
bundle.PrevBundleHash = prevHash.String
1019
-
}
1020
-
if cursor.Valid {
1021
-
bundle.Cursor = cursor.String
1022
-
}
1023
-
1024
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1025
-
return &bundle, nil
1026
-
}
1027
-
1028
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1029
-
query := `
1030
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1031
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1032
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1033
-
FROM plc_bundles
1034
-
ORDER BY bundle_number DESC
1035
-
LIMIT $1
1036
-
`
1037
-
1038
-
rows, err := p.db.QueryContext(ctx, query, limit)
1039
-
if err != nil {
1040
-
return nil, err
1041
-
}
1042
-
defer rows.Close()
1043
-
1044
-
return p.scanBundles(rows)
1045
-
}
1046
-
1047
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1048
-
query := `
1049
-
SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1050
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1051
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1052
-
FROM plc_bundles
1053
-
WHERE dids ? $1
1054
-
ORDER BY bundle_number ASC
1055
-
`
1056
-
1057
-
rows, err := p.db.QueryContext(ctx, query, did)
1058
-
if err != nil {
1059
-
return nil, err
1060
-
}
1061
-
defer rows.Close()
1062
-
1063
-
return p.scanBundles(rows)
1064
-
}
1065
-
1066
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1067
-
var bundles []*PLCBundle
1068
-
1069
-
for rows.Next() {
1070
-
var bundle PLCBundle
1071
-
var didsJSON []byte
1072
-
var prevHash sql.NullString
1073
-
var cursor sql.NullString
1074
-
1075
-
if err := rows.Scan(
1076
-
&bundle.BundleNumber,
1077
-
&bundle.StartTime,
1078
-
&bundle.EndTime,
1079
-
&didsJSON,
1080
-
&bundle.Hash,
1081
-
&bundle.CompressedHash,
1082
-
&bundle.CompressedSize,
1083
-
&bundle.UncompressedSize,
1084
-
&bundle.CumulativeCompressedSize,
1085
-
&bundle.CumulativeUncompressedSize,
1086
-
&cursor,
1087
-
&prevHash,
1088
-
&bundle.Compressed,
1089
-
&bundle.CreatedAt,
1090
-
); err != nil {
1091
-
return nil, err
1092
-
}
1093
-
1094
-
if prevHash.Valid {
1095
-
bundle.PrevBundleHash = prevHash.String
1096
-
}
1097
-
if cursor.Valid {
1098
-
bundle.Cursor = cursor.String
1099
-
}
1100
-
1101
-
json.Unmarshal(didsJSON, &bundle.DIDs)
1102
-
bundles = append(bundles, &bundle)
1103
-
}
1104
-
1105
-
return bundles, rows.Err()
1106
-
}
1107
-
1108
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1109
-
var count, lastBundleNum int64
1110
-
err := p.db.QueryRowContext(ctx, `
1111
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1112
-
FROM plc_bundles
1113
-
`).Scan(&count, &lastBundleNum)
1114
-
if err != nil {
1115
-
return 0, 0, 0, 0, err
1116
-
}
1117
-
1118
-
if lastBundleNum == 0 {
1119
-
return 0, 0, 0, 0, nil
1120
-
}
1121
-
1122
-
var compressedSize, uncompressedSize int64
1123
-
err = p.db.QueryRowContext(ctx, `
1124
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1125
-
FROM plc_bundles
1126
-
WHERE bundle_number = $1
1127
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1128
-
if err != nil {
1129
-
return 0, 0, 0, 0, err
1130
-
}
1131
-
1132
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1133
-
}
1134
-
1135
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1136
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1137
-
var num int
1138
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1139
-
return num, err
1140
-
}
1141
-
1142
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1143
-
query := `
1144
-
SELECT bundle_number
1145
-
FROM plc_bundles
1146
-
WHERE start_time <= $1 AND end_time >= $1
1147
-
ORDER BY bundle_number ASC
1148
-
LIMIT 1
1149
-
`
1150
-
1151
-
var bundleNum int
1152
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1153
-
if err == sql.ErrNoRows {
1154
-
query = `
1155
-
SELECT bundle_number
1156
-
FROM plc_bundles
1157
-
WHERE end_time < $1
1158
-
ORDER BY bundle_number DESC
1159
-
LIMIT 1
1160
-
`
1161
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1162
-
if err == sql.ErrNoRows {
1163
-
return 1, nil
1164
-
}
1165
-
if err != nil {
1166
-
return 0, err
1167
-
}
1168
-
return bundleNum, nil
1169
-
}
1170
-
if err != nil {
1171
-
return 0, err
1172
-
}
1173
-
1174
-
return bundleNum, nil
1175
-
}
1176
-
1177
-
// ===== MEMPOOL OPERATIONS =====
1178
-
1179
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1180
-
if len(ops) == 0 {
1181
-
return nil
1182
-
}
1183
-
1184
-
tx, err := p.db.BeginTx(ctx, nil)
1185
-
if err != nil {
1186
-
return err
1187
-
}
1188
-
defer tx.Rollback()
1189
-
1190
-
stmt, err := tx.PrepareContext(ctx, `
1191
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1192
-
VALUES ($1, $2, $3, $4)
1193
-
ON CONFLICT(cid) DO NOTHING
1194
-
`)
1195
-
if err != nil {
1196
-
return err
1197
-
}
1198
-
defer stmt.Close()
1199
-
1200
-
for _, op := range ops {
1201
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1202
-
if err != nil {
1203
-
return err
1204
-
}
1205
-
}
1206
-
1207
-
return tx.Commit()
1208
-
}
1209
-
1210
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1211
-
query := "SELECT COUNT(*) FROM plc_mempool"
1212
-
var count int
1213
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1214
-
return count, err
1215
-
}
1216
-
1217
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1218
-
query := `
1219
-
SELECT id, did, operation, cid, created_at, added_at
1220
-
FROM plc_mempool
1221
-
ORDER BY created_at ASC
1222
-
LIMIT $1
1223
-
`
1224
-
1225
-
rows, err := p.db.QueryContext(ctx, query, limit)
1226
-
if err != nil {
1227
-
return nil, err
1228
-
}
1229
-
defer rows.Close()
1230
-
1231
-
var ops []MempoolOperation
1232
-
for rows.Next() {
1233
-
var op MempoolOperation
1234
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1235
-
if err != nil {
1236
-
return nil, err
1237
-
}
1238
-
ops = append(ops, op)
1239
-
}
1240
-
1241
-
return ops, rows.Err()
1242
-
}
1243
-
1244
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1245
-
if len(ids) == 0 {
1246
-
return nil
1247
-
}
1248
-
1249
-
placeholders := make([]string, len(ids))
1250
-
args := make([]interface{}, len(ids))
1251
-
for i, id := range ids {
1252
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1253
-
args[i] = id
1254
-
}
1255
-
1256
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1257
-
strings.Join(placeholders, ","))
1258
-
1259
-
_, err := p.db.ExecContext(ctx, query, args...)
1260
-
return err
1261
-
}
1262
-
1263
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1264
-
query := `
1265
-
SELECT id, did, operation, cid, created_at, added_at
1266
-
FROM plc_mempool
1267
-
ORDER BY created_at ASC, id ASC
1268
-
LIMIT 1
1269
-
`
1270
-
1271
-
var op MempoolOperation
1272
-
err := p.db.QueryRowContext(ctx, query).Scan(
1273
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1274
-
)
1275
-
if err == sql.ErrNoRows {
1276
-
return nil, nil
1277
-
}
1278
-
if err != nil {
1279
-
return nil, err
1280
-
}
1281
-
1282
-
return &op, nil
1283
-
}
1284
-
1285
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1286
-
query := `
1287
-
SELECT id, did, operation, cid, created_at, added_at
1288
-
FROM plc_mempool
1289
-
ORDER BY created_at DESC, id DESC
1290
-
LIMIT 1
1291
-
`
1292
-
1293
-
var op MempoolOperation
1294
-
err := p.db.QueryRowContext(ctx, query).Scan(
1295
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1296
-
)
1297
-
if err == sql.ErrNoRows {
1298
-
return nil, nil
1299
-
}
1300
-
if err != nil {
1301
-
return nil, err
1302
-
}
1303
-
1304
-
return &op, nil
1305
-
}
1306
-
1307
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1308
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1309
-
var count int
1310
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1311
-
return count, err
1312
-
}
1313
-
1314
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1315
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1316
-
var size int64
1317
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1318
-
return size, err
1319
-
}
1320
-
1321
1166
// ===== CURSOR OPERATIONS =====
1322
1167
1323
1168
func (p *PostgresDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) {
···
1390
1235
1391
1236
// ===== DID OPERATIONS =====
1392
1237
1393
-
func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int) error {
1238
+
func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error {
1394
1239
query := `
1395
-
INSERT INTO dids (did, bundle_numbers, created_at)
1396
-
VALUES ($1, jsonb_build_array($2), CURRENT_TIMESTAMP)
1240
+
INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
1241
+
VALUES ($1, $2, $3, jsonb_build_array($4::integer), CURRENT_TIMESTAMP)
1397
1242
ON CONFLICT(did) DO UPDATE SET
1243
+
handle = EXCLUDED.handle,
1244
+
pds = EXCLUDED.pds,
1398
1245
bundle_numbers = CASE
1399
-
WHEN dids.bundle_numbers ? $2::text THEN dids.bundle_numbers
1400
-
ELSE dids.bundle_numbers || jsonb_build_array($2)
1401
-
END
1246
+
WHEN dids.bundle_numbers @> jsonb_build_array($4::integer) THEN dids.bundle_numbers
1247
+
ELSE dids.bundle_numbers || jsonb_build_array($4::integer)
1248
+
END,
1249
+
updated_at = CURRENT_TIMESTAMP
1402
1250
`
1403
-
_, err := p.db.ExecContext(ctx, query, did, bundleNum)
1251
+
_, err := p.db.ExecContext(ctx, query, did, handle, pds, bundleNum)
1252
+
return err
1253
+
}
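// Example (sketch; ops, op.Handle and op.PDS are illustrative names): the ON CONFLICT
// branch above only appends bundleNum when dids.bundle_numbers does not already contain
// it (the @> containment check), so UpsertDID is idempotent per (did, bundle) and safe
// to retry during bundle indexing:
//
//	for _, op := range ops {
//		if err := db.UpsertDID(ctx, op.DID, bundleNum, op.Handle, op.PDS); err != nil {
//			return fmt.Errorf("upsert did %s: %w", op.DID, err)
//		}
//	}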
1254
+
1255
+
// UpsertDIDFromMempool creates/updates DID record without adding to bundle_numbers
1256
+
func (p *PostgresDB) UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error {
1257
+
query := `
1258
+
INSERT INTO dids (did, handle, pds, bundle_numbers, created_at)
1259
+
VALUES ($1, $2, $3, '[]'::jsonb, CURRENT_TIMESTAMP)
1260
+
ON CONFLICT(did) DO UPDATE SET
1261
+
handle = EXCLUDED.handle,
1262
+
pds = EXCLUDED.pds,
1263
+
updated_at = CURRENT_TIMESTAMP
1264
+
`
1265
+
_, err := p.db.ExecContext(ctx, query, did, handle, pds)
1404
1266
return err
1405
1267
}
1406
1268
1407
1269
func (p *PostgresDB) GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) {
1408
1270
query := `
1409
-
SELECT did, bundle_numbers, created_at
1271
+
SELECT did, handle, pds, bundle_numbers, created_at
1410
1272
FROM dids
1411
1273
WHERE did = $1
1412
1274
`
1413
1275
1414
1276
var record DIDRecord
1415
1277
var bundleNumbersJSON []byte
1278
+
var handle, pds sql.NullString
1416
1279
1417
1280
err := p.db.QueryRowContext(ctx, query, did).Scan(
1418
1281
&record.DID,
1282
+
&handle,
1283
+
&pds,
1419
1284
&bundleNumbersJSON,
1420
1285
&record.CreatedAt,
1421
1286
)
···
1423
1288
return nil, err
1424
1289
}
1425
1290
1291
+
if handle.Valid {
1292
+
record.Handle = handle.String
1293
+
}
1294
+
if pds.Valid {
1295
+
record.CurrentPDS = pds.String
1296
+
}
1297
+
1426
1298
if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
1427
1299
return nil, err
1428
1300
}
···
1430
1302
return &record, nil
1431
1303
}
1432
1304
1305
+
func (p *PostgresDB) GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) {
1306
+
query := `
1307
+
SELECT did, handle, pds, bundle_numbers, created_at
1308
+
FROM dids
1309
+
WHERE handle = $1
1310
+
`
1311
+
1312
+
var record DIDRecord
1313
+
var bundleNumbersJSON []byte
1314
+
var recordHandle, pds sql.NullString
1315
+
1316
+
err := p.db.QueryRowContext(ctx, query, handle).Scan(
1317
+
&record.DID,
1318
+
&recordHandle,
1319
+
&pds,
1320
+
&bundleNumbersJSON,
1321
+
&record.CreatedAt,
1322
+
)
1323
+
if err != nil {
1324
+
return nil, err
1325
+
}
1326
+
1327
+
if recordHandle.Valid {
1328
+
record.Handle = recordHandle.String
1329
+
}
1330
+
if pds.Valid {
1331
+
record.CurrentPDS = pds.String
1332
+
}
1333
+
1334
+
if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil {
1335
+
return nil, err
1336
+
}
1337
+
1338
+
return &record, nil
1339
+
}
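// Example (sketch): GetDIDByHandle surfaces sql.ErrNoRows for unknown handles, so a
// hypothetical lookup helper (lookupDID and q are illustrative names) can branch on the
// input shape and let callers treat both paths uniformly:
//
//	func lookupDID(ctx context.Context, db *PostgresDB, q string) (*DIDRecord, error) {
//		if strings.HasPrefix(q, "did:") {
//			return db.GetDIDRecord(ctx, q)
//		}
//		return db.GetDIDByHandle(ctx, q)
//	}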
1340
+
1341
+
// GetGlobalDIDInfo retrieves consolidated DID info from 'dids' and 'pds_repos'
1342
+
func (p *PostgresDB) GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) {
1343
+
query := `
1344
+
WITH primary_endpoints AS (
1345
+
SELECT DISTINCT ON (COALESCE(server_did, id::text))
1346
+
id
1347
+
FROM endpoints
1348
+
WHERE endpoint_type = 'pds'
1349
+
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
1350
+
)
1351
+
SELECT
1352
+
d.did,
1353
+
d.handle,
1354
+
d.pds,
1355
+
d.bundle_numbers,
1356
+
d.created_at,
1357
+
COALESCE(
1358
+
jsonb_agg(
1359
+
jsonb_build_object(
1360
+
'id', pr.id,
1361
+
'endpoint_id', pr.endpoint_id,
1362
+
'endpoint', e.endpoint,
1363
+
'did', pr.did,
1364
+
'head', pr.head,
1365
+
'rev', pr.rev,
1366
+
'active', pr.active,
1367
+
'status', pr.status,
1368
+
'first_seen', pr.first_seen AT TIME ZONE 'UTC',
1369
+
'last_seen', pr.last_seen AT TIME ZONE 'UTC',
1370
+
'updated_at', pr.updated_at AT TIME ZONE 'UTC'
1371
+
)
1372
+
ORDER BY pr.last_seen DESC
1373
+
) FILTER (
1374
+
WHERE pr.id IS NOT NULL AND pe.id IS NOT NULL
1375
+
),
1376
+
'[]'::jsonb
1377
+
) AS hosting_on
1378
+
FROM
1379
+
dids d
1380
+
LEFT JOIN
1381
+
pds_repos pr ON d.did = pr.did
1382
+
LEFT JOIN
1383
+
endpoints e ON pr.endpoint_id = e.id
1384
+
LEFT JOIN
1385
+
primary_endpoints pe ON pr.endpoint_id = pe.id
1386
+
WHERE
1387
+
d.did = $1
1388
+
GROUP BY
1389
+
d.did, d.handle, d.pds, d.bundle_numbers, d.created_at
1390
+
`
1391
+
1392
+
var info GlobalDIDInfo
1393
+
var bundleNumbersJSON []byte
1394
+
var hostingOnJSON []byte
1395
+
var handle, pds sql.NullString
1396
+
1397
+
err := p.db.QueryRowContext(ctx, query, did).Scan(
1398
+
&info.DID,
1399
+
&handle,
1400
+
&pds,
1401
+
&bundleNumbersJSON,
1402
+
&info.CreatedAt,
1403
+
&hostingOnJSON,
1404
+
)
1405
+
if err != nil {
1406
+
return nil, err
1407
+
}
1408
+
1409
+
if handle.Valid {
1410
+
info.Handle = handle.String
1411
+
}
1412
+
if pds.Valid {
1413
+
info.CurrentPDS = pds.String
1414
+
}
1415
+
1416
+
if err := json.Unmarshal(bundleNumbersJSON, &info.BundleNumbers); err != nil {
1417
+
return nil, fmt.Errorf("failed to unmarshal bundle_numbers: %w", err)
1418
+
}
1419
+
1420
+
if err := json.Unmarshal(hostingOnJSON, &info.HostingOn); err != nil {
1421
+
return nil, fmt.Errorf("failed to unmarshal hosting_on: %w", err)
1422
+
}
1423
+
1424
+
return &info, nil
1425
+
}
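// Example (sketch): hosting_on is assembled entirely in SQL, and the FILTER clause keeps
// only repos whose endpoint is the primary entry for its server DID, so alias domains do
// not produce duplicate entries. A hypothetical consumer just ranges over the result:
//
//	info, err := db.GetGlobalDIDInfo(ctx, did)
//	if err != nil {
//		return err // sql.ErrNoRows when the DID is not indexed
//	}
//	for _, repo := range info.HostingOn {
//		log.Verbose("%s hosted on %s (active=%v, rev=%s)", info.DID, repo.Endpoint, repo.Active, repo.Rev)
//	}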
1426
+
1433
1427
func (p *PostgresDB) AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error {
1434
1428
if len(dids) == 0 {
1435
1429
return nil
···
1504
1498
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1505
1499
return count, err
1506
1500
}
1501
+
1502
+
func (p *PostgresDB) GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error) {
1503
+
query := `
1504
+
WITH unique_servers AS (
1505
+
SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text))
1506
+
e.id,
1507
+
e.ip,
1508
+
e.status
1509
+
FROM endpoints e
1510
+
WHERE e.endpoint_type = 'pds'
1511
+
ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC
1512
+
),
1513
+
pds_by_country AS (
1514
+
SELECT
1515
+
i.country,
1516
+
i.country_code,
1517
+
COUNT(DISTINCT us.id) as active_pds_count,
1518
+
SUM(latest.user_count) as total_users,
1519
+
AVG(latest.response_time) as avg_response_time
1520
+
FROM unique_servers us
1521
+
JOIN ip_infos i ON us.ip = i.ip
1522
+
LEFT JOIN LATERAL (
1523
+
SELECT user_count, response_time
1524
+
FROM endpoint_scans
1525
+
WHERE endpoint_id = us.id
1526
+
ORDER BY scanned_at DESC
1527
+
LIMIT 1
1528
+
) latest ON true
1529
+
WHERE us.status = 1
1530
+
AND i.country IS NOT NULL
1531
+
AND i.country != ''
1532
+
GROUP BY i.country, i.country_code
1533
+
),
1534
+
totals AS (
1535
+
SELECT
1536
+
SUM(active_pds_count) as total_pds,
1537
+
SUM(total_users) as total_users_global
1538
+
FROM pds_by_country
1539
+
)
1540
+
SELECT
1541
+
pbc.country,
1542
+
pbc.country_code,
1543
+
pbc.active_pds_count,
1544
+
ROUND((pbc.active_pds_count * 100.0 / NULLIF(t.total_pds, 0))::numeric, 2) as pds_percentage,
1545
+
COALESCE(pbc.total_users, 0) as total_users,
1546
+
ROUND((COALESCE(pbc.total_users, 0) * 100.0 / NULLIF(t.total_users_global, 0))::numeric, 2) as users_percentage,
1547
+
ROUND(COALESCE(pbc.avg_response_time, 0)::numeric, 2) as avg_response_time_ms
1548
+
FROM pds_by_country pbc
1549
+
CROSS JOIN totals t
1550
+
ORDER BY pbc.active_pds_count DESC
1551
+
`
1552
+
1553
+
rows, err := p.db.QueryContext(ctx, query)
1554
+
if err != nil {
1555
+
return nil, err
1556
+
}
1557
+
defer rows.Close()
1558
+
1559
+
var stats []*CountryStats
1560
+
for rows.Next() {
1561
+
var s CountryStats
1562
+
var pdsPercentage, usersPercentage sql.NullFloat64
1563
+
1564
+
err := rows.Scan(
1565
+
&s.Country,
1566
+
&s.CountryCode,
1567
+
&s.ActivePDSCount,
1568
+
&pdsPercentage,
1569
+
&s.TotalUsers,
1570
+
&usersPercentage,
1571
+
&s.AvgResponseTimeMS,
1572
+
)
1573
+
if err != nil {
1574
+
return nil, err
1575
+
}
1576
+
1577
+
if pdsPercentage.Valid {
1578
+
s.PDSPercentage = pdsPercentage.Float64
1579
+
}
1580
+
if usersPercentage.Valid {
1581
+
s.UsersPercentage = usersPercentage.Float64
1582
+
}
1583
+
1584
+
stats = append(stats, &s)
1585
+
}
1586
+
1587
+
return stats, rows.Err()
1588
+
}
1589
+
1590
+
func (p *PostgresDB) GetVersionStats(ctx context.Context) ([]*VersionStats, error) {
1591
+
query := `
1592
+
WITH unique_servers AS (
1593
+
SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text))
1594
+
e.id
1595
+
FROM endpoints e
1596
+
WHERE e.endpoint_type = 'pds'
1597
+
AND e.status = 1
1598
+
ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC
1599
+
),
1600
+
latest_scans AS (
1601
+
SELECT DISTINCT ON (us.id)
1602
+
us.id,
1603
+
es.version,
1604
+
es.user_count,
1605
+
es.scanned_at
1606
+
FROM unique_servers us
1607
+
JOIN endpoint_scans es ON us.id = es.endpoint_id
1608
+
WHERE es.version IS NOT NULL
1609
+
AND es.version != ''
1610
+
ORDER BY us.id, es.scanned_at DESC
1611
+
),
1612
+
version_groups AS (
1613
+
SELECT
1614
+
version,
1615
+
COUNT(*) as pds_count,
1616
+
SUM(user_count) as total_users,
1617
+
MIN(scanned_at) as first_seen,
1618
+
MAX(scanned_at) as last_seen
1619
+
FROM latest_scans
1620
+
GROUP BY version
1621
+
),
1622
+
totals AS (
1623
+
SELECT
1624
+
SUM(pds_count) as total_pds,
1625
+
SUM(total_users) as total_users_global
1626
+
FROM version_groups
1627
+
)
1628
+
SELECT
1629
+
vg.version,
1630
+
vg.pds_count,
1631
+
(vg.pds_count * 100.0 / NULLIF(t.total_pds, 0))::numeric as percentage,
1632
+
COALESCE(vg.total_users, 0) as total_users,
1633
+
(COALESCE(vg.total_users, 0) * 100.0 / NULLIF(t.total_users_global, 0))::numeric as users_percentage,
1634
+
vg.first_seen,
1635
+
vg.last_seen
1636
+
FROM version_groups vg
1637
+
CROSS JOIN totals t
1638
+
ORDER BY vg.pds_count DESC
1639
+
`
1640
+
1641
+
rows, err := p.db.QueryContext(ctx, query)
1642
+
if err != nil {
1643
+
return nil, err
1644
+
}
1645
+
defer rows.Close()
1646
+
1647
+
var stats []*VersionStats
1648
+
for rows.Next() {
1649
+
var s VersionStats
1650
+
var percentage, usersPercentage sql.NullFloat64
1651
+
1652
+
err := rows.Scan(
1653
+
&s.Version,
1654
+
&s.PDSCount,
1655
+
&percentage,
1656
+
&s.TotalUsers,
1657
+
&usersPercentage,
1658
+
&s.FirstSeen,
1659
+
&s.LastSeen,
1660
+
)
1661
+
if err != nil {
1662
+
return nil, err
1663
+
}
1664
+
1665
+
if percentage.Valid {
1666
+
s.Percentage = percentage.Float64
1667
+
s.PercentageText = formatPercentage(percentage.Float64)
1668
+
}
1669
+
if usersPercentage.Valid {
1670
+
s.UsersPercentage = usersPercentage.Float64
1671
+
}
1672
+
1673
+
stats = append(stats, &s)
1674
+
}
1675
+
1676
+
return stats, rows.Err()
1677
+
}
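// Example (sketch): both stats queries above deduplicate physical servers with
// DISTINCT ON (COALESCE(server_did, id::text)), attributing each server to its
// earliest-discovered endpoint so alias domains are not counted twice. A hypothetical
// handler (w is an http.ResponseWriter) only has to serialize the rows:
//
//	stats, err := db.GetVersionStats(ctx)
//	if err != nil {
//		http.Error(w, err.Error(), http.StatusInternalServerError)
//		return
//	}
//	_ = json.NewEncoder(w).Encode(stats) // percentage_text is precomputed for display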
1678
+
1679
+
// formatPercentage renders a percentage string with precision scaled to the magnitude of the value
1680
+
func formatPercentage(pct float64) string {
1681
+
if pct >= 10 {
1682
+
return fmt.Sprintf("%.2f%%", pct)
1683
+
} else if pct >= 1 {
1684
+
return fmt.Sprintf("%.3f%%", pct)
1685
+
} else if pct >= 0.01 {
1686
+
return fmt.Sprintf("%.4f%%", pct)
1687
+
} else if pct > 0 {
1688
+
return fmt.Sprintf("%.6f%%", pct)
1689
+
}
1690
+
return "0%"
1691
+
}
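// Worked example (sketch): precision widens as the value shrinks so tiny shares stay
// visible: formatPercentage(42.3) == "42.30%", formatPercentage(3.21) == "3.210%",
// formatPercentage(0.0456) == "0.0456%", formatPercentage(0.0001) == "0.000100%",
// and zero or negative input falls through to "0%".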
1692
+
1693
+
func (p *PostgresDB) UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error {
1694
+
if len(repos) == 0 {
1695
+
return nil
1696
+
}
1697
+
1698
+
// Step 1: Load all existing repos for this endpoint into memory
1699
+
query := `
1700
+
SELECT did, head, rev, active, status
1701
+
FROM pds_repos
1702
+
WHERE endpoint_id = $1
1703
+
`
1704
+
1705
+
rows, err := p.db.QueryContext(ctx, query, endpointID)
1706
+
if err != nil {
1707
+
return err
1708
+
}
1709
+
1710
+
existingRepos := make(map[string]*PDSRepo)
1711
+
for rows.Next() {
1712
+
var repo PDSRepo
1713
+
var head, rev, status sql.NullString
1714
+
1715
+
err := rows.Scan(&repo.DID, &head, &rev, &repo.Active, &status)
1716
+
if err != nil {
1717
+
rows.Close()
1718
+
return err
1719
+
}
1720
+
1721
+
if head.Valid {
1722
+
repo.Head = head.String
1723
+
}
1724
+
if rev.Valid {
1725
+
repo.Rev = rev.String
1726
+
}
1727
+
if status.Valid {
1728
+
repo.Status = status.String
1729
+
}
1730
+
1731
+
existingRepos[repo.DID] = &repo
1732
+
}
1733
+
rows.Close()
1734
+
1735
+
if err := rows.Err(); err != nil {
1736
+
return err
1737
+
}
1738
+
1739
+
// Step 2: Compare and collect changes
1740
+
var newRepos []PDSRepoData
1741
+
var changedRepos []PDSRepoData
1742
+
1743
+
for _, repo := range repos {
1744
+
existing, exists := existingRepos[repo.DID]
1745
+
if !exists {
1746
+
// New repo
1747
+
newRepos = append(newRepos, repo)
1748
+
} else if existing.Head != repo.Head ||
1749
+
existing.Rev != repo.Rev ||
1750
+
existing.Active != repo.Active ||
1751
+
existing.Status != repo.Status {
1752
+
// Repo changed
1753
+
changedRepos = append(changedRepos, repo)
1754
+
}
1755
+
}
1756
+
1757
+
// Log comparison results
1758
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, total=%d, existing=%d, new=%d, changed=%d, unchanged=%d",
1759
+
endpointID, len(repos), len(existingRepos), len(newRepos), len(changedRepos),
1760
+
len(repos)-len(newRepos)-len(changedRepos))
1761
+
1762
+
// If nothing changed, return early
1763
+
if len(newRepos) == 0 && len(changedRepos) == 0 {
1764
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, no changes detected, skipping database operations", endpointID)
1765
+
return nil
1766
+
}
1767
+
1768
+
// Step 3: Execute batched operations
1769
+
conn, err := p.pool.Acquire(ctx)
1770
+
if err != nil {
1771
+
return err
1772
+
}
1773
+
defer conn.Release()
1774
+
1775
+
tx, err := conn.Begin(ctx)
1776
+
if err != nil {
1777
+
return err
1778
+
}
1779
+
defer tx.Rollback(ctx)
1780
+
1781
+
// Insert new repos
1782
+
if len(newRepos) > 0 {
1783
+
_, err := tx.Exec(ctx, `
1784
+
CREATE TEMP TABLE temp_new_repos (
1785
+
did TEXT,
1786
+
head TEXT,
1787
+
rev TEXT,
1788
+
active BOOLEAN,
1789
+
status TEXT
1790
+
) ON COMMIT DROP
1791
+
`)
1792
+
if err != nil {
1793
+
return err
1794
+
}
1795
+
1796
+
_, err = tx.Conn().CopyFrom(
1797
+
ctx,
1798
+
pgx.Identifier{"temp_new_repos"},
1799
+
[]string{"did", "head", "rev", "active", "status"},
1800
+
pgx.CopyFromSlice(len(newRepos), func(i int) ([]interface{}, error) {
1801
+
repo := newRepos[i]
1802
+
return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil
1803
+
}),
1804
+
)
1805
+
if err != nil {
1806
+
return err
1807
+
}
1808
+
1809
+
result, err := tx.Exec(ctx, `
1810
+
INSERT INTO pds_repos (endpoint_id, did, head, rev, active, status, first_seen, last_seen)
1811
+
SELECT $1, did, head, rev, active, status,
1812
+
TIMEZONE('UTC', NOW()),
1813
+
TIMEZONE('UTC', NOW())
1814
+
FROM temp_new_repos
1815
+
`, endpointID)
1816
+
if err != nil {
1817
+
return err
1818
+
}
1819
+
1820
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, inserted %d new repos", endpointID, result.RowsAffected())
1821
+
}
1822
+
1823
+
// Update changed repos
1824
+
if len(changedRepos) > 0 {
1825
+
_, err := tx.Exec(ctx, `
1826
+
CREATE TEMP TABLE temp_changed_repos (
1827
+
did TEXT,
1828
+
head TEXT,
1829
+
rev TEXT,
1830
+
active BOOLEAN,
1831
+
status TEXT
1832
+
) ON COMMIT DROP
1833
+
`)
1834
+
if err != nil {
1835
+
return err
1836
+
}
1837
+
1838
+
_, err = tx.Conn().CopyFrom(
1839
+
ctx,
1840
+
pgx.Identifier{"temp_changed_repos"},
1841
+
[]string{"did", "head", "rev", "active", "status"},
1842
+
pgx.CopyFromSlice(len(changedRepos), func(i int) ([]interface{}, error) {
1843
+
repo := changedRepos[i]
1844
+
return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil
1845
+
}),
1846
+
)
1847
+
if err != nil {
1848
+
return err
1849
+
}
1850
+
1851
+
result, err := tx.Exec(ctx, `
1852
+
UPDATE pds_repos
1853
+
SET head = t.head,
1854
+
rev = t.rev,
1855
+
active = t.active,
1856
+
status = t.status,
1857
+
last_seen = TIMEZONE('UTC', NOW()),
1858
+
updated_at = TIMEZONE('UTC', NOW())
1859
+
FROM temp_changed_repos t
1860
+
WHERE pds_repos.endpoint_id = $1
1861
+
AND pds_repos.did = t.did
1862
+
`, endpointID)
1863
+
if err != nil {
1864
+
return err
1865
+
}
1866
+
1867
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, updated %d changed repos", endpointID, result.RowsAffected())
1868
+
}
1869
+
1870
+
if err := tx.Commit(ctx); err != nil {
1871
+
return err
1872
+
}
1873
+
1874
+
log.Verbose("UpsertPDSRepos: endpoint_id=%d, transaction committed successfully", endpointID)
1875
+
return nil
1876
+
}
1877
+
1878
+
func (p *PostgresDB) GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) {
1879
+
query := `
1880
+
SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at
1881
+
FROM pds_repos
1882
+
WHERE endpoint_id = $1
1883
+
`
1884
+
1885
+
args := []interface{}{endpointID}
1886
+
argIdx := 2
1887
+
1888
+
if activeOnly {
1889
+
query += " AND active = true"
1890
+
}
1891
+
1892
+
// Order by id (primary key) - fastest
1893
+
query += " ORDER BY id DESC"
1894
+
1895
+
if limit > 0 {
1896
+
query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
1897
+
args = append(args, limit, offset)
1898
+
}
1899
+
1900
+
rows, err := p.db.QueryContext(ctx, query, args...)
1901
+
if err != nil {
1902
+
return nil, err
1903
+
}
1904
+
defer rows.Close()
1905
+
1906
+
var repos []*PDSRepo
1907
+
for rows.Next() {
1908
+
var repo PDSRepo
1909
+
var head, rev, status sql.NullString
1910
+
1911
+
err := rows.Scan(
1912
+
&repo.ID, &repo.EndpointID, &repo.DID, &head, &rev,
1913
+
&repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt,
1914
+
)
1915
+
if err != nil {
1916
+
return nil, err
1917
+
}
1918
+
1919
+
if head.Valid {
1920
+
repo.Head = head.String
1921
+
}
1922
+
if rev.Valid {
1923
+
repo.Rev = rev.String
1924
+
}
1925
+
if status.Valid {
1926
+
repo.Status = status.String
1927
+
}
1928
+
1929
+
repos = append(repos, &repo)
1930
+
}
1931
+
1932
+
return repos, rows.Err()
1933
+
}
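// Example (sketch): a non-positive limit skips the LIMIT/OFFSET clause entirely, so a
// hypothetical paged API call passes both values explicitly:
//
//	page, err := db.GetPDSRepos(ctx, endpointID, true, pageSize, pageNum*pageSize)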
1934
+
1935
+
func (p *PostgresDB) GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error) {
1936
+
query := `
1937
+
SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at
1938
+
FROM pds_repos
1939
+
WHERE did = $1
1940
+
ORDER BY last_seen DESC
1941
+
`
1942
+
1943
+
rows, err := p.db.QueryContext(ctx, query, did)
1944
+
if err != nil {
1945
+
return nil, err
1946
+
}
1947
+
defer rows.Close()
1948
+
1949
+
var repos []*PDSRepo
1950
+
for rows.Next() {
1951
+
var repo PDSRepo
1952
+
var head, rev, status sql.NullString
1953
+
1954
+
err := rows.Scan(
1955
+
&repo.ID, &repo.EndpointID, &repo.DID, &head, &rev,
1956
+
&repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt,
1957
+
)
1958
+
if err != nil {
1959
+
return nil, err
1960
+
}
1961
+
1962
+
if head.Valid {
1963
+
repo.Head = head.String
1964
+
}
1965
+
if rev.Valid {
1966
+
repo.Rev = rev.String
1967
+
}
1968
+
if status.Valid {
1969
+
repo.Status = status.String
1970
+
}
1971
+
1972
+
repos = append(repos, &repo)
1973
+
}
1974
+
1975
+
return repos, rows.Err()
1976
+
}
1977
+
1978
+
func (p *PostgresDB) GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error) {
1979
+
query := `
1980
+
SELECT
1981
+
COUNT(*) as total_repos,
1982
+
COUNT(*) FILTER (WHERE active = true) as active_repos,
1983
+
COUNT(*) FILTER (WHERE active = false) as inactive_repos,
1984
+
COUNT(*) FILTER (WHERE status IS NOT NULL AND status != '') as repos_with_status,
1985
+
COUNT(*) FILTER (WHERE updated_at > CURRENT_TIMESTAMP - INTERVAL '1 hour') as recent_changes
1986
+
FROM pds_repos
1987
+
WHERE endpoint_id = $1
1988
+
`
1989
+
1990
+
var totalRepos, activeRepos, inactiveRepos, reposWithStatus, recentChanges int64
1991
+
1992
+
err := p.db.QueryRowContext(ctx, query, endpointID).Scan(
1993
+
&totalRepos, &activeRepos, &inactiveRepos, &reposWithStatus, &recentChanges,
1994
+
)
1995
+
if err != nil {
1996
+
return nil, err
1997
+
}
1998
+
1999
+
return map[string]interface{}{
2000
+
"total_repos": totalRepos,
2001
+
"active_repos": activeRepos,
2002
+
"inactive_repos": inactiveRepos,
2003
+
"repos_with_status": reposWithStatus,
2004
+
"recent_changes": recentChanges,
2005
+
}, nil
2006
+
}
2007
+
2008
+
// GetTableSizes fetches size information (in bytes) for all tables in the specified schema.
2009
+
func (p *PostgresDB) GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) {
2010
+
// Query now selects raw byte values directly
2011
+
query := `
2012
+
SELECT
2013
+
c.relname AS table_name,
2014
+
pg_total_relation_size(c.oid) AS total_bytes,
2015
+
pg_relation_size(c.oid) AS table_heap_bytes,
2016
+
pg_indexes_size(c.oid) AS indexes_bytes
2017
+
FROM
2018
+
pg_class c
2019
+
LEFT JOIN
2020
+
pg_namespace n ON n.oid = c.relnamespace
2021
+
WHERE
2022
+
c.relkind = 'r' -- 'r' = ordinary table
2023
+
AND n.nspname = $1
2024
+
ORDER BY
2025
+
total_bytes DESC;
2026
+
`
2027
+
rows, err := p.db.QueryContext(ctx, query, schema)
2028
+
if err != nil {
2029
+
return nil, fmt.Errorf("failed to query table sizes: %w", err)
2030
+
}
2031
+
defer rows.Close()
2032
+
2033
+
var results []TableSizeInfo
2034
+
for rows.Next() {
2035
+
var info TableSizeInfo
2036
+
// Scan directly into int64 fields
2037
+
if err := rows.Scan(
2038
+
&info.TableName,
2039
+
&info.TotalBytes,
2040
+
&info.TableHeapBytes,
2041
+
&info.IndexesBytes,
2042
+
); err != nil {
2043
+
return nil, fmt.Errorf("failed to scan table size row: %w", err)
2044
+
}
2045
+
results = append(results, info)
2046
+
}
2047
+
if err := rows.Err(); err != nil {
2048
+
return nil, fmt.Errorf("error iterating table size rows: %w", err)
2049
+
}
2050
+
2051
+
return results, nil
2052
+
}
2053
+
2054
+
// GetIndexSizes fetches size information (in bytes) for all indexes in the specified schema.
2055
+
func (p *PostgresDB) GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) {
2056
+
// Query now selects raw byte values directly
2057
+
query := `
2058
+
SELECT
2059
+
c.relname AS index_name,
2060
+
COALESCE(i.indrelid::regclass::text, 'N/A') AS table_name,
2061
+
pg_relation_size(c.oid) AS index_bytes
2062
+
FROM
2063
+
pg_class c
2064
+
LEFT JOIN
2065
+
pg_index i ON i.indexrelid = c.oid
2066
+
LEFT JOIN
2067
+
pg_namespace n ON n.oid = c.relnamespace
2068
+
WHERE
2069
+
c.relkind = 'i' -- 'i' = index
2070
+
AND n.nspname = $1
2071
+
ORDER BY
2072
+
index_bytes DESC;
2073
+
`
2074
+
rows, err := p.db.QueryContext(ctx, query, schema)
2075
+
if err != nil {
2076
+
return nil, fmt.Errorf("failed to query index sizes: %w", err)
2077
+
}
2078
+
defer rows.Close()
2079
+
2080
+
var results []IndexSizeInfo
2081
+
for rows.Next() {
2082
+
var info IndexSizeInfo
2083
+
var tableName sql.NullString
2084
+
// Scan directly into int64 field
2085
+
if err := rows.Scan(
2086
+
&info.IndexName,
2087
+
&tableName,
2088
+
&info.IndexBytes,
2089
+
); err != nil {
2090
+
return nil, fmt.Errorf("failed to scan index size row: %w", err)
2091
+
}
2092
+
if tableName.Valid {
2093
+
info.TableName = tableName.String
2094
+
} else {
2095
+
info.TableName = "N/A"
2096
+
}
2097
+
results = append(results, info)
2098
+
}
2099
+
if err := rows.Err(); err != nil {
2100
+
return nil, fmt.Errorf("error iterating index size rows: %w", err)
2101
+
}
2102
+
2103
+
return results, nil
2104
+
}
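// Example (sketch): both size helpers return raw byte counts and leave formatting to
// the caller; a hypothetical pretty-printer for API or log output:
//
//	func humanBytes(n int64) string {
//		const unit = 1024
//		if n < unit {
//			return fmt.Sprintf("%d B", n)
//		}
//		div, exp := int64(unit), 0
//		for m := n / unit; m >= unit; m /= unit {
//			div *= unit
//			exp++
//		}
//		return fmt.Sprintf("%.1f %ciB", float64(n)/float64(div), "KMGTPE"[exp])
//	}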
+121 -17 internal/storage/types.go
···
1
1
package storage
2
2
3
3
import (
4
+
"database/sql"
4
5
"fmt"
5
6
"path/filepath"
6
7
"time"
···
20
21
ID int64
21
22
EndpointType string
22
23
Endpoint string
24
+
ServerDID string
23
25
DiscoveredAt time.Time
24
26
LastChecked time.Time
25
27
Status int
26
28
IP string
29
+
IPv6 string
27
30
IPResolvedAt time.Time
31
+
Valid bool
28
32
UpdatedAt time.Time
29
33
}
30
34
···
51
55
Status int
52
56
ResponseTime float64
53
57
UserCount int64
54
-
Version string // NEW: Add this field
58
+
Version string
59
+
UsedIP string // NEW: Track which IP was actually used
55
60
ScanData *EndpointScanData
56
61
ScannedAt time.Time
57
62
}
···
72
77
73
78
// EndpointFilter for querying endpoints
74
79
type EndpointFilter struct {
75
-
Type string // "pds", "labeler", etc.
76
-
Status string
77
-
MinUserCount int64
78
-
Limit int
79
-
Offset int
80
+
Type string
81
+
Status string
82
+
MinUserCount int64
83
+
OnlyStale bool
84
+
OnlyValid bool
85
+
RecheckInterval time.Duration
86
+
Random bool
87
+
Limit int
88
+
Offset int
80
89
}
81
90
82
91
// EndpointStats contains aggregate statistics about endpoints
···
113
122
StartTime time.Time
114
123
EndTime time.Time
115
124
BoundaryCIDs []string
116
-
DIDs []string
125
+
DIDCount int // Changed from DIDs []string
117
126
Hash string
118
127
CompressedHash string
119
128
CompressedSize int64
···
136
145
return 10000
137
146
}
138
147
139
-
// MempoolOperation represents an operation waiting to be bundled
140
-
type MempoolOperation struct {
141
-
ID int64
142
-
DID string
143
-
Operation string
144
-
CID string
145
-
CreatedAt time.Time
146
-
AddedAt time.Time
148
+
type PLCHistoryPoint struct {
149
+
Date string `json:"date"`
150
+
BundleNumber int `json:"last_bundle_number"`
151
+
OperationCount int `json:"operations"`
152
+
UncompressedSize int64 `json:"size_uncompressed"`
153
+
CompressedSize int64 `json:"size_compressed"`
154
+
CumulativeUncompressed int64 `json:"cumulative_uncompressed"`
155
+
CumulativeCompressed int64 `json:"cumulative_compressed"`
147
156
}
148
157
149
158
// ScanCursor stores scanning progress
···
157
166
// DIDRecord represents a DID entry in the database
158
167
type DIDRecord struct {
159
168
DID string `json:"did"`
169
+
Handle string `json:"handle,omitempty"`
170
+
CurrentPDS string `json:"current_pds,omitempty"`
171
+
LastOpAt time.Time `json:"last_op_at,omitempty"`
160
172
BundleNumbers []int `json:"bundle_numbers"`
161
173
CreatedAt time.Time `json:"created_at"`
162
174
}
163
175
176
+
// GlobalDIDInfo consolidates DID data from PLC and PDS tables
177
+
type GlobalDIDInfo struct {
178
+
DIDRecord // Embeds all fields: DID, Handle, CurrentPDS, etc.
179
+
HostingOn []*PDSRepo `json:"hosting_on"`
180
+
}
181
+
164
182
// IPInfo represents IP information (stored with IP as primary key)
165
183
type IPInfo struct {
166
184
IP string `json:"ip"`
···
171
189
ASNOrg string `json:"asn_org,omitempty"`
172
190
IsDatacenter bool `json:"is_datacenter"`
173
191
IsVPN bool `json:"is_vpn"`
192
+
IsCrawler bool `json:"is_crawler"`
193
+
IsTor bool `json:"is_tor"`
194
+
IsProxy bool `json:"is_proxy"`
174
195
Latitude float32 `json:"latitude,omitempty"`
175
196
Longitude float32 `json:"longitude,omitempty"`
176
197
RawData map[string]interface{} `json:"raw_data,omitempty"`
···
178
199
UpdatedAt time.Time `json:"updated_at"`
179
200
}
180
201
202
+
// IsHome returns true if this is a residential/home IP
203
+
// (not crawler, datacenter, tor, proxy, or vpn)
204
+
func (i *IPInfo) IsHome() bool {
205
+
return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN
206
+
}
207
+
181
208
// PDSListItem is a virtual type created by JOIN for /pds endpoint
182
209
type PDSListItem struct {
183
210
// From endpoints table
184
211
ID int64
185
212
Endpoint string
213
+
ServerDID string
186
214
DiscoveredAt time.Time
187
215
LastChecked time.Time
188
216
Status int
189
217
IP string
218
+
IPv6 string
219
+
Valid bool // NEW
190
220
191
221
// From latest endpoint_scans (via JOIN)
192
222
LatestScan *struct {
193
223
UserCount int
194
224
ResponseTime float64
195
-
Version string // NEW: Add this
225
+
Version string
196
226
ScannedAt time.Time
197
227
}
198
228
···
208
238
LatestScan *struct {
209
239
UserCount int
210
240
ResponseTime float64
211
-
Version string // ADD THIS LINE
241
+
Version string
212
242
ServerInfo interface{} // Full server description
213
243
ScannedAt time.Time
214
244
}
245
+
246
+
// NEW: Aliases (other domains pointing to same server)
247
+
Aliases []string `json:"aliases,omitempty"`
248
+
IsPrimary bool `json:"is_primary"`
249
+
}
250
+
251
+
type CountryStats struct {
252
+
Country string `json:"country"`
253
+
CountryCode string `json:"country_code"`
254
+
ActivePDSCount int64 `json:"active_pds_count"`
255
+
PDSPercentage float64 `json:"pds_percentage"`
256
+
TotalUsers int64 `json:"total_users"`
257
+
UsersPercentage float64 `json:"users_percentage"`
258
+
AvgResponseTimeMS float64 `json:"avg_response_time_ms"`
259
+
}
260
+
261
+
type VersionStats struct {
262
+
Version string `json:"version"`
263
+
PDSCount int64 `json:"pds_count"`
264
+
Percentage float64 `json:"percentage"`
265
+
PercentageText string `json:"percentage_text"`
266
+
TotalUsers int64 `json:"total_users"`
267
+
UsersPercentage float64 `json:"users_percentage"`
268
+
FirstSeen time.Time `json:"first_seen"`
269
+
LastSeen time.Time `json:"last_seen"`
270
+
}
271
+
272
+
type PDSRepo struct {
273
+
ID int64 `json:"id"`
274
+
EndpointID int64 `json:"endpoint_id"`
275
+
Endpoint string `json:"endpoint,omitempty"`
276
+
DID string `json:"did"`
277
+
Head string `json:"head,omitempty"`
278
+
Rev string `json:"rev,omitempty"`
279
+
Active bool `json:"active"`
280
+
Status string `json:"status,omitempty"`
281
+
FirstSeen time.Time `json:"first_seen"`
282
+
LastSeen time.Time `json:"last_seen"`
283
+
UpdatedAt time.Time `json:"updated_at"`
284
+
}
285
+
286
+
type PDSRepoData struct {
287
+
DID string
288
+
Head string
289
+
Rev string
290
+
Active bool
291
+
Status string
292
+
}
293
+
294
+
type DIDBackfillInfo struct {
295
+
DID string
296
+
LastBundleNum int
297
+
}
298
+
299
+
type DIDStateUpdateData struct {
300
+
DID string
301
+
Handle sql.NullString // Use sql.NullString for potential NULLs
302
+
PDS sql.NullString
303
+
OpTime time.Time
304
+
}
305
+
306
+
// TableSizeInfo holds size information for a database table.
307
+
type TableSizeInfo struct {
308
+
TableName string `json:"table_name"`
309
+
TotalBytes int64 `json:"total_bytes"` // Raw bytes
310
+
TableHeapBytes int64 `json:"table_heap_bytes"` // Raw bytes
311
+
IndexesBytes int64 `json:"indexes_bytes"` // Raw bytes
312
+
}
313
+
314
+
// IndexSizeInfo holds size information for a database index.
315
+
type IndexSizeInfo struct {
316
+
IndexName string `json:"index_name"`
317
+
TableName string `json:"table_name"`
318
+
IndexBytes int64 `json:"index_bytes"` // Raw bytes
215
319
}
+27 -3 internal/worker/scheduler.go
···
5
5
"sync"
6
6
"time"
7
7
8
-
"github.com/atscan/atscanner/internal/log"
8
+
"github.com/atscan/atscand/internal/log"
9
+
"github.com/atscan/atscand/internal/monitor"
9
10
)
10
11
11
12
type Job struct {
···
34
35
Interval: interval,
35
36
Fn: fn,
36
37
})
38
+
39
+
// Register job with tracker
40
+
monitor.GetTracker().RegisterJob(name)
37
41
}
38
42
39
43
func (s *Scheduler) Start(ctx context.Context) {
···
52
56
53
57
// Run immediately
54
58
log.Info("Starting job: %s", job.Name)
55
-
job.Fn()
59
+
s.executeJob(job)
56
60
57
61
for {
62
+
// Set next run time
63
+
monitor.GetTracker().SetNextRun(job.Name, time.Now().Add(job.Interval))
64
+
58
65
select {
59
66
case <-ctx.Done():
60
67
log.Info("Stopping job: %s", job.Name)
61
68
return
62
69
case <-ticker.C:
63
70
log.Info("Running job: %s", job.Name)
64
-
job.Fn()
71
+
s.executeJob(job)
65
72
}
66
73
}
67
74
}
75
+
76
+
func (s *Scheduler) executeJob(job *Job) {
77
+
monitor.GetTracker().StartJob(job.Name)
78
+
79
+
// Run job and capture any panic
80
+
func() {
81
+
defer func() {
82
+
if r := recover(); r != nil {
83
+
log.Error("Job %s panicked: %v", job.Name, r)
84
+
monitor.GetTracker().CompleteJob(job.Name, nil)
85
+
}
86
+
}()
87
+
88
+
job.Fn()
89
+
monitor.GetTracker().CompleteJob(job.Name, nil)
90
+
}()
91
+
}
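// Example (sketch; sched, the job name, and plcScanner are illustrative): executeJob
// recovers panics, logs them, and still marks the job complete in the tracker, so one
// failing job cannot kill the scheduler loop. Registration stays a one-liner:
//
//	sched.AddJob("plc-scan", 5*time.Minute, func() {
//		if err := plcScanner.Run(ctx); err != nil {
//			log.Error("plc scan: %v", err)
//		}
//	})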
+125 utils/db-sizes.sh
···
1
+
#!/bin/bash
2
+
3
+
# === Configuration ===
4
+
CONFIG_FILE="config.yaml" # Path to your config file
5
+
SCHEMA_NAME="public" # Replace if your schema is different
6
+
7
+
# Check if config file exists
8
+
if [ ! -f "$CONFIG_FILE" ]; then
9
+
echo "Error: Config file not found at '$CONFIG_FILE'"
10
+
exit 1
11
+
fi
12
+
13
+
# Check if yq is installed
14
+
if ! command -v yq &> /dev/null; then
15
+
echo "Error: 'yq' command not found. Please install yq (Go version by Mike Farah)."
16
+
echo "See: https://github.com/mikefarah/yq/"
17
+
exit 1
18
+
fi
19
+
20
+
echo "--- Reading connection info from '$CONFIG_FILE' ---"
21
+
22
+
# === Extract Database Config using yq ===
23
+
DB_TYPE=$(yq e '.database.type' "$CONFIG_FILE")
24
+
DB_CONN_STRING=$(yq e '.database.path' "$CONFIG_FILE") # This is likely a URI
25
+
26
+
if [ -z "$DB_TYPE" ] || [ -z "$DB_CONN_STRING" ]; then
27
+
echo "Error: Could not read database type or path from '$CONFIG_FILE'."
28
+
exit 1
29
+
fi
30
+
31
+
# === Parse the Connection String ===
32
+
DB_USER=""
33
+
DB_PASSWORD=""
34
+
DB_HOST="localhost" # Default
35
+
DB_PORT="5432" # Default
36
+
DB_NAME=""
37
+
38
+
# Use regex to parse the URI (handles postgres:// or postgresql://, optional password/port, and query parameters)
39
+
if [[ "$DB_CONN_STRING" =~ ^(postgres|postgresql)://([^:]+)(:([^@]+))?@([^:/]+)(:([0-9]+))?/([^?]+)(\?.+)?$ ]]; then
40
+
DB_USER="${BASH_REMATCH[2]}"
41
+
DB_PASSWORD="${BASH_REMATCH[4]}" # Optional group
42
+
DB_HOST="${BASH_REMATCH[5]}"
43
+
DB_PORT="${BASH_REMATCH[7]:-$DB_PORT}" # Use extracted port or default
44
+
DB_NAME="${BASH_REMATCH[8]}" # Database name before the '?'
45
+
else
46
+
echo "Error: Could not parse database connection string URI: $DB_CONN_STRING"
47
+
exit 1
48
+
fi
49
+
50
+
# Set PGPASSWORD environment variable if password was found
51
+
if [ -n "$DB_PASSWORD" ]; then
52
+
export PGPASSWORD="$DB_PASSWORD"
53
+
else
54
+
echo "Warning: No password found in connection string. Relying on ~/.pgpass or password prompt."
55
+
unset PGPASSWORD
56
+
fi
57
+
58
+
echo "--- Database Size Investigation ---"
59
+
echo "Database: $DB_NAME"
60
+
echo "Schema: $SCHEMA_NAME"
61
+
echo "User: $DB_USER"
62
+
echo "Host: $DB_HOST:$DB_PORT"
63
+
echo "-----------------------------------"
64
+
65
+
# === Table Sizes ===
66
+
echo ""
67
+
echo "## Table Sizes (Schema: $SCHEMA_NAME) ##"
68
+
# Removed --tuples-only and --no-align, added -P footer=off
69
+
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
70
+
SELECT
71
+
c.relname AS "Table Name",
72
+
pg_size_pretty(pg_total_relation_size(c.oid)) AS "Total Size",
73
+
pg_size_pretty(pg_relation_size(c.oid)) AS "Table Heap Size",
74
+
pg_size_pretty(pg_indexes_size(c.oid)) AS "Indexes Size"
75
+
FROM
76
+
pg_class c
77
+
LEFT JOIN
78
+
pg_namespace n ON n.oid = c.relnamespace
79
+
WHERE
80
+
c.relkind = 'r' -- 'r' = ordinary table
81
+
AND n.nspname = '$SCHEMA_NAME'
82
+
ORDER BY
83
+
pg_total_relation_size(c.oid) DESC;
84
+
EOF
85
+
86
+
if [ $? -ne 0 ]; then
87
+
echo "Error querying table sizes. Check connection details, permissions, and password."
88
+
unset PGPASSWORD
89
+
exit 1
90
+
fi
91
+
92
+
# === Index Sizes ===
93
+
echo ""
94
+
echo "## Index Sizes (Schema: $SCHEMA_NAME) ##"
95
+
# Removed --tuples-only and --no-align, added -P footer=off
96
+
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
97
+
SELECT
98
+
c.relname AS "Index Name",
99
+
i.indrelid::regclass AS "Table Name", -- Show associated table
100
+
pg_size_pretty(pg_relation_size(c.oid)) AS "Index Size"
101
+
FROM
102
+
pg_class c
103
+
LEFT JOIN
104
+
pg_index i ON i.indexrelid = c.oid
105
+
LEFT JOIN
106
+
pg_namespace n ON n.oid = c.relnamespace
107
+
WHERE
108
+
c.relkind = 'i' -- 'i' = index
109
+
AND n.nspname = '$SCHEMA_NAME'
110
+
ORDER BY
111
+
pg_relation_size(c.oid) DESC;
112
+
EOF
113
+
114
+
if [ $? -ne 0 ]; then
115
+
echo "Error querying index sizes. Check connection details, permissions, and password."
116
+
unset PGPASSWORD
117
+
exit 1
118
+
fi
119
+
120
+
echo ""
121
+
echo "-----------------------------------"
122
+
echo "Investigation complete."
123
+
124
+
# Unset the password variable for security
125
+
unset PGPASSWORD
+113 utils/import-labels.js
···
1
+
import { file, write } from "bun";
2
+
import { join } from "path";
3
+
import { mkdir } from "fs/promises";
4
+
import { init, compress } from "@bokuweb/zstd-wasm";
5
+
6
+
// --- Configuration ---
7
+
const CSV_FILE = process.argv[2];
8
+
const CONFIG_FILE = "config.yaml";
9
+
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
10
+
// ---------------------
11
+
12
+
if (!CSV_FILE) {
13
+
console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
14
+
process.exit(1);
15
+
}
16
+
17
+
console.log("========================================");
18
+
console.log("PLC Operation Labels Import (Bun + WASM)");
19
+
console.log("========================================");
20
+
21
+
// 1. Read and parse config
22
+
console.log(`Loading config from ${CONFIG_FILE}...`);
23
+
const configFile = await file(CONFIG_FILE).text();
24
+
const config = Bun.YAML.parse(configFile);
25
+
const bundleDir = config?.plc?.bundle_dir;
26
+
27
+
if (!bundleDir) {
28
+
console.error("Error: Could not parse plc.bundle_dir from config.yaml");
29
+
process.exit(1);
30
+
}
31
+
32
+
const FINAL_LABELS_DIR = join(bundleDir, "labels");
33
+
await mkdir(FINAL_LABELS_DIR, { recursive: true });
34
+
35
+
console.log(`CSV File: ${CSV_FILE}`);
36
+
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
37
+
console.log("");
38
+
39
+
// 2. Initialize Zstd WASM module
40
+
await init();
41
+
42
+
// --- Pass 1: Read entire file into memory and group by bundle ---
43
+
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
44
+
console.warn("This will use a large amount of RAM!");
45
+
46
+
const startTime = Date.now();
47
+
const bundles = new Map(); // Map<string, string[]>
48
+
let lineCount = 0;
49
+
50
+
const inputFile = file(CSV_FILE);
51
+
const fileStream = inputFile.stream();
52
+
const decoder = new TextDecoder();
53
+
let remainder = "";
54
+
55
+
for await (const chunk of fileStream) {
56
+
const text = remainder + decoder.decode(chunk);
57
+
const lines = text.split("\n");
58
+
remainder = lines.pop() || "";
59
+
60
+
for (const line of lines) {
61
+
if (line === "") continue;
62
+
lineCount++;
63
+
64
+
if (lineCount === 1 && line.startsWith("bundle,")) {
65
+
continue; // Skip header
66
+
}
67
+
68
+
const firstCommaIndex = line.indexOf(",");
69
+
if (firstCommaIndex === -1) {
70
+
console.warn(`Skipping malformed line: ${line}`);
71
+
continue;
72
+
}
73
+
const bundleNumStr = line.substring(0, firstCommaIndex);
74
+
const bundleKey = bundleNumStr.padStart(6, "0");
75
+
76
+
// Add line to the correct bundle's array
77
+
if (!bundles.has(bundleKey)) {
78
+
bundles.set(bundleKey, []);
79
+
}
80
+
bundles.get(bundleKey).push(line);
81
+
}
82
+
}
83
+
// Note: We ignore any final `remainder` as it's likely an empty line
84
+
85
+
console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
86
+
console.log(`Found ${bundles.size} unique bundles.`);
87
+
88
+
// --- Pass 2: Compress and write each bundle ---
89
+
console.log("\nPass 2/2: Compressing and writing bundle files...");
90
+
let i = 0;
91
+
for (const [bundleKey, lines] of bundles.entries()) {
92
+
i++;
93
+
console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
94
+
95
+
// Join all lines for this bundle into one big string
96
+
const content = lines.join("\n");
97
+
98
+
// Compress the string
99
+
const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
100
+
101
+
// Write the compressed data to the file
102
+
const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
103
+
await write(outPath, compressedData);
104
+
}
105
+
106
+
// 3. Clean up
107
+
const totalTime = (Date.now() - startTime) / 1000;
108
+
console.log("\n========================================");
109
+
console.log("Import Summary");
110
+
console.log("========================================");
111
+
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
112
+
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
113
+
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
+91 utils/import-labels.sh
···
1
+
#!/bin/bash
2
+
# import-labels-v4-sorted-pipe.sh
3
+
4
+
set -e
5
+
6
+
if [ $# -lt 1 ]; then
7
+
echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>"
8
+
exit 1
9
+
fi
10
+
11
+
CSV_FILE="$1"
12
+
CONFIG_FILE="config.yaml"
13
+
14
+
[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
15
+
[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
16
+
17
+
# Extract bundle directory path
18
+
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
19
+
20
+
[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
21
+
22
+
FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
23
+
24
+
echo "========================================"
25
+
echo "PLC Operation Labels Import (Sorted Pipe)"
26
+
echo "========================================"
27
+
echo "CSV File: $CSV_FILE"
28
+
echo "Output Dir: $FINAL_LABELS_DIR"
29
+
echo ""
30
+
31
+
# Ensure the final directory exists
32
+
mkdir -p "$FINAL_LABELS_DIR"
33
+
34
+
echo "Streaming, sorting, and compressing on the fly..."
35
+
echo "This will take time. `pv` will show progress of the TAIL command."
36
+
echo "The `sort` command will run after `pv` is complete."
37
+
echo ""
38
+
39
+
# This is the single-pass pipeline
40
+
tail -n +2 "$CSV_FILE" | \
41
+
pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
42
+
sort -t, -k1,1n | \
43
+
awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
44
+
# This awk script EXPECTS input sorted by bundle number (col 1)
45
+
BEGIN {
46
+
# last_bundle_num tracks the bundle we are currently writing
47
+
last_bundle_num = -1
48
+
# cmd holds the current zstd pipe command
49
+
cmd = ""
50
+
}
51
+
{
52
+
current_bundle_num = $1
53
+
54
+
# Check if the bundle number has changed
55
+
if (current_bundle_num != last_bundle_num) {
56
+
57
+
# If it changed, and we have an old pipe open, close it
58
+
if (last_bundle_num != -1) {
59
+
close(cmd)
60
+
}
61
+
62
+
# Create the new pipe command, writing to the final .zst file
63
+
outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
64
+
cmd = "zstd -T0 -o " outfile
65
+
66
+
# Update the tracker
67
+
last_bundle_num = current_bundle_num
68
+
69
+
# Print progress to stderr
70
+
printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
71
+
}
72
+
73
+
# Print the current line ($0) to the open pipe
74
+
# The first time this runs for a bundle, it opens the pipe
75
+
# Subsequent times, it writes to the already-open pipe
76
+
print $0 | cmd
77
+
}
78
+
# END block: close the very last pipe
79
+
END {
80
+
if (last_bundle_num != -1) {
81
+
close(cmd)
82
+
}
83
+
printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
84
+
}'
85
+
86
+
echo ""
87
+
echo "========================================"
88
+
echo "Import Summary"
89
+
echo "========================================"
90
+
echo "✓ Import completed successfully!"
91
+
echo "Label files are stored in: $FINAL_LABELS_DIR"
+2 -2 utils/migrate-ipinfo.sh
+199 utils/vuln-scanner-parallel.sh
···
1
+
#!/bin/bash
2
+
3
+
# Configuration
4
+
API_HOST="${API_HOST:-http://localhost:8080}"
5
+
TIMEOUT=5
6
+
PARALLEL_JOBS=20
7
+
OUTPUT_DIR="./pds_scan_results"
8
+
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
9
+
RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
10
+
FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"
11
+
12
+
# Paths to check
13
+
PATHS=(
14
+
"/info.php"
15
+
"/phpinfo.php"
16
+
"/test.php"
17
+
"/admin"
18
+
"/admin.php"
19
+
"/wp-admin"
20
+
"/robots.txt"
21
+
"/.env"
22
+
"/.git/config"
23
+
"/config.php"
24
+
"/backup"
25
+
"/db.sql"
26
+
"/.DS_Store"
27
+
"/server-status"
28
+
"/.well-known/security.txt"
29
+
)
30
+
31
+
# Colors
32
+
RED='\033[0;31m'
33
+
GREEN='\033[0;32m'
34
+
YELLOW='\033[1;33m'
35
+
BLUE='\033[0;34m'
36
+
NC='\033[0m'
37
+
38
+
# Check dependencies
39
+
if ! command -v jq &> /dev/null; then
40
+
echo -e "${RED}Error: jq is required${NC}"
41
+
echo "Install: sudo apt-get install jq"
42
+
exit 1
43
+
fi
44
+
45
+
if ! command -v parallel &> /dev/null; then
46
+
echo -e "${RED}Error: GNU parallel is required${NC}"
47
+
echo "Install: sudo apt-get install parallel (or brew install parallel)"
48
+
exit 1
49
+
fi
50
+
51
+
mkdir -p "$OUTPUT_DIR"
52
+
53
+
echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
54
+
echo -e "${BLUE}║ PDS Security Scanner (Parallel) ║${NC}"
55
+
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
56
+
echo ""
57
+
echo "API Host: $API_HOST"
58
+
echo "Timeout: ${TIMEOUT}s per request"
59
+
echo "Parallel jobs: ${PARALLEL_JOBS}"
60
+
echo "Paths to check: ${#PATHS[@]}"
61
+
echo ""
62
+
63
+
# Scan function - will be called by GNU parallel
64
+
scan_endpoint() {
65
+
local endpoint="$1"
66
+
local timeout="$2"
67
+
shift 2
68
+
local paths=("$@")
69
+
70
+
for path in "${paths[@]}"; do
71
+
url="${endpoint}${path}"
72
+
73
+
response=$(curl -s -o /dev/null -w "%{http_code}" \
74
+
--max-time "$timeout" \
75
+
--connect-timeout "$timeout" \
76
+
--retry 0 \
77
+
-A "Mozilla/5.0 (Security Scanner)" \
78
+
"$url" 2>/dev/null)
79
+
80
+
if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
81
+
if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
82
+
echo "FOUND|$endpoint|$path|$response"
83
+
elif [ "$response" != "403" ] && [ "$response" != "401" ]; then
84
+
echo "MAYBE|$endpoint|$path|$response"
85
+
fi
86
+
fi
87
+
done
88
+
}
89
+
90
+
export -f scan_endpoint
91
+
92
+
# Fetch active PDS endpoints
93
+
echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
94
+
ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
95
+
jq -r '.[].endpoint' 2>/dev/null)
96
+
97
+
if [ -z "$ENDPOINTS" ]; then
98
+
echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
99
+
echo "Check that the API is running at: $API_HOST"
100
+
exit 1
101
+
fi
102
+
103
+
ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ')
104
+
echo -e "${GREEN}✓ Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
105
+
echo ""
106
+
107
+
# Write header to results file
108
+
{
109
+
echo "PDS Security Scan Results"
110
+
echo "========================="
111
+
echo "Scan started: $(date)"
112
+
echo "Endpoints scanned: ${ENDPOINT_COUNT}"
113
+
echo "Paths checked: ${#PATHS[@]}"
114
+
echo "Parallel jobs: ${PARALLEL_JOBS}"
115
+
echo ""
116
+
echo "Results:"
117
+
echo "--------"
118
+
} > "$RESULTS_FILE"
119
+
120
+
# Run parallel scan
121
+
echo -e "${YELLOW}Starting parallel scan...${NC}"
122
+
echo -e "${BLUE}(This may take a few minutes depending on endpoint count)${NC}"
123
+
echo ""
124
+
125
+
echo "$ENDPOINTS" | \
126
+
parallel \
127
+
-j "$PARALLEL_JOBS" \
128
+
--bar \
129
+
--joblog "${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" \
130
+
scan_endpoint {} "$TIMEOUT" "${PATHS[@]}" \
131
+
>> "$RESULTS_FILE"
132
+
133
+
echo ""
134
+
echo -e "${YELLOW}Processing results...${NC}"
135
+
136
+
# Count results
137
+
FOUND_COUNT=$(grep -c "^FOUND|" "$RESULTS_FILE" 2>/dev/null); FOUND_COUNT=${FOUND_COUNT:-0}
138
+
MAYBE_COUNT=$(grep -c "^MAYBE|" "$RESULTS_FILE" 2>/dev/null); MAYBE_COUNT=${MAYBE_COUNT:-0}
139
+
140
+
# Extract found URLs to separate file
141
+
{
142
+
echo "Found URLs (HTTP 200/301/302)"
143
+
echo "=============================="
144
+
echo "Scan: $(date)"
145
+
echo ""
146
+
} > "$FOUND_FILE"
147
+
148
+
grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | while IFS='|' read -r status endpoint path code; do
149
+
echo "$endpoint$path [$code]"
150
+
done >> "$FOUND_FILE"
151
+
152
+
# Create summary at end of results file
153
+
{
154
+
echo ""
155
+
echo "Summary"
156
+
echo "======="
157
+
echo "Scan completed: $(date)"
158
+
echo "Total endpoints scanned: ${ENDPOINT_COUNT}"
159
+
echo "Total paths checked: $((ENDPOINT_COUNT * ${#PATHS[@]}))"
160
+
echo "Found (200/301/302): ${FOUND_COUNT}"
161
+
echo "Maybe (other codes): ${MAYBE_COUNT}"
162
+
} >> "$RESULTS_FILE"
163
+
164
+
# Display summary
165
+
echo ""
166
+
echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
167
+
echo -e "${BLUE}║ Scan Complete! ║${NC}"
168
+
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
169
+
echo ""
170
+
echo -e "Endpoints scanned: ${GREEN}${ENDPOINT_COUNT}${NC}"
171
+
echo -e "Paths checked per site: ${BLUE}${#PATHS[@]}${NC}"
172
+
echo -e "Total requests made: ${BLUE}$((ENDPOINT_COUNT * ${#PATHS[@]}))${NC}"
173
+
echo ""
174
+
echo -e "Results:"
175
+
echo -e " ${GREEN}✓ Found (200/301/302):${NC} ${FOUND_COUNT}"
176
+
echo -e " ${YELLOW}? Maybe (other):${NC} ${MAYBE_COUNT}"
177
+
echo ""
178
+
echo "Files created:"
179
+
echo " Full results: $RESULTS_FILE"
180
+
echo " Found URLs: $FOUND_FILE"
181
+
echo " Job log: ${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt"
182
+
183
+
# Show sample of found URLs if any
184
+
if [ "$FOUND_COUNT" -gt 0 ]; then
185
+
echo ""
186
+
echo -e "${RED}⚠ SECURITY ALERT: Found exposed paths!${NC}"
187
+
echo ""
188
+
echo "Sample findings (first 10):"
189
+
grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | head -10 | while IFS='|' read -r status endpoint path code; do
190
+
echo -e " ${RED}✗${NC} $endpoint${RED}$path${NC} [$code]"
191
+
done
192
+
193
+
if [ "$FOUND_COUNT" -gt 10 ]; then
194
+
echo ""
195
+
echo " ... and $((FOUND_COUNT - 10)) more (see $FOUND_FILE)"
196
+
fi
197
+
fi
198
+
199
+
echo ""
+117 utils/vuln-scanner.sh
···
1
+
#!/bin/bash
2
+
3
+
# Configuration
4
+
API_HOST="${API_HOST:-http://localhost:8080}"
5
+
TIMEOUT=5
6
+
OUTPUT_DIR="./pds_scan_results"
7
+
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
8
+
RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt"
9
+
FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt"
10
+
11
+
# Paths to check (one per line for easier editing)
12
+
PATHS=(
13
+
"/info.php"
14
+
"/phpinfo.php"
15
+
"/test.php"
16
+
"/admin"
17
+
"/admin.php"
18
+
"/wp-admin"
19
+
"/robots.txt"
20
+
"/.env"
21
+
"/.git/config"
22
+
"/config.php"
23
+
"/backup"
24
+
"/db.sql"
25
+
"/.DS_Store"
26
+
"/server-status"
27
+
"/.well-known/security.txt"
28
+
)
29
+
30
+
# Colors
31
+
RED='\033[0;31m'
32
+
GREEN='\033[0;32m'
33
+
YELLOW='\033[1;33m'
34
+
BLUE='\033[0;34m'
35
+
NC='\033[0m'
36
+
37
+
mkdir -p "$OUTPUT_DIR"
38
+
39
+
echo -e "${BLUE}=== PDS Security Scanner ===${NC}"
40
+
echo "API Host: $API_HOST"
41
+
echo "Timeout: ${TIMEOUT}s"
42
+
echo "Scanning for ${#PATHS[@]} paths"
43
+
echo "Results: $RESULTS_FILE"
44
+
echo ""
45
+
46
+
# Fetch active PDS endpoints
47
+
echo -e "${YELLOW}Fetching active PDS endpoints...${NC}"
48
+
ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \
49
+
jq -r '.[].endpoint' 2>/dev/null)
50
+
51
+
if [ -z "$ENDPOINTS" ]; then
52
+
echo -e "${RED}Error: Could not fetch endpoints from API${NC}"
53
+
exit 1
54
+
fi
55
+
56
+
ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l)
57
+
echo -e "${GREEN}Found ${ENDPOINT_COUNT} active PDS endpoints${NC}"
58
+
echo ""
59
+
60
+
# Write header
61
+
echo "PDS Security Scan - $(date)" > "$RESULTS_FILE"
62
+
echo "========================================" >> "$RESULTS_FILE"
63
+
echo "" >> "$RESULTS_FILE"
64
+
65
+
# Counters
66
+
CURRENT=0
67
+
TOTAL_FOUND=0
68
+
TOTAL_MAYBE=0
69
+
70
+
# Scan each endpoint sequentially
71
+
while IFS= read -r endpoint; do
72
+
CURRENT=$((CURRENT + 1))
73
+
74
+
echo -e "${BLUE}[$CURRENT/$ENDPOINT_COUNT]${NC} Scanning: $endpoint"
75
+
76
+
# Scan each path
77
+
for path in "${PATHS[@]}"; do
78
+
url="${endpoint}${path}"
79
+
80
+
# Make request with timeout
81
+
response=$(curl -s -o /dev/null -w "%{http_code}" \
82
+
--max-time "$TIMEOUT" \
83
+
--connect-timeout "$TIMEOUT" \
84
+
-L \
85
+
-A "Mozilla/5.0 (Security Scanner)" \
86
+
"$url" 2>/dev/null)
87
+
88
+
# Check response
89
+
if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then
90
+
if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then
91
+
echo -e " ${GREEN}✓ FOUND${NC} $path ${YELLOW}[$response]${NC}"
92
+
echo "FOUND: $endpoint$path [$response]" >> "$RESULTS_FILE"
93
+
echo "$endpoint$path" >> "$FOUND_FILE"
94
+
TOTAL_FOUND=$((TOTAL_FOUND + 1))
95
+
elif [ "$response" != "403" ]; then
96
+
echo -e " ${YELLOW}? MAYBE${NC} $path ${YELLOW}[$response]${NC}"
97
+
echo "MAYBE: $endpoint$path [$response]" >> "$RESULTS_FILE"
98
+
TOTAL_MAYBE=$((TOTAL_MAYBE + 1))
99
+
fi
100
+
fi
101
+
done
102
+
103
+
echo "" >> "$RESULTS_FILE"
104
+
105
+
done <<< "$ENDPOINTS"
106
+
107
+
# Summary
108
+
echo ""
109
+
echo -e "${BLUE}========================================${NC}"
110
+
echo -e "${GREEN}Scan Complete!${NC}"
111
+
echo "Scanned: ${ENDPOINT_COUNT} endpoints"
112
+
echo "Paths checked per endpoint: ${#PATHS[@]}"
113
+
echo -e "${GREEN}Found (200/301/302): ${TOTAL_FOUND}${NC}"
114
+
echo -e "${YELLOW}Maybe (other codes): ${TOTAL_MAYBE}${NC}"
115
+
echo ""
116
+
echo "Full results: $RESULTS_FILE"
117
+
[ -f "$FOUND_FILE" ] && echo "Found URLs: $FOUND_FILE"