Compare changes

.gitignore (+2)

···
  .DS_Store
  plc_cache\.tmp/*
  plc_bundles*
+ config.yaml
+ /atscand
Makefile (+42)

···
+ .PHONY: all build install test clean fmt lint help
+
+ # Binary name
+ BINARY_NAME=atscand
+ INSTALL_PATH=$(GOPATH)/bin
+
+ # Go commands
+ GOCMD=go
+ GOBUILD=$(GOCMD) build
+ GOINSTALL=$(GOCMD) install
+ GOCLEAN=$(GOCMD) clean
+ GOTEST=$(GOCMD) test
+ GOGET=$(GOCMD) get
+ GOFMT=$(GOCMD) fmt
+ GOMOD=$(GOCMD) mod
+ GORUN=$(GOCMD) run
+
+ # Default target
+ all: build
+
+ # Build the CLI tool
+ build:
+ 	@echo "Building $(BINARY_NAME)..."
+ 	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+ # Install the CLI tool globally
+ install:
+ 	@echo "Installing $(BINARY_NAME)..."
+ 	$(GOINSTALL) ./cmd/atscand
+
+ run:
+ 	$(GORUN) cmd/atscand/main.go -verbose
+
+ update-plcbundle:
+ 	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+ # Show help
+ help:
+ 	@echo "Available targets:"
+ 	@echo "  make build   - Build the binary"
+ 	@echo "  make install - Install binary globally"
+ 	@echo "  make run     - Run app"
cmd/atscand/main.go (+159)

···
+ package main
+
+ import (
+ 	"context"
+ 	"flag"
+ 	"fmt"
+ 	"os"
+ 	"os/signal"
+ 	"syscall"
+ 	"time"
+
+ 	"github.com/atscan/atscand/internal/api"
+ 	"github.com/atscan/atscand/internal/config"
+ 	"github.com/atscan/atscand/internal/log"
+ 	"github.com/atscan/atscand/internal/pds"
+ 	"github.com/atscan/atscand/internal/plc"
+ 	"github.com/atscan/atscand/internal/storage"
+ 	"github.com/atscan/atscand/internal/worker"
+ )
+
+ const VERSION = "1.0.0"
+
+ func main() {
+ 	configPath := flag.String("config", "config.yaml", "path to config file")
+ 	verbose := flag.Bool("verbose", false, "enable verbose logging")
+ 	flag.Parse()
+
+ 	// Load configuration
+ 	cfg, err := config.Load(*configPath)
+ 	if err != nil {
+ 		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	// Override verbose setting if flag is provided
+ 	if *verbose {
+ 		cfg.API.Verbose = true
+ 	}
+
+ 	// Initialize logger
+ 	log.Init(cfg.API.Verbose)
+
+ 	// Print banner
+ 	log.Banner(VERSION)
+
+ 	// Print configuration summary
+ 	log.PrintConfig(map[string]string{
+ 		"Database Type":     cfg.Database.Type,
+ 		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+ 		"PLC Directory":     cfg.PLC.DirectoryURL,
+ 		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+ 		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+ 		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+ 		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+ 		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+ 		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+ 		"PDS Timeout":       cfg.PDS.Timeout.String(),
+ 		"API Host":          cfg.API.Host,
+ 		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+ 		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+ 	})
+
+ 	// Initialize database using factory pattern
+ 	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+ 	if err != nil {
+ 		log.Fatal("Failed to initialize database: %v", err)
+ 	}
+ 	defer func() {
+ 		log.Info("Closing database connection...")
+ 		db.Close()
+ 	}()
+
+ 	// Set scan retention from config
+ 	if cfg.PDS.ScanRetention > 0 {
+ 		db.SetScanRetention(cfg.PDS.ScanRetention)
+ 		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+ 	}
+
+ 	// Run migrations
+ 	if err := db.Migrate(); err != nil {
+ 		log.Fatal("Failed to run migrations: %v", err)
+ 	}
+
+ 	ctx, cancel := context.WithCancel(context.Background())
+ 	defer cancel()
+
+ 	// Initialize workers
+ 	log.Info("Initializing scanners...")
+
+ 	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+ 	if err != nil {
+ 		log.Fatal("Failed to create bundle manager: %v", err)
+ 	}
+ 	defer bundleManager.Close()
+ 	log.Verbose("✓ Bundle manager initialized (shared)")
+
+ 	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+ 	defer plcScanner.Close()
+ 	log.Verbose("✓ PLC scanner initialized")
+
+ 	pdsScanner := pds.NewScanner(db, cfg.PDS)
+ 	log.Verbose("✓ PDS scanner initialized")
+
+ 	scheduler := worker.NewScheduler()
+
+ 	// Schedule PLC directory scan
+ 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+ 		if err := plcScanner.Scan(ctx); err != nil {
+ 			log.Error("PLC scan error: %v", err)
+ 		}
+ 	})
+ 	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+ 	// Schedule PDS availability checks
+ 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+ 		if err := pdsScanner.ScanAll(ctx); err != nil {
+ 			log.Error("PDS scan error: %v", err)
+ 		}
+ 	})
+ 	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+ 	// Start API server
+ 	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+ 	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+ 	go func() {
+ 		if err := apiServer.Start(); err != nil {
+ 			log.Fatal("API server error: %v", err)
+ 		}
+ 	}()
+
+ 	// Give the API server a moment to start
+ 	time.Sleep(100 * time.Millisecond)
+ 	log.Info("✓ API server started successfully")
+ 	log.Info("")
+ 	log.Info("🚀 ATScanner is running!")
+ 	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+ 	log.Info("   Press Ctrl+C to stop")
+ 	log.Info("")
+
+ 	// Start scheduler
+ 	scheduler.Start(ctx)
+
+ 	// Wait for interrupt
+ 	sigChan := make(chan os.Signal, 1)
+ 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+ 	<-sigChan
+
+ 	log.Info("")
+ 	log.Info("Shutting down gracefully...")
+ 	cancel()
+
+ 	log.Info("Stopping API server...")
+ 	apiServer.Shutdown(context.Background())
+
+ 	log.Info("Waiting for active tasks to complete...")
+ 	time.Sleep(2 * time.Second)
+
+ 	log.Info("✓ Shutdown complete. Goodbye!")
+ }
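
The entry point now goes through storage.NewDatabase instead of the old storage.NewSQLiteDB, picking a backend from the config's database.type field. The factory itself is not part of this diff; the following is only a minimal sketch of such a type switch, assuming database/sql with the lib/pq driver from go.mod, and with a deliberately trimmed, illustrative Database interface (the real one in internal/storage also carries SetScanRetention and the query methods):

package storage

import (
	"database/sql"
	"fmt"

	_ "github.com/lib/pq" // registers the "postgres" driver with database/sql
)

// Database is a trimmed stand-in for the interface that
// cmd/atscand/main.go programs against.
type Database interface {
	Migrate() error
	Close() error
}

type postgresDB struct{ db *sql.DB }

func (p *postgresDB) Migrate() error { return nil } // real migrations omitted
func (p *postgresDB) Close() error   { return p.db.Close() }

// NewDatabase selects a backend by type; for postgres, the config's
// database.path field holds the DSN.
func NewDatabase(dbType, path string) (Database, error) {
	switch dbType {
	case "postgres":
		db, err := sql.Open("postgres", path)
		if err != nil {
			return nil, err
		}
		return &postgresDB{db: db}, nil
	default:
		return nil, fmt.Errorf("unsupported database type: %q", dbType)
	}
}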
cmd/atscanner.go (-96)

···
- package main
-
- import (
- 	"context"
- 	"flag"
- 	"os"
- 	"os/signal"
- 	"syscall"
- 	"time"
-
- 	"github.com/atscan/atscanner/internal/api"
- 	"github.com/atscan/atscanner/internal/config"
- 	"github.com/atscan/atscanner/internal/log"
- 	"github.com/atscan/atscanner/internal/pds"
- 	"github.com/atscan/atscanner/internal/plc"
- 	"github.com/atscan/atscanner/internal/storage"
- 	"github.com/atscan/atscanner/internal/worker"
- )
-
- func main() {
- 	configPath := flag.String("config", "config.yaml", "path to config file")
- 	verbose := flag.Bool("verbose", false, "enable verbose logging")
- 	flag.Parse()
-
- 	// Load configuration
- 	cfg, err := config.Load(*configPath)
- 	if err != nil {
- 		log.Fatal("Failed to load config: %v", err)
- 	}
-
- 	// Override verbose setting if flag is provided
- 	if *verbose {
- 		cfg.API.Verbose = true
- 	}
-
- 	// Initialize logger
- 	log.Init(cfg.API.Verbose)
-
- 	// Initialize database
- 	db, err := storage.NewSQLiteDB(cfg.Database.Path)
- 	if err != nil {
- 		log.Fatal("Failed to initialize database: %v", err)
- 	}
- 	defer db.Close()
-
- 	// Run migrations
- 	if err := db.Migrate(); err != nil {
- 		log.Fatal("Failed to run migrations: %v", err)
- 	}
-
- 	ctx, cancel := context.WithCancel(context.Background())
- 	defer cancel()
-
- 	// Initialize workers
- 	plcScanner := plc.NewScanner(db, cfg.PLC)
- 	defer plcScanner.Close() // Close scanner to cleanup cache
-
- 	pdsScanner := pds.NewScanner(db, cfg.PDS)
-
- 	scheduler := worker.NewScheduler()
-
- 	// Schedule PLC directory scan
- 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
- 		if err := plcScanner.Scan(ctx); err != nil {
- 			log.Error("PLC scan error: %v", err)
- 		}
- 	})
-
- 	// Schedule PDS availability checks
- 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
- 		if err := pdsScanner.ScanAll(ctx); err != nil {
- 			log.Error("PDS scan error: %v", err)
- 		}
- 	})
-
- 	// Start API server
- 	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
- 	go func() {
- 		if err := apiServer.Start(); err != nil {
- 			log.Fatal("API server error: %v", err)
- 		}
- 	}()
-
- 	// Start scheduler
- 	scheduler.Start(ctx)
-
- 	// Wait for interrupt
- 	sigChan := make(chan os.Signal, 1)
- 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
- 	<-sigChan
-
- 	log.Info("Shutting down gracefully...")
- 	cancel()
- 	apiServer.Shutdown(context.Background())
- 	time.Sleep(2 * time.Second)
- }
cmd/import-labels/main.go (+168)

···
+ package main
+
+ import (
+ 	"bufio"
+ 	"flag"
+ 	"fmt"
+ 	"os"
+ 	"path/filepath"
+ 	"strings"
+ 	"time"
+
+ 	"github.com/klauspost/compress/zstd"
+ 	"gopkg.in/yaml.v3"
+ )
+
+ type Config struct {
+ 	PLC struct {
+ 		BundleDir string `yaml:"bundle_dir"`
+ 	} `yaml:"plc"`
+ }
+
+ var CONFIG_FILE = "config.yaml"
+
+ // ---------------------
+
+ func main() {
+ 	// Define a new flag for changing the directory
+ 	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+ 	flag.Usage = func() { // Custom usage message
+ 		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+ 		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+ 		flag.PrintDefaults()
+ 	}
+ 	flag.Parse() // Parse all defined flags
+
+ 	// Change directory if the flag was used
+ 	if *workDir != "." {
+ 		fmt.Printf("Changing working directory to %s...\n", *workDir)
+ 		if err := os.Chdir(*workDir); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+ 			os.Exit(1)
+ 		}
+ 	}
+
+ 	// --- REMOVED UNUSED CODE ---
+ 	// The csvFilePath variable and NArg check were removed
+ 	// as the script now reads from stdin.
+ 	// ---------------------------
+
+ 	fmt.Println("========================================")
+ 	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+ 	fmt.Println("========================================")
+
+ 	// 1. Read config (will now read from the new CWD)
+ 	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+ 	configData, err := os.ReadFile(CONFIG_FILE)
+ 	if err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	var config Config
+ 	if err := yaml.Unmarshal(configData, &config); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	if config.PLC.BundleDir == "" {
+ 		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+ 		os.Exit(1)
+ 	}
+
+ 	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+ 	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+ 	fmt.Println("Waiting for sorted data from stdin...")
+
+ 	// 2. Process sorted data from stdin
+ 	// This script *requires* the input to be sorted by bundle number.
+
+ 	var currentWriter *zstd.Encoder
+ 	var currentFile *os.File
+ 	var lastBundleKey string = ""
+
+ 	lineCount := 0
+ 	startTime := time.Now()
+
+ 	scanner := bufio.NewScanner(os.Stdin)
+ 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+ 	for scanner.Scan() {
+ 		line := scanner.Text()
+ 		lineCount++
+
+ 		parts := strings.SplitN(line, ",", 2)
+ 		if len(parts) < 1 {
+ 			continue // Skip empty/bad lines
+ 		}
+
+ 		bundleNumStr := parts[0]
+ 		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+ 		// If the bundle key is new, close the old writer and open a new one.
+ 		if bundleKey != lastBundleKey {
+ 			// Close the previous writer/file
+ 			if currentWriter != nil {
+ 				if err := currentWriter.Close(); err != nil {
+ 					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+ 				}
+ 				currentFile.Close()
+ 			}
+
+ 			// Start the new one
+ 			fmt.Printf("  -> Writing bundle %s\n", bundleKey)
+ 			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+ 			file, err := os.Create(outPath)
+ 			if err != nil {
+ 				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+ 				os.Exit(1)
+ 			}
+ 			currentFile = file
+
+ 			writer, err := zstd.NewWriter(file)
+ 			if err != nil {
+ 				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+ 				os.Exit(1)
+ 			}
+ 			currentWriter = writer
+ 			lastBundleKey = bundleKey
+ 		}
+
+ 		// Write the line to the currently active writer
+ 		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+ 		}
+
+ 		// Progress update
+ 		if lineCount%100000 == 0 {
+ 			elapsed := time.Since(startTime).Seconds()
+ 			rate := float64(lineCount) / elapsed
+ 			fmt.Printf("  ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+ 		}
+ 	}
+
+ 	// 3. Close the very last writer
+ 	if currentWriter != nil {
+ 		if err := currentWriter.Close(); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+ 		}
+ 		currentFile.Close()
+ 	}
+
+ 	if err := scanner.Err(); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+ 	}
+
+ 	totalTime := time.Since(startTime)
+ 	fmt.Println("\n========================================")
+ 	fmt.Println("Import Summary")
+ 	fmt.Println("========================================")
+ 	fmt.Printf("✓ Import completed in %v\n", totalTime)
+ 	fmt.Printf("Total lines processed: %d\n", lineCount)
+ }
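
Each output file the importer produces is a zstd stream of the original CSV lines for one bundle. A minimal sketch of reading one back, using the same klauspost/compress/zstd dependency from go.mod (the file path is an example, not a path the tool guarantees):

package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Open a bundle file written by import-labels, e.g. 000001.csv.zst.
	f, err := os.Open("plc_bundles/labels/000001.csv.zst")
	if err != nil {
		fmt.Fprintf(os.Stderr, "open: %v\n", err)
		os.Exit(1)
	}
	defer f.Close()

	// Wrap the file in a streaming zstd decoder.
	dec, err := zstd.NewReader(f)
	if err != nil {
		fmt.Fprintf(os.Stderr, "zstd: %v\n", err)
		os.Exit(1)
	}
	defer dec.Close()

	// The payload is the same "bundle,..." CSV lines that were piped in.
	scanner := bufio.NewScanner(dec)
	for scanner.Scan() {
		fmt.Println(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "read: %v\n", err)
	}
}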
config.sample.yaml (+22)

···
+ database:
+   type: "postgres"  # or "sqlite"
+   path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
+   # For SQLite: path: "atscan.db"
+
+ plc:
+   directory_url: "https://plc.directory"
+   scan_interval: "5s"
+   bundle_dir: "./plc_bundles"
+   use_cache: true
+   index_dids: true
+
+ pds:
+   scan_interval: "30m"
+   timeout: "30s"
+   workers: 20
+   recheck_interval: "1.5h"
+   scan_retention: 20
+
+ api:
+   host: "0.0.0.0"
+   port: 8080
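
The interval fields ("5s", "30m", "1.5h") are Go duration strings. yaml.v3 does not decode strings into time.Duration on its own, so internal/config presumably wraps the type along these lines; the Duration wrapper here is illustrative, not the project's actual type:

package config

import (
	"time"

	"gopkg.in/yaml.v3"
)

// Duration lets "5s"-style YAML strings decode into a time.Duration.
type Duration time.Duration

func (d *Duration) UnmarshalYAML(node *yaml.Node) error {
	var raw string
	if err := node.Decode(&raw); err != nil {
		return err
	}
	parsed, err := time.ParseDuration(raw)
	if err != nil {
		return err
	}
	*d = Duration(parsed)
	return nil
}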
config.yaml (-19)

···
- database:
-   path: "atscan.db"
-   type: "sqlite"
-
- plc:
-   directory_url: "https://plc.directory"
-   scan_interval: "5s"
-   bundle_dir: "./plc_bundles"
-   use_cache: true
-
- pds:
-   scan_interval: "15m"
-   timeout: "30s"
-   workers: 10
-   recheck_interval: "5m"
-
- api:
-   host: "0.0.0.0"
-   port: 8080
go.mod (+17 -6)

···
- module github.com/atscan/atscanner
+ module github.com/atscan/atscand

- go 1.22
+ go 1.23.0

  require (
  	github.com/gorilla/mux v1.8.1
- 	github.com/mattn/go-sqlite3 v1.14.18
+ 	github.com/lib/pq v1.10.9
  	gopkg.in/yaml.v3 v3.0.1
  )

- require github.com/klauspost/compress v1.18.0
+ require github.com/klauspost/compress v1.18.1

  require (
- 	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
  	github.com/gorilla/handlers v1.5.2
+ 	github.com/jackc/pgx/v5 v5.7.6
+ 	tangled.org/atscan.net/plcbundle v0.3.6
  )

- require github.com/felixge/httpsnoop v1.0.3 // indirect
+ require (
+ 	github.com/felixge/httpsnoop v1.0.3 // indirect
+ 	github.com/jackc/pgpassfile v1.0.0 // indirect
+ 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
+ 	github.com/jackc/puddle/v2 v2.2.2 // indirect
+ 	github.com/kr/text v0.2.0 // indirect
+ 	github.com/rogpeppe/go-internal v1.14.1 // indirect
+ 	golang.org/x/crypto v0.37.0 // indirect
+ 	golang.org/x/sync v0.13.0 // indirect
+ 	golang.org/x/text v0.24.0 // indirect
+ )
go.sum (+40 -7)

···
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
  github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
  github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
  github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w=
  github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
  github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
- github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
- github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
- github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
- github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
- gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+ github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+ github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+ github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
+ github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+ github.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk=
+ github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
+ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
+ github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+ github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+ github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+ github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+ github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+ github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
+ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+ golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
+ golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
+ golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
+ golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+ golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
+ golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
  gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
  gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
  gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+ tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+ tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
internal/api/handlers.go (+1173 -518)

···
  package api

  import (
- 	"bufio"
- 	"bytes"
- 	"crypto/sha256"
- 	"encoding/hex"
+ 	"context"
+ 	"database/sql"
  	"encoding/json"
  	"fmt"
+ 	"io"
  	"net/http"
- 	"os"
- 	"path/filepath"
  	"strconv"
+ 	"strings"
  	"time"

- 	"github.com/atscan/atscanner/internal/log"
- 	"github.com/atscan/atscanner/internal/plc"
- 	"github.com/atscan/atscanner/internal/storage"
+ 	"github.com/atscan/atscand/internal/log"
+ 	"github.com/atscan/atscand/internal/monitor"
+ 	"github.com/atscan/atscand/internal/plc"
+ 	"github.com/atscan/atscand/internal/storage"
  	"github.com/gorilla/mux"
- 	"github.com/klauspost/compress/zstd"
+ 	"tangled.org/atscan.net/plcbundle"
  )

- func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ // ===== RESPONSE HELPERS =====
+
+ type response struct {
+ 	w http.ResponseWriter
+ }
+
+ func newResponse(w http.ResponseWriter) *response {
+ 	return &response{w: w}
+ }

- 	filter := &storage.PDSFilter{}
+ func (r *response) json(data interface{}) {
+ 	r.w.Header().Set("Content-Type", "application/json")
+ 	json.NewEncoder(r.w).Encode(data)
+ }

- 	if status := r.URL.Query().Get("status"); status != "" {
- 		filter.Status = status
- 	}
+ func (r *response) error(msg string, code int) {
+ 	http.Error(r.w, msg, code)
+ }

- 	if limit := r.URL.Query().Get("limit"); limit != "" {
- 		if l, err := strconv.Atoi(limit); err == nil {
- 			filter.Limit = l
+ func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
+ 	r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
+ 	r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
+ 	r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
+ 	r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
+ 	r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
+ 	r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
+ 	r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount))
+ }
+
+ // ===== REQUEST HELPERS =====
+
+ func getBundleNumber(r *http.Request) (int, error) {
+ 	vars := mux.Vars(r)
+ 	return strconv.Atoi(vars["number"])
+ }
+
+ func getQueryInt(r *http.Request, key string, defaultVal int) int {
+ 	if val := r.URL.Query().Get(key); val != "" {
+ 		if parsed, err := strconv.Atoi(val); err == nil {
+ 			return parsed
  		}
  	}
+ 	return defaultVal
+ }

- 	if offset := r.URL.Query().Get("offset"); offset != "" {
- 		if o, err := strconv.Atoi(offset); err == nil {
- 			filter.Offset = o
+ func getQueryInt64(r *http.Request, key string, defaultVal int64) int64 {
+ 	if val := r.URL.Query().Get(key); val != "" {
+ 		if parsed, err := strconv.ParseInt(val, 10, 64); err == nil {
+ 			return parsed
  		}
  	}
+ 	return defaultVal
+ }

- 	servers, err := s.db.GetPDSServers(ctx, filter)
+ // ===== FORMATTING HELPERS =====
+
+ func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
+ 	response := map[string]interface{}{
+ 		"id":            ep.ID,
+ 		"endpoint_type": ep.EndpointType,
+ 		"endpoint":      ep.Endpoint,
+ 		"discovered_at": ep.DiscoveredAt,
+ 		"last_checked":  ep.LastChecked,
+ 		"status":        statusToString(ep.Status),
+ 	}
+
+ 	// Add IPs if available
+ 	if ep.IP != "" {
+ 		response["ip"] = ep.IP
+ 	}
+ 	if ep.IPv6 != "" {
+ 		response["ipv6"] = ep.IPv6
+ 	}
+
+ 	return response
+ }
+
+ func statusToString(status int) string {
+ 	switch status {
+ 	case storage.EndpointStatusOnline:
+ 		return "online"
+ 	case storage.EndpointStatusOffline:
+ 		return "offline"
+ 	default:
+ 		return "unknown"
+ 	}
+ }
+
+ // ===== ENDPOINT HANDLERS =====
+
+ func (s *Server) handleGetEndpoints(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+
+ 	filter := &storage.EndpointFilter{
+ 		Type:         r.URL.Query().Get("type"),
+ 		Status:       r.URL.Query().Get("status"),
+ 		MinUserCount: getQueryInt64(r, "min_user_count", 0),
+ 		Limit:        getQueryInt(r, "limit", 50),
+ 		Offset:       getQueryInt(r, "offset", 0),
+ 	}
+
+ 	endpoints, err := s.db.GetEndpoints(r.Context(), filter)
  	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	response := make([]map[string]interface{}, len(endpoints))
+ 	for i, ep := range endpoints {
+ 		response[i] = formatEndpointResponse(ep)
+ 	}
+
+ 	resp.json(response)
+ }
+
+ func (s *Server) handleGetEndpointStats(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	stats, err := s.db.GetEndpointStats(r.Context())
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
  		return
  	}
+ 	resp.json(stats)
+ }

- 	// Convert status codes to strings for API
- 	response := make([]map[string]interface{}, len(servers))
- 	for i, srv := range servers {
- 		response[i] = map[string]interface{}{
- 			"id":            srv.ID,
- 			"endpoint":      srv.Endpoint,
- 			"discovered_at": srv.DiscoveredAt,
- 			"last_checked":  srv.LastChecked,
- 			"status":        statusToString(srv.Status),
- 			"user_count":    srv.UserCount,
- 		}
+ // handleGetRandomEndpoint returns a random endpoint of specified type
+ func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+
+ 	// Get required type parameter
+ 	endpointType := r.URL.Query().Get("type")
+ 	if endpointType == "" {
+ 		resp.error("type parameter is required", http.StatusBadRequest)
+ 		return
+ 	}
+
+ 	// Get optional status parameter
+ 	status := r.URL.Query().Get("status")
+
+ 	filter := &storage.EndpointFilter{
+ 		Type:   endpointType,
+ 		Status: status,
+ 		Random: true,
+ 		Limit:  1,
+ 		Offset: 0,
+ 	}
+
+ 	endpoints, err := s.db.GetEndpoints(r.Context(), filter)
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	if len(endpoints) == 0 {
+ 		resp.error("no endpoints found matching criteria", http.StatusNotFound)
+ 		return
+ 	}
+
+ 	resp.json(formatEndpointResponse(endpoints[0]))
+ }
+
+ // ===== PDS HANDLERS =====
+
+ func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+
+ 	filter := &storage.EndpointFilter{
+ 		Type:         "pds",
+ 		Status:       r.URL.Query().Get("status"),
+ 		MinUserCount: getQueryInt64(r, "min_user_count", 0),
+ 		Limit:        getQueryInt(r, "limit", 50),
+ 		Offset:       getQueryInt(r, "offset", 0),
+ 	}
+
+ 	pdsServers, err := s.db.GetPDSList(r.Context(), filter)
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	response := make([]map[string]interface{}, len(pdsServers))
+ 	for i, pds := range pdsServers {
+ 		response[i] = formatPDSListItem(pds)
  	}

- 	respondJSON(w, response)
+ 	resp.json(response)
  }

- func (s *Server) handleGetPDS(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ func (s *Server) handleGetPDSDetail(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
  	vars := mux.Vars(r)
- 	endpoint := vars["endpoint"]
+ 	endpoint := "https://" + normalizeEndpoint(vars["endpoint"])

- 	pds, err := s.db.GetPDS(ctx, endpoint)
+ 	// FIX: Use r.Context() instead of ctx
+ 	pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
  	if err != nil {
- 		http.Error(w, "PDS not found", http.StatusNotFound)
+ 		resp.error("PDS not found", http.StatusNotFound)
  		return
  	}

  	// Get recent scans
- 	scans, _ := s.db.GetPDSScans(ctx, pds.ID, 10)
+ 	scans, _ := s.db.GetEndpointScans(r.Context(), pds.ID, 10)
+
+ 	result := formatPDSDetail(pds)
+ 	result["recent_scans"] = formatScans(scans)
+
+ 	resp.json(result)
+ }
+
+ func (s *Server) handleGetPDSStats(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	ctx := r.Context()
+
+ 	// Get PDS-specific stats
+ 	stats, err := s.db.GetPDSStats(ctx)
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	resp.json(stats)
+ }

+ func formatPDSListItem(pds *storage.PDSListItem) map[string]interface{} {
  	response := map[string]interface{}{
  		"id":            pds.ID,
  		"endpoint":      pds.Endpoint,
  		"discovered_at": pds.DiscoveredAt,
- 		"last_checked":  pds.LastChecked,
  		"status":        statusToString(pds.Status),
- 		"user_count":    pds.UserCount,
- 		"recent_scans":  scans,
+ 		"valid":         pds.Valid, // NEW
+ 	}
+
+ 	// Add server_did if available
+ 	if pds.ServerDID != "" {
+ 		response["server_did"] = pds.ServerDID
+ 	}
+
+ 	// Add last_checked if available
+ 	if !pds.LastChecked.IsZero() {
+ 		response["last_checked"] = pds.LastChecked
+ 	}
+
+ 	// Add data from latest scan (if available)
+ 	if pds.LatestScan != nil {
+ 		response["user_count"] = pds.LatestScan.UserCount
+ 		response["response_time"] = pds.LatestScan.ResponseTime
+ 		if pds.LatestScan.Version != "" {
+ 			response["version"] = pds.LatestScan.Version
+ 		}
+ 		if !pds.LatestScan.ScannedAt.IsZero() {
+ 			response["last_scan"] = pds.LatestScan.ScannedAt
+ 		}
+ 	}
+
+ 	// Add IPs if available
+ 	if pds.IP != "" {
+ 		response["ip"] = pds.IP
+ 	}
+ 	if pds.IPv6 != "" {
+ 		response["ipv6"] = pds.IPv6
+ 	}
+
+ 	// Add IP info (from ip_infos table via JOIN)
+ 	if pds.IPInfo != nil {
+ 		if pds.IPInfo.City != "" {
+ 			response["city"] = pds.IPInfo.City
+ 		}
+ 		if pds.IPInfo.Country != "" {
+ 			response["country"] = pds.IPInfo.Country
+ 		}
+ 		if pds.IPInfo.CountryCode != "" {
+ 			response["country_code"] = pds.IPInfo.CountryCode
+ 		}
+ 		if pds.IPInfo.ASN > 0 {
+ 			response["asn"] = pds.IPInfo.ASN
+ 		}
+
+ 		// Add all network type flags
+ 		response["is_datacenter"] = pds.IPInfo.IsDatacenter
+ 		response["is_vpn"] = pds.IPInfo.IsVPN
+ 		response["is_crawler"] = pds.IPInfo.IsCrawler
+ 		response["is_tor"] = pds.IPInfo.IsTor
+ 		response["is_proxy"] = pds.IPInfo.IsProxy
+
+ 		// Add computed is_home field
+ 		response["is_home"] = pds.IPInfo.IsHome()
+ 	}
+
+ 	return response
+ }
+
+ func formatPDSDetail(pds *storage.PDSDetail) map[string]interface{} {
+ 	// Start with list item formatting (includes server_did)
+ 	response := formatPDSListItem(&pds.PDSListItem)
+
+ 	// Add is_primary flag
+ 	response["is_primary"] = pds.IsPrimary
+
+ 	// Add aliases if available
+ 	if len(pds.Aliases) > 0 {
+ 		response["aliases"] = pds.Aliases
+ 		response["alias_count"] = len(pds.Aliases)
+ 	}
+
+ 	// Add server_info and version from latest scan (PDSDetail's LatestScan takes precedence)
+ 	if pds.LatestScan != nil {
+ 		// Override with detail-specific scan data
+ 		response["user_count"] = pds.LatestScan.UserCount
+ 		response["response_time"] = pds.LatestScan.ResponseTime
+
+ 		if pds.LatestScan.Version != "" {
+ 			response["version"] = pds.LatestScan.Version
+ 		}
+
+ 		if !pds.LatestScan.ScannedAt.IsZero() {
+ 			response["last_scan"] = pds.LatestScan.ScannedAt
+ 		}
+
+ 		if pds.LatestScan.ServerInfo != nil {
+ 			response["server_info"] = pds.LatestScan.ServerInfo
+ 		}
+ 	}
+
+ 	// Add full IP info with computed is_home field
+ 	if pds.IPInfo != nil {
+ 		// Convert IPInfo to map
+ 		ipInfoMap := make(map[string]interface{})
+ 		ipInfoJSON, _ := json.Marshal(pds.IPInfo)
+ 		json.Unmarshal(ipInfoJSON, &ipInfoMap)
+
+ 		// Add computed is_home field
+ 		ipInfoMap["is_home"] = pds.IPInfo.IsHome()
+
+ 		response["ip_info"] = ipInfoMap
  	}

- 	respondJSON(w, response)
+ 	return response
+ }
+
+ func formatScans(scans []*storage.EndpointScan) []map[string]interface{} {
+ 	result := make([]map[string]interface{}, len(scans))
+ 	for i, scan := range scans {
+ 		scanMap := map[string]interface{}{
+ 			"id":         scan.ID,
+ 			"status":     statusToString(scan.Status),
+ 			"scanned_at": scan.ScannedAt,
+ 		}
+
+ 		if scan.Status != storage.EndpointStatusOnline && scan.ScanData != nil && scan.ScanData.Metadata != nil {
+ 			if errorMsg, ok := scan.ScanData.Metadata["error"].(string); ok && errorMsg != "" {
+ 				scanMap["error"] = errorMsg
+ 			}
+ 		}
+
+ 		if scan.ResponseTime > 0 {
+ 			scanMap["response_time"] = scan.ResponseTime
+ 		}
+
+ 		if scan.Version != "" {
+ 			scanMap["version"] = scan.Version
+ 		}
+
+ 		if scan.UsedIP != "" {
+ 			scanMap["used_ip"] = scan.UsedIP
+ 		}
+
+ 		// Use the top-level UserCount field first
+ 		if scan.UserCount > 0 {
+ 			scanMap["user_count"] = scan.UserCount
+ 		} else if scan.ScanData != nil && scan.ScanData.Metadata != nil {
+ 			// Fallback to metadata for older scans
+ 			if userCount, ok := scan.ScanData.Metadata["user_count"].(int); ok {
+ 				scanMap["user_count"] = userCount
+ 			} else if userCount, ok := scan.ScanData.Metadata["user_count"].(float64); ok {
+ 				scanMap["user_count"] = int(userCount)
+ 			}
+ 		}
+
+ 		if scan.ScanData != nil {
+ 			// Include DID count if available
+ 			if scan.ScanData.DIDCount > 0 {
+ 				scanMap["did_count"] = scan.ScanData.DIDCount
+ 			}
+ 		}
+
+ 		result[i] = scanMap
+ 	}
+ 	return result
  }

- func (s *Server) handleGetPDSStats(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ // Get repos for a specific PDS
+ func (s *Server) handleGetPDSRepos(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	vars := mux.Vars(r)
+ 	endpoint := "https://" + normalizeEndpoint(vars["endpoint"])

- 	stats, err := s.db.GetPDSStats(ctx)
+ 	pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
  	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
+ 		resp.error("PDS not found", http.StatusNotFound)
  		return
  	}

- 	respondJSON(w, stats)
+ 	// Parse query parameters
+ 	activeOnly := r.URL.Query().Get("active") == "true"
+ 	limit := getQueryInt(r, "limit", 100)
+ 	offset := getQueryInt(r, "offset", 0)
+
+ 	// Cap limit at 1000
+ 	if limit > 1000 {
+ 		limit = 1000
+ 	}
+
+ 	repos, err := s.db.GetPDSRepos(r.Context(), pds.ID, activeOnly, limit, offset)
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	// Get total from latest scan (same as user_count)
+ 	totalRepos := 0
+ 	if pds.LatestScan != nil {
+ 		totalRepos = pds.LatestScan.UserCount
+ 	}
+
+ 	resp.json(map[string]interface{}{
+ 		"endpoint":    pds.Endpoint,
+ 		"total_repos": totalRepos,
+ 		"returned":    len(repos),
+ 		"limit":       limit,
+ 		"offset":      offset,
+ 		"repos":       repos,
+ 	})
  }

- func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ // Find which PDS hosts a specific DID
+ func (s *Server) handleGetDIDRepos(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
  	vars := mux.Vars(r)
  	did := vars["did"]

- 	bundles, err := s.db.GetBundlesForDID(ctx, did)
+ 	repos, err := s.db.GetReposByDID(r.Context(), did)
  	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
+ 		resp.error(err.Error(), http.StatusInternalServerError)
  		return
  	}

- 	if len(bundles) == 0 {
- 		http.Error(w, "DID not found in bundles", http.StatusNotFound)
+ 	resp.json(map[string]interface{}{
+ 		"did":        did,
+ 		"pds_count":  len(repos),
+ 		"hosting_on": repos,
+ 	})
+ }
+
+ // Add to internal/api/handlers.go
+ func (s *Server) handleGetPDSRepoStats(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	vars := mux.Vars(r)
+ 	endpoint := "https://" + normalizeEndpoint(vars["endpoint"])
+
+ 	pds, err := s.db.GetPDSDetail(r.Context(), endpoint)
+ 	if err != nil {
+ 		resp.error("PDS not found", http.StatusNotFound)
  		return
  	}

- 	lastBundle := bundles[len(bundles)-1]
+ 	stats, err := s.db.GetPDSRepoStats(r.Context(), pds.ID)
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	resp.json(stats)
+ }
+
+ // ===== GLOBAL DID HANDLER =====

- 	// Compute file path
- 	filePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", lastBundle.BundleNumber))
+ // handleGetGlobalDID provides a consolidated view of a DID
+ func (s *Server) handleGetGlobalDID(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	vars := mux.Vars(r)
+ 	did := vars["did"]
+ 	ctx := r.Context()

- 	operations, err := s.loadBundleOperations(filePath)
+ 	// Get DID info (now includes handle and pds from database)
+ 	didInfo, err := s.db.GetGlobalDIDInfo(ctx, did)
  	if err != nil {
- 		http.Error(w, fmt.Sprintf("failed to load bundle: %v", err), http.StatusInternalServerError)
+ 		if err == sql.ErrNoRows {
+ 			if !s.plcIndexDIDs {
+ 				resp.error("DID not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
+ 			} else {
+ 				resp.error("DID not found in PLC index.", http.StatusNotFound)
+ 			}
+ 		} else {
+ 			resp.error(err.Error(), http.StatusInternalServerError)
+ 		}
  		return
  	}

- 	// Find latest operation for this DID
- 	var latestOp *plc.PLCOperation
- 	for i := len(operations) - 1; i >= 0; i-- {
- 		if operations[i].DID == did {
- 			latestOp = &operations[i]
- 			break
+ 	// Optionally include latest operation details if requested
+ 	var latestOperation *plc.PLCOperation
+ 	if r.URL.Query().Get("include_operation") == "true" && len(didInfo.BundleNumbers) > 0 {
+ 		lastBundleNum := didInfo.BundleNumbers[len(didInfo.BundleNumbers)-1]
+ 		ops, err := s.bundleManager.LoadBundleOperations(ctx, lastBundleNum)
+ 		if err != nil {
+ 			log.Error("Failed to load bundle %d for DID %s: %v", lastBundleNum, did, err)
+ 		} else {
+ 			// Find latest operation for this DID (in reverse)
+ 			for i := len(ops) - 1; i >= 0; i-- {
+ 				if ops[i].DID == did {
+ 					latestOperation = &ops[i]
+ 					break
+ 				}
+ 			}
  		}
  	}

- 	if latestOp == nil {
- 		http.Error(w, "DID operation not found", http.StatusNotFound)
+ 	result := map[string]interface{}{
+ 		"did":                  didInfo.DID,
+ 		"handle":               didInfo.Handle,     // From database!
+ 		"current_pds":          didInfo.CurrentPDS, // From database!
+ 		"plc_index_created_at": didInfo.CreatedAt,
+ 		"plc_bundle_history":   didInfo.BundleNumbers,
+ 		"pds_hosting_on":       didInfo.HostingOn,
+ 	}
+
+ 	// Only include operation if requested
+ 	if latestOperation != nil {
+ 		result["latest_plc_operation"] = latestOperation
+ 	}
+
+ 	resp.json(result)
+ }
+
+ // handleGetDIDByHandle resolves a handle to a DID
+ func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	vars := mux.Vars(r)
+ 	handle := vars["handle"]
+
+ 	// Normalize handle (remove @ prefix if present)
+ 	handle = strings.TrimPrefix(handle, "@")
+
+ 	// Look up DID by handle
+ 	didRecord, err := s.db.GetDIDByHandle(r.Context(), handle)
+ 	if err != nil {
+ 		if err == sql.ErrNoRows {
+ 			if !s.plcIndexDIDs {
+ 				resp.error("Handle not found. Note: DID indexing is disabled in configuration.", http.StatusNotFound)
+ 			} else {
+ 				resp.error("Handle not found.", http.StatusNotFound)
+ 			}
+ 		} else {
+ 			resp.error(err.Error(), http.StatusInternalServerError)
+ 		}
  		return
  	}

- 	respondJSON(w, latestOp)
+ 	// Return just the handle and DID
+ 	resp.json(map[string]string{
+ 		"handle": handle,
+ 		"did":    didRecord.DID,
+ 	})
  }

- func (s *Server) handleGetDIDHistory(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ // ===== DID HANDLERS =====
+
+ func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
  	vars := mux.Vars(r)
  	did := vars["did"]

- 	bundles, err := s.db.GetBundlesForDID(ctx, did)
+ 	// Fast lookup using dids table
+ 	didRecord, err := s.db.GetDIDRecord(r.Context(), did)
+ 	if err != nil {
+ 		if err == sql.ErrNoRows {
+ 			// NEW: Provide helpful message if indexing is disabled
+ 			resp.error("DID not found. Note: DID indexing may be disabled in configuration.", http.StatusNotFound)
+ 		} else {
+ 			resp.error(err.Error(), http.StatusInternalServerError)
+ 		}
+ 		return
+ 	}
+
+ 	// Get the last bundle number where this DID appeared
+ 	if len(didRecord.BundleNumbers) == 0 {
+ 		resp.error("DID has no bundle history", http.StatusInternalServerError)
+ 		return
+ 	}
+
+ 	lastBundleNum := didRecord.BundleNumbers[len(didRecord.BundleNumbers)-1]
+
+ 	// Load last bundle to get latest operation
+ 	ops, err := s.bundleManager.LoadBundleOperations(r.Context(), lastBundleNum)
  	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
+ 		resp.error(fmt.Sprintf("failed to load bundle: %v", err), http.StatusInternalServerError)
  		return
  	}

- 	if len(bundles) == 0 {
- 		http.Error(w, "DID not found in bundles", http.StatusNotFound)
+ 	// Find latest operation for this DID
+ 	for i := len(ops) - 1; i >= 0; i-- {
+ 		if ops[i].DID == did {
+ 			resp.json(ops[i])
+ 			return
+ 		}
+ 	}
+
+ 	resp.error("DID operation not found", http.StatusNotFound)
+ }
+
+ func (s *Server) handleGetDIDHistory(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	vars := mux.Vars(r)
+ 	did := vars["did"]
+
+ 	// Fast lookup using dids table
+ 	didRecord, err := s.db.GetDIDRecord(r.Context(), did)
+ 	if err != nil {
+ 		if err == sql.ErrNoRows {
+ 			resp.error("DID not found", http.StatusNotFound)
+ 		} else {
+ 			resp.error(err.Error(), http.StatusInternalServerError)
+ 		}
  		return
  	}

  	var allOperations []plc.DIDHistoryEntry
  	var currentOp *plc.PLCOperation

- 	for _, bundle := range bundles {
- 		// Compute file path
- 		filePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", bundle.BundleNumber))
-
- 		operations, err := s.loadBundleOperations(filePath)
+ 	// Load operations from each bundle
+ 	for _, bundleNum := range didRecord.BundleNumbers {
+ 		ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundleNum)
  		if err != nil {
- 			log.Error("Warning: failed to load bundle: %v", err)
+ 			log.Error("Warning: failed to load bundle %d: %v", bundleNum, err)
  			continue
  		}

- 		for _, op := range operations {
+ 		for _, op := range ops {
  			if op.DID == did {
  				entry := plc.DIDHistoryEntry{
  					Operation: op,
- 					PLCBundle: fmt.Sprintf("%06d", bundle.BundleNumber),
+ 					PLCBundle: fmt.Sprintf("%06d", bundleNum),
  				}
  				allOperations = append(allOperations, entry)
  				currentOp = &op
···
  		}
  	}

- 	history := plc.DIDHistory{
+ 	resp.json(plc.DIDHistory{
  		DID:        did,
  		Current:    currentOp,
  		Operations: allOperations,
+ 	})
+ }
+
+ func (s *Server) handleGetDIDStats(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	ctx := r.Context()
+
+ 	totalDIDs, err := s.db.GetTotalDIDCount(ctx)
+ 	if err != nil {
+ 		resp.error(err.Error(), http.StatusInternalServerError)
+ 		return
  	}

- 	respondJSON(w, history)
+ 	lastBundle := s.bundleManager.GetLastBundleNumber()
+ 	resp.json(map[string]interface{}{
+ 		"total_unique_dids": totalDIDs,
+ 		"last_bundle":       lastBundle,
+ 	})
  }

- func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
- 	vars := mux.Vars(r)
+ // ===== PLC BUNDLE HANDLERS =====

- 	bundleNumber, err := strconv.Atoi(vars["number"])
+ func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)
+ 	bundleNum, err := getBundleNumber(r)
  	if err != nil {
- 		http.Error(w, "invalid bundle number", http.StatusBadRequest)
+ 		resp.error("invalid bundle number", http.StatusBadRequest)
  		return
  	}

- 	bundle, err := s.db.GetBundleByNumber(ctx, bundleNumber)
+ 	// Get from library's index
+ 	index := s.bundleManager.GetIndex()
+ 	bundleMeta, err := index.GetBundle(bundleNum)
  	if err != nil {
- 		http.Error(w, "bundle not found", http.StatusNotFound)
+ 		// Check if it's upcoming bundle
+ 		lastBundle := index.GetLastBundle()
+ 		if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
+ 			upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
+ 			if err != nil {
+ 				resp.error(err.Error(), http.StatusInternalServerError)
+ 				return
+ 			}
+ 			resp.json(upcomingBundle)
+ 			return
+ 		}
+ 		resp.error("bundle not found", http.StatusNotFound)
  		return
  	}

- 	response := map[string]interface{}{
- 		"plc_bundle_number": bundle.BundleNumber,
- 		"start_time":        bundle.StartTime,
- 		"end_time":          bundle.EndTime,
- 		"operation_count":   plc.BUNDLE_SIZE,
- 		"did_count":         len(bundle.DIDs),
- 		"hash":              bundle.Hash,           // Uncompressed (verifiable)
- 		"compressed_hash":   bundle.CompressedHash, // File integrity
- 		"compressed_size":   bundle.CompressedSize,
- 		"prev_bundle_hash":  bundle.PrevBundleHash,
- 		"created_at":        bundle.CreatedAt,
+ 	resp.json(formatBundleMetadata(bundleMeta))
+ }
+
+ // Helper to format library's BundleMetadata
+ func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
+ 	return map[string]interface{}{
+ 		"plc_bundle_number": meta.BundleNumber,
+ 		"start_time":        meta.StartTime,
+ 		"end_time":          meta.EndTime,
+ 		"operation_count":   meta.OperationCount,
+ 		"did_count":         meta.DIDCount,
+ 		"hash":              meta.Hash,        // Chain hash (primary)
+ 		"content_hash":      meta.ContentHash, // Content hash
+ 		"parent":            meta.Parent,      // Parent chain hash
+ 		"compressed_hash":   meta.CompressedHash,
+ 		"compressed_size":   meta.CompressedSize,
+ 		"uncompressed_size": meta.UncompressedSize,
+ 		"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
+ 		"cursor":            meta.Cursor,
+ 		"created_at":        meta.CreatedAt,
+ 	}
+ }
+
+ func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
+ 	// Get mempool stats from library via wrapper
+ 	stats := s.bundleManager.GetMempoolStats()
+
+ 	count, ok := stats["count"].(int)
+ 	if !ok || count == 0 {
+ 		return map[string]interface{}{
+ 			"plc_bundle_number": bundleNum,
+ 			"is_upcoming":       true,
+ 			"status":            "empty",
+ 			"message":           "No operations in mempool yet",
+ 			"operation_count":   0,
+ 		}, nil
+ 	}
+
+ 	// Build response
+ 	result := map[string]interface{}{
+ 		"plc_bundle_number":      bundleNum,
+ 		"is_upcoming":            true,
+ 		"status":                 "filling",
+ 		"operation_count":        count,
+ 		"did_count":              stats["did_count"],
+ 		"target_operation_count": 10000,
+ 		"progress_percent":       float64(count) / 100.0,
+ 		"operations_needed":      10000 - count,
+ 	}
+
+ 	if count >= 10000 {
+ 		result["status"] = "ready"
+ 	}
+
+ 	// Add time range if available
+ 	if firstTime, ok := stats["first_time"]; ok {
+ 		result["start_time"] = firstTime
+ 	}
+ 	if lastTime, ok := stats["last_time"]; ok {
+ 		result["current_end_time"] = lastTime
+ 	}
+
+ 	// Add size info if available
+ 	if sizeBytes, ok := stats["size_bytes"]; ok {
+ 		result["uncompressed_size"] = sizeBytes
+ 		result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
+ 	}
+
+ 	// Get previous bundle info
+ 	if bundleNum > 1 {
+ 		if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
+ 			result["parent"] = prevBundle.Hash // Parent chain hash
+ 			result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
+ 		}
  	}

- 	respondJSON(w, response)
+ 	return result, nil
  }

  func (s *Server) handleGetPLCBundleDIDs(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
- 	vars := mux.Vars(r)
+ 	resp := newResponse(w)

- 	bundleNumber, err := strconv.Atoi(vars["number"])
+ 	bundleNum, err := getBundleNumber(r)
  	if err != nil {
- 		http.Error(w, "invalid bundle number", http.StatusBadRequest)
+ 		resp.error("invalid bundle number", http.StatusBadRequest)
  		return
  	}

- 	bundle, err := s.db.GetBundleByNumber(ctx, bundleNumber)
+ 	// Get from library
+ 	dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
  	if err != nil {
- 		http.Error(w, "bundle not found", http.StatusNotFound)
+ 		resp.error("bundle not found", http.StatusNotFound)
  		return
  	}

- 	respondJSON(w, map[string]interface{}{
- 		"plc_bundle_number": bundle.BundleNumber,
- 		"did_count":         len(bundle.DIDs),
- 		"dids":              bundle.DIDs,
+ 	resp.json(map[string]interface{}{
+ 		"plc_bundle_number": bundleNum,
+ 		"did_count":         didCount,
+ 		"dids":              dids,
  	})
  }

  func (s *Server) handleDownloadPLCBundle(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
- 	vars := mux.Vars(r)
+ 	resp := newResponse(w)

- 	bundleNumber, err := strconv.Atoi(vars["number"])
+ 	bundleNum, err := getBundleNumber(r)
  	if err != nil {
- 		http.Error(w, "invalid bundle number", http.StatusBadRequest)
+ 		resp.error("invalid bundle number", http.StatusBadRequest)
  		return
  	}

- 	// Verify bundle exists in database
- 	bundle, err := s.db.GetBundleByNumber(ctx, bundleNumber)
- 	if err != nil {
- 		http.Error(w, "bundle not found", http.StatusNotFound)
- 		return
- 	}
+ 	compressed := r.URL.Query().Get("compressed") != "false"

- 	// Build file path
- 	filePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", bundleNumber))
+ 	bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
+ 	if err == nil {
+ 		// Bundle exists, serve it normally
+ 		resp.bundleHeaders(bundle)

- 	// Check if file exists
- 	fileInfo, err := os.Stat(filePath)
- 	if err != nil {
- 		if os.IsNotExist(err) {
- 			http.Error(w, "bundle file not found on disk", http.StatusNotFound)
- 			return
+ 		if compressed {
+ 			s.serveCompressedBundle(w, r, bundle)
+ 		} else {
+ 			s.serveUncompressedBundle(w, r, bundle)
  		}
- 		http.Error(w, fmt.Sprintf("error accessing bundle file: %v", err), http.StatusInternalServerError)
  		return
  	}

- 	// Open file
- 	file, err := os.Open(filePath)
- 	if err != nil {
- 		http.Error(w, fmt.Sprintf("error opening bundle file: %v", err), http.StatusInternalServerError)
+ 	// Bundle not found - check if it's the upcoming bundle
+ 	lastBundle := s.bundleManager.GetLastBundleNumber()
+ 	if bundleNum == lastBundle+1 {
+ 		// This is the upcoming bundle - serve from mempool
+ 		s.serveUpcomingBundle(w, bundleNum)
  		return
  	}
- 	defer file.Close()

- 	// Set headers
- 	w.Header().Set("Content-Type", "application/zstd")
- 	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundleNumber))
- 	w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
- 	w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNumber))
- 	w.Header().Set("X-Bundle-Hash", bundle.Hash)
- 	w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
- 	w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano))
- 	w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano))
- 	w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
- 	w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs)))
-
- 	// Stream the file
- 	http.ServeContent(w, r, filepath.Base(filePath), bundle.CreatedAt, file)
+ 	// Not an upcoming bundle, just not found
+ 	resp.error("bundle not found", http.StatusNotFound)
  }

- func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
+ 	// Get mempool stats
+ 	stats := s.bundleManager.GetMempoolStats()
+ 	count, ok := stats["count"].(int)

- 	count, err := s.db.GetMempoolCount(ctx)
- 	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
+ 	if !ok || count == 0 {
+ 		http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
  		return
  	}

- 	response := map[string]interface{}{
- 		"operation_count":   count,
- 		"can_create_bundle": count >= plc.BUNDLE_SIZE,
+ 	// Get operations from mempool
+ 	ops, err := s.bundleManager.GetMempoolOperations()
+ 	if err != nil {
+ 		http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
+ 		return
  	}

- 	// Get mempool start time (first item)
- 	if count > 0 {
- 		firstOp, err := s.db.GetFirstMempoolOperation(ctx)
- 		if err == nil && firstOp != nil {
- 			response["mempool_start_time"] = firstOp.CreatedAt
-
- 			// Calculate estimated next bundle time
- 			if count < plc.BUNDLE_SIZE {
- 				lastOp, err := s.db.GetLastMempoolOperation(ctx)
- 				if err == nil && lastOp != nil {
- 					// Calculate rate of operations per second
- 					timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
-
- 					if timeSpan > 0 {
- 						opsPerSecond := float64(count) / timeSpan
-
- 						if opsPerSecond > 0 {
- 							remainingOps := plc.BUNDLE_SIZE - count
- 							secondsNeeded := float64(remainingOps) / opsPerSecond
- 							estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
-
- 							response["estimated_next_bundle_time"] = estimatedTime
- 							response["operations_needed"] = remainingOps
- 							response["current_rate_per_second"] = opsPerSecond
- 						}
- 					}
- 				}
- 			} else {
- 				// Bundle can be created now
- 				response["estimated_next_bundle_time"] = time.Now()
- 				response["operations_needed"] = 0
- 			}
- 		}
- 	} else {
- 		response["mempool_start_time"] = nil
- 		response["estimated_next_bundle_time"] = nil
+ 	if len(ops) == 0 {
+ 		http.Error(w, "no operations in mempool", http.StatusNotFound)
+ 		return
  	}

- 	respondJSON(w, response)
- }
+ 	// Calculate times
+ 	firstOp := ops[0]
+ 	lastOp := ops[len(ops)-1]

- // Helper to load bundle operations - UPDATED FOR JSONL FORMAT
- func (s *Server) loadBundleOperations(path string) ([]plc.PLCOperation, error) {
- 	decoder, err := zstd.NewReader(nil)
- 	if err != nil {
- 		return nil, err
+ 	// Extract unique DIDs
+ 	didSet := make(map[string]bool)
+ 	for _, op := range ops {
+ 		didSet[op.DID] = true
  	}
- 	defer decoder.Close()

- 	compressedData, err := os.ReadFile(path)
- 	if err != nil {
- 		return nil, err
+ 	// Calculate uncompressed size
+ 	uncompressedSize := int64(0)
+ 	for _, op := range ops {
+ 		uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
  	}

- 	decompressed, err := decoder.DecodeAll(compressedData, nil)
- 	if err != nil {
- 		return nil, err
+ 	// Get previous bundle hash
+ 	prevBundleHash := ""
+ 	if bundleNum > 1 {
+ 		if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
+ 			prevBundleHash = prevBundle.Hash
+ 		}
  	}

- 	// Parse JSONL (newline-delimited JSON)
- 	var operations []plc.PLCOperation
- 	scanner := bufio.NewScanner(bytes.NewReader(decompressed))
+ 	// Set headers
+ 	w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
+ 	w.Header().Set("X-Bundle-Is-Upcoming", "true")
+ 	w.Header().Set("X-Bundle-Status", "preview")
+ 	w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
+ 	w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
+ 	w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
+ 	w.Header().Set("X-Bundle-Target-Count", "10000")
+ 	w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
+ 	w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
+ 	w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
+ 	w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))

- 	lineNum := 0
- 	for scanner.Scan() {
- 		lineNum++
- 		line := scanner.Bytes()
+ 	w.Header().Set("Content-Type", "application/jsonl")
+ 	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))

- 		// Skip empty lines
- 		if len(line) == 0 {
- 			continue
- 		}
+ 	// Stream operations as JSONL
+ 	w.WriteHeader(http.StatusOK)

- 		var op plc.PLCOperation
- 		if err := json.Unmarshal(line, &op); err != nil {
- 			return nil, fmt.Errorf("failed to parse operation on line %d: %w", lineNum, err)
+ 	for _, op := range ops {
+ 		// Use RawJSON if available (preserves exact format)
+ 		if len(op.RawJSON) > 0 {
+ 			w.Write(op.RawJSON)
+ 		} else {
+ 			// Fallback to marshaling
+ 			data, _ := json.Marshal(op)
+ 			w.Write(data)
  		}
+ 		w.Write([]byte("\n"))
+ 	}
+ }

- 		// CRITICAL: Store the original raw JSON bytes
- 		op.RawJSON = make([]byte, len(line))
- 		copy(op.RawJSON, line)
+ func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
+ 	resp := newResponse(w)

- 		operations = append(operations, op)
+ 	// Use the new streaming API for compressed data
+ 	reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
+ 	if err != nil {
+ 		resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
+ 		return
  	}
+ 	defer reader.Close()

- 	if err := scanner.Err(); err != nil {
- 		return nil, fmt.Errorf("error reading JSONL: %w", err)
- 	}
+ 	w.Header().Set("Content-Type", "application/zstd")
+ 	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
+ 	w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
+ 	w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))

- 	return operations, nil
+ 	// Stream the data directly to the response
+ 	w.WriteHeader(http.StatusOK)
+ 	io.Copy(w, reader)
  }

- func (s *Server) handleGetPLCMetrics(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
+ 	resp := newResponse(w)

- 	limit := 10
- 	if l := r.URL.Query().Get("limit"); l != "" {
- 		if parsed, err := strconv.Atoi(l); err == nil {
- 			limit = parsed
- 		}
+ 	// Use the new streaming API for decompressed data
+ 	reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
+ 	if err != nil {
+ 		resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
+ 		return
  	}
+ 	defer reader.Close()

- 	metrics, err := s.db.GetPLCMetrics(ctx, limit)
- 	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
- 		return
+ 	w.Header().Set("Content-Type", "application/jsonl")
+ 	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
+ 	w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
+ 	w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
+ 	w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
+ 	if bundle.CompressedSize > 0 {
+ 		w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
  	}

- 	respondJSON(w, metrics)
+ 	// Stream the data directly to the response
+ 	w.WriteHeader(http.StatusOK)
+ 	io.Copy(w, reader)
  }

  func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ 	resp := newResponse(w)
+ 	limit := getQueryInt(r, "limit", 50)

- 	limit := 50
- 	if l := r.URL.Query().Get("limit"); l != "" {
- 		if parsed, err := strconv.Atoi(l); err == nil {
- 			limit = parsed
- 		}
- 	}
-
- 	bundles, err := s.db.GetBundles(ctx, limit)
- 	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
- 		return
- 	}
+ 	bundles := s.bundleManager.GetBundles(limit)

  	response := make([]map[string]interface{}, len(bundles))
  	for i, bundle := range bundles {
- 		response[i] = map[string]interface{}{
- 			"plc_bundle_number": bundle.BundleNumber,
- 			"start_time":        bundle.StartTime,
- 			"end_time":          bundle.EndTime,
- 			"operation_count":   10000,
- 			"did_count":         len(bundle.DIDs),
- 			"hash":              bundle.Hash,
- 			"compressed_hash":   bundle.CompressedHash,
- 			"compressed_size":   bundle.CompressedSize,
- 			"prev_bundle_hash":  bundle.PrevBundleHash,
- 		}
+ 		response[i] = formatBundleMetadata(bundle)
  	}

- 	respondJSON(w, response)
+ 	resp.json(response)
  }

  func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
+ 	resp := newResponse(w)
+
+ 	stats := s.bundleManager.GetBundleStats()

- 	count, size, err := s.db.GetBundleStats(ctx)
- 	if err != nil {
- 		http.Error(w, err.Error(), http.StatusInternalServerError)
- 		return
- 	}
+ 	bundleCount := stats["bundle_count"].(int64)
+ 	totalSize := stats["total_size"].(int64)
+ 	totalUncompressedSize := stats["total_uncompressed_size"].(int64)
+ 	lastBundle := stats["last_bundle"].(int64)

- 	respondJSON(w, map[string]interface{}{
- 		"plc_bundle_count": count,
- 		"total_size":       size,
- 		"total_size_mb":    float64(size) / 1024 / 1024,
+ 	resp.json(map[string]interface{}{
+ 		"plc_bundle_count":          bundleCount,
+ 		"last_bundle_number":        lastBundle,
+ 		"total_compressed_size":     totalSize,
+ 		"total_uncompressed_size":   totalUncompressedSize,
+ 		"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
  	})
  }

- func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
- 	respondJSON(w, map[string]string{"status": "ok"})
- }
+ // ===== MEMPOOL HANDLERS =====

- func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
- 	ctx := r.Context()
- 	vars := mux.Vars(r)
- 	bundleNumberStr := vars["bundleNumber"]
+ func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
+ 	resp := newResponse(w)

- 	bundleNumber, err := strconv.Atoi(bundleNumberStr)
- 	if err != nil {
- 		http.Error(w, "Invalid bundle number", http.StatusBadRequest)
- 		return
- 	}
+ 	// Get stats from library's mempool via wrapper method
+ 	stats := s.bundleManager.GetMempoolStats()

- 	// Get bundle from DB
-
bundle, err := s.db.GetBundleByNumber(ctx, bundleNumber) 501 - if err != nil { 502 - http.Error(w, "Bundle not found", http.StatusNotFound) 503 - return 1026 + // Convert to API response format 1027 + result := map[string]interface{}{ 1028 + "operation_count": stats["count"], 1029 + "can_create_bundle": stats["can_create_bundle"], 504 1030 } 505 1031 506 - // Get previous bundle for boundary state 507 - var after string 508 - var prevBoundaryCIDs map[string]bool 509 - 510 - if bundleNumber > 1 { 511 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNumber-1) 512 - if err != nil { 513 - http.Error(w, "Failed to get previous bundle", http.StatusInternalServerError) 514 - return 515 - } 516 - 517 - after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z") 518 - 519 - // Convert stored boundary CIDs to map 520 - if len(prevBundle.BoundaryCIDs) > 0 { 521 - prevBoundaryCIDs = make(map[string]bool) 522 - for _, cid := range prevBundle.BoundaryCIDs { 523 - prevBoundaryCIDs[cid] = true 524 - } 525 - } 1032 + // Add size information 1033 + if sizeBytes, ok := stats["size_bytes"]; ok { 1034 + result["uncompressed_size"] = sizeBytes 1035 + result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024 526 1036 } 527 1037 528 - // Collect remote operations (may need multiple fetches for large bundles) 529 - var allRemoteOps []plc.PLCOperation 530 - seenCIDs := make(map[string]bool) 1038 + // Add time range and calculate estimated completion 1039 + if count, ok := stats["count"].(int); ok && count > 0 { 1040 + if firstTime, ok := stats["first_time"].(time.Time); ok { 1041 + result["mempool_start_time"] = firstTime 531 1042 532 - // Track boundary CIDs 533 - for cid := range prevBoundaryCIDs { 534 - seenCIDs[cid] = true 535 - } 1043 + if lastTime, ok := stats["last_time"].(time.Time); ok { 1044 + result["mempool_end_time"] = lastTime 536 1045 537 - currentAfter := after 538 - maxFetches := 20 // Enough for up to 20k operations 1046 + // Calculate estimated next bundle time if not complete 1047 + if count < 10000 { 1048 + timeSpan := lastTime.Sub(firstTime).Seconds() 1049 + if timeSpan > 0 { 1050 + opsPerSecond := float64(count) / timeSpan 1051 + if opsPerSecond > 0 { 1052 + remainingOps := 10000 - count 1053 + secondsNeeded := float64(remainingOps) / opsPerSecond 1054 + estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 539 1055 540 - for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ { 541 - // Fetch from PLC directory 542 - batch, err := s.plcClient.Export(ctx, plc.ExportOptions{ 543 - Count: 1000, 544 - After: currentAfter, 545 - }) 546 - if err != nil { 547 - http.Error(w, fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError) 548 - return 549 - } 550 - 551 - if len(batch) == 0 { 552 - break 553 - } 554 - 555 - // Deduplicate and add unique operations 556 - for _, op := range batch { 557 - if !seenCIDs[op.CID] { 558 - seenCIDs[op.CID] = true 559 - allRemoteOps = append(allRemoteOps, op) 560 - if len(allRemoteOps) >= plc.BUNDLE_SIZE { 561 - break 1056 + result["estimated_next_bundle_time"] = estimatedTime 1057 + result["current_rate_per_second"] = opsPerSecond 1058 + result["operations_needed"] = remainingOps 1059 + } 1060 + } 1061 + result["progress_percent"] = float64(count) / 100.0 1062 + } else { 1063 + // Ready to create bundle 1064 + result["estimated_next_bundle_time"] = time.Now() 1065 + result["operations_needed"] = 0 562 1066 } 563 1067 } 564 1068 } 1069 + } else { 1070 + 
// Empty mempool 1071 + result["mempool_start_time"] = nil 1072 + result["estimated_next_bundle_time"] = nil 1073 + } 565 1074 566 - // Update cursor for next fetch 567 - if len(batch) > 0 { 568 - lastOp := batch[len(batch)-1] 569 - currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z") 570 - } 1075 + resp.json(result) 1076 + } 571 1077 572 - // If we got less than 1000, we've reached the end 573 - if len(batch) < 1000 { 574 - break 575 - } 576 - } 1078 + // ===== PLC METRICS HANDLERS ===== 577 1079 578 - // Trim to exact bundle size 579 - if len(allRemoteOps) > plc.BUNDLE_SIZE { 580 - allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE] 581 - } 1080 + func (s *Server) handleGetPLCMetrics(w http.ResponseWriter, r *http.Request) { 1081 + resp := newResponse(w) 1082 + limit := getQueryInt(r, "limit", 10) 582 1083 583 - // Compute remote hash (uncompressed JSONL) 584 - remoteHash, err := computeRemoteOperationsHash(allRemoteOps) 1084 + metrics, err := s.db.GetPLCMetrics(r.Context(), limit) 585 1085 if err != nil { 586 - http.Error(w, fmt.Sprintf("Failed to compute remote hash: %v", err), http.StatusInternalServerError) 1086 + resp.error(err.Error(), http.StatusInternalServerError) 587 1087 return 588 1088 } 589 1089 590 - // Compare hashes (use uncompressed hash) 591 - verified := bundle.Hash == remoteHash 592 - 593 - respondJSON(w, map[string]interface{}{ 594 - "bundle_number": bundleNumber, 595 - "verified": verified, 596 - "local_hash": bundle.Hash, 597 - "remote_hash": remoteHash, 598 - "local_op_count": bundle.OperationCount, 599 - "remote_op_count": len(allRemoteOps), 600 - "boundary_cids_used": len(prevBoundaryCIDs), 601 - }) 1090 + resp.json(metrics) 602 1091 } 1092 + 1093 + // ===== VERIFICATION HANDLERS ===== 603 1094 604 1095 func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) { 605 - ctx := r.Context() 606 - 607 - // Get last bundle number 608 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 609 - if err != nil { 610 - http.Error(w, err.Error(), http.StatusInternalServerError) 611 - return 612 - } 1096 + resp := newResponse(w) 613 1097 1098 + lastBundle := s.bundleManager.GetLastBundleNumber() 614 1099 if lastBundle == 0 { 615 - respondJSON(w, map[string]interface{}{ 1100 + resp.json(map[string]interface{}{ 616 1101 "status": "empty", 617 1102 "message": "No bundles to verify", 618 1103 }) 619 1104 return 620 1105 } 621 1106 622 - // Verify chain 623 1107 valid := true 624 1108 var brokenAt int 625 1109 var errorMsg string 626 1110 627 1111 for i := 1; i <= lastBundle; i++ { 628 - bundle, err := s.db.GetBundleByNumber(ctx, i) 1112 + bundle, err := s.bundleManager.GetBundleMetadata(i) 629 1113 if err != nil { 630 1114 valid = false 631 1115 brokenAt = i ··· 633 1117 break 634 1118 } 635 1119 636 - // Verify chain link 637 1120 if i > 1 { 638 - prevBundle, err := s.db.GetBundleByNumber(ctx, i-1) 1121 + prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1) 639 1122 if err != nil { 640 1123 valid = false 641 1124 brokenAt = i ··· 643 1126 break 644 1127 } 645 1128 646 - if bundle.PrevBundleHash != prevBundle.Hash { 1129 + if bundle.Parent != prevBundle.Hash { 647 1130 valid = false 648 1131 brokenAt = i 649 - errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1) 1132 + errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1) 650 1133 break 651 1134 } 652 1135 } 653 1136 } 654 1137 655 - response := map[string]interface{}{ 1138 + result := map[string]interface{}{ 
656 1139 "chain_length": lastBundle, 657 1140 "valid": valid, 658 1141 } 659 1142 660 1143 if !valid { 661 - response["broken_at"] = brokenAt 662 - response["error"] = errorMsg 1144 + result["broken_at"] = brokenAt 1145 + result["error"] = errorMsg 663 1146 } 664 1147 665 - respondJSON(w, response) 1148 + resp.json(result) 666 1149 } 667 1150 668 1151 func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) { 669 - ctx := r.Context() 1152 + resp := newResponse(w) 670 1153 671 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 672 - if err != nil { 673 - http.Error(w, err.Error(), http.StatusInternalServerError) 674 - return 675 - } 676 - 1154 + lastBundle := s.bundleManager.GetLastBundleNumber() 677 1155 if lastBundle == 0 { 678 - respondJSON(w, map[string]interface{}{ 1156 + resp.json(map[string]interface{}{ 679 1157 "chain_length": 0, 680 1158 "status": "empty", 681 1159 }) 682 1160 return 683 1161 } 684 1162 685 - firstBundle, _ := s.db.GetBundleByNumber(ctx, 1) 686 - lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle) 1163 + firstBundle, _ := s.bundleManager.GetBundleMetadata(1) 1164 + lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle) 1165 + stats := s.bundleManager.GetBundleStats() 687 1166 688 - count, size, _ := s.db.GetBundleStats(ctx) 689 - 690 - respondJSON(w, map[string]interface{}{ 691 - "chain_length": lastBundle, 692 - "total_bundles": count, 693 - "total_size_mb": float64(size) / 1024 / 1024, 694 - "chain_start_time": firstBundle.StartTime, 695 - "chain_end_time": lastBundleData.EndTime, 696 - "chain_head_hash": lastBundleData.Hash, 697 - "first_prev_hash": firstBundle.PrevBundleHash, // Should be empty 698 - "last_prev_hash": lastBundleData.PrevBundleHash, 1167 + resp.json(map[string]interface{}{ 1168 + "chain_length": lastBundle, 1169 + "total_bundles": stats["bundle_count"], 1170 + "total_compressed_size": stats["total_size"], 1171 + "total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024, 1172 + "chain_start_time": firstBundle.StartTime, 1173 + "chain_end_time": lastBundleData.EndTime, 1174 + "chain_head_hash": lastBundleData.Hash, 1175 + "first_parent": firstBundle.Parent, 1176 + "last_parent": lastBundleData.Parent, 699 1177 }) 700 1178 } 701 1179 702 - // handlePLCExport simulates PLC directory /export endpoint using cached bundles 1180 + // ===== PLC EXPORT HANDLER ===== 1181 + 703 1182 func (s *Server) handlePLCExport(w http.ResponseWriter, r *http.Request) { 1183 + resp := newResponse(w) 704 1184 ctx := r.Context() 705 1185 706 - // Parse query parameters 707 - countStr := r.URL.Query().Get("count") 708 - afterStr := r.URL.Query().Get("after") 1186 + count := getQueryInt(r, "count", 1000) 1187 + if count > 10000 { 1188 + count = 10000 1189 + } 709 1190 710 - count := 1000 // Default 711 - if countStr != "" { 712 - if c, err := strconv.Atoi(countStr); err == nil && c > 0 { 713 - count = c 714 - if count > 10000 { 715 - count = 10000 // Max limit 716 - } 1191 + afterTime, err := parseAfterParam(r.URL.Query().Get("after")) 1192 + if err != nil { 1193 + resp.error(fmt.Sprintf("Invalid after parameter: %v", err), http.StatusBadRequest) 1194 + return 1195 + } 1196 + 1197 + startBundle := s.findStartBundle(afterTime) 1198 + ops := s.collectOperations(ctx, startBundle, afterTime, count) 1199 + 1200 + w.Header().Set("Content-Type", "application/jsonl") 1201 + w.Header().Set("X-Operation-Count", strconv.Itoa(len(ops))) 1202 + 1203 + for _, op := range ops { 1204 + if len(op.RawJSON) > 0 { 1205 + 
w.Write(op.RawJSON) 1206 + } else { 1207 + jsonData, _ := json.Marshal(op) 1208 + w.Write(jsonData) 717 1209 } 1210 + w.Write([]byte("\n")) 718 1211 } 1212 + } 719 1213 720 - var afterTime time.Time 721 - if afterStr != "" { 722 - // Try multiple timestamp formats (from most specific to least) 723 - formats := []string{ 724 - time.RFC3339Nano, // 2023-11-09T03:55:00.123456789Z 725 - time.RFC3339, // 2023-11-09T03:55:00Z 726 - "2006-01-02T15:04:05.000Z", // 2023-11-09T03:55:00.000Z 727 - "2006-01-02T15:04:05", // 2023-11-09T03:55:00 728 - "2006-01-02T15:04", // 2023-11-09T03:55 729 - "2006-01-02", // 2023-11-09 730 - } 1214 + func parseAfterParam(afterStr string) (time.Time, error) { 1215 + if afterStr == "" { 1216 + return time.Time{}, nil 1217 + } 731 1218 732 - var parsed time.Time 733 - var parseErr error 734 - parsed = time.Time{} // zero value 1219 + formats := []string{ 1220 + time.RFC3339Nano, 1221 + time.RFC3339, 1222 + "2006-01-02T15:04:05.000Z", 1223 + "2006-01-02T15:04:05", 1224 + "2006-01-02T15:04", 1225 + "2006-01-02", 1226 + } 735 1227 736 - for _, format := range formats { 737 - parsed, parseErr = time.Parse(format, afterStr) 738 - if parseErr == nil { 739 - afterTime = parsed 740 - break 741 - } 1228 + for _, format := range formats { 1229 + if parsed, err := time.Parse(format, afterStr); err == nil { 1230 + return parsed, nil 742 1231 } 1232 + } 743 1233 744 - if parseErr != nil { 745 - http.Error(w, fmt.Sprintf("Invalid after parameter: %v", parseErr), http.StatusBadRequest) 746 - return 747 - } 1234 + return time.Time{}, fmt.Errorf("invalid timestamp format") 1235 + } 1236 + 1237 + func (s *Server) findStartBundle(afterTime time.Time) int { 1238 + if afterTime.IsZero() { 1239 + return 1 748 1240 } 749 1241 750 - // Find starting bundle (FAST - single query) 751 - startBundle := 1 752 - if !afterTime.IsZero() { 753 - foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime) 754 - if err != nil { 755 - log.Error("Failed to find bundle for timestamp: %v", err) 756 - // Fallback to bundle 1 757 - } else { 758 - startBundle = foundBundle 759 - // Go back one bundle to catch boundary timestamps 760 - if startBundle > 1 { 761 - startBundle-- 762 - } 763 - } 1242 + foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime) 1243 + if foundBundle > 1 { 1244 + return foundBundle - 1 764 1245 } 1246 + return foundBundle 1247 + } 765 1248 766 - // Collect operations from bundles 1249 + func (s *Server) collectOperations(ctx context.Context, startBundle int, afterTime time.Time, count int) []plc.PLCOperation { 767 1250 var allOps []plc.PLCOperation 768 1251 seenCIDs := make(map[string]bool) 769 1252 770 - // Load bundles sequentially until we have enough operations 771 - lastBundle, _ := s.db.GetLastBundleNumber(ctx) 1253 + lastBundle := s.bundleManager.GetLastBundleNumber() 772 1254 773 1255 for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ { 774 - bundlePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)) 775 - 776 - ops, err := s.loadBundleOperations(bundlePath) 1256 + ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum) 777 1257 if err != nil { 778 1258 log.Error("Warning: failed to load bundle %d: %v", bundleNum, err) 779 1259 continue 780 1260 } 781 1261 782 - // Filter operations 783 1262 for _, op := range ops { 784 - // Skip if STRICTLY BEFORE "after" timestamp 785 - // Include operations AT or AFTER the timestamp 786 1263 if !afterTime.IsZero() && op.CreatedAt.Before(afterTime) { 787 
1264 continue 788 1265 } 789 1266 790 - // Skip duplicates (by CID) 791 1267 if seenCIDs[op.CID] { 792 1268 continue 793 1269 } ··· 801 1277 } 802 1278 } 803 1279 804 - // Set headers for JSONL response 805 - w.Header().Set("Content-Type", "application/jsonl") 806 - w.Header().Set("X-Operation-Count", strconv.Itoa(len(allOps))) 1280 + return allOps 1281 + } 807 1282 808 - // Write JSONL response (newline-delimited JSON with trailing newline) 809 - for _, op := range allOps { 810 - // Use raw JSON if available 811 - if len(op.RawJSON) > 0 { 812 - w.Write(op.RawJSON) 813 - } else { 814 - // Fallback: marshal the operation 815 - jsonData, err := json.Marshal(op) 816 - if err != nil { 817 - log.Error("Failed to marshal operation: %v", err) 818 - continue 1283 + func (s *Server) handleGetCountryLeaderboard(w http.ResponseWriter, r *http.Request) { 1284 + resp := newResponse(w) 1285 + 1286 + stats, err := s.db.GetCountryLeaderboard(r.Context()) 1287 + if err != nil { 1288 + resp.error(err.Error(), http.StatusInternalServerError) 1289 + return 1290 + } 1291 + 1292 + resp.json(stats) 1293 + } 1294 + 1295 + func (s *Server) handleGetVersionStats(w http.ResponseWriter, r *http.Request) { 1296 + resp := newResponse(w) 1297 + 1298 + stats, err := s.db.GetVersionStats(r.Context()) 1299 + if err != nil { 1300 + resp.error(err.Error(), http.StatusInternalServerError) 1301 + return 1302 + } 1303 + 1304 + // Add summary totals 1305 + var totalPDS int64 1306 + var totalUsers int64 1307 + for _, stat := range stats { 1308 + totalPDS += stat.PDSCount 1309 + totalUsers += stat.TotalUsers 1310 + } 1311 + 1312 + result := map[string]interface{}{ 1313 + "versions": stats, 1314 + "summary": map[string]interface{}{ 1315 + "total_pds_with_version": totalPDS, 1316 + "total_users": totalUsers, 1317 + "version_count": len(stats), 1318 + }, 1319 + } 1320 + 1321 + resp.json(result) 1322 + } 1323 + 1324 + // ===== HEALTH HANDLER ===== 1325 + 1326 + func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) { 1327 + newResponse(w).json(map[string]string{"status": "ok"}) 1328 + } 1329 + 1330 + func (s *Server) handleGetJobStatus(w http.ResponseWriter, r *http.Request) { 1331 + resp := newResponse(w) 1332 + tracker := monitor.GetTracker() 1333 + 1334 + jobs := tracker.GetAllJobs() 1335 + 1336 + result := make(map[string]interface{}) 1337 + for name, job := range jobs { 1338 + jobData := map[string]interface{}{ 1339 + "name": job.Name, 1340 + "status": job.Status, 1341 + "run_count": job.RunCount, 1342 + "success_count": job.SuccessCount, 1343 + "error_count": job.ErrorCount, 1344 + } 1345 + 1346 + if !job.LastRun.IsZero() { 1347 + jobData["last_run"] = job.LastRun 1348 + jobData["last_duration"] = job.Duration.String() 1349 + } 1350 + 1351 + if !job.NextRun.IsZero() { 1352 + jobData["next_run"] = job.NextRun 1353 + jobData["next_run_in"] = time.Until(job.NextRun).Round(time.Second).String() 1354 + } 1355 + 1356 + if job.Status == "running" { 1357 + jobData["running_for"] = job.Duration.Round(time.Second).String() 1358 + 1359 + if job.Progress != nil { 1360 + jobData["progress"] = job.Progress 819 1361 } 820 - w.Write(jsonData) 1362 + 1363 + // Add worker status 1364 + workers := tracker.GetWorkers(name) 1365 + if len(workers) > 0 { 1366 + jobData["workers"] = workers 1367 + } 821 1368 } 822 1369 823 - // Always add newline after each operation (including the last) 824 - w.Write([]byte("\n")) 1370 + if job.Error != "" { 1371 + jobData["error"] = job.Error 1372 + } 1373 + 1374 + result[name] = jobData 825 1375 } 
1376 + 1377 + resp.json(result) 826 1378 } 827 1379 828 - // computeRemoteOperationsHash - matching format 829 - func computeRemoteOperationsHash(ops []plc.PLCOperation) (string, error) { 830 - var jsonlData []byte 831 - for i, op := range ops { 832 - if len(op.RawJSON) > 0 { 833 - jsonlData = append(jsonlData, op.RawJSON...) 834 - } else { 835 - return "", fmt.Errorf("operation %d missing raw JSON data", i) 1380 + func (s *Server) handleGetDuplicateEndpoints(w http.ResponseWriter, r *http.Request) { 1381 + resp := newResponse(w) 1382 + 1383 + duplicates, err := s.db.GetDuplicateEndpoints(r.Context()) 1384 + if err != nil { 1385 + resp.error(err.Error(), http.StatusInternalServerError) 1386 + return 1387 + } 1388 + 1389 + // Format response 1390 + result := make([]map[string]interface{}, 0) 1391 + for serverDID, endpoints := range duplicates { 1392 + result = append(result, map[string]interface{}{ 1393 + "server_did": serverDID, 1394 + "primary": endpoints[0], // First discovered 1395 + "aliases": endpoints[1:], // Other domains 1396 + "alias_count": len(endpoints) - 1, 1397 + "total_domains": len(endpoints), 1398 + }) 1399 + } 1400 + 1401 + resp.json(map[string]interface{}{ 1402 + "duplicates": result, 1403 + "total_duplicate_servers": len(duplicates), 1404 + }) 1405 + } 1406 + 1407 + func (s *Server) handleGetPLCHistory(w http.ResponseWriter, r *http.Request) { 1408 + resp := newResponse(w) 1409 + 1410 + limit := getQueryInt(r, "limit", 0) 1411 + fromBundle := getQueryInt(r, "from", 1) 1412 + 1413 + // Use BundleManager instead of database 1414 + history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle) 1415 + if err != nil { 1416 + resp.error(err.Error(), http.StatusInternalServerError) 1417 + return 1418 + } 1419 + 1420 + var totalOps int64 1421 + var totalUncompressed int64 1422 + var totalCompressed int64 1423 + 1424 + for _, point := range history { 1425 + totalOps += int64(point.OperationCount) 1426 + totalUncompressed += point.UncompressedSize 1427 + totalCompressed += point.CompressedSize 1428 + } 1429 + 1430 + result := map[string]interface{}{ 1431 + "data": history, 1432 + "summary": map[string]interface{}{ 1433 + "days": len(history), 1434 + "total_operations": totalOps, 1435 + "total_uncompressed": totalUncompressed, 1436 + "total_compressed": totalCompressed, 1437 + "compression_ratio": 0.0, 1438 + }, 1439 + } 1440 + 1441 + if len(history) > 0 { 1442 + result["summary"].(map[string]interface{})["first_date"] = history[0].Date 1443 + result["summary"].(map[string]interface{})["last_date"] = history[len(history)-1].Date 1444 + result["summary"].(map[string]interface{})["time_span_days"] = len(history) 1445 + 1446 + if totalCompressed > 0 { 1447 + result["summary"].(map[string]interface{})["compression_ratio"] = float64(totalUncompressed) / float64(totalCompressed) 836 1448 } 837 - // Add newline ONLY between operations 838 - jsonlData = append(jsonlData, '\n') 1449 + 1450 + result["summary"].(map[string]interface{})["avg_operations_per_day"] = totalOps / int64(len(history)) 1451 + result["summary"].(map[string]interface{})["avg_size_per_day"] = totalUncompressed / int64(len(history)) 839 1452 } 840 1453 841 - hash := sha256.Sum256(jsonlData) 842 - return hex.EncodeToString(hash[:]), nil 1454 + resp.json(result) 843 1455 } 844 1456 845 - func respondJSON(w http.ResponseWriter, data interface{}) { 846 - w.Header().Set("Content-Type", "application/json") 847 - json.NewEncoder(w).Encode(data) 1457 + // ===== DEBUG HANDLERS ===== 1458 + 1459 + func (s *Server) 
handleGetDBSizes(w http.ResponseWriter, r *http.Request) { 1460 + resp := newResponse(w) 1461 + ctx := r.Context() 1462 + schema := "public" // Or make configurable if needed 1463 + 1464 + tableSizes, err := s.db.GetTableSizes(ctx, schema) 1465 + if err != nil { 1466 + log.Error("Failed to get table sizes: %v", err) 1467 + resp.error("Failed to retrieve table sizes", http.StatusInternalServerError) 1468 + return 1469 + } 1470 + 1471 + indexSizes, err := s.db.GetIndexSizes(ctx, schema) 1472 + if err != nil { 1473 + log.Error("Failed to get index sizes: %v", err) 1474 + resp.error("Failed to retrieve index sizes", http.StatusInternalServerError) 1475 + return 1476 + } 1477 + 1478 + resp.json(map[string]interface{}{ 1479 + "schema": schema, 1480 + "tables": tableSizes, 1481 + "indexes": indexSizes, 1482 + "retrievedAt": time.Now().UTC(), 1483 + }) 848 1484 } 849 1485 850 - // Helper function 851 - func statusToString(status int) string { 852 - switch status { 853 - case storage.PDSStatusOnline: 854 - return "online" 855 - case storage.PDSStatusOffline: 856 - return "offline" 857 - default: 858 - return "unknown" 1486 + func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) { 1487 + resp := newResponse(w) 1488 + 1489 + bundleNum, err := getBundleNumber(r) 1490 + if err != nil { 1491 + resp.error("invalid bundle number", http.StatusBadRequest) 1492 + return 859 1493 } 1494 + 1495 + labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum) 1496 + if err != nil { 1497 + resp.error(err.Error(), http.StatusInternalServerError) 1498 + return 1499 + } 1500 + 1501 + resp.json(map[string]interface{}{ 1502 + "bundle": bundleNum, 1503 + "count": len(labels), 1504 + "labels": labels, 1505 + }) 1506 + } 1507 + 1508 + // ===== UTILITY FUNCTIONS ===== 1509 + 1510 + func normalizeEndpoint(endpoint string) string { 1511 + endpoint = strings.TrimPrefix(endpoint, "https://") 1512 + endpoint = strings.TrimPrefix(endpoint, "http://") 1513 + endpoint = strings.TrimSuffix(endpoint, "/") 1514 + return endpoint 860 1515 }
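Note: the estimated_next_bundle_time in handleGetMempoolStats above is a straight linear extrapolation of the observed ingestion rate (ops per second between the first and last mempool operation). A minimal self-contained sketch of the same arithmetic; the estimateNextBundle helper and the sample values are illustrative, not part of the handler:

package main

import (
	"fmt"
	"time"
)

// estimateNextBundle extrapolates when the mempool reaches bundleSize
// operations, assuming the rate observed between first and last holds.
func estimateNextBundle(count, bundleSize int, first, last time.Time) (time.Time, bool) {
	if count >= bundleSize {
		return time.Now(), true // bundle can be cut immediately
	}
	span := last.Sub(first).Seconds()
	if count == 0 || span <= 0 {
		return time.Time{}, false // not enough data for an estimate
	}
	opsPerSecond := float64(count) / span
	secondsNeeded := float64(bundleSize-count) / opsPerSecond
	return time.Now().Add(time.Duration(secondsNeeded * float64(time.Second))), true
}

func main() {
	first := time.Now().Add(-30 * time.Minute)
	// 6000 ops in 30 minutes is ~3.33 ops/s, so the remaining 4000 ops
	// should take about 20 more minutes.
	eta, ok := estimateNextBundle(6000, 10000, first, time.Now())
	fmt.Println(eta, ok)
}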
+52 -24
internal/api/server.go
··· 6 6 "net/http" 7 7 "time" 8 8 9 - "github.com/atscan/atscanner/internal/config" 10 - "github.com/atscan/atscanner/internal/log" 11 - "github.com/atscan/atscanner/internal/plc" 12 - "github.com/atscan/atscanner/internal/storage" 9 + "github.com/atscan/atscand/internal/config" 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/atscan/atscand/internal/plc" 12 + "github.com/atscan/atscand/internal/storage" 13 13 "github.com/gorilla/handlers" 14 14 "github.com/gorilla/mux" 15 15 ) 16 16 17 17 type Server struct { 18 - router *mux.Router 19 - server *http.Server 20 - db storage.Database 21 - plcClient *plc.Client 22 - plcBundleDir string // NEW: Store cache dir 18 + router *mux.Router 19 + server *http.Server 20 + db storage.Database 21 + plcBundleDir string 22 + bundleManager *plc.BundleManager 23 + plcIndexDIDs bool 23 24 } 24 25 25 - func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server { 26 + func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server { 26 27 s := &Server{ 27 - router: mux.NewRouter(), 28 - db: db, 29 - plcClient: plc.NewClient(plcCfg.DirectoryURL), 30 - plcBundleDir: plcCfg.BundleDir, // NEW 28 + router: mux.NewRouter(), 29 + db: db, 30 + plcBundleDir: plcCfg.BundleDir, 31 + bundleManager: bundleManager, // Use provided shared instance 32 + plcIndexDIDs: plcCfg.IndexDIDs, 31 33 } 32 34 33 35 s.setupRoutes() ··· 52 54 func (s *Server) setupRoutes() { 53 55 api := s.router.PathPrefix("/api/v1").Subrouter() 54 56 55 - // PDS endpoints 57 + // Generic endpoints (keep as-is) 58 + api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET") 59 + api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET") 60 + api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET") 61 + 62 + //PDS-specific endpoints (virtual, created via JOINs) 56 63 api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET") 57 64 api.HandleFunc("/pds/stats", s.handleGetPDSStats).Methods("GET") 58 - api.HandleFunc("/pds/{endpoint}", s.handleGetPDS).Methods("GET") 65 + api.HandleFunc("/pds/countries", s.handleGetCountryLeaderboard).Methods("GET") 66 + api.HandleFunc("/pds/versions", s.handleGetVersionStats).Methods("GET") 67 + api.HandleFunc("/pds/duplicates", s.handleGetDuplicateEndpoints).Methods("GET") 68 + api.HandleFunc("/pds/{endpoint}", s.handleGetPDSDetail).Methods("GET") 59 69 60 - // Metrics endpoints 61 - api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET") 70 + // PDS repos 71 + api.HandleFunc("/pds/{endpoint}/repos", s.handleGetPDSRepos).Methods("GET") 72 + api.HandleFunc("/pds/{endpoint}/repos/stats", s.handleGetPDSRepoStats).Methods("GET") 73 + api.HandleFunc("/pds/repos/{did}", s.handleGetDIDRepos).Methods("GET") 74 + 75 + // Global DID routes 76 + api.HandleFunc("/did/{did}", s.handleGetGlobalDID).Methods("GET") 77 + api.HandleFunc("/handle/{handle}", s.handleGetDIDByHandle).Methods("GET") // NEW 62 78 63 - // PLC Bundle endpoints 79 + // PLC Bundle routes 64 80 api.HandleFunc("/plc/bundles", s.handleGetPLCBundles).Methods("GET") 65 81 api.HandleFunc("/plc/bundles/stats", s.handleGetPLCBundleStats).Methods("GET") 66 82 api.HandleFunc("/plc/bundles/chain", s.handleGetChainInfo).Methods("GET") 67 83 api.HandleFunc("/plc/bundles/verify-chain", s.handleVerifyChain).Methods("POST") 84 + api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET") 68 85 api.HandleFunc("/plc/bundles/{number}/dids", 
s.handleGetPLCBundleDIDs).Methods("GET") 69 86 api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET") 70 - api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST") 71 - api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET") 87 + api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET") 88 + 89 + // PLC history/metrics 90 + api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET") 91 + 92 + // PLC Export endpoint (simulates PLC directory) 72 93 api.HandleFunc("/plc/export", s.handlePLCExport).Methods("GET") 73 94 74 - // PLC/DID endpoints 95 + // DID routes 75 96 api.HandleFunc("/plc/did/{did}", s.handleGetDID).Methods("GET") 76 97 api.HandleFunc("/plc/did/{did}/history", s.handleGetDIDHistory).Methods("GET") 98 + api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET") 77 99 78 - // Mempool endpoint - NEW 100 + // Mempool routes 79 101 api.HandleFunc("/mempool/stats", s.handleGetMempoolStats).Methods("GET") 80 102 81 - // Chain verification - NEW 103 + // Metrics routes 104 + api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET") 105 + 106 + // Debug Endpoints 107 + api.HandleFunc("/debug/db/sizes", s.handleGetDBSizes).Methods("GET") 108 + api.HandleFunc("/jobs", s.handleGetJobStatus).Methods("GET") 82 109 83 110 // Health check 84 111 s.router.HandleFunc("/health", s.handleHealth).Methods("GET") 85 112 } 113 + 86 114 func (s *Server) Start() error { 87 115 log.Info("API server listening on %s", s.server.Addr) 88 116 return s.server.ListenAndServe()
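Note: gorilla/mux tries routes in registration order, which is presumably why the fixed /plc/bundles/stats, /plc/bundles/chain and /plc/bundles/verify-chain routes are registered before the parameterized /plc/bundles/{number} route above. A minimal sketch of the pitfall (handlers are placeholders):

package main

import (
	"fmt"
	"net/http"

	"github.com/gorilla/mux"
)

func main() {
	r := mux.NewRouter()
	api := r.PathPrefix("/api/v1").Subrouter()

	// Registered first, so GET /api/v1/plc/bundles/stats hits this handler.
	// If the {number} route came first, it would swallow "stats" as the
	// path variable instead.
	api.HandleFunc("/plc/bundles/stats", func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintln(w, "stats")
	}).Methods("GET")

	api.HandleFunc("/plc/bundles/{number}", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintln(w, "bundle", mux.Vars(r)["number"])
	}).Methods("GET")

	http.ListenAndServe(":8080", r)
}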
+8 -3
internal/config/config.go
··· 16 16 17 17 type DatabaseConfig struct { 18 18 Path string `yaml:"path"` 19 - Type string `yaml:"type"` // sqlite, postgres 19 + Type string `yaml:"type"` // postgres 20 20 } 21 21 22 22 type PLCConfig struct { 23 23 DirectoryURL string `yaml:"directory_url"` 24 24 ScanInterval time.Duration `yaml:"scan_interval"` 25 25 BatchSize int `yaml:"batch_size"` 26 - BundleDir string `yaml:"bundles_dir"` // NEW: Cache directory 27 - UseCache bool `yaml:"use_cache"` // NEW: Enable/disable cache 26 + BundleDir string `yaml:"bundles_dir"` 27 + UseCache bool `yaml:"use_cache"` 28 + IndexDIDs bool `yaml:"index_dids"` 28 29 } 29 30 30 31 type PDSConfig struct { ··· 32 33 Timeout time.Duration `yaml:"timeout"` 33 34 Workers int `yaml:"workers"` 34 35 RecheckInterval time.Duration `yaml:"recheck_interval"` 36 + ScanRetention int `yaml:"scan_retention"` 35 37 } 36 38 37 39 type APIConfig struct { ··· 72 74 } 73 75 if cfg.PDS.Workers == 0 { 74 76 cfg.PDS.Workers = 10 77 + } 78 + if cfg.PDS.ScanRetention == 0 { 79 + cfg.PDS.ScanRetention = 3 75 80 } 76 81 if cfg.API.Port == 0 { 77 82 cfg.API.Port = 8080
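Note: for reference, a config.yaml shape matching the struct tags above. All values are illustrative, the api key names are inferred from the APIConfig usage in main.go, and the duration strings assume the loader accepts Go notation like "1m". Keys omitted fall back to the defaults in Load (workers 10, scan_retention 3, port 8080):

database:
  type: postgres
  path: "postgres://atscan:secret@localhost:5432/atscan"

plc:
  directory_url: "https://plc.directory"
  scan_interval: 1m
  batch_size: 1000
  bundles_dir: "./plc_bundles"
  use_cache: true
  index_dids: true

pds:
  scan_interval: 10m
  timeout: 30s
  workers: 10
  recheck_interval: 1h
  scan_retention: 3

api:
  host: "0.0.0.0"
  port: 8080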
+162
internal/ipinfo/client.go
··· 1 + package ipinfo 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "fmt" 7 + "net" 8 + "net/http" 9 + "net/url" 10 + "sync" 11 + "time" 12 + ) 13 + 14 + type Client struct { 15 + httpClient *http.Client 16 + baseURL string 17 + mu sync.RWMutex 18 + backoffUntil time.Time 19 + backoffDuration time.Duration 20 + } 21 + 22 + func NewClient() *Client { 23 + return &Client{ 24 + httpClient: &http.Client{ 25 + Timeout: 10 * time.Second, 26 + }, 27 + baseURL: "https://api.ipapi.is", 28 + backoffDuration: 5 * time.Minute, 29 + } 30 + } 31 + 32 + // IsInBackoff checks if we're currently in backoff period 33 + func (c *Client) IsInBackoff() bool { 34 + c.mu.RLock() 35 + defer c.mu.RUnlock() 36 + return time.Now().Before(c.backoffUntil) 37 + } 38 + 39 + // SetBackoff sets the backoff period 40 + func (c *Client) SetBackoff() { 41 + c.mu.Lock() 42 + defer c.mu.Unlock() 43 + c.backoffUntil = time.Now().Add(c.backoffDuration) 44 + } 45 + 46 + // ClearBackoff clears the backoff (on successful request) 47 + func (c *Client) ClearBackoff() { 48 + c.mu.Lock() 49 + defer c.mu.Unlock() 50 + c.backoffUntil = time.Time{} 51 + } 52 + 53 + // GetIPInfo fetches IP information from ipapi.is 54 + func (c *Client) GetIPInfo(ctx context.Context, ip string) (map[string]interface{}, error) { 55 + // Check if we're in backoff period 56 + if c.IsInBackoff() { 57 + c.mu.RLock() 58 + remaining := time.Until(c.backoffUntil) 59 + c.mu.RUnlock() 60 + return nil, fmt.Errorf("in backoff period, retry in %v", remaining.Round(time.Second)) 61 + } 62 + 63 + // Build URL with IP parameter 64 + reqURL := fmt.Sprintf("%s/?q=%s", c.baseURL, url.QueryEscape(ip)) 65 + 66 + req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) 67 + if err != nil { 68 + return nil, fmt.Errorf("failed to create request: %w", err) 69 + } 70 + 71 + resp, err := c.httpClient.Do(req) 72 + if err != nil { 73 + // Set backoff on network errors (timeout, etc) 74 + c.SetBackoff() 75 + return nil, fmt.Errorf("failed to fetch IP info: %w", err) 76 + } 77 + defer resp.Body.Close() 78 + 79 + if resp.StatusCode == http.StatusTooManyRequests { 80 + // Set backoff on rate limit 81 + c.SetBackoff() 82 + return nil, fmt.Errorf("rate limited (429), backing off for %v", c.backoffDuration) 83 + } 84 + 85 + if resp.StatusCode != http.StatusOK { 86 + // Set backoff on other errors too 87 + c.SetBackoff() 88 + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 89 + } 90 + 91 + var ipInfo map[string]interface{} 92 + if err := json.NewDecoder(resp.Body).Decode(&ipInfo); err != nil { 93 + return nil, fmt.Errorf("failed to decode response: %w", err) 94 + } 95 + 96 + // Clear backoff on successful request 97 + c.ClearBackoff() 98 + 99 + return ipInfo, nil 100 + } 101 + 102 + // IPAddresses holds both IPv4 and IPv6 addresses 103 + type IPAddresses struct { 104 + IPv4 string 105 + IPv6 string 106 + } 107 + 108 + // ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL 109 + func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) { 110 + // Parse URL 111 + parsedURL, err := url.Parse(endpoint) 112 + if err != nil { 113 + return nil, fmt.Errorf("failed to parse endpoint URL: %w", err) 114 + } 115 + 116 + host := parsedURL.Hostname() 117 + if host == "" { 118 + return nil, fmt.Errorf("no hostname in endpoint") 119 + } 120 + 121 + result := &IPAddresses{} 122 + 123 + // Check if host is already an IP 124 + if ip := net.ParseIP(host); ip != nil { 125 + if ip.To4() != nil { 126 + result.IPv4 = host 127 + } else { 128 + 
result.IPv6 = host 129 + } 130 + return result, nil 131 + } 132 + 133 + // Resolve hostname to IPs 134 + ips, err := net.LookupIP(host) 135 + if err != nil { 136 + return nil, fmt.Errorf("failed to resolve hostname: %w", err) 137 + } 138 + 139 + if len(ips) == 0 { 140 + return nil, fmt.Errorf("no IPs found for hostname") 141 + } 142 + 143 + // Extract both IPv4 and IPv6 144 + for _, ip := range ips { 145 + if ipv4 := ip.To4(); ipv4 != nil { 146 + if result.IPv4 == "" { 147 + result.IPv4 = ipv4.String() 148 + } 149 + } else { 150 + if result.IPv6 == "" { 151 + result.IPv6 = ip.String() 152 + } 153 + } 154 + } 155 + 156 + // Must have at least one IP 157 + if result.IPv4 == "" && result.IPv6 == "" { 158 + return nil, fmt.Errorf("no valid IPs found") 159 + } 160 + 161 + return result, nil 162 + }
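Note: the client above fails fast for five minutes after any network error, 429, or non-200 response, and a single success clears the backoff, so a scan loop can call it unconditionally. A minimal usage sketch (192.0.2.1 is a documentation placeholder address):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/atscan/atscand/internal/ipinfo"
)

func main() {
	client := ipinfo.NewClient()
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	info, err := client.GetIPInfo(ctx, "192.0.2.1")
	if err != nil {
		// After a prior failure this also covers the fail-fast case:
		// "in backoff period, retry in ..."
		fmt.Println("lookup failed:", err)
		return
	}
	fmt.Printf("got %d top-level fields\n", len(info))
}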
+115 -7
internal/log/log.go
··· 1 1 package log 2 2 3 3 import ( 4 + "fmt" 4 5 "io" 5 6 "log" 6 7 "os" 8 + "strings" 9 + "time" 7 10 ) 8 11 9 12 var ( ··· 19 22 verboseWriter = os.Stdout 20 23 } 21 24 22 - infoLog = log.New(infoWriter, "INFO: ", log.Ldate|log.Ltime|log.Lshortfile) 23 - verboseLog = log.New(verboseWriter, "VERBOSE: ", log.Ldate|log.Ltime|log.Lshortfile) 24 - errorLog = log.New(os.Stderr, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile) 25 + // Use no flags, we'll add our own ISO 8601 timestamps 26 + infoLog = log.New(infoWriter, "", 0) 27 + verboseLog = log.New(verboseWriter, "", 0) 28 + errorLog = log.New(os.Stderr, "", 0) 29 + } 30 + 31 + // timestamp returns current time with milliseconds (local time, no timezone) 32 + func timestamp() string { 33 + return time.Now().Format("2006-01-02T15:04:05.000") 25 34 } 26 35 27 36 func Verbose(format string, v ...interface{}) { 28 - verboseLog.Printf(format, v...) 37 + verboseLog.Printf("%s [VERBOSE] %s", timestamp(), fmt.Sprintf(format, v...)) 29 38 } 30 39 31 40 func Info(format string, v ...interface{}) { 32 - infoLog.Printf(format, v...) 41 + infoLog.Printf("%s [INFO] %s", timestamp(), fmt.Sprintf(format, v...)) 42 + } 43 + 44 + func Warn(format string, v ...interface{}) { 45 + infoLog.Printf("%s [WARN] %s", timestamp(), fmt.Sprintf(format, v...)) 33 46 } 34 47 35 48 func Error(format string, v ...interface{}) { 36 - errorLog.Printf(format, v...) 49 + errorLog.Printf("%s [ERROR] %s", timestamp(), fmt.Sprintf(format, v...)) 37 50 } 38 51 39 52 func Fatal(format string, v ...interface{}) { 40 - errorLog.Fatalf(format, v...) 53 + errorLog.Fatalf("%s [FATAL] %s", timestamp(), fmt.Sprintf(format, v...)) 54 + } 55 + 56 + // Banner prints a startup banner 57 + func Banner(version string) { 58 + banner := ` 59 + ╔════════════════════════════════════════════════════════════╗ 60 + ║ ║ 61 + ║ █████╗ ████████╗███████╗ ██████╗ █████╗ ███╗ ██╗ ║ 62 + ║ ██╔══██╗╚══██╔══╝██╔════╝██╔════╝██╔══██╗████╗ ██║ ║ 63 + ║ ███████║ ██║ ███████╗██║ ███████║██╔██╗ ██║ ║ 64 + ║ ██╔══██║ ██║ ╚════██║██║ ██╔══██║██║╚██╗██║ ║ 65 + ║ ██║ ██║ ██║ ███████║╚██████╗██║ ██║██║ ╚████║ ║ 66 + ║ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ║ 67 + ║ ║ 68 + ║ AT Protocol Network Scanner & Indexer ║ 69 + ║ Version %s ║ 70 + ║ ║ 71 + ╚════════════════════════════════════════════════════════════╝ 72 + ` 73 + fmt.Printf(banner, padVersion(version)) 74 + } 75 + 76 + // padVersion pads the version string to fit the banner 77 + func padVersion(version string) string { 78 + targetLen := 7 79 + if len(version) < targetLen { 80 + padding := strings.Repeat(" ", (targetLen-len(version))/2) 81 + return padding + version + padding 82 + } 83 + return version 84 + } 85 + 86 + // RedactPassword redacts passwords from connection strings 87 + func RedactPassword(connStr string) string { 88 + // Handle PostgreSQL URI format: postgresql://user:password@host/db 89 + // Pattern: find everything between :// and @ that contains a colon 90 + if strings.Contains(connStr, "://") && strings.Contains(connStr, "@") { 91 + // Find the credentials section 92 + parts := strings.SplitN(connStr, "://", 2) 93 + if len(parts) == 2 { 94 + scheme := parts[0] 95 + remainder := parts[1] 96 + 97 + // Find the @ symbol 98 + atIndex := strings.Index(remainder, "@") 99 + if atIndex > 0 { 100 + credentials := remainder[:atIndex] 101 + hostAndDb := remainder[atIndex:] 102 + 103 + // Check if there's a password (look for colon in credentials) 104 + colonIndex := strings.Index(credentials, ":") 105 + if colonIndex > 0 { 106 + username := 
credentials[:colonIndex] 107 + return fmt.Sprintf("%s://%s:***%s", scheme, username, hostAndDb) 108 + } 109 + } 110 + } 111 + } 112 + 113 + // Handle key-value format: host=localhost password=secret user=myuser 114 + if strings.Contains(connStr, "password=") { 115 + parts := strings.Split(connStr, " ") 116 + for i, part := range parts { 117 + if strings.HasPrefix(part, "password=") { 118 + parts[i] = "password=***" 119 + } 120 + } 121 + return strings.Join(parts, " ") 122 + } 123 + 124 + return connStr 125 + } 126 + 127 + // PrintConfig prints configuration summary 128 + func PrintConfig(items map[string]string) { 129 + Info("=== Configuration ===") 130 + maxKeyLen := 0 131 + for key := range items { 132 + if len(key) > maxKeyLen { 133 + maxKeyLen = len(key) 134 + } 135 + } 136 + 137 + for key, value := range items { 138 + padding := strings.Repeat(" ", maxKeyLen-len(key)) 139 + 140 + // Redact database connection strings 141 + displayValue := value 142 + if strings.Contains(key, "Database Path") || strings.Contains(key, "Connection") || strings.Contains(strings.ToLower(key), "password") { 143 + displayValue = RedactPassword(value) 144 + } 145 + 146 + fmt.Printf(" %s:%s %s\n", key, padding, displayValue) 147 + } 148 + Info("====================") 41 149 }
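Note: RedactPassword handles the two connection-string shapes shown above; a quick illustration of both (credentials are made up):

package main

import (
	"fmt"

	"github.com/atscan/atscand/internal/log"
)

func main() {
	// URI form: the text between the colon in the credentials and "@" is masked.
	fmt.Println(log.RedactPassword("postgresql://atscan:s3cret@db.internal:5432/atscan"))
	// -> postgresql://atscan:***@db.internal:5432/atscan

	// Key-value form: only the password token is replaced.
	fmt.Println(log.RedactPassword("host=localhost user=atscan password=s3cret dbname=atscan"))
	// -> host=localhost user=atscan password=*** dbname=atscan
}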
+226
internal/monitor/tracker.go
··· 1 + package monitor 2 + 3 + import ( 4 + "sync" 5 + "time" 6 + ) 7 + 8 + type JobStatus struct { 9 + Name string `json:"name"` 10 + Status string `json:"status"` // "idle", "running", "completed", "error" 11 + StartTime time.Time `json:"start_time,omitempty"` 12 + LastRun time.Time `json:"last_run,omitempty"` 13 + Duration time.Duration `json:"duration,omitempty"` 14 + Progress *Progress `json:"progress,omitempty"` 15 + Error string `json:"error,omitempty"` 16 + NextRun time.Time `json:"next_run,omitempty"` 17 + RunCount int64 `json:"run_count"` 18 + SuccessCount int64 `json:"success_count"` 19 + ErrorCount int64 `json:"error_count"` 20 + } 21 + 22 + type Progress struct { 23 + Current int `json:"current"` 24 + Total int `json:"total"` 25 + Percent float64 `json:"percent"` 26 + Message string `json:"message,omitempty"` 27 + } 28 + 29 + type WorkerStatus struct { 30 + ID int `json:"id"` 31 + Status string `json:"status"` // "idle", "working" 32 + CurrentTask string `json:"current_task,omitempty"` 33 + StartedAt time.Time `json:"started_at,omitempty"` 34 + Duration time.Duration `json:"duration,omitempty"` 35 + } 36 + 37 + type Tracker struct { 38 + mu sync.RWMutex 39 + jobs map[string]*JobStatus 40 + workers map[string][]WorkerStatus // key is job name 41 + } 42 + 43 + var globalTracker *Tracker 44 + 45 + func init() { 46 + globalTracker = &Tracker{ 47 + jobs: make(map[string]*JobStatus), 48 + workers: make(map[string][]WorkerStatus), 49 + } 50 + } 51 + 52 + func GetTracker() *Tracker { 53 + return globalTracker 54 + } 55 + 56 + // Job status methods 57 + func (t *Tracker) RegisterJob(name string) { 58 + t.mu.Lock() 59 + defer t.mu.Unlock() 60 + 61 + t.jobs[name] = &JobStatus{ 62 + Name: name, 63 + Status: "idle", 64 + } 65 + } 66 + 67 + func (t *Tracker) StartJob(name string) { 68 + t.mu.Lock() 69 + defer t.mu.Unlock() 70 + 71 + if job, exists := t.jobs[name]; exists { 72 + job.Status = "running" 73 + job.StartTime = time.Now() 74 + job.Error = "" 75 + job.RunCount++ 76 + } 77 + } 78 + 79 + func (t *Tracker) CompleteJob(name string, err error) { 80 + t.mu.Lock() 81 + defer t.mu.Unlock() 82 + 83 + if job, exists := t.jobs[name]; exists { 84 + job.LastRun = time.Now() 85 + job.Duration = time.Since(job.StartTime) 86 + 87 + if err != nil { 88 + job.Status = "error" 89 + job.Error = err.Error() 90 + job.ErrorCount++ 91 + } else { 92 + job.Status = "completed" 93 + job.SuccessCount++ 94 + } 95 + 96 + job.Progress = nil // Clear progress 97 + } 98 + } 99 + 100 + func (t *Tracker) UpdateProgress(name string, current, total int, message string) { 101 + t.mu.Lock() 102 + defer t.mu.Unlock() 103 + 104 + if job, exists := t.jobs[name]; exists { 105 + var percent float64 106 + if total > 0 { 107 + percent = float64(current) / float64(total) * 100 108 + } 109 + 110 + job.Progress = &Progress{ 111 + Current: current, 112 + Total: total, 113 + Percent: percent, 114 + Message: message, 115 + } 116 + } 117 + } 118 + 119 + func (t *Tracker) SetNextRun(name string, nextRun time.Time) { 120 + t.mu.Lock() 121 + defer t.mu.Unlock() 122 + 123 + if job, exists := t.jobs[name]; exists { 124 + job.NextRun = nextRun 125 + } 126 + } 127 + 128 + func (t *Tracker) GetJobStatus(name string) *JobStatus { 129 + t.mu.RLock() 130 + defer t.mu.RUnlock() 131 + 132 + if job, exists := t.jobs[name]; exists { 133 + // Create a copy 134 + jobCopy := *job 135 + if job.Progress != nil { 136 + progressCopy := *job.Progress 137 + jobCopy.Progress = &progressCopy 138 + } 139 + 140 + // Calculate duration for running jobs 141 + if 
jobCopy.Status == "running" { 142 + jobCopy.Duration = time.Since(jobCopy.StartTime) 143 + } 144 + 145 + return &jobCopy 146 + } 147 + return nil 148 + } 149 + 150 + func (t *Tracker) GetAllJobs() map[string]*JobStatus { 151 + t.mu.RLock() 152 + defer t.mu.RUnlock() 153 + 154 + result := make(map[string]*JobStatus) 155 + for name, job := range t.jobs { 156 + jobCopy := *job 157 + if job.Progress != nil { 158 + progressCopy := *job.Progress 159 + jobCopy.Progress = &progressCopy 160 + } 161 + 162 + // Calculate duration for running jobs 163 + if jobCopy.Status == "running" { 164 + jobCopy.Duration = time.Since(jobCopy.StartTime) 165 + } 166 + 167 + result[name] = &jobCopy 168 + } 169 + return result 170 + } 171 + 172 + // Worker status methods 173 + func (t *Tracker) InitWorkers(jobName string, count int) { 174 + t.mu.Lock() 175 + defer t.mu.Unlock() 176 + 177 + workers := make([]WorkerStatus, count) 178 + for i := 0; i < count; i++ { 179 + workers[i] = WorkerStatus{ 180 + ID: i + 1, 181 + Status: "idle", 182 + } 183 + } 184 + t.workers[jobName] = workers 185 + } 186 + 187 + func (t *Tracker) StartWorker(jobName string, workerID int, task string) { 188 + t.mu.Lock() 189 + defer t.mu.Unlock() 190 + 191 + if workers, exists := t.workers[jobName]; exists && workerID > 0 && workerID <= len(workers) { 192 + workers[workerID-1].Status = "working" 193 + workers[workerID-1].CurrentTask = task 194 + workers[workerID-1].StartedAt = time.Now() 195 + } 196 + } 197 + 198 + func (t *Tracker) CompleteWorker(jobName string, workerID int) { 199 + t.mu.Lock() 200 + defer t.mu.Unlock() 201 + 202 + if workers, exists := t.workers[jobName]; exists && workerID > 0 && workerID <= len(workers) { 203 + workers[workerID-1].Status = "idle" 204 + workers[workerID-1].CurrentTask = "" 205 + workers[workerID-1].Duration = time.Since(workers[workerID-1].StartedAt) 206 + workers[workerID-1].StartedAt = time.Time{} 207 + } 208 + } 209 + 210 + func (t *Tracker) GetWorkers(jobName string) []WorkerStatus { 211 + t.mu.RLock() 212 + defer t.mu.RUnlock() 213 + 214 + if workers, exists := t.workers[jobName]; exists { 215 + // Create a copy with calculated durations 216 + result := make([]WorkerStatus, len(workers)) 217 + for i, w := range workers { 218 + result[i] = w 219 + if w.Status == "working" && !w.StartedAt.IsZero() { 220 + result[i].Duration = time.Since(w.StartedAt) 221 + } 222 + } 223 + return result 224 + } 225 + return nil 226 + }
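Note: tracker reads return copies (with Duration recomputed for running jobs), so API handlers can serialize job state without racing the jobs that update it. A minimal sketch of how a job might drive the tracker; the job name and loop body are illustrative:

package main

import (
	"fmt"
	"time"

	"github.com/atscan/atscand/internal/monitor"
)

func main() {
	t := monitor.GetTracker()
	t.RegisterJob("demo_scan")
	t.StartJob("demo_scan")

	total := 5
	for i := 1; i <= total; i++ {
		time.Sleep(50 * time.Millisecond) // stand-in for real work
		t.UpdateProgress("demo_scan", i, total, fmt.Sprintf("item %d/%d", i, total))
	}

	// A nil error marks the run successful, records the duration,
	// and clears the progress snapshot.
	t.CompleteJob("demo_scan", nil)

	fmt.Printf("%+v\n", t.GetJobStatus("demo_scan"))
}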
+70 -23
internal/pds/client.go
··· 4 4 "context" 5 5 "encoding/json" 6 6 "fmt" 7 + "net" 7 8 "net/http" 8 9 "time" 9 10 ) ··· 28 29 29 30 // Repo represents a repository in the list 30 31 type Repo struct { 31 - DID string `json:"did"` 32 - Head string `json:"head,omitempty"` 33 - Rev string `json:"rev,omitempty"` 32 + DID string `json:"did"` 33 + Head string `json:"head,omitempty"` 34 + Rev string `json:"rev,omitempty"` 35 + Active *bool `json:"active,omitempty"` 36 + Status *string `json:"status,omitempty"` 34 37 } 35 38 36 39 // ListRepos fetches all repositories from a PDS with pagination 37 - func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]string, error) { 38 - var allDIDs []string 40 + func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]Repo, error) { 41 + var allRepos []Repo 39 42 var cursor *string 40 43 41 44 for { ··· 67 70 } 68 71 resp.Body.Close() 69 72 70 - // Collect DIDs 71 - for _, repo := range result.Repos { 72 - allDIDs = append(allDIDs, repo.DID) 73 - } 73 + // Collect repos 74 + allRepos = append(allRepos, result.Repos...) 74 75 75 76 // Check if there are more pages 76 77 if result.Cursor == nil || *result.Cursor == "" { ··· 79 80 cursor = result.Cursor 80 81 } 81 82 82 - return allDIDs, nil 83 + return allRepos, nil 83 84 } 84 85 85 86 // DescribeServer fetches com.atproto.server.describeServer 86 - func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) { 87 + // Returns: description, responseTime, usedIP, error 88 + func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) { 89 + startTime := time.Now() 87 90 url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint) 88 91 89 - //fmt.Println(url) 92 + // Track which IP was used 93 + var usedIP string 94 + transport := &http.Transport{ 95 + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 96 + conn, err := (&net.Dialer{ 97 + Timeout: 30 * time.Second, 98 + KeepAlive: 30 * time.Second, 99 + }).DialContext(ctx, network, addr) 100 + 101 + if err == nil && conn != nil { 102 + if remoteAddr := conn.RemoteAddr(); remoteAddr != nil { 103 + if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok { 104 + usedIP = tcpAddr.IP.String() 105 + } 106 + } 107 + } 108 + return conn, err 109 + }, 110 + } 111 + 112 + client := &http.Client{ 113 + Timeout: c.httpClient.Timeout, 114 + Transport: transport, 115 + } 90 116 91 117 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 92 118 if err != nil { 93 - return nil, err 119 + return nil, 0, "", err 94 120 } 95 121 96 - resp, err := c.httpClient.Do(req) 122 + resp, err := client.Do(req) 123 + responseTime := time.Since(startTime) 124 + 97 125 if err != nil { 98 - return nil, err 126 + return nil, responseTime, usedIP, err 99 127 } 100 128 defer resp.Body.Close() 101 129 102 130 if resp.StatusCode != http.StatusOK { 103 - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 131 + return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 104 132 } 105 133 106 134 var desc ServerDescription 107 135 if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil { 108 - return nil, err 136 + return nil, responseTime, usedIP, err 109 137 } 110 138 111 - return &desc, nil 139 + return &desc, responseTime, usedIP, nil 112 140 } 113 141 114 - // CheckHealth performs a basic health check 115 - func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, error) { 
142 + // CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version" 143 + // Returns: available, responseTime, version, error 144 + func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) { 116 145 startTime := time.Now() 117 146 118 147 url := fmt.Sprintf("%s/xrpc/_health", endpoint) 119 148 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 120 149 if err != nil { 121 - return false, 0, err 150 + return false, 0, "", err 122 151 } 123 152 124 153 resp, err := c.httpClient.Do(req) 125 154 duration := time.Since(startTime) 126 155 127 156 if err != nil { 128 - return false, duration, err 157 + return false, duration, "", err 129 158 } 130 159 defer resp.Body.Close() 131 160 132 - return resp.StatusCode == http.StatusOK, duration, nil 161 + if resp.StatusCode != http.StatusOK { 162 + return false, duration, "", fmt.Errorf("health check returned status %d", resp.StatusCode) 163 + } 164 + 165 + // Decode the JSON response and check for "version" 166 + var healthResponse struct { 167 + Version string `json:"version"` 168 + } 169 + 170 + if err := json.NewDecoder(resp.Body).Decode(&healthResponse); err != nil { 171 + return false, duration, "", fmt.Errorf("failed to decode health JSON: %w", err) 172 + } 173 + 174 + if healthResponse.Version == "" { 175 + return false, duration, "", fmt.Errorf("health JSON response missing 'version' field") 176 + } 177 + 178 + // All checks passed 179 + return true, duration, healthResponse.Version, nil 133 180 }
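Note: CheckHealth now treats a 200 response without a parseable "version" field as a failure, so a passing check also yields the PDS version. A usage sketch; the endpoint is a placeholder and the NewClient(timeout) signature is inferred from its use in scanner.go:

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/atscan/atscand/internal/pds"
)

func main() {
	client := pds.NewClient(10 * time.Second)

	ok, rtt, version, err := client.CheckHealth(context.Background(), "https://pds.example.com")
	if err != nil || !ok {
		fmt.Println("unhealthy:", err)
		return
	}
	fmt.Printf("healthy in %v, running version %s\n", rtt, version)
}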
+270 -103
internal/pds/scanner.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "fmt" 6 + "math/rand" 5 7 "sync" 8 + "sync/atomic" 6 9 "time" 7 10 8 - "github.com/acarl005/stripansi" 9 - "github.com/atscan/atscanner/internal/config" 10 - "github.com/atscan/atscanner/internal/log" 11 - "github.com/atscan/atscanner/internal/storage" 11 + "github.com/atscan/atscand/internal/config" 12 + "github.com/atscan/atscand/internal/ipinfo" 13 + "github.com/atscan/atscand/internal/log" 14 + "github.com/atscan/atscand/internal/monitor" 15 + "github.com/atscan/atscand/internal/storage" 12 16 ) 13 17 14 18 type Scanner struct { 15 - client *Client 16 - db storage.Database 17 - config config.PDSConfig 19 + client *Client 20 + db storage.Database 21 + config config.PDSConfig 22 + ipInfoClient *ipinfo.Client 18 23 } 19 24 20 25 func NewScanner(db storage.Database, cfg config.PDSConfig) *Scanner { 21 26 return &Scanner{ 22 - client: NewClient(cfg.Timeout), 23 - db: db, 24 - config: cfg, 27 + client: NewClient(cfg.Timeout), 28 + db: db, 29 + config: cfg, 30 + ipInfoClient: ipinfo.NewClient(), 25 31 } 26 32 } 27 33 ··· 29 35 startTime := time.Now() 30 36 log.Info("Starting PDS availability scan...") 31 37 32 - servers, err := s.db.GetPDSServers(ctx, nil) 38 + // Get only PDS endpoints that need checking 39 + servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{ 40 + Type: "pds", 41 + OnlyStale: true, 42 + OnlyValid: true, 43 + RecheckInterval: s.config.RecheckInterval, 44 + }) 33 45 if err != nil { 34 46 return err 35 47 } 36 48 37 - log.Info("Scanning %d PDS servers...", len(servers)) 49 + if len(servers) == 0 { 50 + log.Info("No endpoints need scanning at this time") 51 + monitor.GetTracker().UpdateProgress("pds_scan", 0, 0, "No endpoints need scanning") 52 + return nil 53 + } 38 54 39 - // Worker pool 40 - jobs := make(chan *storage.PDS, len(servers)) 41 - results := make(chan *PDSStatus, len(servers)) 55 + log.Info("Found %d endpoints that need scanning", len(servers)) 56 + monitor.GetTracker().UpdateProgress("pds_scan", 0, len(servers), "Preparing to scan") 42 57 58 + // Shuffle servers 59 + if len(servers) > 0 { 60 + r := rand.New(rand.NewSource(time.Now().UnixNano())) 61 + r.Shuffle(len(servers), func(i, j int) { 62 + servers[i], servers[j] = servers[j], servers[i] 63 + }) 64 + } 65 + 66 + // Initialize workers in tracker 67 + monitor.GetTracker().InitWorkers("pds_scan", s.config.Workers) 68 + 69 + // Worker pool with progress tracking 70 + jobs := make(chan *workerJob, len(servers)) 43 71 var wg sync.WaitGroup 72 + var completed int32 73 + 44 74 for i := 0; i < s.config.Workers; i++ { 45 75 wg.Add(1) 46 - go func() { 76 + workerID := i + 1 77 + go func(id int) { 47 78 defer wg.Done() 48 - s.worker(ctx, jobs, results) 49 - }() 79 + s.workerWithProgress(ctx, id, jobs, &completed, len(servers)) 80 + }(workerID) 50 81 } 51 82 52 - go func() { 53 - for _, server := range servers { 54 - jobs <- server 55 - } 56 - close(jobs) 57 - }() 83 + // Send jobs 84 + for _, server := range servers { 85 + jobs <- &workerJob{endpoint: server} 86 + } 87 + close(jobs) 58 88 59 - go func() { 60 - wg.Wait() 61 - close(results) 62 - }() 89 + // Wait for completion 90 + wg.Wait() 63 91 64 - // Process results 65 - successCount := 0 66 - failureCount := 0 67 - totalUsers := int64(0) 92 + log.Info("PDS scan completed in %v", time.Since(startTime)) 93 + monitor.GetTracker().UpdateProgress("pds_scan", len(servers), len(servers), "Completed") 68 94 69 - for status := range results { 70 - // Determine status code 71 - statusCode := storage.PDSStatusOffline 72 - if 
status.Available { 73 - statusCode = storage.PDSStatusOnline 74 - } 95 + return nil 96 + } 75 97 76 - // Build scan data 77 - scanData := &storage.PDSScanData{ 78 - ServerInfo: status.Description, 79 - DIDs: status.DIDs, 80 - DIDCount: len(status.DIDs), 81 - } 82 - 83 - // Update using PDS ID 84 - if err := s.db.UpdatePDSStatus(ctx, status.PDSID, &storage.PDSUpdate{ 85 - Status: statusCode, 86 - LastChecked: status.LastChecked, 87 - ResponseTime: status.ResponseTime.Seconds() * 1000, // Convert to ms 88 - ScanData: scanData, 89 - }); err != nil { 90 - log.Error("Error updating PDS ID %d: %v", status.PDSID, err) 91 - } 92 - 93 - if status.Available { 94 - successCount++ 95 - totalUsers += int64(len(status.DIDs)) 96 - } else { 97 - failureCount++ 98 - } 99 - } 100 - 101 - log.Info("PDS scan completed: %d available, %d unavailable, %d total users in %v", 102 - successCount, failureCount, totalUsers, time.Since(startTime)) 103 - 104 - return nil 98 + type workerJob struct { 99 + endpoint *storage.Endpoint 105 100 } 106 101 107 - func (s *Scanner) worker(ctx context.Context, jobs <-chan *storage.PDS, results chan<- *PDSStatus) { 108 - for server := range jobs { 102 + func (s *Scanner) workerWithProgress(ctx context.Context, workerID int, jobs <-chan *workerJob, completed *int32, total int) { 103 + for job := range jobs { 109 104 select { 110 105 case <-ctx.Done(): 111 106 return 112 107 default: 113 - status := s.scanPDS(ctx, server.ID, server.Endpoint) 114 - results <- status 108 + // Update worker status 109 + monitor.GetTracker().StartWorker("pds_scan", workerID, job.endpoint.Endpoint) 110 + 111 + // Scan endpoint 112 + s.scanAndSaveEndpoint(ctx, job.endpoint) 113 + 114 + // Update progress 115 + atomic.AddInt32(completed, 1) 116 + current := atomic.LoadInt32(completed) 117 + monitor.GetTracker().UpdateProgress("pds_scan", int(current), total, 118 + fmt.Sprintf("Scanned %d/%d endpoints", current, total)) 119 + 120 + // Mark worker as idle 121 + monitor.GetTracker().CompleteWorker("pds_scan", workerID) 115 122 } 116 123 } 117 124 } 118 125 119 - func (s *Scanner) scanPDS(ctx context.Context, pdsID int64, endpoint string) *PDSStatus { 120 - status := &PDSStatus{ 121 - PDSID: pdsID, // Store ID 122 - Endpoint: endpoint, 123 - LastChecked: time.Now(), 126 + func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) { 127 + // STEP 1: Resolve IPs (both IPv4 and IPv6) 128 + ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint) 129 + if err != nil { 130 + s.saveScanResult(ctx, ep.ID, &ScanResult{ 131 + Status: storage.EndpointStatusOffline, 132 + ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err), 133 + }) 134 + return 124 135 } 125 136 126 - // Health check 127 - available, responseTime, err := s.client.CheckHealth(ctx, endpoint) 128 - status.Available = available 129 - status.ResponseTime = responseTime 137 + // Update IPs immediately 138 + s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC()) 139 + 140 + // STEP 1.5: Fetch IP info asynchronously for both IPs 141 + if ips.IPv4 != "" { 142 + go s.updateIPInfoIfNeeded(ctx, ips.IPv4) 143 + } 144 + if ips.IPv6 != "" { 145 + go s.updateIPInfoIfNeeded(ctx, ips.IPv6) 146 + } 130 147 148 + // STEP 2: Call describeServer (primary health check + metadata) 149 + desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint) 131 150 if err != nil { 132 - status.ErrorMessage = err.Error() 133 - return status 151 + s.saveScanResult(ctx, ep.ID, &ScanResult{ 152 + Status: 
storage.EndpointStatusOffline, 153 + ResponseTime: descResponseTime, 154 + ErrorMessage: fmt.Sprintf("describeServer failed: %v", err), 155 + UsedIP: usedIP, 156 + }) 157 + return 158 + } 159 + 160 + // Update server DID immediately 161 + if desc.DID != "" { 162 + s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID) 134 163 } 135 164 136 - if !available { 137 - status.ErrorMessage = "health check failed" 138 - return status 165 + // STEP 3: Call _health to get version 166 + available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) 167 + if err != nil || !available { 168 + log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err) 169 + // Server is online (describeServer worked) but _health failed 170 + // Continue with empty version 171 + version = "" 139 172 } 140 173 141 - // Describe server 142 - desc, err := s.client.DescribeServer(ctx, endpoint) 174 + // Calculate average response time from both calls 175 + avgResponseTime := descResponseTime 176 + if available { 177 + avgResponseTime = (descResponseTime + healthResponseTime) / 2 178 + } 179 + 180 + // STEP 4: Fetch repos 181 + repoList, err := s.client.ListRepos(ctx, ep.Endpoint) 143 182 if err != nil { 144 - log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(endpoint), err) 145 - } else { 146 - status.Description = desc 183 + log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err) 184 + repoList = []Repo{} 185 + } 186 + 187 + // Convert to DIDs 188 + dids := make([]string, len(repoList)) 189 + for i, repo := range repoList { 190 + dids[i] = repo.DID 147 191 } 148 192 149 - // List repos (DIDs) 150 - /*dids, err := s.client.ListRepos(ctx, endpoint) 151 - if err != nil { 152 - log.Verbose("Warning: failed to list repos for %s: %v", endpoint, err) 153 - status.DIDs = []string{} 193 + // STEP 5: SAVE scan result 194 + s.saveScanResult(ctx, ep.ID, &ScanResult{ 195 + Status: storage.EndpointStatusOnline, 196 + ResponseTime: avgResponseTime, 197 + Description: desc, 198 + DIDs: dids, 199 + Version: version, 200 + UsedIP: usedIP, // Only from describeServer 201 + }) 202 + 203 + // STEP 6: Save repos in batches (only tracks changes) 204 + if len(repoList) > 0 { 205 + batchSize := 100_000 206 + 207 + log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint) 208 + 209 + for i := 0; i < len(repoList); i += batchSize { 210 + end := i + batchSize 211 + if end > len(repoList) { 212 + end = len(repoList) 213 + } 214 + 215 + batch := repoList[i:end] 216 + repoData := make([]storage.PDSRepoData, len(batch)) 217 + 218 + for j, repo := range batch { 219 + active := true 220 + if repo.Active != nil { 221 + active = *repo.Active 222 + } 223 + 224 + status := "" 225 + if repo.Status != nil { 226 + status = *repo.Status 227 + } 228 + 229 + repoData[j] = storage.PDSRepoData{ 230 + DID: repo.DID, 231 + Head: repo.Head, 232 + Rev: repo.Rev, 233 + Active: active, 234 + Status: status, 235 + } 236 + } 237 + 238 + if err := s.db.UpsertPDSRepos(ctx, ep.ID, repoData); err != nil { 239 + log.Error("Failed to save repo batch for endpoint %d: %v", ep.ID, err) 240 + } 241 + } 242 + 243 + log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint) 244 + } 245 + } 246 + 247 + func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) { 248 + // Build scan_data with PDS-specific info in Metadata 249 + scanData := &storage.EndpointScanData{ 250 + DIDCount: len(result.DIDs), 251 + Metadata: 
make(map[string]interface{}), 252 + } 253 + 254 + var userCount int64 255 + 256 + // Add PDS-specific metadata 257 + if result.Status == storage.EndpointStatusOnline { 258 + userCount = int64(len(result.DIDs)) 259 + scanData.Metadata["user_count"] = userCount 260 + if result.Description != nil { 261 + scanData.Metadata["server_info"] = result.Description 262 + } 154 263 } else { 155 - status.DIDs = dids 156 - log.Verbose(" → Found %d users on %s", len(dids), endpoint) 157 - }*/ 264 + // Include error message for offline status 265 + if result.ErrorMessage != "" { 266 + scanData.Metadata["error"] = result.ErrorMessage 267 + } 268 + } 269 + 270 + // Save scan record 271 + scan := &storage.EndpointScan{ 272 + EndpointID: endpointID, 273 + Status: result.Status, 274 + ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms 275 + UserCount: userCount, 276 + Version: result.Version, 277 + UsedIP: result.UsedIP, // NEW 278 + ScanData: scanData, 279 + ScannedAt: time.Now().UTC(), 280 + } 281 + 282 + if err := s.db.SaveEndpointScan(ctx, scan); err != nil { 283 + log.Error("Failed to save scan for endpoint %d: %v", endpointID, err) 284 + } 285 + 286 + // Update endpoint status 287 + update := &storage.EndpointUpdate{ 288 + Status: result.Status, 289 + LastChecked: time.Now().UTC(), 290 + ResponseTime: result.ResponseTime.Seconds() * 1000, 291 + } 158 292 159 - return status 293 + if err := s.db.UpdateEndpointStatus(ctx, endpointID, update); err != nil { 294 + log.Error("Failed to update endpoint status for %d: %v", endpointID, err) 295 + } 160 296 } 161 297 162 - func (s *Scanner) statusString(available bool) string { 163 - if available { 164 - return "online" 298 + func (s *Scanner) updateIPInfoIfNeeded(ctx context.Context, ip string) { 299 + // Check if IP info client is in backoff 300 + if s.ipInfoClient.IsInBackoff() { 301 + return 302 + } 303 + 304 + // Check if we need to update IP info 305 + exists, needsUpdate, err := s.db.ShouldUpdateIPInfo(ctx, ip) 306 + if err != nil { 307 + log.Verbose("Failed to check IP info status: %v", err) 308 + return 165 309 } 166 - return "offline" 310 + 311 + if exists && !needsUpdate { 312 + return // IP info is fresh 313 + } 314 + 315 + // Fetch IP info from ipapi.is 316 + log.Verbose("Fetching IP info for %s", ip) 317 + ipInfo, err := s.ipInfoClient.GetIPInfo(ctx, ip) 318 + if err != nil { 319 + // Log only once when backoff starts 320 + if s.ipInfoClient.IsInBackoff() { 321 + log.Info("⚠ IP info API unavailable, pausing requests for 5 minutes") 322 + } else { 323 + log.Verbose("Failed to fetch IP info for %s: %v", ip, err) 324 + } 325 + return 326 + } 327 + 328 + // Update database 329 + if err := s.db.UpsertIPInfo(ctx, ip, ipInfo); err != nil { 330 + log.Error("Failed to update IP info for %s: %v", ip, err) 331 + } else { 332 + log.Verbose("✓ Updated IP info for %s", ip) 333 + } 167 334 }
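The rewritten scan loop in scanner.go fans endpoints out to cfg.PDS.Workers goroutines and publishes progress from an atomic counter. A stripped-down, runnable sketch of that worker-pool shape (hypothetical endpoint payload, monitor/tracker calls omitted):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	endpoints := []string{"https://a.example", "https://b.example", "https://c.example", "https://d.example"}

	jobs := make(chan string, len(endpoints))
	var wg sync.WaitGroup
	var completed int32
	workers := 2 // stands in for cfg.PDS.Workers

	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for ep := range jobs {
				// scanAndSaveEndpoint would run here.
				done := atomic.AddInt32(&completed, 1)
				fmt.Printf("worker %d scanned %s (%d/%d)\n", id, ep, done, len(endpoints))
			}
		}(i + 1)
	}

	for _, ep := range endpoints {
		jobs <- ep
	}
	close(jobs)
	wg.Wait()
}
```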
+11 -1
internal/pds/types.go
··· 21 21 }
22 22
23 23 type PDSStatus struct {
24 - PDSID int64 // NEW: PDS ID
24 + EndpointID int64 // ID of the endpoint this status belongs to
25 25 Endpoint string
26 26 Available bool
27 27 ResponseTime time.Duration
··· 30 30 Description *ServerDescription
31 31 DIDs []string
32 32 }
33 +
34 + type ScanResult struct {
35 + Status int
36 + ResponseTime time.Duration
37 + ErrorMessage string
38 + Description *ServerDescription
39 + DIDs []string
40 + Version string
41 + UsedIP string // IP address the scan actually connected to
42 + }
-696
internal/plc/bundle.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "bytes" 6 - "context" 7 - "crypto/sha256" 8 - "encoding/hex" 9 - "encoding/json" 10 - "fmt" 11 - "os" 12 - "path/filepath" 13 - "time" 14 - 15 - "github.com/atscan/atscanner/internal/log" 16 - "github.com/atscan/atscanner/internal/storage" 17 - "github.com/klauspost/compress/zstd" 18 - ) 19 - 20 - // BUNDLE_SIZE is the number of operations per bundle 21 - const BUNDLE_SIZE = 10000 22 - 23 - type BundleManager struct { 24 - dir string 25 - enabled bool 26 - encoder *zstd.Encoder 27 - decoder *zstd.Decoder 28 - db storage.Database 29 - } 30 - 31 - func NewBundleManager(dir string, enabled bool, db storage.Database) (*BundleManager, error) { 32 - if !enabled { 33 - return &BundleManager{enabled: false}, nil 34 - } 35 - 36 - if err := os.MkdirAll(dir, 0755); err != nil { 37 - return nil, fmt.Errorf("failed to create bundle dir: %w", err) 38 - } 39 - 40 - encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) 41 - if err != nil { 42 - return nil, err 43 - } 44 - 45 - decoder, err := zstd.NewReader(nil) 46 - if err != nil { 47 - return nil, err 48 - } 49 - 50 - return &BundleManager{ 51 - dir: dir, 52 - enabled: true, 53 - encoder: encoder, 54 - decoder: decoder, 55 - db: db, 56 - }, nil 57 - } 58 - 59 - func (bm *BundleManager) Close() { 60 - if bm.encoder != nil { 61 - bm.encoder.Close() 62 - } 63 - if bm.decoder != nil { 64 - bm.decoder.Close() 65 - } 66 - } 67 - 68 - // GetBundleFilename returns filename for bundle number (6-digit decimal, JSONL format) 69 - func (bm *BundleManager) GetBundleFilename(bundleNumber int) string { 70 - return fmt.Sprintf("%06d.jsonl.zst", bundleNumber) 71 - } 72 - 73 - // GetBundlePath returns full path for bundle number 74 - func (bm *BundleManager) GetBundlePath(bundleNumber int) string { 75 - return filepath.Join(bm.dir, bm.GetBundleFilename(bundleNumber)) 76 - } 77 - 78 - // BundleExists checks if bundle file exists locally 79 - func (bm *BundleManager) BundleExists(bundleNumber int) bool { 80 - _, err := os.Stat(bm.GetBundlePath(bundleNumber)) 81 - return err == nil 82 - } 83 - 84 - // LoadBundle returns exactly 1000 unique operations by fetching additional batches if needed 85 - func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNumber int, plcClient *Client) ([]PLCOperation, bool, error) { 86 - if !bm.enabled { 87 - return nil, false, fmt.Errorf("bundle manager disabled") 88 - } 89 - 90 - path := bm.GetBundlePath(bundleNumber) 91 - 92 - // Try to load from local file first 93 - if bm.BundleExists(bundleNumber) { 94 - log.Verbose("→ Loading bundle %06d from local file", bundleNumber) 95 - 96 - // Check if bundle exists in database 97 - dbBundle, dbErr := bm.db.GetBundleByNumber(ctx, bundleNumber) 98 - bundleInDB := dbErr == nil && dbBundle != nil 99 - 100 - if bundleInDB { 101 - // Verify compressed file hash 102 - if dbBundle.CompressedHash != "" { 103 - valid, err := bm.verifyBundleHash(path, dbBundle.CompressedHash) 104 - if err != nil { 105 - log.Error("Warning: failed to verify compressed hash for bundle %06d: %v", bundleNumber, err) 106 - } else if !valid { 107 - log.Error("⚠ Compressed hash mismatch for bundle %06d! 
Re-fetching...", bundleNumber) 108 - os.Remove(path) 109 - return bm.LoadBundle(ctx, bundleNumber, plcClient) 110 - } else { 111 - log.Verbose("✓ Hash verified for bundle %06d", bundleNumber) 112 - } 113 - } 114 - } 115 - 116 - // Load operations from file 117 - operations, err := bm.loadBundleFromFile(path) 118 - if err != nil { 119 - return nil, false, fmt.Errorf("failed to load bundle from file: %w", err) 120 - } 121 - 122 - // If not in database, index it now 123 - if !bundleInDB { 124 - // Calculate both hashes 125 - fileData, err := os.ReadFile(path) 126 - if err != nil { 127 - log.Error("Warning: failed to read file: %v", err) 128 - } else { 129 - compressedHash := bm.calculateHash(fileData) 130 - 131 - // Calculate uncompressed hash 132 - var jsonlData []byte 133 - for _, op := range operations { 134 - jsonlData = append(jsonlData, op.RawJSON...) 135 - jsonlData = append(jsonlData, '\n') 136 - } 137 - uncompressedHash := bm.calculateHash(jsonlData) 138 - 139 - if err := bm.indexBundleWithHash(ctx, bundleNumber, operations, path, uncompressedHash, compressedHash); err != nil { 140 - log.Error("Warning: failed to index bundle: %v", err) 141 - } else { 142 - log.Info("✓ Indexed bundle %06d", bundleNumber) 143 - } 144 - } 145 - } 146 - 147 - // If loaded from disk, it's always complete 148 - return operations, true, nil 149 - } 150 - 151 - // Bundle doesn't exist locally - fetch from PLC directory 152 - log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNumber) 153 - 154 - var afterTimestamp string 155 - var prevBoundaryCIDs map[string]bool 156 - 157 - if bundleNumber > 1 { 158 - prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNumber-1) 159 - if err == nil && prevBundle != nil { 160 - afterTimestamp = prevBundle.EndTime.Format(time.RFC3339Nano) 161 - 162 - // Get boundary CIDs from previous bundle 163 - if len(prevBundle.BoundaryCIDs) > 0 { 164 - prevBoundaryCIDs = make(map[string]bool) 165 - for _, cid := range prevBundle.BoundaryCIDs { 166 - prevBoundaryCIDs[cid] = true 167 - } 168 - log.Verbose(" Using %d boundary CIDs from previous bundle", len(prevBoundaryCIDs)) 169 - } else { 170 - // Fallback: load previous bundle's operations 171 - prevPath := bm.GetBundlePath(bundleNumber - 1) 172 - if bm.BundleExists(bundleNumber - 1) { 173 - prevOps, err := bm.loadBundleFromFile(prevPath) 174 - if err == nil { 175 - _, prevBoundaryCIDs = GetBoundaryCIDs(prevOps) 176 - log.Verbose(" Computed %d boundary CIDs from previous bundle file", len(prevBoundaryCIDs)) 177 - } 178 - } 179 - } 180 - } 181 - } 182 - 183 - // Collect operations until we have exactly BUNDLE_SIZE unique ones 184 - var allOperations []PLCOperation 185 - seenCIDs := make(map[string]bool) 186 - 187 - // Track what we've already seen from previous bundle 188 - for cid := range prevBoundaryCIDs { 189 - seenCIDs[cid] = true 190 - } 191 - 192 - currentAfter := afterTimestamp 193 - 194 - // Scale maxFetches based on bundle size 195 - // Assume worst case: 90% dedup rate, need buffer 196 - maxFetches := (BUNDLE_SIZE / 900) + 5 // For 10k: ~16 fetches, for 1k: ~6 fetches 197 - fetchCount := 0 198 - 199 - for len(allOperations) < BUNDLE_SIZE && fetchCount < maxFetches { 200 - fetchCount++ 201 - 202 - // Calculate how many more operations we need 203 - remaining := BUNDLE_SIZE - len(allOperations) 204 - 205 - // Determine fetch size based on remaining operations 206 - var fetchSize int 207 - if fetchCount == 1 { 208 - // First fetch: always get 1000 (PLC limit) 209 - fetchSize = 1000 210 - } else if 
remaining < 100 { 211 - // Need less than 100: fetch 50 212 - fetchSize = 50 213 - } else if remaining < 500 { 214 - // Need 100-500: fetch 200 215 - fetchSize = 200 216 - } else { 217 - // Need 500+: fetch 1000 218 - fetchSize = 1000 219 - } 220 - 221 - // Fetch next batch 222 - log.Verbose(" Fetch #%d: need %d more, requesting %d", fetchCount, remaining, fetchSize) 223 - 224 - rawOperations, err := bm.fetchBundleFromPLCWithCount(ctx, plcClient, currentAfter, fetchSize) 225 - if err != nil { 226 - return nil, false, fmt.Errorf("failed to fetch bundle from PLC: %w", err) 227 - } 228 - 229 - if len(rawOperations) == 0 { 230 - // No more data available 231 - log.Info(" No more operations available after %d fetches (got %d/%d)", 232 - fetchCount, len(allOperations), BUNDLE_SIZE) 233 - break 234 - } 235 - 236 - log.Verbose(" Got %d raw operations", len(rawOperations)) 237 - 238 - // Filter out duplicates and add unique operations 239 - newOpsAdded := 0 240 - for _, op := range rawOperations { 241 - if !seenCIDs[op.CID] { 242 - seenCIDs[op.CID] = true 243 - allOperations = append(allOperations, op) 244 - newOpsAdded++ 245 - 246 - if len(allOperations) >= BUNDLE_SIZE { 247 - break 248 - } 249 - } 250 - } 251 - 252 - log.Verbose(" Added %d unique operations (total: %d/%d, %d dupes)", 253 - newOpsAdded, len(allOperations), BUNDLE_SIZE, len(rawOperations)-newOpsAdded) 254 - 255 - // If we added no new operations, we're stuck 256 - if newOpsAdded == 0 { 257 - log.Error(" No new unique operations found, stopping") 258 - break 259 - } 260 - 261 - // Update cursor for next fetch 262 - if len(rawOperations) > 0 { 263 - lastOp := rawOperations[len(rawOperations)-1] 264 - currentAfter = lastOp.CreatedAt.Format(time.RFC3339Nano) 265 - } 266 - 267 - // If PLC returned less than requested, we've reached the end 268 - if len(rawOperations) < fetchSize { 269 - log.Info(" Reached end of PLC data (got %d < %d requested)", len(rawOperations), fetchSize) 270 - break 271 - } 272 - } 273 - 274 - // Warn if we hit the fetch limit 275 - if fetchCount >= maxFetches { 276 - log.Verbose(" ⚠ Hit maxFetches limit (%d) with only %d/%d operations", 277 - maxFetches, len(allOperations), BUNDLE_SIZE) 278 - } 279 - 280 - // Check if we got exactly BUNDLE_SIZE operations 281 - isComplete := len(allOperations) >= BUNDLE_SIZE 282 - 283 - if len(allOperations) > BUNDLE_SIZE { 284 - // Trim to exactly BUNDLE_SIZE 285 - log.Verbose(" Trimming from %d to %d operations", len(allOperations), BUNDLE_SIZE) 286 - allOperations = allOperations[:BUNDLE_SIZE] 287 - } 288 - 289 - log.Info(" Collected %d unique operations after %d fetches (complete=%v, target=%d)", 290 - len(allOperations), fetchCount, isComplete, BUNDLE_SIZE) 291 - 292 - // Only save as bundle if complete 293 - if isComplete { 294 - // Save bundle with both hashes 295 - uncompressedHash, compressedHash, err := bm.saveBundleFileWithHash(path, allOperations) 296 - if err != nil { 297 - log.Error("Warning: failed to save bundle file: %v", err) 298 - } else { 299 - // Index with both hashes 300 - if err := bm.indexBundleWithHash(ctx, bundleNumber, allOperations, path, uncompressedHash, compressedHash); err != nil { 301 - log.Error("Warning: failed to index bundle: %v", err) 302 - } else { 303 - log.Info("✓ Bundle %06d saved [%d ops, hash: %s, compressed: %s]", 304 - bundleNumber, len(allOperations), uncompressedHash[:16]+"...", compressedHash[:16]+"...") 305 - } 306 - } 307 - } 308 - 309 - return allOperations, isComplete, nil 310 - } 311 - 312 - // fetchBundleFromPLCWithCount 
fetches operations with a specific count 313 - func (bm *BundleManager) fetchBundleFromPLCWithCount(ctx context.Context, client *Client, afterTimestamp string, count int) ([]PLCOperation, error) { 314 - return client.Export(ctx, ExportOptions{ 315 - Count: count, 316 - After: afterTimestamp, 317 - }) 318 - } 319 - 320 - // saveBundleFileWithHash - NO trailing newline 321 - func (bm *BundleManager) saveBundleFileWithHash(path string, operations []PLCOperation) (string, string, error) { 322 - var jsonlData []byte 323 - for _, op := range operations { 324 - jsonlData = append(jsonlData, op.RawJSON...) 325 - jsonlData = append(jsonlData, '\n') 326 - } 327 - 328 - uncompressedHash := bm.calculateHash(jsonlData) 329 - compressed := bm.encoder.EncodeAll(jsonlData, nil) 330 - compressedHash := bm.calculateHash(compressed) 331 - 332 - if err := os.WriteFile(path, compressed, 0644); err != nil { 333 - return "", "", err 334 - } 335 - 336 - return uncompressedHash, compressedHash, nil 337 - } 338 - 339 - // fetchBundleFromPLC fetches operations from PLC directory (returns RAW operations) 340 - func (bm *BundleManager) fetchBundleFromPLC(ctx context.Context, client *Client, afterTimestamp string) ([]PLCOperation, error) { 341 - // Just fetch - no deduplication here 342 - return client.Export(ctx, ExportOptions{ 343 - Count: 1000, 344 - After: afterTimestamp, 345 - }) 346 - } 347 - 348 - // StripBoundaryDuplicates removes operations that were already seen on the previous page 349 - // This is exported so it can be used in verification 350 - func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation { 351 - if len(operations) == 0 { 352 - return operations 353 - } 354 - 355 - boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp) 356 - if err != nil { 357 - return operations 358 - } 359 - 360 - // Skip operations at the start that match the boundary 361 - startIdx := 0 362 - for startIdx < len(operations) { 363 - op := operations[startIdx] 364 - 365 - // If timestamp is AFTER boundary, we're past duplicates 366 - if op.CreatedAt.After(boundaryTime) { 367 - break 368 - } 369 - 370 - // Same timestamp - check if we've seen this CID before 371 - if op.CreatedAt.Equal(boundaryTime) { 372 - if prevBoundaryCIDs[op.CID] { 373 - // This is a duplicate, skip it 374 - startIdx++ 375 - continue 376 - } 377 - // Same timestamp but new CID - keep it 378 - break 379 - } 380 - 381 - // Earlier timestamp (shouldn't happen) 382 - break 383 - } 384 - 385 - return operations[startIdx:] 386 - } 387 - 388 - // Keep the private version for internal use 389 - func stripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation { 390 - return StripBoundaryDuplicates(operations, boundaryTimestamp, prevBoundaryCIDs) 391 - } 392 - 393 - // GetBoundaryCIDs returns all CIDs that share the same timestamp as the last operation 394 - func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) { 395 - if len(operations) == 0 { 396 - return time.Time{}, nil 397 - } 398 - 399 - lastOp := operations[len(operations)-1] 400 - boundaryTime := lastOp.CreatedAt 401 - cidSet := make(map[string]bool) 402 - 403 - // Walk backwards from the end, collecting all CIDs with the same timestamp 404 - for i := len(operations) - 1; i >= 0; i-- { 405 - op := operations[i] 406 - if op.CreatedAt.Equal(boundaryTime) { 407 - cidSet[op.CID] = true 408 - } else { 409 - // Different timestamp, we're 
done 410 - break 411 - } 412 - } 413 - 414 - return boundaryTime, cidSet 415 - } 416 - 417 - // saveBundleFile (keep for compatibility, calls saveBundleFileWithHash) 418 - func (bm *BundleManager) saveBundleFile(path string, operations []PLCOperation) error { 419 - _, _, err := bm.saveBundleFileWithHash(path, operations) // ✅ All 3 values 420 - return err 421 - } 422 - 423 - // loadBundleFromFile loads operations from bundle file (JSONL format) 424 - func (bm *BundleManager) loadBundleFromFile(path string) ([]PLCOperation, error) { 425 - // Read compressed file 426 - compressedData, err := os.ReadFile(path) 427 - if err != nil { 428 - return nil, fmt.Errorf("failed to read bundle file: %w", err) 429 - } 430 - 431 - // Decompress 432 - decompressed, err := bm.decoder.DecodeAll(compressedData, nil) 433 - if err != nil { 434 - return nil, fmt.Errorf("failed to decompress bundle: %w", err) 435 - } 436 - 437 - // Parse JSONL (newline-delimited JSON) 438 - var operations []PLCOperation 439 - scanner := bufio.NewScanner(bytes.NewReader(decompressed)) 440 - 441 - lineNum := 0 442 - for scanner.Scan() { 443 - lineNum++ 444 - line := scanner.Bytes() 445 - 446 - // Skip empty lines 447 - if len(line) == 0 { 448 - continue 449 - } 450 - 451 - var op PLCOperation 452 - if err := json.Unmarshal(line, &op); err != nil { 453 - return nil, fmt.Errorf("failed to parse operation on line %d: %w", lineNum, err) 454 - } 455 - 456 - // CRITICAL: Store the original raw JSON bytes 457 - op.RawJSON = make([]byte, len(line)) 458 - copy(op.RawJSON, line) 459 - 460 - operations = append(operations, op) 461 - } 462 - 463 - if err := scanner.Err(); err != nil { 464 - return nil, fmt.Errorf("error reading JSONL: %w", err) 465 - } 466 - 467 - return operations, nil 468 - } 469 - 470 - // indexBundleWithHash stores bundle with both hashes 471 - func (bm *BundleManager) indexBundleWithHash(ctx context.Context, bundleNumber int, operations []PLCOperation, path string, uncompressedHash, compressedHash string) error { 472 - // Get previous bundle's hash (uncompressed) 473 - var prevBundleHash string 474 - if bundleNumber > 1 { 475 - prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNumber-1) 476 - if err == nil && prevBundle != nil { 477 - prevBundleHash = prevBundle.Hash // Use uncompressed hash for chain 478 - log.Verbose(" Linking to previous bundle %06d (hash: %s)", bundleNumber-1, prevBundleHash[:16]+"...") 479 - } 480 - } 481 - 482 - // Extract unique DIDs 483 - didSet := make(map[string]bool) 484 - for _, op := range operations { 485 - didSet[op.DID] = true 486 - } 487 - 488 - dids := make([]string, 0, len(didSet)) 489 - for did := range didSet { 490 - dids = append(dids, did) 491 - } 492 - 493 - // Get compressed file size 494 - fileInfo, _ := os.Stat(path) 495 - compressedSize := int64(0) 496 - if fileInfo != nil { 497 - compressedSize = fileInfo.Size() 498 - } 499 - 500 - bundle := &storage.PLCBundle{ 501 - BundleNumber: bundleNumber, 502 - StartTime: operations[0].CreatedAt, 503 - EndTime: operations[len(operations)-1].CreatedAt, 504 - DIDs: dids, 505 - Hash: uncompressedHash, // Primary hash (JSONL) 506 - CompressedHash: compressedHash, // File integrity hash 507 - CompressedSize: compressedSize, // Compressed size 508 - PrevBundleHash: prevBundleHash, // Chain link 509 - Compressed: true, 510 - CreatedAt: time.Now(), 511 - } 512 - 513 - return bm.db.CreateBundle(ctx, bundle) 514 - } 515 - 516 - // indexBundle (keep for compatibility) - FIX: Calculate both hashes 517 - func (bm *BundleManager) indexBundle(ctx 
context.Context, bundleNumber int, operations []PLCOperation, path string) error { 518 - // Calculate compressed hash from file 519 - fileData, err := os.ReadFile(path) 520 - if err != nil { 521 - return err 522 - } 523 - compressedHash := bm.calculateHash(fileData) 524 - 525 - // Calculate uncompressed hash from operations 526 - var jsonlData []byte 527 - for _, op := range operations { 528 - jsonlData = append(jsonlData, op.RawJSON...) 529 - jsonlData = append(jsonlData, '\n') 530 - } 531 - uncompressedHash := bm.calculateHash(jsonlData) 532 - 533 - return bm.indexBundleWithHash(ctx, bundleNumber, operations, path, uncompressedHash, compressedHash) 534 - } 535 - 536 - // Update CreateBundleFromMempool 537 - func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation) (int, error) { 538 - if !bm.enabled { 539 - return 0, fmt.Errorf("bundle manager disabled") 540 - } 541 - 542 - if len(operations) != BUNDLE_SIZE { 543 - return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations)) 544 - } 545 - 546 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 547 - if err != nil { 548 - return 0, err 549 - } 550 - bundleNumber := lastBundle + 1 551 - 552 - path := bm.GetBundlePath(bundleNumber) 553 - 554 - // Save bundle with both hashes 555 - uncompressedHash, compressedHash, err := bm.saveBundleFileWithHash(path, operations) 556 - if err != nil { 557 - return 0, err 558 - } 559 - 560 - // Index bundle 561 - if err := bm.indexBundleWithHash(ctx, bundleNumber, operations, path, uncompressedHash, compressedHash); err != nil { 562 - return 0, err 563 - } 564 - 565 - log.Info("✓ Created bundle %06d from mempool (hash: %s)", 566 - bundleNumber, uncompressedHash[:16]+"...") 567 - 568 - return bundleNumber, nil 569 - } 570 - 571 - // EnsureBundleContinuity checks that all bundles from 1 to N exist 572 - func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error { 573 - if !bm.enabled { 574 - return nil 575 - } 576 - 577 - for i := 1; i < targetBundle; i++ { 578 - if !bm.BundleExists(i) { 579 - // Check if in database 580 - _, err := bm.db.GetBundleByNumber(ctx, i) 581 - if err != nil { 582 - return fmt.Errorf("bundle %06d is missing (required for continuity)", i) 583 - } 584 - } 585 - } 586 - 587 - return nil 588 - } 589 - 590 - // GetStats returns bundle statistics 591 - func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, error) { 592 - if !bm.enabled { 593 - return 0, 0, nil 594 - } 595 - return bm.db.GetBundleStats(ctx) 596 - } 597 - 598 - // calculateHash computes SHA256 hash of data 599 - func (bm *BundleManager) calculateHash(data []byte) string { 600 - hash := sha256.Sum256(data) 601 - return hex.EncodeToString(hash[:]) 602 - } 603 - 604 - // verifyBundleHash checks if file hash matches expected hash 605 - func (bm *BundleManager) verifyBundleHash(path string, expectedHash string) (bool, error) { 606 - data, err := os.ReadFile(path) 607 - if err != nil { 608 - return false, err 609 - } 610 - 611 - actualHash := bm.calculateHash(data) 612 - return actualHash == expectedHash, nil 613 - } 614 - 615 - // VerifyChain - FIX 616 - func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error { 617 - if !bm.enabled { 618 - return fmt.Errorf("bundle manager disabled") 619 - } 620 - 621 - log.Info("Verifying bundle chain from 1 to %06d...", endBundle) 622 - 623 - for i := 1; i <= endBundle; i++ { 624 - bundle, err := bm.db.GetBundleByNumber(ctx, i) 625 - if err != 
nil { 626 - return fmt.Errorf("bundle %06d not found: %w", i, err) 627 - } 628 - 629 - // Compute file path 630 - filePath := bm.GetBundlePath(i) 631 - 632 - // Verify file hash 633 - valid, err := bm.verifyBundleHash(filePath, bundle.CompressedHash) 634 - if err != nil { 635 - return fmt.Errorf("bundle %06d hash verification failed: %w", i, err) 636 - } 637 - if !valid { 638 - return fmt.Errorf("bundle %06d compressed hash mismatch!", i) 639 - } 640 - 641 - // Verify chain link 642 - if i > 1 { 643 - prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1) 644 - if err != nil { 645 - return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i) 646 - } 647 - 648 - if bundle.PrevBundleHash != prevBundle.Hash { 649 - return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s", 650 - i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16]) 651 - } 652 - } 653 - 654 - if i%100 == 0 { 655 - log.Verbose(" ✓ Verified bundles 1-%06d", i) 656 - } 657 - } 658 - 659 - log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle) 660 - return nil 661 - } 662 - 663 - // GetChainInfo returns information about the bundle chain 664 - func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) { 665 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 666 - if err != nil { 667 - return nil, err 668 - } 669 - 670 - if lastBundle == 0 { 671 - return map[string]interface{}{ 672 - "chain_length": 0, 673 - "status": "empty", 674 - }, nil 675 - } 676 - 677 - // Quick check first and last 678 - firstBundle, err := bm.db.GetBundleByNumber(ctx, 1) 679 - if err != nil { 680 - return nil, err 681 - } 682 - 683 - lastBundleData, err := bm.db.GetBundleByNumber(ctx, lastBundle) 684 - if err != nil { 685 - return nil, err 686 - } 687 - 688 - return map[string]interface{}{ 689 - "chain_length": lastBundle, 690 - "first_bundle": 1, 691 - "last_bundle": lastBundle, 692 - "chain_start_time": firstBundle.StartTime, 693 - "chain_end_time": lastBundleData.EndTime, 694 - "chain_head_hash": lastBundleData.Hash, 695 - }, nil 696 - }
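For context on what the deleted bundle.go provided: each bundle stored the SHA-256 of its own uncompressed JSONL plus the previous bundle's hash, forming a chain that VerifyChain walked link by link (a job that now falls to the plcbundle library). A self-contained sketch of that chain check, with hypothetical sample data:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

type bundleMeta struct {
	Hash     string // SHA-256 of the bundle's uncompressed JSONL
	PrevHash string // hash of the previous bundle; "" for bundle 1
}

func hashJSONL(data []byte) string {
	sum := sha256.Sum256(data)
	return hex.EncodeToString(sum[:])
}

// verifyChain returns the index of the first broken link, or -1 if intact.
func verifyChain(bundles []bundleMeta) int {
	for i := 1; i < len(bundles); i++ {
		if bundles[i].PrevHash != bundles[i-1].Hash {
			return i
		}
	}
	return -1
}

func main() {
	b1 := bundleMeta{Hash: hashJSONL([]byte(`{"did":"did:plc:aaa"}` + "\n"))}
	b2 := bundleMeta{Hash: hashJSONL([]byte(`{"did":"did:plc:bbb"}` + "\n")), PrevHash: b1.Hash}
	fmt.Println(verifyChain([]bundleMeta{b1, b2})) // -1: chain intact
}
```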
-237
internal/plc/client.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "context" 6 - "encoding/json" 7 - "fmt" 8 - "io" 9 - "net/http" 10 - "strconv" 11 - "time" 12 - 13 - "github.com/atscan/atscanner/internal/log" 14 - ) 15 - 16 - type Client struct { 17 - baseURL string 18 - httpClient *http.Client 19 - rateLimiter *RateLimiter 20 - } 21 - 22 - func NewClient(baseURL string) *Client { 23 - // Rate limit: 90 requests per minute (leaving buffer below 100/min limit) 24 - rateLimiter := NewRateLimiter(90, time.Minute) 25 - 26 - return &Client{ 27 - baseURL: baseURL, 28 - httpClient: &http.Client{ 29 - Timeout: 60 * time.Second, 30 - }, 31 - rateLimiter: rateLimiter, 32 - } 33 - } 34 - 35 - func (c *Client) Close() { 36 - if c.rateLimiter != nil { 37 - c.rateLimiter.Stop() 38 - } 39 - } 40 - 41 - type ExportOptions struct { 42 - Count int 43 - After string // ISO 8601 datetime string 44 - } 45 - 46 - // Export fetches export data from PLC directory with rate limiting and retry 47 - func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) { 48 - return c.exportWithRetry(ctx, opts, 5) 49 - } 50 - 51 - // exportWithRetry implements retry logic with exponential backoff for rate limits 52 - func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) { 53 - var lastErr error 54 - backoff := 1 * time.Second 55 - 56 - for attempt := 1; attempt <= maxRetries; attempt++ { 57 - // Wait for rate limiter token 58 - if err := c.rateLimiter.Wait(ctx); err != nil { 59 - return nil, err 60 - } 61 - 62 - operations, retryAfter, err := c.doExport(ctx, opts) 63 - 64 - if err == nil { 65 - return operations, nil 66 - } 67 - 68 - lastErr = err 69 - 70 - // Check if it's a rate limit error (429) 71 - if retryAfter > 0 { 72 - log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d", 73 - retryAfter, attempt, maxRetries) 74 - 75 - select { 76 - case <-time.After(retryAfter): 77 - continue 78 - case <-ctx.Done(): 79 - return nil, ctx.Err() 80 - } 81 - } 82 - 83 - // Other errors - exponential backoff 84 - if attempt < maxRetries { 85 - log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v", 86 - attempt, maxRetries, err, backoff) 87 - 88 - select { 89 - case <-time.After(backoff): 90 - backoff *= 2 // Exponential backoff 91 - case <-ctx.Done(): 92 - return nil, ctx.Err() 93 - } 94 - } 95 - } 96 - 97 - return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr) 98 - } 99 - 100 - // doExport performs the actual HTTP request 101 - func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) { 102 - url := fmt.Sprintf("%s/export", c.baseURL) 103 - 104 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 105 - if err != nil { 106 - return nil, 0, err 107 - } 108 - 109 - // Add query parameters 110 - q := req.URL.Query() 111 - if opts.Count > 0 { 112 - q.Add("count", fmt.Sprintf("%d", opts.Count)) 113 - } 114 - if opts.After != "" { 115 - q.Add("after", opts.After) 116 - } 117 - req.URL.RawQuery = q.Encode() 118 - 119 - resp, err := c.httpClient.Do(req) 120 - if err != nil { 121 - return nil, 0, fmt.Errorf("request failed: %w", err) 122 - } 123 - defer resp.Body.Close() 124 - 125 - // Handle rate limiting (429) 126 - if resp.StatusCode == http.StatusTooManyRequests { 127 - retryAfter := parseRetryAfter(resp) 128 - 129 - // Also check x-ratelimit headers for info 130 - if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" { 131 - log.Verbose("Rate limit: 
%s", limit) 132 - } 133 - 134 - return nil, retryAfter, fmt.Errorf("rate limited (429)") 135 - } 136 - 137 - if resp.StatusCode != http.StatusOK { 138 - body, _ := io.ReadAll(resp.Body) 139 - return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 140 - } 141 - 142 - var operations []PLCOperation 143 - 144 - // PLC export returns newline-delimited JSON 145 - scanner := bufio.NewScanner(resp.Body) 146 - buf := make([]byte, 0, 64*1024) 147 - scanner.Buffer(buf, 1024*1024) 148 - 149 - lineCount := 0 150 - for scanner.Scan() { 151 - lineCount++ 152 - line := scanner.Bytes() 153 - 154 - if len(line) == 0 { 155 - continue 156 - } 157 - 158 - var op PLCOperation 159 - if err := json.Unmarshal(line, &op); err != nil { 160 - log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err) 161 - continue 162 - } 163 - 164 - // CRITICAL: Store the original raw JSON bytes 165 - op.RawJSON = make([]byte, len(line)) 166 - copy(op.RawJSON, line) 167 - 168 - operations = append(operations, op) 169 - } 170 - 171 - if err := scanner.Err(); err != nil { 172 - return nil, 0, fmt.Errorf("error reading response: %w", err) 173 - } 174 - 175 - return operations, 0, nil 176 - 177 - } 178 - 179 - // parseRetryAfter parses the Retry-After header 180 - func parseRetryAfter(resp *http.Response) time.Duration { 181 - retryAfter := resp.Header.Get("Retry-After") 182 - if retryAfter == "" { 183 - // Default to 5 minutes if no header 184 - return 5 * time.Minute 185 - } 186 - 187 - // Try parsing as seconds 188 - if seconds, err := strconv.Atoi(retryAfter); err == nil { 189 - return time.Duration(seconds) * time.Second 190 - } 191 - 192 - // Try parsing as HTTP date 193 - if t, err := http.ParseTime(retryAfter); err == nil { 194 - return time.Until(t) 195 - } 196 - 197 - // Default 198 - return 5 * time.Minute 199 - } 200 - 201 - // GetDID fetches a specific DID document from PLC 202 - func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) { 203 - // Wait for rate limiter 204 - if err := c.rateLimiter.Wait(ctx); err != nil { 205 - return nil, err 206 - } 207 - 208 - url := fmt.Sprintf("%s/%s", c.baseURL, did) 209 - 210 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 211 - if err != nil { 212 - return nil, err 213 - } 214 - 215 - resp, err := c.httpClient.Do(req) 216 - if err != nil { 217 - return nil, err 218 - } 219 - defer resp.Body.Close() 220 - 221 - if resp.StatusCode == http.StatusTooManyRequests { 222 - retryAfter := parseRetryAfter(resp) 223 - return nil, fmt.Errorf("rate limited, retry after %v", retryAfter) 224 - } 225 - 226 - if resp.StatusCode != http.StatusOK { 227 - body, _ := io.ReadAll(resp.Body) 228 - return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 229 - } 230 - 231 - var doc DIDDocument 232 - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { 233 - return nil, err 234 - } 235 - 236 - return &doc, nil 237 - }
+112
internal/plc/helpers.go
··· 1 + package plc 2 + 3 + import ( 4 + "regexp" 5 + "strings" 6 + ) 7 + 8 + // MaxHandleLength is the maximum allowed handle length for database storage 9 + const MaxHandleLength = 500 10 + 11 + // Handle validation regex per AT Protocol spec 12 + // Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter 13 + var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`) 14 + 15 + // ExtractHandle safely extracts the handle from a PLC operation 16 + func ExtractHandle(op *PLCOperation) string { 17 + if op == nil || op.Operation == nil { 18 + return "" 19 + } 20 + 21 + // Get "alsoKnownAs" 22 + aka, ok := op.Operation["alsoKnownAs"].([]interface{}) 23 + if !ok { 24 + return "" 25 + } 26 + 27 + // Find the handle (e.g., "at://handle.bsky.social") 28 + for _, item := range aka { 29 + if handle, ok := item.(string); ok { 30 + if strings.HasPrefix(handle, "at://") { 31 + return strings.TrimPrefix(handle, "at://") 32 + } 33 + } 34 + } 35 + return "" 36 + } 37 + 38 + // ValidateHandle checks if a handle is valid for database storage 39 + // Returns empty string if handle is invalid (too long or wrong format) 40 + func ValidateHandle(handle string) string { 41 + if handle == "" { 42 + return "" 43 + } 44 + 45 + // Check length first (faster) 46 + if len(handle) > MaxHandleLength { 47 + return "" 48 + } 49 + 50 + // Validate format using regex 51 + if !handleRegex.MatchString(handle) { 52 + return "" 53 + } 54 + 55 + return handle 56 + } 57 + 58 + // ExtractPDS safely extracts the PDS endpoint from a PLC operation 59 + func ExtractPDS(op *PLCOperation) string { 60 + if op == nil || op.Operation == nil { 61 + return "" 62 + } 63 + 64 + // Get "services" 65 + services, ok := op.Operation["services"].(map[string]interface{}) 66 + if !ok { 67 + return "" 68 + } 69 + 70 + // Get "atproto_pds" 71 + pdsService, ok := services["atproto_pds"].(map[string]interface{}) 72 + if !ok { 73 + return "" 74 + } 75 + 76 + // Get "endpoint" 77 + if endpoint, ok := pdsService["endpoint"].(string); ok { 78 + return endpoint 79 + } 80 + 81 + return "" 82 + } 83 + 84 + // DIDInfo contains extracted metadata from a PLC operation 85 + type DIDInfo struct { 86 + Handle string 87 + PDS string 88 + } 89 + 90 + // ExtractDIDInfo extracts both handle and PDS from an operation 91 + func ExtractDIDInfo(op *PLCOperation) DIDInfo { 92 + return DIDInfo{ 93 + Handle: ExtractHandle(op), 94 + PDS: ExtractPDS(op), 95 + } 96 + } 97 + 98 + // ExtractDIDInfoMap creates a map of DID -> info from operations 99 + // Processes in reverse order to get the latest state for each DID 100 + func ExtractDIDInfoMap(ops []PLCOperation) map[string]DIDInfo { 101 + infoMap := make(map[string]DIDInfo) 102 + 103 + // Process in reverse to get latest state 104 + for i := len(ops) - 1; i >= 0; i-- { 105 + op := ops[i] 106 + if _, exists := infoMap[op.DID]; !exists { 107 + infoMap[op.DID] = ExtractDIDInfo(&op) 108 + } 109 + } 110 + 111 + return infoMap 112 + }
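To make the handle rules in helpers.go concrete, a small self-contained sketch that reuses the same regex (copied from above rather than imported) on a few sample handles; the examples are illustrative:

```go
package main

import (
	"fmt"
	"regexp"
)

// Same pattern as handleRegex in helpers.go.
var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`)

func main() {
	for _, h := range []string{
		"alice.bsky.social",      // valid: labels plus a letter-initial TLD
		"sub.domain.example.com", // valid: any depth of labels is fine
		"-bad.example.com",       // invalid: label may not start with '-'
		"noperiods",              // invalid: at least one dot is required
		"name.123",               // invalid: TLD must start with a letter
	} {
		fmt.Printf("%-24s matches=%v\n", h, handleRegex.MatchString(h))
	}
}
```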
+522
internal/plc/manager.go
··· 1 + package plc
2 +
3 + import (
4 + "context"
5 + "encoding/csv"
6 + "fmt"
7 + "io"
8 + "os"
9 + "path/filepath"
10 + "sort"
11 + "strconv"
12 + "strings"
13 + "time"
14 +
15 + "github.com/atscan/atscand/internal/log"
16 + "github.com/atscan/atscand/internal/storage"
17 + "github.com/klauspost/compress/zstd"
18 + plcbundle "tangled.org/atscan.net/plcbundle"
19 + )
20 +
21 + // BundleManager wraps the library's manager with database integration
22 + type BundleManager struct {
23 + libManager *plcbundle.Manager
24 + db storage.Database
25 + bundleDir string
26 + indexDIDs bool
27 + }
28 +
29 + func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30 + // Create library config
31 + config := plcbundle.DefaultConfig(bundleDir)
32 +
33 + // Create PLC client
34 + var client *plcbundle.PLCClient
35 + if plcURL != "" {
36 + client = plcbundle.NewPLCClient(plcURL)
37 + }
38 +
39 + // Create library manager
40 + libMgr, err := plcbundle.NewManager(config, client)
41 + if err != nil {
42 + return nil, fmt.Errorf("failed to create library manager: %w", err)
43 + }
44 +
45 + return &BundleManager{
46 + libManager: libMgr,
47 + db: db,
48 + bundleDir: bundleDir,
49 + indexDIDs: indexDIDs,
50 + }, nil
51 + }
52 +
53 + func (bm *BundleManager) Close() {
54 + if bm.libManager != nil {
55 + bm.libManager.Close()
56 + }
57 + }
58 +
59 + // LoadBundleOperations loads a bundle via the library and returns just its operations
60 + func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62 + if err != nil {
63 + return nil, err
64 + }
65 + return bundle.Operations, nil
66 + }
67 +
68 + // LoadBundle loads a full bundle with metadata
69 + func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70 + return bm.libManager.LoadBundle(ctx, bundleNum)
71 + }
72 +
73 + // FetchAndSaveBundle fetches next bundle from PLC and saves
74 + func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75 + // Fetch from PLC using library
76 + bundle, err := bm.libManager.FetchNextBundle(ctx)
77 + if err != nil {
78 + return nil, err
79 + }
80 +
81 + // Save to disk (library handles this)
82 + if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83 + return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84 + }
85 +
86 + // Index DIDs if enabled (still use database for this)
87 + if bm.indexDIDs && len(bundle.Operations) > 0 {
88 + if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89 + log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90 + }
91 + }
92 +
93 + log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94 +
95 + return bundle, nil
96 + }
97 +
98 + // indexBundleDIDs indexes DIDs from a bundle into the database
99 + func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100 + start := time.Now()
101 + log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102 +
103 + // Extract DID info from operations
104 + didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105 +
106 + successCount := 0
107 + errorCount := 0
108 + invalidHandleCount := 0
109 +
110 + // Upsert each DID
111 + for did, info := range didInfoMap {
112 + validHandle := ValidateHandle(info.Handle)
113 + if info.Handle != "" && validHandle == "" {
114 + invalidHandleCount++
115 + }
116 +
117 + if err :=
bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118 + log.Error("Failed to index DID %s: %v", did, err)
119 + errorCount++
120 + } else {
121 + successCount++
122 + }
123 + }
124 +
125 + elapsed := time.Since(start)
126 + log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127 + successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128 +
129 + return nil
130 + }
131 +
132 + // VerifyChain verifies bundle chain integrity; the library always checks the full chain, so endBundle is currently unused
133 + func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134 + result, err := bm.libManager.VerifyChain(ctx)
135 + if err != nil {
136 + return err
137 + }
138 +
139 + if !result.Valid {
140 + return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141 + }
142 +
143 + return nil
144 + }
145 +
146 + // GetChainInfo returns chain information
147 + func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148 + return bm.libManager.GetInfo(), nil
149 + }
150 +
151 + // GetMempoolStats returns mempool statistics from the library
152 + func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153 + return bm.libManager.GetMempoolStats()
154 + }
155 +
156 + // GetMempoolOperations returns all operations currently in mempool
157 + func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158 + return bm.libManager.GetMempoolOperations()
159 + }
160 +
161 + // GetIndex returns the library's bundle index
162 + func (bm *BundleManager) GetIndex() *plcbundle.Index {
163 + return bm.libManager.GetIndex()
164 + }
165 +
166 + // GetLastBundleNumber returns the last bundle number
167 + func (bm *BundleManager) GetLastBundleNumber() int {
168 + index := bm.libManager.GetIndex()
169 + lastBundle := index.GetLastBundle()
170 + if lastBundle == nil {
171 + return 0
172 + }
173 + return lastBundle.BundleNumber
174 + }
175 +
176 + // GetBundleMetadata gets bundle metadata by number
177 + func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178 + index := bm.libManager.GetIndex()
179 + return index.GetBundle(bundleNum)
180 + }
181 +
182 + // GetBundles returns the most recent bundles (newest first)
183 + func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184 + index := bm.libManager.GetIndex()
185 + allBundles := index.GetBundles()
186 +
187 + // Determine how many bundles to return
188 + count := limit
189 + if count <= 0 || count > len(allBundles) {
190 + count = len(allBundles)
191 + }
192 +
193 + // Build result in reverse order (newest first)
194 + result := make([]*plcbundle.BundleMetadata, count)
195 + for i := 0; i < count; i++ {
196 + result[i] = allBundles[len(allBundles)-1-i]
197 + }
198 +
199 + return result
200 + }
201 +
202 + // GetBundleStats returns bundle statistics
203 + func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204 + index := bm.libManager.GetIndex()
205 + stats := index.GetStats()
206 +
207 + // Convert to expected format
208 + lastBundle := stats["last_bundle"]
209 + if lastBundle == nil {
210 + lastBundle = 0 // keep as int so the .(int) assertion below succeeds
211 + }
212 +
213 + // Calculate total uncompressed size by iterating through all bundles
214 + totalUncompressedSize := int64(0)
215 + allBundles := index.GetBundles()
216 + for _, bundle := range allBundles {
217 + totalUncompressedSize += bundle.UncompressedSize
218 + }
219 +
220 + return map[string]interface{}{
221 + "bundle_count":
int64(stats["bundle_count"].(int)), 222 + "total_size": stats["total_size"].(int64), 223 + "total_uncompressed_size": totalUncompressedSize, 224 + "last_bundle": int64(lastBundle.(int)), 225 + } 226 + } 227 + 228 + // GetDIDsForBundle gets DIDs from a bundle (loads and extracts) 229 + func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) { 230 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum) 231 + if err != nil { 232 + return nil, 0, err 233 + } 234 + 235 + // Extract unique DIDs 236 + didSet := make(map[string]bool) 237 + for _, op := range bundle.Operations { 238 + didSet[op.DID] = true 239 + } 240 + 241 + dids := make([]string, 0, len(didSet)) 242 + for did := range didSet { 243 + dids = append(dids, did) 244 + } 245 + 246 + return dids, bundle.DIDCount, nil 247 + } 248 + 249 + // FindBundleForTimestamp finds bundle containing a timestamp 250 + func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int { 251 + index := bm.libManager.GetIndex() 252 + bundles := index.GetBundles() 253 + 254 + // Find bundle containing this time 255 + for _, bundle := range bundles { 256 + if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) && 257 + (bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) { 258 + return bundle.BundleNumber 259 + } 260 + } 261 + 262 + // Return closest bundle before this time 263 + for i := len(bundles) - 1; i >= 0; i-- { 264 + if bundles[i].EndTime.Before(afterTime) { 265 + return bundles[i].BundleNumber 266 + } 267 + } 268 + 269 + return 1 // Default to first bundle 270 + } 271 + 272 + // StreamRaw streams raw compressed bundle data 273 + func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) { 274 + return bm.libManager.StreamBundleRaw(ctx, bundleNumber) 275 + } 276 + 277 + // StreamDecompressed streams decompressed bundle data 278 + func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) { 279 + return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber) 280 + } 281 + 282 + // GetPLCHistory calculates historical statistics from the bundle index 283 + func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) { 284 + index := bm.libManager.GetIndex() 285 + allBundles := index.GetBundles() 286 + 287 + // Filter bundles >= fromBundle 288 + var filtered []*plcbundle.BundleMetadata 289 + for _, b := range allBundles { 290 + if b.BundleNumber >= fromBundle { 291 + filtered = append(filtered, b) 292 + } 293 + } 294 + 295 + if len(filtered) == 0 { 296 + return []*storage.PLCHistoryPoint{}, nil 297 + } 298 + 299 + // Sort bundles by bundle number to ensure proper cumulative calculation 300 + sort.Slice(filtered, func(i, j int) bool { 301 + return filtered[i].BundleNumber < filtered[j].BundleNumber 302 + }) 303 + 304 + // Group by date 305 + type dailyStat struct { 306 + lastBundle int 307 + bundleCount int 308 + totalUncompressed int64 309 + totalCompressed int64 310 + } 311 + 312 + dailyStats := make(map[string]*dailyStat) 313 + 314 + // Map to store the cumulative values at the end of each date 315 + dateCumulatives := make(map[string]struct { 316 + uncompressed int64 317 + compressed int64 318 + }) 319 + 320 + // Calculate cumulative totals as we iterate through sorted bundles 321 + cumulativeUncompressed := int64(0) 322 + cumulativeCompressed := int64(0) 323 + 324 + for _, bundle := range filtered { 325 + 
dateStr := bundle.StartTime.Format("2006-01-02")
326 +
327 + // Update cumulative totals
328 + cumulativeUncompressed += bundle.UncompressedSize
329 + cumulativeCompressed += bundle.CompressedSize
330 +
331 + if stat, exists := dailyStats[dateStr]; exists {
332 + // Update existing day
333 + if bundle.BundleNumber > stat.lastBundle {
334 + stat.lastBundle = bundle.BundleNumber
335 + }
336 + stat.bundleCount++
337 + stat.totalUncompressed += bundle.UncompressedSize
338 + stat.totalCompressed += bundle.CompressedSize
339 + } else {
340 + // Create new day entry
341 + dailyStats[dateStr] = &dailyStat{
342 + lastBundle: bundle.BundleNumber,
343 + bundleCount: 1,
344 + totalUncompressed: bundle.UncompressedSize,
345 + totalCompressed: bundle.CompressedSize,
346 + }
347 + }
348 +
349 + // Store the cumulative values at the end of this date
350 + // (will be overwritten if there are multiple bundles on the same day)
351 + dateCumulatives[dateStr] = struct {
352 + uncompressed int64
353 + compressed int64
354 + }{
355 + uncompressed: cumulativeUncompressed,
356 + compressed: cumulativeCompressed,
357 + }
358 + }
359 +
360 + // Convert map to sorted slice by date
361 + var dates []string
362 + for date := range dailyStats {
363 + dates = append(dates, date)
364 + }
365 + sort.Strings(dates)
366 +
367 + // Build history points with cumulative operations
368 + var history []*storage.PLCHistoryPoint
369 + cumulativeOps := 0
370 +
371 + for _, date := range dates {
372 + stat := dailyStats[date]
373 + cumulativeOps += stat.bundleCount * 10000
374 + cumulative := dateCumulatives[date]
375 +
376 + history = append(history, &storage.PLCHistoryPoint{
377 + Date: date,
378 + BundleNumber: stat.lastBundle,
379 + OperationCount: cumulativeOps,
380 + UncompressedSize: stat.totalUncompressed,
381 + CompressedSize: stat.totalCompressed,
382 + CumulativeUncompressed: cumulative.uncompressed,
383 + CumulativeCompressed: cumulative.compressed,
384 + })
385 + }
386 +
387 + // Apply limit if specified
388 + if limit > 0 && len(history) > limit {
389 + history = history[:limit]
390 + }
391 +
392 + return history, nil
393 + }
394 +
395 + // GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396 + func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397 + // Define the path to the labels file
398 + labelsDir := filepath.Join(bm.bundleDir, "labels")
399 + labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400 +
401 + // Check if file exists
402 + if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403 + log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404 + // Return empty, not an error
405 + return []*PLCOpLabel{}, nil
406 + }
407 +
408 + // Open the Zstd-compressed file
409 + file, err := os.Open(labelsFile)
410 + if err != nil {
411 + return nil, fmt.Errorf("failed to open labels file: %w", err)
412 + }
413 + defer file.Close()
414 +
415 + // Create a Zstd reader
416 + zstdReader, err := zstd.NewReader(file)
417 + if err != nil {
418 + return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419 + }
420 + defer zstdReader.Close()
421 +
422 + // Create a CSV reader
423 + csvReader := csv.NewReader(zstdReader)
424 + // The file has no header row, so records can be read directly.
425 + // Each record is validated against the 6-column format in parseLabelRecord.
426 + //csvReader.FieldsPerRecord = 6
427 +
428 + var labels []*PLCOpLabel
429 +
430 + // Read all records
431 + for {
432 + // Check for context cancellation
433 + if err := ctx.Err(); err != nil {
434 + return nil, err
435 + }
436 +
437 + record, err := csvReader.Read()
438 + if err == io.EOF {
439 + break // End of file
440 + }
441 + if err != nil {
442 + log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443 + continue // Skip bad line
444 + }
445 +
446 + // Parse the CSV record (which is []string)
447 + label, err := parseLabelRecord(record)
448 + if err != nil {
449 + log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450 + continue // Skip bad data
451 + }
452 +
453 + labels = append(labels, label)
454 + }
455 +
456 + return labels, nil
457 + }
458 +
459 + // parseLabelRecord converts a label CSV record into a PLCOpLabel struct
460 + func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461 + // Format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462 + if len(record) != 6 {
463 + err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464 + log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
465 + return nil, err
466 + }
467 +
468 + // 0:bundle
469 + bundle, err := strconv.Atoi(record[0])
470 + if err != nil {
471 + log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
472 + return nil, fmt.Errorf("parsing 'bundle': %w", err)
473 + }
474 +
475 + // 1:position
476 + position, err := strconv.Atoi(record[1])
477 + if err != nil {
478 + log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
479 + return nil, fmt.Errorf("parsing 'position': %w", err)
480 + }
481 +
482 + // 2:cid(short)
483 + shortCID := record[2]
484 +
485 + // 3:size
486 + size, err := strconv.Atoi(record[3])
487 + if err != nil {
488 + log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
489 + return nil, fmt.Errorf("parsing 'size': %w", err)
490 + }
491 +
492 + // 4:confidence
493 + confidence, err := strconv.ParseFloat(record[4], 64)
494 + if err != nil {
495 + log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
496 + return nil, fmt.Errorf("parsing 'confidence': %w", err)
497 + }
498 +
499 + // 5:labels
500 + detectors := strings.Split(record[5], ";")
501 +
502 + label := &PLCOpLabel{
503 + Bundle: bundle,
504 + Position: position,
505 + CID: shortCID,
506 + Size: size,
507 + Confidence: confidence,
508 + Detectors: detectors,
509 + }
510 +
511 + return label, nil
512 + }
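Because the label format is positional, a small table test pins down the six-column layout. Below is a sketch that would sit next to parseLabelRecord in the plc package; the sample row and its values are invented:

```go
package plc

import "testing"

func TestParseLabelRecord(t *testing.T) {
	// bundle, position, cid(short), size, confidence, labels (";"-separated)
	record := []string{"42", "1337", "bafyabc123", "512", "0.93", "spam;bulk"}

	label, err := parseLabelRecord(record)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if label.Bundle != 42 || label.Position != 1337 || label.Size != 512 {
		t.Errorf("numeric fields mis-parsed: %+v", label)
	}
	if label.Confidence != 0.93 {
		t.Errorf("confidence mis-parsed: %v", label.Confidence)
	}
	if len(label.Detectors) != 2 || label.Detectors[0] != "spam" {
		t.Errorf("detectors mis-parsed: %v", label.Detectors)
	}

	// A five-field row must be rejected.
	if _, err := parseLabelRecord(record[:5]); err == nil {
		t.Error("expected error for short record")
	}
}
```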
-70
internal/plc/ratelimiter.go
··· 1 - package plc 2 - 3 - import ( 4 - "context" 5 - "time" 6 - ) 7 - 8 - // RateLimiter implements a token bucket rate limiter 9 - type RateLimiter struct { 10 - tokens chan struct{} 11 - refillRate time.Duration 12 - maxTokens int 13 - stopRefill chan struct{} 14 - } 15 - 16 - // NewRateLimiter creates a new rate limiter 17 - // Example: NewRateLimiter(90, time.Minute) = 90 requests per minute 18 - func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter { 19 - rl := &RateLimiter{ 20 - tokens: make(chan struct{}, requestsPerPeriod), 21 - refillRate: period / time.Duration(requestsPerPeriod), 22 - maxTokens: requestsPerPeriod, 23 - stopRefill: make(chan struct{}), 24 - } 25 - 26 - // Fill initially 27 - for i := 0; i < requestsPerPeriod; i++ { 28 - rl.tokens <- struct{}{} 29 - } 30 - 31 - // Start refill goroutine 32 - go rl.refill() 33 - 34 - return rl 35 - } 36 - 37 - // refill adds tokens at the specified rate 38 - func (rl *RateLimiter) refill() { 39 - ticker := time.NewTicker(rl.refillRate) 40 - defer ticker.Stop() 41 - 42 - for { 43 - select { 44 - case <-ticker.C: 45 - select { 46 - case rl.tokens <- struct{}{}: 47 - // Token added 48 - default: 49 - // Buffer full, skip 50 - } 51 - case <-rl.stopRefill: 52 - return 53 - } 54 - } 55 - } 56 - 57 - // Wait blocks until a token is available 58 - func (rl *RateLimiter) Wait(ctx context.Context) error { 59 - select { 60 - case <-rl.tokens: 61 - return nil 62 - case <-ctx.Done(): 63 - return ctx.Err() 64 - } 65 - } 66 - 67 - // Stop stops the rate limiter 68 - func (rl *RateLimiter) Stop() { 69 - close(rl.stopRefill) 70 - }
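With the bespoke token bucket gone, throttling now amounts to the scanner sleeping five minutes whenever the directory reports a rate limit (see scanner.go below). If finer-grained pacing is ever wanted again, the stock golang.org/x/time/rate package expresses the same 90-requests-per-minute policy; a minimal, hypothetical sketch, not part of this change:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// One token every time.Minute/90, with a burst of 90: the same shape
	// as the removed NewRateLimiter(90, time.Minute).
	limiter := rate.NewLimiter(rate.Every(time.Minute/90), 90)

	ctx := context.Background()
	for i := 0; i < 3; i++ {
		// Wait blocks until a token is available, like RateLimiter.Wait did.
		if err := limiter.Wait(ctx); err != nil {
			return
		}
		fmt.Println("request", i) // issue one PLC directory request here
	}
}
```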
+156 -338
internal/plc/scanner.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "encoding/json" 6 5 "fmt" 6 + "strings" 7 7 "time" 8 8 9 - "github.com/acarl005/stripansi" 10 - "github.com/atscan/atscanner/internal/config" 11 - "github.com/atscan/atscanner/internal/log" 12 - "github.com/atscan/atscanner/internal/storage" 9 + "github.com/atscan/atscand/internal/config" 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/atscan/atscand/internal/storage" 13 12 ) 14 13 15 14 type Scanner struct { 16 - client *Client 15 + bundleManager *BundleManager 17 16 db storage.Database 18 17 config config.PLCConfig 19 - bundleManager *BundleManager 20 18 } 21 19 22 - func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner { 23 - bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db) 24 - if err != nil { 25 - log.Error("Warning: failed to initialize bundle manager: %v", err) 26 - bundleManager = &BundleManager{enabled: false} 27 - } 20 + func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner { 21 + log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs) 28 22 29 23 return &Scanner{ 30 - client: NewClient(cfg.DirectoryURL), 24 + bundleManager: bundleManager, // Use provided instance 31 25 db: db, 32 26 config: cfg, 33 - bundleManager: bundleManager, 34 27 } 35 28 } 36 29 37 30 func (s *Scanner) Close() { 38 - if s.bundleManager != nil { 39 - s.bundleManager.Close() 40 - } 31 + // Don't close bundleManager here - it's shared 41 32 } 42 33 43 34 func (s *Scanner) Scan(ctx context.Context) error { 44 - startTime := time.Now() 45 35 log.Info("Starting PLC directory scan...") 46 - log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes") 47 36 48 37 cursor, err := s.db.GetScanCursor(ctx, "plc_directory") 49 38 if err != nil { 50 39 return fmt.Errorf("failed to get scan cursor: %w", err) 51 40 } 52 41 53 - currentBundle := cursor.LastBundleNumber 54 - if currentBundle == 0 { 55 - currentBundle = 1 56 - } else { 57 - currentBundle++ 58 - } 42 + metrics := newMetrics(cursor.LastBundleNumber + 1) 59 43 60 - log.Info("Starting from bundle %06d", currentBundle) 61 - 62 - // Ensure bundle continuity (all previous bundles exist) 63 - if currentBundle > 1 { 64 - log.Info("Checking bundle continuity...") 65 - if err := s.bundleManager.EnsureBundleContinuity(ctx, currentBundle); err != nil { 66 - return fmt.Errorf("bundle continuity check failed: %w", err) 67 - } 68 - } 69 - 70 - totalProcessed := int64(0) 71 - newPDSCount := int64(0) 72 - 73 - // ✅ CHECK MEMPOOL FIRST - if it has data, continue filling it instead of fetching new bundle 74 - mempoolCount, err := s.db.GetMempoolCount(ctx) 75 - if err != nil { 76 - return err 77 - } 78 - 79 - if mempoolCount > 0 { 80 - log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", mempoolCount) 81 - 82 - // Fill mempool until we have 10,000 83 - if err := s.fillMempoolToSize(ctx, &newPDSCount, &totalProcessed); err != nil { 84 - log.Error("Error filling mempool: %v", err) 44 + // Main processing loop 45 + for { 46 + if err := ctx.Err(); err != nil { 85 47 return err 86 48 } 87 49 88 - // Try to create bundles from mempool 89 - if err := s.processMempoolRecursive(ctx, &newPDSCount, &currentBundle, &totalProcessed); err != nil { 90 - log.Error("Error processing mempool: %v", err) 91 - } 92 - 93 - log.Info("PLC scan completed: %d operations, %d new PDS servers in %v", 94 - totalProcessed, newPDSCount, time.Since(startTime)) 95 - return nil 96 - } 97 - 98 - // Process bundles 
sequentially (normal flow when mempool is empty) 99 - for { 100 - select { 101 - case <-ctx.Done(): 102 - return ctx.Err() 103 - default: 104 - } 105 - 106 - log.Verbose("→ Processing bundle %06d...", currentBundle) 107 - 108 - // Load bundle (returns operations, isComplete flag, and error) 109 - operations, isComplete, err := s.bundleManager.LoadBundle(ctx, currentBundle, s.client) 50 + // Fetch and save bundle (library handles mempool internally) 51 + bundle, err := s.bundleManager.FetchAndSaveBundle(ctx) 110 52 if err != nil { 111 - log.Error("Failed to load bundle %06d: %v", currentBundle, err) 112 - 113 - // If rate limited, wait and retry 114 - if contains(err.Error(), "rate limited") { 115 - log.Info("⚠ Rate limit hit, pausing for 5 minutes...") 116 - time.Sleep(5 * time.Minute) 117 - continue 118 - } 53 + if isInsufficientOpsError(err) { 54 + // Show mempool status 55 + stats := s.bundleManager.libManager.GetMempoolStats() 56 + mempoolCount := stats["count"].(int) 119 57 120 - // Check if this is just end of data 121 - if currentBundle > 1 { 122 - log.Info("→ Reached end of available data") 123 - // Try mempool processing 124 - if err := s.processMempoolRecursive(ctx, &newPDSCount, &currentBundle, &totalProcessed); err != nil { 125 - log.Error("Error processing mempool: %v", err) 58 + if mempoolCount > 0 { 59 + log.Info("→ Waiting for more operations (mempool has %d/%d ops)", 60 + mempoolCount, BUNDLE_SIZE) 61 + } else { 62 + log.Info("→ Caught up! No operations available") 126 63 } 64 + break 127 65 } 128 - break 129 - } 130 66 131 - if isComplete { 132 - // Complete bundle 133 - batchNewPDS, err := s.processBatch(ctx, operations) 134 - if err != nil { 135 - log.Error("Error processing bundle: %v", err) 136 - } 137 - 138 - newPDSCount += batchNewPDS 139 - totalProcessed += int64(len(operations)) 140 - 141 - log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new PDS", 142 - currentBundle, len(operations), batchNewPDS) 143 - 144 - // Update cursor 145 - if err := s.db.UpdateScanCursor(ctx, &storage.ScanCursor{ 146 - Source: "plc_directory", 147 - LastBundleNumber: currentBundle, 148 - LastScanTime: time.Now(), 149 - RecordsProcessed: cursor.RecordsProcessed + totalProcessed, 150 - }); err != nil { 151 - log.Error("Warning: failed to update cursor: %v", err) 67 + if strings.Contains(err.Error(), "rate limited") { 68 + log.Info("⚠ Rate limited, pausing for 5 minutes...") 69 + time.Sleep(5 * time.Minute) 70 + continue 152 71 } 153 72 154 - currentBundle++ 155 - } else { 156 - // Incomplete bundle - we've reached the end of available data 157 - log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", currentBundle, len(operations)) 73 + return fmt.Errorf("failed to fetch bundle: %w", err) 74 + } 158 75 159 - if err := s.addToMempool(ctx, operations); err != nil { 160 - log.Error("Error adding to mempool: %v", err) 161 - } 76 + // Process operations for endpoint discovery 77 + counts, err := s.processBatch(ctx, bundle.Operations) 78 + if err != nil { 79 + log.Error("Failed to process batch: %v", err) 80 + // Continue anyway 81 + } 162 82 163 - // ✅ Now fill mempool to 10,000 164 - if err := s.fillMempoolToSize(ctx, &newPDSCount, &totalProcessed); err != nil { 165 - log.Error("Error filling mempool: %v", err) 166 - } 83 + // Update metrics 84 + s.mergeCounts(metrics.endpointCounts, counts) 85 + metrics.totalProcessed += int64(len(bundle.Operations)) 86 + metrics.newEndpoints += sumCounts(counts) 87 + metrics.currentBundle = bundle.BundleNumber 167 88 168 - // 
Process mempool 169 - if err := s.processMempoolRecursive(ctx, &newPDSCount, &currentBundle, &totalProcessed); err != nil { 170 - log.Error("Error processing mempool: %v", err) 171 - } 89 + log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints", 90 + bundle.BundleNumber, len(bundle.Operations), sumCounts(counts)) 172 91 173 - break // End of scan 92 + // Update cursor 93 + if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil { 94 + log.Error("Warning: failed to update cursor: %v", err) 174 95 } 175 96 } 176 97 177 - log.Info("PLC scan completed: %d operations, %d new PDS servers in %v", 178 - totalProcessed, newPDSCount, time.Since(startTime)) 98 + // Show final mempool status 99 + stats := s.bundleManager.libManager.GetMempoolStats() 100 + if count, ok := stats["count"].(int); ok && count > 0 { 101 + log.Info("Mempool contains %d operations (%.1f%% of next bundle)", 102 + count, float64(count)/float64(BUNDLE_SIZE)*100) 103 + } 179 104 105 + metrics.logSummary() 180 106 return nil 181 107 } 182 108 183 - func (s *Scanner) fillMempoolToSize(ctx context.Context, newPDSCount *int64, totalProcessed *int64) error { 184 - const fetchLimit = 1000 // PLC directory limit 109 + // processBatch extracts endpoints from operations 110 + func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) { 111 + counts := make(map[string]int64) 112 + seen := make(map[string]*PLCOperation) 185 113 186 - for { 187 - countBefore, err := s.db.GetMempoolCount(ctx) 188 - if err != nil { 189 - return err 190 - } 114 + // Collect unique endpoints 115 + for i := range ops { 116 + op := &ops[i] 191 117 192 - if countBefore >= BUNDLE_SIZE { 193 - log.Info("✓ Mempool filled to %d operations (target: %d)", countBefore, BUNDLE_SIZE) 194 - return nil 118 + if op.IsNullified() { 119 + continue 195 120 } 196 121 197 - log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", countBefore, BUNDLE_SIZE) 198 - 199 - // ✅ Get just the last operation (much faster!) 
200 - lastOp, err := s.db.GetLastMempoolOperation(ctx) 201 - if err != nil { 202 - return err 122 + for _, ep := range s.extractEndpointsFromOperation(*op) { 123 + key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint) 124 + if _, exists := seen[key]; !exists { 125 + seen[key] = op 126 + } 203 127 } 128 + } 204 129 205 - var afterTimestamp string 206 - if lastOp != nil { 207 - afterTimestamp = lastOp.CreatedAt.Format(time.RFC3339Nano) 208 - log.Verbose(" Using cursor: %s", afterTimestamp) 209 - } 130 + // Store new endpoints 131 + for key, firstOp := range seen { 132 + parts := strings.SplitN(key, ":", 2) 133 + epType, endpoint := parts[0], parts[1] 210 134 211 - // ✅ Always fetch 1000 (PLC limit) 212 - operations, err := s.client.Export(ctx, ExportOptions{ 213 - Count: fetchLimit, 214 - After: afterTimestamp, 215 - }) 216 - if err != nil { 217 - return fmt.Errorf("failed to fetch from PLC: %w", err) 135 + exists, err := s.db.EndpointExists(ctx, endpoint, epType) 136 + if err != nil || exists { 137 + continue 218 138 } 219 139 220 - fetchedCount := len(operations) 221 - log.Verbose(" Fetched %d operations from PLC", fetchedCount) 222 - 223 - // ✅ No data at all - we're done 224 - if fetchedCount == 0 { 225 - log.Info("→ No more data available from PLC directory (mempool has %d/%d)", countBefore, BUNDLE_SIZE) 226 - return nil 140 + if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil { 141 + log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err) 142 + continue 227 143 } 228 144 229 - // Add to mempool (with duplicate checking) 230 - if err := s.addToMempool(ctx, operations); err != nil { 231 - return err 232 - } 145 + log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint) 146 + counts[epType]++ 147 + } 233 148 234 - *totalProcessed += int64(fetchedCount) 235 - 236 - // Check if mempool actually grew 237 - countAfter, err := s.db.GetMempoolCount(ctx) 238 - if err != nil { 239 - return err 240 - } 241 - 242 - newOpsAdded := countAfter - countBefore 243 - duplicateCount := fetchedCount - newOpsAdded 244 - 245 - log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)", 246 - newOpsAdded, duplicateCount) 149 + return counts, nil 150 + } 247 151 248 - // ✅ KEY LOGIC: Only repeat if we got a FULL batch (1000) 249 - // If < 1000, it means we've caught up to the latest data 250 - if fetchedCount < fetchLimit { 251 - log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", 252 - fetchedCount, fetchLimit) 253 - log.Info("→ Stopping fill, mempool has %d/%d operations", countAfter, BUNDLE_SIZE) 254 - return nil 255 - } 152 + func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo { 153 + var endpoints []EndpointInfo 256 154 257 - // Got full batch (1000), might be more data - continue loop 258 - log.Verbose(" Received full batch (%d), checking for more data...", fetchLimit) 155 + services, ok := op.Operation["services"].(map[string]interface{}) 156 + if !ok { 157 + return endpoints 259 158 } 260 - } 261 159 262 - // addToMempool adds operations to mempool and processes them for PDS discovery 263 - func (s *Scanner) addToMempool(ctx context.Context, operations []PLCOperation) error { 264 - mempoolOps := make([]storage.MempoolOperation, len(operations)) 265 - 266 - for i, op := range operations { 267 - // ✅ Store the original RawJSON directly 268 - mempoolOps[i] = storage.MempoolOperation{ 269 - DID: op.DID, 270 - Operation: string(op.RawJSON), // ✅ Use RawJSON instead of Marshal 271 - CID: op.CID, 
272 - CreatedAt: op.CreatedAt, 273 - } 160 + // Extract PDS 161 + if ep := s.extractServiceEndpoint(services, "atproto_pds", "AtprotoPersonalDataServer", "pds"); ep != nil { 162 + endpoints = append(endpoints, *ep) 274 163 } 275 164 276 - // Add to mempool 277 - if err := s.db.AddToMempool(ctx, mempoolOps); err != nil { 278 - return err 165 + // Extract Labeler 166 + if ep := s.extractServiceEndpoint(services, "atproto_labeler", "AtprotoLabeler", "labeler"); ep != nil { 167 + endpoints = append(endpoints, *ep) 279 168 } 280 169 281 - // Process for PDS discovery immediately 282 - _, err := s.processBatch(ctx, operations) 283 - return err 170 + return endpoints 284 171 } 285 172 286 - // processMempoolRecursive checks mempool and creates bundles when >= 1000 ops 287 - func (s *Scanner) processMempoolRecursive(ctx context.Context, newPDSCount *int64, currentBundle *int, totalProcessed *int64) error { 288 - for { 289 - // Check mempool size 290 - count, err := s.db.GetMempoolCount(ctx) 291 - if err != nil { 292 - return err 293 - } 173 + func (s *Scanner) extractServiceEndpoint(services map[string]interface{}, serviceKey, expectedType, resultType string) *EndpointInfo { 174 + svc, ok := services[serviceKey].(map[string]interface{}) 175 + if !ok { 176 + return nil 177 + } 294 178 295 - log.Verbose("Mempool contains %d operations", count) 296 - 297 - if count < BUNDLE_SIZE { 298 - log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE) 299 - break 300 - } 301 - 302 - log.Info("→ Creating bundle from mempool (%d operations available)...", count) 303 - 304 - // Get first BUNDLE_SIZE operations ordered by timestamp 305 - mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE) 306 - if err != nil { 307 - return err 308 - } 309 - 310 - // Convert to PLCOperations and track IDs 311 - operations := make([]PLCOperation, 0, BUNDLE_SIZE) 312 - mempoolIDs := make([]int64, 0, BUNDLE_SIZE) 313 - seenCIDs := make(map[string]bool) 314 - 315 - for _, mop := range mempoolOps { 316 - // ✅ Skip duplicates (shouldn't happen but safety check) 317 - if seenCIDs[mop.CID] { 318 - mempoolIDs = append(mempoolIDs, mop.ID) // Still delete it 319 - continue 320 - } 321 - seenCIDs[mop.CID] = true 322 - 323 - var op PLCOperation 324 - json.Unmarshal([]byte(mop.Operation), &op) 325 - 326 - // ✅ Restore RawJSON from database 327 - op.RawJSON = []byte(mop.Operation) 328 - 329 - operations = append(operations, op) 330 - mempoolIDs = append(mempoolIDs, mop.ID) 179 + endpoint, hasEndpoint := svc["endpoint"].(string) 180 + svcType, hasType := svc["type"].(string) 331 181 332 - if len(operations) >= BUNDLE_SIZE { 333 - break 334 - } 335 - } 336 - 337 - // Final check 338 - if len(operations) < BUNDLE_SIZE { 339 - log.Error("⚠ Only got %d unique operations from mempool, need %d", len(operations), BUNDLE_SIZE) 340 - break 341 - } 342 - 343 - // Create bundle from these operations 344 - bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, operations) 345 - if err != nil { 346 - return err 182 + if hasEndpoint && hasType && svcType == expectedType { 183 + return &EndpointInfo{ 184 + Type: resultType, 185 + Endpoint: endpoint, 347 186 } 348 - 349 - // Remove from mempool (only what we used) 350 - if err := s.db.DeleteFromMempool(ctx, mempoolIDs[:len(operations)]); err != nil { 351 - return err 352 - } 353 - 354 - // Process for PDS 355 - batchNewPDS, _ := s.processBatch(ctx, operations) 356 - *newPDSCount += batchNewPDS 357 - 358 - *currentBundle = bundleNum 359 - 360 - // Update cursor 361 
- s.db.UpdateScanCursor(ctx, &storage.ScanCursor{ 362 - Source: "plc_directory", 363 - LastBundleNumber: bundleNum, 364 - LastScanTime: time.Now(), 365 - RecordsProcessed: *totalProcessed, 366 - }) 367 - 368 - log.Info("✓ Created bundle %06d from mempool", bundleNum) 369 187 } 370 188 371 189 return nil 372 190 } 373 191 374 - // processBatch processes operations for PDS discovery 375 - func (s *Scanner) processBatch(ctx context.Context, operations []PLCOperation) (int64, error) { 376 - newPDSCount := int64(0) 377 - seenInBatch := make(map[string]*PLCOperation) 378 - 379 - for _, op := range operations { 380 - if op.IsNullified() { 381 - continue 382 - } 192 + func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error { 193 + valid := validateEndpoint(endpoint) 194 + return s.db.UpsertEndpoint(ctx, &storage.Endpoint{ 195 + EndpointType: epType, 196 + Endpoint: endpoint, 197 + DiscoveredAt: discoveredAt, 198 + LastChecked: time.Time{}, 199 + Status: storage.EndpointStatusUnknown, 200 + Valid: valid, 201 + }) 202 + } 383 203 384 - pdsEndpoint := s.extractPDSFromOperation(op) 385 - if pdsEndpoint == "" { 386 - continue 387 - } 204 + func (s *Scanner) updateCursorForBundle(ctx context.Context, bundle int, totalProcessed int64) error { 205 + return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{ 206 + Source: "plc_directory", 207 + LastBundleNumber: bundle, 208 + LastScanTime: time.Now().UTC(), 209 + RecordsProcessed: totalProcessed, 210 + }) 211 + } 388 212 389 - if _, seen := seenInBatch[pdsEndpoint]; !seen { 390 - seenInBatch[pdsEndpoint] = &op 391 - } 213 + // Helper functions 214 + func (s *Scanner) mergeCounts(dest, src map[string]int64) { 215 + for k, v := range src { 216 + dest[k] += v 392 217 } 393 - 394 - for pdsEndpoint, firstOp := range seenInBatch { 395 - exists, err := s.db.PDSExists(ctx, pdsEndpoint) 396 - if err != nil || exists { 397 - continue 398 - } 218 + } 399 219 400 - if err := s.db.UpsertPDS(ctx, &storage.PDS{ 401 - Endpoint: pdsEndpoint, 402 - DiscoveredAt: firstOp.CreatedAt, 403 - LastChecked: time.Time{}, 404 - Status: storage.PDSStatusUnknown, 405 - }); err != nil { 406 - log.Error("Error storing PDS %s: %v", stripansi.Strip(pdsEndpoint), err) 407 - continue 408 - } 409 - 410 - log.Info("✓ Discovered new PDS: %s", stripansi.Strip(pdsEndpoint)) 411 - newPDSCount++ 220 + func sumCounts(counts map[string]int64) int64 { 221 + total := int64(0) 222 + for _, v := range counts { 223 + total += v 412 224 } 225 + return total 226 + } 413 227 414 - return newPDSCount, nil 228 + func isInsufficientOpsError(err error) bool { 229 + return err != nil && strings.Contains(err.Error(), "insufficient operations") 415 230 } 416 231 417 - func (s *Scanner) extractPDSFromOperation(op PLCOperation) string { 418 - if services, ok := op.Operation["services"].(map[string]interface{}); ok { 419 - if atprotoPDS, ok := services["atproto_pds"].(map[string]interface{}); ok { 420 - if endpoint, ok := atprotoPDS["endpoint"].(string); ok { 421 - if svcType, ok := atprotoPDS["type"].(string); ok { 422 - if svcType == "AtprotoPersonalDataServer" { 423 - return endpoint 424 - } 425 - } 426 - } 427 - } 232 + // ScanMetrics tracks scan progress 233 + type ScanMetrics struct { 234 + totalProcessed int64 235 + newEndpoints int64 236 + endpointCounts map[string]int64 237 + currentBundle int 238 + startTime time.Time 239 + } 240 + 241 + func newMetrics(startBundle int) *ScanMetrics { 242 + return &ScanMetrics{ 243 + endpointCounts: make(map[string]int64), 244 
+ currentBundle: startBundle,
245 + startTime: time.Now(),
428 246 }
429 - return ""
430 247 }
431 248
432 - func contains(s, substr string) bool {
433 - return len(s) >= len(substr) && s[:len(substr)] == substr
249 + func (m *ScanMetrics) logSummary() {
250 + log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
251 + m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
434 252 }
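For intuition about what the new processBatch and extractEndpointsFromOperation pair consumes: only the services object of each PLC operation matters, and an entry is picked up only when its type field matches the expected value. A test-style sketch with invented hostnames; it assumes the library's PLCOperation exposes the Operation map, which the code above already relies on via op.Operation["services"]:

```go
package plc

import "testing"

func TestExtractEndpointsFromOperation(t *testing.T) {
	s := &Scanner{} // db and config are not needed for extraction

	op := PLCOperation{}
	op.Operation = map[string]interface{}{
		"services": map[string]interface{}{
			"atproto_pds": map[string]interface{}{
				"type":     "AtprotoPersonalDataServer",
				"endpoint": "https://pds.example.com",
			},
			"atproto_labeler": map[string]interface{}{
				"type":     "AtprotoLabeler",
				"endpoint": "https://labeler.example.com",
			},
		},
	}

	eps := s.extractEndpointsFromOperation(op)
	if len(eps) != 2 {
		t.Fatalf("expected 2 endpoints, got %d: %+v", len(eps), eps)
	}
	// The PDS entry is extracted first, then the labeler.
	if eps[0].Type != "pds" || eps[0].Endpoint != "https://pds.example.com" {
		t.Errorf("unexpected PDS entry: %+v", eps[0])
	}
}
```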
+70 -52
internal/plc/types.go
··· 1 1 package plc 2 2 3 - import "time" 3 + import ( 4 + "net/url" 5 + "strings" 6 + 7 + plclib "tangled.org/atscan.net/plcbundle/plc" 8 + ) 9 + 10 + // Re-export library types 11 + type PLCOperation = plclib.PLCOperation 12 + type DIDDocument = plclib.DIDDocument 13 + type Client = plclib.Client 14 + type ExportOptions = plclib.ExportOptions 15 + 16 + // Keep your custom types 17 + const BUNDLE_SIZE = 10000 18 + 19 + type DIDHistoryEntry struct { 20 + Operation PLCOperation `json:"operation"` 21 + PLCBundle string `json:"plc_bundle,omitempty"` 22 + } 23 + 24 + type DIDHistory struct { 25 + DID string `json:"did"` 26 + Current *PLCOperation `json:"current"` 27 + Operations []DIDHistoryEntry `json:"operations"` 28 + } 4 29 5 - type PLCOperation struct { 6 - DID string `json:"did"` 7 - Operation map[string]interface{} `json:"operation"` 8 - CID string `json:"cid"` 9 - Nullified interface{} `json:"nullified,omitempty"` 10 - CreatedAt time.Time `json:"createdAt"` 30 + type EndpointInfo struct { 31 + Type string 32 + Endpoint string 33 + } 11 34 12 - RawJSON []byte `json:"-"` // ✅ Exported (capital R) 35 + // PLCOpLabel holds metadata from the label CSV file 36 + type PLCOpLabel struct { 37 + Bundle int `json:"bundle"` 38 + Position int `json:"position"` 39 + CID string `json:"cid"` 40 + Size int `json:"size"` 41 + Confidence float64 `json:"confidence"` 42 + Detectors []string `json:"detectors"` 13 43 } 14 44 15 - // Helper method to check if nullified 16 - func (op *PLCOperation) IsNullified() bool { 17 - if op.Nullified == nil { 45 + // validateEndpoint checks if endpoint is in correct format: https://<domain> 46 + func validateEndpoint(endpoint string) bool { 47 + // Must not be empty 48 + if endpoint == "" { 18 49 return false 19 50 } 20 51 21 - switch v := op.Nullified.(type) { 22 - case bool: 23 - return v 24 - case string: 25 - return v != "" 26 - default: 52 + // Must not have trailing slash 53 + if strings.HasSuffix(endpoint, "/") { 27 54 return false 28 55 } 29 - } 30 56 31 - // Get nullifying CID if available 32 - func (op *PLCOperation) GetNullifyingCID() string { 33 - if s, ok := op.Nullified.(string); ok { 34 - return s 57 + // Parse URL 58 + u, err := url.Parse(endpoint) 59 + if err != nil { 60 + return false 35 61 } 36 - return "" 37 - } 62 + 63 + // Must use https scheme 64 + if u.Scheme != "https" { 65 + return false 66 + } 38 67 39 - type DIDDocument struct { 40 - Context []string `json:"@context"` 41 - ID string `json:"id"` 42 - AlsoKnownAs []string `json:"alsoKnownAs"` 43 - VerificationMethod []VerificationMethod `json:"verificationMethod"` 44 - Service []Service `json:"service"` 45 - } 68 + // Must have a host 69 + if u.Host == "" { 70 + return false 71 + } 46 72 47 - type VerificationMethod struct { 48 - ID string `json:"id"` 49 - Type string `json:"type"` 50 - Controller string `json:"controller"` 51 - PublicKeyMultibase string `json:"publicKeyMultibase"` 52 - } 73 + // Must not have path (except empty) 74 + if u.Path != "" && u.Path != "/" { 75 + return false 76 + } 53 77 54 - type Service struct { 55 - ID string `json:"id"` 56 - Type string `json:"type"` 57 - ServiceEndpoint string `json:"serviceEndpoint"` 58 - } 78 + // Must not have query parameters 79 + if u.RawQuery != "" { 80 + return false 81 + } 59 82 60 - // DIDHistoryEntry represents a single operation in DID history 61 - type DIDHistoryEntry struct { 62 - Operation PLCOperation `json:"operation"` 63 - PLCBundle string `json:"plc_bundle,omitempty"` 64 - } 83 + // Must not have fragment 84 + if u.Fragment != 
"" { 85 + return false 86 + } 65 87 66 - // DIDHistory represents the full history of a DID 67 - type DIDHistory struct { 68 - DID string `json:"did"` 69 - Current *PLCOperation `json:"current"` 70 - Operations []DIDHistoryEntry `json:"operations"` 88 + return true 71 89 }
+56 -28
internal/storage/db.go
··· 2 2
3 3 import (
4 4 "context"
5 + "fmt"
5 6 "time"
6 7 )
8 +
9 + // NewDatabase creates a database connection based on type
10 + func NewDatabase(dbType, connectionString string) (Database, error) {
11 + switch dbType {
12 + case "postgres", "postgresql":
13 + return NewPostgresDB(connectionString)
14 + default:
15 + return nil, fmt.Errorf("unsupported database type: %s (supported: postgres, postgresql)", dbType)
16 + }
17 + }
7 18
8 19 type Database interface {
9 20 Close() error
10 21 Migrate() error
11 22
12 - // PDS operations
13 - UpsertPDS(ctx context.Context, pds *PDS) error
14 - GetPDS(ctx context.Context, endpoint string) (*PDS, error)
15 - GetPDSByID(ctx context.Context, id int64) (*PDS, error)
16 - GetPDSServers(ctx context.Context, filter *PDSFilter) ([]*PDS, error)
17 - UpdatePDSStatus(ctx context.Context, pdsID int64, update *PDSUpdate) error
18 - PDSExists(ctx context.Context, endpoint string) (bool, error)
19 - GetPDSIDByEndpoint(ctx context.Context, endpoint string) (int64, error)
20 - GetPDSScans(ctx context.Context, pdsID int64, limit int) ([]*PDSScan, error)
23 + // Endpoint operations
24 + UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error
25 + GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error)
26 + GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error)
27 + EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error)
28 + GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error)
29 + GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error)
30 + UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error
31 + SaveEndpointScan(ctx context.Context, scan *EndpointScan) error
32 + SetScanRetention(retention int)
33 + UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error
34 + UpdateEndpointServerDID(ctx context.Context, endpointID int64, serverDID string) error
35 + GetDuplicateEndpoints(ctx context.Context) (map[string][]string, error)
36 +
37 + // PDS virtual endpoints (created via JOINs)
38 + GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error)
39 + GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error)
40 + GetPDSStats(ctx context.Context) (*PDSStats, error)
41 + GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error)
42 + GetVersionStats(ctx context.Context) ([]*VersionStats, error)
43 +
44 + // IP operations (IP as primary key)
45 + UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error
46 + GetIPInfo(ctx context.Context, ip string) (*IPInfo, error)
47 + ShouldUpdateIPInfo(ctx context.Context, ip string) (exists bool, needsUpdate bool, err error)
21 48
22 49 // Cursor operations
23 50 GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
24 51 UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
25 -
26 - // Bundle operations
27 - CreateBundle(ctx context.Context, bundle *PLCBundle) error
28 - GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
29 - // GetBundleByID removed - bundle_number IS the ID
30 - GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
31 - GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
32 - GetBundleStats(ctx context.Context) (int64, int64, error)
33 - GetLastBundleNumber(ctx context.Context) (int, error)
34 - GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
35 -
36 - // Mempool operations
37 - AddToMempool(ctx context.Context, ops []MempoolOperation) error
38 - GetMempoolCount(ctx context.Context) (int, error)
39 - GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
40 - DeleteFromMempool(ctx context.Context, ids []int64) error
41 - GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
42 - GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
43 52
44 53 // Metrics
45 54 StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
46 55 GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
47 - GetPDSStats(ctx context.Context) (*PDSStats, error)
56 + GetEndpointStats(ctx context.Context) (*EndpointStats, error)
57 +
58 + // DID operations
59 + UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error
60 + UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error
61 + GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error)
62 + GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error)
63 + GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error)
64 + AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error
65 + GetTotalDIDCount(ctx context.Context) (int64, error)
66 +
67 + // PDS Repo operations
68 + UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error
69 + GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error)
70 + GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error)
71 + GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error)
72 +
73 + // Internal
74 + GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error)
75 + GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error)
48 76 }
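Callers are expected to go through this factory rather than constructing PostgresDB directly, which keeps the daemon agnostic about the backend. A brief sketch, not a complete program: storage and log refer to the internal packages above, and the DSN is invented:

```go
db, err := storage.NewDatabase("postgres",
	"postgres://atscan:secret@localhost:5432/atscan?sslmode=disable")
if err != nil {
	log.Fatal("database init failed: %v", err)
}
defer db.Close()

// Migrate creates the tables and indexes defined in postgres.go below.
if err := db.Migrate(); err != nil {
	log.Fatal("migrations failed: %v", err)
}
```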
+2104
internal/storage/postgres.go
··· 1 + package storage 2 + 3 + import ( 4 + "context" 5 + "database/sql" 6 + "encoding/json" 7 + "fmt" 8 + "time" 9 + 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/jackc/pgx/v5" 12 + "github.com/jackc/pgx/v5/pgxpool" 13 + _ "github.com/jackc/pgx/v5/stdlib" 14 + "github.com/lib/pq" 15 + ) 16 + 17 + type PostgresDB struct { 18 + db *sql.DB 19 + pool *pgxpool.Pool 20 + scanRetention int 21 + } 22 + 23 + func NewPostgresDB(connString string) (*PostgresDB, error) { 24 + log.Info("Connecting to PostgreSQL database...") 25 + 26 + // Open standard sql.DB (for compatibility) 27 + db, err := sql.Open("pgx", connString) 28 + if err != nil { 29 + return nil, fmt.Errorf("failed to open database: %w", err) 30 + } 31 + 32 + // Connection pool settings 33 + db.SetMaxOpenConns(50) 34 + db.SetMaxIdleConns(25) 35 + db.SetConnMaxLifetime(5 * time.Minute) 36 + db.SetConnMaxIdleTime(2 * time.Minute) 37 + 38 + log.Verbose(" Max open connections: 50") 39 + log.Verbose(" Max idle connections: 25") 40 + log.Verbose(" Connection max lifetime: 5m") 41 + 42 + // Test connection 43 + log.Info("Testing database connection...") 44 + if err := db.Ping(); err != nil { 45 + return nil, fmt.Errorf("failed to ping database: %w", err) 46 + } 47 + log.Info("✓ Database connection successful") 48 + 49 + // Also create pgx pool for COPY operations 50 + log.Verbose("Creating pgx connection pool...") 51 + pool, err := pgxpool.New(context.Background(), connString) 52 + if err != nil { 53 + return nil, fmt.Errorf("failed to create pgx pool: %w", err) 54 + } 55 + log.Verbose("✓ Connection pool created") 56 + 57 + return &PostgresDB{ 58 + db: db, 59 + pool: pool, 60 + scanRetention: 3, // Default 61 + }, nil 62 + } 63 + 64 + func (p *PostgresDB) Close() error { 65 + if p.pool != nil { 66 + p.pool.Close() 67 + } 68 + return p.db.Close() 69 + } 70 + 71 + func (p *PostgresDB) Migrate() error { 72 + log.Info("Running database migrations...") 73 + 74 + schema := ` 75 + -- Endpoints table (with IPv6 support) 76 + CREATE TABLE IF NOT EXISTS endpoints ( 77 + id BIGSERIAL PRIMARY KEY, 78 + endpoint_type TEXT NOT NULL DEFAULT 'pds', 79 + endpoint TEXT NOT NULL, 80 + server_did TEXT, 81 + discovered_at TIMESTAMP NOT NULL, 82 + last_checked TIMESTAMP, 83 + status INTEGER DEFAULT 0, 84 + ip TEXT, 85 + ipv6 TEXT, 86 + ip_resolved_at TIMESTAMP, 87 + valid BOOLEAN DEFAULT true, 88 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 89 + UNIQUE(endpoint_type, endpoint) 90 + ); 91 + 92 + CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint); 93 + CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status); 94 + CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type); 95 + CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip); 96 + CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6); 97 + CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did); 98 + CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at); 99 + CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid); 100 + 101 + -- IP infos table (IP as PRIMARY KEY) 102 + CREATE TABLE IF NOT EXISTS ip_infos ( 103 + ip TEXT PRIMARY KEY, 104 + city TEXT, 105 + country TEXT, 106 + country_code TEXT, 107 + asn INTEGER, 108 + asn_org TEXT, 109 + is_datacenter BOOLEAN, 110 + is_vpn BOOLEAN, 111 + is_crawler BOOLEAN, 112 + is_tor BOOLEAN, 113 + is_proxy BOOLEAN, 114 + latitude REAL, 115 + longitude REAL, 116 + 
raw_data JSONB, 117 + fetched_at TIMESTAMP NOT NULL, 118 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 119 + ); 120 + 121 + CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code); 122 + CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn); 123 + 124 + -- Endpoint scans 125 + CREATE TABLE IF NOT EXISTS endpoint_scans ( 126 + id BIGSERIAL PRIMARY KEY, 127 + endpoint_id BIGINT NOT NULL, 128 + status INTEGER NOT NULL, 129 + response_time DOUBLE PRECISION, 130 + user_count BIGINT, 131 + version TEXT, 132 + used_ip TEXT, 133 + scan_data JSONB, 134 + scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 135 + FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE 136 + ); 137 + 138 + CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC); 139 + CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at); 140 + CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST); 141 + CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip); 142 + 143 + 144 + CREATE TABLE IF NOT EXISTS plc_metrics ( 145 + id BIGSERIAL PRIMARY KEY, 146 + total_dids BIGINT, 147 + total_pds BIGINT, 148 + unique_pds BIGINT, 149 + scan_duration_ms BIGINT, 150 + error_count INTEGER, 151 + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 152 + ); 153 + 154 + CREATE TABLE IF NOT EXISTS scan_cursors ( 155 + source TEXT PRIMARY KEY, 156 + last_bundle_number INTEGER DEFAULT 0, 157 + last_scan_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 158 + records_processed BIGINT DEFAULT 0 159 + ); 160 + 161 + -- Minimal dids table 162 + CREATE TABLE IF NOT EXISTS dids ( 163 + did TEXT PRIMARY KEY, 164 + handle TEXT, 165 + pds TEXT, 166 + bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb, 167 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 168 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 169 + ); 170 + 171 + CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers); 172 + CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at); 173 + CREATE INDEX IF NOT EXISTS idx_dids_handle ON dids(handle); 174 + CREATE INDEX IF NOT EXISTS idx_dids_pds ON dids(pds); 175 + 176 + -- PDS Repositories table 177 + CREATE TABLE IF NOT EXISTS pds_repos ( 178 + id BIGSERIAL PRIMARY KEY, 179 + endpoint_id BIGINT NOT NULL, 180 + did TEXT NOT NULL, 181 + head TEXT, 182 + rev TEXT, 183 + active BOOLEAN DEFAULT true, 184 + status TEXT, 185 + first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 186 + last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 187 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 188 + FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE, 189 + UNIQUE(endpoint_id, did) 190 + ); 191 + 192 + CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint ON pds_repos(endpoint_id); 193 + CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint_id_desc ON pds_repos(endpoint_id, id DESC); 194 + CREATE INDEX IF NOT EXISTS idx_pds_repos_did ON pds_repos(did); 195 + CREATE INDEX IF NOT EXISTS idx_pds_repos_active ON pds_repos(active); 196 + CREATE INDEX IF NOT EXISTS idx_pds_repos_status ON pds_repos(status); 197 + CREATE INDEX IF NOT EXISTS idx_pds_repos_last_seen ON pds_repos(last_seen DESC); 198 + ` 199 + 200 + _, err := p.db.Exec(schema) 201 + if err != nil { 202 + return err 203 + } 204 + 205 + log.Info("✓ Database migrations completed successfully") 206 + return nil 207 + } 208 + 209 + // ===== 
ENDPOINT OPERATIONS ===== 210 + 211 + func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error { 212 + query := ` 213 + INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid) 214 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) 215 + ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET 216 + last_checked = EXCLUDED.last_checked, 217 + status = EXCLUDED.status, 218 + ip = CASE 219 + WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip 220 + ELSE endpoints.ip 221 + END, 222 + ipv6 = CASE 223 + WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6 224 + ELSE endpoints.ipv6 225 + END, 226 + ip_resolved_at = CASE 227 + WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at 228 + ELSE endpoints.ip_resolved_at 229 + END, 230 + valid = EXCLUDED.valid, 231 + updated_at = CURRENT_TIMESTAMP 232 + RETURNING id 233 + ` 234 + err := p.db.QueryRowContext(ctx, query, 235 + endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt, 236 + endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID) 237 + return err 238 + } 239 + 240 + func (p *PostgresDB) EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error) { 241 + query := "SELECT EXISTS(SELECT 1 FROM endpoints WHERE endpoint = $1 AND endpoint_type = $2)" 242 + var exists bool 243 + err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(&exists) 244 + return exists, err 245 + } 246 + 247 + func (p *PostgresDB) GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error) { 248 + query := "SELECT id FROM endpoints WHERE endpoint = $1 AND endpoint_type = $2" 249 + var id int64 250 + err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(&id) 251 + return id, err 252 + } 253 + 254 + func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) { 255 + query := ` 256 + SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status, 257 + ip, ipv6, ip_resolved_at, valid, updated_at 258 + FROM endpoints 259 + WHERE endpoint = $1 AND endpoint_type = $2 260 + ` 261 + 262 + var ep Endpoint 263 + var lastChecked, ipResolvedAt sql.NullTime 264 + var ip, ipv6 sql.NullString 265 + 266 + err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan( 267 + &ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked, 268 + &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt, 269 + ) 270 + if err != nil { 271 + return nil, err 272 + } 273 + 274 + if lastChecked.Valid { 275 + ep.LastChecked = lastChecked.Time 276 + } 277 + if ip.Valid { 278 + ep.IP = ip.String 279 + } 280 + if ipv6.Valid { 281 + ep.IPv6 = ipv6.String 282 + } 283 + if ipResolvedAt.Valid { 284 + ep.IPResolvedAt = ipResolvedAt.Time 285 + } 286 + 287 + return &ep, nil 288 + } 289 + 290 + func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) { 291 + query := ` 292 + SELECT DISTINCT ON (COALESCE(server_did, id::text)) 293 + id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status, 294 + ip, ipv6, ip_resolved_at, valid, updated_at 295 + FROM endpoints 296 + WHERE 1=1 297 + ` 298 + args := []interface{}{} 299 + argIdx := 1 300 + 301 + if filter != nil { 302 + if filter.Type != "" { 303 + query += 
fmt.Sprintf(" AND endpoint_type = $%d", argIdx) 304 + args = append(args, filter.Type) 305 + argIdx++ 306 + } 307 + 308 + // NEW: Filter by valid flag 309 + if filter.OnlyValid { 310 + query += fmt.Sprintf(" AND valid = true", argIdx) 311 + } 312 + if filter.Status != "" { 313 + statusInt := EndpointStatusUnknown 314 + switch filter.Status { 315 + case "online": 316 + statusInt = EndpointStatusOnline 317 + case "offline": 318 + statusInt = EndpointStatusOffline 319 + } 320 + query += fmt.Sprintf(" AND status = $%d", argIdx) 321 + args = append(args, statusInt) 322 + argIdx++ 323 + } 324 + 325 + // Filter for stale endpoints only 326 + if filter.OnlyStale && filter.RecheckInterval > 0 { 327 + cutoffTime := time.Now().UTC().Add(-filter.RecheckInterval) 328 + query += fmt.Sprintf(" AND (last_checked IS NULL OR last_checked < $%d)", argIdx) 329 + args = append(args, cutoffTime) 330 + argIdx++ 331 + } 332 + } 333 + 334 + // NEW: Choose ordering strategy 335 + if filter != nil && filter.Random { 336 + // For random selection, we need to wrap in a subquery 337 + query = fmt.Sprintf(` 338 + WITH filtered_endpoints AS ( 339 + %s 340 + ) 341 + SELECT * FROM filtered_endpoints 342 + ORDER BY RANDOM() 343 + `, query) 344 + } else { 345 + // Original ordering for non-random queries 346 + query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC" 347 + } 348 + 349 + if filter != nil && filter.Limit > 0 { 350 + query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1) 351 + args = append(args, filter.Limit, filter.Offset) 352 + } 353 + 354 + rows, err := p.db.QueryContext(ctx, query, args...) 355 + if err != nil { 356 + return nil, err 357 + } 358 + defer rows.Close() 359 + 360 + var endpoints []*Endpoint 361 + for rows.Next() { 362 + var ep Endpoint 363 + var lastChecked, ipResolvedAt sql.NullTime 364 + var ip, ipv6, serverDID sql.NullString 365 + 366 + err := rows.Scan( 367 + &ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked, 368 + &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt, 369 + ) 370 + if err != nil { 371 + return nil, err 372 + } 373 + 374 + if serverDID.Valid { 375 + ep.ServerDID = serverDID.String 376 + } 377 + if lastChecked.Valid { 378 + ep.LastChecked = lastChecked.Time 379 + } 380 + if ip.Valid { 381 + ep.IP = ip.String 382 + } 383 + if ipv6.Valid { 384 + ep.IPv6 = ipv6.String 385 + } 386 + if ipResolvedAt.Valid { 387 + ep.IPResolvedAt = ipResolvedAt.Time 388 + } 389 + 390 + endpoints = append(endpoints, &ep) 391 + } 392 + 393 + return endpoints, rows.Err() 394 + } 395 + 396 + func (p *PostgresDB) UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error { 397 + query := ` 398 + UPDATE endpoints 399 + SET status = $1, last_checked = $2, updated_at = $3 400 + WHERE id = $4 401 + ` 402 + _, err := p.db.ExecContext(ctx, query, update.Status, update.LastChecked, time.Now().UTC(), endpointID) 403 + return err 404 + } 405 + 406 + func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error { 407 + query := ` 408 + UPDATE endpoints 409 + SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4 410 + WHERE id = $5 411 + ` 412 + _, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID) 413 + return err 414 + } 415 + 416 + func (p *PostgresDB) UpdateEndpointServerDID(ctx context.Context, endpointID int64, serverDID string) error { 417 + query := ` 418 + UPDATE endpoints 419 + SET server_did = $1, updated_at = 
$2 420 + WHERE id = $3 421 + ` 422 + _, err := p.db.ExecContext(ctx, query, serverDID, time.Now().UTC(), endpointID) 423 + return err 424 + } 425 + 426 + func (p *PostgresDB) GetDuplicateEndpoints(ctx context.Context) (map[string][]string, error) { 427 + query := ` 428 + SELECT server_did, array_agg(endpoint ORDER BY discovered_at ASC) as endpoints 429 + FROM endpoints 430 + WHERE server_did IS NOT NULL 431 + AND server_did != '' 432 + AND endpoint_type = 'pds' 433 + GROUP BY server_did 434 + HAVING COUNT(*) > 1 435 + ORDER BY COUNT(*) DESC 436 + ` 437 + 438 + rows, err := p.db.QueryContext(ctx, query) 439 + if err != nil { 440 + return nil, err 441 + } 442 + defer rows.Close() 443 + 444 + duplicates := make(map[string][]string) 445 + for rows.Next() { 446 + var serverDID string 447 + var endpoints []string 448 + 449 + err := rows.Scan(&serverDID, pq.Array(&endpoints)) 450 + if err != nil { 451 + return nil, err 452 + } 453 + 454 + duplicates[serverDID] = endpoints 455 + } 456 + 457 + return duplicates, rows.Err() 458 + } 459 + 460 + // ===== SCAN OPERATIONS ===== 461 + 462 + func (p *PostgresDB) SetScanRetention(retention int) { 463 + p.scanRetention = retention 464 + } 465 + 466 + func (p *PostgresDB) SaveEndpointScan(ctx context.Context, scan *EndpointScan) error { 467 + var scanDataJSON []byte 468 + if scan.ScanData != nil { 469 + scanDataJSON, _ = json.Marshal(scan.ScanData) 470 + } 471 + 472 + tx, err := p.db.BeginTx(ctx, nil) 473 + if err != nil { 474 + return err 475 + } 476 + defer tx.Rollback() 477 + 478 + query := ` 479 + INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at) 480 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) 481 + ` 482 + _, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt) 483 + if err != nil { 484 + return err 485 + } 486 + 487 + // Use configured retention value 488 + cleanupQuery := ` 489 + DELETE FROM endpoint_scans 490 + WHERE endpoint_id = $1 491 + AND id NOT IN ( 492 + SELECT id 493 + FROM endpoint_scans 494 + WHERE endpoint_id = $1 495 + ORDER BY scanned_at DESC 496 + LIMIT $2 497 + ) 498 + ` 499 + _, err = tx.ExecContext(ctx, cleanupQuery, scan.EndpointID, p.scanRetention) 500 + if err != nil { 501 + return err 502 + } 503 + 504 + return tx.Commit() 505 + } 506 + 507 + func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) { 508 + query := ` 509 + SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at 510 + FROM endpoint_scans 511 + WHERE endpoint_id = $1 512 + ORDER BY scanned_at DESC 513 + LIMIT $2 514 + ` 515 + 516 + rows, err := p.db.QueryContext(ctx, query, endpointID, limit) 517 + if err != nil { 518 + return nil, err 519 + } 520 + defer rows.Close() 521 + 522 + var scans []*EndpointScan 523 + for rows.Next() { 524 + var scan EndpointScan 525 + var responseTime sql.NullFloat64 526 + var userCount sql.NullInt64 527 + var version, usedIP sql.NullString 528 + var scanDataJSON []byte 529 + 530 + err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt) 531 + if err != nil { 532 + return nil, err 533 + } 534 + 535 + if responseTime.Valid { 536 + scan.ResponseTime = responseTime.Float64 537 + } 538 + 539 + if userCount.Valid { 540 + scan.UserCount = userCount.Int64 541 + } 542 + 543 + if version.Valid { 544 + 
scan.Version = version.String 545 + } 546 + 547 + if usedIP.Valid { 548 + scan.UsedIP = usedIP.String 549 + } 550 + 551 + if len(scanDataJSON) > 0 { 552 + var scanData EndpointScanData 553 + if err := json.Unmarshal(scanDataJSON, &scanData); err == nil { 554 + scan.ScanData = &scanData 555 + } 556 + } 557 + 558 + scans = append(scans, &scan) 559 + } 560 + 561 + return scans, rows.Err() 562 + } 563 + 564 + // ===== PDS VIRTUAL ENDPOINTS ===== 565 + 566 + func (p *PostgresDB) GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error) { 567 + query := ` 568 + WITH unique_servers AS ( 569 + SELECT DISTINCT ON (COALESCE(server_did, id::text)) 570 + id, 571 + endpoint, 572 + server_did, 573 + discovered_at, 574 + last_checked, 575 + status, 576 + ip, 577 + ipv6, 578 + valid 579 + FROM endpoints 580 + WHERE endpoint_type = 'pds' 581 + ORDER BY COALESCE(server_did, id::text), discovered_at ASC 582 + ) 583 + SELECT 584 + e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid, 585 + latest.user_count, latest.response_time, latest.version, latest.scanned_at, 586 + i.city, i.country, i.country_code, i.asn, i.asn_org, 587 + i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy, 588 + i.latitude, i.longitude 589 + FROM unique_servers e 590 + LEFT JOIN LATERAL ( 591 + SELECT 592 + user_count, 593 + response_time, 594 + version, 595 + scanned_at 596 + FROM endpoint_scans 597 + WHERE endpoint_id = e.id AND status = 1 598 + ORDER BY scanned_at DESC 599 + LIMIT 1 600 + ) latest ON true 601 + LEFT JOIN ip_infos i ON e.ip = i.ip 602 + WHERE 1=1 603 + ` 604 + 605 + args := []interface{}{} 606 + argIdx := 1 607 + 608 + if filter != nil { 609 + if filter.Status != "" { 610 + statusInt := EndpointStatusUnknown 611 + switch filter.Status { 612 + case "online": 613 + statusInt = EndpointStatusOnline 614 + case "offline": 615 + statusInt = EndpointStatusOffline 616 + } 617 + query += fmt.Sprintf(" AND e.status = $%d", argIdx) 618 + args = append(args, statusInt) 619 + argIdx++ 620 + } 621 + 622 + if filter.MinUserCount > 0 { 623 + query += fmt.Sprintf(" AND latest.user_count >= $%d", argIdx) 624 + args = append(args, filter.MinUserCount) 625 + argIdx++ 626 + } 627 + } 628 + 629 + query += " ORDER BY latest.user_count DESC NULLS LAST" 630 + 631 + if filter != nil && filter.Limit > 0 { 632 + query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1) 633 + args = append(args, filter.Limit, filter.Offset) 634 + } 635 + 636 + rows, err := p.db.QueryContext(ctx, query, args...) 
637 + if err != nil { 638 + return nil, err 639 + } 640 + defer rows.Close() 641 + 642 + var items []*PDSListItem 643 + for rows.Next() { 644 + item := &PDSListItem{} 645 + var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString 646 + var asn sql.NullInt32 647 + var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool 648 + var lat, lon sql.NullFloat64 649 + var userCount sql.NullInt32 650 + var responseTime sql.NullFloat64 651 + var version sql.NullString 652 + var scannedAt sql.NullTime 653 + 654 + err := rows.Scan( 655 + &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid, 656 + &userCount, &responseTime, &version, &scannedAt, 657 + &city, &country, &countryCode, &asn, &asnOrg, 658 + &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy, 659 + &lat, &lon, 660 + ) 661 + if err != nil { 662 + return nil, err 663 + } 664 + 665 + if ip.Valid { 666 + item.IP = ip.String 667 + } 668 + if ipv6.Valid { 669 + item.IPv6 = ipv6.String 670 + } 671 + if serverDID.Valid { 672 + item.ServerDID = serverDID.String 673 + } 674 + 675 + // Add latest scan data if available 676 + if userCount.Valid { 677 + item.LatestScan = &struct { 678 + UserCount int 679 + ResponseTime float64 680 + Version string 681 + ScannedAt time.Time 682 + }{ 683 + UserCount: int(userCount.Int32), 684 + ResponseTime: responseTime.Float64, 685 + Version: version.String, 686 + ScannedAt: scannedAt.Time, 687 + } 688 + } 689 + 690 + // Add IP info if available 691 + if city.Valid || country.Valid { 692 + item.IPInfo = &IPInfo{ 693 + IP: ip.String, 694 + City: city.String, 695 + Country: country.String, 696 + CountryCode: countryCode.String, 697 + ASN: int(asn.Int32), 698 + ASNOrg: asnOrg.String, 699 + IsDatacenter: isDatacenter.Bool, 700 + IsVPN: isVPN.Bool, 701 + IsCrawler: isCrawler.Bool, 702 + IsTor: isTor.Bool, 703 + IsProxy: isProxy.Bool, 704 + Latitude: float32(lat.Float64), 705 + Longitude: float32(lon.Float64), 706 + } 707 + } 708 + 709 + items = append(items, item) 710 + } 711 + 712 + return items, rows.Err() 713 + } 714 + 715 + func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) { 716 + query := ` 717 + WITH target_endpoint AS MATERIALIZED ( 718 + SELECT 719 + e.id, 720 + e.endpoint, 721 + e.server_did, 722 + e.discovered_at, 723 + e.last_checked, 724 + e.status, 725 + e.ip, 726 + e.ipv6, 727 + e.valid 728 + FROM endpoints e 729 + WHERE e.endpoint = $1 730 + AND e.endpoint_type = 'pds' 731 + LIMIT 1 732 + ) 733 + SELECT 734 + te.id, 735 + te.endpoint, 736 + te.server_did, 737 + te.discovered_at, 738 + te.last_checked, 739 + te.status, 740 + te.ip, 741 + te.ipv6, 742 + te.valid, 743 + latest.user_count, 744 + latest.response_time, 745 + latest.version, 746 + latest.scan_data->'metadata'->'server_info' as server_info, 747 + latest.scanned_at, 748 + i.city, i.country, i.country_code, i.asn, i.asn_org, 749 + i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy, 750 + i.latitude, i.longitude, 751 + i.raw_data, 752 + COALESCE( 753 + ARRAY( 754 + SELECT e2.endpoint 755 + FROM endpoints e2 756 + WHERE e2.server_did = te.server_did 757 + AND e2.endpoint_type = 'pds' 758 + AND e2.endpoint != te.endpoint 759 + AND te.server_did IS NOT NULL 760 + ORDER BY e2.discovered_at 761 + ), 762 + ARRAY[]::text[] 763 + ) as aliases, 764 + CASE 765 + WHEN te.server_did IS NOT NULL THEN ( 766 + SELECT MIN(e3.discovered_at) 767 + FROM endpoints e3 768 + WHERE e3.server_did = te.server_did 769 + AND e3.endpoint_type = 
'pds' 770 + ) 771 + ELSE NULL 772 + END as first_discovered_at 773 + FROM target_endpoint te 774 + LEFT JOIN LATERAL ( 775 + SELECT 776 + es.scan_data, 777 + es.response_time, 778 + es.version, 779 + es.scanned_at, 780 + es.user_count 781 + FROM endpoint_scans es 782 + WHERE es.endpoint_id = te.id 783 + ORDER BY es.scanned_at DESC 784 + LIMIT 1 785 + ) latest ON true 786 + LEFT JOIN ip_infos i ON te.ip = i.ip; 787 + ` 788 + 789 + detail := &PDSDetail{} 790 + var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString 791 + var asn sql.NullInt32 792 + var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool 793 + var lat, lon sql.NullFloat64 794 + var userCount sql.NullInt32 795 + var responseTime sql.NullFloat64 796 + var version sql.NullString 797 + var serverInfoJSON []byte 798 + var scannedAt sql.NullTime 799 + var rawDataJSON []byte 800 + var aliases []string 801 + var firstDiscoveredAt sql.NullTime 802 + 803 + err := p.db.QueryRowContext(ctx, query, endpoint).Scan( 804 + &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid, 805 + &userCount, &responseTime, &version, &serverInfoJSON, &scannedAt, 806 + &city, &country, &countryCode, &asn, &asnOrg, 807 + &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy, 808 + &lat, &lon, 809 + &rawDataJSON, 810 + pq.Array(&aliases), 811 + &firstDiscoveredAt, 812 + ) 813 + if err != nil { 814 + return nil, err 815 + } 816 + 817 + if ip.Valid { 818 + detail.IP = ip.String 819 + } 820 + if ipv6.Valid { 821 + detail.IPv6 = ipv6.String 822 + } 823 + 824 + if serverDID.Valid { 825 + detail.ServerDID = serverDID.String 826 + } 827 + 828 + // Set aliases and is_primary 829 + detail.Aliases = aliases 830 + if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid { 831 + detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) || 832 + detail.DiscoveredAt.Before(firstDiscoveredAt.Time) 833 + } else { 834 + detail.IsPrimary = true 835 + } 836 + 837 + // Parse latest scan data 838 + if userCount.Valid { 839 + var serverInfo interface{} 840 + if len(serverInfoJSON) > 0 { 841 + json.Unmarshal(serverInfoJSON, &serverInfo) 842 + } 843 + 844 + detail.LatestScan = &struct { 845 + UserCount int 846 + ResponseTime float64 847 + Version string 848 + ServerInfo interface{} 849 + ScannedAt time.Time 850 + }{ 851 + UserCount: int(userCount.Int32), 852 + ResponseTime: responseTime.Float64, 853 + Version: version.String, 854 + ServerInfo: serverInfo, 855 + ScannedAt: scannedAt.Time, 856 + } 857 + } 858 + 859 + // Parse IP info with all fields 860 + if city.Valid || country.Valid { 861 + detail.IPInfo = &IPInfo{ 862 + IP: ip.String, 863 + City: city.String, 864 + Country: country.String, 865 + CountryCode: countryCode.String, 866 + ASN: int(asn.Int32), 867 + ASNOrg: asnOrg.String, 868 + IsDatacenter: isDatacenter.Bool, 869 + IsVPN: isVPN.Bool, 870 + IsCrawler: isCrawler.Bool, 871 + IsTor: isTor.Bool, 872 + IsProxy: isProxy.Bool, 873 + Latitude: float32(lat.Float64), 874 + Longitude: float32(lon.Float64), 875 + } 876 + 877 + if len(rawDataJSON) > 0 { 878 + json.Unmarshal(rawDataJSON, &detail.IPInfo.RawData) 879 + } 880 + } 881 + 882 + return detail, nil 883 + } 884 + 885 + func (p *PostgresDB) GetPDSStats(ctx context.Context) (*PDSStats, error) { 886 + query := ` 887 + WITH unique_servers AS ( 888 + SELECT DISTINCT ON (COALESCE(server_did, id::text)) 889 + id, 890 + COALESCE(server_did, id::text) as server_identity, 891 + status 892 + FROM endpoints 893 + 
WHERE endpoint_type = 'pds' 894 + ORDER BY COALESCE(server_did, id::text), discovered_at ASC 895 + ), 896 + latest_scans AS ( 897 + SELECT DISTINCT ON (us.id) 898 + us.id, 899 + es.user_count, 900 + us.status 901 + FROM unique_servers us 902 + LEFT JOIN endpoint_scans es ON us.id = es.endpoint_id 903 + ORDER BY us.id, es.scanned_at DESC 904 + ) 905 + SELECT 906 + COUNT(*) as total, 907 + SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online, 908 + SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline, 909 + SUM(COALESCE(user_count, 0)) as total_users 910 + FROM latest_scans 911 + ` 912 + 913 + stats := &PDSStats{} 914 + err := p.db.QueryRowContext(ctx, query).Scan( 915 + &stats.TotalEndpoints, &stats.OnlineEndpoints, &stats.OfflineEndpoints, &stats.TotalDIDs, 916 + ) 917 + 918 + return stats, err 919 + } 920 + 921 + func (p *PostgresDB) GetEndpointStats(ctx context.Context) (*EndpointStats, error) { 922 + query := ` 923 + SELECT 924 + COUNT(*) as total_endpoints, 925 + SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online_endpoints, 926 + SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline_endpoints 927 + FROM endpoints 928 + ` 929 + 930 + var stats EndpointStats 931 + err := p.db.QueryRowContext(ctx, query).Scan( 932 + &stats.TotalEndpoints, &stats.OnlineEndpoints, &stats.OfflineEndpoints, 933 + ) 934 + if err != nil { 935 + return nil, err 936 + } 937 + 938 + // Get average response time from recent scans 939 + avgQuery := ` 940 + SELECT AVG(response_time) 941 + FROM endpoint_scans 942 + WHERE response_time > 0 AND scanned_at > NOW() - INTERVAL '1 hour' 943 + ` 944 + var avgResponseTime sql.NullFloat64 945 + _ = p.db.QueryRowContext(ctx, avgQuery).Scan(&avgResponseTime) 946 + if avgResponseTime.Valid { 947 + stats.AvgResponseTime = avgResponseTime.Float64 948 + } 949 + 950 + // Get counts by type 951 + typeQuery := ` 952 + SELECT endpoint_type, COUNT(*) 953 + FROM endpoints 954 + GROUP BY endpoint_type 955 + ` 956 + rows, err := p.db.QueryContext(ctx, typeQuery) 957 + if err == nil { 958 + defer rows.Close() 959 + stats.ByType = make(map[string]int64) 960 + for rows.Next() { 961 + var typ string 962 + var count int64 963 + if err := rows.Scan(&typ, &count); err == nil { 964 + stats.ByType[typ] = count 965 + } 966 + } 967 + } 968 + 969 + // Get total DIDs from latest PDS scans 970 + didQuery := ` 971 + WITH unique_servers AS ( 972 + SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text)) 973 + e.id 974 + FROM endpoints e 975 + WHERE e.endpoint_type = 'pds' 976 + ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC 977 + ), 978 + latest_pds_scans AS ( 979 + SELECT DISTINCT ON (us.id) 980 + us.id, 981 + es.user_count 982 + FROM unique_servers us 983 + LEFT JOIN endpoint_scans es ON us.id = es.endpoint_id 984 + ORDER BY us.id, es.scanned_at DESC 985 + ) 986 + SELECT SUM(user_count) FROM latest_pds_scans 987 + ` 988 + var totalDIDs sql.NullInt64 989 + _ = p.db.QueryRowContext(ctx, didQuery).Scan(&totalDIDs) 990 + if totalDIDs.Valid { 991 + stats.TotalDIDs = totalDIDs.Int64 992 + } 993 + 994 + return &stats, err 995 + } 996 + 997 + // ===== IP INFO OPERATIONS ===== 998 + 999 + func (p *PostgresDB) UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error { 1000 + rawDataJSON, _ := json.Marshal(ipInfo) 1001 + 1002 + // Extract fields from ipInfo map 1003 + city := extractString(ipInfo, "location", "city") 1004 + country := extractString(ipInfo, "location", "country") 1005 + countryCode := extractString(ipInfo, "location", "country_code") 1006 + asn := 
extractInt(ipInfo, "asn", "asn") 1007 + asnOrg := extractString(ipInfo, "asn", "org") 1008 + 1009 + // Extract top-level boolean flags 1010 + isDatacenter := false 1011 + if val, ok := ipInfo["is_datacenter"].(bool); ok { 1012 + isDatacenter = val 1013 + } 1014 + 1015 + isVPN := false 1016 + if val, ok := ipInfo["is_vpn"].(bool); ok { 1017 + isVPN = val 1018 + } 1019 + 1020 + isCrawler := false 1021 + if val, ok := ipInfo["is_crawler"].(bool); ok { 1022 + isCrawler = val 1023 + } 1024 + 1025 + isTor := false 1026 + if val, ok := ipInfo["is_tor"].(bool); ok { 1027 + isTor = val 1028 + } 1029 + 1030 + isProxy := false 1031 + if val, ok := ipInfo["is_proxy"].(bool); ok { 1032 + isProxy = val 1033 + } 1034 + 1035 + lat := extractFloat(ipInfo, "location", "latitude") 1036 + lon := extractFloat(ipInfo, "location", "longitude") 1037 + 1038 + query := ` 1039 + INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at) 1040 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) 1041 + ON CONFLICT(ip) DO UPDATE SET 1042 + city = EXCLUDED.city, 1043 + country = EXCLUDED.country, 1044 + country_code = EXCLUDED.country_code, 1045 + asn = EXCLUDED.asn, 1046 + asn_org = EXCLUDED.asn_org, 1047 + is_datacenter = EXCLUDED.is_datacenter, 1048 + is_vpn = EXCLUDED.is_vpn, 1049 + is_crawler = EXCLUDED.is_crawler, 1050 + is_tor = EXCLUDED.is_tor, 1051 + is_proxy = EXCLUDED.is_proxy, 1052 + latitude = EXCLUDED.latitude, 1053 + longitude = EXCLUDED.longitude, 1054 + raw_data = EXCLUDED.raw_data, 1055 + fetched_at = EXCLUDED.fetched_at, 1056 + updated_at = CURRENT_TIMESTAMP 1057 + ` 1058 + _, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC()) 1059 + return err 1060 + } 1061 + 1062 + func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) { 1063 + query := ` 1064 + SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, 1065 + latitude, longitude, raw_data, fetched_at, updated_at 1066 + FROM ip_infos 1067 + WHERE ip = $1 1068 + ` 1069 + 1070 + info := &IPInfo{} 1071 + var rawDataJSON []byte 1072 + 1073 + err := p.db.QueryRowContext(ctx, query, ip).Scan( 1074 + &info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg, 1075 + &info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy, 1076 + &info.Latitude, &info.Longitude, 1077 + &rawDataJSON, &info.FetchedAt, &info.UpdatedAt, 1078 + ) 1079 + if err != nil { 1080 + return nil, err 1081 + } 1082 + 1083 + if len(rawDataJSON) > 0 { 1084 + json.Unmarshal(rawDataJSON, &info.RawData) 1085 + } 1086 + 1087 + return info, nil 1088 + } 1089 + 1090 + func (p *PostgresDB) ShouldUpdateIPInfo(ctx context.Context, ip string) (bool, bool, error) { 1091 + query := `SELECT fetched_at FROM ip_infos WHERE ip = $1` 1092 + 1093 + var fetchedAt time.Time 1094 + err := p.db.QueryRowContext(ctx, query, ip).Scan(&fetchedAt) 1095 + if err == sql.ErrNoRows { 1096 + return false, true, nil // Doesn't exist, needs update 1097 + } 1098 + if err != nil { 1099 + return false, false, err 1100 + } 1101 + 1102 + // Check if older than 30 days 1103 + needsUpdate := time.Since(fetchedAt) > 30*24*time.Hour 1104 + return true, needsUpdate, nil 1105 + } 1106 + 1107 + // ===== HELPER FUNCTIONS ===== 1108 + 1109 + func extractString(data 
map[string]interface{}, keys ...string) string { 1110 + current := data 1111 + for i, key := range keys { 1112 + if i == len(keys)-1 { 1113 + if val, ok := current[key].(string); ok { 1114 + return val 1115 + } 1116 + return "" 1117 + } 1118 + if nested, ok := current[key].(map[string]interface{}); ok { 1119 + current = nested 1120 + } else { 1121 + return "" 1122 + } 1123 + } 1124 + return "" 1125 + } 1126 + 1127 + func extractInt(data map[string]interface{}, keys ...string) int { 1128 + current := data 1129 + for i, key := range keys { 1130 + if i == len(keys)-1 { 1131 + if val, ok := current[key].(float64); ok { 1132 + return int(val) 1133 + } 1134 + if val, ok := current[key].(int); ok { 1135 + return val 1136 + } 1137 + return 0 1138 + } 1139 + if nested, ok := current[key].(map[string]interface{}); ok { 1140 + current = nested 1141 + } else { 1142 + return 0 1143 + } 1144 + } 1145 + return 0 1146 + } 1147 + 1148 + func extractFloat(data map[string]interface{}, keys ...string) float32 { 1149 + current := data 1150 + for i, key := range keys { 1151 + if i == len(keys)-1 { 1152 + if val, ok := current[key].(float64); ok { 1153 + return float32(val) 1154 + } 1155 + return 0 1156 + } 1157 + if nested, ok := current[key].(map[string]interface{}); ok { 1158 + current = nested 1159 + } else { 1160 + return 0 1161 + } 1162 + } 1163 + return 0 1164 + } 1165 + 1166 + // ===== CURSOR OPERATIONS ===== 1167 + 1168 + func (p *PostgresDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) { 1169 + query := "SELECT source, last_bundle_number, last_scan_time, records_processed FROM scan_cursors WHERE source = $1" 1170 + 1171 + var cursor ScanCursor 1172 + err := p.db.QueryRowContext(ctx, query, source).Scan( 1173 + &cursor.Source, &cursor.LastBundleNumber, &cursor.LastScanTime, &cursor.RecordsProcessed, 1174 + ) 1175 + if err == sql.ErrNoRows { 1176 + return &ScanCursor{ 1177 + Source: source, 1178 + LastBundleNumber: 0, 1179 + LastScanTime: time.Time{}, 1180 + }, nil 1181 + } 1182 + return &cursor, err 1183 + } 1184 + 1185 + func (p *PostgresDB) UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error { 1186 + query := ` 1187 + INSERT INTO scan_cursors (source, last_bundle_number, last_scan_time, records_processed) 1188 + VALUES ($1, $2, $3, $4) 1189 + ON CONFLICT(source) DO UPDATE SET 1190 + last_bundle_number = EXCLUDED.last_bundle_number, 1191 + last_scan_time = EXCLUDED.last_scan_time, 1192 + records_processed = EXCLUDED.records_processed 1193 + ` 1194 + _, err := p.db.ExecContext(ctx, query, cursor.Source, cursor.LastBundleNumber, cursor.LastScanTime, cursor.RecordsProcessed) 1195 + return err 1196 + } 1197 + 1198 + // ===== METRICS OPERATIONS ===== 1199 + 1200 + func (p *PostgresDB) StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error { 1201 + query := ` 1202 + INSERT INTO plc_metrics (total_dids, total_pds, unique_pds, scan_duration_ms, error_count) 1203 + VALUES ($1, $2, $3, $4, $5) 1204 + ` 1205 + _, err := p.db.ExecContext(ctx, query, metrics.TotalDIDs, metrics.TotalPDS, 1206 + metrics.UniquePDS, metrics.ScanDuration, metrics.ErrorCount) 1207 + return err 1208 + } 1209 + 1210 + func (p *PostgresDB) GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error) { 1211 + query := ` 1212 + SELECT total_dids, total_pds, unique_pds, scan_duration_ms, error_count, created_at 1213 + FROM plc_metrics 1214 + ORDER BY created_at DESC 1215 + LIMIT $1 1216 + ` 1217 + 1218 + rows, err := p.db.QueryContext(ctx, query, limit) 1219 + if err != nil { 1220 + return 
nil, err 1221 + } 1222 + defer rows.Close() 1223 + 1224 + var metrics []*PLCMetrics 1225 + for rows.Next() { 1226 + var m PLCMetrics 1227 + if err := rows.Scan(&m.TotalDIDs, &m.TotalPDS, &m.UniquePDS, &m.ScanDuration, &m.ErrorCount, &m.LastScanTime); err != nil { 1228 + return nil, err 1229 + } 1230 + metrics = append(metrics, &m) 1231 + } 1232 + 1233 + return metrics, rows.Err() 1234 + } 1235 + 1236 + // ===== DID OPERATIONS ===== 1237 + 1238 + func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error { 1239 + query := ` 1240 + INSERT INTO dids (did, handle, pds, bundle_numbers, created_at) 1241 + VALUES ($1, $2, $3, jsonb_build_array($4::integer), CURRENT_TIMESTAMP) 1242 + ON CONFLICT(did) DO UPDATE SET 1243 + handle = EXCLUDED.handle, 1244 + pds = EXCLUDED.pds, 1245 + bundle_numbers = CASE 1246 + WHEN dids.bundle_numbers @> jsonb_build_array($4::integer) THEN dids.bundle_numbers 1247 + ELSE dids.bundle_numbers || jsonb_build_array($4::integer) 1248 + END, 1249 + updated_at = CURRENT_TIMESTAMP 1250 + ` 1251 + _, err := p.db.ExecContext(ctx, query, did, handle, pds, bundleNum) 1252 + return err 1253 + } 1254 + 1255 + // UpsertDIDFromMempool creates/updates DID record without adding to bundle_numbers 1256 + func (p *PostgresDB) UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error { 1257 + query := ` 1258 + INSERT INTO dids (did, handle, pds, bundle_numbers, created_at) 1259 + VALUES ($1, $2, $3, '[]'::jsonb, CURRENT_TIMESTAMP) 1260 + ON CONFLICT(did) DO UPDATE SET 1261 + handle = EXCLUDED.handle, 1262 + pds = EXCLUDED.pds, 1263 + updated_at = CURRENT_TIMESTAMP 1264 + ` 1265 + _, err := p.db.ExecContext(ctx, query, did, handle, pds) 1266 + return err 1267 + } 1268 + 1269 + func (p *PostgresDB) GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) { 1270 + query := ` 1271 + SELECT did, handle, pds, bundle_numbers, created_at 1272 + FROM dids 1273 + WHERE did = $1 1274 + ` 1275 + 1276 + var record DIDRecord 1277 + var bundleNumbersJSON []byte 1278 + var handle, pds sql.NullString 1279 + 1280 + err := p.db.QueryRowContext(ctx, query, did).Scan( 1281 + &record.DID, 1282 + &handle, 1283 + &pds, 1284 + &bundleNumbersJSON, 1285 + &record.CreatedAt, 1286 + ) 1287 + if err != nil { 1288 + return nil, err 1289 + } 1290 + 1291 + if handle.Valid { 1292 + record.Handle = handle.String 1293 + } 1294 + if pds.Valid { 1295 + record.CurrentPDS = pds.String 1296 + } 1297 + 1298 + if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil { 1299 + return nil, err 1300 + } 1301 + 1302 + return &record, nil 1303 + } 1304 + 1305 + func (p *PostgresDB) GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) { 1306 + query := ` 1307 + SELECT did, handle, pds, bundle_numbers, created_at 1308 + FROM dids 1309 + WHERE handle = $1 1310 + ` 1311 + 1312 + var record DIDRecord 1313 + var bundleNumbersJSON []byte 1314 + var recordHandle, pds sql.NullString 1315 + 1316 + err := p.db.QueryRowContext(ctx, query, handle).Scan( 1317 + &record.DID, 1318 + &recordHandle, 1319 + &pds, 1320 + &bundleNumbersJSON, 1321 + &record.CreatedAt, 1322 + ) 1323 + if err != nil { 1324 + return nil, err 1325 + } 1326 + 1327 + if recordHandle.Valid { 1328 + record.Handle = recordHandle.String 1329 + } 1330 + if pds.Valid { 1331 + record.CurrentPDS = pds.String 1332 + } 1333 + 1334 + if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil { 1335 + return nil, err 1336 + } 1337 + 1338 + return &record, 
nil 1339 + } 1340 + 1341 + // GetGlobalDIDInfo retrieves consolidated DID info from 'dids' and 'pds_repos' 1342 + func (p *PostgresDB) GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) { 1343 + query := ` 1344 + WITH primary_endpoints AS ( 1345 + SELECT DISTINCT ON (COALESCE(server_did, id::text)) 1346 + id 1347 + FROM endpoints 1348 + WHERE endpoint_type = 'pds' 1349 + ORDER BY COALESCE(server_did, id::text), discovered_at ASC 1350 + ) 1351 + SELECT 1352 + d.did, 1353 + d.handle, 1354 + d.pds, 1355 + d.bundle_numbers, 1356 + d.created_at, 1357 + COALESCE( 1358 + jsonb_agg( 1359 + jsonb_build_object( 1360 + 'id', pr.id, 1361 + 'endpoint_id', pr.endpoint_id, 1362 + 'endpoint', e.endpoint, 1363 + 'did', pr.did, 1364 + 'head', pr.head, 1365 + 'rev', pr.rev, 1366 + 'active', pr.active, 1367 + 'status', pr.status, 1368 + 'first_seen', pr.first_seen AT TIME ZONE 'UTC', 1369 + 'last_seen', pr.last_seen AT TIME ZONE 'UTC', 1370 + 'updated_at', pr.updated_at AT TIME ZONE 'UTC' 1371 + ) 1372 + ORDER BY pr.last_seen DESC 1373 + ) FILTER ( 1374 + WHERE pr.id IS NOT NULL AND pe.id IS NOT NULL 1375 + ), 1376 + '[]'::jsonb 1377 + ) AS hosting_on 1378 + FROM 1379 + dids d 1380 + LEFT JOIN 1381 + pds_repos pr ON d.did = pr.did 1382 + LEFT JOIN 1383 + endpoints e ON pr.endpoint_id = e.id 1384 + LEFT JOIN 1385 + primary_endpoints pe ON pr.endpoint_id = pe.id 1386 + WHERE 1387 + d.did = $1 1388 + GROUP BY 1389 + d.did, d.handle, d.pds, d.bundle_numbers, d.created_at 1390 + ` 1391 + 1392 + var info GlobalDIDInfo 1393 + var bundleNumbersJSON []byte 1394 + var hostingOnJSON []byte 1395 + var handle, pds sql.NullString 1396 + 1397 + err := p.db.QueryRowContext(ctx, query, did).Scan( 1398 + &info.DID, 1399 + &handle, 1400 + &pds, 1401 + &bundleNumbersJSON, 1402 + &info.CreatedAt, 1403 + &hostingOnJSON, 1404 + ) 1405 + if err != nil { 1406 + return nil, err 1407 + } 1408 + 1409 + if handle.Valid { 1410 + info.Handle = handle.String 1411 + } 1412 + if pds.Valid { 1413 + info.CurrentPDS = pds.String 1414 + } 1415 + 1416 + if err := json.Unmarshal(bundleNumbersJSON, &info.BundleNumbers); err != nil { 1417 + return nil, fmt.Errorf("failed to unmarshal bundle_numbers: %w", err) 1418 + } 1419 + 1420 + if err := json.Unmarshal(hostingOnJSON, &info.HostingOn); err != nil { 1421 + return nil, fmt.Errorf("failed to unmarshal hosting_on: %w", err) 1422 + } 1423 + 1424 + return &info, nil 1425 + } 1426 + 1427 + func (p *PostgresDB) AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error { 1428 + if len(dids) == 0 { 1429 + return nil 1430 + } 1431 + 1432 + // Acquire a connection from the pool 1433 + conn, err := p.pool.Acquire(ctx) 1434 + if err != nil { 1435 + return err 1436 + } 1437 + defer conn.Release() 1438 + 1439 + // Start transaction 1440 + tx, err := conn.Begin(ctx) 1441 + if err != nil { 1442 + return err 1443 + } 1444 + defer tx.Rollback(ctx) 1445 + 1446 + // Create temporary table 1447 + _, err = tx.Exec(ctx, ` 1448 + CREATE TEMP TABLE temp_dids (did TEXT PRIMARY KEY) ON COMMIT DROP 1449 + `) 1450 + if err != nil { 1451 + return err 1452 + } 1453 + 1454 + // Use COPY for blazing fast bulk insert 1455 + _, err = tx.Conn().CopyFrom( 1456 + ctx, 1457 + pgx.Identifier{"temp_dids"}, 1458 + []string{"did"}, 1459 + pgx.CopyFromSlice(len(dids), func(i int) ([]interface{}, error) { 1460 + return []interface{}{dids[i]}, nil 1461 + }), 1462 + ) 1463 + if err != nil { 1464 + return err 1465 + } 1466 + 1467 + // Step 1: Insert new DIDs 1468 + _, err = tx.Exec(ctx, ` 1469 + INSERT INTO 
dids (did, bundle_numbers, created_at) 1470 + SELECT td.did, $1::jsonb, CURRENT_TIMESTAMP 1471 + FROM temp_dids td 1472 + WHERE NOT EXISTS (SELECT 1 FROM dids WHERE dids.did = td.did) 1473 + `, fmt.Sprintf("[%d]", bundleNum)) 1474 + 1475 + if err != nil { 1476 + return err 1477 + } 1478 + 1479 + // Step 2: Update existing DIDs 1480 + _, err = tx.Exec(ctx, ` 1481 + UPDATE dids 1482 + SET bundle_numbers = bundle_numbers || $1::jsonb 1483 + FROM temp_dids 1484 + WHERE dids.did = temp_dids.did 1485 + AND NOT (bundle_numbers @> $1::jsonb) 1486 + `, fmt.Sprintf("[%d]", bundleNum)) 1487 + 1488 + if err != nil { 1489 + return err 1490 + } 1491 + 1492 + return tx.Commit(ctx) 1493 + } 1494 + 1495 + func (p *PostgresDB) GetTotalDIDCount(ctx context.Context) (int64, error) { 1496 + query := "SELECT COUNT(*) FROM dids" 1497 + var count int64 1498 + err := p.db.QueryRowContext(ctx, query).Scan(&count) 1499 + return count, err 1500 + } 1501 + 1502 + func (p *PostgresDB) GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error) { 1503 + query := ` 1504 + WITH unique_servers AS ( 1505 + SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text)) 1506 + e.id, 1507 + e.ip, 1508 + e.status 1509 + FROM endpoints e 1510 + WHERE e.endpoint_type = 'pds' 1511 + ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC 1512 + ), 1513 + pds_by_country AS ( 1514 + SELECT 1515 + i.country, 1516 + i.country_code, 1517 + COUNT(DISTINCT us.id) as active_pds_count, 1518 + SUM(latest.user_count) as total_users, 1519 + AVG(latest.response_time) as avg_response_time 1520 + FROM unique_servers us 1521 + JOIN ip_infos i ON us.ip = i.ip 1522 + LEFT JOIN LATERAL ( 1523 + SELECT user_count, response_time 1524 + FROM endpoint_scans 1525 + WHERE endpoint_id = us.id 1526 + ORDER BY scanned_at DESC 1527 + LIMIT 1 1528 + ) latest ON true 1529 + WHERE us.status = 1 1530 + AND i.country IS NOT NULL 1531 + AND i.country != '' 1532 + GROUP BY i.country, i.country_code 1533 + ), 1534 + totals AS ( 1535 + SELECT 1536 + SUM(active_pds_count) as total_pds, 1537 + SUM(total_users) as total_users_global 1538 + FROM pds_by_country 1539 + ) 1540 + SELECT 1541 + pbc.country, 1542 + pbc.country_code, 1543 + pbc.active_pds_count, 1544 + ROUND((pbc.active_pds_count * 100.0 / NULLIF(t.total_pds, 0))::numeric, 2) as pds_percentage, 1545 + COALESCE(pbc.total_users, 0) as total_users, 1546 + ROUND((COALESCE(pbc.total_users, 0) * 100.0 / NULLIF(t.total_users_global, 0))::numeric, 2) as users_percentage, 1547 + ROUND(COALESCE(pbc.avg_response_time, 0)::numeric, 2) as avg_response_time_ms 1548 + FROM pds_by_country pbc 1549 + CROSS JOIN totals t 1550 + ORDER BY pbc.active_pds_count DESC 1551 + ` 1552 + 1553 + rows, err := p.db.QueryContext(ctx, query) 1554 + if err != nil { 1555 + return nil, err 1556 + } 1557 + defer rows.Close() 1558 + 1559 + var stats []*CountryStats 1560 + for rows.Next() { 1561 + var s CountryStats 1562 + var pdsPercentage, usersPercentage sql.NullFloat64 1563 + 1564 + err := rows.Scan( 1565 + &s.Country, 1566 + &s.CountryCode, 1567 + &s.ActivePDSCount, 1568 + &pdsPercentage, 1569 + &s.TotalUsers, 1570 + &usersPercentage, 1571 + &s.AvgResponseTimeMS, 1572 + ) 1573 + if err != nil { 1574 + return nil, err 1575 + } 1576 + 1577 + if pdsPercentage.Valid { 1578 + s.PDSPercentage = pdsPercentage.Float64 1579 + } 1580 + if usersPercentage.Valid { 1581 + s.UsersPercentage = usersPercentage.Float64 1582 + } 1583 + 1584 + stats = append(stats, &s) 1585 + } 1586 + 1587 + return stats, rows.Err() 1588 + } 1589 + 1590 + func (p 
*PostgresDB) GetVersionStats(ctx context.Context) ([]*VersionStats, error) { 1591 + query := ` 1592 + WITH unique_servers AS ( 1593 + SELECT DISTINCT ON (COALESCE(e.server_did, e.id::text)) 1594 + e.id 1595 + FROM endpoints e 1596 + WHERE e.endpoint_type = 'pds' 1597 + AND e.status = 1 1598 + ORDER BY COALESCE(e.server_did, e.id::text), e.discovered_at ASC 1599 + ), 1600 + latest_scans AS ( 1601 + SELECT DISTINCT ON (us.id) 1602 + us.id, 1603 + es.version, 1604 + es.user_count, 1605 + es.scanned_at 1606 + FROM unique_servers us 1607 + JOIN endpoint_scans es ON us.id = es.endpoint_id 1608 + WHERE es.version IS NOT NULL 1609 + AND es.version != '' 1610 + ORDER BY us.id, es.scanned_at DESC 1611 + ), 1612 + version_groups AS ( 1613 + SELECT 1614 + version, 1615 + COUNT(*) as pds_count, 1616 + SUM(user_count) as total_users, 1617 + MIN(scanned_at) as first_seen, 1618 + MAX(scanned_at) as last_seen 1619 + FROM latest_scans 1620 + GROUP BY version 1621 + ), 1622 + totals AS ( 1623 + SELECT 1624 + SUM(pds_count) as total_pds, 1625 + SUM(total_users) as total_users_global 1626 + FROM version_groups 1627 + ) 1628 + SELECT 1629 + vg.version, 1630 + vg.pds_count, 1631 + (vg.pds_count * 100.0 / NULLIF(t.total_pds, 0))::numeric as percentage, 1632 + COALESCE(vg.total_users, 0) as total_users, 1633 + (COALESCE(vg.total_users, 0) * 100.0 / NULLIF(t.total_users_global, 0))::numeric as users_percentage, 1634 + vg.first_seen, 1635 + vg.last_seen 1636 + FROM version_groups vg 1637 + CROSS JOIN totals t 1638 + ORDER BY vg.pds_count DESC 1639 + ` 1640 + 1641 + rows, err := p.db.QueryContext(ctx, query) 1642 + if err != nil { 1643 + return nil, err 1644 + } 1645 + defer rows.Close() 1646 + 1647 + var stats []*VersionStats 1648 + for rows.Next() { 1649 + var s VersionStats 1650 + var percentage, usersPercentage sql.NullFloat64 1651 + 1652 + err := rows.Scan( 1653 + &s.Version, 1654 + &s.PDSCount, 1655 + &percentage, 1656 + &s.TotalUsers, 1657 + &usersPercentage, 1658 + &s.FirstSeen, 1659 + &s.LastSeen, 1660 + ) 1661 + if err != nil { 1662 + return nil, err 1663 + } 1664 + 1665 + if percentage.Valid { 1666 + s.Percentage = percentage.Float64 1667 + s.PercentageText = formatPercentage(percentage.Float64) 1668 + } 1669 + if usersPercentage.Valid { 1670 + s.UsersPercentage = usersPercentage.Float64 1671 + } 1672 + 1673 + stats = append(stats, &s) 1674 + } 1675 + 1676 + return stats, rows.Err() 1677 + } 1678 + 1679 + // Helper function (add if not already present) 1680 + func formatPercentage(pct float64) string { 1681 + if pct >= 10 { 1682 + return fmt.Sprintf("%.2f%%", pct) 1683 + } else if pct >= 1 { 1684 + return fmt.Sprintf("%.3f%%", pct) 1685 + } else if pct >= 0.01 { 1686 + return fmt.Sprintf("%.4f%%", pct) 1687 + } else if pct > 0 { 1688 + return fmt.Sprintf("%.6f%%", pct) 1689 + } 1690 + return "0%" 1691 + } 1692 + 1693 + func (p *PostgresDB) UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error { 1694 + if len(repos) == 0 { 1695 + return nil 1696 + } 1697 + 1698 + // Step 1: Load all existing repos for this endpoint into memory 1699 + query := ` 1700 + SELECT did, head, rev, active, status 1701 + FROM pds_repos 1702 + WHERE endpoint_id = $1 1703 + ` 1704 + 1705 + rows, err := p.db.QueryContext(ctx, query, endpointID) 1706 + if err != nil { 1707 + return err 1708 + } 1709 + 1710 + existingRepos := make(map[string]*PDSRepo) 1711 + for rows.Next() { 1712 + var repo PDSRepo 1713 + var head, rev, status sql.NullString 1714 + 1715 + err := rows.Scan(&repo.DID, &head, &rev, &repo.Active, 
&status) 1716 + if err != nil { 1717 + rows.Close() 1718 + return err 1719 + } 1720 + 1721 + if head.Valid { 1722 + repo.Head = head.String 1723 + } 1724 + if rev.Valid { 1725 + repo.Rev = rev.String 1726 + } 1727 + if status.Valid { 1728 + repo.Status = status.String 1729 + } 1730 + 1731 + existingRepos[repo.DID] = &repo 1732 + } 1733 + rows.Close() 1734 + 1735 + if err := rows.Err(); err != nil { 1736 + return err 1737 + } 1738 + 1739 + // Step 2: Compare and collect changes 1740 + var newRepos []PDSRepoData 1741 + var changedRepos []PDSRepoData 1742 + 1743 + for _, repo := range repos { 1744 + existing, exists := existingRepos[repo.DID] 1745 + if !exists { 1746 + // New repo 1747 + newRepos = append(newRepos, repo) 1748 + } else if existing.Head != repo.Head || 1749 + existing.Rev != repo.Rev || 1750 + existing.Active != repo.Active || 1751 + existing.Status != repo.Status { 1752 + // Repo changed 1753 + changedRepos = append(changedRepos, repo) 1754 + } 1755 + } 1756 + 1757 + // Log comparison results 1758 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, total=%d, existing=%d, new=%d, changed=%d, unchanged=%d", 1759 + endpointID, len(repos), len(existingRepos), len(newRepos), len(changedRepos), 1760 + len(repos)-len(newRepos)-len(changedRepos)) 1761 + 1762 + // If nothing changed, return early 1763 + if len(newRepos) == 0 && len(changedRepos) == 0 { 1764 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, no changes detected, skipping database operations", endpointID) 1765 + return nil 1766 + } 1767 + 1768 + // Step 3: Execute batched operations 1769 + conn, err := p.pool.Acquire(ctx) 1770 + if err != nil { 1771 + return err 1772 + } 1773 + defer conn.Release() 1774 + 1775 + tx, err := conn.Begin(ctx) 1776 + if err != nil { 1777 + return err 1778 + } 1779 + defer tx.Rollback(ctx) 1780 + 1781 + // Insert new repos 1782 + if len(newRepos) > 0 { 1783 + _, err := tx.Exec(ctx, ` 1784 + CREATE TEMP TABLE temp_new_repos ( 1785 + did TEXT, 1786 + head TEXT, 1787 + rev TEXT, 1788 + active BOOLEAN, 1789 + status TEXT 1790 + ) ON COMMIT DROP 1791 + `) 1792 + if err != nil { 1793 + return err 1794 + } 1795 + 1796 + _, err = tx.Conn().CopyFrom( 1797 + ctx, 1798 + pgx.Identifier{"temp_new_repos"}, 1799 + []string{"did", "head", "rev", "active", "status"}, 1800 + pgx.CopyFromSlice(len(newRepos), func(i int) ([]interface{}, error) { 1801 + repo := newRepos[i] 1802 + return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil 1803 + }), 1804 + ) 1805 + if err != nil { 1806 + return err 1807 + } 1808 + 1809 + result, err := tx.Exec(ctx, ` 1810 + INSERT INTO pds_repos (endpoint_id, did, head, rev, active, status, first_seen, last_seen) 1811 + SELECT $1, did, head, rev, active, status, 1812 + TIMEZONE('UTC', NOW()), 1813 + TIMEZONE('UTC', NOW()) 1814 + FROM temp_new_repos 1815 + `, endpointID) 1816 + if err != nil { 1817 + return err 1818 + } 1819 + 1820 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, inserted %d new repos", endpointID, result.RowsAffected()) 1821 + } 1822 + 1823 + // Update changed repos 1824 + if len(changedRepos) > 0 { 1825 + _, err := tx.Exec(ctx, ` 1826 + CREATE TEMP TABLE temp_changed_repos ( 1827 + did TEXT, 1828 + head TEXT, 1829 + rev TEXT, 1830 + active BOOLEAN, 1831 + status TEXT 1832 + ) ON COMMIT DROP 1833 + `) 1834 + if err != nil { 1835 + return err 1836 + } 1837 + 1838 + _, err = tx.Conn().CopyFrom( 1839 + ctx, 1840 + pgx.Identifier{"temp_changed_repos"}, 1841 + []string{"did", "head", "rev", "active", "status"}, 1842 + pgx.CopyFromSlice(len(changedRepos), 
func(i int) ([]interface{}, error) { 1843 + repo := changedRepos[i] 1844 + return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil 1845 + }), 1846 + ) 1847 + if err != nil { 1848 + return err 1849 + } 1850 + 1851 + result, err := tx.Exec(ctx, ` 1852 + UPDATE pds_repos 1853 + SET head = t.head, 1854 + rev = t.rev, 1855 + active = t.active, 1856 + status = t.status, 1857 + last_seen = TIMEZONE('UTC', NOW()), 1858 + updated_at = TIMEZONE('UTC', NOW()) 1859 + FROM temp_changed_repos t 1860 + WHERE pds_repos.endpoint_id = $1 1861 + AND pds_repos.did = t.did 1862 + `, endpointID) 1863 + if err != nil { 1864 + return err 1865 + } 1866 + 1867 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, updated %d changed repos", endpointID, result.RowsAffected()) 1868 + } 1869 + 1870 + if err := tx.Commit(ctx); err != nil { 1871 + return err 1872 + } 1873 + 1874 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, transaction committed successfully", endpointID) 1875 + return nil 1876 + } 1877 + 1878 + func (p *PostgresDB) GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) { 1879 + query := ` 1880 + SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at 1881 + FROM pds_repos 1882 + WHERE endpoint_id = $1 1883 + ` 1884 + 1885 + args := []interface{}{endpointID} 1886 + argIdx := 2 1887 + 1888 + if activeOnly { 1889 + query += " AND active = true" 1890 + } 1891 + 1892 + // Order by id (primary key) - fastest 1893 + query += " ORDER BY id DESC" 1894 + 1895 + if limit > 0 { 1896 + query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1) 1897 + args = append(args, limit, offset) 1898 + } 1899 + 1900 + rows, err := p.db.QueryContext(ctx, query, args...) 1901 + if err != nil { 1902 + return nil, err 1903 + } 1904 + defer rows.Close() 1905 + 1906 + var repos []*PDSRepo 1907 + for rows.Next() { 1908 + var repo PDSRepo 1909 + var head, rev, status sql.NullString 1910 + 1911 + err := rows.Scan( 1912 + &repo.ID, &repo.EndpointID, &repo.DID, &head, &rev, 1913 + &repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt, 1914 + ) 1915 + if err != nil { 1916 + return nil, err 1917 + } 1918 + 1919 + if head.Valid { 1920 + repo.Head = head.String 1921 + } 1922 + if rev.Valid { 1923 + repo.Rev = rev.String 1924 + } 1925 + if status.Valid { 1926 + repo.Status = status.String 1927 + } 1928 + 1929 + repos = append(repos, &repo) 1930 + } 1931 + 1932 + return repos, rows.Err() 1933 + } 1934 + 1935 + func (p *PostgresDB) GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error) { 1936 + query := ` 1937 + SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at 1938 + FROM pds_repos 1939 + WHERE did = $1 1940 + ORDER BY last_seen DESC 1941 + ` 1942 + 1943 + rows, err := p.db.QueryContext(ctx, query, did) 1944 + if err != nil { 1945 + return nil, err 1946 + } 1947 + defer rows.Close() 1948 + 1949 + var repos []*PDSRepo 1950 + for rows.Next() { 1951 + var repo PDSRepo 1952 + var head, rev, status sql.NullString 1953 + 1954 + err := rows.Scan( 1955 + &repo.ID, &repo.EndpointID, &repo.DID, &head, &rev, 1956 + &repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt, 1957 + ) 1958 + if err != nil { 1959 + return nil, err 1960 + } 1961 + 1962 + if head.Valid { 1963 + repo.Head = head.String 1964 + } 1965 + if rev.Valid { 1966 + repo.Rev = rev.String 1967 + } 1968 + if status.Valid { 1969 + repo.Status = status.String 1970 + } 1971 + 1972 + repos = 
append(repos, &repo) 1973 + } 1974 + 1975 + return repos, rows.Err() 1976 + } 1977 + 1978 + func (p *PostgresDB) GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error) { 1979 + query := ` 1980 + SELECT 1981 + COUNT(*) as total_repos, 1982 + COUNT(*) FILTER (WHERE active = true) as active_repos, 1983 + COUNT(*) FILTER (WHERE active = false) as inactive_repos, 1984 + COUNT(*) FILTER (WHERE status IS NOT NULL AND status != '') as repos_with_status, 1985 + COUNT(*) FILTER (WHERE updated_at > CURRENT_TIMESTAMP - INTERVAL '1 hour') as recent_changes 1986 + FROM pds_repos 1987 + WHERE endpoint_id = $1 1988 + ` 1989 + 1990 + var totalRepos, activeRepos, inactiveRepos, reposWithStatus, recentChanges int64 1991 + 1992 + err := p.db.QueryRowContext(ctx, query, endpointID).Scan( 1993 + &totalRepos, &activeRepos, &inactiveRepos, &reposWithStatus, &recentChanges, 1994 + ) 1995 + if err != nil { 1996 + return nil, err 1997 + } 1998 + 1999 + return map[string]interface{}{ 2000 + "total_repos": totalRepos, 2001 + "active_repos": activeRepos, 2002 + "inactive_repos": inactiveRepos, 2003 + "repos_with_status": reposWithStatus, 2004 + "recent_changes": recentChanges, 2005 + }, nil 2006 + } 2007 + 2008 + // GetTableSizes fetches size information (in bytes) for all tables in the specified schema. 2009 + func (p *PostgresDB) GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) { 2010 + // Query now selects raw byte values directly 2011 + query := ` 2012 + SELECT 2013 + c.relname AS table_name, 2014 + pg_total_relation_size(c.oid) AS total_bytes, 2015 + pg_relation_size(c.oid) AS table_heap_bytes, 2016 + pg_indexes_size(c.oid) AS indexes_bytes 2017 + FROM 2018 + pg_class c 2019 + LEFT JOIN 2020 + pg_namespace n ON n.oid = c.relnamespace 2021 + WHERE 2022 + c.relkind = 'r' -- 'r' = ordinary table 2023 + AND n.nspname = $1 2024 + ORDER BY 2025 + total_bytes DESC; 2026 + ` 2027 + rows, err := p.db.QueryContext(ctx, query, schema) 2028 + if err != nil { 2029 + return nil, fmt.Errorf("failed to query table sizes: %w", err) 2030 + } 2031 + defer rows.Close() 2032 + 2033 + var results []TableSizeInfo 2034 + for rows.Next() { 2035 + var info TableSizeInfo 2036 + // Scan directly into int64 fields 2037 + if err := rows.Scan( 2038 + &info.TableName, 2039 + &info.TotalBytes, 2040 + &info.TableHeapBytes, 2041 + &info.IndexesBytes, 2042 + ); err != nil { 2043 + return nil, fmt.Errorf("failed to scan table size row: %w", err) 2044 + } 2045 + results = append(results, info) 2046 + } 2047 + if err := rows.Err(); err != nil { 2048 + return nil, fmt.Errorf("error iterating table size rows: %w", err) 2049 + } 2050 + 2051 + return results, nil 2052 + } 2053 + 2054 + // GetIndexSizes fetches size information (in bytes) for all indexes in the specified schema. 
2055 + func (p *PostgresDB) GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) { 2056 + // Query now selects raw byte values directly 2057 + query := ` 2058 + SELECT 2059 + c.relname AS index_name, 2060 + COALESCE(i.indrelid::regclass::text, 'N/A') AS table_name, 2061 + pg_relation_size(c.oid) AS index_bytes 2062 + FROM 2063 + pg_class c 2064 + LEFT JOIN 2065 + pg_index i ON i.indexrelid = c.oid 2066 + LEFT JOIN 2067 + pg_namespace n ON n.oid = c.relnamespace 2068 + WHERE 2069 + c.relkind = 'i' -- 'i' = index 2070 + AND n.nspname = $1 2071 + ORDER BY 2072 + index_bytes DESC; 2073 + ` 2074 + rows, err := p.db.QueryContext(ctx, query, schema) 2075 + if err != nil { 2076 + return nil, fmt.Errorf("failed to query index sizes: %w", err) 2077 + } 2078 + defer rows.Close() 2079 + 2080 + var results []IndexSizeInfo 2081 + for rows.Next() { 2082 + var info IndexSizeInfo 2083 + var tableName sql.NullString 2084 + // Scan directly into int64 field 2085 + if err := rows.Scan( 2086 + &info.IndexName, 2087 + &tableName, 2088 + &info.IndexBytes, 2089 + ); err != nil { 2090 + return nil, fmt.Errorf("failed to scan index size row: %w", err) 2091 + } 2092 + if tableName.Valid { 2093 + info.TableName = tableName.String 2094 + } else { 2095 + info.TableName = "N/A" 2096 + } 2097 + results = append(results, info) 2098 + } 2099 + if err := rows.Err(); err != nil { 2100 + return nil, fmt.Errorf("error iterating index size rows: %w", err) 2101 + } 2102 + 2103 + return results, nil 2104 + }
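A note on the nested-map helpers above (extractString / extractInt / extractFloat): they return zero values rather than errors when a branch or leaf is missing, which is what lets UpsertIPInfo stay tolerant of partial provider payloads. A minimal, self-contained sketch of that behavior — the payload shape below is illustrative, not necessarily the exact provider response:

package main

import "fmt"

// Trimmed copy of extractString from internal/storage/postgres.go:
// walks nested map[string]interface{} levels and returns "" when any
// intermediate branch or the final leaf is missing or mistyped.
func extractString(data map[string]interface{}, keys ...string) string {
	current := data
	for i, key := range keys {
		if i == len(keys)-1 {
			if val, ok := current[key].(string); ok {
				return val
			}
			return ""
		}
		if nested, ok := current[key].(map[string]interface{}); ok {
			current = nested
		} else {
			return ""
		}
	}
	return ""
}

func main() {
	// Illustrative ip-info payload; field names mirror what UpsertIPInfo
	// reads, the values are made up.
	payload := map[string]interface{}{
		"is_datacenter": true,
		"location": map[string]interface{}{
			"city":    "Prague",
			"country": "Czechia",
		},
	}
	fmt.Println(extractString(payload, "location", "city")) // Prague
	fmt.Println(extractString(payload, "location", "zip"))  // "" — missing leaf
	fmt.Println(extractString(payload, "asn", "org"))       // "" — missing branch
}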
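Similarly, formatPercentage widens precision as the share shrinks, so sub-percent version shares in GetVersionStats don't all collapse to "0.00%". A quick check with arbitrary values:

package main

import "fmt"

// Copy of formatPercentage from internal/storage/postgres.go: the
// smaller the share, the more decimal places it keeps.
func formatPercentage(pct float64) string {
	if pct >= 10 {
		return fmt.Sprintf("%.2f%%", pct)
	} else if pct >= 1 {
		return fmt.Sprintf("%.3f%%", pct)
	} else if pct >= 0.01 {
		return fmt.Sprintf("%.4f%%", pct)
	} else if pct > 0 {
		return fmt.Sprintf("%.6f%%", pct)
	}
	return "0%"
}

func main() {
	for _, v := range []float64{42.5, 3.2, 0.05, 0.0004, 0} {
		fmt.Println(formatPercentage(v))
	}
	// Output: 42.50%, 3.200%, 0.0500%, 0.000400%, 0%
}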
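And the 30-day refresh window in ShouldUpdateIPInfo reduces to a one-line time comparison; sketched here with an assumed helper name (needsRefresh is not in the diff):

package main

import (
	"fmt"
	"time"
)

// needsRefresh (hypothetical name) mirrors the staleness rule in
// ShouldUpdateIPInfo: cached IP info is refetched once it is more
// than 30 days old.
func needsRefresh(fetchedAt time.Time) bool {
	return time.Since(fetchedAt) > 30*24*time.Hour
}

func main() {
	fmt.Println(needsRefresh(time.Now().Add(-31 * 24 * time.Hour))) // true
	fmt.Println(needsRefresh(time.Now().Add(-time.Hour)))           // false
}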
-824
internal/storage/sqlite.go
··· 1 - package storage 2 - 3 - import ( 4 - "context" 5 - "database/sql" 6 - "encoding/json" 7 - "fmt" 8 - "strings" 9 - "time" 10 - 11 - _ "github.com/mattn/go-sqlite3" 12 - ) 13 - 14 - type SQLiteDB struct { 15 - db *sql.DB 16 - } 17 - 18 - func NewSQLiteDB(path string) (*SQLiteDB, error) { 19 - db, err := sql.Open("sqlite3", path) 20 - if err != nil { 21 - return nil, err 22 - } 23 - 24 - // Enable WAL mode for better concurrency 25 - if _, err := db.Exec("PRAGMA journal_mode=WAL"); err != nil { 26 - return nil, err 27 - } 28 - 29 - return &SQLiteDB{db: db}, nil 30 - } 31 - 32 - func (s *SQLiteDB) Close() error { 33 - return s.db.Close() 34 - } 35 - 36 - func (s *SQLiteDB) Migrate() error { 37 - schema := ` 38 - -- PDS tables (same as before) 39 - CREATE TABLE IF NOT EXISTS pds_servers ( 40 - id INTEGER PRIMARY KEY AUTOINCREMENT, 41 - endpoint TEXT UNIQUE NOT NULL, 42 - discovered_at TIMESTAMP NOT NULL, 43 - last_checked TIMESTAMP, 44 - status INTEGER DEFAULT 0, 45 - user_count INTEGER DEFAULT 0, 46 - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 47 - ); 48 - 49 - CREATE INDEX IF NOT EXISTS idx_pds_endpoint ON pds_servers(endpoint); 50 - CREATE INDEX IF NOT EXISTS idx_pds_status ON pds_servers(status); 51 - CREATE INDEX IF NOT EXISTS idx_pds_user_count ON pds_servers(user_count); 52 - 53 - CREATE TABLE IF NOT EXISTS pds_scans ( 54 - id INTEGER PRIMARY KEY AUTOINCREMENT, 55 - pds_id INTEGER NOT NULL, 56 - status INTEGER NOT NULL, 57 - response_time REAL, 58 - scan_data TEXT, 59 - scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 60 - FOREIGN KEY (pds_id) REFERENCES pds_servers(id) ON DELETE CASCADE 61 - ); 62 - 63 - CREATE INDEX IF NOT EXISTS idx_pds_scans_pds_id ON pds_scans(pds_id); 64 - CREATE INDEX IF NOT EXISTS idx_pds_scans_scanned_at ON pds_scans(scanned_at); 65 - 66 - -- Metrics 67 - CREATE TABLE IF NOT EXISTS plc_metrics ( 68 - id INTEGER PRIMARY KEY AUTOINCREMENT, 69 - total_dids INTEGER, 70 - total_pds INTEGER, 71 - unique_pds INTEGER, 72 - scan_duration_ms INTEGER, 73 - error_count INTEGER, 74 - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 75 - ); 76 - 77 - -- Scan cursors with bundle number 78 - CREATE TABLE IF NOT EXISTS scan_cursors ( 79 - source TEXT PRIMARY KEY, 80 - last_bundle_number INTEGER DEFAULT 0, 81 - last_scan_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 82 - records_processed INTEGER DEFAULT 0 83 - ); 84 - 85 - -- Bundles with dual hashing 86 - CREATE TABLE IF NOT EXISTS plc_bundles ( 87 - bundle_number INTEGER PRIMARY KEY, 88 - start_time TIMESTAMP NOT NULL, 89 - end_time TIMESTAMP NOT NULL, 90 - dids TEXT NOT NULL, 91 - hash TEXT NOT NULL, -- SHA256 of uncompressed JSONL 92 - compressed_hash TEXT NOT NULL, -- SHA256 of compressed file 93 - compressed_size INTEGER NOT NULL, -- Size of compressed file 94 - prev_bundle_hash TEXT, -- Chain link 95 - compressed BOOLEAN DEFAULT 1, 96 - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 97 - ); 98 - 99 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time); 100 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash); 101 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash); 102 - 103 - -- NEW: Mempool for pending operations 104 - CREATE TABLE IF NOT EXISTS plc_mempool ( 105 - id INTEGER PRIMARY KEY AUTOINCREMENT, 106 - did TEXT NOT NULL, 107 - operation TEXT NOT NULL, 108 - cid TEXT NOT NULL UNIQUE, -- ✅ Add UNIQUE constraint 109 - created_at TIMESTAMP NOT NULL, 110 - added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 111 - ); 112 - 113 - CREATE INDEX 
IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at); 114 - CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did); 115 - CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid); 116 - ` 117 - 118 - _, err := s.db.Exec(schema) 119 - return err 120 - } 121 - 122 - // GetBundleByNumber 123 - func (s *SQLiteDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) { 124 - query := ` 125 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at 126 - FROM plc_bundles 127 - WHERE bundle_number = ? 128 - ` 129 - 130 - var bundle PLCBundle 131 - var didsJSON string 132 - var prevHash sql.NullString 133 - 134 - err := s.db.QueryRowContext(ctx, query, bundleNumber).Scan( 135 - &bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime, 136 - &didsJSON, &bundle.Hash, &bundle.CompressedHash, 137 - &bundle.CompressedSize, &prevHash, &bundle.Compressed, &bundle.CreatedAt, 138 - ) 139 - if err != nil { 140 - return nil, err 141 - } 142 - 143 - if prevHash.Valid { 144 - bundle.PrevBundleHash = prevHash.String 145 - } 146 - 147 - json.Unmarshal([]byte(didsJSON), &bundle.DIDs) 148 - return &bundle, nil 149 - } 150 - 151 - // GetBundleForTimestamp finds the bundle that should contain operations at or after the given time 152 - func (s *SQLiteDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) { 153 - query := ` 154 - SELECT bundle_number 155 - FROM plc_bundles 156 - WHERE start_time <= ? AND end_time >= ? 157 - ORDER BY bundle_number ASC 158 - LIMIT 1 159 - ` 160 - 161 - var bundleNum int 162 - err := s.db.QueryRowContext(ctx, query, afterTime, afterTime).Scan(&bundleNum) 163 - if err == sql.ErrNoRows { 164 - // No exact match, find the closest bundle before this time 165 - query = ` 166 - SELECT bundle_number 167 - FROM plc_bundles 168 - WHERE end_time < ? 169 - ORDER BY bundle_number DESC 170 - LIMIT 1 171 - ` 172 - err = s.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum) 173 - if err == sql.ErrNoRows { 174 - return 1, nil // Start from first bundle 175 - } 176 - if err != nil { 177 - return 0, err 178 - } 179 - return bundleNum, nil // Return the bundle just before 180 - } 181 - if err != nil { 182 - return 0, err 183 - } 184 - 185 - return bundleNum, nil 186 - } 187 - 188 - // GetLastBundleNumber gets the highest bundle number 189 - func (s *SQLiteDB) GetLastBundleNumber(ctx context.Context) (int, error) { 190 - query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles" 191 - var num int 192 - err := s.db.QueryRowContext(ctx, query).Scan(&num) 193 - return num, err 194 - } 195 - 196 - // AddToMempool adds operations to the mempool 197 - func (s *SQLiteDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error { 198 - if len(ops) == 0 { 199 - return nil 200 - } 201 - 202 - tx, err := s.db.BeginTx(ctx, nil) 203 - if err != nil { 204 - return err 205 - } 206 - defer tx.Rollback() 207 - 208 - // ✅ Use ON CONFLICT to skip duplicates 209 - stmt, err := tx.PrepareContext(ctx, ` 210 - INSERT INTO plc_mempool (did, operation, cid, created_at) 211 - VALUES (?, ?, ?, ?) 
212 - ON CONFLICT(cid) DO NOTHING 213 - `) 214 - if err != nil { 215 - return err 216 - } 217 - defer stmt.Close() 218 - 219 - for _, op := range ops { 220 - _, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt) 221 - if err != nil { 222 - return err 223 - } 224 - } 225 - 226 - return tx.Commit() 227 - } 228 - 229 - // GetMempoolCount returns number of operations in mempool 230 - func (s *SQLiteDB) GetMempoolCount(ctx context.Context) (int, error) { 231 - query := "SELECT COUNT(*) FROM plc_mempool" 232 - var count int 233 - err := s.db.QueryRowContext(ctx, query).Scan(&count) 234 - return count, err 235 - } 236 - 237 - // GetMempoolOperations retrieves operations from mempool ordered by timestamp 238 - func (s *SQLiteDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) { 239 - query := ` 240 - SELECT id, did, operation, cid, created_at, added_at 241 - FROM plc_mempool 242 - ORDER BY created_at ASC 243 - LIMIT ? 244 - ` 245 - 246 - rows, err := s.db.QueryContext(ctx, query, limit) 247 - if err != nil { 248 - return nil, err 249 - } 250 - defer rows.Close() 251 - 252 - var ops []MempoolOperation 253 - for rows.Next() { 254 - var op MempoolOperation 255 - err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt) 256 - if err != nil { 257 - return nil, err 258 - } 259 - ops = append(ops, op) 260 - } 261 - 262 - return ops, rows.Err() 263 - } 264 - 265 - // DeleteFromMempool removes operations from mempool 266 - func (s *SQLiteDB) DeleteFromMempool(ctx context.Context, ids []int64) error { 267 - if len(ids) == 0 { 268 - return nil 269 - } 270 - 271 - placeholders := make([]string, len(ids)) 272 - args := make([]interface{}, len(ids)) 273 - for i, id := range ids { 274 - placeholders[i] = "?" 275 - args[i] = id 276 - } 277 - 278 - query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)", 279 - strings.Join(placeholders, ",")) 280 - 281 - _, err := s.db.ExecContext(ctx, query, args...) 
282 - return err 283 - } 284 - 285 - // GetFirstMempoolOperation retrieves the oldest operation from mempool 286 - func (s *SQLiteDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) { 287 - query := ` 288 - SELECT id, did, operation, cid, created_at, added_at 289 - FROM plc_mempool 290 - ORDER BY created_at ASC, id ASC 291 - LIMIT 1 292 - ` 293 - 294 - var op MempoolOperation 295 - err := s.db.QueryRowContext(ctx, query).Scan( 296 - &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt, 297 - ) 298 - if err == sql.ErrNoRows { 299 - return nil, nil // No operations in mempool 300 - } 301 - if err != nil { 302 - return nil, err 303 - } 304 - 305 - return &op, nil 306 - } 307 - 308 - // GetLastMempoolOperation retrieves the most recent operation from mempool 309 - func (s *SQLiteDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) { 310 - query := ` 311 - SELECT id, did, operation, cid, created_at, added_at 312 - FROM plc_mempool 313 - ORDER BY created_at DESC, id DESC 314 - LIMIT 1 315 - ` 316 - 317 - var op MempoolOperation 318 - err := s.db.QueryRowContext(ctx, query).Scan( 319 - &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt, 320 - ) 321 - if err == sql.ErrNoRows { 322 - return nil, nil // No operations in mempool 323 - } 324 - if err != nil { 325 - return nil, err 326 - } 327 - 328 - return &op, nil 329 - } 330 - 331 - func (s *SQLiteDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error { 332 - didsJSON, err := json.Marshal(bundle.DIDs) 333 - if err != nil { 334 - return err 335 - } 336 - 337 - query := ` 338 - INSERT INTO plc_bundles ( 339 - bundle_number, start_time, end_time, dids, 340 - hash, compressed_hash, compressed_size, prev_bundle_hash, compressed 341 - ) 342 - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 343 - ON CONFLICT(bundle_number) DO UPDATE SET 344 - start_time = excluded.start_time, 345 - end_time = excluded.end_time, 346 - dids = excluded.dids, 347 - hash = excluded.hash, 348 - compressed_hash = excluded.compressed_hash, 349 - compressed_size = excluded.compressed_size, 350 - prev_bundle_hash = excluded.prev_bundle_hash, 351 - compressed = excluded.compressed 352 - ` 353 - _, err = s.db.ExecContext(ctx, query, 354 - bundle.BundleNumber, bundle.StartTime, bundle.EndTime, 355 - string(didsJSON), bundle.Hash, bundle.CompressedHash, 356 - bundle.CompressedSize, bundle.PrevBundleHash, bundle.Compressed, 357 - ) 358 - 359 - return err 360 - } 361 - 362 - // GetBundles 363 - func (s *SQLiteDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) { 364 - query := ` 365 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at 366 - FROM plc_bundles 367 - ORDER BY bundle_number DESC 368 - LIMIT ? 369 - ` 370 - 371 - rows, err := s.db.QueryContext(ctx, query, limit) 372 - if err != nil { 373 - return nil, err 374 - } 375 - defer rows.Close() 376 - 377 - return s.scanBundles(rows) 378 - } 379 - 380 - // GetBundlesForDID 381 - func (s *SQLiteDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) { 382 - query := ` 383 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at 384 - FROM plc_bundles 385 - WHERE EXISTS ( 386 - SELECT 1 FROM json_each(dids) 387 - WHERE json_each.value = ? 
388 - ) 389 - ORDER BY bundle_number ASC 390 - ` 391 - 392 - rows, err := s.db.QueryContext(ctx, query, did) 393 - if err != nil { 394 - return nil, err 395 - } 396 - defer rows.Close() 397 - 398 - return s.scanBundles(rows) 399 - } 400 - 401 - // GetBundle retrieves bundle by time (if needed, otherwise can be removed) 402 - func (s *SQLiteDB) GetBundle(ctx context.Context, afterTime time.Time) (*PLCBundle, error) { 403 - var query string 404 - var args []interface{} 405 - 406 - if afterTime.IsZero() { 407 - query = ` 408 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at 409 - FROM plc_bundles 410 - ORDER BY start_time ASC 411 - LIMIT 1 412 - ` 413 - args = []interface{}{} 414 - } else { 415 - query = ` 416 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at 417 - FROM plc_bundles 418 - WHERE start_time >= ? 419 - ORDER BY start_time ASC 420 - LIMIT 1 421 - ` 422 - args = []interface{}{afterTime} 423 - } 424 - 425 - var bundle PLCBundle 426 - var didsJSON string 427 - var prevHash sql.NullString 428 - 429 - err := s.db.QueryRowContext(ctx, query, args...).Scan( 430 - &bundle.BundleNumber, 431 - &bundle.StartTime, 432 - &bundle.EndTime, 433 - &didsJSON, 434 - &bundle.Hash, // Uncompressed hash 435 - &bundle.CompressedHash, // Compressed hash 436 - &bundle.CompressedSize, // Compressed size (not FileSize!) 437 - &prevHash, // Previous bundle hash 438 - &bundle.Compressed, 439 - &bundle.CreatedAt, 440 - ) 441 - if err == sql.ErrNoRows { 442 - return nil, nil 443 - } 444 - if err != nil { 445 - return nil, err 446 - } 447 - 448 - if prevHash.Valid { 449 - bundle.PrevBundleHash = prevHash.String 450 - } 451 - 452 - json.Unmarshal([]byte(didsJSON), &bundle.DIDs) 453 - return &bundle, nil 454 - } 455 - 456 - // scanBundles - Make sure it reads 10 columns 457 - func (s *SQLiteDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) { 458 - var bundles []*PLCBundle 459 - 460 - for rows.Next() { 461 - var bundle PLCBundle 462 - var didsJSON string 463 - var prevHash sql.NullString 464 - 465 - // Scan: bundle_number, start_time, end_time, dids, hash, compressed_hash, compressed_size, prev_bundle_hash, compressed, created_at 466 - if err := rows.Scan( 467 - &bundle.BundleNumber, 468 - &bundle.StartTime, 469 - &bundle.EndTime, 470 - &didsJSON, 471 - &bundle.Hash, // Uncompressed hash 472 - &bundle.CompressedHash, // Compressed hash 473 - &bundle.CompressedSize, // Compressed size 474 - &prevHash, 475 - &bundle.Compressed, 476 - &bundle.CreatedAt, 477 - ); err != nil { 478 - return nil, err 479 - } 480 - 481 - if prevHash.Valid { 482 - bundle.PrevBundleHash = prevHash.String 483 - } 484 - 485 - json.Unmarshal([]byte(didsJSON), &bundle.DIDs) 486 - bundles = append(bundles, &bundle) 487 - } 488 - 489 - return bundles, rows.Err() 490 - } 491 - 492 - // GetBundleStats - update to use compressed_size 493 - func (s *SQLiteDB) GetBundleStats(ctx context.Context) (int64, int64, error) { 494 - query := ` 495 - SELECT COUNT(*), COALESCE(SUM(compressed_size), 0) 496 - FROM plc_bundles 497 - ` 498 - 499 - var count, totalSize int64 500 - err := s.db.QueryRowContext(ctx, query).Scan(&count, &totalSize) 501 - return count, totalSize, err 502 - } 503 - 504 - // UpsertPDS inserts or updates a PDS server 505 - func (s *SQLiteDB) UpsertPDS(ctx context.Context, pds *PDS) error { 506 - query := ` 507 - INSERT INTO pds_servers (endpoint, discovered_at, last_checked, 
status) 508 - VALUES (?, ?, ?, ?) 509 - ON CONFLICT(endpoint) DO UPDATE SET 510 - last_checked = excluded.last_checked 511 - RETURNING id 512 - ` 513 - err := s.db.QueryRowContext(ctx, query, pds.Endpoint, pds.DiscoveredAt, pds.LastChecked, pds.Status).Scan(&pds.ID) 514 - return err 515 - } 516 - 517 - // PDSExists checks if a PDS endpoint already exists 518 - func (s *SQLiteDB) PDSExists(ctx context.Context, endpoint string) (bool, error) { 519 - query := "SELECT EXISTS(SELECT 1 FROM pds_servers WHERE endpoint = ?)" 520 - var exists bool 521 - err := s.db.QueryRowContext(ctx, query, endpoint).Scan(&exists) 522 - return exists, err 523 - } 524 - 525 - // GetPDSIDByEndpoint gets the ID for an endpoint 526 - func (s *SQLiteDB) GetPDSIDByEndpoint(ctx context.Context, endpoint string) (int64, error) { 527 - query := "SELECT id FROM pds_servers WHERE endpoint = ?" 528 - var id int64 529 - err := s.db.QueryRowContext(ctx, query, endpoint).Scan(&id) 530 - return id, err 531 - } 532 - 533 - // GetPDS retrieves a PDS by endpoint 534 - func (s *SQLiteDB) GetPDS(ctx context.Context, endpoint string) (*PDS, error) { 535 - query := ` 536 - SELECT id, endpoint, discovered_at, last_checked, status, user_count, updated_at 537 - FROM pds_servers 538 - WHERE endpoint = ? 539 - ` 540 - 541 - var pds PDS 542 - var lastChecked sql.NullTime 543 - 544 - err := s.db.QueryRowContext(ctx, query, endpoint).Scan( 545 - &pds.ID, &pds.Endpoint, &pds.DiscoveredAt, &lastChecked, 546 - &pds.Status, &pds.UserCount, &pds.UpdatedAt, 547 - ) 548 - if err != nil { 549 - return nil, err 550 - } 551 - 552 - if lastChecked.Valid { 553 - pds.LastChecked = lastChecked.Time 554 - } 555 - 556 - return &pds, nil 557 - } 558 - 559 - // GetPDSByID retrieves a PDS by ID 560 - func (s *SQLiteDB) GetPDSByID(ctx context.Context, id int64) (*PDS, error) { 561 - query := ` 562 - SELECT id, endpoint, discovered_at, last_checked, status, user_count, updated_at 563 - FROM pds_servers 564 - WHERE id = ? 565 - ` 566 - 567 - var pds PDS 568 - var lastChecked sql.NullTime 569 - 570 - err := s.db.QueryRowContext(ctx, query, id).Scan( 571 - &pds.ID, &pds.Endpoint, &pds.DiscoveredAt, &lastChecked, 572 - &pds.Status, &pds.UserCount, &pds.UpdatedAt, 573 - ) 574 - if err != nil { 575 - return nil, err 576 - } 577 - 578 - if lastChecked.Valid { 579 - pds.LastChecked = lastChecked.Time 580 - } 581 - 582 - return &pds, nil 583 - } 584 - 585 - // GetPDSServers retrieves multiple PDS servers 586 - func (s *SQLiteDB) GetPDSServers(ctx context.Context, filter *PDSFilter) ([]*PDS, error) { 587 - query := ` 588 - SELECT id, endpoint, discovered_at, last_checked, status, user_count, updated_at 589 - FROM pds_servers 590 - ` 591 - args := []interface{}{} 592 - 593 - if filter != nil && filter.Status != "" { 594 - // Map string status to int 595 - statusInt := PDSStatusUnknown 596 - switch filter.Status { 597 - case "online": 598 - statusInt = PDSStatusOnline 599 - case "offline": 600 - statusInt = PDSStatusOffline 601 - } 602 - query += " WHERE status = ?" 603 - args = append(args, statusInt) 604 - } 605 - 606 - query += " ORDER BY user_count DESC" 607 - 608 - if filter != nil && filter.Limit > 0 { 609 - query += fmt.Sprintf(" LIMIT %d OFFSET %d", filter.Limit, filter.Offset) 610 - } 611 - 612 - rows, err := s.db.QueryContext(ctx, query, args...) 
613 - if err != nil { 614 - return nil, err 615 - } 616 - defer rows.Close() 617 - 618 - var servers []*PDS 619 - for rows.Next() { 620 - var pds PDS 621 - var lastChecked sql.NullTime 622 - 623 - err := rows.Scan( 624 - &pds.ID, &pds.Endpoint, &pds.DiscoveredAt, &lastChecked, 625 - &pds.Status, &pds.UserCount, &pds.UpdatedAt, 626 - ) 627 - if err != nil { 628 - return nil, err 629 - } 630 - 631 - if lastChecked.Valid { 632 - pds.LastChecked = lastChecked.Time 633 - } 634 - 635 - servers = append(servers, &pds) 636 - } 637 - 638 - return servers, rows.Err() 639 - } 640 - 641 - // UpdatePDSStatus updates the status and creates a scan record 642 - func (s *SQLiteDB) UpdatePDSStatus(ctx context.Context, pdsID int64, update *PDSUpdate) error { 643 - tx, err := s.db.BeginTx(ctx, nil) 644 - if err != nil { 645 - return err 646 - } 647 - defer tx.Rollback() 648 - 649 - // Calculate user count from scan data 650 - userCount := 0 651 - if update.ScanData != nil { 652 - userCount = update.ScanData.DIDCount 653 - } 654 - 655 - // Update main pds_servers record 656 - query := ` 657 - UPDATE pds_servers 658 - SET status = ?, last_checked = ?, user_count = ?, updated_at = ? 659 - WHERE id = ? 660 - ` 661 - _, err = tx.ExecContext(ctx, query, update.Status, update.LastChecked, userCount, time.Now(), pdsID) 662 - if err != nil { 663 - return err 664 - } 665 - 666 - // Marshal scan data 667 - var scanDataJSON []byte 668 - if update.ScanData != nil { 669 - scanDataJSON, _ = json.Marshal(update.ScanData) 670 - } 671 - 672 - // Insert scan history 673 - scanQuery := ` 674 - INSERT INTO pds_scans (pds_id, status, response_time, scan_data) 675 - VALUES (?, ?, ?, ?) 676 - ` 677 - _, err = tx.ExecContext(ctx, scanQuery, pdsID, update.Status, update.ResponseTime, string(scanDataJSON)) 678 - if err != nil { 679 - return err 680 - } 681 - 682 - return tx.Commit() 683 - } 684 - 685 - // GetPDSScans retrieves scan history for a PDS 686 - func (s *SQLiteDB) GetPDSScans(ctx context.Context, pdsID int64, limit int) ([]*PDSScan, error) { 687 - query := ` 688 - SELECT id, pds_id, status, response_time, scan_data, scanned_at 689 - FROM pds_scans 690 - WHERE pds_id = ? 691 - ORDER BY scanned_at DESC 692 - LIMIT ? 
693 - ` 694 - 695 - rows, err := s.db.QueryContext(ctx, query, pdsID, limit) 696 - if err != nil { 697 - return nil, err 698 - } 699 - defer rows.Close() 700 - 701 - var scans []*PDSScan 702 - for rows.Next() { 703 - var scan PDSScan 704 - var responseTime sql.NullFloat64 705 - var scanDataJSON sql.NullString 706 - 707 - err := rows.Scan(&scan.ID, &scan.PDSID, &scan.Status, &responseTime, &scanDataJSON, &scan.ScannedAt) 708 - if err != nil { 709 - return nil, err 710 - } 711 - 712 - if responseTime.Valid { 713 - scan.ResponseTime = responseTime.Float64 714 - } 715 - 716 - if scanDataJSON.Valid && scanDataJSON.String != "" { 717 - var scanData PDSScanData 718 - if err := json.Unmarshal([]byte(scanDataJSON.String), &scanData); err == nil { 719 - scan.ScanData = &scanData 720 - } 721 - } 722 - 723 - scans = append(scans, &scan) 724 - } 725 - 726 - return scans, rows.Err() 727 - } 728 - 729 - // GetPDSStats returns aggregate statistics 730 - func (s *SQLiteDB) GetPDSStats(ctx context.Context) (*PDSStats, error) { 731 - query := ` 732 - SELECT 733 - COUNT(*) as total_pds, 734 - SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as online_pds, 735 - SUM(CASE WHEN status = 2 THEN 1 ELSE 0 END) as offline_pds, 736 - (SELECT AVG(response_time) FROM pds_scans WHERE response_time > 0 737 - AND scanned_at > datetime('now', '-1 hour')) as avg_response_time, 738 - SUM(user_count) as total_dids 739 - FROM pds_servers 740 - ` 741 - 742 - var stats PDSStats 743 - var avgResponseTime sql.NullFloat64 744 - 745 - err := s.db.QueryRowContext(ctx, query).Scan( 746 - &stats.TotalPDS, &stats.OnlinePDS, &stats.OfflinePDS, &avgResponseTime, &stats.TotalDIDs, 747 - ) 748 - 749 - if avgResponseTime.Valid { 750 - stats.AvgResponseTime = avgResponseTime.Float64 751 - } 752 - 753 - return &stats, err 754 - } 755 - 756 - // GetScanCursor retrieves cursor with bundle number 757 - func (s *SQLiteDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) { 758 - query := "SELECT source, last_bundle_number, last_scan_time, records_processed FROM scan_cursors WHERE source = ?" 759 - 760 - var cursor ScanCursor 761 - err := s.db.QueryRowContext(ctx, query, source).Scan( 762 - &cursor.Source, &cursor.LastBundleNumber, &cursor.LastScanTime, &cursor.RecordsProcessed, 763 - ) 764 - if err == sql.ErrNoRows { 765 - return &ScanCursor{ 766 - Source: source, 767 - LastBundleNumber: 0, 768 - LastScanTime: time.Time{}, 769 - }, nil 770 - } 771 - return &cursor, err 772 - } 773 - 774 - // UpdateScanCursor updates cursor with bundle number 775 - func (s *SQLiteDB) UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error { 776 - query := ` 777 - INSERT INTO scan_cursors (source, last_bundle_number, last_scan_time, records_processed) 778 - VALUES (?, ?, ?, ?) 779 - ON CONFLICT(source) DO UPDATE SET 780 - last_bundle_number = excluded.last_bundle_number, 781 - last_scan_time = excluded.last_scan_time, 782 - records_processed = excluded.records_processed 783 - ` 784 - _, err := s.db.ExecContext(ctx, query, cursor.Source, cursor.LastBundleNumber, cursor.LastScanTime, cursor.RecordsProcessed) 785 - return err 786 - } 787 - 788 - // StorePLCMetrics stores PLC scan metrics 789 - func (s *SQLiteDB) StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error { 790 - query := ` 791 - INSERT INTO plc_metrics (total_dids, total_pds, unique_pds, scan_duration_ms, error_count) 792 - VALUES (?, ?, ?, ?, ?) 
793 - ` 794 - _, err := s.db.ExecContext(ctx, query, metrics.TotalDIDs, metrics.TotalPDS, 795 - metrics.UniquePDS, metrics.ScanDuration, metrics.ErrorCount) 796 - return err 797 - } 798 - 799 - // GetPLCMetrics retrieves recent PLC metrics 800 - func (s *SQLiteDB) GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error) { 801 - query := ` 802 - SELECT total_dids, total_pds, unique_pds, scan_duration_ms, error_count, created_at 803 - FROM plc_metrics 804 - ORDER BY created_at DESC 805 - LIMIT ? 806 - ` 807 - 808 - rows, err := s.db.QueryContext(ctx, query, limit) 809 - if err != nil { 810 - return nil, err 811 - } 812 - defer rows.Close() 813 - 814 - var metrics []*PLCMetrics 815 - for rows.Next() { 816 - var m PLCMetrics 817 - if err := rows.Scan(&m.TotalDIDs, &m.TotalPDS, &m.UniquePDS, &m.ScanDuration, &m.ErrorCount, &m.LastScanTime); err != nil { 818 - return nil, err 819 - } 820 - metrics = append(metrics, &m) 821 - } 822 - 823 - return metrics, rows.Err() 824 - }
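The scan-cursor pair above (GetScanCursor returning a zero-valued cursor on first run, UpdateScanCursor doing an ON CONFLICT upsert) is what lets a scanner resume from the last committed bundle. A minimal sketch of that resume loop, assuming only the method shapes visible in this diff; the `processBundle` callback is hypothetical:

```go
package example

import (
	"context"
	"time"
)

// ScanCursor mirrors the struct from internal/storage/types.go.
type ScanCursor struct {
	Source           string
	LastBundleNumber int
	LastScanTime     time.Time
	RecordsProcessed int64
}

// cursorStore is the subset of the storage interface this sketch needs.
type cursorStore interface {
	GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
	UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
}

// scanFromCursor resumes bundle processing after the last committed bundle.
// processBundle is a hypothetical callback returning the number of records
// handled for that bundle (0 meaning we have caught up).
func scanFromCursor(ctx context.Context, db cursorStore, processBundle func(context.Context, int) (int64, error)) error {
	cur, err := db.GetScanCursor(ctx, "plc")
	if err != nil {
		return err
	}
	for n := cur.LastBundleNumber + 1; ; n++ {
		processed, err := processBundle(ctx, n)
		if err != nil {
			return err
		}
		if processed == 0 {
			return nil // caught up
		}
		// Commit progress after every bundle so a crash resumes at n+1.
		cur.LastBundleNumber = n
		cur.LastScanTime = time.Now()
		cur.RecordsProcessed += processed
		if err := db.UpdateScanCursor(ctx, cur); err != nil {
			return err
		}
	}
}
```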
+245 -56
internal/storage/types.go
··· 1 1 package storage 2 2 3 3 import ( 4 + "database/sql" 4 5 "fmt" 5 6 "path/filepath" 6 7 "time" ··· 15 16 UpdatedAt time.Time 16 17 } 17 18 18 - // PDS represents a Personal Data Server 19 - type PDS struct { 20 - ID int64 // NEW: Primary key 21 - Endpoint string // UNIQUE but not primary key 19 + // Endpoint represents any AT Protocol service endpoint 20 + type Endpoint struct { 21 + ID int64 22 + EndpointType string 23 + Endpoint string 24 + ServerDID string 22 25 DiscoveredAt time.Time 23 26 LastChecked time.Time 24 - Status int // 0=unknown, 1=online, 2=offline 25 - UserCount int64 27 + Status int 28 + IP string 29 + IPv6 string 30 + IPResolvedAt time.Time 31 + Valid bool 26 32 UpdatedAt time.Time 27 33 } 28 34 29 - // PDSUpdate contains fields to update for a PDS 30 - type PDSUpdate struct { 35 + // EndpointUpdate contains fields to update for an Endpoint 36 + type EndpointUpdate struct { 31 37 Status int 32 38 LastChecked time.Time 33 - ResponseTime float64 // milliseconds as float 34 - ScanData *PDSScanData 39 + ResponseTime float64 40 + ScanData *EndpointScanData 35 41 } 36 42 37 - // PDSScanData contains data from a PDS scan 38 - type PDSScanData struct { 39 - ServerInfo interface{} `json:"server_info,omitempty"` 40 - DIDs []string `json:"dids,omitempty"` 41 - DIDCount int `json:"did_count"` 43 + // EndpointScanData contains data from an endpoint scan 44 + type EndpointScanData struct { 45 + ServerInfo interface{} `json:"server_info,omitempty"` 46 + DIDs []string `json:"dids,omitempty"` 47 + DIDCount int `json:"did_count"` 48 + Metadata map[string]interface{} `json:"metadata,omitempty"` 42 49 } 43 50 44 - // PDSScan represents a historical PDS scan 45 - type PDSScan struct { 51 + // EndpointScan represents a historical endpoint scan 52 + type EndpointScan struct { 46 53 ID int64 47 - PDSID int64 54 + EndpointID int64 48 55 Status int 49 56 ResponseTime float64 50 - ScanData *PDSScanData 57 + UserCount int64 58 + Version string 59 + UsedIP string // NEW: Track which IP was actually used 60 + ScanData *EndpointScanData 51 61 ScannedAt time.Time 52 62 } 53 63 ··· 58 68 PDSStatusOffline = 2 59 69 ) 60 70 61 - // PDSFilter for querying PDS servers 62 - type PDSFilter struct { 63 - Status string 64 - MinUserCount int64 65 - Limit int 66 - Offset int 71 + // Endpoint status constants (aliases for compatibility) 72 + const ( 73 + EndpointStatusUnknown = PDSStatusUnknown 74 + EndpointStatusOnline = PDSStatusOnline 75 + EndpointStatusOffline = PDSStatusOffline 76 + ) 77 + 78 + // EndpointFilter for querying endpoints 79 + type EndpointFilter struct { 80 + Type string 81 + Status string 82 + MinUserCount int64 83 + OnlyStale bool 84 + OnlyValid bool 85 + RecheckInterval time.Duration 86 + Random bool 87 + Limit int 88 + Offset int 67 89 } 68 90 69 - // PDSStats contains aggregate statistics about PDS servers 70 - type PDSStats struct { 71 - TotalPDS int64 `json:"total_pds"` 72 - UniquePDS int64 `json:"unique_pds"` 73 - OnlinePDS int64 `json:"online_pds"` 74 - OfflinePDS int64 `json:"offline_pds"` 75 - AvgResponseTime float64 `json:"avg_response_time"` 76 - TotalDIDs int64 `json:"total_dids"` 91 + // EndpointStats contains aggregate statistics about endpoints 92 + type EndpointStats struct { 93 + TotalEndpoints int64 `json:"total_endpoints"` 94 + ByType map[string]int64 `json:"by_type"` 95 + OnlineEndpoints int64 `json:"online_endpoints"` 96 + OfflineEndpoints int64 `json:"offline_endpoints"` 97 + AvgResponseTime float64 `json:"avg_response_time"` 98 + TotalDIDs int64 `json:"total_dids"` 
// Only for PDS 77 99 } 78 100 101 + // Legacy type aliases for backward compatibility in code 102 + type PDS = Endpoint 103 + type PDSUpdate = EndpointUpdate 104 + type PDSScanData = EndpointScanData 105 + type PDSScan = EndpointScan 106 + type PDSFilter = EndpointFilter 107 + type PDSStats = EndpointStats 108 + 79 109 // PLCMetrics contains metrics from PLC directory scans 80 110 type PLCMetrics struct { 81 111 TotalDIDs int64 `json:"total_dids"` ··· 88 118 89 119 // PLCBundle represents a cached bundle of PLC operations 90 120 type PLCBundle struct { 91 - BundleNumber int // PRIMARY KEY 92 - StartTime time.Time 93 - EndTime time.Time 94 - BoundaryCIDs []string 95 - DIDs []string 96 - Hash string // SHA256 of uncompressed JSONL (verifiable against PLC) 97 - CompressedHash string // SHA256 of compressed file on disk 98 - CompressedSize int64 // Size of compressed file in bytes 99 - PrevBundleHash string // Hash of previous bundle (for chain) 100 - Compressed bool 101 - CreatedAt time.Time 121 + BundleNumber int 122 + StartTime time.Time 123 + EndTime time.Time 124 + BoundaryCIDs []string 125 + DIDCount int // Changed from DIDs []string 126 + Hash string 127 + CompressedHash string 128 + CompressedSize int64 129 + UncompressedSize int64 130 + CumulativeCompressedSize int64 131 + CumulativeUncompressedSize int64 132 + Cursor string 133 + PrevBundleHash string 134 + Compressed bool 135 + CreatedAt time.Time 102 136 } 103 137 104 138 // GetFilePath returns the computed file path for this bundle ··· 106 140 return filepath.Join(bundleDir, fmt.Sprintf("%06d.jsonl.zst", b.BundleNumber)) 107 141 } 108 142 109 - // OperationCount() returns 1000 (all bundles have exactly 1000 operations) 143 + // OperationCount returns the number of operations in a bundle (always 10000) 110 144 func (b *PLCBundle) OperationCount() int { 111 - return 1000 145 + return 10000 112 146 } 113 147 114 - // MempoolOperation represents an operation waiting to be bundled 115 - type MempoolOperation struct { 116 - ID int64 117 - DID string 118 - Operation string // JSON of the full operation 119 - CID string 120 - CreatedAt time.Time 121 - AddedAt time.Time 148 + type PLCHistoryPoint struct { 149 + Date string `json:"date"` 150 + BundleNumber int `json:"last_bundle_number"` 151 + OperationCount int `json:"operations"` 152 + UncompressedSize int64 `json:"size_uncompressed"` 153 + CompressedSize int64 `json:"size_compressed"` 154 + CumulativeUncompressed int64 `json:"cumulative_uncompressed"` 155 + CumulativeCompressed int64 `json:"cumulative_compressed"` 122 156 } 123 157 124 - // ScanCursor now stores bundle number 158 + // ScanCursor stores scanning progress 125 159 type ScanCursor struct { 126 160 Source string 127 - LastBundleNumber int // NEW: Last processed bundle number 161 + LastBundleNumber int 128 162 LastScanTime time.Time 129 163 RecordsProcessed int64 130 164 } 165 + 166 + // DIDRecord represents a DID entry in the database 167 + type DIDRecord struct { 168 + DID string `json:"did"` 169 + Handle string `json:"handle,omitempty"` 170 + CurrentPDS string `json:"current_pds,omitempty"` 171 + LastOpAt time.Time `json:"last_op_at,omitempty"` 172 + BundleNumbers []int `json:"bundle_numbers"` 173 + CreatedAt time.Time `json:"created_at"` 174 + } 175 + 176 + // GlobalDIDInfo consolidates DID data from PLC and PDS tables 177 + type GlobalDIDInfo struct { 178 + DIDRecord // Embeds all fields: DID, Handle, CurrentPDS, etc. 
179 + HostingOn []*PDSRepo `json:"hosting_on"` 180 + } 181 + 182 + // IPInfo represents IP information (stored with IP as primary key) 183 + type IPInfo struct { 184 + IP string `json:"ip"` 185 + City string `json:"city,omitempty"` 186 + Country string `json:"country,omitempty"` 187 + CountryCode string `json:"country_code,omitempty"` 188 + ASN int `json:"asn,omitempty"` 189 + ASNOrg string `json:"asn_org,omitempty"` 190 + IsDatacenter bool `json:"is_datacenter"` 191 + IsVPN bool `json:"is_vpn"` 192 + IsCrawler bool `json:"is_crawler"` 193 + IsTor bool `json:"is_tor"` 194 + IsProxy bool `json:"is_proxy"` 195 + Latitude float32 `json:"latitude,omitempty"` 196 + Longitude float32 `json:"longitude,omitempty"` 197 + RawData map[string]interface{} `json:"raw_data,omitempty"` 198 + FetchedAt time.Time `json:"fetched_at"` 199 + UpdatedAt time.Time `json:"updated_at"` 200 + } 201 + 202 + // IsHome returns true if this is a residential/home IP 203 + // (not crawler, datacenter, tor, proxy, or vpn) 204 + func (i *IPInfo) IsHome() bool { 205 + return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN 206 + } 207 + 208 + // PDSListItem is a virtual type created by JOIN for /pds endpoint 209 + type PDSListItem struct { 210 + // From endpoints table 211 + ID int64 212 + Endpoint string 213 + ServerDID string 214 + DiscoveredAt time.Time 215 + LastChecked time.Time 216 + Status int 217 + IP string 218 + IPv6 string 219 + Valid bool // NEW 220 + 221 + // From latest endpoint_scans (via JOIN) 222 + LatestScan *struct { 223 + UserCount int 224 + ResponseTime float64 225 + Version string 226 + ScannedAt time.Time 227 + } 228 + 229 + // From ip_infos table (via JOIN on endpoints.ip) 230 + IPInfo *IPInfo 231 + } 232 + 233 + // PDSDetail is extended version for /pds/{endpoint} 234 + type PDSDetail struct { 235 + PDSListItem 236 + 237 + // Additional data from latest scan 238 + LatestScan *struct { 239 + UserCount int 240 + ResponseTime float64 241 + Version string 242 + ServerInfo interface{} // Full server description 243 + ScannedAt time.Time 244 + } 245 + 246 + // NEW: Aliases (other domains pointing to same server) 247 + Aliases []string `json:"aliases,omitempty"` 248 + IsPrimary bool `json:"is_primary"` 249 + } 250 + 251 + type CountryStats struct { 252 + Country string `json:"country"` 253 + CountryCode string `json:"country_code"` 254 + ActivePDSCount int64 `json:"active_pds_count"` 255 + PDSPercentage float64 `json:"pds_percentage"` 256 + TotalUsers int64 `json:"total_users"` 257 + UsersPercentage float64 `json:"users_percentage"` 258 + AvgResponseTimeMS float64 `json:"avg_response_time_ms"` 259 + } 260 + 261 + type VersionStats struct { 262 + Version string `json:"version"` 263 + PDSCount int64 `json:"pds_count"` 264 + Percentage float64 `json:"percentage"` 265 + PercentageText string `json:"percentage_text"` 266 + TotalUsers int64 `json:"total_users"` 267 + UsersPercentage float64 `json:"users_percentage"` 268 + FirstSeen time.Time `json:"first_seen"` 269 + LastSeen time.Time `json:"last_seen"` 270 + } 271 + 272 + type PDSRepo struct { 273 + ID int64 `json:"id"` 274 + EndpointID int64 `json:"endpoint_id"` 275 + Endpoint string `json:"endpoint,omitempty"` 276 + DID string `json:"did"` 277 + Head string `json:"head,omitempty"` 278 + Rev string `json:"rev,omitempty"` 279 + Active bool `json:"active"` 280 + Status string `json:"status,omitempty"` 281 + FirstSeen time.Time `json:"first_seen"` 282 + LastSeen time.Time `json:"last_seen"` 283 + UpdatedAt time.Time `json:"updated_at"` 284 + 
} 285 + 286 + type PDSRepoData struct { 287 + DID string 288 + Head string 289 + Rev string 290 + Active bool 291 + Status string 292 + } 293 + 294 + type DIDBackfillInfo struct { 295 + DID string 296 + LastBundleNum int 297 + } 298 + 299 + type DIDStateUpdateData struct { 300 + DID string 301 + Handle sql.NullString // Use sql.NullString for potential NULLs 302 + PDS sql.NullString 303 + OpTime time.Time 304 + } 305 + 306 + // TableSizeInfo holds size information for a database table. 307 + type TableSizeInfo struct { 308 + TableName string `json:"table_name"` 309 + TotalBytes int64 `json:"total_bytes"` // Raw bytes 310 + TableHeapBytes int64 `json:"table_heap_bytes"` // Raw bytes 311 + IndexesBytes int64 `json:"indexes_bytes"` // Raw bytes 312 + } 313 + 314 + // IndexSizeInfo holds size information for a database index. 315 + type IndexSizeInfo struct { 316 + IndexName string `json:"index_name"` 317 + TableName string `json:"table_name"` 318 + IndexBytes int64 `json:"index_bytes"` // Raw bytes 319 + }
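A note on the "Legacy type aliases" block above: these are true Go type aliases (`type PDS = Endpoint`), not defined types, so `*PDS` and `*Endpoint` are the identical type and every existing PDS-typed call site keeps compiling with no conversions. A trimmed, self-contained illustration (the structs here are stand-ins for the full definitions above):

```go
package example

// Trimmed stand-ins for the real definitions in types.go.
type Endpoint struct {
	ID       int64
	Endpoint string
	Status   int
}

const PDSStatusOnline = 1

// PDS is a true alias (note the '='), so PDS and Endpoint are the
// identical type: no conversions, and existing code keeps compiling.
type PDS = Endpoint

func onlineEndpoints(all []*PDS) []*Endpoint {
	var out []*Endpoint
	for _, e := range all {
		if e.Status == PDSStatusOnline {
			out = append(out, e) // *PDS is *Endpoint; no cast needed
		}
	}
	return out
}
```

Had the diff used `type PDS Endpoint` instead, `[]*PDS` and `[]*Endpoint` would be distinct types and each caller would need an explicit conversion.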
+27 -3
internal/worker/scheduler.go
··· 5 5 "sync" 6 6 "time" 7 7 8 - "github.com/atscan/atscanner/internal/log" 8 + "github.com/atscan/atscand/internal/log" 9 + "github.com/atscan/atscand/internal/monitor" 9 10 ) 10 11 11 12 type Job struct { ··· 34 35 Interval: interval, 35 36 Fn: fn, 36 37 }) 38 + 39 + // Register job with tracker 40 + monitor.GetTracker().RegisterJob(name) 37 41 } 38 42 39 43 func (s *Scheduler) Start(ctx context.Context) { ··· 52 56 53 57 // Run immediately 54 58 log.Info("Starting job: %s", job.Name) 55 - job.Fn() 59 + s.executeJob(job) 56 60 57 61 for { 62 + // Set next run time 63 + monitor.GetTracker().SetNextRun(job.Name, time.Now().Add(job.Interval)) 64 + 58 65 select { 59 66 case <-ctx.Done(): 60 67 log.Info("Stopping job: %s", job.Name) 61 68 return 62 69 case <-ticker.C: 63 70 log.Info("Running job: %s", job.Name) 64 - job.Fn() 71 + s.executeJob(job) 65 72 } 66 73 } 67 74 } 75 + 76 + func (s *Scheduler) executeJob(job *Job) { 77 + monitor.GetTracker().StartJob(job.Name) 78 + 79 + // Run job and capture any panic 80 + func() { 81 + defer func() { 82 + if r := recover(); r != nil { 83 + log.Error("Job %s panicked: %v", job.Name, r) 84 + monitor.GetTracker().CompleteJob(job.Name, nil) 85 + } 86 + }() 87 + 88 + job.Fn() 89 + monitor.GetTracker().CompleteJob(job.Name, nil) 90 + }() 91 + }
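The scheduler now reports job lifecycle to `internal/monitor`, which is not included in this diff. The sketch below is the smallest tracker consistent with the five calls made here (GetTracker, RegisterJob, SetNextRun, StartJob, CompleteJob); it is an assumption about the package's shape, not its actual code. One observation from the diff: the panic path calls `CompleteJob(job.Name, nil)`, so a tracker like this would record a panicked run as a clean completion; passing an error there may be worth considering.

```go
package monitor

import (
	"sync"
	"time"
)

// JobState is what a status endpoint could expose per job.
type JobState struct {
	Name      string
	Running   bool
	LastStart time.Time
	LastEnd   time.Time
	LastErr   error
	NextRun   time.Time
}

// Tracker is a process-wide, mutex-guarded job registry.
type Tracker struct {
	mu   sync.Mutex
	jobs map[string]*JobState
}

var defaultTracker = &Tracker{jobs: make(map[string]*JobState)}

// GetTracker returns the shared tracker instance.
func GetTracker() *Tracker { return defaultTracker }

func (t *Tracker) RegisterJob(name string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.jobs[name] = &JobState{Name: name}
}

func (t *Tracker) SetNextRun(name string, at time.Time) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if j, ok := t.jobs[name]; ok {
		j.NextRun = at
	}
}

func (t *Tracker) StartJob(name string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if j, ok := t.jobs[name]; ok {
		j.Running, j.LastStart = true, time.Now()
	}
}

func (t *Tracker) CompleteJob(name string, err error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if j, ok := t.jobs[name]; ok {
		j.Running, j.LastEnd, j.LastErr = false, time.Now(), err
	}
}
```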
+125
utils/db-sizes.sh
··· 1 + #!/bin/bash
2 + 
3 + # === Configuration ===
4 + CONFIG_FILE="config.yaml" # Path to your config file
5 + SCHEMA_NAME="public" # Replace if your schema is different
6 + 
7 + # Check if config file exists
8 + if [ ! -f "$CONFIG_FILE" ]; then
9 + echo "Error: Config file not found at '$CONFIG_FILE'"
10 + exit 1
11 + fi
12 + 
13 + # Check if yq is installed
14 + if ! command -v yq &> /dev/null; then
15 + echo "Error: 'yq' command not found. Please install yq (Go version by Mike Farah)."
16 + echo "See: https://github.com/mikefarah/yq/"
17 + exit 1
18 + fi
19 + 
20 + echo "--- Reading connection info from '$CONFIG_FILE' ---"
21 + 
22 + # === Extract Database Config using yq ===
23 + DB_TYPE=$(yq e '.database.type' "$CONFIG_FILE")
24 + DB_CONN_STRING=$(yq e '.database.path' "$CONFIG_FILE") # Connection string URI (postgres://...)
25 + 
26 + if [ -z "$DB_TYPE" ] || [ -z "$DB_CONN_STRING" ]; then
27 + echo "Error: Could not read database type or path from '$CONFIG_FILE'."
28 + exit 1
29 + fi
30 + 
31 + # === Parse the Connection String ===
32 + DB_USER=""
33 + DB_PASSWORD=""
34 + DB_HOST="localhost" # Default
35 + DB_PORT="5432" # Default
36 + DB_NAME=""
37 + 
38 + # Use regex to parse the URI (handles postgres:// or postgresql://, optional password/port, and query parameters)
39 + if [[ "$DB_CONN_STRING" =~ ^(postgres|postgresql)://([^:]+)(:([^@]+))?@([^:/]+)(:([0-9]+))?/([^?]+)(\?.+)?$ ]]; then
40 + DB_USER="${BASH_REMATCH[2]}"
41 + DB_PASSWORD="${BASH_REMATCH[4]}" # Optional group
42 + DB_HOST="${BASH_REMATCH[5]}"
43 + DB_PORT="${BASH_REMATCH[7]:-$DB_PORT}" # Use extracted port or default
44 + DB_NAME="${BASH_REMATCH[8]}" # Database name before the '?'
45 + else
46 + echo "Error: Could not parse database connection string URI: $DB_CONN_STRING"
47 + exit 1
48 + fi
49 + 
50 + # Set PGPASSWORD environment variable if password was found
51 + if [ -n "$DB_PASSWORD" ]; then
52 + export PGPASSWORD="$DB_PASSWORD"
53 + else
54 + echo "Warning: No password found in connection string. Relying on ~/.pgpass or password prompt."
55 + unset PGPASSWORD
56 + fi
57 + 
58 + echo "--- Database Size Investigation ---"
59 + echo "Database: $DB_NAME"
60 + echo "Schema: $SCHEMA_NAME"
61 + echo "User: $DB_USER"
62 + echo "Host: $DB_HOST:$DB_PORT"
63 + echo "-----------------------------------"
64 + 
65 + # === Table Sizes ===
66 + echo ""
67 + echo "## Table Sizes (Schema: $SCHEMA_NAME) ##"
68 + # Aligned output with column headers; -P footer=off suppresses the row-count footer
69 + psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
70 + SELECT
71 + c.relname AS "Table Name",
72 + pg_size_pretty(pg_total_relation_size(c.oid)) AS "Total Size",
73 + pg_size_pretty(pg_relation_size(c.oid)) AS "Table Heap Size",
74 + pg_size_pretty(pg_indexes_size(c.oid)) AS "Indexes Size"
75 + FROM
76 + pg_class c
77 + LEFT JOIN
78 + pg_namespace n ON n.oid = c.relnamespace
79 + WHERE
80 + c.relkind = 'r' -- 'r' = ordinary table
81 + AND n.nspname = '$SCHEMA_NAME'
82 + ORDER BY
83 + pg_total_relation_size(c.oid) DESC;
84 + EOF
85 + 
86 + if [ $? -ne 0 ]; then
87 + echo "Error querying table sizes. Check connection details, permissions, and password."
88 + unset PGPASSWORD
89 + exit 1
90 + fi
91 + 
92 + # === Index Sizes ===
93 + echo ""
94 + echo "## Index Sizes (Schema: $SCHEMA_NAME) ##"
95 + # Aligned output with column headers; -P footer=off suppresses the row-count footer
96 + psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF
97 + SELECT
98 + c.relname AS "Index Name",
99 + i.indrelid::regclass AS "Table Name", -- Show associated table
100 + pg_size_pretty(pg_relation_size(c.oid)) AS "Index Size"
101 + FROM
102 + pg_class c
103 + LEFT JOIN
104 + pg_index i ON i.indexrelid = c.oid
105 + LEFT JOIN
106 + pg_namespace n ON n.oid = c.relnamespace
107 + WHERE
108 + c.relkind = 'i' -- 'i' = index
109 + AND n.nspname = '$SCHEMA_NAME'
110 + ORDER BY
111 + pg_relation_size(c.oid) DESC;
112 + EOF
113 + 
114 + if [ $? -ne 0 ]; then
115 + echo "Error querying index sizes. Check connection details, permissions, and password."
116 + unset PGPASSWORD
117 + exit 1
118 + fi
119 + 
120 + echo ""
121 + echo "-----------------------------------"
122 + echo "Investigation complete."
123 + 
124 + # Unset the password variable for security
125 + unset PGPASSWORD
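For reference, the URI parsing that the bash regex above does by hand is a few lines in Go (the project's language) with net/url. A sketch, not code from the repository:

```go
package example

import (
	"fmt"
	"net/url"
	"strings"
)

// parseConn extracts the same fields as the shell regex above from a
// postgres:// or postgresql:// connection URI.
func parseConn(conn string) (user, pass, host, port, db string, err error) {
	u, err := url.Parse(conn)
	if err != nil {
		return
	}
	if u.Scheme != "postgres" && u.Scheme != "postgresql" {
		err = fmt.Errorf("unsupported scheme %q", u.Scheme)
		return
	}
	user = u.User.Username()
	pass, _ = u.User.Password()
	host = u.Hostname()
	port = u.Port()
	if port == "" {
		port = "5432" // same default as the script
	}
	db = strings.TrimPrefix(u.Path, "/") // query parameters are ignored
	return
}
```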
+113
utils/import-labels.js
··· 1 + import { file, write } from "bun";
2 + import { join } from "path";
3 + import { mkdir } from "fs/promises";
4 + import { init, compress } from "@bokuweb/zstd-wasm";
5 + 
6 + // --- Configuration ---
7 + const CSV_FILE = process.argv[2];
8 + const CONFIG_FILE = "config.yaml";
9 + const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
10 + // ---------------------
11 + 
12 + if (!CSV_FILE) {
13 + console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
14 + process.exit(1);
15 + }
16 + 
17 + console.log("========================================");
18 + console.log("PLC Operation Labels Import (Bun + WASM)");
19 + console.log("========================================");
20 + 
21 + // 1. Read and parse config
22 + console.log(`Loading config from ${CONFIG_FILE}...`);
23 + const configFile = await file(CONFIG_FILE).text();
24 + const config = Bun.YAML.parse(configFile);
25 + const bundleDir = config?.plc?.bundle_dir;
26 + 
27 + if (!bundleDir) {
28 + console.error("Error: Could not parse plc.bundle_dir from config.yaml");
29 + process.exit(1);
30 + }
31 + 
32 + const FINAL_LABELS_DIR = join(bundleDir, "labels");
33 + await mkdir(FINAL_LABELS_DIR, { recursive: true });
34 + 
35 + console.log(`CSV File: ${CSV_FILE}`);
36 + console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
37 + console.log("");
38 + 
39 + // 2. Initialize Zstd WASM module
40 + await init();
41 + 
42 + // --- Pass 1: Read entire file into memory and group by bundle ---
43 + console.log("Pass 1/2: Reading and grouping all lines by bundle...");
44 + console.warn("This will use a large amount of RAM!");
45 + 
46 + const startTime = Date.now();
47 + const bundles = new Map(); // Map<string, string[]>
48 + let lineCount = 0;
49 + 
50 + const inputFile = file(CSV_FILE);
51 + const fileStream = inputFile.stream();
52 + const decoder = new TextDecoder();
53 + let remainder = "";
54 + 
55 + for await (const chunk of fileStream) {
56 + const text = remainder + decoder.decode(chunk, { stream: true }); // stream:true keeps multi-byte chars split across chunks intact
57 + const lines = text.split("\n");
58 + remainder = lines.pop() || "";
59 + 
60 + for (const line of lines) {
61 + if (line === "") continue;
62 + lineCount++;
63 + 
64 + if (lineCount === 1 && line.startsWith("bundle,")) {
65 + continue; // Skip header
66 + }
67 + 
68 + const firstCommaIndex = line.indexOf(",");
69 + if (firstCommaIndex === -1) {
70 + console.warn(`Skipping malformed line: ${line}`);
71 + continue;
72 + }
73 + const bundleNumStr = line.substring(0, firstCommaIndex);
74 + const bundleKey = bundleNumStr.padStart(6, "0");
75 + 
76 + // Add line to the correct bundle's array
77 + if (!bundles.has(bundleKey)) {
78 + bundles.set(bundleKey, []);
79 + }
80 + bundles.get(bundleKey).push(line);
81 + }
82 + }
83 + // Note: assumes the CSV ends with a trailing newline; a non-empty `remainder` here would be a truncated final line
84 + 
85 + console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
86 + console.log(`Found ${bundles.size} unique bundles.`);
87 + 
88 + // --- Pass 2: Compress and write each bundle ---
89 + console.log("\nPass 2/2: Compressing and writing bundle files...");
90 + let i = 0;
91 + for (const [bundleKey, lines] of bundles.entries()) {
92 + i++;
93 + console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
94 + 
95 + // Join all lines for this bundle into one big string
96 + const content = lines.join("\n");
97 + 
98 + // Compress the string
99 + const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
100 + 
101 + // Write the compressed data to the file
102 + const outPath = join(FINAL_LABELS_DIR, 
`${bundleKey}.csv.zst`); 103 + await write(outPath, compressedData); 104 + } 105 + 106 + // 3. Clean up 107 + const totalTime = (Date.now() - startTime) / 1000; 108 + console.log("\n========================================"); 109 + console.log("Import Summary"); 110 + console.log("========================================"); 111 + console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`); 112 + console.log(`Total lines processed: ${lineCount.toLocaleString()}`); 113 + console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
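For comparison, the same two-pass group-then-compress shape in Go, using the github.com/klauspost/compress/zstd encoder (an assumed dependency; the script above uses @bokuweb/zstd-wasm). Like the Bun version, it trades RAM for simplicity by holding every group in memory:

```go
package example

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/klauspost/compress/zstd"
)

// groupAndCompress reads the CSV once, groups lines by zero-padded bundle
// number, then writes one <bundle>.csv.zst per group.
func groupAndCompress(csvPath, outDir string) error {
	f, err := os.Open(csvPath)
	if err != nil {
		return err
	}
	defer f.Close()

	groups := make(map[string][]string)
	sc := bufio.NewScanner(f)
	sc.Buffer(make([]byte, 0, 1<<20), 1<<20) // tolerate long lines
	first := true
	for sc.Scan() {
		line := sc.Text()
		if first {
			first = false
			if strings.HasPrefix(line, "bundle,") {
				continue // skip header
			}
		}
		numStr, _, ok := strings.Cut(line, ",")
		if !ok {
			continue // malformed line
		}
		n, err := strconv.Atoi(numStr)
		if err != nil {
			continue
		}
		key := fmt.Sprintf("%06d", n) // same padding as padStart(6, "0")
		groups[key] = append(groups[key], line)
	}
	if err := sc.Err(); err != nil {
		return err
	}

	enc, err := zstd.NewWriter(nil) // writer-less encoder, reused for EncodeAll
	if err != nil {
		return err
	}
	defer enc.Close()
	for key, lines := range groups {
		data := enc.EncodeAll([]byte(strings.Join(lines, "\n")), nil)
		if err := os.WriteFile(filepath.Join(outDir, key+".csv.zst"), data, 0o644); err != nil {
			return err
		}
	}
	return nil
}
```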
+91
utils/import-labels.sh
··· 1 + #!/bin/bash
2 + # import-labels.sh (sorted-pipe variant)
3 + 
4 + set -e
5 + 
6 + if [ $# -lt 1 ]; then
7 + echo "Usage: ./utils/import-labels.sh <csv-file>"
8 + exit 1
9 + fi
10 + 
11 + CSV_FILE="$1"
12 + CONFIG_FILE="config.yaml"
13 + 
14 + [ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
15 + [ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
16 + 
17 + # Extract bundle directory path
18 + BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
19 + 
20 + [ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
21 + 
22 + FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
23 + 
24 + echo "========================================"
25 + echo "PLC Operation Labels Import (Sorted Pipe)"
26 + echo "========================================"
27 + echo "CSV File: $CSV_FILE"
28 + echo "Output Dir: $FINAL_LABELS_DIR"
29 + echo ""
30 + 
31 + # Ensure the final directory exists
32 + mkdir -p "$FINAL_LABELS_DIR"
33 + 
34 + echo "Streaming, sorting, and compressing on the fly..."
35 + echo "This will take time. 'pv' shows progress of the tail stage."
36 + echo "The 'sort' stage starts emitting only after 'pv' has finished feeding it."
37 + echo ""
38 + 
39 + # This is the single-pass pipeline
40 + tail -n +2 "$CSV_FILE" | \
41 + pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
42 + sort -t, -k1,1n | \
43 + awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
44 + # This awk script EXPECTS input sorted by bundle number (col 1)
45 + BEGIN {
46 + # last_bundle_num tracks the bundle we are currently writing
47 + last_bundle_num = -1
48 + # cmd holds the current zstd pipe command
49 + cmd = ""
50 + }
51 + {
52 + current_bundle_num = $1
53 + 
54 + # Check if the bundle number has changed
55 + if (current_bundle_num != last_bundle_num) {
56 + 
57 + # If it changed, and we have an old pipe open, close it
58 + if (last_bundle_num != -1) {
59 + close(cmd)
60 + }
61 + 
62 + # Create the new pipe command, writing to the final .zst file
63 + # (-q keeps zstd quiet, -f overwrites existing files on re-runs)
64 + outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
65 + cmd = "zstd -q -f -T0 -o " outfile
66 + 
67 + # Update the tracker
68 + last_bundle_num = current_bundle_num
69 + 
70 + # Print progress to stderr
71 + printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
72 + }
73 + 
74 + # Print the current line ($0) to the open pipe
75 + # The first time this runs for a bundle, it opens the pipe
76 + # Subsequent times, it writes to the already-open pipe
77 + print $0 | cmd
78 + }
79 + # END block: close the very last pipe
80 + END {
81 + if (last_bundle_num != -1) {
82 + close(cmd)
83 + }
84 + printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
85 + }'
86 + 
87 + echo ""
88 + echo "========================================"
89 + echo "Import Summary"
90 + echo "========================================"
91 + echo "✓ Import completed successfully!"
92 + echo "Label files are stored in: $FINAL_LABELS_DIR"
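The awk stage is the classic sorted-stream pattern: because sort(1) guarantees input ordered by bundle number, only one output needs to be open at a time, rotated whenever the key changes. The same pattern sketched in Go, again assuming the klauspost zstd encoder:

```go
package example

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/klauspost/compress/zstd"
)

// writeSorted consumes lines already ordered by bundle number and rotates
// the output encoder whenever the key changes, exactly one file open at
// a time, like the awk script's close(cmd)/reopen dance.
func writeSorted(r io.Reader, outDir string) error {
	var (
		cur  = -1
		file *os.File
		enc  *zstd.Encoder
	)
	closeOut := func() error {
		if enc == nil {
			return nil
		}
		if err := enc.Close(); err != nil {
			return err
		}
		return file.Close()
	}
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		line := sc.Text()
		numStr, _, ok := strings.Cut(line, ",")
		if !ok {
			continue
		}
		n, err := strconv.Atoi(numStr)
		if err != nil {
			continue
		}
		if n != cur { // key changed: close the old output, open a new one
			if err := closeOut(); err != nil {
				return err
			}
			file, err = os.Create(filepath.Join(outDir, fmt.Sprintf("%06d.csv.zst", n)))
			if err != nil {
				return err
			}
			if enc, err = zstd.NewWriter(file); err != nil {
				return err
			}
			cur = n
		}
		if _, err := enc.Write([]byte(line + "\n")); err != nil {
			return err
		}
	}
	if err := sc.Err(); err != nil {
		return err
	}
	return closeOut()
}
```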
+160
utils/migrate-ipinfo.sh
··· 1 + #!/bin/bash
2 + # migrate_ipinfo.sh - Migrate IP info from endpoints to ip_infos table
3 + 
4 + # Configuration (edit these)
5 + DB_HOST="localhost"
6 + DB_PORT="5432"
7 + DB_NAME="atscand"
8 + DB_USER="atscand"
9 + DB_PASSWORD="${DB_PASSWORD:-}" # Supply via environment; never hardcode credentials
10 + 
11 + # Colors for output
12 + RED='\033[0;31m'
13 + GREEN='\033[0;32m'
14 + YELLOW='\033[1;33m'
15 + NC='\033[0m' # No Color
16 + 
17 + echo -e "${GREEN}=== IP Info Migration Script ===${NC}"
18 + echo ""
19 + 
20 + # Export password for psql
21 + export PGPASSWORD="$DB_PASSWORD"
22 + 
23 + # Check if we can connect
24 + echo -e "${YELLOW}Testing database connection...${NC}"
25 + if ! psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "SELECT 1;" > /dev/null 2>&1; then
26 + echo -e "${RED}Error: Cannot connect to database${NC}"
27 + exit 1
28 + fi
29 + echo -e "${GREEN}✓ Connected to database${NC}"
30 + echo ""
31 + 
32 + # Create ip_infos table if it doesn't exist
33 + echo -e "${YELLOW}Creating ip_infos table...${NC}"
34 + psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" << 'SQL'
35 + CREATE TABLE IF NOT EXISTS ip_infos (
36 + ip TEXT PRIMARY KEY,
37 + city TEXT,
38 + country TEXT,
39 + country_code TEXT,
40 + asn INTEGER,
41 + asn_org TEXT,
42 + is_datacenter BOOLEAN,
43 + is_vpn BOOLEAN,
44 + latitude REAL,
45 + longitude REAL,
46 + raw_data JSONB,
47 + fetched_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
48 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
49 + );
50 + 
51 + CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
52 + CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
53 + SQL
54 + 
55 + if [ $? -eq 0 ]; then
56 + echo -e "${GREEN}✓ ip_infos table ready${NC}"
57 + else
58 + echo -e "${RED}✗ Failed to create table${NC}"
59 + exit 1
60 + fi
61 + echo ""
62 + 
63 + # Count how many endpoints have IP info
64 + echo -e "${YELLOW}Checking existing data...${NC}"
65 + ENDPOINT_COUNT=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c \
66 + "SELECT COUNT(*) FROM endpoints WHERE ip IS NOT NULL AND ip != '' AND ip_info IS NOT NULL;")
67 + echo -e "Endpoints with IP info: ${GREEN}${ENDPOINT_COUNT}${NC}"
68 + 
69 + EXISTING_IP_COUNT=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c \
70 + "SELECT COUNT(*) FROM ip_infos;")
71 + echo -e "Existing IPs in ip_infos table: ${GREEN}${EXISTING_IP_COUNT}${NC}"
72 + echo ""
73 + 
74 + # Migrate data
75 + echo -e "${YELLOW}Migrating IP info data...${NC}"
76 + psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" << 'SQL'
77 + -- Migrate IP info from endpoints to ip_infos
78 + -- Only insert IPs that don't already exist in ip_infos
79 + INSERT INTO ip_infos (
80 + ip,
81 + city,
82 + country,
83 + country_code,
84 + asn,
85 + asn_org,
86 + is_datacenter,
87 + is_vpn,
88 + latitude,
89 + longitude,
90 + raw_data,
91 + fetched_at,
92 + updated_at
93 + )
94 + SELECT DISTINCT ON (e.ip)
95 + e.ip,
96 + e.ip_info->'location'->>'city' AS city,
97 + e.ip_info->'location'->>'country' AS country,
98 + e.ip_info->'location'->>'country_code' AS country_code,
99 + (e.ip_info->'asn'->>'asn')::INTEGER AS asn,
100 + e.ip_info->'asn'->>'org' AS asn_org,
101 + -- Check if company type is "hosting" for datacenter detection
102 + CASE
103 + WHEN e.ip_info->'company'->>'type' = 'hosting' THEN true
104 + ELSE false
105 + END AS is_datacenter,
106 + -- Check VPN from security field
107 + COALESCE((e.ip_info->'security'->>'vpn')::BOOLEAN, false) AS is_vpn,
108 + -- Latitude and longitude
109 + (e.ip_info->'location'->>'latitude')::REAL AS latitude,
110 + (e.ip_info->'location'->>'longitude')::REAL AS longitude,
111 + -- Store full raw data
112 + e.ip_info AS raw_data,
113 + COALESCE(e.updated_at, CURRENT_TIMESTAMP) AS fetched_at,
114 + CURRENT_TIMESTAMP AS updated_at
115 + FROM endpoints e
116 + WHERE
117 + e.ip IS NOT NULL
118 + AND e.ip != ''
119 + AND e.ip_info IS NOT NULL
120 + AND NOT EXISTS (
121 + SELECT 1 FROM ip_infos WHERE ip_infos.ip = e.ip
122 + )
123 + ORDER BY e.ip, e.updated_at DESC NULLS LAST;
124 + SQL
125 + 
126 + if [ $? -eq 0 ]; then
127 + echo -e "${GREEN}✓ Data migration completed${NC}"
128 + else
129 + echo -e "${RED}✗ Migration failed${NC}"
130 + exit 1
131 + fi
132 + echo ""
133 + 
134 + # Show results
135 + echo -e "${YELLOW}Migration summary:${NC}"
136 + NEW_IP_COUNT=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c \
137 + "SELECT COUNT(*) FROM ip_infos;")
138 + MIGRATED=$((NEW_IP_COUNT - EXISTING_IP_COUNT))
139 + echo -e "Total IPs now in ip_infos: ${GREEN}${NEW_IP_COUNT}${NC}"
140 + echo -e "Newly migrated: ${GREEN}${MIGRATED}${NC}"
141 + echo ""
142 + 
143 + # Show sample data
144 + echo -e "${YELLOW}Sample migrated data:${NC}"
145 + psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c \
146 + "SELECT ip, city, country, country_code, asn, is_datacenter, is_vpn FROM ip_infos LIMIT 5;"
147 + echo ""
148 + 
149 + # Optional: Drop old columns (commented out for safety)
150 + echo -e "${YELLOW}Cleanup options:${NC}"
151 + echo -e "To remove old ip_info column from endpoints table, run:"
152 + echo -e "${RED} ALTER TABLE endpoints DROP COLUMN IF EXISTS ip_info;${NC}"
153 + echo -e "To remove old user_count column from endpoints table, run:"
154 + echo -e "${RED} ALTER TABLE endpoints DROP COLUMN IF EXISTS user_count;${NC}"
155 + echo ""
156 + 
157 + echo -e "${GREEN}=== Migration Complete ===${NC}"
158 + 
159 + # Unset password
160 + unset PGPASSWORD
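Once migrated, a row in ip_infos maps directly onto the IPInfo struct added in internal/storage/types.go. A sketch of reading one back with database/sql, scanning nullable columns through sql.Null* so rows with missing geo data do not error; the query text is illustrative, not lifted from the repository:

```go
package example

import (
	"context"
	"database/sql"
	"encoding/json"
	"time"
)

// IPInfo mirrors (a subset of) the struct added in internal/storage/types.go.
type IPInfo struct {
	IP           string
	City         string
	Country      string
	CountryCode  string
	ASN          int
	ASNOrg       string
	IsDatacenter bool
	IsVPN        bool
	RawData      map[string]interface{}
	FetchedAt    time.Time
}

// getIPInfo reads one migrated row back from ip_infos.
func getIPInfo(ctx context.Context, db *sql.DB, ip string) (*IPInfo, error) {
	const q = `SELECT ip, city, country, country_code, asn, asn_org,
	                  is_datacenter, is_vpn, raw_data, fetched_at
	           FROM ip_infos WHERE ip = $1`
	var (
		info                   IPInfo
		city, country, cc, org sql.NullString
		asn                    sql.NullInt64
		dc, vpn                sql.NullBool
		raw                    []byte
	)
	err := db.QueryRowContext(ctx, q, ip).Scan(&info.IP, &city, &country,
		&cc, &asn, &org, &dc, &vpn, &raw, &info.FetchedAt)
	if err != nil {
		return nil, err
	}
	info.City, info.Country, info.CountryCode, info.ASNOrg = city.String, country.String, cc.String, org.String
	info.ASN, info.IsDatacenter, info.IsVPN = int(asn.Int64), dc.Bool, vpn.Bool
	if len(raw) > 0 {
		_ = json.Unmarshal(raw, &info.RawData) // raw_data holds the full original payload
	}
	return &info, nil
}
```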
+199
utils/vuln-scanner-parallel.sh
··· 1 + #!/bin/bash 2 + 3 + # Configuration 4 + API_HOST="${API_HOST:-http://localhost:8080}" 5 + TIMEOUT=5 6 + PARALLEL_JOBS=20 7 + OUTPUT_DIR="./pds_scan_results" 8 + TIMESTAMP=$(date +%Y%m%d_%H%M%S) 9 + RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt" 10 + FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt" 11 + 12 + # Paths to check 13 + PATHS=( 14 + "/info.php" 15 + "/phpinfo.php" 16 + "/test.php" 17 + "/admin" 18 + "/admin.php" 19 + "/wp-admin" 20 + "/robots.txt" 21 + "/.env" 22 + "/.git/config" 23 + "/config.php" 24 + "/backup" 25 + "/db.sql" 26 + "/.DS_Store" 27 + "/server-status" 28 + "/.well-known/security.txt" 29 + ) 30 + 31 + # Colors 32 + RED='\033[0;31m' 33 + GREEN='\033[0;32m' 34 + YELLOW='\033[1;33m' 35 + BLUE='\033[0;34m' 36 + NC='\033[0m' 37 + 38 + # Check dependencies 39 + if ! command -v jq &> /dev/null; then 40 + echo -e "${RED}Error: jq is required${NC}" 41 + echo "Install: sudo apt-get install jq" 42 + exit 1 43 + fi 44 + 45 + if ! command -v parallel &> /dev/null; then 46 + echo -e "${RED}Error: GNU parallel is required${NC}" 47 + echo "Install: sudo apt-get install parallel (or brew install parallel)" 48 + exit 1 49 + fi 50 + 51 + mkdir -p "$OUTPUT_DIR" 52 + 53 + echo -e "${BLUE}╔════════════════════════════════════════╗${NC}" 54 + echo -e "${BLUE}║ PDS Security Scanner (Parallel) ║${NC}" 55 + echo -e "${BLUE}╚════════════════════════════════════════╝${NC}" 56 + echo "" 57 + echo "API Host: $API_HOST" 58 + echo "Timeout: ${TIMEOUT}s per request" 59 + echo "Parallel jobs: ${PARALLEL_JOBS}" 60 + echo "Paths to check: ${#PATHS[@]}" 61 + echo "" 62 + 63 + # Scan function - will be called by GNU parallel 64 + scan_endpoint() { 65 + local endpoint="$1" 66 + local timeout="$2" 67 + shift 2 68 + local paths=("$@") 69 + 70 + for path in "${paths[@]}"; do 71 + url="${endpoint}${path}" 72 + 73 + response=$(curl -s -o /dev/null -w "%{http_code}" \ 74 + --max-time "$timeout" \ 75 + --connect-timeout "$timeout" \ 76 + --retry 0 \ 77 + -A "Mozilla/5.0 (Security Scanner)" \ 78 + "$url" 2>/dev/null) 79 + 80 + if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then 81 + if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then 82 + echo "FOUND|$endpoint|$path|$response" 83 + elif [ "$response" != "403" ] && [ "$response" != "401" ]; then 84 + echo "MAYBE|$endpoint|$path|$response" 85 + fi 86 + fi 87 + done 88 + } 89 + 90 + export -f scan_endpoint 91 + 92 + # Fetch active PDS endpoints 93 + echo -e "${YELLOW}Fetching active PDS endpoints...${NC}" 94 + ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \ 95 + jq -r '.[].endpoint' 2>/dev/null) 96 + 97 + if [ -z "$ENDPOINTS" ]; then 98 + echo -e "${RED}Error: Could not fetch endpoints from API${NC}" 99 + echo "Check that the API is running at: $API_HOST" 100 + exit 1 101 + fi 102 + 103 + ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ') 104 + echo -e "${GREEN}✓ Found ${ENDPOINT_COUNT} active PDS endpoints${NC}" 105 + echo "" 106 + 107 + # Write header to results file 108 + { 109 + echo "PDS Security Scan Results" 110 + echo "=========================" 111 + echo "Scan started: $(date)" 112 + echo "Endpoints scanned: ${ENDPOINT_COUNT}" 113 + echo "Paths checked: ${#PATHS[@]}" 114 + echo "Parallel jobs: ${PARALLEL_JOBS}" 115 + echo "" 116 + echo "Results:" 117 + echo "--------" 118 + } > "$RESULTS_FILE" 119 + 120 + # Run parallel scan 121 + echo -e "${YELLOW}Starting parallel scan...${NC}" 122 + echo -e "${BLUE}(This may take a few minutes depending on 
endpoint count)${NC}"
123 + echo ""
124 + 
125 + echo "$ENDPOINTS" | \
126 + parallel \
127 + -j "$PARALLEL_JOBS" \
128 + --bar \
129 + --joblog "${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" \
130 + scan_endpoint {} "$TIMEOUT" "${PATHS[@]}" \
131 + >> "$RESULTS_FILE"
132 + 
133 + echo ""
134 + echo -e "${YELLOW}Processing results...${NC}"
135 + 
136 + # Count results (grep -c prints 0 itself on no match; the fallback only covers a missing file)
137 + FOUND_COUNT=$(grep -c "^FOUND|" "$RESULTS_FILE" 2>/dev/null); FOUND_COUNT=${FOUND_COUNT:-0}
138 + MAYBE_COUNT=$(grep -c "^MAYBE|" "$RESULTS_FILE" 2>/dev/null); MAYBE_COUNT=${MAYBE_COUNT:-0}
139 + 
140 + # Extract found URLs to separate file
141 + {
142 + echo "Found URLs (HTTP 200/301/302)"
143 + echo "=============================="
144 + echo "Scan: $(date)"
145 + echo ""
146 + } > "$FOUND_FILE"
147 + 
148 + grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | while IFS='|' read -r status endpoint path code; do
149 + echo "$endpoint$path [$code]"
150 + done >> "$FOUND_FILE"
151 + 
152 + # Create summary at end of results file
153 + {
154 + echo ""
155 + echo "Summary"
156 + echo "======="
157 + echo "Scan completed: $(date)"
158 + echo "Total endpoints scanned: ${ENDPOINT_COUNT}"
159 + echo "Total paths checked: $((ENDPOINT_COUNT * ${#PATHS[@]}))"
160 + echo "Found (200/301/302): ${FOUND_COUNT}"
161 + echo "Maybe (other codes): ${MAYBE_COUNT}"
162 + } >> "$RESULTS_FILE"
163 + 
164 + # Display summary
165 + echo ""
166 + echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
167 + echo -e "${BLUE}║ Scan Complete! ║${NC}"
168 + echo -e "${BLUE}╚════════════════════════════════════════╝${NC}"
169 + echo ""
170 + echo -e "Endpoints scanned: ${GREEN}${ENDPOINT_COUNT}${NC}"
171 + echo -e "Paths checked per site: ${BLUE}${#PATHS[@]}${NC}"
172 + echo -e "Total requests made: ${BLUE}$((ENDPOINT_COUNT * ${#PATHS[@]}))${NC}"
173 + echo ""
174 + echo -e "Results:"
175 + echo -e " ${GREEN}✓ Found (200/301/302):${NC} ${FOUND_COUNT}"
176 + echo -e " ${YELLOW}? Maybe (other):${NC} ${MAYBE_COUNT}"
177 + echo ""
178 + echo "Files created:"
179 + echo " Full results: $RESULTS_FILE"
180 + echo " Found URLs: $FOUND_FILE"
181 + echo " Job log: ${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt"
182 + 
183 + # Show sample of found URLs if any
184 + if [ "$FOUND_COUNT" -gt 0 ]; then
185 + echo ""
186 + echo -e "${RED}⚠ SECURITY ALERT: Found exposed paths!${NC}"
187 + echo ""
188 + echo "Sample findings (first 10):"
189 + grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | head -10 | while IFS='|' read -r status endpoint path code; do
190 + echo -e " ${RED}✗${NC} $endpoint${RED}$path${NC} [$code]"
191 + done
192 + 
193 + if [ "$FOUND_COUNT" -gt 10 ]; then
194 + echo ""
195 + echo " ... and $((FOUND_COUNT - 10)) more (see $FOUND_FILE)"
196 + fi
197 + fi
198 + 
199 + echo ""
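The GNU parallel invocation above is, in effect, a bounded worker pool. A Go sketch of the same shape, in case this ever moves into atscand itself: N goroutines drain an endpoint channel, probe each path with a short timeout, and redirects are deliberately not followed so 301 and 302 surface as-is (matching the curl flags above, which omit -L):

```go
package example

import (
	"fmt"
	"net/http"
	"sync"
	"time"
)

// probeAll fans endpoints out to a fixed number of workers and returns
// "endpoint+path [code]" strings for 200/301/302 responses.
func probeAll(endpoints, paths []string, workers int) []string {
	client := &http.Client{
		Timeout: 5 * time.Second,
		// Do not follow redirects so 301/302 are reported as-is.
		CheckRedirect: func(*http.Request, []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}
	jobs := make(chan string)
	var (
		mu    sync.Mutex
		found []string
		wg    sync.WaitGroup
	)
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for ep := range jobs {
				for _, p := range paths {
					resp, err := client.Get(ep + p)
					if err != nil {
						continue // timeout or connection error, like curl's 000
					}
					resp.Body.Close()
					switch resp.StatusCode {
					case 200, 301, 302:
						mu.Lock()
						found = append(found, fmt.Sprintf("%s%s [%d]", ep, p, resp.StatusCode))
						mu.Unlock()
					}
				}
			}
		}()
	}
	for _, ep := range endpoints {
		jobs <- ep
	}
	close(jobs)
	wg.Wait()
	return found
}
```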
+117
utils/vuln-scanner.sh
··· 1 + #!/bin/bash 2 + 3 + # Configuration 4 + API_HOST="${API_HOST:-http://localhost:8080}" 5 + TIMEOUT=5 6 + OUTPUT_DIR="./pds_scan_results" 7 + TIMESTAMP=$(date +%Y%m%d_%H%M%S) 8 + RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt" 9 + FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt" 10 + 11 + # Paths to check (one per line for easier editing) 12 + PATHS=( 13 + "/info.php" 14 + "/phpinfo.php" 15 + "/test.php" 16 + "/admin" 17 + "/admin.php" 18 + "/wp-admin" 19 + "/robots.txt" 20 + "/.env" 21 + "/.git/config" 22 + "/config.php" 23 + "/backup" 24 + "/db.sql" 25 + "/.DS_Store" 26 + "/server-status" 27 + "/.well-known/security.txt" 28 + ) 29 + 30 + # Colors 31 + RED='\033[0;31m' 32 + GREEN='\033[0;32m' 33 + YELLOW='\033[1;33m' 34 + BLUE='\033[0;34m' 35 + NC='\033[0m' 36 + 37 + mkdir -p "$OUTPUT_DIR" 38 + 39 + echo -e "${BLUE}=== PDS Security Scanner ===${NC}" 40 + echo "API Host: $API_HOST" 41 + echo "Timeout: ${TIMEOUT}s" 42 + echo "Scanning for ${#PATHS[@]} paths" 43 + echo "Results: $RESULTS_FILE" 44 + echo "" 45 + 46 + # Fetch active PDS endpoints 47 + echo -e "${YELLOW}Fetching active PDS endpoints...${NC}" 48 + ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \ 49 + jq -r '.[].endpoint' 2>/dev/null) 50 + 51 + if [ -z "$ENDPOINTS" ]; then 52 + echo -e "${RED}Error: Could not fetch endpoints from API${NC}" 53 + exit 1 54 + fi 55 + 56 + ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l) 57 + echo -e "${GREEN}Found ${ENDPOINT_COUNT} active PDS endpoints${NC}" 58 + echo "" 59 + 60 + # Write header 61 + echo "PDS Security Scan - $(date)" > "$RESULTS_FILE" 62 + echo "========================================" >> "$RESULTS_FILE" 63 + echo "" >> "$RESULTS_FILE" 64 + 65 + # Counters 66 + CURRENT=0 67 + TOTAL_FOUND=0 68 + TOTAL_MAYBE=0 69 + 70 + # Scan each endpoint sequentially 71 + while IFS= read -r endpoint; do 72 + CURRENT=$((CURRENT + 1)) 73 + 74 + echo -e "${BLUE}[$CURRENT/$ENDPOINT_COUNT]${NC} Scanning: $endpoint" 75 + 76 + # Scan each path 77 + for path in "${PATHS[@]}"; do 78 + url="${endpoint}${path}" 79 + 80 + # Make request with timeout 81 + response=$(curl -s -o /dev/null -w "%{http_code}" \ 82 + --max-time "$TIMEOUT" \ 83 + --connect-timeout "$TIMEOUT" \ 84 + -L \ 85 + -A "Mozilla/5.0 (Security Scanner)" \ 86 + "$url" 2>/dev/null) 87 + 88 + # Check response 89 + if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then 90 + if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then 91 + echo -e " ${GREEN}✓ FOUND${NC} $path ${YELLOW}[$response]${NC}" 92 + echo "FOUND: $endpoint$path [$response]" >> "$RESULTS_FILE" 93 + echo "$endpoint$path" >> "$FOUND_FILE" 94 + TOTAL_FOUND=$((TOTAL_FOUND + 1)) 95 + elif [ "$response" != "403" ]; then 96 + echo -e " ${YELLOW}? MAYBE${NC} $path ${YELLOW}[$response]${NC}" 97 + echo "MAYBE: $endpoint$path [$response]" >> "$RESULTS_FILE" 98 + TOTAL_MAYBE=$((TOTAL_MAYBE + 1)) 99 + fi 100 + fi 101 + done 102 + 103 + echo "" >> "$RESULTS_FILE" 104 + 105 + done <<< "$ENDPOINTS" 106 + 107 + # Summary 108 + echo "" 109 + echo -e "${BLUE}========================================${NC}" 110 + echo -e "${GREEN}Scan Complete!${NC}" 111 + echo "Scanned: ${ENDPOINT_COUNT} endpoints" 112 + echo "Paths checked per endpoint: ${#PATHS[@]}" 113 + echo -e "${GREEN}Found (200/301/302): ${TOTAL_FOUND}${NC}" 114 + echo -e "${YELLOW}Maybe (other codes): ${TOTAL_MAYBE}${NC}" 115 + echo "" 116 + echo "Full results: $RESULTS_FILE" 117 + [ -f "$FOUND_FILE" ] && echo "Found URLs: $FOUND_FILE"