Compare changes


+2
.gitignore
···
 5  5  .DS_Store
 6  6  plc_cache\.tmp/*
 7  7  plc_bundles*
    8 + config.yaml
    9 + /atscand
+39 -5
Makefile
···
  1 -  all: run
  1 + .PHONY: all build install test clean fmt lint help
  2 +
  3 + # Binary name
  4 + BINARY_NAME=atscand
  5 + INSTALL_PATH=$(GOPATH)/bin
  6 +
  7 + # Go commands
  8 + GOCMD=go
  9 + GOBUILD=$(GOCMD) build
 10 + GOINSTALL=$(GOCMD) install
 11 + GOCLEAN=$(GOCMD) clean
 12 + GOTEST=$(GOCMD) test
 13 + GOGET=$(GOCMD) get
 14 + GOFMT=$(GOCMD) fmt
 15 + GOMOD=$(GOCMD) mod
 16 + GORUN=$(GOCMD) run
 17 +
 18 + # Default target
 19 + all: build
 20 +
 21 + # Build the CLI tool
 22 + build:
 23 + 	@echo "Building $(BINARY_NAME)..."
 24 + 	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
 25 +
 26 + # Install the CLI tool globally
 27 + install:
 28 + 	@echo "Installing $(BINARY_NAME)..."
 29 + 	$(GOINSTALL) ./cmd/atscand
  2 30
  3 31   run:
  4 -  	go run cmd/atscanner.go -verbose
 32 + 	$(GORUN) cmd/atscand/main.go -verbose
  5 33
  6 -  clean-db:
  7 -  	dropdb -U atscanner atscanner
  8 -  	createdb atscanner -O atscanner
 34 + update-plcbundle:
 35 + 	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
 36 +
 37 + # Show help
 38 + help:
 39 + 	@echo "Available targets:"
 40 + 	@echo " make build - Build the binary"
 41 + 	@echo " make install - Install binary globally"
 42 + 	@echo " make run - Run app"
+159
cmd/atscand/main.go
···
   1 + package main
   2 +
   3 + import (
   4 + 	"context"
   5 + 	"flag"
   6 + 	"fmt"
   7 + 	"os"
   8 + 	"os/signal"
   9 + 	"syscall"
  10 + 	"time"
  11 +
  12 + 	"github.com/atscan/atscand/internal/api"
  13 + 	"github.com/atscan/atscand/internal/config"
  14 + 	"github.com/atscan/atscand/internal/log"
  15 + 	"github.com/atscan/atscand/internal/pds"
  16 + 	"github.com/atscan/atscand/internal/plc"
  17 + 	"github.com/atscan/atscand/internal/storage"
  18 + 	"github.com/atscan/atscand/internal/worker"
  19 + )
  20 +
  21 + const VERSION = "1.0.0"
  22 +
  23 + func main() {
  24 + 	configPath := flag.String("config", "config.yaml", "path to config file")
  25 + 	verbose := flag.Bool("verbose", false, "enable verbose logging")
  26 + 	flag.Parse()
  27 +
  28 + 	// Load configuration
  29 + 	cfg, err := config.Load(*configPath)
  30 + 	if err != nil {
  31 + 		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
  32 + 		os.Exit(1)
  33 + 	}
  34 +
  35 + 	// Override verbose setting if flag is provided
  36 + 	if *verbose {
  37 + 		cfg.API.Verbose = true
  38 + 	}
  39 +
  40 + 	// Initialize logger
  41 + 	log.Init(cfg.API.Verbose)
  42 +
  43 + 	// Print banner
  44 + 	log.Banner(VERSION)
  45 +
  46 + 	// Print configuration summary
  47 + 	log.PrintConfig(map[string]string{
  48 + 		"Database Type":     cfg.Database.Type,
  49 + 		"Database Path":     cfg.Database.Path, // Will be auto-redacted
  50 + 		"PLC Directory":     cfg.PLC.DirectoryURL,
  51 + 		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
  52 + 		"PLC Bundle Dir":    cfg.PLC.BundleDir,
  53 + 		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
  54 + 		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
  55 + 		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
  56 + 		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
  57 + 		"PDS Timeout":       cfg.PDS.Timeout.String(),
  58 + 		"API Host":          cfg.API.Host,
  59 + 		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
  60 + 		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
  61 + 	})
  62 +
  63 + 	// Initialize database using factory pattern
  64 + 	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
  65 + 	if err != nil {
  66 + 		log.Fatal("Failed to initialize database: %v", err)
  67 + 	}
  68 + 	defer func() {
  69 + 		log.Info("Closing database connection...")
  70 + 		db.Close()
  71 + 	}()
  72 +
  73 + 	// Set scan retention from config
  74 + 	if cfg.PDS.ScanRetention > 0 {
  75 + 		db.SetScanRetention(cfg.PDS.ScanRetention)
  76 + 		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
  77 + 	}
  78 +
  79 + 	// Run migrations
  80 + 	if err := db.Migrate(); err != nil {
  81 + 		log.Fatal("Failed to run migrations: %v", err)
  82 + 	}
  83 +
  84 + 	ctx, cancel := context.WithCancel(context.Background())
  85 + 	defer cancel()
  86 +
  87 + 	// Initialize workers
  88 + 	log.Info("Initializing scanners...")
  89 +
  90 + 	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
  91 + 	if err != nil {
  92 + 		log.Fatal("Failed to create bundle manager: %v", err)
  93 + 	}
  94 + 	defer bundleManager.Close()
  95 + 	log.Verbose("✓ Bundle manager initialized (shared)")
  96 +
  97 + 	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
  98 + 	defer plcScanner.Close()
  99 + 	log.Verbose("✓ PLC scanner initialized")
 100 +
 101 + 	pdsScanner := pds.NewScanner(db, cfg.PDS)
 102 + 	log.Verbose("✓ PDS scanner initialized")
 103 +
 104 + 	scheduler := worker.NewScheduler()
 105 +
 106 + 	// Schedule PLC directory scan
 107 + 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
 108 + 		if err := plcScanner.Scan(ctx); err != nil {
 109 + 			log.Error("PLC scan error: %v", err)
 110 + 		}
 111 + 	})
 112 + 	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
 113 +
 114 + 	// Schedule PDS availability checks
 115 + 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
 116 + 		if err := pdsScanner.ScanAll(ctx); err != nil {
 117 + 			log.Error("PDS scan error: %v", err)
 118 + 		}
 119 + 	})
 120 + 	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
 121 +
 122 + 	// Start API server
 123 + 	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
 124 + 	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
 125 + 	go func() {
 126 + 		if err := apiServer.Start(); err != nil {
 127 + 			log.Fatal("API server error: %v", err)
 128 + 		}
 129 + 	}()
 130 +
 131 + 	// Give the API server a moment to start
 132 + 	time.Sleep(100 * time.Millisecond)
 133 + 	log.Info("✓ API server started successfully")
 134 + 	log.Info("")
 135 + 	log.Info("🚀 ATScanner is running!")
 136 + 	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
 137 + 	log.Info("   Press Ctrl+C to stop")
 138 + 	log.Info("")
 139 +
 140 + 	// Start scheduler
 141 + 	scheduler.Start(ctx)
 142 +
 143 + 	// Wait for interrupt
 144 + 	sigChan := make(chan os.Signal, 1)
 145 + 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
 146 + 	<-sigChan
 147 +
 148 + 	log.Info("")
 149 + 	log.Info("Shutting down gracefully...")
 150 + 	cancel()
 151 +
 152 + 	log.Info("Stopping API server...")
 153 + 	apiServer.Shutdown(context.Background())
 154 +
 155 + 	log.Info("Waiting for active tasks to complete...")
 156 + 	time.Sleep(2 * time.Second)
 157 +
 158 + 	log.Info("✓ Shutdown complete. Goodbye!")
 159 + }
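
Note: internal/worker is not part of this diff, so the scheduler the new entrypoint relies on is visible only through its call sites (worker.NewScheduler(), AddJob(name, interval, fn), Start(ctx)). A minimal, non-blocking sketch that would satisfy those calls; the real package may differ:

package worker

import (
	"context"
	"time"
)

// Job pairs a name with the function to run on each tick.
type Job struct {
	Name     string
	Interval time.Duration
	Fn       func()
}

type Scheduler struct {
	jobs []Job
}

func NewScheduler() *Scheduler {
	return &Scheduler{}
}

// AddJob registers a function to run every interval.
func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
	s.jobs = append(s.jobs, Job{Name: name, Interval: interval, Fn: fn})
}

// Start launches one goroutine per job: each runs once immediately,
// then on every tick, until the context is cancelled.
func (s *Scheduler) Start(ctx context.Context) {
	for _, job := range s.jobs {
		go func(j Job) {
			j.Fn()
			ticker := time.NewTicker(j.Interval)
			defer ticker.Stop()
			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					j.Fn()
				}
			}
		}(job)
	}
}

Start returning immediately matches main.go, which blocks on the signal channel right after calling it.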
-152
cmd/atscanner.go
··· 1 - package main 2 - 3 - import ( 4 - "context" 5 - "flag" 6 - "fmt" 7 - "os" 8 - "os/signal" 9 - "syscall" 10 - "time" 11 - 12 - "github.com/atscan/atscanner/internal/api" 13 - "github.com/atscan/atscanner/internal/config" 14 - "github.com/atscan/atscanner/internal/log" 15 - "github.com/atscan/atscanner/internal/pds" 16 - "github.com/atscan/atscanner/internal/plc" 17 - "github.com/atscan/atscanner/internal/storage" 18 - "github.com/atscan/atscanner/internal/worker" 19 - ) 20 - 21 - const VERSION = "1.0.0" 22 - 23 - func main() { 24 - configPath := flag.String("config", "config.yaml", "path to config file") 25 - verbose := flag.Bool("verbose", false, "enable verbose logging") 26 - flag.Parse() 27 - 28 - // Load configuration 29 - cfg, err := config.Load(*configPath) 30 - if err != nil { 31 - fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err) 32 - os.Exit(1) 33 - } 34 - 35 - // Override verbose setting if flag is provided 36 - if *verbose { 37 - cfg.API.Verbose = true 38 - } 39 - 40 - // Initialize logger 41 - log.Init(cfg.API.Verbose) 42 - 43 - // Print banner 44 - log.Banner(VERSION) 45 - 46 - // Print configuration summary 47 - log.PrintConfig(map[string]string{ 48 - "Database Type": cfg.Database.Type, 49 - "Database Path": cfg.Database.Path, // Will be auto-redacted 50 - "PLC Directory": cfg.PLC.DirectoryURL, 51 - "PLC Scan Interval": cfg.PLC.ScanInterval.String(), 52 - "PLC Bundle Dir": cfg.PLC.BundleDir, 53 - "PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache), 54 - "PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs), 55 - "PDS Scan Interval": cfg.PDS.ScanInterval.String(), 56 - "PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers), 57 - "PDS Timeout": cfg.PDS.Timeout.String(), 58 - "API Host": cfg.API.Host, 59 - "API Port": fmt.Sprintf("%d", cfg.API.Port), 60 - "Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose), 61 - }) 62 - 63 - // Initialize database using factory pattern 64 - db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path) 65 - if err != nil { 66 - log.Fatal("Failed to initialize database: %v", err) 67 - } 68 - defer func() { 69 - log.Info("Closing database connection...") 70 - db.Close() 71 - }() 72 - 73 - // Set scan retention from config 74 - if cfg.PDS.ScanRetention > 0 { 75 - db.SetScanRetention(cfg.PDS.ScanRetention) 76 - log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention) 77 - } 78 - 79 - // Run migrations 80 - if err := db.Migrate(); err != nil { 81 - log.Fatal("Failed to run migrations: %v", err) 82 - } 83 - 84 - ctx, cancel := context.WithCancel(context.Background()) 85 - defer cancel() 86 - 87 - // Initialize workers 88 - log.Info("Initializing scanners...") 89 - 90 - plcScanner := plc.NewScanner(db, cfg.PLC) 91 - defer plcScanner.Close() 92 - log.Verbose("✓ PLC scanner initialized") 93 - 94 - pdsScanner := pds.NewScanner(db, cfg.PDS) 95 - log.Verbose("✓ PDS scanner initialized") 96 - 97 - scheduler := worker.NewScheduler() 98 - 99 - // Schedule PLC directory scan 100 - scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() { 101 - if err := plcScanner.Scan(ctx); err != nil { 102 - log.Error("PLC scan error: %v", err) 103 - } 104 - }) 105 - log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval) 106 - 107 - // Schedule PDS availability checks 108 - scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() { 109 - if err := pdsScanner.ScanAll(ctx); err != nil { 110 - log.Error("PDS scan error: %v", err) 111 - } 112 - }) 113 - log.Verbose("✓ PDS scan job scheduled (interval: %s)", 
cfg.PDS.ScanInterval) 114 - 115 - // Start API server 116 - log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port) 117 - apiServer := api.NewServer(db, cfg.API, cfg.PLC) 118 - go func() { 119 - if err := apiServer.Start(); err != nil { 120 - log.Fatal("API server error: %v", err) 121 - } 122 - }() 123 - 124 - // Give the API server a moment to start 125 - time.Sleep(100 * time.Millisecond) 126 - log.Info("✓ API server started successfully") 127 - log.Info("") 128 - log.Info("🚀 ATScanner is running!") 129 - log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port) 130 - log.Info(" Press Ctrl+C to stop") 131 - log.Info("") 132 - 133 - // Start scheduler 134 - scheduler.Start(ctx) 135 - 136 - // Wait for interrupt 137 - sigChan := make(chan os.Signal, 1) 138 - signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) 139 - <-sigChan 140 - 141 - log.Info("") 142 - log.Info("Shutting down gracefully...") 143 - cancel() 144 - 145 - log.Info("Stopping API server...") 146 - apiServer.Shutdown(context.Background()) 147 - 148 - log.Info("Waiting for active tasks to complete...") 149 - time.Sleep(2 * time.Second) 150 - 151 - log.Info("✓ Shutdown complete. Goodbye!") 152 - }
+168
cmd/import-labels/main.go
···
   1 + package main
   2 +
   3 + import (
   4 + 	"bufio"
   5 + 	"flag"
   6 + 	"fmt"
   7 + 	"os"
   8 + 	"path/filepath"
   9 + 	"strings"
  10 + 	"time"
  11 +
  12 + 	"github.com/klauspost/compress/zstd"
  13 + 	"gopkg.in/yaml.v3"
  14 + )
  15 +
  16 + type Config struct {
  17 + 	PLC struct {
  18 + 		BundleDir string `yaml:"bundle_dir"`
  19 + 	} `yaml:"plc"`
  20 + }
  21 +
  22 + var CONFIG_FILE = "config.yaml"
  23 +
  24 + // ---------------------
  25 +
  26 + func main() {
  27 + 	// Define a new flag for changing the directory
  28 + 	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
  29 + 	flag.Usage = func() { // Custom usage message
  30 + 		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
  31 + 		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
  32 + 		flag.PrintDefaults()
  33 + 	}
  34 + 	flag.Parse() // Parse all defined flags
  35 +
  36 + 	// Change directory if the flag was used
  37 + 	if *workDir != "." {
  38 + 		fmt.Printf("Changing working directory to %s...\n", *workDir)
  39 + 		if err := os.Chdir(*workDir); err != nil {
  40 + 			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
  41 + 			os.Exit(1)
  42 + 		}
  43 + 	}
  44 +
  45 + 	// --- REMOVED UNUSED CODE ---
  46 + 	// The csvFilePath variable and NArg check were removed
  47 + 	// as the script now reads from stdin.
  48 + 	// ---------------------------
  49 +
  50 + 	fmt.Println("========================================")
  51 + 	fmt.Println("PLC Operation Labels Import (Go STDIN)")
  52 + 	fmt.Println("========================================")
  53 +
  54 + 	// 1. Read config (will now read from the new CWD)
  55 + 	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
  56 + 	configData, err := os.ReadFile(CONFIG_FILE)
  57 + 	if err != nil {
  58 + 		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
  59 + 		os.Exit(1)
  60 + 	}
  61 +
  62 + 	var config Config
  63 + 	if err := yaml.Unmarshal(configData, &config); err != nil {
  64 + 		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
  65 + 		os.Exit(1)
  66 + 	}
  67 +
  68 + 	if config.PLC.BundleDir == "" {
  69 + 		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
  70 + 		os.Exit(1)
  71 + 	}
  72 +
  73 + 	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
  74 + 	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
  75 + 		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
  76 + 		os.Exit(1)
  77 + 	}
  78 +
  79 + 	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
  80 + 	fmt.Println("Waiting for sorted data from stdin...")
  81 +
  82 + 	// 2. Process sorted data from stdin
  83 + 	// This script *requires* the input to be sorted by bundle number.
  84 +
  85 + 	var currentWriter *zstd.Encoder
  86 + 	var currentFile *os.File
  87 + 	var lastBundleKey string = ""
  88 +
  89 + 	lineCount := 0
  90 + 	startTime := time.Now()
  91 +
  92 + 	scanner := bufio.NewScanner(os.Stdin)
  93 + 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
  94 +
  95 + 	for scanner.Scan() {
  96 + 		line := scanner.Text()
  97 + 		lineCount++
  98 +
  99 + 		parts := strings.SplitN(line, ",", 2)
 100 + 		if len(parts) < 2 {
 101 + 			continue // Skip empty/bad lines
 102 + 		}
 103 +
 104 + 		bundleNumStr := parts[0]
 105 + 		bundleKey := strings.Repeat("0", max(0, 6-len(bundleNumStr))) + bundleNumStr // Pad with zeros (fmt's 0 flag is ignored for %s)
 106 +
 107 + 		// If the bundle key is new, close the old writer and open a new one.
 108 + 		if bundleKey != lastBundleKey {
 109 + 			// Close the previous writer/file
 110 + 			if currentWriter != nil {
 111 + 				if err := currentWriter.Close(); err != nil {
 112 + 					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
 113 + 				}
 114 + 				currentFile.Close()
 115 + 			}
 116 +
 117 + 			// Start the new one
 118 + 			fmt.Printf(" -> Writing bundle %s\n", bundleKey)
 119 + 			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
 120 +
 121 + 			file, err := os.Create(outPath)
 122 + 			if err != nil {
 123 + 				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
 124 + 				os.Exit(1)
 125 + 			}
 126 + 			currentFile = file
 127 +
 128 + 			writer, err := zstd.NewWriter(file)
 129 + 			if err != nil {
 130 + 				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
 131 + 				os.Exit(1)
 132 + 			}
 133 + 			currentWriter = writer
 134 + 			lastBundleKey = bundleKey
 135 + 		}
 136 +
 137 + 		// Write the line to the currently active writer
 138 + 		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
 139 + 			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
 140 + 		}
 141 +
 142 + 		// Progress update
 143 + 		if lineCount%100000 == 0 {
 144 + 			elapsed := time.Since(startTime).Seconds()
 145 + 			rate := float64(lineCount) / elapsed
 146 + 			fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
 147 + 		}
 148 + 	}
 149 +
 150 + 	// 3. Close the very last writer
 151 + 	if currentWriter != nil {
 152 + 		if err := currentWriter.Close(); err != nil {
 153 + 			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
 154 + 		}
 155 + 		currentFile.Close()
 156 + 	}
 157 +
 158 + 	if err := scanner.Err(); err != nil {
 159 + 		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
 160 + 	}
 161 +
 162 + 	totalTime := time.Since(startTime)
 163 + 	fmt.Println("\n========================================")
 164 + 	fmt.Println("Import Summary")
 165 + 	fmt.Println("========================================")
 166 + 	fmt.Printf("✓ Import completed in %v\n", totalTime)
 167 + 	fmt.Printf("Total lines processed: %d\n", lineCount)
 168 + }
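
The importer emits one zstd-compressed CSV per bundle under <bundle_dir>/labels/ (e.g. 000001.csv.zst). Purely as an illustration, a file written this way can be read back with the same klauspost/compress package; the path below is hypothetical:

package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Hypothetical output file written by the importer above.
	f, err := os.Open("plc_bundles/labels/000001.csv.zst")
	if err != nil {
		fmt.Fprintf(os.Stderr, "open: %v\n", err)
		os.Exit(1)
	}
	defer f.Close()

	// Decompress on the fly; *zstd.Decoder implements io.Reader.
	dec, err := zstd.NewReader(f)
	if err != nil {
		fmt.Fprintf(os.Stderr, "zstd: %v\n", err)
		os.Exit(1)
	}
	defer dec.Close()

	// Each line is the original CSV record, bundle number first.
	scanner := bufio.NewScanner(dec)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		fmt.Println(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "read: %v\n", err)
	}
}

Upstream of the importer, stdin must already be sorted on the first CSV column (e.g. sort -t, -k1,1n) so that each bundle file is opened and written exactly once.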
+22
config.sample.yaml
···
  1 + database:
  2 +   type: "postgres" # or "sqlite"
  3 +   path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
  4 +   # For SQLite: path: "atscan.db"
  5 +
  6 + plc:
  7 +   directory_url: "https://plc.directory"
  8 +   scan_interval: "5s"
  9 +   bundle_dir: "./plc_bundles"
 10 +   use_cache: true
 11 +   index_dids: true
 12 +
 13 + pds:
 14 +   scan_interval: "30m"
 15 +   timeout: "30s"
 16 +   workers: 20
 17 +   recheck_interval: "1.5h"
 18 +   scan_retention: 20
 19 +
 20 + api:
 21 +   host: "0.0.0.0"
 22 +   port: 8080
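
internal/config itself is not shown in this diff. A sketch of how the sample could be decoded, with field names assumed (only plc.bundle_dir is confirmed, by cmd/import-labels above); durations such as "5s" and "1.5h" are kept as strings and parsed with time.ParseDuration, since yaml.v3 does not decode quoted duration strings into time.Duration on its own:

package config

import (
	"os"
	"time"

	"gopkg.in/yaml.v3"
)

// Config mirrors config.sample.yaml; field names are assumptions.
type Config struct {
	Database struct {
		Type string `yaml:"type"`
		Path string `yaml:"path"`
	} `yaml:"database"`
	PLC struct {
		DirectoryURL string `yaml:"directory_url"`
		ScanInterval string `yaml:"scan_interval"`
		BundleDir    string `yaml:"bundle_dir"`
		UseCache     bool   `yaml:"use_cache"`
		IndexDIDs    bool   `yaml:"index_dids"`
	} `yaml:"plc"`
}

// Load reads and parses a YAML config file.
func Load(path string) (*Config, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	var cfg Config
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}

// PLCScanInterval parses the interval ("5s", "30m", "1.5h" all parse).
func (c *Config) PLCScanInterval() (time.Duration, error) {
	return time.ParseDuration(c.PLC.ScanInterval)
}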
-22
config.yaml
···
  1 - database:
  2 -   type: "postgres" # or "sqlite"
  3 -   path: "postgres://atscanner:Noor1kooz5eeFai9leZagh5ua5eihai4@localhost:5432/atscanner?sslmode=disable"
  4 -   # For SQLite: path: "atscan.db"
  5 -
  6 - plc:
  7 -   directory_url: "https://plc.directory"
  8 -   scan_interval: "5s"
  9 -   bundle_dir: "./plc_bundles"
 10 -   use_cache: true
 11 -   index_dids: true
 12 -
 13 - pds:
 14 -   scan_interval: "30m"
 15 -   timeout: "30s"
 16 -   workers: 20
 17 -   recheck_interval: "1.5h"
 18 -   scan_retention: 3
 19 -
 20 - api:
 21 -   host: "0.0.0.0"
 22 -   port: 8080
+6 -5
go.mod
···
  1 -  module github.com/atscan/atscanner
  1 + module github.com/atscan/atscand
  2  2
  3  3 go 1.23.0
  4  4
  5  5 require (
  6  6 	github.com/gorilla/mux v1.8.1
  7  7 	github.com/lib/pq v1.10.9
  8 -  	github.com/mattn/go-sqlite3 v1.14.18
  9  8 	gopkg.in/yaml.v3 v3.0.1
 10  9 )
 11 10
 12 -  require github.com/klauspost/compress v1.18.0
 11 + require github.com/klauspost/compress v1.18.1
 13 12
 14 13 require (
 15 -  	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 16 14 	github.com/gorilla/handlers v1.5.2
 15 + 	github.com/jackc/pgx/v5 v5.7.6
 16 + 	tangled.org/atscan.net/plcbundle v0.3.6
 17 17 )
 18 18
 19 19 require (
 20 20 	github.com/felixge/httpsnoop v1.0.3 // indirect
 21 21 	github.com/jackc/pgpassfile v1.0.0 // indirect
 22 22 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
 23 -  	github.com/jackc/pgx/v5 v5.7.6 // indirect
 24 23 	github.com/jackc/puddle/v2 v2.2.2 // indirect
 24 + 	github.com/kr/text v0.2.0 // indirect
 25 + 	github.com/rogpeppe/go-internal v1.14.1 // indirect
 25 26 	golang.org/x/crypto v0.37.0 // indirect
 26 27 	golang.org/x/sync v0.13.0 // indirect
 27 28 	golang.org/x/text v0.24.0 // indirect
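
The module path changes to github.com/atscan/atscand, the cgo-based mattn/go-sqlite3 dependency is dropped, and jackc/pgx/v5 moves from indirect to direct. Whether internal/storage drives pgx natively or through database/sql is not visible in this diff; for reference only, the stdlib adapter would be wired up like this (the DSN is the placeholder from config.sample.yaml):

package main

import (
	"database/sql"
	"fmt"

	_ "github.com/jackc/pgx/v5/stdlib" // registers the "pgx" driver
)

func main() {
	dsn := "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
	db, err := sql.Open("pgx", dsn)
	if err != nil {
		fmt.Println("open:", err)
		return
	}
	defer db.Close()

	if err := db.Ping(); err != nil {
		fmt.Println("ping:", err)
		return
	}
	fmt.Println("connected")
}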
+17 -7
go.sum
··· 1 - github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= 2 - github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= 1 + github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 3 2 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 + github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 + github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 5 github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= 5 6 github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 6 7 github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= ··· 15 16 github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M= 16 17 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= 17 18 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= 18 - github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= 19 - github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= 19 + github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= 20 + github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= 21 + github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= 22 + github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= 23 + github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 24 + github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 20 25 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= 21 26 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= 22 - github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI= 23 - github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= 27 + github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 24 28 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 29 + github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= 30 + github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= 25 31 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 26 32 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 27 33 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 34 + github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 35 + github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 28 36 golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= 29 37 golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= 30 38 golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= 31 39 golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 32 40 golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= 33 41 golang.org/x/text v0.24.0/go.mod 
h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= 34 - gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 35 42 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 36 43 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 44 + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 37 45 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 38 46 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 39 47 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 48 + tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY= 49 + tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+552 -455
internal/api/handlers.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "crypto/sha256" 6 5 "database/sql" 7 - "encoding/hex" 8 6 "encoding/json" 9 7 "fmt" 8 + "io" 10 9 "net/http" 11 - "os" 12 - "path/filepath" 13 10 "strconv" 14 11 "strings" 15 12 "time" 16 13 17 - "github.com/atscan/atscanner/internal/log" 18 - "github.com/atscan/atscanner/internal/monitor" 19 - "github.com/atscan/atscanner/internal/plc" 20 - "github.com/atscan/atscanner/internal/storage" 14 + "github.com/atscan/atscand/internal/log" 15 + "github.com/atscan/atscand/internal/monitor" 16 + "github.com/atscan/atscand/internal/plc" 17 + "github.com/atscan/atscand/internal/storage" 21 18 "github.com/gorilla/mux" 19 + "tangled.org/atscan.net/plcbundle" 22 20 ) 23 21 24 22 // ===== RESPONSE HELPERS ===== ··· 40 38 http.Error(r.w, msg, code) 41 39 } 42 40 43 - func (r *response) bundleHeaders(bundle *storage.PLCBundle) { 41 + func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) { 44 42 r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber)) 45 43 r.w.Header().Set("X-Bundle-Hash", bundle.Hash) 46 44 r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash) 47 45 r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano)) 48 46 r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano)) 49 47 r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE)) 50 - r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs))) 48 + r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount)) 51 49 } 52 50 53 51 // ===== REQUEST HELPERS ===== ··· 77 75 78 76 // ===== FORMATTING HELPERS ===== 79 77 80 - func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} { 81 - return map[string]interface{}{ 82 - "plc_bundle_number": bundle.BundleNumber, 83 - "start_time": bundle.StartTime, 84 - "end_time": bundle.EndTime, 85 - "operation_count": plc.BUNDLE_SIZE, 86 - "did_count": len(bundle.DIDs), 87 - "hash": bundle.Hash, 88 - "compressed_hash": bundle.CompressedHash, 89 - "compressed_size": bundle.CompressedSize, 90 - "uncompressed_size": bundle.UncompressedSize, 91 - "compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize), 92 - "cursor": bundle.Cursor, 93 - "prev_bundle_hash": bundle.PrevBundleHash, 94 - "created_at": bundle.CreatedAt, 95 - } 96 - } 97 - 98 78 func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} { 99 79 response := map[string]interface{}{ 100 80 "id": ep.ID, ··· 103 83 "discovered_at": ep.DiscoveredAt, 104 84 "last_checked": ep.LastChecked, 105 85 "status": statusToString(ep.Status), 106 - // REMOVED: "user_count": ep.UserCount, // No longer exists 107 86 } 108 87 109 - // Add IP if available 88 + // Add IPs if available 110 89 if ep.IP != "" { 111 90 response["ip"] = ep.IP 112 91 } 113 - 114 - // REMOVED: IP info extraction - no longer in Endpoint struct 115 - // IPInfo is now in separate table, joined only in PDS handlers 92 + if ep.IPv6 != "" { 93 + response["ipv6"] = ep.IPv6 94 + } 116 95 117 96 return response 118 97 } ··· 165 144 resp.json(stats) 166 145 } 167 146 147 + // handleGetRandomEndpoint returns a random endpoint of specified type 148 + func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) { 149 + resp := newResponse(w) 150 + 151 + // Get required type parameter 152 + endpointType := r.URL.Query().Get("type") 153 + if endpointType == "" { 154 + resp.error("type parameter is required", http.StatusBadRequest) 155 + return 
156 + } 157 + 158 + // Get optional status parameter 159 + status := r.URL.Query().Get("status") 160 + 161 + filter := &storage.EndpointFilter{ 162 + Type: endpointType, 163 + Status: status, 164 + Random: true, 165 + Limit: 1, 166 + Offset: 0, 167 + } 168 + 169 + endpoints, err := s.db.GetEndpoints(r.Context(), filter) 170 + if err != nil { 171 + resp.error(err.Error(), http.StatusInternalServerError) 172 + return 173 + } 174 + 175 + if len(endpoints) == 0 { 176 + resp.error("no endpoints found matching criteria", http.StatusNotFound) 177 + return 178 + } 179 + 180 + resp.json(formatEndpointResponse(endpoints[0])) 181 + } 182 + 168 183 // ===== PDS HANDLERS ===== 169 184 170 185 func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) { ··· 233 248 "endpoint": pds.Endpoint, 234 249 "discovered_at": pds.DiscoveredAt, 235 250 "status": statusToString(pds.Status), 251 + "valid": pds.Valid, // NEW 236 252 } 237 253 238 254 // Add server_did if available ··· 257 273 } 258 274 } 259 275 260 - // Add IP if available 276 + // Add IPs if available 261 277 if pds.IP != "" { 262 278 response["ip"] = pds.IP 279 + } 280 + if pds.IPv6 != "" { 281 + response["ipv6"] = pds.IPv6 263 282 } 264 283 265 284 // Add IP info (from ip_infos table via JOIN) ··· 276 295 if pds.IPInfo.ASN > 0 { 277 296 response["asn"] = pds.IPInfo.ASN 278 297 } 279 - if pds.IPInfo.IsDatacenter { 280 - response["is_datacenter"] = pds.IPInfo.IsDatacenter 281 - } 298 + 299 + // Add all network type flags 300 + response["is_datacenter"] = pds.IPInfo.IsDatacenter 301 + response["is_vpn"] = pds.IPInfo.IsVPN 302 + response["is_crawler"] = pds.IPInfo.IsCrawler 303 + response["is_tor"] = pds.IPInfo.IsTor 304 + response["is_proxy"] = pds.IPInfo.IsProxy 305 + 306 + // Add computed is_home field 307 + response["is_home"] = pds.IPInfo.IsHome() 282 308 } 283 309 284 310 return response ··· 316 342 } 317 343 } 318 344 319 - // Add full IP info 345 + // Add full IP info with computed is_home field 320 346 if pds.IPInfo != nil { 321 - response["ip_info"] = pds.IPInfo 347 + // Convert IPInfo to map 348 + ipInfoMap := make(map[string]interface{}) 349 + ipInfoJSON, _ := json.Marshal(pds.IPInfo) 350 + json.Unmarshal(ipInfoJSON, &ipInfoMap) 351 + 352 + // Add computed is_home field 353 + ipInfoMap["is_home"] = pds.IPInfo.IsHome() 354 + 355 + response["ip_info"] = ipInfoMap 322 356 } 323 357 324 358 return response ··· 333 367 "scanned_at": scan.ScannedAt, 334 368 } 335 369 370 + if scan.Status != storage.EndpointStatusOnline && scan.ScanData != nil && scan.ScanData.Metadata != nil { 371 + if errorMsg, ok := scan.ScanData.Metadata["error"].(string); ok && errorMsg != "" { 372 + scanMap["error"] = errorMsg 373 + } 374 + } 375 + 336 376 if scan.ResponseTime > 0 { 337 377 scanMap["response_time"] = scan.ResponseTime 338 378 } 339 379 340 - // NEW: Add version if available 341 380 if scan.Version != "" { 342 381 scanMap["version"] = scan.Version 343 382 } 344 383 384 + if scan.UsedIP != "" { 385 + scanMap["used_ip"] = scan.UsedIP 386 + } 387 + 345 388 // Use the top-level UserCount field first 346 389 if scan.UserCount > 0 { 347 390 scanMap["user_count"] = scan.UserCount ··· 366 409 return result 367 410 } 368 411 412 + // Get repos for a specific PDS 413 + func (s *Server) handleGetPDSRepos(w http.ResponseWriter, r *http.Request) { 414 + resp := newResponse(w) 415 + vars := mux.Vars(r) 416 + endpoint := "https://" + normalizeEndpoint(vars["endpoint"]) 417 + 418 + pds, err := s.db.GetPDSDetail(r.Context(), endpoint) 419 + if err != nil { 420 
+ resp.error("PDS not found", http.StatusNotFound) 421 + return 422 + } 423 + 424 + // Parse query parameters 425 + activeOnly := r.URL.Query().Get("active") == "true" 426 + limit := getQueryInt(r, "limit", 100) 427 + offset := getQueryInt(r, "offset", 0) 428 + 429 + // Cap limit at 1000 430 + if limit > 1000 { 431 + limit = 1000 432 + } 433 + 434 + repos, err := s.db.GetPDSRepos(r.Context(), pds.ID, activeOnly, limit, offset) 435 + if err != nil { 436 + resp.error(err.Error(), http.StatusInternalServerError) 437 + return 438 + } 439 + 440 + // Get total from latest scan (same as user_count) 441 + totalRepos := 0 442 + if pds.LatestScan != nil { 443 + totalRepos = pds.LatestScan.UserCount 444 + } 445 + 446 + resp.json(map[string]interface{}{ 447 + "endpoint": pds.Endpoint, 448 + "total_repos": totalRepos, 449 + "returned": len(repos), 450 + "limit": limit, 451 + "offset": offset, 452 + "repos": repos, 453 + }) 454 + } 455 + 456 + // Find which PDS hosts a specific DID 457 + func (s *Server) handleGetDIDRepos(w http.ResponseWriter, r *http.Request) { 458 + resp := newResponse(w) 459 + vars := mux.Vars(r) 460 + did := vars["did"] 461 + 462 + repos, err := s.db.GetReposByDID(r.Context(), did) 463 + if err != nil { 464 + resp.error(err.Error(), http.StatusInternalServerError) 465 + return 466 + } 467 + 468 + resp.json(map[string]interface{}{ 469 + "did": did, 470 + "pds_count": len(repos), 471 + "hosting_on": repos, 472 + }) 473 + } 474 + 475 + // Add to internal/api/handlers.go 476 + func (s *Server) handleGetPDSRepoStats(w http.ResponseWriter, r *http.Request) { 477 + resp := newResponse(w) 478 + vars := mux.Vars(r) 479 + endpoint := "https://" + normalizeEndpoint(vars["endpoint"]) 480 + 481 + pds, err := s.db.GetPDSDetail(r.Context(), endpoint) 482 + if err != nil { 483 + resp.error("PDS not found", http.StatusNotFound) 484 + return 485 + } 486 + 487 + stats, err := s.db.GetPDSRepoStats(r.Context(), pds.ID) 488 + if err != nil { 489 + resp.error(err.Error(), http.StatusInternalServerError) 490 + return 491 + } 492 + 493 + resp.json(stats) 494 + } 495 + 496 + // ===== GLOBAL DID HANDLER ===== 497 + 498 + // handleGetGlobalDID provides a consolidated view of a DID 499 + func (s *Server) handleGetGlobalDID(w http.ResponseWriter, r *http.Request) { 500 + resp := newResponse(w) 501 + vars := mux.Vars(r) 502 + did := vars["did"] 503 + ctx := r.Context() 504 + 505 + // Get DID info (now includes handle and pds from database) 506 + didInfo, err := s.db.GetGlobalDIDInfo(ctx, did) 507 + if err != nil { 508 + if err == sql.ErrNoRows { 509 + if !s.plcIndexDIDs { 510 + resp.error("DID not found. 
Note: DID indexing is disabled in configuration.", http.StatusNotFound) 511 + } else { 512 + resp.error("DID not found in PLC index.", http.StatusNotFound) 513 + } 514 + } else { 515 + resp.error(err.Error(), http.StatusInternalServerError) 516 + } 517 + return 518 + } 519 + 520 + // Optionally include latest operation details if requested 521 + var latestOperation *plc.PLCOperation 522 + if r.URL.Query().Get("include_operation") == "true" && len(didInfo.BundleNumbers) > 0 { 523 + lastBundleNum := didInfo.BundleNumbers[len(didInfo.BundleNumbers)-1] 524 + ops, err := s.bundleManager.LoadBundleOperations(ctx, lastBundleNum) 525 + if err != nil { 526 + log.Error("Failed to load bundle %d for DID %s: %v", lastBundleNum, did, err) 527 + } else { 528 + // Find latest operation for this DID (in reverse) 529 + for i := len(ops) - 1; i >= 0; i-- { 530 + if ops[i].DID == did { 531 + latestOperation = &ops[i] 532 + break 533 + } 534 + } 535 + } 536 + } 537 + 538 + result := map[string]interface{}{ 539 + "did": didInfo.DID, 540 + "handle": didInfo.Handle, // From database! 541 + "current_pds": didInfo.CurrentPDS, // From database! 542 + "plc_index_created_at": didInfo.CreatedAt, 543 + "plc_bundle_history": didInfo.BundleNumbers, 544 + "pds_hosting_on": didInfo.HostingOn, 545 + } 546 + 547 + // Only include operation if requested 548 + if latestOperation != nil { 549 + result["latest_plc_operation"] = latestOperation 550 + } 551 + 552 + resp.json(result) 553 + } 554 + 555 + // handleGetDIDByHandle resolves a handle to a DID 556 + func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) { 557 + resp := newResponse(w) 558 + vars := mux.Vars(r) 559 + handle := vars["handle"] 560 + 561 + // Normalize handle (remove @ prefix if present) 562 + handle = strings.TrimPrefix(handle, "@") 563 + 564 + // Look up DID by handle 565 + didRecord, err := s.db.GetDIDByHandle(r.Context(), handle) 566 + if err != nil { 567 + if err == sql.ErrNoRows { 568 + if !s.plcIndexDIDs { 569 + resp.error("Handle not found. 
Note: DID indexing is disabled in configuration.", http.StatusNotFound) 570 + } else { 571 + resp.error("Handle not found.", http.StatusNotFound) 572 + } 573 + } else { 574 + resp.error(err.Error(), http.StatusInternalServerError) 575 + } 576 + return 577 + } 578 + 579 + // Return just the handle and DID 580 + resp.json(map[string]string{ 581 + "handle": handle, 582 + "did": didRecord.DID, 583 + }) 584 + } 585 + 369 586 // ===== DID HANDLERS ===== 370 587 371 588 func (s *Server) handleGetDID(w http.ResponseWriter, r *http.Request) { ··· 467 684 return 468 685 } 469 686 470 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 471 - if err != nil { 472 - resp.error(err.Error(), http.StatusInternalServerError) 473 - return 474 - } 475 - 687 + lastBundle := s.bundleManager.GetLastBundleNumber() 476 688 resp.json(map[string]interface{}{ 477 689 "total_unique_dids": totalDIDs, 478 690 "last_bundle": lastBundle, ··· 483 695 484 696 func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) { 485 697 resp := newResponse(w) 486 - 487 698 bundleNum, err := getBundleNumber(r) 488 699 if err != nil { 489 700 resp.error("invalid bundle number", http.StatusBadRequest) 490 701 return 491 702 } 492 703 493 - // Try to get existing bundle 494 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 495 - if err == nil { 496 - // Bundle exists, return it normally 497 - resp.json(formatBundleResponse(bundle)) 498 - return 499 - } 500 - 501 - // Bundle not found - check if it's the next upcoming bundle 502 - lastBundle, err := s.db.GetLastBundleNumber(r.Context()) 704 + // Get from library's index 705 + index := s.bundleManager.GetIndex() 706 + bundleMeta, err := index.GetBundle(bundleNum) 503 707 if err != nil { 504 - resp.error("bundle not found", http.StatusNotFound) 505 - return 506 - } 507 - 508 - if bundleNum == lastBundle+1 { 509 - // This is the upcoming bundle - return preview based on mempool 510 - upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum) 511 - if err != nil { 512 - resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError) 708 + // Check if it's upcoming bundle 709 + lastBundle := index.GetLastBundle() 710 + if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 { 711 + upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum) 712 + if err != nil { 713 + resp.error(err.Error(), http.StatusInternalServerError) 714 + return 715 + } 716 + resp.json(upcomingBundle) 513 717 return 514 718 } 515 - resp.json(upcomingBundle) 719 + resp.error("bundle not found", http.StatusNotFound) 516 720 return 517 721 } 518 722 519 - // Not an upcoming bundle, just not found 520 - resp.error("bundle not found", http.StatusNotFound) 723 + resp.json(formatBundleMetadata(bundleMeta)) 521 724 } 522 725 523 - func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) { 524 - // Get mempool stats 525 - mempoolCount, err := s.db.GetMempoolCount(ctx) 526 - if err != nil { 527 - return nil, err 726 + // Helper to format library's BundleMetadata 727 + func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} { 728 + return map[string]interface{}{ 729 + "plc_bundle_number": meta.BundleNumber, 730 + "start_time": meta.StartTime, 731 + "end_time": meta.EndTime, 732 + "operation_count": meta.OperationCount, 733 + "did_count": meta.DIDCount, 734 + "hash": meta.Hash, // Chain hash (primary) 735 + "content_hash": 
meta.ContentHash, // Content hash 736 + "parent": meta.Parent, // Parent chain hash 737 + "compressed_hash": meta.CompressedHash, 738 + "compressed_size": meta.CompressedSize, 739 + "uncompressed_size": meta.UncompressedSize, 740 + "compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize), 741 + "cursor": meta.Cursor, 742 + "created_at": meta.CreatedAt, 528 743 } 744 + } 529 745 530 - if mempoolCount == 0 { 746 + func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) { 747 + // Get mempool stats from library via wrapper 748 + stats := s.bundleManager.GetMempoolStats() 749 + 750 + count, ok := stats["count"].(int) 751 + if !ok || count == 0 { 531 752 return map[string]interface{}{ 532 753 "plc_bundle_number": bundleNum, 533 754 "is_upcoming": true, ··· 537 758 }, nil 538 759 } 539 760 540 - // Get first and last operations for time range 541 - firstOp, err := s.db.GetFirstMempoolOperation(ctx) 542 - if err != nil { 543 - return nil, err 761 + // Build response 762 + result := map[string]interface{}{ 763 + "plc_bundle_number": bundleNum, 764 + "is_upcoming": true, 765 + "status": "filling", 766 + "operation_count": count, 767 + "did_count": stats["did_count"], 768 + "target_operation_count": 10000, 769 + "progress_percent": float64(count) / 100.0, 770 + "operations_needed": 10000 - count, 544 771 } 545 772 546 - lastOp, err := s.db.GetLastMempoolOperation(ctx) 547 - if err != nil { 548 - return nil, err 773 + if count >= 10000 { 774 + result["status"] = "ready" 549 775 } 550 776 551 - // Get unique DID count 552 - uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx) 553 - if err != nil { 554 - return nil, err 777 + // Add time range if available 778 + if firstTime, ok := stats["first_time"]; ok { 779 + result["start_time"] = firstTime 555 780 } 556 - 557 - // Get uncompressed size estimate 558 - uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx) 559 - if err != nil { 560 - return nil, err 781 + if lastTime, ok := stats["last_time"]; ok { 782 + result["current_end_time"] = lastTime 561 783 } 562 784 563 - // Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data) 564 - estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12) 565 - 566 - // Calculate completion estimate 567 - var estimatedCompletionTime *time.Time 568 - var operationsNeeded int 569 - var currentRate float64 570 - 571 - operationsNeeded = plc.BUNDLE_SIZE - mempoolCount 572 - 573 - if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 { 574 - timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds() 575 - if timeSpan > 0 { 576 - currentRate = float64(mempoolCount) / timeSpan 577 - if currentRate > 0 { 578 - secondsNeeded := float64(operationsNeeded) / currentRate 579 - completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 580 - estimatedCompletionTime = &completionTime 581 - } 582 - } 785 + // Add size info if available 786 + if sizeBytes, ok := stats["size_bytes"]; ok { 787 + result["uncompressed_size"] = sizeBytes 788 + result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12) 583 789 } 584 790 585 - // Get previous bundle for cursor context 586 - var prevBundleHash string 587 - var cursor string 791 + // Get previous bundle info 588 792 if bundleNum > 1 { 589 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1) 590 - if err == nil { 591 - prevBundleHash = prevBundle.Hash 592 - cursor = prevBundle.EndTime.Format(time.RFC3339Nano) 593 - } 594 - } 595 - 596 - // Determine 
bundle status 597 - status := "filling" 598 - if mempoolCount >= plc.BUNDLE_SIZE { 599 - status = "ready" 600 - } 601 - 602 - // Build upcoming bundle response 603 - result := map[string]interface{}{ 604 - "plc_bundle_number": bundleNum, 605 - "is_upcoming": true, 606 - "status": status, 607 - "operation_count": mempoolCount, 608 - "target_operation_count": plc.BUNDLE_SIZE, 609 - "progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100, 610 - "operations_needed": operationsNeeded, 611 - "did_count": uniqueDIDCount, 612 - "start_time": firstOp.CreatedAt, // This is FIXED once first op exists 613 - "current_end_time": lastOp.CreatedAt, // This will change as more ops arrive 614 - "uncompressed_size": uncompressedSize, 615 - "estimated_compressed_size": estimatedCompressedSize, 616 - "compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize), 617 - "prev_bundle_hash": prevBundleHash, 618 - "cursor": cursor, 619 - } 620 - 621 - if estimatedCompletionTime != nil { 622 - result["estimated_completion_time"] = *estimatedCompletionTime 623 - result["current_rate_per_second"] = currentRate 624 - } 625 - 626 - // Get actual mempool operations if requested 627 - if r.URL.Query().Get("include_dids") == "true" { 628 - ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE) 629 - if err == nil { 630 - // Extract unique DIDs 631 - didSet := make(map[string]bool) 632 - for _, op := range ops { 633 - didSet[op.DID] = true 634 - } 635 - dids := make([]string, 0, len(didSet)) 636 - for did := range didSet { 637 - dids = append(dids, did) 638 - } 639 - result["dids"] = dids 793 + if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil { 794 + result["parent"] = prevBundle.Hash // Parent chain hash 795 + result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano) 640 796 } 641 797 } 642 798 ··· 652 808 return 653 809 } 654 810 655 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 811 + // Get from library 812 + dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum) 656 813 if err != nil { 657 814 resp.error("bundle not found", http.StatusNotFound) 658 815 return 659 816 } 660 817 661 818 resp.json(map[string]interface{}{ 662 - "plc_bundle_number": bundle.BundleNumber, 663 - "did_count": len(bundle.DIDs), 664 - "dids": bundle.DIDs, 819 + "plc_bundle_number": bundleNum, 820 + "did_count": didCount, 821 + "dids": dids, 665 822 }) 666 823 } 667 824 ··· 676 833 677 834 compressed := r.URL.Query().Get("compressed") != "false" 678 835 679 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 836 + bundle, err := s.bundleManager.GetBundleMetadata(bundleNum) 680 837 if err == nil { 681 838 // Bundle exists, serve it normally 682 839 resp.bundleHeaders(bundle) ··· 690 847 } 691 848 692 849 // Bundle not found - check if it's the upcoming bundle 693 - lastBundle, err := s.db.GetLastBundleNumber(r.Context()) 694 - if err != nil { 695 - resp.error("bundle not found", http.StatusNotFound) 696 - return 697 - } 698 - 850 + lastBundle := s.bundleManager.GetLastBundleNumber() 699 851 if bundleNum == lastBundle+1 { 700 852 // This is the upcoming bundle - serve from mempool 701 - s.serveUpcomingBundle(w, r, bundleNum) 853 + s.serveUpcomingBundle(w, bundleNum) 702 854 return 703 855 } 704 856 ··· 706 858 resp.error("bundle not found", http.StatusNotFound) 707 859 } 708 860 709 - func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) { 710 - ctx := r.Context() 711 - 712 - 
// Get mempool count 713 - mempoolCount, err := s.db.GetMempoolCount(ctx) 714 - if err != nil { 715 - http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError) 716 - return 717 - } 861 + func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) { 862 + // Get mempool stats 863 + stats := s.bundleManager.GetMempoolStats() 864 + count, ok := stats["count"].(int) 718 865 719 - if mempoolCount == 0 { 866 + if !ok || count == 0 { 720 867 http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound) 721 868 return 722 869 } 723 870 724 - // Get mempool operations (up to BUNDLE_SIZE) 725 - mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE) 871 + // Get operations from mempool 872 + ops, err := s.bundleManager.GetMempoolOperations() 726 873 if err != nil { 727 874 http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError) 728 875 return 729 876 } 730 877 731 - if len(mempoolOps) == 0 { 732 - http.Error(w, "upcoming bundle is empty", http.StatusNotFound) 878 + if len(ops) == 0 { 879 + http.Error(w, "no operations in mempool", http.StatusNotFound) 733 880 return 734 881 } 735 882 736 - // Get time range 737 - firstOp := mempoolOps[0] 738 - lastOp := mempoolOps[len(mempoolOps)-1] 883 + // Calculate times 884 + firstOp := ops[0] 885 + lastOp := ops[len(ops)-1] 739 886 740 887 // Extract unique DIDs 741 888 didSet := make(map[string]bool) 742 - for _, op := range mempoolOps { 889 + for _, op := range ops { 743 890 didSet[op.DID] = true 744 891 } 745 892 893 + // Calculate uncompressed size 894 + uncompressedSize := int64(0) 895 + for _, op := range ops { 896 + uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline 897 + } 898 + 746 899 // Get previous bundle hash 747 900 prevBundleHash := "" 748 901 if bundleNum > 1 { 749 - if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 902 + if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil { 750 903 prevBundleHash = prevBundle.Hash 751 904 } 752 905 } 753 906 754 - // Serialize operations to JSONL 755 - var buf []byte 756 - for _, mop := range mempoolOps { 757 - buf = append(buf, []byte(mop.Operation)...) 
758 - buf = append(buf, '\n') 759 - } 760 - 761 - // Calculate size 762 - uncompressedSize := int64(len(buf)) 763 - 764 907 // Set headers 765 908 w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum)) 766 909 w.Header().Set("X-Bundle-Is-Upcoming", "true") 767 910 w.Header().Set("X-Bundle-Status", "preview") 768 911 w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano)) 769 912 w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano)) 770 - w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps))) 771 - w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE)) 772 - w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100)) 913 + w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops))) 914 + w.Header().Set("X-Bundle-Target-Count", "10000") 915 + w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0)) 773 916 w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet))) 774 917 w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash) 918 + w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize)) 775 919 776 920 w.Header().Set("Content-Type", "application/jsonl") 777 921 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum)) 778 - w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize)) 779 - w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize)) 780 922 923 + // Stream operations as JSONL 781 924 w.WriteHeader(http.StatusOK) 782 - w.Write(buf) 925 + 926 + for _, op := range ops { 927 + // Use RawJSON if available (preserves exact format) 928 + if len(op.RawJSON) > 0 { 929 + w.Write(op.RawJSON) 930 + } else { 931 + // Fallback to marshaling 932 + data, _ := json.Marshal(op) 933 + w.Write(data) 934 + } 935 + w.Write([]byte("\n")) 936 + } 783 937 } 784 938 785 - func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) { 939 + func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) { 786 940 resp := newResponse(w) 787 - path := bundle.GetFilePath(s.plcBundleDir) 788 941 789 - file, err := os.Open(path) 942 + // Use the new streaming API for compressed data 943 + reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber) 790 944 if err != nil { 791 - resp.error("bundle file not found on disk", http.StatusNotFound) 945 + resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError) 792 946 return 793 947 } 794 - defer file.Close() 795 - 796 - fileInfo, _ := file.Stat() 948 + defer reader.Close() 797 949 798 950 w.Header().Set("Content-Type", "application/zstd") 799 951 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber)) 800 - w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size())) 801 - w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size())) 952 + w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize)) 953 + w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize)) 802 954 803 - http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file) 955 + // Stream the data directly to the response 956 + w.WriteHeader(http.StatusOK) 957 + io.Copy(w, reader) 804 958 } 805 959 806 - func (s *Server) 
serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) { 960 + func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) { 807 961 resp := newResponse(w) 808 962 809 - ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber) 963 + // Use the new streaming API for decompressed data 964 + reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber) 810 965 if err != nil { 811 - resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError) 966 + resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError) 812 967 return 813 968 } 814 - 815 - // Serialize to JSONL 816 - var buf []byte 817 - for _, op := range ops { 818 - buf = append(buf, op.RawJSON...) 819 - buf = append(buf, '\n') 820 - } 821 - 822 - fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir)) 823 - compressedSize := int64(0) 824 - if fileInfo != nil { 825 - compressedSize = fileInfo.Size() 826 - } 969 + defer reader.Close() 827 970 828 971 w.Header().Set("Content-Type", "application/jsonl") 829 972 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber)) 830 - w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf))) 831 - w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize)) 832 - w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf))) 833 - if compressedSize > 0 { 834 - w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize))) 973 + w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize)) 974 + w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize)) 975 + w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize)) 976 + if bundle.CompressedSize > 0 { 977 + w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize))) 835 978 } 836 979 980 + // Stream the data directly to the response 837 981 w.WriteHeader(http.StatusOK) 838 - w.Write(buf) 982 + io.Copy(w, reader) 839 983 } 840 984 841 985 func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) { 842 986 resp := newResponse(w) 843 987 limit := getQueryInt(r, "limit", 50) 844 988 845 - bundles, err := s.db.GetBundles(r.Context(), limit) 846 - if err != nil { 847 - resp.error(err.Error(), http.StatusInternalServerError) 848 - return 849 - } 989 + bundles := s.bundleManager.GetBundles(limit) 850 990 851 991 response := make([]map[string]interface{}, len(bundles)) 852 992 for i, bundle := range bundles { 853 - response[i] = formatBundleResponse(bundle) 993 + response[i] = formatBundleMetadata(bundle) 854 994 } 855 995 856 996 resp.json(response) ··· 859 999 func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) { 860 1000 resp := newResponse(w) 861 1001 862 - count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context()) 863 - if err != nil { 864 - resp.error(err.Error(), http.StatusInternalServerError) 865 - return 866 - } 1002 + stats := s.bundleManager.GetBundleStats() 1003 + 1004 + bundleCount := stats["bundle_count"].(int64) 1005 + totalSize := stats["total_size"].(int64) 1006 + totalUncompressedSize := stats["total_uncompressed_size"].(int64) 1007 + lastBundle := stats["last_bundle"].(int64) 867 1008 868 1009 resp.json(map[string]interface{}{ 
869 - "plc_bundle_count": count, 870 - "last_bundle_number": lastBundle, 871 - "total_compressed_size": compressedSize, 872 - "total_compressed_size_mb": float64(compressedSize) / 1024 / 1024, 873 - "total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024, 874 - "total_uncompressed_size": uncompressedSize, 875 - "total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 876 - "total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024, 877 - "compression_ratio": float64(uncompressedSize) / float64(compressedSize), 1010 + "plc_bundle_count": bundleCount, 1011 + "last_bundle_number": lastBundle, 1012 + "total_compressed_size": totalSize, 1013 + "total_uncompressed_size": totalUncompressedSize, 1014 + "overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize), 878 1015 }) 879 1016 } 880 1017 ··· 882 1019 883 1020 func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) { 884 1021 resp := newResponse(w) 885 - ctx := r.Context() 886 1022 887 - count, err := s.db.GetMempoolCount(ctx) 888 - if err != nil { 889 - resp.error(err.Error(), http.StatusInternalServerError) 890 - return 891 - } 1023 + // Get stats from library's mempool via wrapper method 1024 + stats := s.bundleManager.GetMempoolStats() 892 1025 893 - uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx) 894 - if err != nil { 895 - resp.error(err.Error(), http.StatusInternalServerError) 896 - return 1026 + // Convert to API response format 1027 + result := map[string]interface{}{ 1028 + "operation_count": stats["count"], 1029 + "can_create_bundle": stats["can_create_bundle"], 897 1030 } 898 1031 899 - uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx) 900 - if err != nil { 901 - resp.error(err.Error(), http.StatusInternalServerError) 902 - return 1032 + // Add size information 1033 + if sizeBytes, ok := stats["size_bytes"]; ok { 1034 + result["uncompressed_size"] = sizeBytes 1035 + result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024 903 1036 } 904 1037 905 - result := map[string]interface{}{ 906 - "operation_count": count, 907 - "unique_did_count": uniqueDIDCount, 908 - "uncompressed_size": uncompressedSize, 909 - "uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 910 - "can_create_bundle": count >= plc.BUNDLE_SIZE, 911 - } 1038 + // Add time range and calculate estimated completion 1039 + if count, ok := stats["count"].(int); ok && count > 0 { 1040 + if firstTime, ok := stats["first_time"].(time.Time); ok { 1041 + result["mempool_start_time"] = firstTime 912 1042 913 - if count > 0 { 914 - if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil { 915 - result["mempool_start_time"] = firstOp.CreatedAt 1043 + if lastTime, ok := stats["last_time"].(time.Time); ok { 1044 + result["mempool_end_time"] = lastTime 916 1045 917 - if count < plc.BUNDLE_SIZE { 918 - if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil { 919 - timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds() 1046 + // Calculate estimated next bundle time if not complete 1047 + if count < 10000 { 1048 + timeSpan := lastTime.Sub(firstTime).Seconds() 920 1049 if timeSpan > 0 { 921 1050 opsPerSecond := float64(count) / timeSpan 922 1051 if opsPerSecond > 0 { 923 - remainingOps := plc.BUNDLE_SIZE - count 1052 + remainingOps := 10000 - count 924 1053 secondsNeeded := float64(remainingOps) / opsPerSecond 925 - result["estimated_next_bundle_time"] = 
time.Now().Add(time.Duration(secondsNeeded) * time.Second) 926 - result["operations_needed"] = remainingOps 1054 + estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 1055 + 1056 + result["estimated_next_bundle_time"] = estimatedTime 927 1057 result["current_rate_per_second"] = opsPerSecond 1058 + result["operations_needed"] = remainingOps 928 1059 } 929 1060 } 1061 + result["progress_percent"] = float64(count) / 100.0 1062 + } else { 1063 + // Ready to create bundle 1064 + result["estimated_next_bundle_time"] = time.Now() 1065 + result["operations_needed"] = 0 930 1066 } 931 - } else { 932 - result["estimated_next_bundle_time"] = time.Now() 933 - result["operations_needed"] = 0 934 1067 } 935 1068 } 936 1069 } else { 1070 + // Empty mempool 937 1071 result["mempool_start_time"] = nil 938 1072 result["estimated_next_bundle_time"] = nil 939 1073 } ··· 958 1092 959 1093 // ===== VERIFICATION HANDLERS ===== 960 1094 961 - func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) { 962 - resp := newResponse(w) 963 - vars := mux.Vars(r) 964 - 965 - bundleNumber, err := strconv.Atoi(vars["bundleNumber"]) 966 - if err != nil { 967 - resp.error("Invalid bundle number", http.StatusBadRequest) 968 - return 969 - } 970 - 971 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber) 972 - if err != nil { 973 - resp.error("Bundle not found", http.StatusNotFound) 974 - return 975 - } 976 - 977 - // Fetch from PLC and verify 978 - remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber) 979 - if err != nil { 980 - resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError) 981 - return 982 - } 983 - 984 - remoteHash := computeOperationsHash(remoteOps) 985 - verified := bundle.Hash == remoteHash 986 - 987 - resp.json(map[string]interface{}{ 988 - "bundle_number": bundleNumber, 989 - "verified": verified, 990 - "local_hash": bundle.Hash, 991 - "remote_hash": remoteHash, 992 - "local_op_count": plc.BUNDLE_SIZE, 993 - "remote_op_count": len(remoteOps), 994 - "boundary_cids_used": len(prevCIDs), 995 - }) 996 - } 997 - 998 - func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) { 999 - var after string 1000 - var prevBoundaryCIDs map[string]bool 1001 - 1002 - if bundleNum > 1 { 1003 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1) 1004 - if err != nil { 1005 - return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err) 1006 - } 1007 - 1008 - after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z") 1009 - 1010 - if len(prevBundle.BoundaryCIDs) > 0 { 1011 - prevBoundaryCIDs = make(map[string]bool) 1012 - for _, cid := range prevBundle.BoundaryCIDs { 1013 - prevBoundaryCIDs[cid] = true 1014 - } 1015 - } 1016 - } 1017 - 1018 - var allRemoteOps []plc.PLCOperation 1019 - seenCIDs := make(map[string]bool) 1020 - 1021 - for cid := range prevBoundaryCIDs { 1022 - seenCIDs[cid] = true 1023 - } 1024 - 1025 - currentAfter := after 1026 - maxFetches := 20 1027 - 1028 - for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ { 1029 - batch, err := s.plcClient.Export(ctx, plc.ExportOptions{ 1030 - Count: 1000, 1031 - After: currentAfter, 1032 - }) 1033 - if err != nil || len(batch) == 0 { 1034 - break 1035 - } 1036 - 1037 - for _, op := range batch { 1038 - if !seenCIDs[op.CID] { 1039 - seenCIDs[op.CID] = true 1040 - allRemoteOps = append(allRemoteOps, op) 1041 - if 
len(allRemoteOps) >= plc.BUNDLE_SIZE { 1042 - break 1043 - } 1044 - } 1045 - } 1046 - 1047 - if len(batch) > 0 { 1048 - lastOp := batch[len(batch)-1] 1049 - currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z") 1050 - } 1051 - 1052 - if len(batch) < 1000 { 1053 - break 1054 - } 1055 - } 1056 - 1057 - if len(allRemoteOps) > plc.BUNDLE_SIZE { 1058 - allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE] 1059 - } 1060 - 1061 - return allRemoteOps, prevBoundaryCIDs, nil 1062 - } 1063 - 1064 1095 func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) { 1065 1096 resp := newResponse(w) 1066 - ctx := r.Context() 1067 1097 1068 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 1069 - if err != nil { 1070 - resp.error(err.Error(), http.StatusInternalServerError) 1071 - return 1072 - } 1073 - 1098 + lastBundle := s.bundleManager.GetLastBundleNumber() 1074 1099 if lastBundle == 0 { 1075 1100 resp.json(map[string]interface{}{ 1076 1101 "status": "empty", ··· 1084 1109 var errorMsg string 1085 1110 1086 1111 for i := 1; i <= lastBundle; i++ { 1087 - bundle, err := s.db.GetBundleByNumber(ctx, i) 1112 + bundle, err := s.bundleManager.GetBundleMetadata(i) 1088 1113 if err != nil { 1089 1114 valid = false 1090 1115 brokenAt = i ··· 1093 1118 } 1094 1119 1095 1120 if i > 1 { 1096 - prevBundle, err := s.db.GetBundleByNumber(ctx, i-1) 1121 + prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1) 1097 1122 if err != nil { 1098 1123 valid = false 1099 1124 brokenAt = i ··· 1101 1126 break 1102 1127 } 1103 1128 1104 - if bundle.PrevBundleHash != prevBundle.Hash { 1129 + if bundle.Parent != prevBundle.Hash { 1105 1130 valid = false 1106 1131 brokenAt = i 1107 - errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1) 1132 + errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1) 1108 1133 break 1109 1134 } 1110 1135 } ··· 1125 1150 1126 1151 func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) { 1127 1152 resp := newResponse(w) 1128 - ctx := r.Context() 1129 1153 1130 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 1131 - if err != nil { 1132 - resp.error(err.Error(), http.StatusInternalServerError) 1133 - return 1134 - } 1135 - 1154 + lastBundle := s.bundleManager.GetLastBundleNumber() 1136 1155 if lastBundle == 0 { 1137 1156 resp.json(map[string]interface{}{ 1138 1157 "chain_length": 0, ··· 1141 1160 return 1142 1161 } 1143 1162 1144 - firstBundle, _ := s.db.GetBundleByNumber(ctx, 1) 1145 - lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle) 1146 - 1147 - // Updated to receive 5 values instead of 3 1148 - count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx) 1149 - if err != nil { 1150 - resp.error(err.Error(), http.StatusInternalServerError) 1151 - return 1152 - } 1163 + firstBundle, _ := s.bundleManager.GetBundleMetadata(1) 1164 + lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle) 1165 + stats := s.bundleManager.GetBundleStats() 1153 1166 1154 1167 resp.json(map[string]interface{}{ 1155 - "chain_length": lastBundle, 1156 - "total_bundles": count, 1157 - "total_compressed_size": compressedSize, 1158 - "total_compressed_size_mb": float64(compressedSize) / 1024 / 1024, 1159 - "total_uncompressed_size": uncompressedSize, 1160 - "total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 1161 - "compression_ratio": float64(uncompressedSize) / float64(compressedSize), 1162 - "chain_start_time": firstBundle.StartTime, 
1163 - "chain_end_time": lastBundleData.EndTime, 1164 - "chain_head_hash": lastBundleData.Hash, 1165 - "first_prev_hash": firstBundle.PrevBundleHash, 1166 - "last_prev_hash": lastBundleData.PrevBundleHash, 1168 + "chain_length": lastBundle, 1169 + "total_bundles": stats["bundle_count"], 1170 + "total_compressed_size": stats["total_size"], 1171 + "total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024, 1172 + "chain_start_time": firstBundle.StartTime, 1173 + "chain_end_time": lastBundleData.EndTime, 1174 + "chain_head_hash": lastBundleData.Hash, 1175 + "first_parent": firstBundle.Parent, 1176 + "last_parent": lastBundleData.Parent, 1167 1177 }) 1168 1178 } 1169 1179 ··· 1184 1194 return 1185 1195 } 1186 1196 1187 - startBundle := s.findStartBundle(ctx, afterTime) 1197 + startBundle := s.findStartBundle(afterTime) 1188 1198 ops := s.collectOperations(ctx, startBundle, afterTime, count) 1189 1199 1190 1200 w.Header().Set("Content-Type", "application/jsonl") ··· 1224 1234 return time.Time{}, fmt.Errorf("invalid timestamp format") 1225 1235 } 1226 1236 1227 - func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int { 1237 + func (s *Server) findStartBundle(afterTime time.Time) int { 1228 1238 if afterTime.IsZero() { 1229 1239 return 1 1230 1240 } 1231 1241 1232 - foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime) 1233 - if err != nil { 1234 - return 1 1235 - } 1236 - 1242 + foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime) 1237 1243 if foundBundle > 1 { 1238 1244 return foundBundle - 1 1239 1245 } ··· 1244 1250 var allOps []plc.PLCOperation 1245 1251 seenCIDs := make(map[string]bool) 1246 1252 1247 - lastBundle, _ := s.db.GetLastBundleNumber(ctx) 1253 + lastBundle := s.bundleManager.GetLastBundleNumber() 1248 1254 1249 1255 for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ { 1250 1256 ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum) ··· 1398 1404 }) 1399 1405 } 1400 1406 1401 - // ===== UTILITY FUNCTIONS ===== 1407 + func (s *Server) handleGetPLCHistory(w http.ResponseWriter, r *http.Request) { 1408 + resp := newResponse(w) 1409 + 1410 + limit := getQueryInt(r, "limit", 0) 1411 + fromBundle := getQueryInt(r, "from", 1) 1412 + 1413 + // Use BundleManager instead of database 1414 + history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle) 1415 + if err != nil { 1416 + resp.error(err.Error(), http.StatusInternalServerError) 1417 + return 1418 + } 1419 + 1420 + var totalOps int64 1421 + var totalUncompressed int64 1422 + var totalCompressed int64 1423 + 1424 + for _, point := range history { 1425 + totalOps += int64(point.OperationCount) 1426 + totalUncompressed += point.UncompressedSize 1427 + totalCompressed += point.CompressedSize 1428 + } 1429 + 1430 + result := map[string]interface{}{ 1431 + "data": history, 1432 + "summary": map[string]interface{}{ 1433 + "days": len(history), 1434 + "total_operations": totalOps, 1435 + "total_uncompressed": totalUncompressed, 1436 + "total_compressed": totalCompressed, 1437 + "compression_ratio": 0.0, 1438 + }, 1439 + } 1440 + 1441 + if len(history) > 0 { 1442 + result["summary"].(map[string]interface{})["first_date"] = history[0].Date 1443 + result["summary"].(map[string]interface{})["last_date"] = history[len(history)-1].Date 1444 + result["summary"].(map[string]interface{})["time_span_days"] = len(history) 1445 + 1446 + if totalCompressed > 0 { 1447 + 
result["summary"].(map[string]interface{})["compression_ratio"] = float64(totalUncompressed) / float64(totalCompressed) 1448 + } 1449 + 1450 + result["summary"].(map[string]interface{})["avg_operations_per_day"] = totalOps / int64(len(history)) 1451 + result["summary"].(map[string]interface{})["avg_size_per_day"] = totalUncompressed / int64(len(history)) 1452 + } 1453 + 1454 + resp.json(result) 1455 + } 1456 + 1457 + // ===== DEBUG HANDLERS ===== 1458 + 1459 + func (s *Server) handleGetDBSizes(w http.ResponseWriter, r *http.Request) { 1460 + resp := newResponse(w) 1461 + ctx := r.Context() 1462 + schema := "public" // Or make configurable if needed 1463 + 1464 + tableSizes, err := s.db.GetTableSizes(ctx, schema) 1465 + if err != nil { 1466 + log.Error("Failed to get table sizes: %v", err) 1467 + resp.error("Failed to retrieve table sizes", http.StatusInternalServerError) 1468 + return 1469 + } 1402 1470 1403 - func computeOperationsHash(ops []plc.PLCOperation) string { 1404 - var jsonlData []byte 1405 - for _, op := range ops { 1406 - jsonlData = append(jsonlData, op.RawJSON...) 1407 - jsonlData = append(jsonlData, '\n') 1471 + indexSizes, err := s.db.GetIndexSizes(ctx, schema) 1472 + if err != nil { 1473 + log.Error("Failed to get index sizes: %v", err) 1474 + resp.error("Failed to retrieve index sizes", http.StatusInternalServerError) 1475 + return 1408 1476 } 1409 - hash := sha256.Sum256(jsonlData) 1410 - return hex.EncodeToString(hash[:]) 1477 + 1478 + resp.json(map[string]interface{}{ 1479 + "schema": schema, 1480 + "tables": tableSizes, 1481 + "indexes": indexSizes, 1482 + "retrievedAt": time.Now().UTC(), 1483 + }) 1411 1484 } 1485 + 1486 + func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) { 1487 + resp := newResponse(w) 1488 + 1489 + bundleNum, err := getBundleNumber(r) 1490 + if err != nil { 1491 + resp.error("invalid bundle number", http.StatusBadRequest) 1492 + return 1493 + } 1494 + 1495 + labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum) 1496 + if err != nil { 1497 + resp.error(err.Error(), http.StatusInternalServerError) 1498 + return 1499 + } 1500 + 1501 + resp.json(map[string]interface{}{ 1502 + "bundle": bundleNum, 1503 + "count": len(labels), 1504 + "labels": labels, 1505 + }) 1506 + } 1507 + 1508 + // ===== UTILITY FUNCTIONS ===== 1412 1509 1413 1510 func normalizeEndpoint(endpoint string) string { 1414 1511 endpoint = strings.TrimPrefix(endpoint, "https://")
+26 -14
internal/api/server.go
··· 6 6 	"net/http"
7 7 	"time"
8 8 
9 - 	"github.com/atscan/atscanner/internal/config"
10 - 	"github.com/atscan/atscanner/internal/log"
11 - 	"github.com/atscan/atscanner/internal/plc"
12 - 	"github.com/atscan/atscanner/internal/storage"
9 + 	"github.com/atscan/atscand/internal/config"
10 + 	"github.com/atscan/atscand/internal/log"
11 + 	"github.com/atscan/atscand/internal/plc"
12 + 	"github.com/atscan/atscand/internal/storage"
13 13 	"github.com/gorilla/handlers"
14 14 	"github.com/gorilla/mux"
15 15 )
··· 18 18 	router *mux.Router
19 19 	server *http.Server
20 20 	db storage.Database
21 - 	plcClient *plc.Client
22 21 	plcBundleDir string
23 22 	bundleManager *plc.BundleManager
23 + 	plcIndexDIDs bool
24 24 }
25 25 
26 - func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
27 - 	bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs)
28 - 
26 + func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
29 27 	s := &Server{
30 28 		router: mux.NewRouter(),
31 29 		db: db,
32 - 		plcClient: plc.NewClient(plcCfg.DirectoryURL),
33 30 		plcBundleDir: plcCfg.BundleDir,
34 - 		bundleManager: bundleManager,
31 + 		bundleManager: bundleManager, // Use provided shared instance
32 + 		plcIndexDIDs: plcCfg.IndexDIDs,
35 33 	}
36 34 
37 35 	s.setupRoutes()
··· 59 57 	// Generic endpoints (keep as-is)
60 58 	api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET")
61 59 	api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET")
60 + 	api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET")
62 - 
61 + 
63 - 	// NEW: PDS-specific endpoints (virtual, created via JOINs)
62 + 	// PDS-specific endpoints (virtual, created via JOINs)
64 63 	api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET")
65 64 	api.HandleFunc("/pds/stats", s.handleGetPDSStats).Methods("GET")
66 65 	api.HandleFunc("/pds/countries", s.handleGetCountryLeaderboard).Methods("GET")
67 66 	api.HandleFunc("/pds/versions", s.handleGetVersionStats).Methods("GET")
68 67 	api.HandleFunc("/pds/duplicates", s.handleGetDuplicateEndpoints).Methods("GET")
69 68 	api.HandleFunc("/pds/{endpoint}", s.handleGetPDSDetail).Methods("GET")
69 + 
70 + 	// PDS repos
71 + 	api.HandleFunc("/pds/{endpoint}/repos", s.handleGetPDSRepos).Methods("GET")
72 + 	api.HandleFunc("/pds/{endpoint}/repos/stats", s.handleGetPDSRepoStats).Methods("GET")
73 + 	api.HandleFunc("/pds/repos/{did}", s.handleGetDIDRepos).Methods("GET")
74 + 
75 + 	// Global DID routes
76 + 	api.HandleFunc("/did/{did}", s.handleGetGlobalDID).Methods("GET")
77 + 	api.HandleFunc("/handle/{handle}", s.handleGetDIDByHandle).Methods("GET") // NEW
70 78 
71 79 	// PLC Bundle routes
72 80 	api.HandleFunc("/plc/bundles", s.handleGetPLCBundles).Methods("GET")
··· 76 84 	api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
77 85 	api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
78 86 	api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
79 - 	api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87 + 	api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
88 + 
89 + 	// PLC history/metrics
90 + 	api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
80 91 
81 92 	// PLC Export endpoint (simulates PLC directory)
82 93 	api.HandleFunc("/plc/export", s.handlePLCExport).Methods("GET")
··· 84 95 	// DID routes
85 96 	
api.HandleFunc("/plc/did/{did}", s.handleGetDID).Methods("GET") 86 97 api.HandleFunc("/plc/did/{did}/history", s.handleGetDIDHistory).Methods("GET") 87 - api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET") // NEW 98 + api.HandleFunc("/plc/dids/stats", s.handleGetDIDStats).Methods("GET") 88 99 89 100 // Mempool routes 90 101 api.HandleFunc("/mempool/stats", s.handleGetMempoolStats).Methods("GET") ··· 92 103 // Metrics routes 93 104 api.HandleFunc("/metrics/plc", s.handleGetPLCMetrics).Methods("GET") 94 105 95 - // Job status endpoint 106 + // Debug Endpoints 107 + api.HandleFunc("/debug/db/sizes", s.handleGetDBSizes).Methods("GET") 96 108 api.HandleFunc("/jobs", s.handleGetJobStatus).Methods("GET") 97 109 98 110 // Health check
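Review note: the new /did/{did} and /handle/{handle} routes rely on gorilla/mux path variables; the handler bodies are not part of this diff. A minimal sketch of how such a handler would read the variable, assuming the server.go package context shown above (the real implementation may differ):

func (s *Server) handleGetDIDByHandle(w http.ResponseWriter, r *http.Request) {
	// gorilla/mux parses {handle} from the route pattern into mux.Vars
	handle := mux.Vars(r)["handle"]
	if handle == "" {
		http.Error(w, "missing handle", http.StatusBadRequest)
		return
	}
	// ...resolve the handle to a DID and write the JSON response...
}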
+36 -13
internal/ipinfo/client.go
··· 99 99 return ipInfo, nil 100 100 } 101 101 102 - // ExtractIPFromEndpoint extracts IP from endpoint URL 103 - func ExtractIPFromEndpoint(endpoint string) (string, error) { 102 + // IPAddresses holds both IPv4 and IPv6 addresses 103 + type IPAddresses struct { 104 + IPv4 string 105 + IPv6 string 106 + } 107 + 108 + // ExtractIPsFromEndpoint extracts both IPv4 and IPv6 from endpoint URL 109 + func ExtractIPsFromEndpoint(endpoint string) (*IPAddresses, error) { 104 110 // Parse URL 105 111 parsedURL, err := url.Parse(endpoint) 106 112 if err != nil { 107 - return "", fmt.Errorf("failed to parse endpoint URL: %w", err) 113 + return nil, fmt.Errorf("failed to parse endpoint URL: %w", err) 108 114 } 109 115 110 116 host := parsedURL.Hostname() 111 117 if host == "" { 112 - return "", fmt.Errorf("no hostname in endpoint") 118 + return nil, fmt.Errorf("no hostname in endpoint") 113 119 } 120 + 121 + result := &IPAddresses{} 114 122 115 123 // Check if host is already an IP 116 - if net.ParseIP(host) != nil { 117 - return host, nil 124 + if ip := net.ParseIP(host); ip != nil { 125 + if ip.To4() != nil { 126 + result.IPv4 = host 127 + } else { 128 + result.IPv6 = host 129 + } 130 + return result, nil 118 131 } 119 132 120 - // Resolve hostname to IP 133 + // Resolve hostname to IPs 121 134 ips, err := net.LookupIP(host) 122 135 if err != nil { 123 - return "", fmt.Errorf("failed to resolve hostname: %w", err) 136 + return nil, fmt.Errorf("failed to resolve hostname: %w", err) 124 137 } 125 138 126 139 if len(ips) == 0 { 127 - return "", fmt.Errorf("no IPs found for hostname") 140 + return nil, fmt.Errorf("no IPs found for hostname") 128 141 } 129 142 130 - // Return first IPv4 address 143 + // Extract both IPv4 and IPv6 131 144 for _, ip := range ips { 132 145 if ipv4 := ip.To4(); ipv4 != nil { 133 - return ipv4.String(), nil 146 + if result.IPv4 == "" { 147 + result.IPv4 = ipv4.String() 148 + } 149 + } else { 150 + if result.IPv6 == "" { 151 + result.IPv6 = ip.String() 152 + } 134 153 } 135 154 } 136 155 137 - // Fallback to first IP (might be IPv6) 138 - return ips[0].String(), nil 156 + // Must have at least one IP 157 + if result.IPv4 == "" && result.IPv6 == "" { 158 + return nil, fmt.Errorf("no valid IPs found") 159 + } 160 + 161 + return result, nil 139 162 }
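Review note: the dual-stack resolution above hinges on ip.To4() returning non-nil for IPv4 addresses. A standalone sketch of the same classification against a live lookup (example.com is a placeholder host; output depends on DNS):

package main

import (
	"fmt"
	"net"
)

func main() {
	// Same classification ExtractIPsFromEndpoint uses: To4() != nil means IPv4.
	ips, err := net.LookupIP("example.com")
	if err != nil {
		fmt.Println("resolve failed:", err)
		return
	}
	var v4, v6 string
	for _, ip := range ips {
		if ip.To4() != nil {
			if v4 == "" {
				v4 = ip.String()
			}
		} else if v6 == "" {
			v6 = ip.String()
		}
	}
	fmt.Printf("IPv4: %q IPv6: %q\n", v4, v6)
}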
+6 -2
internal/log/log.go
··· 28 28 errorLog = log.New(os.Stderr, "", 0) 29 29 } 30 30 31 - // timestamp returns current time in ISO 8601 format 31 + // timestamp returns current time with milliseconds (local time, no timezone) 32 32 func timestamp() string { 33 - return time.Now().Format(time.RFC3339) 33 + return time.Now().Format("2006-01-02T15:04:05.000") 34 34 } 35 35 36 36 func Verbose(format string, v ...interface{}) { ··· 39 39 40 40 func Info(format string, v ...interface{}) { 41 41 infoLog.Printf("%s [INFO] %s", timestamp(), fmt.Sprintf(format, v...)) 42 + } 43 + 44 + func Warn(format string, v ...interface{}) { 45 + infoLog.Printf("%s [WARN] %s", timestamp(), fmt.Sprintf(format, v...)) 42 46 } 43 47 44 48 func Error(format string, v ...interface{}) {
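Review note: the layout string "2006-01-02T15:04:05.000" uses Go's reference time, with .000 zero-padding milliseconds. A quick check of the output shape (the instant is fixed here so the result is deterministic):

package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.Date(2025, 1, 2, 15, 4, 5, 123_000_000, time.UTC)
	fmt.Println(t.Format("2006-01-02T15:04:05.000")) // 2025-01-02T15:04:05.123
}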
+47 -18
internal/pds/client.go
··· 4 4 "context" 5 5 "encoding/json" 6 6 "fmt" 7 + "net" 7 8 "net/http" 8 9 "time" 9 10 ) ··· 28 29 29 30 // Repo represents a repository in the list 30 31 type Repo struct { 31 - DID string `json:"did"` 32 - Head string `json:"head,omitempty"` 33 - Rev string `json:"rev,omitempty"` 32 + DID string `json:"did"` 33 + Head string `json:"head,omitempty"` 34 + Rev string `json:"rev,omitempty"` 35 + Active *bool `json:"active,omitempty"` 36 + Status *string `json:"status,omitempty"` 34 37 } 35 38 36 39 // ListRepos fetches all repositories from a PDS with pagination 37 - func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]string, error) { 38 - var allDIDs []string 40 + func (c *Client) ListRepos(ctx context.Context, endpoint string) ([]Repo, error) { 41 + var allRepos []Repo 39 42 var cursor *string 40 43 41 44 for { ··· 67 70 } 68 71 resp.Body.Close() 69 72 70 - // Collect DIDs 71 - for _, repo := range result.Repos { 72 - allDIDs = append(allDIDs, repo.DID) 73 - } 73 + // Collect repos 74 + allRepos = append(allRepos, result.Repos...) 74 75 75 76 // Check if there are more pages 76 77 if result.Cursor == nil || *result.Cursor == "" { ··· 79 80 cursor = result.Cursor 80 81 } 81 82 82 - return allDIDs, nil 83 + return allRepos, nil 83 84 } 84 85 85 86 // DescribeServer fetches com.atproto.server.describeServer 86 - func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) { 87 + // Returns: description, responseTime, usedIP, error 88 + func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) { 89 + startTime := time.Now() 87 90 url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint) 88 91 89 - //fmt.Println(url) 92 + // Track which IP was used 93 + var usedIP string 94 + transport := &http.Transport{ 95 + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 96 + conn, err := (&net.Dialer{ 97 + Timeout: 30 * time.Second, 98 + KeepAlive: 30 * time.Second, 99 + }).DialContext(ctx, network, addr) 100 + 101 + if err == nil && conn != nil { 102 + if remoteAddr := conn.RemoteAddr(); remoteAddr != nil { 103 + if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok { 104 + usedIP = tcpAddr.IP.String() 105 + } 106 + } 107 + } 108 + return conn, err 109 + }, 110 + } 111 + 112 + client := &http.Client{ 113 + Timeout: c.httpClient.Timeout, 114 + Transport: transport, 115 + } 90 116 91 117 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 92 118 if err != nil { 93 - return nil, err 119 + return nil, 0, "", err 94 120 } 95 121 96 - resp, err := c.httpClient.Do(req) 122 + resp, err := client.Do(req) 123 + responseTime := time.Since(startTime) 124 + 97 125 if err != nil { 98 - return nil, err 126 + return nil, responseTime, usedIP, err 99 127 } 100 128 defer resp.Body.Close() 101 129 102 130 if resp.StatusCode != http.StatusOK { 103 - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 131 + return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 104 132 } 105 133 106 134 var desc ServerDescription 107 135 if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil { 108 - return nil, err 136 + return nil, responseTime, usedIP, err 109 137 } 110 138 111 - return &desc, nil 139 + return &desc, responseTime, usedIP, nil 112 140 } 113 141 114 142 // CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version" 143 + // Returns: available, responseTime, 
version, error 115 144 func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) { 116 145 startTime := time.Now() 117 146
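Review note: DescribeServer captures the dialed IP by wrapping net.Dialer in a custom DialContext. Note it also builds a fresh http.Transport per call, so connections are never pooled across scans. The capture trick in isolation (the target URL is a placeholder):

package main

import (
	"context"
	"fmt"
	"net"
	"net/http"
	"time"
)

func main() {
	var usedIP string
	client := &http.Client{
		Timeout: 10 * time.Second,
		Transport: &http.Transport{
			// Record the remote address of whatever connection the request dials.
			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
				conn, err := (&net.Dialer{Timeout: 10 * time.Second}).DialContext(ctx, network, addr)
				if err == nil {
					if tcp, ok := conn.RemoteAddr().(*net.TCPAddr); ok {
						usedIP = tcp.IP.String()
					}
				}
				return conn, err
			},
		},
	}
	if resp, err := client.Get("https://example.com"); err == nil {
		resp.Body.Close()
	}
	fmt.Println("dialed IP:", usedIP)
}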
+100 -36
internal/pds/scanner.go
··· 8 8 "sync/atomic" 9 9 "time" 10 10 11 - "github.com/acarl005/stripansi" 12 - "github.com/atscan/atscanner/internal/config" 13 - "github.com/atscan/atscanner/internal/ipinfo" 14 - "github.com/atscan/atscanner/internal/log" 15 - "github.com/atscan/atscanner/internal/monitor" 16 - "github.com/atscan/atscanner/internal/storage" 11 + "github.com/atscan/atscand/internal/config" 12 + "github.com/atscan/atscand/internal/ipinfo" 13 + "github.com/atscan/atscand/internal/log" 14 + "github.com/atscan/atscand/internal/monitor" 15 + "github.com/atscan/atscand/internal/storage" 17 16 ) 18 17 19 18 type Scanner struct { ··· 40 39 servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{ 41 40 Type: "pds", 42 41 OnlyStale: true, 42 + OnlyValid: true, 43 43 RecheckInterval: s.config.RecheckInterval, 44 44 }) 45 45 if err != nil { ··· 124 124 } 125 125 126 126 func (s *Scanner) scanAndSaveEndpoint(ctx context.Context, ep *storage.Endpoint) { 127 - // STEP 1: Resolve IP (before any network call) 128 - ip, err := ipinfo.ExtractIPFromEndpoint(ep.Endpoint) 127 + // STEP 1: Resolve IPs (both IPv4 and IPv6) 128 + ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint) 129 129 if err != nil { 130 - // Mark as offline due to DNS failure 131 130 s.saveScanResult(ctx, ep.ID, &ScanResult{ 132 131 Status: storage.EndpointStatusOffline, 133 132 ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err), ··· 135 134 return 136 135 } 137 136 138 - // Update IP immediately 139 - s.db.UpdateEndpointIP(ctx, ep.ID, ip, time.Now().UTC()) 137 + // Update IPs immediately 138 + s.db.UpdateEndpointIPs(ctx, ep.ID, ips.IPv4, ips.IPv6, time.Now().UTC()) 140 139 141 - // STEP 2: Health check 142 - available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) 143 - if err != nil || !available { 144 - errMsg := "health check failed" 145 - if err != nil { 146 - errMsg = err.Error() 147 - } 140 + // STEP 1.5: Fetch IP info asynchronously for both IPs 141 + if ips.IPv4 != "" { 142 + go s.updateIPInfoIfNeeded(ctx, ips.IPv4) 143 + } 144 + if ips.IPv6 != "" { 145 + go s.updateIPInfoIfNeeded(ctx, ips.IPv6) 146 + } 147 + 148 + // STEP 2: Call describeServer (primary health check + metadata) 149 + desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint) 150 + if err != nil { 148 151 s.saveScanResult(ctx, ep.ID, &ScanResult{ 149 152 Status: storage.EndpointStatusOffline, 150 - ResponseTime: responseTime, 151 - ErrorMessage: errMsg, 153 + ResponseTime: descResponseTime, 154 + ErrorMessage: fmt.Sprintf("describeServer failed: %v", err), 155 + UsedIP: usedIP, 152 156 }) 153 157 return 154 158 } 155 159 156 - // STEP 3: Fetch PDS-specific data 157 - desc, err := s.client.DescribeServer(ctx, ep.Endpoint) 158 - if err != nil { 159 - log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err) 160 - } else if desc != nil && desc.DID != "" { 161 - // NEW: Update server DID 160 + // Update server DID immediately 161 + if desc.DID != "" { 162 162 s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID) 163 163 } 164 164 165 - dids, err := s.client.ListRepos(ctx, ep.Endpoint) 165 + // STEP 3: Call _health to get version 166 + available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) 167 + if err != nil || !available { 168 + log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err) 169 + // Server is online (describeServer worked) but _health failed 170 + // Continue with empty version 171 + version = "" 172 + } 173 + 174 + // Calculate 
average response time from both calls 175 + avgResponseTime := descResponseTime 176 + if available { 177 + avgResponseTime = (descResponseTime + healthResponseTime) / 2 178 + } 179 + 180 + // STEP 4: Fetch repos 181 + repoList, err := s.client.ListRepos(ctx, ep.Endpoint) 166 182 if err != nil { 167 183 log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err) 168 - dids = []string{} 184 + repoList = []Repo{} 185 + } 186 + 187 + // Convert to DIDs 188 + dids := make([]string, len(repoList)) 189 + for i, repo := range repoList { 190 + dids[i] = repo.DID 169 191 } 170 192 171 - // STEP 4: SAVE IMMEDIATELY 193 + // STEP 5: SAVE scan result 172 194 s.saveScanResult(ctx, ep.ID, &ScanResult{ 173 195 Status: storage.EndpointStatusOnline, 174 - ResponseTime: responseTime, 196 + ResponseTime: avgResponseTime, 175 197 Description: desc, 176 198 DIDs: dids, 177 199 Version: version, 200 + UsedIP: usedIP, // Only from describeServer 178 201 }) 179 202 180 - // STEP 5: Fetch IP info if needed (async, with backoff) 181 - go s.updateIPInfoIfNeeded(ctx, ip) 203 + // STEP 6: Save repos in batches (only tracks changes) 204 + if len(repoList) > 0 { 205 + batchSize := 100_000 206 + 207 + log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint) 208 + 209 + for i := 0; i < len(repoList); i += batchSize { 210 + end := i + batchSize 211 + if end > len(repoList) { 212 + end = len(repoList) 213 + } 214 + 215 + batch := repoList[i:end] 216 + repoData := make([]storage.PDSRepoData, len(batch)) 217 + 218 + for j, repo := range batch { 219 + active := true 220 + if repo.Active != nil { 221 + active = *repo.Active 222 + } 223 + 224 + status := "" 225 + if repo.Status != nil { 226 + status = *repo.Status 227 + } 228 + 229 + repoData[j] = storage.PDSRepoData{ 230 + DID: repo.DID, 231 + Head: repo.Head, 232 + Rev: repo.Rev, 233 + Active: active, 234 + Status: status, 235 + } 236 + } 237 + 238 + if err := s.db.UpsertPDSRepos(ctx, ep.ID, repoData); err != nil { 239 + log.Error("Failed to save repo batch for endpoint %d: %v", ep.ID, err) 240 + } 241 + } 242 + 243 + log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint) 244 + } 182 245 } 183 246 184 247 func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) { ··· 188 251 Metadata: make(map[string]interface{}), 189 252 } 190 253 191 - var userCount int64 // NEW: Declare user count 254 + var userCount int64 192 255 193 256 // Add PDS-specific metadata 194 257 if result.Status == storage.EndpointStatusOnline { 195 - userCount = int64(len(result.DIDs)) // NEW: Get user count 196 - scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness 258 + userCount = int64(len(result.DIDs)) 259 + scanData.Metadata["user_count"] = userCount 197 260 if result.Description != nil { 198 261 scanData.Metadata["server_info"] = result.Description 199 262 } ··· 210 273 Status: result.Status, 211 274 ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms 212 275 UserCount: userCount, 213 - Version: result.Version, // NEW: Set the version field 276 + Version: result.Version, 277 + UsedIP: result.UsedIP, // NEW 214 278 ScanData: scanData, 215 279 ScannedAt: time.Now().UTC(), 216 280 }
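Review note: step 6 walks the repo list in fixed chunks so UpsertPDSRepos never receives an unbounded slice. The loop pattern in isolation (a batch of 4 for readability; the scanner uses 100_000):

package main

import "fmt"

func main() {
	items := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i"}
	const batchSize = 4
	for i := 0; i < len(items); i += batchSize {
		end := i + batchSize
		if end > len(items) {
			end = len(items) // clamp the final, shorter batch
		}
		fmt.Println("batch:", items[i:end]) // sub-slices share the backing array, no copying
	}
}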
+2 -1
internal/pds/types.go
··· 37 37 ErrorMessage string 38 38 Description *ServerDescription 39 39 DIDs []string 40 - Version string // NEW: Add this field to pass the version 40 + Version string 41 + UsedIP string // NEW 41 42 }
-662
internal/plc/bundle.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "bytes" 6 - "context" 7 - "crypto/sha256" 8 - "encoding/hex" 9 - "encoding/json" 10 - "fmt" 11 - "os" 12 - "path/filepath" 13 - "time" 14 - 15 - "github.com/atscan/atscanner/internal/log" 16 - "github.com/atscan/atscanner/internal/storage" 17 - "github.com/klauspost/compress/zstd" 18 - ) 19 - 20 - const BUNDLE_SIZE = 10000 21 - 22 - type BundleManager struct { 23 - dir string 24 - enabled bool 25 - encoder *zstd.Encoder 26 - decoder *zstd.Decoder 27 - db storage.Database 28 - indexDIDs bool 29 - } 30 - 31 - // ===== INITIALIZATION ===== 32 - 33 - func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) { 34 - if !enabled { 35 - return &BundleManager{enabled: false}, nil 36 - } 37 - 38 - if err := os.MkdirAll(dir, 0755); err != nil { 39 - return nil, fmt.Errorf("failed to create bundle dir: %w", err) 40 - } 41 - 42 - encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) 43 - if err != nil { 44 - return nil, err 45 - } 46 - 47 - decoder, err := zstd.NewReader(nil) 48 - if err != nil { 49 - return nil, err 50 - } 51 - 52 - return &BundleManager{ 53 - dir: dir, 54 - enabled: enabled, 55 - encoder: encoder, 56 - decoder: decoder, 57 - db: db, 58 - indexDIDs: indexDIDs, // NEW 59 - }, nil 60 - } 61 - 62 - func (bm *BundleManager) Close() { 63 - if bm.encoder != nil { 64 - bm.encoder.Close() 65 - } 66 - if bm.decoder != nil { 67 - bm.decoder.Close() 68 - } 69 - } 70 - 71 - // ===== BUNDLE FILE ABSTRACTION ===== 72 - 73 - type bundleFile struct { 74 - path string 75 - operations []PLCOperation 76 - uncompressedHash string 77 - compressedHash string 78 - } 79 - 80 - func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile { 81 - return &bundleFile{ 82 - path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)), 83 - } 84 - } 85 - 86 - func (bf *bundleFile) exists() bool { 87 - _, err := os.Stat(bf.path) 88 - return err == nil 89 - } 90 - 91 - func (bm *BundleManager) load(bf *bundleFile) error { 92 - compressed, err := os.ReadFile(bf.path) 93 - if err != nil { 94 - return fmt.Errorf("read failed: %w", err) 95 - } 96 - 97 - decompressed, err := bm.decoder.DecodeAll(compressed, nil) 98 - if err != nil { 99 - return fmt.Errorf("decompress failed: %w", err) 100 - } 101 - 102 - bf.operations = bm.parseJSONL(decompressed) 103 - return nil 104 - } 105 - 106 - func (bm *BundleManager) save(bf *bundleFile) error { 107 - jsonlData := bm.serializeJSONL(bf.operations) 108 - bf.uncompressedHash = bm.hash(jsonlData) 109 - 110 - compressed := bm.encoder.EncodeAll(jsonlData, nil) 111 - bf.compressedHash = bm.hash(compressed) 112 - 113 - return os.WriteFile(bf.path, compressed, 0644) 114 - } 115 - 116 - func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation { 117 - var ops []PLCOperation 118 - scanner := bufio.NewScanner(bytes.NewReader(data)) 119 - 120 - for scanner.Scan() { 121 - line := scanner.Bytes() 122 - if len(line) == 0 { 123 - continue 124 - } 125 - 126 - var op PLCOperation 127 - if err := json.Unmarshal(line, &op); err == nil { 128 - op.RawJSON = append([]byte(nil), line...) 129 - ops = append(ops, op) 130 - } 131 - } 132 - 133 - return ops 134 - } 135 - 136 - func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte { 137 - var buf []byte 138 - for _, op := range ops { 139 - buf = append(buf, op.RawJSON...) 
140 - buf = append(buf, '\n') 141 - } 142 - return buf 143 - } 144 - 145 - // ===== BUNDLE FETCHING ===== 146 - 147 - type bundleFetcher struct { 148 - client *Client 149 - seenCIDs map[string]bool 150 - currentAfter string 151 - fetchCount int 152 - } 153 - 154 - func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher { 155 - seen := make(map[string]bool) 156 - for cid := range prevBoundaryCIDs { 157 - seen[cid] = true 158 - } 159 - 160 - return &bundleFetcher{ 161 - client: client, 162 - seenCIDs: seen, 163 - currentAfter: afterTime, 164 - } 165 - } 166 - 167 - func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) { 168 - var ops []PLCOperation 169 - maxFetches := (target / 900) + 5 170 - 171 - for len(ops) < target && bf.fetchCount < maxFetches { 172 - bf.fetchCount++ 173 - batchSize := bf.calculateBatchSize(target - len(ops)) 174 - 175 - log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize) 176 - 177 - batch, shouldContinue := bf.fetchBatch(ctx, batchSize) 178 - 179 - for _, op := range batch { 180 - if !bf.seenCIDs[op.CID] { 181 - bf.seenCIDs[op.CID] = true 182 - ops = append(ops, op) 183 - 184 - if len(ops) >= target { 185 - return ops[:target], true 186 - } 187 - } 188 - } 189 - 190 - if !shouldContinue { 191 - break 192 - } 193 - } 194 - 195 - return ops, len(ops) >= target 196 - } 197 - 198 - func (bf *bundleFetcher) calculateBatchSize(remaining int) int { 199 - if bf.fetchCount == 0 { 200 - return 1000 201 - } 202 - if remaining < 100 { 203 - return 50 204 - } 205 - if remaining < 500 { 206 - return 200 207 - } 208 - return 1000 209 - } 210 - 211 - func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) { 212 - ops, err := bf.client.Export(ctx, ExportOptions{ 213 - Count: size, 214 - After: bf.currentAfter, 215 - }) 216 - 217 - if err != nil || len(ops) == 0 { 218 - return nil, false 219 - } 220 - 221 - if len(ops) > 0 { 222 - bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano) 223 - } 224 - 225 - return ops, len(ops) >= size 226 - } 227 - 228 - // ===== MAIN BUNDLE LOADING ===== 229 - 230 - func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) { 231 - if !bm.enabled { 232 - return nil, false, fmt.Errorf("bundle manager disabled") 233 - } 234 - 235 - bf := bm.newBundleFile(bundleNum) 236 - 237 - // Try local file first 238 - if bf.exists() { 239 - return bm.loadFromFile(ctx, bundleNum, bf) 240 - } 241 - 242 - // Fetch from PLC 243 - return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient) 244 - } 245 - 246 - func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) { 247 - log.Verbose("→ Loading bundle %06d from local file", bundleNum) 248 - 249 - // Verify hash if bundle is in DB 250 - if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil { 251 - if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil { 252 - log.Error("⚠ Hash mismatch for bundle %06d! 
Re-fetching...", bundleNum) 253 - os.Remove(bf.path) 254 - return nil, false, fmt.Errorf("hash mismatch") 255 - } 256 - log.Verbose("✓ Hash verified for bundle %06d", bundleNum) 257 - } 258 - 259 - if err := bm.load(bf); err != nil { 260 - return nil, false, err 261 - } 262 - 263 - // Index if not in DB 264 - if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil { 265 - bf.compressedHash = bm.hashFile(bf.path) 266 - bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations)) 267 - 268 - // Calculate cursor from previous bundle 269 - cursor := bm.calculateCursor(ctx, bundleNum) 270 - 271 - bm.indexBundle(ctx, bundleNum, bf, cursor) 272 - } 273 - 274 - return bf.operations, true, nil 275 - } 276 - 277 - func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) { 278 - log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum) 279 - 280 - afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum) 281 - fetcher := newBundleFetcher(client, afterTime, prevCIDs) 282 - 283 - ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE) 284 - 285 - log.Info(" Collected %d unique operations after %d fetches (complete=%v)", 286 - len(ops), fetcher.fetchCount, isComplete) 287 - 288 - if isComplete { 289 - bf.operations = ops 290 - if err := bm.save(bf); err != nil { 291 - log.Error("Warning: failed to save bundle: %v", err) 292 - } else { 293 - // The cursor is the afterTime that was used to fetch this bundle 294 - cursor := afterTime 295 - bm.indexBundle(ctx, bundleNum, bf, cursor) 296 - log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]", 297 - bundleNum, len(ops), bf.uncompressedHash[:16], cursor) 298 - } 299 - } 300 - 301 - return ops, isComplete, nil 302 - } 303 - 304 - func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) { 305 - if bundleNum == 1 { 306 - return "", nil 307 - } 308 - 309 - prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1) 310 - if err != nil { 311 - return "", nil 312 - } 313 - 314 - afterTime := prevBundle.EndTime.Format(time.RFC3339Nano) 315 - 316 - // Return stored boundary CIDs if available 317 - if len(prevBundle.BoundaryCIDs) > 0 { 318 - cids := make(map[string]bool) 319 - for _, cid := range prevBundle.BoundaryCIDs { 320 - cids[cid] = true 321 - } 322 - return afterTime, cids 323 - } 324 - 325 - // Fallback: compute from file 326 - bf := bm.newBundleFile(bundleNum - 1) 327 - if bf.exists() { 328 - if err := bm.load(bf); err == nil { 329 - _, cids := GetBoundaryCIDs(bf.operations) 330 - return afterTime, cids 331 - } 332 - } 333 - 334 - return afterTime, nil 335 - } 336 - 337 - // ===== BUNDLE INDEXING ===== 338 - 339 - func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error { 340 - prevHash := "" 341 - if bundleNum > 1 { 342 - if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 343 - prevHash = prev.Hash 344 - } 345 - } 346 - 347 - dids := bm.extractUniqueDIDs(bf.operations) 348 - compressedFileSize := bm.getFileSize(bf.path) 349 - 350 - // Calculate uncompressed size 351 - uncompressedSize := int64(0) 352 - for _, op := range bf.operations { 353 - uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline 354 - } 355 - 356 - // Get time range from operations 357 - firstSeenAt := bf.operations[0].CreatedAt 358 - lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt 359 - 360 - bundle := 
&storage.PLCBundle{ 361 - BundleNumber: bundleNum, 362 - StartTime: firstSeenAt, 363 - EndTime: lastSeenAt, 364 - DIDs: dids, 365 - Hash: bf.uncompressedHash, 366 - CompressedHash: bf.compressedHash, 367 - CompressedSize: compressedFileSize, 368 - UncompressedSize: uncompressedSize, 369 - Cursor: cursor, 370 - PrevBundleHash: prevHash, 371 - Compressed: true, 372 - CreatedAt: time.Now().UTC(), 373 - } 374 - 375 - // Create bundle first 376 - if err := bm.db.CreateBundle(ctx, bundle); err != nil { 377 - return err 378 - } 379 - 380 - // NEW: Only index DIDs if enabled 381 - if bm.indexDIDs { 382 - start := time.Now() 383 - if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil { 384 - log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err) 385 - // Don't return error - bundle is already created 386 - } else { 387 - elapsed := time.Since(start) 388 - log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed) 389 - } 390 - } else { 391 - log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum) 392 - } 393 - 394 - return nil 395 - } 396 - 397 - func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string { 398 - didSet := make(map[string]bool) 399 - for _, op := range ops { 400 - didSet[op.DID] = true 401 - } 402 - 403 - dids := make([]string, 0, len(didSet)) 404 - for did := range didSet { 405 - dids = append(dids, did) 406 - } 407 - return dids 408 - } 409 - 410 - // ===== MEMPOOL BUNDLE CREATION ===== 411 - 412 - func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) { 413 - if !bm.enabled { 414 - return 0, fmt.Errorf("bundle manager disabled") 415 - } 416 - 417 - if len(operations) != BUNDLE_SIZE { 418 - return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations)) 419 - } 420 - 421 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 422 - if err != nil { 423 - return 0, err 424 - } 425 - bundleNum := lastBundle + 1 426 - 427 - bf := bm.newBundleFile(bundleNum) 428 - bf.operations = operations 429 - 430 - if err := bm.save(bf); err != nil { 431 - return 0, err 432 - } 433 - 434 - if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil { 435 - return 0, err 436 - } 437 - 438 - log.Info("✓ Created bundle %06d from mempool (hash: %s...)", 439 - bundleNum, bf.uncompressedHash[:16]) 440 - 441 - return bundleNum, nil 442 - } 443 - 444 - // ===== VERIFICATION ===== 445 - 446 - func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error { 447 - if !bm.enabled { 448 - return fmt.Errorf("bundle manager disabled") 449 - } 450 - 451 - log.Info("Verifying bundle chain from 1 to %06d...", endBundle) 452 - 453 - for i := 1; i <= endBundle; i++ { 454 - bundle, err := bm.db.GetBundleByNumber(ctx, i) 455 - if err != nil { 456 - return fmt.Errorf("bundle %06d not found: %w", i, err) 457 - } 458 - 459 - // Verify file hash 460 - path := bm.newBundleFile(i).path 461 - if err := bm.verifyHash(path, bundle.CompressedHash); err != nil { 462 - return fmt.Errorf("bundle %06d hash verification failed: %w", i, err) 463 - } 464 - 465 - // Verify chain link 466 - if i > 1 { 467 - prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1) 468 - if err != nil { 469 - return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i) 470 - } 471 - 472 - if bundle.PrevBundleHash != prevBundle.Hash { 473 - return fmt.Errorf("bundle %06d chain broken! 
Expected prev_hash=%s, got=%s", 474 - i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16]) 475 - } 476 - } 477 - 478 - if i%100 == 0 { 479 - log.Verbose(" ✓ Verified bundles 1-%06d", i) 480 - } 481 - } 482 - 483 - log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle) 484 - return nil 485 - } 486 - 487 - func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error { 488 - if !bm.enabled { 489 - return nil 490 - } 491 - 492 - for i := 1; i < targetBundle; i++ { 493 - if !bm.newBundleFile(i).exists() { 494 - if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil { 495 - return fmt.Errorf("bundle %06d is missing (required for continuity)", i) 496 - } 497 - } 498 - } 499 - 500 - return nil 501 - } 502 - 503 - // ===== UTILITY METHODS ===== 504 - 505 - func (bm *BundleManager) hash(data []byte) string { 506 - h := sha256.Sum256(data) 507 - return hex.EncodeToString(h[:]) 508 - } 509 - 510 - func (bm *BundleManager) hashFile(path string) string { 511 - data, _ := os.ReadFile(path) 512 - return bm.hash(data) 513 - } 514 - 515 - func (bm *BundleManager) verifyHash(path, expectedHash string) error { 516 - if expectedHash == "" { 517 - return nil 518 - } 519 - 520 - actualHash := bm.hashFile(path) 521 - if actualHash != expectedHash { 522 - return fmt.Errorf("hash mismatch") 523 - } 524 - return nil 525 - } 526 - 527 - func (bm *BundleManager) getFileSize(path string) int64 { 528 - if info, err := os.Stat(path); err == nil { 529 - return info.Size() 530 - } 531 - return 0 532 - } 533 - 534 - func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) { 535 - if !bm.enabled { 536 - return 0, 0, 0, 0, nil 537 - } 538 - return bm.db.GetBundleStats(ctx) 539 - } 540 - 541 - func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) { 542 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 543 - if err != nil { 544 - return nil, err 545 - } 546 - 547 - if lastBundle == 0 { 548 - return map[string]interface{}{ 549 - "chain_length": 0, 550 - "status": "empty", 551 - }, nil 552 - } 553 - 554 - firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1) 555 - lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle) 556 - 557 - return map[string]interface{}{ 558 - "chain_length": lastBundle, 559 - "first_bundle": 1, 560 - "last_bundle": lastBundle, 561 - "chain_start_time": firstBundle.StartTime, 562 - "chain_end_time": lastBundleData.EndTime, 563 - "chain_head_hash": lastBundleData.Hash, 564 - }, nil 565 - } 566 - 567 - // ===== EXPORTED HELPERS ===== 568 - 569 - func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) { 570 - if len(operations) == 0 { 571 - return time.Time{}, nil 572 - } 573 - 574 - lastOp := operations[len(operations)-1] 575 - boundaryTime := lastOp.CreatedAt 576 - cidSet := make(map[string]bool) 577 - 578 - for i := len(operations) - 1; i >= 0; i-- { 579 - op := operations[i] 580 - if op.CreatedAt.Equal(boundaryTime) { 581 - cidSet[op.CID] = true 582 - } else { 583 - break 584 - } 585 - } 586 - 587 - return boundaryTime, cidSet 588 - } 589 - 590 - func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation { 591 - if len(operations) == 0 { 592 - return operations 593 - } 594 - 595 - boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp) 596 - if err != nil { 597 - return operations 598 - } 599 - 600 - startIdx := 0 601 - for startIdx < len(operations) { 602 
- op := operations[startIdx] 603 - 604 - if op.CreatedAt.After(boundaryTime) { 605 - break 606 - } 607 - 608 - if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] { 609 - startIdx++ 610 - continue 611 - } 612 - 613 - break 614 - } 615 - 616 - return operations[startIdx:] 617 - } 618 - 619 - // LoadBundleOperations is a public method for external access (e.g., API handlers) 620 - func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) { 621 - if !bm.enabled { 622 - return nil, fmt.Errorf("bundle manager disabled") 623 - } 624 - 625 - bf := bm.newBundleFile(bundleNum) 626 - 627 - if !bf.exists() { 628 - return nil, fmt.Errorf("bundle %06d not found", bundleNum) 629 - } 630 - 631 - if err := bm.load(bf); err != nil { 632 - return nil, err 633 - } 634 - 635 - return bf.operations, nil 636 - } 637 - 638 - // calculateCursor determines the cursor value for a given bundle 639 - // For bundle 1: returns empty string 640 - // For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format 641 - func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string { 642 - if bundleNum == 1 { 643 - return "" 644 - } 645 - 646 - // Try to get cursor from previous bundle in DB 647 - if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 648 - return prevBundle.EndTime.Format(time.RFC3339Nano) 649 - } 650 - 651 - // If previous bundle not in DB, try to load it from file 652 - prevBf := bm.newBundleFile(bundleNum - 1) 653 - if prevBf.exists() { 654 - if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 { 655 - // Return the createdAt of the last operation in previous bundle 656 - lastOp := prevBf.operations[len(prevBf.operations)-1] 657 - return lastOp.CreatedAt.Format(time.RFC3339Nano) 658 - } 659 - } 660 - 661 - return "" 662 - }
-237
internal/plc/client.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "context" 6 - "encoding/json" 7 - "fmt" 8 - "io" 9 - "net/http" 10 - "strconv" 11 - "time" 12 - 13 - "github.com/atscan/atscanner/internal/log" 14 - ) 15 - 16 - type Client struct { 17 - baseURL string 18 - httpClient *http.Client 19 - rateLimiter *RateLimiter 20 - } 21 - 22 - func NewClient(baseURL string) *Client { 23 - // Rate limit: 90 requests per minute (leaving buffer below 100/min limit) 24 - rateLimiter := NewRateLimiter(90, time.Minute) 25 - 26 - return &Client{ 27 - baseURL: baseURL, 28 - httpClient: &http.Client{ 29 - Timeout: 60 * time.Second, 30 - }, 31 - rateLimiter: rateLimiter, 32 - } 33 - } 34 - 35 - func (c *Client) Close() { 36 - if c.rateLimiter != nil { 37 - c.rateLimiter.Stop() 38 - } 39 - } 40 - 41 - type ExportOptions struct { 42 - Count int 43 - After string // ISO 8601 datetime string 44 - } 45 - 46 - // Export fetches export data from PLC directory with rate limiting and retry 47 - func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) { 48 - return c.exportWithRetry(ctx, opts, 5) 49 - } 50 - 51 - // exportWithRetry implements retry logic with exponential backoff for rate limits 52 - func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) { 53 - var lastErr error 54 - backoff := 1 * time.Second 55 - 56 - for attempt := 1; attempt <= maxRetries; attempt++ { 57 - // Wait for rate limiter token 58 - if err := c.rateLimiter.Wait(ctx); err != nil { 59 - return nil, err 60 - } 61 - 62 - operations, retryAfter, err := c.doExport(ctx, opts) 63 - 64 - if err == nil { 65 - return operations, nil 66 - } 67 - 68 - lastErr = err 69 - 70 - // Check if it's a rate limit error (429) 71 - if retryAfter > 0 { 72 - log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d", 73 - retryAfter, attempt, maxRetries) 74 - 75 - select { 76 - case <-time.After(retryAfter): 77 - continue 78 - case <-ctx.Done(): 79 - return nil, ctx.Err() 80 - } 81 - } 82 - 83 - // Other errors - exponential backoff 84 - if attempt < maxRetries { 85 - log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v", 86 - attempt, maxRetries, err, backoff) 87 - 88 - select { 89 - case <-time.After(backoff): 90 - backoff *= 2 // Exponential backoff 91 - case <-ctx.Done(): 92 - return nil, ctx.Err() 93 - } 94 - } 95 - } 96 - 97 - return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr) 98 - } 99 - 100 - // doExport performs the actual HTTP request 101 - func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) { 102 - url := fmt.Sprintf("%s/export", c.baseURL) 103 - 104 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 105 - if err != nil { 106 - return nil, 0, err 107 - } 108 - 109 - // Add query parameters 110 - q := req.URL.Query() 111 - if opts.Count > 0 { 112 - q.Add("count", fmt.Sprintf("%d", opts.Count)) 113 - } 114 - if opts.After != "" { 115 - q.Add("after", opts.After) 116 - } 117 - req.URL.RawQuery = q.Encode() 118 - 119 - resp, err := c.httpClient.Do(req) 120 - if err != nil { 121 - return nil, 0, fmt.Errorf("request failed: %w", err) 122 - } 123 - defer resp.Body.Close() 124 - 125 - // Handle rate limiting (429) 126 - if resp.StatusCode == http.StatusTooManyRequests { 127 - retryAfter := parseRetryAfter(resp) 128 - 129 - // Also check x-ratelimit headers for info 130 - if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" { 131 - log.Verbose("Rate limit: 
%s", limit) 132 - } 133 - 134 - return nil, retryAfter, fmt.Errorf("rate limited (429)") 135 - } 136 - 137 - if resp.StatusCode != http.StatusOK { 138 - body, _ := io.ReadAll(resp.Body) 139 - return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 140 - } 141 - 142 - var operations []PLCOperation 143 - 144 - // PLC export returns newline-delimited JSON 145 - scanner := bufio.NewScanner(resp.Body) 146 - buf := make([]byte, 0, 64*1024) 147 - scanner.Buffer(buf, 1024*1024) 148 - 149 - lineCount := 0 150 - for scanner.Scan() { 151 - lineCount++ 152 - line := scanner.Bytes() 153 - 154 - if len(line) == 0 { 155 - continue 156 - } 157 - 158 - var op PLCOperation 159 - if err := json.Unmarshal(line, &op); err != nil { 160 - log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err) 161 - continue 162 - } 163 - 164 - // CRITICAL: Store the original raw JSON bytes 165 - op.RawJSON = make([]byte, len(line)) 166 - copy(op.RawJSON, line) 167 - 168 - operations = append(operations, op) 169 - } 170 - 171 - if err := scanner.Err(); err != nil { 172 - return nil, 0, fmt.Errorf("error reading response: %w", err) 173 - } 174 - 175 - return operations, 0, nil 176 - 177 - } 178 - 179 - // parseRetryAfter parses the Retry-After header 180 - func parseRetryAfter(resp *http.Response) time.Duration { 181 - retryAfter := resp.Header.Get("Retry-After") 182 - if retryAfter == "" { 183 - // Default to 5 minutes if no header 184 - return 5 * time.Minute 185 - } 186 - 187 - // Try parsing as seconds 188 - if seconds, err := strconv.Atoi(retryAfter); err == nil { 189 - return time.Duration(seconds) * time.Second 190 - } 191 - 192 - // Try parsing as HTTP date 193 - if t, err := http.ParseTime(retryAfter); err == nil { 194 - return time.Until(t) 195 - } 196 - 197 - // Default 198 - return 5 * time.Minute 199 - } 200 - 201 - // GetDID fetches a specific DID document from PLC 202 - func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) { 203 - // Wait for rate limiter 204 - if err := c.rateLimiter.Wait(ctx); err != nil { 205 - return nil, err 206 - } 207 - 208 - url := fmt.Sprintf("%s/%s", c.baseURL, did) 209 - 210 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 211 - if err != nil { 212 - return nil, err 213 - } 214 - 215 - resp, err := c.httpClient.Do(req) 216 - if err != nil { 217 - return nil, err 218 - } 219 - defer resp.Body.Close() 220 - 221 - if resp.StatusCode == http.StatusTooManyRequests { 222 - retryAfter := parseRetryAfter(resp) 223 - return nil, fmt.Errorf("rate limited, retry after %v", retryAfter) 224 - } 225 - 226 - if resp.StatusCode != http.StatusOK { 227 - body, _ := io.ReadAll(resp.Body) 228 - return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 229 - } 230 - 231 - var doc DIDDocument 232 - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { 233 - return nil, err 234 - } 235 - 236 - return &doc, nil 237 - }
+112
internal/plc/helpers.go
··· 1 + package plc 2 + 3 + import ( 4 + "regexp" 5 + "strings" 6 + ) 7 + 8 + // MaxHandleLength is the maximum allowed handle length for database storage 9 + const MaxHandleLength = 500 10 + 11 + // Handle validation regex per AT Protocol spec 12 + // Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter 13 + var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`) 14 + 15 + // ExtractHandle safely extracts the handle from a PLC operation 16 + func ExtractHandle(op *PLCOperation) string { 17 + if op == nil || op.Operation == nil { 18 + return "" 19 + } 20 + 21 + // Get "alsoKnownAs" 22 + aka, ok := op.Operation["alsoKnownAs"].([]interface{}) 23 + if !ok { 24 + return "" 25 + } 26 + 27 + // Find the handle (e.g., "at://handle.bsky.social") 28 + for _, item := range aka { 29 + if handle, ok := item.(string); ok { 30 + if strings.HasPrefix(handle, "at://") { 31 + return strings.TrimPrefix(handle, "at://") 32 + } 33 + } 34 + } 35 + return "" 36 + } 37 + 38 + // ValidateHandle checks if a handle is valid for database storage 39 + // Returns empty string if handle is invalid (too long or wrong format) 40 + func ValidateHandle(handle string) string { 41 + if handle == "" { 42 + return "" 43 + } 44 + 45 + // Check length first (faster) 46 + if len(handle) > MaxHandleLength { 47 + return "" 48 + } 49 + 50 + // Validate format using regex 51 + if !handleRegex.MatchString(handle) { 52 + return "" 53 + } 54 + 55 + return handle 56 + } 57 + 58 + // ExtractPDS safely extracts the PDS endpoint from a PLC operation 59 + func ExtractPDS(op *PLCOperation) string { 60 + if op == nil || op.Operation == nil { 61 + return "" 62 + } 63 + 64 + // Get "services" 65 + services, ok := op.Operation["services"].(map[string]interface{}) 66 + if !ok { 67 + return "" 68 + } 69 + 70 + // Get "atproto_pds" 71 + pdsService, ok := services["atproto_pds"].(map[string]interface{}) 72 + if !ok { 73 + return "" 74 + } 75 + 76 + // Get "endpoint" 77 + if endpoint, ok := pdsService["endpoint"].(string); ok { 78 + return endpoint 79 + } 80 + 81 + return "" 82 + } 83 + 84 + // DIDInfo contains extracted metadata from a PLC operation 85 + type DIDInfo struct { 86 + Handle string 87 + PDS string 88 + } 89 + 90 + // ExtractDIDInfo extracts both handle and PDS from an operation 91 + func ExtractDIDInfo(op *PLCOperation) DIDInfo { 92 + return DIDInfo{ 93 + Handle: ExtractHandle(op), 94 + PDS: ExtractPDS(op), 95 + } 96 + } 97 + 98 + // ExtractDIDInfoMap creates a map of DID -> info from operations 99 + // Processes in reverse order to get the latest state for each DID 100 + func ExtractDIDInfoMap(ops []PLCOperation) map[string]DIDInfo { 101 + infoMap := make(map[string]DIDInfo) 102 + 103 + // Process in reverse to get latest state 104 + for i := len(ops) - 1; i >= 0; i-- { 105 + op := ops[i] 106 + if _, exists := infoMap[op.DID]; !exists { 107 + infoMap[op.DID] = ExtractDIDInfo(&op) 108 + } 109 + } 110 + 111 + return infoMap 112 + }
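A worked example of these helpers (DID, handle, and PDS values hypothetical): alsoKnownAs and services flow through ExtractDIDInfoMap, and ValidateHandle filters what gets persisted:

    op := PLCOperation{
        DID: "did:plc:example123",
        Operation: map[string]interface{}{
            "alsoKnownAs": []interface{}{"at://alice.example.com"},
            "services": map[string]interface{}{
                "atproto_pds": map[string]interface{}{
                    "endpoint": "https://pds.example.com",
                },
            },
        },
    }
    info := ExtractDIDInfoMap([]PLCOperation{op})["did:plc:example123"]
    // info.Handle == "alice.example.com", info.PDS == "https://pds.example.com"

    ValidateHandle("alice.example.com")    // "alice.example.com"
    ValidateHandle("no-dots")              // "" — no dot-separated labels
    ValidateHandle("-bad.example.com")     // "" — label may not start with '-'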
+522
internal/plc/manager.go
··· 1 + package plc 2 + 3 + import ( 4 + "context" 5 + "encoding/csv" 6 + "fmt" 7 + "io" 8 + "os" 9 + "path/filepath" 10 + "sort" 11 + "strconv" 12 + "strings" 13 + "time" 14 + 15 + "github.com/atscan/atscand/internal/log" 16 + "github.com/atscan/atscand/internal/storage" 17 + "github.com/klauspost/compress/zstd" 18 + plcbundle "tangled.org/atscan.net/plcbundle" 19 + ) 20 + 21 + // BundleManager wraps the library's manager with database integration 22 + type BundleManager struct { 23 + libManager *plcbundle.Manager 24 + db storage.Database 25 + bundleDir string 26 + indexDIDs bool 27 + } 28 + 29 + func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) { 30 + // Create library config 31 + config := plcbundle.DefaultConfig(bundleDir) 32 + 33 + // Create PLC client 34 + var client *plcbundle.PLCClient 35 + if plcURL != "" { 36 + client = plcbundle.NewPLCClient(plcURL) 37 + } 38 + 39 + // Create library manager 40 + libMgr, err := plcbundle.NewManager(config, client) 41 + if err != nil { 42 + return nil, fmt.Errorf("failed to create library manager: %w", err) 43 + } 44 + 45 + return &BundleManager{ 46 + libManager: libMgr, 47 + db: db, 48 + bundleDir: bundleDir, 49 + indexDIDs: indexDIDs, 50 + }, nil 51 + } 52 + 53 + func (bm *BundleManager) Close() { 54 + if bm.libManager != nil { 55 + bm.libManager.Close() 56 + } 57 + } 58 + 59 + // LoadBundleOperations loads a bundle via the library and returns its operations 60 + func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) { 61 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum) 62 + if err != nil { 63 + return nil, err 64 + } 65 + return bundle.Operations, nil 66 + } 67 + 68 + // LoadBundle loads a full bundle with metadata 69 + func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) { 70 + return bm.libManager.LoadBundle(ctx, bundleNum) 71 + } 72 + 73 + // FetchAndSaveBundle fetches the next bundle from PLC and saves it 74 + func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) { 75 + // Fetch from PLC using library 76 + bundle, err := bm.libManager.FetchNextBundle(ctx) 77 + if err != nil { 78 + return nil, err 79 + } 80 + 81 + // Save to disk (library handles this) 82 + if err := bm.libManager.SaveBundle(ctx, bundle); err != nil { 83 + return nil, fmt.Errorf("failed to save bundle to disk: %w", err) 84 + } 85 + 86 + // Index DIDs if enabled (still use database for this) 87 + if bm.indexDIDs && len(bundle.Operations) > 0 { 88 + if err := bm.indexBundleDIDs(ctx, bundle); err != nil { 89 + log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err) 90 + } 91 + } 92 + 93 + log.Info("✓ Saved bundle %06d", bundle.BundleNumber) 94 + 95 + return bundle, nil 96 + } 97 + 98 + // indexBundleDIDs indexes DIDs from a bundle into the database 99 + func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error { 100 + start := time.Now() 101 + log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber) 102 + 103 + // Extract DID info from operations 104 + didInfoMap := ExtractDIDInfoMap(bundle.Operations) 105 + 106 + successCount := 0 107 + errorCount := 0 108 + invalidHandleCount := 0 109 + 110 + // Upsert each DID 111 + for did, info := range didInfoMap { 112 + validHandle := ValidateHandle(info.Handle) 113 + if info.Handle != "" && validHandle == "" { 114 + invalidHandleCount++ 115 + } 116 + 117 + if err := 
bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil { 118 + log.Error("Failed to index DID %s: %v", did, err) 119 + errorCount++ 120 + } else { 121 + successCount++ 122 + } 123 + } 124 + 125 + elapsed := time.Since(start) 126 + log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v", 127 + successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed) 128 + 129 + return nil 130 + } 131 + 132 + // VerifyChain verifies bundle chain integrity 133 + func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error { 134 + result, err := bm.libManager.VerifyChain(ctx) 135 + if err != nil { 136 + return err 137 + } 138 + 139 + if !result.Valid { 140 + return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error) 141 + } 142 + 143 + return nil 144 + } 145 + 146 + // GetChainInfo returns chain information 147 + func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) { 148 + return bm.libManager.GetInfo(), nil 149 + } 150 + 151 + // GetMempoolStats returns mempool statistics from the library 152 + func (bm *BundleManager) GetMempoolStats() map[string]interface{} { 153 + return bm.libManager.GetMempoolStats() 154 + } 155 + 156 + // GetMempoolOperations returns all operations currently in mempool 157 + func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) { 158 + return bm.libManager.GetMempoolOperations() 159 + } 160 + 161 + // GetIndex returns the library's bundle index 162 + func (bm *BundleManager) GetIndex() *plcbundle.Index { 163 + return bm.libManager.GetIndex() 164 + } 165 + 166 + // GetLastBundleNumber returns the last bundle number 167 + func (bm *BundleManager) GetLastBundleNumber() int { 168 + index := bm.libManager.GetIndex() 169 + lastBundle := index.GetLastBundle() 170 + if lastBundle == nil { 171 + return 0 172 + } 173 + return lastBundle.BundleNumber 174 + } 175 + 176 + // GetBundleMetadata gets bundle metadata by number 177 + func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) { 178 + index := bm.libManager.GetIndex() 179 + return index.GetBundle(bundleNum) 180 + } 181 + 182 + // GetBundles returns the most recent bundles (newest first) 183 + func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata { 184 + index := bm.libManager.GetIndex() 185 + allBundles := index.GetBundles() 186 + 187 + // Determine how many bundles to return 188 + count := limit 189 + if count <= 0 || count > len(allBundles) { 190 + count = len(allBundles) 191 + } 192 + 193 + // Build result in reverse order (newest first) 194 + result := make([]*plcbundle.BundleMetadata, count) 195 + for i := 0; i < count; i++ { 196 + result[i] = allBundles[len(allBundles)-1-i] 197 + } 198 + 199 + return result 200 + } 201 + 202 + // GetBundleStats returns bundle statistics 203 + func (bm *BundleManager) GetBundleStats() map[string]interface{} { 204 + index := bm.libManager.GetIndex() 205 + stats := index.GetStats() 206 + 207 + // Convert to expected format 208 + lastBundle := stats["last_bundle"] 209 + if lastBundle == nil { 210 + lastBundle = int64(0) 211 + } 212 + 213 + // Calculate total uncompressed size by iterating through all bundles 214 + totalUncompressedSize := int64(0) 215 + allBundles := index.GetBundles() 216 + for _, bundle := range allBundles { 217 + totalUncompressedSize += bundle.UncompressedSize 218 + } 219 + 220 + return map[string]interface{}{ 221 + "bundle_count": 
int64(stats["bundle_count"].(int)), 222 + "total_size": stats["total_size"].(int64), 223 + "total_uncompressed_size": totalUncompressedSize, 224 + "last_bundle": lastBundle, // int from stats or the int64(0) fallback; a .(int) assertion here would panic on the fallback 225 + } 226 + } 227 + 228 + // GetDIDsForBundle gets DIDs from a bundle (loads and extracts) 229 + func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) { 230 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum) 231 + if err != nil { 232 + return nil, 0, err 233 + } 234 + 235 + // Extract unique DIDs 236 + didSet := make(map[string]bool) 237 + for _, op := range bundle.Operations { 238 + didSet[op.DID] = true 239 + } 240 + 241 + dids := make([]string, 0, len(didSet)) 242 + for did := range didSet { 243 + dids = append(dids, did) 244 + } 245 + 246 + return dids, bundle.DIDCount, nil 247 + } 248 + 249 + // FindBundleForTimestamp finds bundle containing a timestamp 250 + func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int { 251 + index := bm.libManager.GetIndex() 252 + bundles := index.GetBundles() 253 + 254 + // Find bundle containing this time 255 + for _, bundle := range bundles { 256 + if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) && 257 + (bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) { 258 + return bundle.BundleNumber 259 + } 260 + } 261 + 262 + // Return closest bundle before this time 263 + for i := len(bundles) - 1; i >= 0; i-- { 264 + if bundles[i].EndTime.Before(afterTime) { 265 + return bundles[i].BundleNumber 266 + } 267 + } 268 + 269 + return 1 // Default to first bundle 270 + } 271 + 272 + // StreamRaw streams raw compressed bundle data 273 + func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) { 274 + return bm.libManager.StreamBundleRaw(ctx, bundleNumber) 275 + } 276 + 277 + // StreamDecompressed streams decompressed bundle data 278 + func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) { 279 + return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber) 280 + } 281 + 282 + // GetPLCHistory calculates historical statistics from the bundle index 283 + func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) { 284 + index := bm.libManager.GetIndex() 285 + allBundles := index.GetBundles() 286 + 287 + // Filter bundles >= fromBundle 288 + var filtered []*plcbundle.BundleMetadata 289 + for _, b := range allBundles { 290 + if b.BundleNumber >= fromBundle { 291 + filtered = append(filtered, b) 292 + } 293 + } 294 + 295 + if len(filtered) == 0 { 296 + return []*storage.PLCHistoryPoint{}, nil 297 + } 298 + 299 + // Sort bundles by bundle number to ensure proper cumulative calculation 300 + sort.Slice(filtered, func(i, j int) bool { 301 + return filtered[i].BundleNumber < filtered[j].BundleNumber 302 + }) 303 + 304 + // Group by date 305 + type dailyStat struct { 306 + lastBundle int 307 + bundleCount int 308 + totalUncompressed int64 309 + totalCompressed int64 310 + } 311 + 312 + dailyStats := make(map[string]*dailyStat) 313 + 314 + // Map to store the cumulative values at the end of each date 315 + dateCumulatives := make(map[string]struct { 316 + uncompressed int64 317 + compressed int64 318 + }) 319 + 320 + // Calculate cumulative totals as we iterate through sorted bundles 321 + cumulativeUncompressed := int64(0) 322 + cumulativeCompressed := int64(0) 323 + 324 + for _, bundle := range filtered { 325 + 
dateStr := bundle.StartTime.Format("2006-01-02") 326 + 327 + // Update cumulative totals 328 + cumulativeUncompressed += bundle.UncompressedSize 329 + cumulativeCompressed += bundle.CompressedSize 330 + 331 + if stat, exists := dailyStats[dateStr]; exists { 332 + // Update existing day 333 + if bundle.BundleNumber > stat.lastBundle { 334 + stat.lastBundle = bundle.BundleNumber 335 + } 336 + stat.bundleCount++ 337 + stat.totalUncompressed += bundle.UncompressedSize 338 + stat.totalCompressed += bundle.CompressedSize 339 + } else { 340 + // Create new day entry 341 + dailyStats[dateStr] = &dailyStat{ 342 + lastBundle: bundle.BundleNumber, 343 + bundleCount: 1, 344 + totalUncompressed: bundle.UncompressedSize, 345 + totalCompressed: bundle.CompressedSize, 346 + } 347 + } 348 + 349 + // Store the cumulative values at the end of this date 350 + // (will be overwritten if there are multiple bundles on the same day) 351 + dateCumulatives[dateStr] = struct { 352 + uncompressed int64 353 + compressed int64 354 + }{ 355 + uncompressed: cumulativeUncompressed, 356 + compressed: cumulativeCompressed, 357 + } 358 + } 359 + 360 + // Convert map to sorted slice by date 361 + var dates []string 362 + for date := range dailyStats { 363 + dates = append(dates, date) 364 + } 365 + sort.Strings(dates) 366 + 367 + // Build history points with cumulative operations 368 + var history []*storage.PLCHistoryPoint 369 + cumulativeOps := 0 370 + 371 + for _, date := range dates { 372 + stat := dailyStats[date] 373 + cumulativeOps += stat.bundleCount * BUNDLE_SIZE // complete bundles are fixed at BUNDLE_SIZE ops (see types.go) 374 + cumulative := dateCumulatives[date] 375 + 376 + history = append(history, &storage.PLCHistoryPoint{ 377 + Date: date, 378 + BundleNumber: stat.lastBundle, 379 + OperationCount: cumulativeOps, 380 + UncompressedSize: stat.totalUncompressed, 381 + CompressedSize: stat.totalCompressed, 382 + CumulativeUncompressed: cumulative.uncompressed, 383 + CumulativeCompressed: cumulative.compressed, 384 + }) 385 + } 386 + 387 + // Apply limit if specified 388 + if limit > 0 && len(history) > limit { 389 + history = history[:limit] 390 + } 391 + 392 + return history, nil 393 + } 394 + 395 + // GetBundleLabels reads labels from a compressed CSV file for a specific bundle 396 + func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) { 397 + // Define the path to the labels file 398 + labelsDir := filepath.Join(bm.bundleDir, "labels") 399 + labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum)) 400 + 401 + // Check if file exists 402 + if _, err := os.Stat(labelsFile); os.IsNotExist(err) { 403 + log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile) 404 + // Return empty, not an error 405 + return []*PLCOpLabel{}, nil 406 + } 407 + 408 + // Open the Zstd-compressed file 409 + file, err := os.Open(labelsFile) 410 + if err != nil { 411 + return nil, fmt.Errorf("failed to open labels file: %w", err) 412 + } 413 + defer file.Close() 414 + 415 + // Create a Zstd reader 416 + zstdReader, err := zstd.NewReader(file) 417 + if err != nil { 418 + return nil, fmt.Errorf("failed to create zstd reader: %w", err) 419 + } 420 + defer zstdReader.Close() 421 + 422 + // Create a CSV reader 423 + csvReader := csv.NewReader(zstdReader) 424 + // The labels CSV has no header row, so nothing needs to be skipped. 425 + // Record length is validated in parseLabelRecord (which expects 6 fields), 426 + // so csvReader.FieldsPerRecord is left at its default. 427 + 428 + var labels []*PLCOpLabel 429 + 430 + // Read all records 431 + for { 432 + // Check for context cancellation 433 + if 
err := ctx.Err(); err != nil { 434 + return nil, err 435 + } 436 + 437 + record, err := csvReader.Read() 438 + if err == io.EOF { 439 + break // End of file 440 + } 441 + if err != nil { 442 + log.Error("Error reading CSV record in %s: %v", labelsFile, err) 443 + continue // Skip bad line 444 + } 445 + 446 + // Parse the CSV record (which is []string) 447 + label, err := parseLabelRecord(record) 448 + if err != nil { 449 + log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err) 450 + continue // Skip bad data 451 + } 452 + 453 + labels = append(labels, label) 454 + } 455 + 456 + return labels, nil 457 + } 458 + 459 + // parseLabelRecord converts a new-format CSV record into a PLCOpLabel struct 460 + func parseLabelRecord(record []string) (*PLCOpLabel, error) { 461 + // New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels 462 + if len(record) != 6 { 463 + err := fmt.Errorf("invalid record length: expected 6, got %d", len(record)) 464 + // Log the raw record too; the caller only logs the wrapped error. 465 + log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ",")) 466 + 467 + return nil, err 468 + } 469 + 470 + // 0:bundle 471 + bundle, err := strconv.Atoi(record[0]) 472 + if err != nil { 473 + // Log the raw record for context. 474 + log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ",")) 475 + 476 + return nil, fmt.Errorf("parsing 'bundle': %w", err) 477 + } 478 + 479 + // 1:position 480 + position, err := strconv.Atoi(record[1]) 481 + if err != nil { 482 + // Log the raw record for context. 483 + log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ",")) 484 + 485 + return nil, fmt.Errorf("parsing 'position': %w", err) 486 + } 487 + 488 + // 2:cid(short) 489 + shortCID := record[2] 490 + 491 + // 3:size 492 + size, err := strconv.Atoi(record[3]) 493 + if err != nil { 494 + // Log the raw record for context. 495 + log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ",")) 496 + 497 + return nil, fmt.Errorf("parsing 'size': %w", err) 498 + } 499 + 500 + // 4:confidence 501 + confidence, err := strconv.ParseFloat(record[4], 64) 502 + if err != nil { 503 + // Log the raw record for context. 504 + log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ",")) 505 + 506 + return nil, fmt.Errorf("parsing 'confidence': %w", err) 507 + } 508 + 509 + // 5:labels 510 + detectors := strings.Split(record[5], ";") 511 + 512 + label := &PLCOpLabel{ 513 + Bundle: bundle, 514 + Position: position, 515 + CID: shortCID, 516 + Size: size, 517 + Confidence: confidence, 518 + Detectors: detectors, 519 + } 520 + 521 + return label, nil 522 + }
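The labels file format is easiest to see on a sample row (values hypothetical). Each record in labels/NNNNNN.csv.zst carries bundle, position, short CID, size, confidence, and a semicolon-separated detector list:

    // 0:bundle 1:position 2:cid(short) 3:size 4:confidence 5:labels
    record := []string{"42", "1337", "bafyreib", "512", "0.93", "spam;bulk-create"}
    label, err := parseLabelRecord(record) // unexported: usable within package plc
    // label == &PLCOpLabel{Bundle: 42, Position: 1337, CID: "bafyreib",
    //                      Size: 512, Confidence: 0.93,
    //                      Detectors: []string{"spam", "bulk-create"}}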
-70
internal/plc/ratelimiter.go
··· 1 - package plc 2 - 3 - import ( 4 - "context" 5 - "time" 6 - ) 7 - 8 - // RateLimiter implements a token bucket rate limiter 9 - type RateLimiter struct { 10 - tokens chan struct{} 11 - refillRate time.Duration 12 - maxTokens int 13 - stopRefill chan struct{} 14 - } 15 - 16 - // NewRateLimiter creates a new rate limiter 17 - // Example: NewRateLimiter(90, time.Minute) = 90 requests per minute 18 - func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter { 19 - rl := &RateLimiter{ 20 - tokens: make(chan struct{}, requestsPerPeriod), 21 - refillRate: period / time.Duration(requestsPerPeriod), 22 - maxTokens: requestsPerPeriod, 23 - stopRefill: make(chan struct{}), 24 - } 25 - 26 - // Fill initially 27 - for i := 0; i < requestsPerPeriod; i++ { 28 - rl.tokens <- struct{}{} 29 - } 30 - 31 - // Start refill goroutine 32 - go rl.refill() 33 - 34 - return rl 35 - } 36 - 37 - // refill adds tokens at the specified rate 38 - func (rl *RateLimiter) refill() { 39 - ticker := time.NewTicker(rl.refillRate) 40 - defer ticker.Stop() 41 - 42 - for { 43 - select { 44 - case <-ticker.C: 45 - select { 46 - case rl.tokens <- struct{}{}: 47 - // Token added 48 - default: 49 - // Buffer full, skip 50 - } 51 - case <-rl.stopRefill: 52 - return 53 - } 54 - } 55 - } 56 - 57 - // Wait blocks until a token is available 58 - func (rl *RateLimiter) Wait(ctx context.Context) error { 59 - select { 60 - case <-rl.tokens: 61 - return nil 62 - case <-ctx.Done(): 63 - return ctx.Err() 64 - } 65 - } 66 - 67 - // Stop stops the rate limiter 68 - func (rl *RateLimiter) Stop() { 69 - close(rl.stopRefill) 70 - }
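The token bucket goes away with the client it throttled; rate limiting is now the plcbundle library's concern. Should a local limiter ever be needed again, golang.org/x/time/rate gives the same semantics without a hand-rolled refill goroutine — a sketch matching the old 90-requests-per-minute default:

    import "golang.org/x/time/rate"

    // One token every time.Minute/90 with a burst of 90:
    // the equivalent of the removed NewRateLimiter(90, time.Minute).
    var limiter = rate.NewLimiter(rate.Every(time.Minute/90), 90)

    func wait(ctx context.Context) error {
        return limiter.Wait(ctx) // blocks like the removed RateLimiter.Wait
    }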
+92 -415
internal/plc/scanner.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "encoding/json" 6 5 "fmt" 7 6 "strings" 8 7 "time" 9 8 10 - "github.com/acarl005/stripansi" 11 - "github.com/atscan/atscanner/internal/config" 12 - "github.com/atscan/atscanner/internal/log" 13 - "github.com/atscan/atscanner/internal/storage" 9 + "github.com/atscan/atscand/internal/config" 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/atscan/atscand/internal/storage" 14 12 ) 15 13 16 14 type Scanner struct { 17 - client *Client 15 + bundleManager *BundleManager 18 16 db storage.Database 19 17 config config.PLCConfig 20 - bundleManager *BundleManager 21 18 } 22 19 23 - func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner { 24 - bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs 25 - if err != nil { 26 - log.Error("Warning: failed to initialize bundle manager: %v", err) 27 - bundleManager = &BundleManager{enabled: false} 28 - } 20 + func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner { 21 + log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs) 29 22 30 23 return &Scanner{ 31 - client: NewClient(cfg.DirectoryURL), 24 + bundleManager: bundleManager, // Use provided instance 32 25 db: db, 33 26 config: cfg, 34 - bundleManager: bundleManager, 35 27 } 36 28 } 37 29 38 30 func (s *Scanner) Close() { 39 - if s.bundleManager != nil { 40 - s.bundleManager.Close() 41 - } 42 - } 43 - 44 - // ScanMetrics tracks scan progress 45 - type ScanMetrics struct { 46 - totalFetched int64 // Total ops fetched from PLC/bundles 47 - totalProcessed int64 // Unique ops processed (after dedup) 48 - newEndpoints int64 // New endpoints discovered 49 - endpointCounts map[string]int64 50 - currentBundle int 51 - startTime time.Time 52 - } 53 - 54 - func newMetrics(startBundle int) *ScanMetrics { 55 - return &ScanMetrics{ 56 - endpointCounts: make(map[string]int64), 57 - currentBundle: startBundle, 58 - startTime: time.Now(), 59 - } 60 - } 61 - 62 - func (m *ScanMetrics) logSummary() { 63 - summary := formatEndpointCounts(m.endpointCounts) 64 - if m.newEndpoints > 0 { 65 - log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v", 66 - m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime)) 67 - } else { 68 - log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v", 69 - m.totalProcessed, m.totalFetched, time.Since(m.startTime)) 70 - } 31 + // Don't close bundleManager here - it's shared 71 32 } 72 33 73 34 func (s *Scanner) Scan(ctx context.Context) error { 74 35 log.Info("Starting PLC directory scan...") 75 - log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes") 76 36 77 37 cursor, err := s.db.GetScanCursor(ctx, "plc_directory") 78 38 if err != nil { 79 39 return fmt.Errorf("failed to get scan cursor: %w", err) 80 40 } 81 41 82 - startBundle := s.calculateStartBundle(cursor.LastBundleNumber) 83 - metrics := newMetrics(startBundle) 42 + metrics := newMetrics(cursor.LastBundleNumber + 1) 84 43 85 - if startBundle > 1 { 86 - if err := s.ensureContinuity(ctx, startBundle); err != nil { 87 - return err 88 - } 89 - } 90 - 91 - // Handle existing mempool first 92 - if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool { 93 - return s.handleMempoolOnly(ctx, metrics) 94 - } 95 - 96 - // Process bundles until incomplete or error 44 + // Main processing loop 97 45 for { 98 46 if err := ctx.Err(); err != nil { 99 47 return err 100 48 } 101 49 102 - if err 
:= s.processSingleBundle(ctx, metrics); err != nil { 103 - if s.shouldRetry(err) { 104 - continue 105 - } 106 - break 107 - } 108 - 109 - if err := s.updateCursor(ctx, cursor, metrics); err != nil { 110 - log.Error("Warning: failed to update cursor: %v", err) 111 - } 112 - } 113 - 114 - // Try to finalize mempool 115 - s.finalizeMempool(ctx, metrics) 116 - 117 - metrics.logSummary() 118 - return nil 119 - } 120 - 121 - func (s *Scanner) calculateStartBundle(lastBundle int) int { 122 - if lastBundle == 0 { 123 - return 1 124 - } 125 - return lastBundle + 1 126 - } 127 - 128 - func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error { 129 - log.Info("Checking bundle continuity...") 130 - if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil { 131 - return fmt.Errorf("bundle continuity check failed: %w", err) 132 - } 133 - return nil 134 - } 135 - 136 - func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) { 137 - count, err := s.db.GetMempoolCount(ctx) 138 - if err != nil { 139 - return false, err 140 - } 141 - return count > 0, nil 142 - } 143 - 144 - func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error { 145 - count, _ := s.db.GetMempoolCount(ctx) 146 - log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count) 147 - 148 - if err := s.fillMempool(ctx, m); err != nil { 149 - return err 150 - } 151 - 152 - if err := s.processMempool(ctx, m); err != nil { 153 - log.Error("Error processing mempool: %v", err) 154 - } 155 - 156 - m.logSummary() 157 - return nil 158 - } 159 - 160 - func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error { 161 - log.Verbose("→ Processing bundle %06d...", m.currentBundle) 162 - 163 - ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client) 164 - if err != nil { 165 - return s.handleBundleError(err, m) 166 - } 167 - 168 - if isComplete { 169 - return s.handleCompleteBundle(ctx, ops, m) 170 - } 171 - return s.handleIncompleteBundle(ctx, ops, m) 172 - } 173 - 174 - func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error { 175 - log.Error("Failed to load bundle %06d: %v", m.currentBundle, err) 176 - 177 - if strings.Contains(err.Error(), "rate limited") { 178 - log.Info("⚠ Rate limit hit, pausing for 5 minutes...") 179 - time.Sleep(5 * time.Minute) 180 - return fmt.Errorf("retry") 181 - } 182 - 183 - if m.currentBundle > 1 { 184 - log.Info("→ Reached end of available data") 185 - } 186 - return err 187 - } 188 - 189 - func (s *Scanner) shouldRetry(err error) bool { 190 - return err != nil && err.Error() == "retry" 191 - } 192 - 193 - func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error { 194 - counts, err := s.processBatch(ctx, ops) 195 - if err != nil { 196 - return err 197 - } 198 - 199 - s.mergeCounts(m.endpointCounts, counts) 200 - m.totalProcessed += int64(len(ops)) // Unique ops after dedup 201 - m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints 202 - 203 - batchTotal := sumCounts(counts) 204 - log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints", 205 - m.currentBundle, len(ops), batchTotal) 206 - 207 - m.currentBundle++ 208 - return nil 209 - } 210 - 211 - func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error { 212 - log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops)) 213 - 214 - if err := 
s.addToMempool(ctx, ops, m.endpointCounts); err != nil { 215 - return err 216 - } 217 - 218 - s.finalizeMempool(ctx, m) 219 - return fmt.Errorf("incomplete") // Signal end of processing 220 - } 221 - 222 - func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) { 223 - if err := s.fillMempool(ctx, m); err != nil { 224 - log.Error("Error filling mempool: %v", err) 225 - } 226 - if err := s.processMempool(ctx, m); err != nil { 227 - log.Error("Error processing mempool: %v", err) 228 - } 229 - } 230 - 231 - func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error { 232 - const fetchLimit = 1000 233 - 234 - for { 235 - count, err := s.db.GetMempoolCount(ctx) 50 + // Fetch and save bundle (library handles mempool internally) 51 + bundle, err := s.bundleManager.FetchAndSaveBundle(ctx) 236 52 if err != nil { 237 - return err 238 - } 53 + if isInsufficientOpsError(err) { 54 + // Show mempool status 55 + stats := s.bundleManager.libManager.GetMempoolStats() 56 + mempoolCount := stats["count"].(int) 239 57 240 - if count >= BUNDLE_SIZE { 241 - log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE) 242 - return nil 243 - } 58 + if mempoolCount > 0 { 59 + log.Info("→ Waiting for more operations (mempool has %d/%d ops)", 60 + mempoolCount, BUNDLE_SIZE) 61 + } else { 62 + log.Info("→ Caught up! No operations available") 63 + } 64 + break 65 + } 244 66 245 - log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE) 67 + if strings.Contains(err.Error(), "rate limited") { 68 + log.Info("⚠ Rate limited, pausing for 5 minutes...") 69 + time.Sleep(5 * time.Minute) 70 + continue 71 + } 246 72 247 - // ✅ Fix: Don't capture unused 'ops' variable 248 - shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m) 249 - if err != nil { 250 - return err 73 + return fmt.Errorf("failed to fetch bundle: %w", err) 251 74 } 252 75 253 - if !shouldContinue { 254 - finalCount, _ := s.db.GetMempoolCount(ctx) 255 - log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE) 256 - return nil 257 - } 258 - } 259 - } 260 - 261 - func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) { 262 - lastOp, err := s.db.GetLastMempoolOperation(ctx) 263 - if err != nil { 264 - return false, err 265 - } 266 - 267 - var after string 268 - if lastOp != nil { 269 - after = lastOp.CreatedAt.Format(time.RFC3339Nano) 270 - log.Verbose(" Using cursor: %s", after) 271 - } 272 - 273 - ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after}) 274 - if err != nil { 275 - return false, fmt.Errorf("failed to fetch from PLC: %w", err) 276 - } 277 - 278 - fetchedCount := len(ops) 279 - m.totalFetched += int64(fetchedCount) // Track all fetched 280 - log.Verbose(" Fetched %d operations from PLC", fetchedCount) 281 - 282 - if fetchedCount == 0 { 283 - count, _ := s.db.GetMempoolCount(ctx) 284 - log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE) 285 - return false, nil 286 - } 287 - 288 - beforeCount, err := s.db.GetMempoolCount(ctx) 289 - if err != nil { 290 - return false, err 291 - } 292 - 293 - endpointsBefore := sumCounts(m.endpointCounts) 294 - if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil { 295 - return false, err 296 - } 297 - endpointsAfter := sumCounts(m.endpointCounts) 298 - m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found 299 - 300 - afterCount, err := s.db.GetMempoolCount(ctx) 
301 - if err != nil { 302 - return false, err 303 - } 304 - 305 - uniqueAdded := int64(afterCount - beforeCount) // Cast to int64 306 - m.totalProcessed += uniqueAdded // Track unique ops processed 307 - 308 - log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)", 309 - uniqueAdded, int64(fetchedCount)-uniqueAdded) 310 - 311 - // Continue only if got full batch 312 - shouldContinue := fetchedCount >= limit 313 - if !shouldContinue { 314 - log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit) 315 - } 316 - 317 - return shouldContinue, nil 318 - } 319 - 320 - func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error { 321 - mempoolOps := make([]storage.MempoolOperation, len(ops)) 322 - for i, op := range ops { 323 - mempoolOps[i] = storage.MempoolOperation{ 324 - DID: op.DID, 325 - Operation: string(op.RawJSON), 326 - CID: op.CID, 327 - CreatedAt: op.CreatedAt, 328 - } 329 - } 330 - 331 - if err := s.db.AddToMempool(ctx, mempoolOps); err != nil { 332 - return err 333 - } 334 - 335 - // Process for endpoint discovery 336 - batchCounts, err := s.processBatch(ctx, ops) 337 - s.mergeCounts(counts, batchCounts) 338 - return err 339 - } 340 - 341 - func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error { 342 - for { 343 - count, err := s.db.GetMempoolCount(ctx) 76 + // Process operations for endpoint discovery 77 + counts, err := s.processBatch(ctx, bundle.Operations) 344 78 if err != nil { 345 - return err 79 + log.Error("Failed to process batch: %v", err) 80 + // Continue anyway 346 81 } 347 82 348 - log.Verbose("Mempool contains %d operations", count) 349 - 350 - if count < BUNDLE_SIZE { 351 - log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE) 352 - return nil 353 - } 83 + // Update metrics 84 + s.mergeCounts(metrics.endpointCounts, counts) 85 + metrics.totalProcessed += int64(len(bundle.Operations)) 86 + metrics.newEndpoints += sumCounts(counts) 87 + metrics.currentBundle = bundle.BundleNumber 354 88 355 - log.Info("→ Creating bundle from mempool (%d operations available)...", count) 89 + log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints", 90 + bundle.BundleNumber, len(bundle.Operations), sumCounts(counts)) 356 91 357 - // Updated to receive 4 values instead of 3 358 - bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx) 359 - if err != nil { 360 - return err 361 - } 362 - 363 - // Process and update metrics 364 - countsBefore := sumCounts(m.endpointCounts) 365 - counts, _ := s.processBatch(ctx, ops) 366 - s.mergeCounts(m.endpointCounts, counts) 367 - newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore 368 - 369 - m.totalProcessed += int64(len(ops)) 370 - m.newEndpoints += newEndpointsFound 371 - m.currentBundle = bundleNum 372 - 373 - if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil { 92 + // Update cursor 93 + if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil { 374 94 log.Error("Warning: failed to update cursor: %v", err) 375 95 } 376 - 377 - log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor) 378 96 } 379 - } 380 97 381 - func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) { 382 - mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE) 383 - if err != nil { 384 - return 0, nil, "", err 98 + // Show final mempool status 99 + stats := 
s.bundleManager.libManager.GetMempoolStats() 100 + if count, ok := stats["count"].(int); ok && count > 0 { 101 + log.Info("Mempool contains %d operations (%.1f%% of next bundle)", 102 + count, float64(count)/float64(BUNDLE_SIZE)*100) 385 103 } 386 104 387 - ops, ids := s.deduplicateMempool(mempoolOps) 388 - if len(ops) < BUNDLE_SIZE { 389 - return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE) 390 - } 391 - 392 - // Determine cursor from last bundle 393 - cursor := "" 394 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 395 - if err == nil && lastBundle > 0 { 396 - if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil { 397 - cursor = bundle.EndTime.Format(time.RFC3339Nano) 398 - } 399 - } 400 - 401 - bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor) 402 - if err != nil { 403 - return 0, nil, "", err 404 - } 405 - 406 - if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil { 407 - return 0, nil, "", err 408 - } 409 - 410 - return bundleNum, ops, cursor, nil 105 + metrics.logSummary() 106 + return nil 411 107 } 412 108 413 - func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) { 414 - ops := make([]PLCOperation, 0, BUNDLE_SIZE) 415 - ids := make([]int64, 0, BUNDLE_SIZE) 416 - seenCIDs := make(map[string]bool) 417 - 418 - for _, mop := range mempoolOps { 419 - if seenCIDs[mop.CID] { 420 - ids = append(ids, mop.ID) 421 - continue 422 - } 423 - seenCIDs[mop.CID] = true 424 - 425 - var op PLCOperation 426 - json.Unmarshal([]byte(mop.Operation), &op) 427 - op.RawJSON = []byte(mop.Operation) 428 - 429 - ops = append(ops, op) 430 - ids = append(ids, mop.ID) 431 - 432 - if len(ops) >= BUNDLE_SIZE { 433 - break 434 - } 435 - } 436 - 437 - return ops, ids 438 - } 439 - 109 + // processBatch extracts endpoints from operations 440 110 func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) { 441 111 counts := make(map[string]int64) 442 112 seen := make(map[string]*PLCOperation) 443 113 444 114 // Collect unique endpoints 445 - for _, op := range ops { 115 + for i := range ops { 116 + op := &ops[i] 117 + 446 118 if op.IsNullified() { 447 119 continue 448 120 } 449 - for _, ep := range s.extractEndpointsFromOperation(op) { 121 + 122 + for _, ep := range s.extractEndpointsFromOperation(*op) { 450 123 key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint) 451 124 if _, exists := seen[key]; !exists { 452 - seen[key] = &op 125 + seen[key] = op 453 126 } 454 127 } 455 128 } ··· 465 138 } 466 139 467 140 if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil { 468 - log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err) 141 + log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err) 469 142 continue 470 143 } 471 144 472 - log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint)) 145 + log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint) 473 146 counts[epType]++ 474 147 } 475 148 476 149 return counts, nil 477 - } 478 - 479 - func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error { 480 - return s.db.UpsertEndpoint(ctx, &storage.Endpoint{ 481 - EndpointType: epType, 482 - Endpoint: endpoint, 483 - DiscoveredAt: discoveredAt, 484 - LastChecked: time.Time{}, 485 - Status: storage.EndpointStatusUnknown, 486 - }) 487 150 } 488 151 489 152 func (s *Scanner) 
extractEndpointsFromOperation(op PLCOperation) []EndpointInfo { ··· 526 189 return nil 527 190 } 528 191 529 - func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error { 530 - return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{ 531 - Source: "plc_directory", 532 - LastBundleNumber: m.currentBundle - 1, 533 - LastScanTime: time.Now().UTC(), 534 - RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed, 192 + func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error { 193 + valid := validateEndpoint(endpoint) 194 + return s.db.UpsertEndpoint(ctx, &storage.Endpoint{ 195 + EndpointType: epType, 196 + Endpoint: endpoint, 197 + DiscoveredAt: discoveredAt, 198 + LastChecked: time.Time{}, 199 + Status: storage.EndpointStatusUnknown, 200 + Valid: valid, 535 201 }) 536 202 } 537 203 ··· 559 225 return total 560 226 } 561 227 562 - func formatEndpointCounts(counts map[string]int64) string { 563 - if len(counts) == 0 { 564 - return "0 new endpoints" 565 - } 228 + func isInsufficientOpsError(err error) bool { 229 + return err != nil && strings.Contains(err.Error(), "insufficient operations") 230 + } 566 231 567 - total := sumCounts(counts) 232 + // ScanMetrics tracks scan progress 233 + type ScanMetrics struct { 234 + totalProcessed int64 235 + newEndpoints int64 236 + endpointCounts map[string]int64 237 + currentBundle int 238 + startTime time.Time 239 + } 568 240 569 - if len(counts) == 1 { 570 - for typ, count := range counts { 571 - return fmt.Sprintf("%d new %s endpoint(s)", count, typ) 572 - } 241 + func newMetrics(startBundle int) *ScanMetrics { 242 + return &ScanMetrics{ 243 + endpointCounts: make(map[string]int64), 244 + currentBundle: startBundle, 245 + startTime: time.Now(), 573 246 } 247 + } 574 248 575 - parts := make([]string, 0, len(counts)) 576 - for typ, count := range counts { 577 - parts = append(parts, fmt.Sprintf("%d %s", count, typ)) 249 + func (m *ScanMetrics) logSummary() { 250 + if m.newEndpoints > 0 { 251 + log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v", 252 + m.totalProcessed, m.newEndpoints, time.Since(m.startTime)) 253 + } else { 254 + log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v", 255 + m.totalProcessed, time.Since(m.startTime)) 578 256 } 579 - return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", ")) 580 257 }
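One note on the rewritten loop: isInsufficientOpsError matches on the error text, which survives fmt.Errorf("%w") wrapping but breaks silently if the library rewords its message; errors.Is against a sentinel would be sturdier if plcbundle exports one. The current behavior, assuming the library's wording contains "insufficient operations":

    base := errors.New("insufficient operations: 4210/10000") // assumed wording
    wrapped := fmt.Errorf("fetch failed: %w", base)
    isInsufficientOpsError(wrapped)                 // true — substring survives wrapping
    isInsufficientOpsError(errors.New("caught up")) // false — a reworded message is missed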
+68 -55
internal/plc/types.go
··· 1 1 package plc 2 2 3 - import "time" 4 - 5 - type PLCOperation struct { 6 - DID string `json:"did"` 7 - Operation map[string]interface{} `json:"operation"` 8 - CID string `json:"cid"` 9 - Nullified interface{} `json:"nullified,omitempty"` 10 - CreatedAt time.Time `json:"createdAt"` 11 - 12 - RawJSON []byte `json:"-"` // ✅ Exported (capital R) 13 - } 3 + import ( 4 + "net/url" 5 + "strings" 14 6 15 - // Helper method to check if nullified 16 - func (op *PLCOperation) IsNullified() bool { 17 - if op.Nullified == nil { 18 - return false 19 - } 20 - 21 - switch v := op.Nullified.(type) { 22 - case bool: 23 - return v 24 - case string: 25 - return v != "" 26 - default: 27 - return false 28 - } 29 - } 30 - 31 - // Get nullifying CID if available 32 - func (op *PLCOperation) GetNullifyingCID() string { 33 - if s, ok := op.Nullified.(string); ok { 34 - return s 35 - } 36 - return "" 37 - } 7 + plclib "tangled.org/atscan.net/plcbundle/plc" 8 + ) 38 9 39 - type DIDDocument struct { 40 - Context []string `json:"@context"` 41 - ID string `json:"id"` 42 - AlsoKnownAs []string `json:"alsoKnownAs"` 43 - VerificationMethod []VerificationMethod `json:"verificationMethod"` 44 - Service []Service `json:"service"` 45 - } 10 + // Re-export library types 11 + type PLCOperation = plclib.PLCOperation 12 + type DIDDocument = plclib.DIDDocument 13 + type Client = plclib.Client 14 + type ExportOptions = plclib.ExportOptions 46 15 47 - type VerificationMethod struct { 48 - ID string `json:"id"` 49 - Type string `json:"type"` 50 - Controller string `json:"controller"` 51 - PublicKeyMultibase string `json:"publicKeyMultibase"` 52 - } 16 + // Keep your custom types 17 + const BUNDLE_SIZE = 10000 53 18 54 - type Service struct { 55 - ID string `json:"id"` 56 - Type string `json:"type"` 57 - ServiceEndpoint string `json:"serviceEndpoint"` 58 - } 59 - 60 - // DIDHistoryEntry represents a single operation in DID history 61 19 type DIDHistoryEntry struct { 62 20 Operation PLCOperation `json:"operation"` 63 21 PLCBundle string `json:"plc_bundle,omitempty"` 64 22 } 65 23 66 - // DIDHistory represents the full history of a DID 67 24 type DIDHistory struct { 68 25 DID string `json:"did"` 69 26 Current *PLCOperation `json:"current"` ··· 74 31 Type string 75 32 Endpoint string 76 33 } 34 + 35 + // PLCOpLabel holds metadata from the label CSV file 36 + type PLCOpLabel struct { 37 + Bundle int `json:"bundle"` 38 + Position int `json:"position"` 39 + CID string `json:"cid"` 40 + Size int `json:"size"` 41 + Confidence float64 `json:"confidence"` 42 + Detectors []string `json:"detectors"` 43 + } 44 + 45 + // validateEndpoint checks if endpoint is in correct format: https://<domain> 46 + func validateEndpoint(endpoint string) bool { 47 + // Must not be empty 48 + if endpoint == "" { 49 + return false 50 + } 51 + 52 + // Must not have trailing slash 53 + if strings.HasSuffix(endpoint, "/") { 54 + return false 55 + } 56 + 57 + // Parse URL 58 + u, err := url.Parse(endpoint) 59 + if err != nil { 60 + return false 61 + } 62 + 63 + // Must use https scheme 64 + if u.Scheme != "https" { 65 + return false 66 + } 67 + 68 + // Must have a host 69 + if u.Host == "" { 70 + return false 71 + } 72 + 73 + // Must not have path (except empty) 74 + if u.Path != "" && u.Path != "/" { 75 + return false 76 + } 77 + 78 + // Must not have query parameters 79 + if u.RawQuery != "" { 80 + return false 81 + } 82 + 83 + // Must not have fragment 84 + if u.Fragment != "" { 85 + return false 86 + } 87 + 88 + return true 89 + }
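validateEndpoint is deliberately strict — https only, bare host, nothing after it. A few cases that follow directly from the checks above:

    validateEndpoint("https://pds.example.com")      // true
    validateEndpoint("https://pds.example.com:8443") // true — the port is part of u.Host
    validateEndpoint("https://pds.example.com/")     // false — trailing slash
    validateEndpoint("http://pds.example.com")       // false — scheme must be https
    validateEndpoint("https://pds.example.com/xrpc") // false — path present
    validateEndpoint("https://pds.example.com?x=1")  // false — query present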
+16 -23
internal/storage/db.go
··· 27 27 EndpointExists(ctx context.Context, endpoint string, endpointType string) (bool, error) 28 28 GetEndpointIDByEndpoint(ctx context.Context, endpoint string, endpointType string) (int64, error) 29 29 GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) 30 - UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error 30 + UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error 31 31 SaveEndpointScan(ctx context.Context, scan *EndpointScan) error 32 32 SetScanRetention(retention int) 33 33 UpdateEndpointStatus(ctx context.Context, endpointID int64, update *EndpointUpdate) error ··· 38 38 GetPDSList(ctx context.Context, filter *EndpointFilter) ([]*PDSListItem, error) 39 39 GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) 40 40 GetPDSStats(ctx context.Context) (*PDSStats, error) 41 + GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error) 42 + GetVersionStats(ctx context.Context) ([]*VersionStats, error) 41 43 42 44 // IP operations (IP as primary key) 43 45 UpsertIPInfo(ctx context.Context, ip string, ipInfo map[string]interface{}) error ··· 48 50 GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) 49 51 UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error 50 52 51 - // Bundle operations 52 - CreateBundle(ctx context.Context, bundle *PLCBundle) error 53 - GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) 54 - GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) 55 - GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) 56 - GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error) 57 - GetLastBundleNumber(ctx context.Context) (int, error) 58 - GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) 59 - 60 - // Mempool operations 61 - AddToMempool(ctx context.Context, ops []MempoolOperation) error 62 - GetMempoolCount(ctx context.Context) (int, error) 63 - GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) 64 - DeleteFromMempool(ctx context.Context, ids []int64) error 65 - GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) 66 - GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) 67 - GetMempoolUniqueDIDCount(ctx context.Context) (int, error) 68 - GetMempoolUncompressedSize(ctx context.Context) (int64, error) 69 - 70 53 // Metrics 71 54 StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error 72 55 GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error) 73 56 GetEndpointStats(ctx context.Context) (*EndpointStats, error) 74 57 75 58 // DID operations 76 - UpsertDID(ctx context.Context, did string, bundleNum int) error 59 + UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error 60 + UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error 77 61 GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) 62 + GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) // NEW 63 + GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) 78 64 AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error 79 65 GetTotalDIDCount(ctx context.Context) (int64, error) 80 66 81 - GetCountryLeaderboard(ctx context.Context) ([]*CountryStats, error) 82 - GetVersionStats(ctx context.Context) ([]*VersionStats, 
error) 67 + // PDS Repo operations 68 + UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error 69 + GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) 70 + GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error) 71 + GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error) 72 + 73 + // Internal 74 + GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) 75 + GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) 83 76 }
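The DID side of the interface now carries handle and PDS through the upsert, which is what lets indexBundleDIDs in internal/plc/manager.go persist everything in one call. A sketch against the interface alone (values hypothetical):

    func indexOne(ctx context.Context, db storage.Database) error {
        // bundle-sourced upsert: bundle number recorded alongside handle and PDS
        if err := db.UpsertDID(ctx, "did:plc:example123", 42,
            "alice.example.com", "https://pds.example.com"); err != nil {
            return err
        }
        // reverse lookup introduced in this change
        rec, err := db.GetDIDByHandle(ctx, "alice.example.com")
        if err != nil {
            return err
        }
        _ = rec
        return nil
    }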
+799 -550
internal/storage/postgres.go
··· 5 5 "database/sql" 6 6 "encoding/json" 7 7 "fmt" 8 - "strings" 9 8 "time" 10 9 11 - "github.com/atscan/atscanner/internal/log" 10 + "github.com/atscan/atscand/internal/log" 12 11 "github.com/jackc/pgx/v5" 13 12 "github.com/jackc/pgx/v5/pgxpool" 14 13 _ "github.com/jackc/pgx/v5/stdlib" ··· 73 72 log.Info("Running database migrations...") 74 73 75 74 schema := ` 76 - -- Endpoints table (NO user_count, NO ip_info) 75 + -- Endpoints table (with IPv6 support) 77 76 CREATE TABLE IF NOT EXISTS endpoints ( 78 77 id BIGSERIAL PRIMARY KEY, 79 78 endpoint_type TEXT NOT NULL DEFAULT 'pds', ··· 83 82 last_checked TIMESTAMP, 84 83 status INTEGER DEFAULT 0, 85 84 ip TEXT, 85 + ipv6 TEXT, 86 86 ip_resolved_at TIMESTAMP, 87 + valid BOOLEAN DEFAULT true, 87 88 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 88 89 UNIQUE(endpoint_type, endpoint) 89 90 ); ··· 92 93 CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status); 93 94 CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type); 94 95 CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip); 96 + CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6); 95 97 CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did); 96 - CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at); 98 + CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at); 99 + CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid); 97 100 98 - -- IP infos table (IP as PRIMARY KEY) 99 - CREATE TABLE IF NOT EXISTS ip_infos ( 100 - ip TEXT PRIMARY KEY, 101 - city TEXT, 102 - country TEXT, 103 - country_code TEXT, 104 - asn INTEGER, 105 - asn_org TEXT, 106 - is_datacenter BOOLEAN, 107 - is_vpn BOOLEAN, 108 - latitude REAL, 109 - longitude REAL, 110 - raw_data JSONB, 111 - fetched_at TIMESTAMP NOT NULL, 112 - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 113 - ); 101 + -- IP infos table (IP as PRIMARY KEY) 102 + CREATE TABLE IF NOT EXISTS ip_infos ( 103 + ip TEXT PRIMARY KEY, 104 + city TEXT, 105 + country TEXT, 106 + country_code TEXT, 107 + asn INTEGER, 108 + asn_org TEXT, 109 + is_datacenter BOOLEAN, 110 + is_vpn BOOLEAN, 111 + is_crawler BOOLEAN, 112 + is_tor BOOLEAN, 113 + is_proxy BOOLEAN, 114 + latitude REAL, 115 + longitude REAL, 116 + raw_data JSONB, 117 + fetched_at TIMESTAMP NOT NULL, 118 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 119 + ); 114 120 115 - CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code); 116 - CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn); 121 + CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code); 122 + CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn); 117 123 118 - -- Endpoint scans (renamed from pds_scans) 124 + -- Endpoint scans 119 125 CREATE TABLE IF NOT EXISTS endpoint_scans ( 120 126 id BIGSERIAL PRIMARY KEY, 121 127 endpoint_id BIGINT NOT NULL, ··· 123 129 response_time DOUBLE PRECISION, 124 130 user_count BIGINT, 125 131 version TEXT, 132 + used_ip TEXT, 126 133 scan_data JSONB, 127 134 scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 128 135 FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE ··· 131 138 CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC); 132 139 CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at); 133 140 CREATE INDEX 
IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST); 141 + CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip); 142 + 134 143 135 144 CREATE TABLE IF NOT EXISTS plc_metrics ( 136 145 id BIGSERIAL PRIMARY KEY, ··· 149 158 records_processed BIGINT DEFAULT 0 150 159 ); 151 160 152 - CREATE TABLE IF NOT EXISTS plc_bundles ( 153 - bundle_number INTEGER PRIMARY KEY, 154 - start_time TIMESTAMP NOT NULL, 155 - end_time TIMESTAMP NOT NULL, 156 - dids JSONB NOT NULL, 157 - hash TEXT NOT NULL, 158 - compressed_hash TEXT NOT NULL, 159 - compressed_size BIGINT NOT NULL, 160 - uncompressed_size BIGINT NOT NULL, 161 - cumulative_compressed_size BIGINT NOT NULL, 162 - cumulative_uncompressed_size BIGINT NOT NULL, 163 - cursor TEXT, 164 - prev_bundle_hash TEXT, 165 - compressed BOOLEAN DEFAULT true, 166 - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 167 - ); 161 + -- Minimal dids table 162 + CREATE TABLE IF NOT EXISTS dids ( 163 + did TEXT PRIMARY KEY, 164 + handle TEXT, 165 + pds TEXT, 166 + bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb, 167 + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 168 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 169 + ); 168 170 169 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time); 170 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash); 171 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash); 172 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC); 173 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids); 171 + CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers); 172 + CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at); 173 + CREATE INDEX IF NOT EXISTS idx_dids_handle ON dids(handle); 174 + CREATE INDEX IF NOT EXISTS idx_dids_pds ON dids(pds); 174 175 175 - CREATE TABLE IF NOT EXISTS plc_mempool ( 176 + -- PDS Repositories table 177 + CREATE TABLE IF NOT EXISTS pds_repos ( 176 178 id BIGSERIAL PRIMARY KEY, 179 + endpoint_id BIGINT NOT NULL, 177 180 did TEXT NOT NULL, 178 - operation TEXT NOT NULL, 179 - cid TEXT NOT NULL UNIQUE, 180 - created_at TIMESTAMP NOT NULL, 181 - added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 182 - ); 183 - 184 - CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at); 185 - CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did); 186 - CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid); 187 - 188 - -- Minimal dids table 189 - CREATE TABLE IF NOT EXISTS dids ( 190 - did TEXT PRIMARY KEY, 191 - bundle_numbers JSONB NOT NULL DEFAULT '[]'::jsonb, 192 - created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP 181 + head TEXT, 182 + rev TEXT, 183 + active BOOLEAN DEFAULT true, 184 + status TEXT, 185 + first_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 186 + last_seen TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 187 + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 188 + FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE, 189 + UNIQUE(endpoint_id, did) 193 190 ); 194 191 195 - CREATE INDEX IF NOT EXISTS idx_dids_bundle_numbers ON dids USING gin(bundle_numbers); 196 - CREATE INDEX IF NOT EXISTS idx_dids_created_at ON dids(created_at); 192 + CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint ON pds_repos(endpoint_id); 193 + CREATE INDEX IF NOT EXISTS idx_pds_repos_endpoint_id_desc ON pds_repos(endpoint_id, id 
DESC); 194 + CREATE INDEX IF NOT EXISTS idx_pds_repos_did ON pds_repos(did); 195 + CREATE INDEX IF NOT EXISTS idx_pds_repos_active ON pds_repos(active); 196 + CREATE INDEX IF NOT EXISTS idx_pds_repos_status ON pds_repos(status); 197 + CREATE INDEX IF NOT EXISTS idx_pds_repos_last_seen ON pds_repos(last_seen DESC); 197 198 ` 198 199 199 200 _, err := p.db.Exec(schema) ··· 209 210 210 211 func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error { 211 212 query := ` 212 - INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ip_resolved_at) 213 - VALUES ($1, $2, $3, $4, $5, $6, $7) 213 + INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid) 214 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) 214 215 ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET 215 216 last_checked = EXCLUDED.last_checked, 216 217 status = EXCLUDED.status, 217 218 ip = CASE 218 219 WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip 219 220 ELSE endpoints.ip 221 + END, 222 + ipv6 = CASE 223 + WHEN EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '' THEN EXCLUDED.ipv6 224 + ELSE endpoints.ipv6 220 225 END, 221 226 ip_resolved_at = CASE 222 - WHEN EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '' THEN EXCLUDED.ip_resolved_at 227 + WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at 223 228 ELSE endpoints.ip_resolved_at 224 229 END, 230 + valid = EXCLUDED.valid, 225 231 updated_at = CURRENT_TIMESTAMP 226 232 RETURNING id 227 233 ` 228 234 err := p.db.QueryRowContext(ctx, query, 229 235 endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt, 230 - endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPResolvedAt).Scan(&endpoint.ID) 236 + endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID) 231 237 return err 232 238 } 233 239 ··· 248 254 func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) { 249 255 query := ` 250 256 SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status, 251 - ip, ip_resolved_at, updated_at 257 + ip, ipv6, ip_resolved_at, valid, updated_at 252 258 FROM endpoints 253 259 WHERE endpoint = $1 AND endpoint_type = $2 254 260 ` 255 261 256 262 var ep Endpoint 257 263 var lastChecked, ipResolvedAt sql.NullTime 258 - var ip sql.NullString 264 + var ip, ipv6 sql.NullString 259 265 260 266 err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan( 261 267 &ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked, 262 - &ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt, 268 + &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt, 263 269 ) 264 270 if err != nil { 265 271 return nil, err ··· 271 277 if ip.Valid { 272 278 ep.IP = ip.String 273 279 } 280 + if ipv6.Valid { 281 + ep.IPv6 = ipv6.String 282 + } 274 283 if ipResolvedAt.Valid { 275 284 ep.IPResolvedAt = ipResolvedAt.Time 276 285 } ··· 280 289 281 290 func (p *PostgresDB) GetEndpoints(ctx context.Context, filter *EndpointFilter) ([]*Endpoint, error) { 282 291 query := ` 283 - SELECT DISTINCT ON (COALESCE(server_did, id::text)) 284 - id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status, 285 - ip, ip_resolved_at, updated_at 286 - FROM endpoints 287 - WHERE 1=1 292 + SELECT DISTINCT ON (COALESCE(server_did, id::text)) 293 + id, 
endpoint_type, endpoint, server_did, discovered_at, last_checked, status, 294 + ip, ipv6, ip_resolved_at, valid, updated_at 295 + FROM endpoints 296 + WHERE 1=1 288 297 ` 289 298 args := []interface{}{} 290 299 argIdx := 1 ··· 295 304 args = append(args, filter.Type) 296 305 argIdx++ 297 306 } 307 + 308 + // NEW: Filter by valid flag 309 + if filter.OnlyValid { 310 + query += " AND valid = true" 311 + } 298 312 if filter.Status != "" { 299 313 statusInt := EndpointStatusUnknown 300 314 switch filter.Status { ··· 317 331 } 318 332 } 319 333 320 - // NEW: Order by server_did and discovered_at to get primary endpoints 321 - query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC" 334 + // NEW: Choose ordering strategy 335 + if filter != nil && filter.Random { 336 + // For random selection, we need to wrap in a subquery 337 + query = fmt.Sprintf(` 338 + WITH filtered_endpoints AS ( 339 + %s 340 + ) 341 + SELECT * FROM filtered_endpoints 342 + ORDER BY RANDOM() 343 + `, query) 344 + } else { 345 + // Original ordering for non-random queries 346 + query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC" 347 + } 322 348 323 349 if filter != nil && filter.Limit > 0 { 324 350 query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1) ··· 335 361 for rows.Next() { 336 362 var ep Endpoint 337 363 var lastChecked, ipResolvedAt sql.NullTime 338 - var ip, serverDID sql.NullString 364 + var ip, ipv6, serverDID sql.NullString 339 365 340 366 err := rows.Scan( 341 367 &ep.ID, &ep.EndpointType, &ep.Endpoint, &serverDID, &ep.DiscoveredAt, &lastChecked, 342 - &ep.Status, &ip, &ipResolvedAt, &ep.UpdatedAt, 368 + &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt, 343 369 ) 344 370 if err != nil { 345 371 return nil, err ··· 353 379 } 354 380 if ip.Valid { 355 381 ep.IP = ip.String 382 + } 383 + if ipv6.Valid { 384 + ep.IPv6 = ipv6.String 356 385 } 357 386 if ipResolvedAt.Valid { 358 387 ep.IPResolvedAt = ipResolvedAt.Time ··· 374 403 return err 375 404 } 376 405 377 - func (p *PostgresDB) UpdateEndpointIP(ctx context.Context, endpointID int64, ip string, resolvedAt time.Time) error { 406 + func (p *PostgresDB) UpdateEndpointIPs(ctx context.Context, endpointID int64, ipv4, ipv6 string, resolvedAt time.Time) error { 378 407 query := ` 379 408 UPDATE endpoints 380 - SET ip = $1, ip_resolved_at = $2, updated_at = $3 381 - WHERE id = $4 409 + SET ip = $1, ipv6 = $2, ip_resolved_at = $3, updated_at = $4 410 + WHERE id = $5 382 411 ` 383 - _, err := p.db.ExecContext(ctx, query, ip, resolvedAt, time.Now().UTC(), endpointID) 412 + _, err := p.db.ExecContext(ctx, query, ipv4, ipv6, resolvedAt, time.Now().UTC(), endpointID) 384 413 return err 385 414 } 386 415 ··· 447 476 defer tx.Rollback() 448 477 449 478 query := ` 450 - INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at) 451 - VALUES ($1, $2, $3, $4, $5, $6, $7) 479 + INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at) 480 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) 452 481 ` 453 - _, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt) 482 + _, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt) 454 483 if err != nil { 455 484 return err 456 485 } ··· 477 506 478 507 func (p *PostgresDB) 
GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) { 479 508 query := ` 480 - SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at 509 + SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at 481 510 FROM endpoint_scans 482 511 WHERE endpoint_id = $1 483 512 ORDER BY scanned_at DESC ··· 495 524 var scan EndpointScan 496 525 var responseTime sql.NullFloat64 497 526 var userCount sql.NullInt64 498 - var version sql.NullString // NEW 527 + var version, usedIP sql.NullString 499 528 var scanDataJSON []byte 500 529 501 - err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt) 530 + err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt) 502 531 if err != nil { 503 532 return nil, err 504 533 } ··· 511 540 scan.UserCount = userCount.Int64 512 541 } 513 542 514 - if version.Valid { // NEW 543 + if version.Valid { 515 544 scan.Version = version.String 545 + } 546 + 547 + if usedIP.Valid { 548 + scan.UsedIP = usedIP.String 516 549 } 517 550 518 551 if len(scanDataJSON) > 0 { ··· 540 573 discovered_at, 541 574 last_checked, 542 575 status, 543 - ip 576 + ip, 577 + ipv6, 578 + valid 544 579 FROM endpoints 545 580 WHERE endpoint_type = 'pds' 546 581 ORDER BY COALESCE(server_did, id::text), discovered_at ASC 547 582 ) 548 583 SELECT 549 - e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, 584 + e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid, 550 585 latest.user_count, latest.response_time, latest.version, latest.scanned_at, 551 586 i.city, i.country, i.country_code, i.asn, i.asn_org, 552 - i.is_datacenter, i.is_vpn, i.latitude, i.longitude 587 + i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy, 588 + i.latitude, i.longitude 553 589 FROM unique_servers e 554 590 LEFT JOIN LATERAL ( 555 591 SELECT ··· 606 642 var items []*PDSListItem 607 643 for rows.Next() { 608 644 item := &PDSListItem{} 609 - var ip, serverDID, city, country, countryCode, asnOrg sql.NullString 645 + var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString 610 646 var asn sql.NullInt32 611 - var isDatacenter, isVPN sql.NullBool 647 + var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool 612 648 var lat, lon sql.NullFloat64 613 649 var userCount sql.NullInt32 614 650 var responseTime sql.NullFloat64 ··· 616 652 var scannedAt sql.NullTime 617 653 618 654 err := rows.Scan( 619 - &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, 655 + &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid, 620 656 &userCount, &responseTime, &version, &scannedAt, 621 657 &city, &country, &countryCode, &asn, &asnOrg, 622 - &isDatacenter, &isVPN, &lat, &lon, 658 + &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy, 659 + &lat, &lon, 623 660 ) 624 661 if err != nil { 625 662 return nil, err ··· 627 664 628 665 if ip.Valid { 629 666 item.IP = ip.String 667 + } 668 + if ipv6.Valid { 669 + item.IPv6 = ipv6.String 630 670 } 631 671 if serverDID.Valid { 632 672 item.ServerDID = serverDID.String ··· 658 698 ASNOrg: asnOrg.String, 659 699 IsDatacenter: isDatacenter.Bool, 660 700 IsVPN: isVPN.Bool, 701 + IsCrawler: isCrawler.Bool, 702 + IsTor: isTor.Bool, 703 + IsProxy: 
isProxy.Bool, 661 704 Latitude: float32(lat.Float64), 662 705 Longitude: float32(lon.Float64), 663 706 } ··· 671 714 672 715 func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) { 673 716 query := ` 674 - WITH target_endpoint AS ( 717 + WITH target_endpoint AS MATERIALIZED ( 675 718 SELECT 676 719 e.id, 677 720 e.endpoint, ··· 679 722 e.discovered_at, 680 723 e.last_checked, 681 724 e.status, 682 - e.ip 725 + e.ip, 726 + e.ipv6, 727 + e.valid 683 728 FROM endpoints e 684 - WHERE e.endpoint = $1 AND e.endpoint_type = 'pds' 685 - ), 686 - aliases_agg AS ( 687 - SELECT 688 - te.server_did, 689 - array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases, 690 - MIN(e.discovered_at) as first_discovered_at 691 - FROM target_endpoint te 692 - LEFT JOIN endpoints e ON te.server_did = e.server_did 693 - AND e.endpoint_type = 'pds' 694 - AND te.server_did IS NOT NULL 695 - GROUP BY te.server_did 729 + WHERE e.endpoint = $1 730 + AND e.endpoint_type = 'pds' 731 + LIMIT 1 696 732 ) 697 733 SELECT 698 734 te.id, ··· 702 738 te.last_checked, 703 739 te.status, 704 740 te.ip, 741 + te.ipv6, 742 + te.valid, 705 743 latest.user_count, 706 744 latest.response_time, 707 745 latest.version, 708 746 latest.scan_data->'metadata'->'server_info' as server_info, 709 747 latest.scanned_at, 710 748 i.city, i.country, i.country_code, i.asn, i.asn_org, 711 - i.is_datacenter, i.is_vpn, i.latitude, i.longitude, 749 + i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy, 750 + i.latitude, i.longitude, 712 751 i.raw_data, 713 - COALESCE(aa.aliases, ARRAY[]::text[]) as aliases, 714 - aa.first_discovered_at 752 + COALESCE( 753 + ARRAY( 754 + SELECT e2.endpoint 755 + FROM endpoints e2 756 + WHERE e2.server_did = te.server_did 757 + AND e2.endpoint_type = 'pds' 758 + AND e2.endpoint != te.endpoint 759 + AND te.server_did IS NOT NULL 760 + ORDER BY e2.discovered_at 761 + ), 762 + ARRAY[]::text[] 763 + ) as aliases, 764 + CASE 765 + WHEN te.server_did IS NOT NULL THEN ( 766 + SELECT MIN(e3.discovered_at) 767 + FROM endpoints e3 768 + WHERE e3.server_did = te.server_did 769 + AND e3.endpoint_type = 'pds' 770 + ) 771 + ELSE NULL 772 + END as first_discovered_at 715 773 FROM target_endpoint te 716 - LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did 717 774 LEFT JOIN LATERAL ( 718 - SELECT scan_data, response_time, version, scanned_at, user_count 719 - FROM endpoint_scans 720 - WHERE endpoint_id = te.id 721 - ORDER BY scanned_at DESC 775 + SELECT 776 + es.scan_data, 777 + es.response_time, 778 + es.version, 779 + es.scanned_at, 780 + es.user_count 781 + FROM endpoint_scans es 782 + WHERE es.endpoint_id = te.id 783 + ORDER BY es.scanned_at DESC 722 784 LIMIT 1 723 785 ) latest ON true 724 - LEFT JOIN ip_infos i ON te.ip = i.ip 786 + LEFT JOIN ip_infos i ON te.ip = i.ip; 725 787 ` 726 788 727 789 detail := &PDSDetail{} 728 - var ip, city, country, countryCode, asnOrg, serverDID sql.NullString 790 + var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString 729 791 var asn sql.NullInt32 730 - var isDatacenter, isVPN sql.NullBool 792 + var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool 731 793 var lat, lon sql.NullFloat64 732 794 var userCount sql.NullInt32 733 795 var responseTime sql.NullFloat64 ··· 739 801 var firstDiscoveredAt sql.NullTime 740 802 741 803 err := p.db.QueryRowContext(ctx, query, endpoint).Scan( 742 - &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, 
&detail.Status, &ip, 804 + &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid, 743 805 &userCount, &responseTime, &version, &serverInfoJSON, &scannedAt, 744 806 &city, &country, &countryCode, &asn, &asnOrg, 745 - &isDatacenter, &isVPN, &lat, &lon, 807 + &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy, 808 + &lat, &lon, 746 809 &rawDataJSON, 747 810 pq.Array(&aliases), 748 811 &firstDiscoveredAt, ··· 754 817 if ip.Valid { 755 818 detail.IP = ip.String 756 819 } 820 + if ipv6.Valid { 821 + detail.IPv6 = ipv6.String 822 + } 757 823 758 824 if serverDID.Valid { 759 825 detail.ServerDID = serverDID.String ··· 762 828 // Set aliases and is_primary 763 829 detail.Aliases = aliases 764 830 if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid { 765 - // Has server_did - check if this is the first discovered 766 831 detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) || 767 832 detail.DiscoveredAt.Before(firstDiscoveredAt.Time) 768 833 } else { 769 - // No server_did means unique server 770 834 detail.IsPrimary = true 771 835 } 772 836 ··· 792 856 } 793 857 } 794 858 795 - // Parse IP info 859 + // Parse IP info with all fields 796 860 if city.Valid || country.Valid { 797 861 detail.IPInfo = &IPInfo{ 798 862 IP: ip.String, ··· 803 867 ASNOrg: asnOrg.String, 804 868 IsDatacenter: isDatacenter.Bool, 805 869 IsVPN: isVPN.Bool, 870 + IsCrawler: isCrawler.Bool, 871 + IsTor: isTor.Bool, 872 + IsProxy: isProxy.Bool, 806 873 Latitude: float32(lat.Float64), 807 874 Longitude: float32(lon.Float64), 808 875 } ··· 938 1005 countryCode := extractString(ipInfo, "location", "country_code") 939 1006 asn := extractInt(ipInfo, "asn", "asn") 940 1007 asnOrg := extractString(ipInfo, "asn", "org") 941 - isDatacenter := extractBool(ipInfo, "company", "type", "hosting") 942 - isVPN := extractBool(ipInfo, "security", "vpn") 1008 + 1009 + // Extract top-level boolean flags 1010 + isDatacenter := false 1011 + if val, ok := ipInfo["is_datacenter"].(bool); ok { 1012 + isDatacenter = val 1013 + } 1014 + 1015 + isVPN := false 1016 + if val, ok := ipInfo["is_vpn"].(bool); ok { 1017 + isVPN = val 1018 + } 1019 + 1020 + isCrawler := false 1021 + if val, ok := ipInfo["is_crawler"].(bool); ok { 1022 + isCrawler = val 1023 + } 1024 + 1025 + isTor := false 1026 + if val, ok := ipInfo["is_tor"].(bool); ok { 1027 + isTor = val 1028 + } 1029 + 1030 + isProxy := false 1031 + if val, ok := ipInfo["is_proxy"].(bool); ok { 1032 + isProxy = val 1033 + } 1034 + 943 1035 lat := extractFloat(ipInfo, "location", "latitude") 944 1036 lon := extractFloat(ipInfo, "location", "longitude") 945 1037 946 1038 query := ` 947 - INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, latitude, longitude, raw_data, fetched_at) 948 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) 1039 + INSERT INTO ip_infos (ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, latitude, longitude, raw_data, fetched_at) 1040 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) 949 1041 ON CONFLICT(ip) DO UPDATE SET 950 1042 city = EXCLUDED.city, 951 1043 country = EXCLUDED.country, ··· 954 1046 asn_org = EXCLUDED.asn_org, 955 1047 is_datacenter = EXCLUDED.is_datacenter, 956 1048 is_vpn = EXCLUDED.is_vpn, 1049 + is_crawler = EXCLUDED.is_crawler, 1050 + is_tor = EXCLUDED.is_tor, 1051 + is_proxy = EXCLUDED.is_proxy, 957 1052 latitude = 
EXCLUDED.latitude, 958 1053 longitude = EXCLUDED.longitude, 959 1054 raw_data = EXCLUDED.raw_data, 960 1055 fetched_at = EXCLUDED.fetched_at, 961 1056 updated_at = CURRENT_TIMESTAMP 962 1057 ` 963 - _, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, lat, lon, rawDataJSON, time.Now().UTC()) 1058 + _, err := p.db.ExecContext(ctx, query, ip, city, country, countryCode, asn, asnOrg, isDatacenter, isVPN, isCrawler, isTor, isProxy, lat, lon, rawDataJSON, time.Now().UTC()) 964 1059 return err 965 1060 } 966 1061 967 1062 func (p *PostgresDB) GetIPInfo(ctx context.Context, ip string) (*IPInfo, error) { 968 1063 query := ` 969 - SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, 1064 + SELECT ip, city, country, country_code, asn, asn_org, is_datacenter, is_vpn, is_crawler, is_tor, is_proxy, 970 1065 latitude, longitude, raw_data, fetched_at, updated_at 971 1066 FROM ip_infos 972 1067 WHERE ip = $1 ··· 977 1072 978 1073 err := p.db.QueryRowContext(ctx, query, ip).Scan( 979 1074 &info.IP, &info.City, &info.Country, &info.CountryCode, &info.ASN, &info.ASNOrg, 980 - &info.IsDatacenter, &info.IsVPN, &info.Latitude, &info.Longitude, 1075 + &info.IsDatacenter, &info.IsVPN, &info.IsCrawler, &info.IsTor, &info.IsProxy, 1076 + &info.Latitude, &info.Longitude, 981 1077 &rawDataJSON, &info.FetchedAt, &info.UpdatedAt, 982 1078 ) 983 1079 if err != nil { ··· 1067 1163 return 0 1068 1164 } 1069 1165 1070 - func extractBool(data map[string]interface{}, keys ...string) bool { 1071 - current := data 1072 - for i, key := range keys { 1073 - if i == len(keys)-1 { 1074 - if val, ok := current[key].(bool); ok { 1075 - return val 1076 - } 1077 - // Check if it's a string that matches (for type="hosting") 1078 - if val, ok := current[key].(string); ok { 1079 - // For cases like company.type == "hosting" 1080 - expectedValue := keys[len(keys)-1] 1081 - return val == expectedValue 1082 - } 1083 - return false 1084 - } 1085 - if nested, ok := current[key].(map[string]interface{}); ok { 1086 - current = nested 1087 - } else { 1088 - return false 1089 - } 1090 - } 1091 - return false 1092 - } 1093 - 1094 - // ===== BUNDLE OPERATIONS ===== 1095 - 1096 - func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error { 1097 - didsJSON, err := json.Marshal(bundle.DIDs) 1098 - if err != nil { 1099 - return err 1100 - } 1101 - 1102 - // Calculate cumulative sizes from previous bundle 1103 - if bundle.BundleNumber > 1 { 1104 - prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1) 1105 - if err == nil && prevBundle != nil { 1106 - bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize 1107 - bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize 1108 - } else { 1109 - bundle.CumulativeCompressedSize = bundle.CompressedSize 1110 - bundle.CumulativeUncompressedSize = bundle.UncompressedSize 1111 - } 1112 - } else { 1113 - bundle.CumulativeCompressedSize = bundle.CompressedSize 1114 - bundle.CumulativeUncompressedSize = bundle.UncompressedSize 1115 - } 1116 - 1117 - query := ` 1118 - INSERT INTO plc_bundles ( 1119 - bundle_number, start_time, end_time, dids, 1120 - hash, compressed_hash, compressed_size, uncompressed_size, 1121 - cumulative_compressed_size, cumulative_uncompressed_size, 1122 - cursor, prev_bundle_hash, compressed 1123 - ) 1124 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) 1125 - ON CONFLICT(bundle_number) DO 
UPDATE SET 1126 - start_time = EXCLUDED.start_time, 1127 - end_time = EXCLUDED.end_time, 1128 - dids = EXCLUDED.dids, 1129 - hash = EXCLUDED.hash, 1130 - compressed_hash = EXCLUDED.compressed_hash, 1131 - compressed_size = EXCLUDED.compressed_size, 1132 - uncompressed_size = EXCLUDED.uncompressed_size, 1133 - cumulative_compressed_size = EXCLUDED.cumulative_compressed_size, 1134 - cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size, 1135 - cursor = EXCLUDED.cursor, 1136 - prev_bundle_hash = EXCLUDED.prev_bundle_hash, 1137 - compressed = EXCLUDED.compressed 1138 - ` 1139 - _, err = p.db.ExecContext(ctx, query, 1140 - bundle.BundleNumber, bundle.StartTime, bundle.EndTime, 1141 - didsJSON, bundle.Hash, bundle.CompressedHash, 1142 - bundle.CompressedSize, bundle.UncompressedSize, 1143 - bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize, 1144 - bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed, 1145 - ) 1146 - 1147 - return err 1148 - } 1149 - 1150 - func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) { 1151 - query := ` 1152 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, 1153 - compressed_size, uncompressed_size, cumulative_compressed_size, 1154 - cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at 1155 - FROM plc_bundles 1156 - WHERE bundle_number = $1 1157 - ` 1158 - 1159 - var bundle PLCBundle 1160 - var didsJSON []byte 1161 - var prevHash sql.NullString 1162 - var cursor sql.NullString 1163 - 1164 - err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan( 1165 - &bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime, 1166 - &didsJSON, &bundle.Hash, &bundle.CompressedHash, 1167 - &bundle.CompressedSize, &bundle.UncompressedSize, 1168 - &bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize, 1169 - &cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt, 1170 - ) 1171 - if err != nil { 1172 - return nil, err 1173 - } 1174 - 1175 - if prevHash.Valid { 1176 - bundle.PrevBundleHash = prevHash.String 1177 - } 1178 - if cursor.Valid { 1179 - bundle.Cursor = cursor.String 1180 - } 1181 - 1182 - json.Unmarshal(didsJSON, &bundle.DIDs) 1183 - return &bundle, nil 1184 - } 1185 - 1186 - func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) { 1187 - query := ` 1188 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, 1189 - compressed_size, uncompressed_size, cumulative_compressed_size, 1190 - cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at 1191 - FROM plc_bundles 1192 - ORDER BY bundle_number DESC 1193 - LIMIT $1 1194 - ` 1195 - 1196 - rows, err := p.db.QueryContext(ctx, query, limit) 1197 - if err != nil { 1198 - return nil, err 1199 - } 1200 - defer rows.Close() 1201 - 1202 - return p.scanBundles(rows) 1203 - } 1204 - 1205 - func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) { 1206 - query := ` 1207 - SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash, 1208 - compressed_size, uncompressed_size, cumulative_compressed_size, 1209 - cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at 1210 - FROM plc_bundles 1211 - WHERE dids ? 
$1 1212 - ORDER BY bundle_number ASC 1213 - ` 1214 - 1215 - rows, err := p.db.QueryContext(ctx, query, did) 1216 - if err != nil { 1217 - return nil, err 1218 - } 1219 - defer rows.Close() 1220 - 1221 - return p.scanBundles(rows) 1222 - } 1223 - 1224 - func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) { 1225 - var bundles []*PLCBundle 1226 - 1227 - for rows.Next() { 1228 - var bundle PLCBundle 1229 - var didsJSON []byte 1230 - var prevHash sql.NullString 1231 - var cursor sql.NullString 1232 - 1233 - if err := rows.Scan( 1234 - &bundle.BundleNumber, 1235 - &bundle.StartTime, 1236 - &bundle.EndTime, 1237 - &didsJSON, 1238 - &bundle.Hash, 1239 - &bundle.CompressedHash, 1240 - &bundle.CompressedSize, 1241 - &bundle.UncompressedSize, 1242 - &bundle.CumulativeCompressedSize, 1243 - &bundle.CumulativeUncompressedSize, 1244 - &cursor, 1245 - &prevHash, 1246 - &bundle.Compressed, 1247 - &bundle.CreatedAt, 1248 - ); err != nil { 1249 - return nil, err 1250 - } 1251 - 1252 - if prevHash.Valid { 1253 - bundle.PrevBundleHash = prevHash.String 1254 - } 1255 - if cursor.Valid { 1256 - bundle.Cursor = cursor.String 1257 - } 1258 - 1259 - json.Unmarshal(didsJSON, &bundle.DIDs) 1260 - bundles = append(bundles, &bundle) 1261 - } 1262 - 1263 - return bundles, rows.Err() 1264 - } 1265 - 1266 - func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) { 1267 - var count, lastBundleNum int64 1268 - err := p.db.QueryRowContext(ctx, ` 1269 - SELECT COUNT(*), COALESCE(MAX(bundle_number), 0) 1270 - FROM plc_bundles 1271 - `).Scan(&count, &lastBundleNum) 1272 - if err != nil { 1273 - return 0, 0, 0, 0, err 1274 - } 1275 - 1276 - if lastBundleNum == 0 { 1277 - return 0, 0, 0, 0, nil 1278 - } 1279 - 1280 - var compressedSize, uncompressedSize int64 1281 - err = p.db.QueryRowContext(ctx, ` 1282 - SELECT cumulative_compressed_size, cumulative_uncompressed_size 1283 - FROM plc_bundles 1284 - WHERE bundle_number = $1 1285 - `, lastBundleNum).Scan(&compressedSize, &uncompressedSize) 1286 - if err != nil { 1287 - return 0, 0, 0, 0, err 1288 - } 1289 - 1290 - return count, compressedSize, uncompressedSize, lastBundleNum, nil 1291 - } 1292 - 1293 - func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) { 1294 - query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles" 1295 - var num int 1296 - err := p.db.QueryRowContext(ctx, query).Scan(&num) 1297 - return num, err 1298 - } 1299 - 1300 - func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) { 1301 - query := ` 1302 - SELECT bundle_number 1303 - FROM plc_bundles 1304 - WHERE start_time <= $1 AND end_time >= $1 1305 - ORDER BY bundle_number ASC 1306 - LIMIT 1 1307 - ` 1308 - 1309 - var bundleNum int 1310 - err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum) 1311 - if err == sql.ErrNoRows { 1312 - query = ` 1313 - SELECT bundle_number 1314 - FROM plc_bundles 1315 - WHERE end_time < $1 1316 - ORDER BY bundle_number DESC 1317 - LIMIT 1 1318 - ` 1319 - err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum) 1320 - if err == sql.ErrNoRows { 1321 - return 1, nil 1322 - } 1323 - if err != nil { 1324 - return 0, err 1325 - } 1326 - return bundleNum, nil 1327 - } 1328 - if err != nil { 1329 - return 0, err 1330 - } 1331 - 1332 - return bundleNum, nil 1333 - } 1334 - 1335 - // ===== MEMPOOL OPERATIONS ===== 1336 - 1337 - func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error { 1338 - if len(ops) == 0 { 
1339 - return nil 1340 - } 1341 - 1342 - tx, err := p.db.BeginTx(ctx, nil) 1343 - if err != nil { 1344 - return err 1345 - } 1346 - defer tx.Rollback() 1347 - 1348 - stmt, err := tx.PrepareContext(ctx, ` 1349 - INSERT INTO plc_mempool (did, operation, cid, created_at) 1350 - VALUES ($1, $2, $3, $4) 1351 - ON CONFLICT(cid) DO NOTHING 1352 - `) 1353 - if err != nil { 1354 - return err 1355 - } 1356 - defer stmt.Close() 1357 - 1358 - for _, op := range ops { 1359 - _, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt) 1360 - if err != nil { 1361 - return err 1362 - } 1363 - } 1364 - 1365 - return tx.Commit() 1366 - } 1367 - 1368 - func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) { 1369 - query := "SELECT COUNT(*) FROM plc_mempool" 1370 - var count int 1371 - err := p.db.QueryRowContext(ctx, query).Scan(&count) 1372 - return count, err 1373 - } 1374 - 1375 - func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) { 1376 - query := ` 1377 - SELECT id, did, operation, cid, created_at, added_at 1378 - FROM plc_mempool 1379 - ORDER BY created_at ASC 1380 - LIMIT $1 1381 - ` 1382 - 1383 - rows, err := p.db.QueryContext(ctx, query, limit) 1384 - if err != nil { 1385 - return nil, err 1386 - } 1387 - defer rows.Close() 1388 - 1389 - var ops []MempoolOperation 1390 - for rows.Next() { 1391 - var op MempoolOperation 1392 - err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt) 1393 - if err != nil { 1394 - return nil, err 1395 - } 1396 - ops = append(ops, op) 1397 - } 1398 - 1399 - return ops, rows.Err() 1400 - } 1401 - 1402 - func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error { 1403 - if len(ids) == 0 { 1404 - return nil 1405 - } 1406 - 1407 - placeholders := make([]string, len(ids)) 1408 - args := make([]interface{}, len(ids)) 1409 - for i, id := range ids { 1410 - placeholders[i] = fmt.Sprintf("$%d", i+1) 1411 - args[i] = id 1412 - } 1413 - 1414 - query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)", 1415 - strings.Join(placeholders, ",")) 1416 - 1417 - _, err := p.db.ExecContext(ctx, query, args...) 
1418 - return err 1419 - } 1420 - 1421 - func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) { 1422 - query := ` 1423 - SELECT id, did, operation, cid, created_at, added_at 1424 - FROM plc_mempool 1425 - ORDER BY created_at ASC, id ASC 1426 - LIMIT 1 1427 - ` 1428 - 1429 - var op MempoolOperation 1430 - err := p.db.QueryRowContext(ctx, query).Scan( 1431 - &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt, 1432 - ) 1433 - if err == sql.ErrNoRows { 1434 - return nil, nil 1435 - } 1436 - if err != nil { 1437 - return nil, err 1438 - } 1439 - 1440 - return &op, nil 1441 - } 1442 - 1443 - func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) { 1444 - query := ` 1445 - SELECT id, did, operation, cid, created_at, added_at 1446 - FROM plc_mempool 1447 - ORDER BY created_at DESC, id DESC 1448 - LIMIT 1 1449 - ` 1450 - 1451 - var op MempoolOperation 1452 - err := p.db.QueryRowContext(ctx, query).Scan( 1453 - &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt, 1454 - ) 1455 - if err == sql.ErrNoRows { 1456 - return nil, nil 1457 - } 1458 - if err != nil { 1459 - return nil, err 1460 - } 1461 - 1462 - return &op, nil 1463 - } 1464 - 1465 - func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) { 1466 - query := "SELECT COUNT(DISTINCT did) FROM plc_mempool" 1467 - var count int 1468 - err := p.db.QueryRowContext(ctx, query).Scan(&count) 1469 - return count, err 1470 - } 1471 - 1472 - func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) { 1473 - query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool" 1474 - var size int64 1475 - err := p.db.QueryRowContext(ctx, query).Scan(&size) 1476 - return size, err 1477 - } 1478 - 1479 1166 // ===== CURSOR OPERATIONS ===== 1480 1167 1481 1168 func (p *PostgresDB) GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) { ··· 1548 1235 1549 1236 // ===== DID OPERATIONS ===== 1550 1237 1551 - func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int) error { 1238 + func (p *PostgresDB) UpsertDID(ctx context.Context, did string, bundleNum int, handle, pds string) error { 1552 1239 query := ` 1553 - INSERT INTO dids (did, bundle_numbers, created_at) 1554 - VALUES ($1, jsonb_build_array($2), CURRENT_TIMESTAMP) 1240 + INSERT INTO dids (did, handle, pds, bundle_numbers, created_at) 1241 + VALUES ($1, $2, $3, jsonb_build_array($4::integer), CURRENT_TIMESTAMP) 1555 1242 ON CONFLICT(did) DO UPDATE SET 1243 + handle = EXCLUDED.handle, 1244 + pds = EXCLUDED.pds, 1556 1245 bundle_numbers = CASE 1557 - WHEN dids.bundle_numbers ? 
$2::text THEN dids.bundle_numbers 1558 - ELSE dids.bundle_numbers || jsonb_build_array($2) 1559 - END 1246 + WHEN dids.bundle_numbers @> jsonb_build_array($4::integer) THEN dids.bundle_numbers 1247 + ELSE dids.bundle_numbers || jsonb_build_array($4::integer) 1248 + END, 1249 + updated_at = CURRENT_TIMESTAMP 1250 + ` 1251 + _, err := p.db.ExecContext(ctx, query, did, handle, pds, bundleNum) 1252 + return err 1253 + } 1254 + 1255 + // UpsertDIDFromMempool creates/updates DID record without adding to bundle_numbers 1256 + func (p *PostgresDB) UpsertDIDFromMempool(ctx context.Context, did string, handle, pds string) error { 1257 + query := ` 1258 + INSERT INTO dids (did, handle, pds, bundle_numbers, created_at) 1259 + VALUES ($1, $2, $3, '[]'::jsonb, CURRENT_TIMESTAMP) 1260 + ON CONFLICT(did) DO UPDATE SET 1261 + handle = EXCLUDED.handle, 1262 + pds = EXCLUDED.pds, 1263 + updated_at = CURRENT_TIMESTAMP 1560 1264 ` 1561 - _, err := p.db.ExecContext(ctx, query, did, bundleNum) 1265 + _, err := p.db.ExecContext(ctx, query, did, handle, pds) 1562 1266 return err 1563 1267 } 1564 1268 1565 1269 func (p *PostgresDB) GetDIDRecord(ctx context.Context, did string) (*DIDRecord, error) { 1566 1270 query := ` 1567 - SELECT did, bundle_numbers, created_at 1271 + SELECT did, handle, pds, bundle_numbers, created_at 1568 1272 FROM dids 1569 1273 WHERE did = $1 1570 1274 ` 1571 1275 1572 1276 var record DIDRecord 1573 1277 var bundleNumbersJSON []byte 1278 + var handle, pds sql.NullString 1574 1279 1575 1280 err := p.db.QueryRowContext(ctx, query, did).Scan( 1576 1281 &record.DID, 1282 + &handle, 1283 + &pds, 1577 1284 &bundleNumbersJSON, 1578 1285 &record.CreatedAt, 1579 1286 ) ··· 1581 1288 return nil, err 1582 1289 } 1583 1290 1291 + if handle.Valid { 1292 + record.Handle = handle.String 1293 + } 1294 + if pds.Valid { 1295 + record.CurrentPDS = pds.String 1296 + } 1297 + 1584 1298 if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil { 1585 1299 return nil, err 1586 1300 } ··· 1588 1302 return &record, nil 1589 1303 } 1590 1304 1305 + func (p *PostgresDB) GetDIDByHandle(ctx context.Context, handle string) (*DIDRecord, error) { 1306 + query := ` 1307 + SELECT did, handle, pds, bundle_numbers, created_at 1308 + FROM dids 1309 + WHERE handle = $1 1310 + ` 1311 + 1312 + var record DIDRecord 1313 + var bundleNumbersJSON []byte 1314 + var recordHandle, pds sql.NullString 1315 + 1316 + err := p.db.QueryRowContext(ctx, query, handle).Scan( 1317 + &record.DID, 1318 + &recordHandle, 1319 + &pds, 1320 + &bundleNumbersJSON, 1321 + &record.CreatedAt, 1322 + ) 1323 + if err != nil { 1324 + return nil, err 1325 + } 1326 + 1327 + if recordHandle.Valid { 1328 + record.Handle = recordHandle.String 1329 + } 1330 + if pds.Valid { 1331 + record.CurrentPDS = pds.String 1332 + } 1333 + 1334 + if err := json.Unmarshal(bundleNumbersJSON, &record.BundleNumbers); err != nil { 1335 + return nil, err 1336 + } 1337 + 1338 + return &record, nil 1339 + } 1340 + 1341 + // GetGlobalDIDInfo retrieves consolidated DID info from 'dids' and 'pds_repos' 1342 + func (p *PostgresDB) GetGlobalDIDInfo(ctx context.Context, did string) (*GlobalDIDInfo, error) { 1343 + query := ` 1344 + WITH primary_endpoints AS ( 1345 + SELECT DISTINCT ON (COALESCE(server_did, id::text)) 1346 + id 1347 + FROM endpoints 1348 + WHERE endpoint_type = 'pds' 1349 + ORDER BY COALESCE(server_did, id::text), discovered_at ASC 1350 + ) 1351 + SELECT 1352 + d.did, 1353 + d.handle, 1354 + d.pds, 1355 + d.bundle_numbers, 1356 + d.created_at, 1357 + 
COALESCE( 1358 + jsonb_agg( 1359 + jsonb_build_object( 1360 + 'id', pr.id, 1361 + 'endpoint_id', pr.endpoint_id, 1362 + 'endpoint', e.endpoint, 1363 + 'did', pr.did, 1364 + 'head', pr.head, 1365 + 'rev', pr.rev, 1366 + 'active', pr.active, 1367 + 'status', pr.status, 1368 + 'first_seen', pr.first_seen AT TIME ZONE 'UTC', 1369 + 'last_seen', pr.last_seen AT TIME ZONE 'UTC', 1370 + 'updated_at', pr.updated_at AT TIME ZONE 'UTC' 1371 + ) 1372 + ORDER BY pr.last_seen DESC 1373 + ) FILTER ( 1374 + WHERE pr.id IS NOT NULL AND pe.id IS NOT NULL 1375 + ), 1376 + '[]'::jsonb 1377 + ) AS hosting_on 1378 + FROM 1379 + dids d 1380 + LEFT JOIN 1381 + pds_repos pr ON d.did = pr.did 1382 + LEFT JOIN 1383 + endpoints e ON pr.endpoint_id = e.id 1384 + LEFT JOIN 1385 + primary_endpoints pe ON pr.endpoint_id = pe.id 1386 + WHERE 1387 + d.did = $1 1388 + GROUP BY 1389 + d.did, d.handle, d.pds, d.bundle_numbers, d.created_at 1390 + ` 1391 + 1392 + var info GlobalDIDInfo 1393 + var bundleNumbersJSON []byte 1394 + var hostingOnJSON []byte 1395 + var handle, pds sql.NullString 1396 + 1397 + err := p.db.QueryRowContext(ctx, query, did).Scan( 1398 + &info.DID, 1399 + &handle, 1400 + &pds, 1401 + &bundleNumbersJSON, 1402 + &info.CreatedAt, 1403 + &hostingOnJSON, 1404 + ) 1405 + if err != nil { 1406 + return nil, err 1407 + } 1408 + 1409 + if handle.Valid { 1410 + info.Handle = handle.String 1411 + } 1412 + if pds.Valid { 1413 + info.CurrentPDS = pds.String 1414 + } 1415 + 1416 + if err := json.Unmarshal(bundleNumbersJSON, &info.BundleNumbers); err != nil { 1417 + return nil, fmt.Errorf("failed to unmarshal bundle_numbers: %w", err) 1418 + } 1419 + 1420 + if err := json.Unmarshal(hostingOnJSON, &info.HostingOn); err != nil { 1421 + return nil, fmt.Errorf("failed to unmarshal hosting_on: %w", err) 1422 + } 1423 + 1424 + return &info, nil 1425 + } 1426 + 1591 1427 func (p *PostgresDB) AddBundleDIDs(ctx context.Context, bundleNum int, dids []string) error { 1592 1428 if len(dids) == 0 { 1593 1429 return nil ··· 1853 1689 } 1854 1690 return "0%" 1855 1691 } 1692 + 1693 + func (p *PostgresDB) UpsertPDSRepos(ctx context.Context, endpointID int64, repos []PDSRepoData) error { 1694 + if len(repos) == 0 { 1695 + return nil 1696 + } 1697 + 1698 + // Step 1: Load all existing repos for this endpoint into memory 1699 + query := ` 1700 + SELECT did, head, rev, active, status 1701 + FROM pds_repos 1702 + WHERE endpoint_id = $1 1703 + ` 1704 + 1705 + rows, err := p.db.QueryContext(ctx, query, endpointID) 1706 + if err != nil { 1707 + return err 1708 + } 1709 + 1710 + existingRepos := make(map[string]*PDSRepo) 1711 + for rows.Next() { 1712 + var repo PDSRepo 1713 + var head, rev, status sql.NullString 1714 + 1715 + err := rows.Scan(&repo.DID, &head, &rev, &repo.Active, &status) 1716 + if err != nil { 1717 + rows.Close() 1718 + return err 1719 + } 1720 + 1721 + if head.Valid { 1722 + repo.Head = head.String 1723 + } 1724 + if rev.Valid { 1725 + repo.Rev = rev.String 1726 + } 1727 + if status.Valid { 1728 + repo.Status = status.String 1729 + } 1730 + 1731 + existingRepos[repo.DID] = &repo 1732 + } 1733 + rows.Close() 1734 + 1735 + if err := rows.Err(); err != nil { 1736 + return err 1737 + } 1738 + 1739 + // Step 2: Compare and collect changes 1740 + var newRepos []PDSRepoData 1741 + var changedRepos []PDSRepoData 1742 + 1743 + for _, repo := range repos { 1744 + existing, exists := existingRepos[repo.DID] 1745 + if !exists { 1746 + // New repo 1747 + newRepos = append(newRepos, repo) 1748 + } else if existing.Head != repo.Head || 
1749 + existing.Rev != repo.Rev || 1750 + existing.Active != repo.Active || 1751 + existing.Status != repo.Status { 1752 + // Repo changed 1753 + changedRepos = append(changedRepos, repo) 1754 + } 1755 + } 1756 + 1757 + // Log comparison results 1758 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, total=%d, existing=%d, new=%d, changed=%d, unchanged=%d", 1759 + endpointID, len(repos), len(existingRepos), len(newRepos), len(changedRepos), 1760 + len(repos)-len(newRepos)-len(changedRepos)) 1761 + 1762 + // If nothing changed, return early 1763 + if len(newRepos) == 0 && len(changedRepos) == 0 { 1764 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, no changes detected, skipping database operations", endpointID) 1765 + return nil 1766 + } 1767 + 1768 + // Step 3: Execute batched operations 1769 + conn, err := p.pool.Acquire(ctx) 1770 + if err != nil { 1771 + return err 1772 + } 1773 + defer conn.Release() 1774 + 1775 + tx, err := conn.Begin(ctx) 1776 + if err != nil { 1777 + return err 1778 + } 1779 + defer tx.Rollback(ctx) 1780 + 1781 + // Insert new repos 1782 + if len(newRepos) > 0 { 1783 + _, err := tx.Exec(ctx, ` 1784 + CREATE TEMP TABLE temp_new_repos ( 1785 + did TEXT, 1786 + head TEXT, 1787 + rev TEXT, 1788 + active BOOLEAN, 1789 + status TEXT 1790 + ) ON COMMIT DROP 1791 + `) 1792 + if err != nil { 1793 + return err 1794 + } 1795 + 1796 + _, err = tx.Conn().CopyFrom( 1797 + ctx, 1798 + pgx.Identifier{"temp_new_repos"}, 1799 + []string{"did", "head", "rev", "active", "status"}, 1800 + pgx.CopyFromSlice(len(newRepos), func(i int) ([]interface{}, error) { 1801 + repo := newRepos[i] 1802 + return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil 1803 + }), 1804 + ) 1805 + if err != nil { 1806 + return err 1807 + } 1808 + 1809 + result, err := tx.Exec(ctx, ` 1810 + INSERT INTO pds_repos (endpoint_id, did, head, rev, active, status, first_seen, last_seen) 1811 + SELECT $1, did, head, rev, active, status, 1812 + TIMEZONE('UTC', NOW()), 1813 + TIMEZONE('UTC', NOW()) 1814 + FROM temp_new_repos 1815 + `, endpointID) 1816 + if err != nil { 1817 + return err 1818 + } 1819 + 1820 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, inserted %d new repos", endpointID, result.RowsAffected()) 1821 + } 1822 + 1823 + // Update changed repos 1824 + if len(changedRepos) > 0 { 1825 + _, err := tx.Exec(ctx, ` 1826 + CREATE TEMP TABLE temp_changed_repos ( 1827 + did TEXT, 1828 + head TEXT, 1829 + rev TEXT, 1830 + active BOOLEAN, 1831 + status TEXT 1832 + ) ON COMMIT DROP 1833 + `) 1834 + if err != nil { 1835 + return err 1836 + } 1837 + 1838 + _, err = tx.Conn().CopyFrom( 1839 + ctx, 1840 + pgx.Identifier{"temp_changed_repos"}, 1841 + []string{"did", "head", "rev", "active", "status"}, 1842 + pgx.CopyFromSlice(len(changedRepos), func(i int) ([]interface{}, error) { 1843 + repo := changedRepos[i] 1844 + return []interface{}{repo.DID, repo.Head, repo.Rev, repo.Active, repo.Status}, nil 1845 + }), 1846 + ) 1847 + if err != nil { 1848 + return err 1849 + } 1850 + 1851 + result, err := tx.Exec(ctx, ` 1852 + UPDATE pds_repos 1853 + SET head = t.head, 1854 + rev = t.rev, 1855 + active = t.active, 1856 + status = t.status, 1857 + last_seen = TIMEZONE('UTC', NOW()), 1858 + updated_at = TIMEZONE('UTC', NOW()) 1859 + FROM temp_changed_repos t 1860 + WHERE pds_repos.endpoint_id = $1 1861 + AND pds_repos.did = t.did 1862 + `, endpointID) 1863 + if err != nil { 1864 + return err 1865 + } 1866 + 1867 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, updated %d changed repos", endpointID, 
result.RowsAffected()) 1868 + } 1869 + 1870 + if err := tx.Commit(ctx); err != nil { 1871 + return err 1872 + } 1873 + 1874 + log.Verbose("UpsertPDSRepos: endpoint_id=%d, transaction committed successfully", endpointID) 1875 + return nil 1876 + } 1877 + 1878 + func (p *PostgresDB) GetPDSRepos(ctx context.Context, endpointID int64, activeOnly bool, limit int, offset int) ([]*PDSRepo, error) { 1879 + query := ` 1880 + SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at 1881 + FROM pds_repos 1882 + WHERE endpoint_id = $1 1883 + ` 1884 + 1885 + args := []interface{}{endpointID} 1886 + argIdx := 2 1887 + 1888 + if activeOnly { 1889 + query += " AND active = true" 1890 + } 1891 + 1892 + // Order by id (primary key) - fastest 1893 + query += " ORDER BY id DESC" 1894 + 1895 + if limit > 0 { 1896 + query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1) 1897 + args = append(args, limit, offset) 1898 + } 1899 + 1900 + rows, err := p.db.QueryContext(ctx, query, args...) 1901 + if err != nil { 1902 + return nil, err 1903 + } 1904 + defer rows.Close() 1905 + 1906 + var repos []*PDSRepo 1907 + for rows.Next() { 1908 + var repo PDSRepo 1909 + var head, rev, status sql.NullString 1910 + 1911 + err := rows.Scan( 1912 + &repo.ID, &repo.EndpointID, &repo.DID, &head, &rev, 1913 + &repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt, 1914 + ) 1915 + if err != nil { 1916 + return nil, err 1917 + } 1918 + 1919 + if head.Valid { 1920 + repo.Head = head.String 1921 + } 1922 + if rev.Valid { 1923 + repo.Rev = rev.String 1924 + } 1925 + if status.Valid { 1926 + repo.Status = status.String 1927 + } 1928 + 1929 + repos = append(repos, &repo) 1930 + } 1931 + 1932 + return repos, rows.Err() 1933 + } 1934 + 1935 + func (p *PostgresDB) GetReposByDID(ctx context.Context, did string) ([]*PDSRepo, error) { 1936 + query := ` 1937 + SELECT id, endpoint_id, did, head, rev, active, status, first_seen, last_seen, updated_at 1938 + FROM pds_repos 1939 + WHERE did = $1 1940 + ORDER BY last_seen DESC 1941 + ` 1942 + 1943 + rows, err := p.db.QueryContext(ctx, query, did) 1944 + if err != nil { 1945 + return nil, err 1946 + } 1947 + defer rows.Close() 1948 + 1949 + var repos []*PDSRepo 1950 + for rows.Next() { 1951 + var repo PDSRepo 1952 + var head, rev, status sql.NullString 1953 + 1954 + err := rows.Scan( 1955 + &repo.ID, &repo.EndpointID, &repo.DID, &head, &rev, 1956 + &repo.Active, &status, &repo.FirstSeen, &repo.LastSeen, &repo.UpdatedAt, 1957 + ) 1958 + if err != nil { 1959 + return nil, err 1960 + } 1961 + 1962 + if head.Valid { 1963 + repo.Head = head.String 1964 + } 1965 + if rev.Valid { 1966 + repo.Rev = rev.String 1967 + } 1968 + if status.Valid { 1969 + repo.Status = status.String 1970 + } 1971 + 1972 + repos = append(repos, &repo) 1973 + } 1974 + 1975 + return repos, rows.Err() 1976 + } 1977 + 1978 + func (p *PostgresDB) GetPDSRepoStats(ctx context.Context, endpointID int64) (map[string]interface{}, error) { 1979 + query := ` 1980 + SELECT 1981 + COUNT(*) as total_repos, 1982 + COUNT(*) FILTER (WHERE active = true) as active_repos, 1983 + COUNT(*) FILTER (WHERE active = false) as inactive_repos, 1984 + COUNT(*) FILTER (WHERE status IS NOT NULL AND status != '') as repos_with_status, 1985 + COUNT(*) FILTER (WHERE updated_at > CURRENT_TIMESTAMP - INTERVAL '1 hour') as recent_changes 1986 + FROM pds_repos 1987 + WHERE endpoint_id = $1 1988 + ` 1989 + 1990 + var totalRepos, activeRepos, inactiveRepos, reposWithStatus, recentChanges int64 1991 + 1992 + err := 
p.db.QueryRowContext(ctx, query, endpointID).Scan( 1993 + &totalRepos, &activeRepos, &inactiveRepos, &reposWithStatus, &recentChanges, 1994 + ) 1995 + if err != nil { 1996 + return nil, err 1997 + } 1998 + 1999 + return map[string]interface{}{ 2000 + "total_repos": totalRepos, 2001 + "active_repos": activeRepos, 2002 + "inactive_repos": inactiveRepos, 2003 + "repos_with_status": reposWithStatus, 2004 + "recent_changes": recentChanges, 2005 + }, nil 2006 + } 2007 + 2008 + // GetTableSizes fetches size information (in bytes) for all tables in the specified schema. 2009 + func (p *PostgresDB) GetTableSizes(ctx context.Context, schema string) ([]TableSizeInfo, error) { 2010 + // Query now selects raw byte values directly 2011 + query := ` 2012 + SELECT 2013 + c.relname AS table_name, 2014 + pg_total_relation_size(c.oid) AS total_bytes, 2015 + pg_relation_size(c.oid) AS table_heap_bytes, 2016 + pg_indexes_size(c.oid) AS indexes_bytes 2017 + FROM 2018 + pg_class c 2019 + LEFT JOIN 2020 + pg_namespace n ON n.oid = c.relnamespace 2021 + WHERE 2022 + c.relkind = 'r' -- 'r' = ordinary table 2023 + AND n.nspname = $1 2024 + ORDER BY 2025 + total_bytes DESC; 2026 + ` 2027 + rows, err := p.db.QueryContext(ctx, query, schema) 2028 + if err != nil { 2029 + return nil, fmt.Errorf("failed to query table sizes: %w", err) 2030 + } 2031 + defer rows.Close() 2032 + 2033 + var results []TableSizeInfo 2034 + for rows.Next() { 2035 + var info TableSizeInfo 2036 + // Scan directly into int64 fields 2037 + if err := rows.Scan( 2038 + &info.TableName, 2039 + &info.TotalBytes, 2040 + &info.TableHeapBytes, 2041 + &info.IndexesBytes, 2042 + ); err != nil { 2043 + return nil, fmt.Errorf("failed to scan table size row: %w", err) 2044 + } 2045 + results = append(results, info) 2046 + } 2047 + if err := rows.Err(); err != nil { 2048 + return nil, fmt.Errorf("error iterating table size rows: %w", err) 2049 + } 2050 + 2051 + return results, nil 2052 + } 2053 + 2054 + // GetIndexSizes fetches size information (in bytes) for all indexes in the specified schema. 
2055 + func (p *PostgresDB) GetIndexSizes(ctx context.Context, schema string) ([]IndexSizeInfo, error) { 2056 + // Query now selects raw byte values directly 2057 + query := ` 2058 + SELECT 2059 + c.relname AS index_name, 2060 + COALESCE(i.indrelid::regclass::text, 'N/A') AS table_name, 2061 + pg_relation_size(c.oid) AS index_bytes 2062 + FROM 2063 + pg_class c 2064 + LEFT JOIN 2065 + pg_index i ON i.indexrelid = c.oid 2066 + LEFT JOIN 2067 + pg_namespace n ON n.oid = c.relnamespace 2068 + WHERE 2069 + c.relkind = 'i' -- 'i' = index 2070 + AND n.nspname = $1 2071 + ORDER BY 2072 + index_bytes DESC; 2073 + ` 2074 + rows, err := p.db.QueryContext(ctx, query, schema) 2075 + if err != nil { 2076 + return nil, fmt.Errorf("failed to query index sizes: %w", err) 2077 + } 2078 + defer rows.Close() 2079 + 2080 + var results []IndexSizeInfo 2081 + for rows.Next() { 2082 + var info IndexSizeInfo 2083 + var tableName sql.NullString 2084 + // Scan directly into int64 field 2085 + if err := rows.Scan( 2086 + &info.IndexName, 2087 + &tableName, 2088 + &info.IndexBytes, 2089 + ); err != nil { 2090 + return nil, fmt.Errorf("failed to scan index size row: %w", err) 2091 + } 2092 + if tableName.Valid { 2093 + info.TableName = tableName.String 2094 + } else { 2095 + info.TableName = "N/A" 2096 + } 2097 + results = append(results, info) 2098 + } 2099 + if err := rows.Err(); err != nil { 2100 + return nil, fmt.Errorf("error iterating index size rows: %w", err) 2101 + } 2102 + 2103 + return results, nil 2104 + }
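Note on the size-introspection helpers above: they return raw byte counts and leave formatting to the caller. A minimal consumer sketch, assuming an initialized *PostgresDB named db and a ctx context (the print format is illustrative, not part of the API):

	tables, err := db.GetTableSizes(ctx, "public")
	if err != nil {
		log.Fatal("failed to fetch table sizes: %v", err)
	}
	for _, t := range tables {
		// TotalBytes covers heap + indexes (+ TOAST); all values are raw bytes
		fmt.Printf("%-20s total=%d heap=%d indexes=%d\n",
			t.TableName, t.TotalBytes, t.TableHeapBytes, t.IndexesBytes)
	}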
+89 -14
internal/storage/types.go
··· 1 1 package storage 2 2 3 3 import ( 4 + "database/sql" 4 5 "fmt" 5 6 "path/filepath" 6 7 "time" ··· 25 26 LastChecked time.Time 26 27 Status int 27 28 IP string 29 + IPv6 string 28 30 IPResolvedAt time.Time 31 + Valid bool 29 32 UpdatedAt time.Time 30 33 } 31 34 ··· 52 55 Status int 53 56 ResponseTime float64 54 57 UserCount int64 55 - Version string // NEW: Add this field 58 + Version string 59 + UsedIP string // NEW: Track which IP was actually used 56 60 ScanData *EndpointScanData 57 61 ScannedAt time.Time 58 62 } ··· 73 77 74 78 // EndpointFilter for querying endpoints 75 79 type EndpointFilter struct { 76 - Type string // "pds", "labeler", etc. 80 + Type string 77 81 Status string 78 82 MinUserCount int64 79 - OnlyStale bool // NEW: Only return endpoints that need re-checking 80 - RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale 83 + OnlyStale bool 84 + OnlyValid bool 85 + RecheckInterval time.Duration 86 + Random bool 81 87 Limit int 82 88 Offset int 83 89 } ··· 116 122 StartTime time.Time 117 123 EndTime time.Time 118 124 BoundaryCIDs []string 119 - DIDs []string 125 + DIDCount int // Changed from DIDs []string 120 126 Hash string 121 127 CompressedHash string 122 128 CompressedSize int64 ··· 139 145 return 10000 140 146 } 141 147 142 - // MempoolOperation represents an operation waiting to be bundled 143 - type MempoolOperation struct { 144 - ID int64 145 - DID string 146 - Operation string 147 - CID string 148 - CreatedAt time.Time 149 - AddedAt time.Time 148 + type PLCHistoryPoint struct { 149 + Date string `json:"date"` 150 + BundleNumber int `json:"last_bundle_number"` 151 + OperationCount int `json:"operations"` 152 + UncompressedSize int64 `json:"size_uncompressed"` 153 + CompressedSize int64 `json:"size_compressed"` 154 + CumulativeUncompressed int64 `json:"cumulative_uncompressed"` 155 + CumulativeCompressed int64 `json:"cumulative_compressed"` 150 156 } 151 157 152 158 // ScanCursor stores scanning progress ··· 160 166 // DIDRecord represents a DID entry in the database 161 167 type DIDRecord struct { 162 168 DID string `json:"did"` 169 + Handle string `json:"handle,omitempty"` 170 + CurrentPDS string `json:"current_pds,omitempty"` 171 + LastOpAt time.Time `json:"last_op_at,omitempty"` 163 172 BundleNumbers []int `json:"bundle_numbers"` 164 173 CreatedAt time.Time `json:"created_at"` 165 174 } 166 175 176 + // GlobalDIDInfo consolidates DID data from PLC and PDS tables 177 + type GlobalDIDInfo struct { 178 + DIDRecord // Embeds all fields: DID, Handle, CurrentPDS, etc. 
179 + HostingOn []*PDSRepo `json:"hosting_on"` 180 + } 181 + 167 182 // IPInfo represents IP information (stored with IP as primary key) 168 183 type IPInfo struct { 169 184 IP string `json:"ip"` ··· 174 189 ASNOrg string `json:"asn_org,omitempty"` 175 190 IsDatacenter bool `json:"is_datacenter"` 176 191 IsVPN bool `json:"is_vpn"` 192 + IsCrawler bool `json:"is_crawler"` 193 + IsTor bool `json:"is_tor"` 194 + IsProxy bool `json:"is_proxy"` 177 195 Latitude float32 `json:"latitude,omitempty"` 178 196 Longitude float32 `json:"longitude,omitempty"` 179 197 RawData map[string]interface{} `json:"raw_data,omitempty"` ··· 181 199 UpdatedAt time.Time `json:"updated_at"` 182 200 } 183 201 202 + // IsHome returns true if this is a residential/home IP 203 + // (not crawler, datacenter, tor, proxy, or vpn) 204 + func (i *IPInfo) IsHome() bool { 205 + return !i.IsCrawler && !i.IsDatacenter && !i.IsTor && !i.IsProxy && !i.IsVPN 206 + } 207 + 184 208 // PDSListItem is a virtual type created by JOIN for /pds endpoint 185 209 type PDSListItem struct { 186 210 // From endpoints table 187 211 ID int64 188 212 Endpoint string 189 - ServerDID string // NEW: Add this 213 + ServerDID string 190 214 DiscoveredAt time.Time 191 215 LastChecked time.Time 192 216 Status int 193 217 IP string 218 + IPv6 string 219 + Valid bool // NEW 194 220 195 221 // From latest endpoint_scans (via JOIN) 196 222 LatestScan *struct { ··· 242 268 FirstSeen time.Time `json:"first_seen"` 243 269 LastSeen time.Time `json:"last_seen"` 244 270 } 271 + 272 + type PDSRepo struct { 273 + ID int64 `json:"id"` 274 + EndpointID int64 `json:"endpoint_id"` 275 + Endpoint string `json:"endpoint,omitempty"` 276 + DID string `json:"did"` 277 + Head string `json:"head,omitempty"` 278 + Rev string `json:"rev,omitempty"` 279 + Active bool `json:"active"` 280 + Status string `json:"status,omitempty"` 281 + FirstSeen time.Time `json:"first_seen"` 282 + LastSeen time.Time `json:"last_seen"` 283 + UpdatedAt time.Time `json:"updated_at"` 284 + } 285 + 286 + type PDSRepoData struct { 287 + DID string 288 + Head string 289 + Rev string 290 + Active bool 291 + Status string 292 + } 293 + 294 + type DIDBackfillInfo struct { 295 + DID string 296 + LastBundleNum int 297 + } 298 + 299 + type DIDStateUpdateData struct { 300 + DID string 301 + Handle sql.NullString // Use sql.NullString for potential NULLs 302 + PDS sql.NullString 303 + OpTime time.Time 304 + } 305 + 306 + // TableSizeInfo holds size information for a database table. 307 + type TableSizeInfo struct { 308 + TableName string `json:"table_name"` 309 + TotalBytes int64 `json:"total_bytes"` // Raw bytes 310 + TableHeapBytes int64 `json:"table_heap_bytes"` // Raw bytes 311 + IndexesBytes int64 `json:"indexes_bytes"` // Raw bytes 312 + } 313 + 314 + // IndexSizeInfo holds size information for a database index. 315 + type IndexSizeInfo struct { 316 + IndexName string `json:"index_name"` 317 + TableName string `json:"table_name"` 318 + IndexBytes int64 `json:"index_bytes"` // Raw bytes 319 + }
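With the extra boolean flags in place, IsHome() collapses them into a single residential-vs-hosted classification. A short sketch of intended use, assuming db is a connected *PostgresDB and ep is an *Endpoint with a resolved IPv4 address (the log line is illustrative):

	info, err := db.GetIPInfo(ctx, ep.IP)
	if err == nil && info.IsHome() {
		// none of the crawler/datacenter/Tor/proxy/VPN flags are set
		log.Verbose("%s appears to be served from a residential IP", ep.Endpoint)
	}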
+2 -2
internal/worker/scheduler.go
··· 5 5 "sync" 6 6 "time" 7 7 8 - "github.com/atscan/atscanner/internal/log" 9 - "github.com/atscan/atscanner/internal/monitor" 8 + "github.com/atscan/atscand/internal/log" 9 + "github.com/atscan/atscand/internal/monitor" 10 10 ) 11 11 12 12 type Job struct {
+125
utils/db-sizes.sh
··· 1 + #!/bin/bash 2 + 3 + # === Configuration === 4 + CONFIG_FILE="config.yaml" # Path to your config file 5 + SCHEMA_NAME="public" # Replace if your schema is different 6 + 7 + # Check if config file exists 8 + if [ ! -f "$CONFIG_FILE" ]; then 9 + echo "Error: Config file not found at '$CONFIG_FILE'" 10 + exit 1 11 + fi 12 + 13 + # Check if yq is installed 14 + if ! command -v yq &> /dev/null; then 15 + echo "Error: 'yq' command not found. Please install yq (Go version by Mike Farah)." 16 + echo "See: https://github.com/mikefarah/yq/" 17 + exit 1 18 + fi 19 + 20 + echo "--- Reading connection info from '$CONFIG_FILE' ---" 21 + 22 + # === Extract Database Config using yq === 23 + DB_TYPE=$(yq e '.database.type' "$CONFIG_FILE") 24 + DB_CONN_STRING=$(yq e '.database.path' "$CONFIG_FILE") # This is likely a URI 25 + 26 + if [ -z "$DB_TYPE" ] || [ -z "$DB_CONN_STRING" ]; then 27 + echo "Error: Could not read database type or path from '$CONFIG_FILE'." 28 + exit 1 29 + fi 30 + 31 + # === Parse the Connection String === 32 + DB_USER="" 33 + DB_PASSWORD="" 34 + DB_HOST="localhost" # Default 35 + DB_PORT="5432" # Default 36 + DB_NAME="" 37 + 38 + # Use regex to parse the URI (handles postgres:// or postgresql://, optional password/port, and query parameters) 39 + if [[ "$DB_CONN_STRING" =~ ^(postgres|postgresql)://([^:]+)(:([^@]+))?@([^:/]+)(:([0-9]+))?/([^?]+)(\?.+)?$ ]]; then 40 + DB_USER="${BASH_REMATCH[2]}" 41 + DB_PASSWORD="${BASH_REMATCH[4]}" # Optional group 42 + DB_HOST="${BASH_REMATCH[5]}" 43 + DB_PORT="${BASH_REMATCH[7]:-$DB_PORT}" # Use extracted port or default 44 + DB_NAME="${BASH_REMATCH[8]}" # Database name before the '?' 45 + else 46 + echo "Error: Could not parse database connection string URI: $DB_CONN_STRING" 47 + exit 1 48 + fi 49 + 50 + # Set PGPASSWORD environment variable if password was found 51 + if [ -n "$DB_PASSWORD" ]; then 52 + export PGPASSWORD="$DB_PASSWORD" 53 + else 54 + echo "Warning: No password found in connection string. Relying on ~/.pgpass or password prompt." 55 + unset PGPASSWORD 56 + fi 57 + 58 + echo "--- Database Size Investigation ---" 59 + echo "Database: $DB_NAME" 60 + echo "Schema: $SCHEMA_NAME" 61 + echo "User: $DB_USER" 62 + echo "Host: $DB_HOST:$DB_PORT" 63 + echo "-----------------------------------" 64 + 65 + # === Table Sizes === 66 + echo "" 67 + echo "## Table Sizes (Schema: $SCHEMA_NAME) ##" 68 + # Removed --tuples-only and --no-align, added -P footer=off 69 + psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF 70 + SELECT 71 + c.relname AS "Table Name", 72 + pg_size_pretty(pg_total_relation_size(c.oid)) AS "Total Size", 73 + pg_size_pretty(pg_relation_size(c.oid)) AS "Table Heap Size", 74 + pg_size_pretty(pg_indexes_size(c.oid)) AS "Indexes Size" 75 + FROM 76 + pg_class c 77 + LEFT JOIN 78 + pg_namespace n ON n.oid = c.relnamespace 79 + WHERE 80 + c.relkind = 'r' -- 'r' = ordinary table 81 + AND n.nspname = '$SCHEMA_NAME' 82 + ORDER BY 83 + pg_total_relation_size(c.oid) DESC; 84 + EOF 85 + 86 + if [ $? -ne 0 ]; then 87 + echo "Error querying table sizes. Check connection details, permissions, and password." 
88 + unset PGPASSWORD 89 + exit 1 90 + fi 91 + 92 + # === Index Sizes === 93 + echo "" 94 + echo "## Index Sizes (Schema: $SCHEMA_NAME) ##" 95 + # Removed --tuples-only and --no-align, added -P footer=off 96 + psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -p "$DB_PORT" -X -q -P footer=off <<EOF 97 + SELECT 98 + c.relname AS "Index Name", 99 + i.indrelid::regclass AS "Table Name", -- Show associated table 100 + pg_size_pretty(pg_relation_size(c.oid)) AS "Index Size" 101 + FROM 102 + pg_class c 103 + LEFT JOIN 104 + pg_index i ON i.indexrelid = c.oid 105 + LEFT JOIN 106 + pg_namespace n ON n.oid = c.relnamespace 107 + WHERE 108 + c.relkind = 'i' -- 'i' = index 109 + AND n.nspname = '$SCHEMA_NAME' 110 + ORDER BY 111 + pg_relation_size(c.oid) DESC; 112 + EOF 113 + 114 + if [ $? -ne 0 ]; then 115 + echo "Error querying index sizes. Check connection details, permissions, and password." 116 + unset PGPASSWORD 117 + exit 1 118 + fi 119 + 120 + echo "" 121 + echo "-----------------------------------" 122 + echo "Investigation complete." 123 + 124 + # Unset the password variable for security 125 + unset PGPASSWORD
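Usage note: db-sizes.sh resolves config.yaml relative to the working directory, so run it from wherever the config lives (typically the repository root), e.g. bash utils/db-sizes.sh. It reports the same figures as the new GetTableSizes/GetIndexSizes helpers, but pretty-printed via pg_size_pretty rather than as raw bytes.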
+123
utils/import-labels.js
··· 1 + import { file, write } from "bun"; 2 + import { join } from "path"; 3 + import { mkdir } from "fs/promises"; 4 + import { init, compress } from "@bokuweb/zstd-wasm"; 5 + 6 + // --- Configuration --- 7 + const CSV_FILE = process.argv[2]; 8 + const CONFIG_FILE = "config.yaml"; 9 + const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance) 10 + // --------------------- 11 + 12 + if (!CSV_FILE) { 13 + console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>"); 14 + process.exit(1); 15 + } 16 + 17 + console.log("========================================"); 18 + console.log("PLC Operation Labels Import (Bun + WASM)"); 19 + console.log("========================================"); 20 + 21 + // 1. Read and parse config 22 + console.log(`Loading config from ${CONFIG_FILE}...`); 23 + const configFile = await file(CONFIG_FILE).text(); 24 + const config = Bun.YAML.parse(configFile); 25 + const bundleDir = config?.plc?.bundle_dir; 26 + 27 + if (!bundleDir) { 28 + console.error("Error: Could not parse plc.bundle_dir from config.yaml"); 29 + process.exit(1); 30 + } 31 + 32 + const FINAL_LABELS_DIR = join(bundleDir, "labels"); 33 + await mkdir(FINAL_LABELS_DIR, { recursive: true }); 34 + 35 + console.log(`CSV File: ${CSV_FILE}`); 36 + console.log(`Output Dir: ${FINAL_LABELS_DIR}`); 37 + console.log(""); 38 + 39 + // 2. Initialize Zstd WASM module 40 + await init(); 41 + 42 + // --- Pass 1: Read entire file into memory and group by bundle --- 43 + console.log("Pass 1/2: Reading and grouping all lines by bundle..."); 44 + console.warn("This will use a large amount of RAM!"); 45 + 46 + const startTime = Date.now(); 47 + const bundles = new Map(); // Map<string, string[]> 48 + let lineCount = 0; 49 + 50 + const inputFile = file(CSV_FILE); 51 + const fileStream = inputFile.stream(); 52 + const decoder = new TextDecoder(); 53 + let remainder = ""; 54 + 55 + for await (const chunk of fileStream) { 56 + const text = remainder + decoder.decode(chunk, { stream: true }); // stream: true keeps multi-byte chars split across chunks intact 57 + const lines = text.split("\n"); 58 + remainder = lines.pop() || ""; 59 + 60 + for (const line of lines) { 61 + if (line === "") continue; 62 + lineCount++; 63 + 64 + if (lineCount === 1 && line.startsWith("bundle,")) { 65 + continue; // Skip header 66 + } 67 + 68 + const firstCommaIndex = line.indexOf(","); 69 + if (firstCommaIndex === -1) { 70 + console.warn(`Skipping malformed line: ${line}`); 71 + continue; 72 + } 73 + const bundleNumStr = line.substring(0, firstCommaIndex); 74 + const bundleKey = bundleNumStr.padStart(6, "0"); 75 + 76 + // Add line to the correct bundle's array 77 + if (!bundles.has(bundleKey)) { 78 + bundles.set(bundleKey, []); 79 + } 80 + bundles.get(bundleKey).push(line); 81 + } 82 + } 83 + // Flush the decoder and keep a final line that lacks a trailing newline (it would otherwise be silently dropped) 84 + remainder += decoder.decode(); 85 + if (remainder !== "") { 86 + lineCount++; 87 + const ci = remainder.indexOf(","); 88 + if (ci !== -1) { 89 + const key = remainder.substring(0, ci).padStart(6, "0"); 90 + if (!bundles.has(key)) bundles.set(key, []); 91 + bundles.get(key).push(remainder); 92 + } 93 + }
94 + 95 + console.log(`Finished reading ${lineCount.toLocaleString()} lines.`); 96 + console.log(`Found ${bundles.size} unique bundles.`); 97 + 98 + // --- Pass 2: Compress and write each bundle --- 99 + console.log("\nPass 2/2: Compressing and writing bundle files..."); 100 + let i = 0; 101 + for (const [bundleKey, lines] of bundles.entries()) { 102 + i++; 103 + console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`); 104 + 105 + // Join all lines for this bundle into one big string (trailing newline matches the shell importer's line-oriented output) 106 + const content = lines.join("\n") + "\n"; 107 + 108 + // Compress the string 109 + const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL); 110 + 111 + // Write the compressed data to the file 112 + const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`); 113 + await write(outPath, compressedData); 114 + } 115 + 116 + // 3. Clean up 117 + const totalTime = (Date.now() - startTime) / 1000; 118 + console.log("\n========================================"); 119 + console.log("Import Summary"); 120 + console.log("========================================"); 121 + console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`); 122 + console.log(`Total lines processed: ${lineCount.toLocaleString()}`); 123 + console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
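A minimal run of the Bun importer above, assuming a Bun version new enough to ship the Bun.YAML built-in and a config.yaml whose plc.bundle_dir points at ./plc_bundles; labels.csv stands in for the real export:

    bun add @bokuweb/zstd-wasm
    bun run utils/import-labels.js labels.csv
    # spot-check one emitted bundle with the zstd CLI, if installed:
    zstdcat ./plc_bundles/labels/000000.csv.zst | head -3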
+91
utils/import-labels.sh
··· 1 + #!/bin/bash 2 + # utils/import-labels.sh (single-pass sorted-pipe importer) 3 + 4 + set -e 5 + 6 + if [ $# -lt 1 ]; then 7 + echo "Usage: ./utils/import-labels.sh <csv-file>" 8 + exit 1 9 + fi 10 + 11 + CSV_FILE="$1" 12 + CONFIG_FILE="config.yaml" 13 + 14 + [ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1 15 + [ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1 16 + 17 + # Extract bundle directory path 18 + BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1) 19 + 20 + [ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1 21 + 22 + FINAL_LABELS_DIR="$BUNDLE_DIR/labels" 23 + 24 + echo "========================================" 25 + echo "PLC Operation Labels Import (Sorted Pipe)" 26 + echo "========================================" 27 + echo "CSV File: $CSV_FILE" 28 + echo "Output Dir: $FINAL_LABELS_DIR" 29 + echo "" 30 + 31 + # Ensure the final directory exists 32 + mkdir -p "$FINAL_LABELS_DIR" 33 + 34 + echo "Streaming, sorting, and compressing on the fly..." 35 + echo "This will take time. 'pv' shows progress of the tail stage only." 36 + echo "'sort' emits nothing until it has read all input, so awk starts after 'pv' finishes." 37 + echo "" 38 + 39 + # This is the single-pass pipeline 40 + tail -n +2 "$CSV_FILE" | \ 41 + pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \ 42 + sort -t, -k1,1n | \ 43 + awk -F',' -v final_dir="$FINAL_LABELS_DIR" ' 44 + # This awk script EXPECTS input sorted by bundle number (col 1) 45 + BEGIN { 46 + # last_bundle_num tracks the bundle we are currently writing 47 + last_bundle_num = -1 48 + # cmd holds the current zstd pipe command 49 + cmd = "" 50 + } 51 + { 52 + current_bundle_num = $1 53 + 54 + # Check if the bundle number has changed 55 + if (current_bundle_num != last_bundle_num) { 56 + 57 + # If it changed, and we have an old pipe open, close it 58 + if (last_bundle_num != -1) { 59 + close(cmd) 60 + } 61 + 62 + # Create the new pipe command (-q -f: quiet, and overwrite leftovers from a previous run instead of refusing) 63 + outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num) 64 + cmd = "zstd -q -f -T0 -o " outfile 65 + 66 + # Update the tracker 67 + last_bundle_num = current_bundle_num 68 + 69 + # Print progress to stderr 70 + printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr" 71 + } 72 + 73 + # Print the current line ($0) to the open pipe 74 + # The first time this runs for a bundle, it opens the pipe 75 + # Subsequent times, it writes to the already-open pipe 76 + print $0 | cmd 77 + } 78 + # END block: close the very last pipe 79 + END { 80 + if (last_bundle_num != -1) { 81 + close(cmd) 82 + } 83 + printf " Finished. Total lines: %d\n", NR > "/dev/stderr" 84 + }' 85 + 86 + echo "" 87 + echo "========================================" 88 + echo "Import Summary" 89 + echo "========================================" 90 + echo "✓ Import completed successfully!" 91 + echo "Label files are stored in: $FINAL_LABELS_DIR"
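The pipeline above only relies on the first CSV column being the bundle number (the Bun importer skips a "bundle,"-prefixed header, so the shape matches); a hypothetical two-row input, with did/label as illustrative column names:

    bundle,did,label
    12,did:plc:aaa111,spam

After a run, the decompressed output should have as many lines as the CSV has data rows (substitute the bundle_dir value from config.yaml for $BUNDLE_DIR):

    zstdcat "$BUNDLE_DIR"/labels/*.csv.zst | wc -l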
+2 -2
utils/migrate-ipinfo.sh
··· 4 4 # Configuration (edit these) 5 5 DB_HOST="localhost" 6 6 DB_PORT="5432" 7 - DB_NAME="atscanner" 8 - DB_USER="atscanner" 7 + DB_NAME="atscand" 8 + DB_USER="atscand" 9 9 DB_PASSWORD="Noor1kooz5eeFai9leZagh5ua5eihai4" 10 10 11 11 # Colors for output
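The atscanner→atscand rename above assumes the underlying Postgres role and database were renamed as well; a one-time migration sketch, assuming superuser access and no open connections to the old database:

    psql -U postgres -c 'ALTER DATABASE atscanner RENAME TO atscand;'
    psql -U postgres -c 'ALTER ROLE atscanner RENAME TO atscand;'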
+199
utils/vuln-scanner-parallel.sh
··· 1 + #!/bin/bash 2 + 3 + # Configuration 4 + API_HOST="${API_HOST:-http://localhost:8080}" 5 + TIMEOUT=5 6 + PARALLEL_JOBS=20 7 + OUTPUT_DIR="./pds_scan_results" 8 + TIMESTAMP=$(date +%Y%m%d_%H%M%S) 9 + RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt" 10 + FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt" 11 + 12 + # Paths to check 13 + PATHS=( 14 + "/info.php" 15 + "/phpinfo.php" 16 + "/test.php" 17 + "/admin" 18 + "/admin.php" 19 + "/wp-admin" 20 + "/robots.txt" 21 + "/.env" 22 + "/.git/config" 23 + "/config.php" 24 + "/backup" 25 + "/db.sql" 26 + "/.DS_Store" 27 + "/server-status" 28 + "/.well-known/security.txt" 29 + ) 30 + 31 + # Colors 32 + RED='\033[0;31m' 33 + GREEN='\033[0;32m' 34 + YELLOW='\033[1;33m' 35 + BLUE='\033[0;34m' 36 + NC='\033[0m' 37 + 38 + # Check dependencies 39 + if ! command -v jq &> /dev/null; then 40 + echo -e "${RED}Error: jq is required${NC}" 41 + echo "Install: sudo apt-get install jq" 42 + exit 1 43 + fi 44 + 45 + if ! command -v parallel &> /dev/null; then 46 + echo -e "${RED}Error: GNU parallel is required${NC}" 47 + echo "Install: sudo apt-get install parallel (or brew install parallel)" 48 + exit 1 49 + fi 50 + 51 + mkdir -p "$OUTPUT_DIR" 52 + 53 + echo -e "${BLUE}╔════════════════════════════════════════╗${NC}" 54 + echo -e "${BLUE}║ PDS Security Scanner (Parallel) ║${NC}" 55 + echo -e "${BLUE}╚════════════════════════════════════════╝${NC}" 56 + echo "" 57 + echo "API Host: $API_HOST" 58 + echo "Timeout: ${TIMEOUT}s per request" 59 + echo "Parallel jobs: ${PARALLEL_JOBS}" 60 + echo "Paths to check: ${#PATHS[@]}" 61 + echo "" 62 + 63 + # Scan function - will be called by GNU parallel 64 + scan_endpoint() { 65 + local endpoint="$1" 66 + local timeout="$2" 67 + shift 2 68 + local paths=("$@") 69 + 70 + for path in "${paths[@]}"; do 71 + url="${endpoint}${path}" 72 + 73 + response=$(curl -s -o /dev/null -w "%{http_code}" \ 74 + --max-time "$timeout" \ 75 + --connect-timeout "$timeout" \ 76 + --retry 0 \ 77 + -A "Mozilla/5.0 (Security Scanner)" \ 78 + "$url" 2>/dev/null) 79 + 80 + if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then 81 + if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then 82 + echo "FOUND|$endpoint|$path|$response" 83 + elif [ "$response" != "403" ] && [ "$response" != "401" ]; then 84 + echo "MAYBE|$endpoint|$path|$response" 85 + fi 86 + fi 87 + done 88 + } 89 + 90 + export -f scan_endpoint 91 + 92 + # Fetch active PDS endpoints 93 + echo -e "${YELLOW}Fetching active PDS endpoints...${NC}" 94 + ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \ 95 + jq -r '.[].endpoint' 2>/dev/null) 96 + 97 + if [ -z "$ENDPOINTS" ]; then 98 + echo -e "${RED}Error: Could not fetch endpoints from API${NC}" 99 + echo "Check that the API is running at: $API_HOST" 100 + exit 1 101 + fi 102 + 103 + ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ') 104 + echo -e "${GREEN}✓ Found ${ENDPOINT_COUNT} active PDS endpoints${NC}" 105 + echo "" 106 + 107 + # Write header to results file 108 + { 109 + echo "PDS Security Scan Results" 110 + echo "=========================" 111 + echo "Scan started: $(date)" 112 + echo "Endpoints scanned: ${ENDPOINT_COUNT}" 113 + echo "Paths checked: ${#PATHS[@]}" 114 + echo "Parallel jobs: ${PARALLEL_JOBS}" 115 + echo "" 116 + echo "Results:" 117 + echo "--------" 118 + } > "$RESULTS_FILE" 119 + 120 + # Run parallel scan 121 + echo -e "${YELLOW}Starting parallel scan...${NC}" 122 + echo -e "${BLUE}(This may take a few minutes depending on 
endpoint count)${NC}" 123 + echo "" 124 + 125 + echo "$ENDPOINTS" | \ 126 + parallel \ 127 + -j "$PARALLEL_JOBS" \ 128 + --bar \ 129 + --joblog "${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" \ 130 + scan_endpoint {} "$TIMEOUT" "${PATHS[@]}" \ 131 + >> "$RESULTS_FILE" 132 + 133 + echo "" 134 + echo -e "${YELLOW}Processing results...${NC}" 135 + 136 + # Count results (grep -c prints 0 itself on no match, so an "|| echo 0" fallback would capture "0" twice and break the -gt test below) 137 + FOUND_COUNT=$(grep -c "^FOUND|" "$RESULTS_FILE" 2>/dev/null); FOUND_COUNT=${FOUND_COUNT:-0} 138 + MAYBE_COUNT=$(grep -c "^MAYBE|" "$RESULTS_FILE" 2>/dev/null); MAYBE_COUNT=${MAYBE_COUNT:-0} 139 + 140 + # Extract found URLs to separate file 141 + { 142 + echo "Found URLs (HTTP 200/301/302)" 143 + echo "==============================" 144 + echo "Scan: $(date)" 145 + echo "" 146 + } > "$FOUND_FILE" 147 + 148 + grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | while IFS='|' read -r status endpoint path code; do 149 + echo "$endpoint$path [$code]" 150 + done >> "$FOUND_FILE" 151 + 152 + # Create summary at end of results file 153 + { 154 + echo "" 155 + echo "Summary" 156 + echo "=======" 157 + echo "Scan completed: $(date)" 158 + echo "Total endpoints scanned: ${ENDPOINT_COUNT}" 159 + echo "Total paths checked: $((ENDPOINT_COUNT * ${#PATHS[@]}))" 160 + echo "Found (200/301/302): ${FOUND_COUNT}" 161 + echo "Maybe (other codes): ${MAYBE_COUNT}" 162 + } >> "$RESULTS_FILE" 163 + 164 + # Display summary 165 + echo "" 166 + echo -e "${BLUE}╔════════════════════════════════════════╗${NC}" 167 + echo -e "${BLUE}║ Scan Complete! ║${NC}" 168 + echo -e "${BLUE}╚════════════════════════════════════════╝${NC}" 169 + echo "" 170 + echo -e "Endpoints scanned: ${GREEN}${ENDPOINT_COUNT}${NC}" 171 + echo -e "Paths checked per site: ${BLUE}${#PATHS[@]}${NC}" 172 + echo -e "Total requests made: ${BLUE}$((ENDPOINT_COUNT * ${#PATHS[@]}))${NC}" 173 + echo "" 174 + echo -e "Results:" 175 + echo -e " ${GREEN}✓ Found (200/301/302):${NC} ${FOUND_COUNT}" 176 + echo -e " ${YELLOW}? Maybe (other):${NC} ${MAYBE_COUNT}" 177 + echo "" 178 + echo "Files created:" 179 + echo " Full results: $RESULTS_FILE" 180 + echo " Found URLs: $FOUND_FILE" 181 + echo " Job log: ${OUTPUT_DIR}/joblog_${TIMESTAMP}.txt" 182 + 183 + # Show sample of found URLs if any 184 + if [ "$FOUND_COUNT" -gt 0 ]; then 185 + echo "" 186 + echo -e "${RED}⚠ SECURITY ALERT: Found exposed paths!${NC}" 187 + echo "" 188 + echo "Sample findings (first 10):" 189 + grep "^FOUND|" "$RESULTS_FILE" 2>/dev/null | head -10 | while IFS='|' read -r status endpoint path code; do 190 + echo -e " ${RED}✗${NC} $endpoint${RED}$path${NC} [$code]" 191 + done 192 + 193 + if [ "$FOUND_COUNT" -gt 10 ]; then 194 + echo "" 195 + echo " ... and $((FOUND_COUNT - 10)) more (see $FOUND_FILE)" 196 + fi 197 + fi 198 + 199 + echo ""
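Typical invocations of the parallel scanner above; API_HOST is the override the script already reads from the environment, and the host shown is a placeholder:

    ./utils/vuln-scanner-parallel.sh
    API_HOST=https://atscan.example.com ./utils/vuln-scanner-parallel.sh
    # re-filter a finished run without rescanning:
    awk -F'|' '$1 == "FOUND" {print $2 $3}' pds_scan_results/scan_*.txt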
+117
utils/vuln-scanner.sh
··· 1 + #!/bin/bash 2 + 3 + # Configuration 4 + API_HOST="${API_HOST:-http://localhost:8080}" 5 + TIMEOUT=5 6 + OUTPUT_DIR="./pds_scan_results" 7 + TIMESTAMP=$(date +%Y%m%d_%H%M%S) 8 + RESULTS_FILE="${OUTPUT_DIR}/scan_${TIMESTAMP}.txt" 9 + FOUND_FILE="${OUTPUT_DIR}/found_${TIMESTAMP}.txt" 10 + 11 + # Paths to check (one per line for easier editing) 12 + PATHS=( 13 + "/info.php" 14 + "/phpinfo.php" 15 + "/test.php" 16 + "/admin" 17 + "/admin.php" 18 + "/wp-admin" 19 + "/robots.txt" 20 + "/.env" 21 + "/.git/config" 22 + "/config.php" 23 + "/backup" 24 + "/db.sql" 25 + "/.DS_Store" 26 + "/server-status" 27 + "/.well-known/security.txt" 28 + ) 29 + 30 + # Colors 31 + RED='\033[0;31m' 32 + GREEN='\033[0;32m' 33 + YELLOW='\033[1;33m' 34 + BLUE='\033[0;34m' 35 + NC='\033[0m' 36 + 37 + mkdir -p "$OUTPUT_DIR" 38 + 39 + echo -e "${BLUE}=== PDS Security Scanner ===${NC}" 40 + echo "API Host: $API_HOST" 41 + echo "Timeout: ${TIMEOUT}s" 42 + echo "Scanning for ${#PATHS[@]} paths" 43 + echo "Results: $RESULTS_FILE" 44 + echo "" 45 + 46 + # Fetch active PDS endpoints 47 + echo -e "${YELLOW}Fetching active PDS endpoints...${NC}" 48 + ENDPOINTS=$(curl -s "${API_HOST}/api/v1/pds?status=online&limit=10000" | \ 49 + jq -r '.[].endpoint' 2>/dev/null) 50 + 51 + if [ -z "$ENDPOINTS" ]; then 52 + echo -e "${RED}Error: Could not fetch endpoints from API${NC}" 53 + exit 1 54 + fi 55 + 56 + ENDPOINT_COUNT=$(echo "$ENDPOINTS" | wc -l | tr -d ' ') 57 + echo -e "${GREEN}Found ${ENDPOINT_COUNT} active PDS endpoints${NC}" 58 + echo "" 59 + 60 + # Write header 61 + echo "PDS Security Scan - $(date)" > "$RESULTS_FILE" 62 + echo "========================================" >> "$RESULTS_FILE" 63 + echo "" >> "$RESULTS_FILE" 64 + 65 + # Counters 66 + CURRENT=0 67 + TOTAL_FOUND=0 68 + TOTAL_MAYBE=0 69 + 70 + # Scan each endpoint sequentially 71 + while IFS= read -r endpoint; do 72 + CURRENT=$((CURRENT + 1)) 73 + 74 + echo -e "${BLUE}[$CURRENT/$ENDPOINT_COUNT]${NC} Scanning: $endpoint" 75 + 76 + # Scan each path 77 + for path in "${PATHS[@]}"; do 78 + url="${endpoint}${path}" 79 + 80 + # Make request with timeout (no -L: report the first status code, so 301/302 are classified as redirects below) 81 + response=$(curl -s -o /dev/null -w "%{http_code}" \ 82 + --max-time "$TIMEOUT" \ 83 + --connect-timeout "$TIMEOUT" \ 84 + --retry 0 \ 85 + -A "Mozilla/5.0 (Security Scanner)" \ 86 + "$url" 2>/dev/null) 87 + 88 + # Check response 89 + if [ -n "$response" ] && [ "$response" != "404" ] && [ "$response" != "000" ]; then 90 + if [ "$response" = "200" ] || [ "$response" = "301" ] || [ "$response" = "302" ]; then 91 + echo -e " ${GREEN}✓ FOUND${NC} $path ${YELLOW}[$response]${NC}" 92 + echo "FOUND: $endpoint$path [$response]" >> "$RESULTS_FILE" 93 + echo "$endpoint$path" >> "$FOUND_FILE" 94 + TOTAL_FOUND=$((TOTAL_FOUND + 1)) 95 + elif [ "$response" != "403" ]; then 96 + echo -e " ${YELLOW}? MAYBE${NC} $path ${YELLOW}[$response]${NC}" 97 + echo "MAYBE: $endpoint$path [$response]" >> "$RESULTS_FILE" 98 + TOTAL_MAYBE=$((TOTAL_MAYBE + 1)) 99 + fi 100 + fi 101 + done 102 + 103 + echo "" >> "$RESULTS_FILE" 104 + 105 + done <<< "$ENDPOINTS" 106 + 107 + # Summary 108 + echo "" 109 + echo -e "${BLUE}========================================${NC}" 110 + echo -e "${GREEN}Scan Complete!${NC}" 111 + echo "Scanned: ${ENDPOINT_COUNT} endpoints" 112 + echo "Paths checked per endpoint: ${#PATHS[@]}" 113 + echo -e "${GREEN}Found (200/301/302): ${TOTAL_FOUND}${NC}" 114 + echo -e "${YELLOW}Maybe (other codes): ${TOTAL_MAYBE}${NC}" 115 + echo "" 116 + echo "Full results: $RESULTS_FILE" 117 + [ -f "$FOUND_FILE" ] && echo "Found URLs: $FOUND_FILE"
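The sequential variant above is slower but gentler on small hosts; a quick post-run breakdown of hits by status code (result lines have the form "FOUND: <endpoint><path> [code]"):

    awk -F'[][]' '/^(FOUND|MAYBE):/ {print $2}' pds_scan_results/scan_*.txt | sort | uniq -c | sort -rn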