Compare changes


+2 -1
.gitignore
···
  .DS_Store
  plc_cache\.tmp/*
  plc_bundles*
- config.yaml
+ config.yaml
+ /atscand
+39 -5
Makefile
···
- all: run
+ .PHONY: all build install test clean fmt lint help
+
+ # Binary name
+ BINARY_NAME=atscand
+ INSTALL_PATH=$(GOPATH)/bin
+
+ # Go commands
+ GOCMD=go
+ GOBUILD=$(GOCMD) build
+ GOINSTALL=$(GOCMD) install
+ GOCLEAN=$(GOCMD) clean
+ GOTEST=$(GOCMD) test
+ GOGET=$(GOCMD) get
+ GOFMT=$(GOCMD) fmt
+ GOMOD=$(GOCMD) mod
+ GORUN=$(GOCMD) run
+
+ # Default target
+ all: build
+
+ # Build the CLI tool
+ build:
+ 	@echo "Building $(BINARY_NAME)..."
+ 	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+ # Install the CLI tool globally
+ install:
+ 	@echo "Installing $(BINARY_NAME)..."
+ 	$(GOINSTALL) ./cmd/atscand
  
  run:
- 	go run cmd/atscanner.go -verbose
+ 	$(GORUN) cmd/atscand/main.go -verbose
  
- clean-db:
- 	dropdb -U atscanner atscanner
- 	createdb atscanner -O atscanner
+ update-plcbundle:
+ 	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+ # Show help
+ help:
+ 	@echo "Available targets:"
+ 	@echo "  make build            - Build the binary"
+ 	@echo "  make install          - Install binary globally"
+ 	@echo "  make run              - Run app"
+159
cmd/atscand/main.go
···
+ package main
+
+ import (
+ 	"context"
+ 	"flag"
+ 	"fmt"
+ 	"os"
+ 	"os/signal"
+ 	"syscall"
+ 	"time"
+
+ 	"github.com/atscan/atscand/internal/api"
+ 	"github.com/atscan/atscand/internal/config"
+ 	"github.com/atscan/atscand/internal/log"
+ 	"github.com/atscan/atscand/internal/pds"
+ 	"github.com/atscan/atscand/internal/plc"
+ 	"github.com/atscan/atscand/internal/storage"
+ 	"github.com/atscan/atscand/internal/worker"
+ )
+
+ const VERSION = "1.0.0"
+
+ func main() {
+ 	configPath := flag.String("config", "config.yaml", "path to config file")
+ 	verbose := flag.Bool("verbose", false, "enable verbose logging")
+ 	flag.Parse()
+
+ 	// Load configuration
+ 	cfg, err := config.Load(*configPath)
+ 	if err != nil {
+ 		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	// Override verbose setting if flag is provided
+ 	if *verbose {
+ 		cfg.API.Verbose = true
+ 	}
+
+ 	// Initialize logger
+ 	log.Init(cfg.API.Verbose)
+
+ 	// Print banner
+ 	log.Banner(VERSION)
+
+ 	// Print configuration summary
+ 	log.PrintConfig(map[string]string{
+ 		"Database Type":     cfg.Database.Type,
+ 		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+ 		"PLC Directory":     cfg.PLC.DirectoryURL,
+ 		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+ 		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+ 		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+ 		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+ 		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+ 		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+ 		"PDS Timeout":       cfg.PDS.Timeout.String(),
+ 		"API Host":          cfg.API.Host,
+ 		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+ 		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+ 	})
+
+ 	// Initialize database using factory pattern
+ 	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+ 	if err != nil {
+ 		log.Fatal("Failed to initialize database: %v", err)
+ 	}
+ 	defer func() {
+ 		log.Info("Closing database connection...")
+ 		db.Close()
+ 	}()
+
+ 	// Set scan retention from config
+ 	if cfg.PDS.ScanRetention > 0 {
+ 		db.SetScanRetention(cfg.PDS.ScanRetention)
+ 		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+ 	}
+
+ 	// Run migrations
+ 	if err := db.Migrate(); err != nil {
+ 		log.Fatal("Failed to run migrations: %v", err)
+ 	}
+
+ 	ctx, cancel := context.WithCancel(context.Background())
+ 	defer cancel()
+
+ 	// Initialize workers
+ 	log.Info("Initializing scanners...")
+
+ 	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+ 	if err != nil {
+ 		log.Fatal("Failed to create bundle manager: %v", err)
+ 	}
+ 	defer bundleManager.Close()
+ 	log.Verbose("✓ Bundle manager initialized (shared)")
+
+ 	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+ 	defer plcScanner.Close()
+ 	log.Verbose("✓ PLC scanner initialized")
+
+ 	pdsScanner := pds.NewScanner(db, cfg.PDS)
+ 	log.Verbose("✓ PDS scanner initialized")
+
+ 	scheduler := worker.NewScheduler()
+
+ 	// Schedule PLC directory scan
+ 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+ 		if err := plcScanner.Scan(ctx); err != nil {
+ 			log.Error("PLC scan error: %v", err)
+ 		}
+ 	})
+ 	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+ 	// Schedule PDS availability checks
+ 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+ 		if err := pdsScanner.ScanAll(ctx); err != nil {
+ 			log.Error("PDS scan error: %v", err)
+ 		}
+ 	})
+ 	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+ 	// Start API server
+ 	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+ 	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+ 	go func() {
+ 		if err := apiServer.Start(); err != nil {
+ 			log.Fatal("API server error: %v", err)
+ 		}
+ 	}()
+
+ 	// Give the API server a moment to start
+ 	time.Sleep(100 * time.Millisecond)
+ 	log.Info("✓ API server started successfully")
+ 	log.Info("")
+ 	log.Info("🚀 ATScanner is running!")
+ 	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+ 	log.Info("   Press Ctrl+C to stop")
+ 	log.Info("")
+
+ 	// Start scheduler
+ 	scheduler.Start(ctx)
+
+ 	// Wait for interrupt
+ 	sigChan := make(chan os.Signal, 1)
+ 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+ 	<-sigChan
+
+ 	log.Info("")
+ 	log.Info("Shutting down gracefully...")
+ 	cancel()
+
+ 	log.Info("Stopping API server...")
+ 	apiServer.Shutdown(context.Background())
+
+ 	log.Info("Waiting for active tasks to complete...")
+ 	time.Sleep(2 * time.Second)
+
+ 	log.Info("✓ Shutdown complete. Goodbye!")
+ }
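The `internal/worker` package is not part of this diff. For orientation only, a minimal ticker-based sketch that would satisfy the `NewScheduler`/`AddJob`/`Start` call sites above (the package's real implementation may differ) could look like:

```go
package worker

import (
	"context"
	"time"
)

type job struct {
	name     string
	interval time.Duration
	fn       func()
}

// Scheduler runs registered jobs at fixed intervals until the context ends.
type Scheduler struct{ jobs []job }

func NewScheduler() *Scheduler { return &Scheduler{} }

// AddJob registers a named function to run every interval.
func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
	s.jobs = append(s.jobs, job{name, interval, fn})
}

// Start launches one goroutine per job; each stops when ctx is cancelled.
func (s *Scheduler) Start(ctx context.Context) {
	for _, j := range s.jobs {
		go func(j job) {
			ticker := time.NewTicker(j.interval)
			defer ticker.Stop()
			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					j.fn()
				}
			}
		}(j)
	}
}
```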
-152
cmd/atscanner.go
···
- package main
-
- import (
- 	"context"
- 	"flag"
- 	"fmt"
- 	"os"
- 	"os/signal"
- 	"syscall"
- 	"time"
-
- 	"github.com/atscan/atscanner/internal/api"
- 	"github.com/atscan/atscanner/internal/config"
- 	"github.com/atscan/atscanner/internal/log"
- 	"github.com/atscan/atscanner/internal/pds"
- 	"github.com/atscan/atscanner/internal/plc"
- 	"github.com/atscan/atscanner/internal/storage"
- 	"github.com/atscan/atscanner/internal/worker"
- )
-
- const VERSION = "1.0.0"
-
- func main() {
- 	configPath := flag.String("config", "config.yaml", "path to config file")
- 	verbose := flag.Bool("verbose", false, "enable verbose logging")
- 	flag.Parse()
-
- 	// Load configuration
- 	cfg, err := config.Load(*configPath)
- 	if err != nil {
- 		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
- 		os.Exit(1)
- 	}
-
- 	// Override verbose setting if flag is provided
- 	if *verbose {
- 		cfg.API.Verbose = true
- 	}
-
- 	// Initialize logger
- 	log.Init(cfg.API.Verbose)
-
- 	// Print banner
- 	log.Banner(VERSION)
-
- 	// Print configuration summary
- 	log.PrintConfig(map[string]string{
- 		"Database Type":     cfg.Database.Type,
- 		"Database Path":     cfg.Database.Path, // Will be auto-redacted
- 		"PLC Directory":     cfg.PLC.DirectoryURL,
- 		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
- 		"PLC Bundle Dir":    cfg.PLC.BundleDir,
- 		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
- 		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
- 		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
- 		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
- 		"PDS Timeout":       cfg.PDS.Timeout.String(),
- 		"API Host":          cfg.API.Host,
- 		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
- 		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
- 	})
-
- 	// Initialize database using factory pattern
- 	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
- 	if err != nil {
- 		log.Fatal("Failed to initialize database: %v", err)
- 	}
- 	defer func() {
- 		log.Info("Closing database connection...")
- 		db.Close()
- 	}()
-
- 	// Set scan retention from config
- 	if cfg.PDS.ScanRetention > 0 {
- 		db.SetScanRetention(cfg.PDS.ScanRetention)
- 		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
- 	}
-
- 	// Run migrations
- 	if err := db.Migrate(); err != nil {
- 		log.Fatal("Failed to run migrations: %v", err)
- 	}
-
- 	ctx, cancel := context.WithCancel(context.Background())
- 	defer cancel()
-
- 	// Initialize workers
- 	log.Info("Initializing scanners...")
-
- 	plcScanner := plc.NewScanner(db, cfg.PLC)
- 	defer plcScanner.Close()
- 	log.Verbose("✓ PLC scanner initialized")
-
- 	pdsScanner := pds.NewScanner(db, cfg.PDS)
- 	log.Verbose("✓ PDS scanner initialized")
-
- 	scheduler := worker.NewScheduler()
-
- 	// Schedule PLC directory scan
- 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
- 		if err := plcScanner.Scan(ctx); err != nil {
- 			log.Error("PLC scan error: %v", err)
- 		}
- 	})
- 	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
-
- 	// Schedule PDS availability checks
- 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
- 		if err := pdsScanner.ScanAll(ctx); err != nil {
- 			log.Error("PDS scan error: %v", err)
- 		}
- 	})
- 	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
-
- 	// Start API server
- 	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
- 	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
- 	go func() {
- 		if err := apiServer.Start(); err != nil {
- 			log.Fatal("API server error: %v", err)
- 		}
- 	}()
-
- 	// Give the API server a moment to start
- 	time.Sleep(100 * time.Millisecond)
- 	log.Info("✓ API server started successfully")
- 	log.Info("")
- 	log.Info("🚀 ATScanner is running!")
- 	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
- 	log.Info("   Press Ctrl+C to stop")
- 	log.Info("")
-
- 	// Start scheduler
- 	scheduler.Start(ctx)
-
- 	// Wait for interrupt
- 	sigChan := make(chan os.Signal, 1)
- 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
- 	<-sigChan
-
- 	log.Info("")
- 	log.Info("Shutting down gracefully...")
- 	cancel()
-
- 	log.Info("Stopping API server...")
- 	apiServer.Shutdown(context.Background())
-
- 	log.Info("Waiting for active tasks to complete...")
- 	time.Sleep(2 * time.Second)
-
- 	log.Info("✓ Shutdown complete. Goodbye!")
- }
+168
cmd/import-labels/main.go
···
+ package main
+
+ import (
+ 	"bufio"
+ 	"flag"
+ 	"fmt"
+ 	"os"
+ 	"path/filepath"
+ 	"strings"
+ 	"time"
+
+ 	"github.com/klauspost/compress/zstd"
+ 	"gopkg.in/yaml.v3"
+ )
+
+ type Config struct {
+ 	PLC struct {
+ 		BundleDir string `yaml:"bundle_dir"`
+ 	} `yaml:"plc"`
+ }
+
+ var CONFIG_FILE = "config.yaml"
+
+ // ---------------------
+
+ func main() {
+ 	// Define a new flag for changing the directory
+ 	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+ 	flag.Usage = func() { // Custom usage message
+ 		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+ 		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+ 		flag.PrintDefaults()
+ 	}
+ 	flag.Parse() // Parse all defined flags
+
+ 	// Change directory if the flag was used
+ 	if *workDir != "." {
+ 		fmt.Printf("Changing working directory to %s...\n", *workDir)
+ 		if err := os.Chdir(*workDir); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+ 			os.Exit(1)
+ 		}
+ 	}
+
+ 	// --- REMOVED UNUSED CODE ---
+ 	// The csvFilePath variable and NArg check were removed
+ 	// as the script now reads from stdin.
+ 	// ---------------------------
+
+ 	fmt.Println("========================================")
+ 	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+ 	fmt.Println("========================================")
+
+ 	// 1. Read config (will now read from the new CWD)
+ 	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+ 	configData, err := os.ReadFile(CONFIG_FILE)
+ 	if err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	var config Config
+ 	if err := yaml.Unmarshal(configData, &config); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	if config.PLC.BundleDir == "" {
+ 		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+ 		os.Exit(1)
+ 	}
+
+ 	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+ 	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+ 	fmt.Println("Waiting for sorted data from stdin...")
+
+ 	// 2. Process sorted data from stdin
+ 	// This script *requires* the input to be sorted by bundle number.
+
+ 	var currentWriter *zstd.Encoder
+ 	var currentFile *os.File
+ 	var lastBundleKey string = ""
+
+ 	lineCount := 0
+ 	startTime := time.Now()
+
+ 	scanner := bufio.NewScanner(os.Stdin)
+ 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+ 	for scanner.Scan() {
+ 		line := scanner.Text()
+ 		lineCount++
+
+ 		parts := strings.SplitN(line, ",", 2)
+ 		if len(parts) < 1 {
+ 			continue // Skip empty/bad lines
+ 		}
+
+ 		bundleNumStr := parts[0]
+ 		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+ 		// If the bundle key is new, close the old writer and open a new one.
+ 		if bundleKey != lastBundleKey {
+ 			// Close the previous writer/file
+ 			if currentWriter != nil {
+ 				if err := currentWriter.Close(); err != nil {
+ 					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+ 				}
+ 				currentFile.Close()
+ 			}
+
+ 			// Start the new one
+ 			fmt.Printf("  -> Writing bundle %s\n", bundleKey)
+ 			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+ 			file, err := os.Create(outPath)
+ 			if err != nil {
+ 				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+ 				os.Exit(1)
+ 			}
+ 			currentFile = file
+
+ 			writer, err := zstd.NewWriter(file)
+ 			if err != nil {
+ 				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+ 				os.Exit(1)
+ 			}
+ 			currentWriter = writer
+ 			lastBundleKey = bundleKey
+ 		}
+
+ 		// Write the line to the currently active writer
+ 		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+ 		}
+
+ 		// Progress update
+ 		if lineCount%100000 == 0 {
+ 			elapsed := time.Since(startTime).Seconds()
+ 			rate := float64(lineCount) / elapsed
+ 			fmt.Printf("  ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+ 		}
+ 	}
+
+ 	// 3. Close the very last writer
+ 	if currentWriter != nil {
+ 		if err := currentWriter.Close(); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+ 		}
+ 		currentFile.Close()
+ 	}
+
+ 	if err := scanner.Err(); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+ 	}
+
+ 	totalTime := time.Since(startTime)
+ 	fmt.Println("\n========================================")
+ 	fmt.Println("Import Summary")
+ 	fmt.Println("========================================")
+ 	fmt.Printf("✓ Import completed in %v\n", totalTime)
+ 	fmt.Printf("Total lines processed: %d\n", lineCount)
+ }
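A companion sketch, not part of this change: reading back one of the per-bundle `.csv.zst` files the tool writes above, using the same `klauspost/compress/zstd` package (the file path is hypothetical):

```go
package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Hypothetical path; the real layout is <plc.bundle_dir>/labels/<NNNNNN>.csv.zst.
	file, err := os.Open("plc_bundles/labels/000001.csv.zst")
	if err != nil {
		fmt.Fprintf(os.Stderr, "open: %v\n", err)
		os.Exit(1)
	}
	defer file.Close()

	dec, err := zstd.NewReader(file)
	if err != nil {
		fmt.Fprintf(os.Stderr, "zstd: %v\n", err)
		os.Exit(1)
	}
	defer dec.Close()

	// Lines come back exactly as written: "bundle,<rest of CSV row>".
	scanner := bufio.NewScanner(dec)
	for scanner.Scan() {
		fmt.Println(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "read: %v\n", err)
	}
}
```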
+1 -1
config.sample.yaml
···
  database:
    type: "postgres"  # or "sqlite"
-   path: "postgres://atscanner:YOUR_PASSWORD@localhost:5432/atscanner?sslmode=disable"
+   path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
    # For SQLite: path: "atscan.db"
  
  plc:
+6 -5
go.mod
···
- module github.com/atscan/atscanner
+ module github.com/atscan/atscand
  
  go 1.23.0
  
  require (
  	github.com/gorilla/mux v1.8.1
  	github.com/lib/pq v1.10.9
- 	github.com/mattn/go-sqlite3 v1.14.18
  	gopkg.in/yaml.v3 v3.0.1
  )
  
- require github.com/klauspost/compress v1.18.0
+ require github.com/klauspost/compress v1.18.1
  
  require (
- 	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
  	github.com/gorilla/handlers v1.5.2
+ 	github.com/jackc/pgx/v5 v5.7.6
+ 	tangled.org/atscan.net/plcbundle v0.3.6
  )
  
  require (
  	github.com/felixge/httpsnoop v1.0.3 // indirect
  	github.com/jackc/pgpassfile v1.0.0 // indirect
  	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
- 	github.com/jackc/pgx/v5 v5.7.6 // indirect
  	github.com/jackc/puddle/v2 v2.2.2 // indirect
+ 	github.com/kr/text v0.2.0 // indirect
+ 	github.com/rogpeppe/go-internal v1.14.1 // indirect
  	golang.org/x/crypto v0.37.0 // indirect
  	golang.org/x/sync v0.13.0 // indirect
  	golang.org/x/text v0.24.0 // indirect
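With `jackc/pgx/v5` promoted to a direct dependency, the storage layer presumably talks to Postgres through pgx; its actual usage is not shown in this diff. A minimal standalone connectivity check against the DSN format from `config.sample.yaml` (sketch only):

```go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/jackc/pgx/v5"
)

func main() {
	// DSN format taken from config.sample.yaml; password is a placeholder.
	dsn := "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"

	conn, err := pgx.Connect(context.Background(), dsn)
	if err != nil {
		fmt.Fprintf(os.Stderr, "connect: %v\n", err)
		os.Exit(1)
	}
	defer conn.Close(context.Background())

	var version string
	if err := conn.QueryRow(context.Background(), "select version()").Scan(&version); err != nil {
		fmt.Fprintf(os.Stderr, "query: %v\n", err)
		os.Exit(1)
	}
	fmt.Println(version)
}
```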
+17 -7
go.sum
···
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
  github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
  github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
  github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
  github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
  github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
  github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
- github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
- github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+ github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+ github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+ github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
  github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
  github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
- github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
- github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
  github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+ github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
  github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
  github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
  github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
  golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
  golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
  golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
  golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
  golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
  golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
- gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
  gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
  gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
  gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
  gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
  gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+ tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+ tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+225 -449
internal/api/handlers.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "crypto/sha256" 6 5 "database/sql" 7 - "encoding/hex" 8 6 "encoding/json" 9 7 "fmt" 8 + "io" 10 9 "net/http" 11 - "os" 12 - "path/filepath" 13 10 "strconv" 14 11 "strings" 15 12 "time" 16 13 17 - "github.com/atscan/atscanner/internal/log" 18 - "github.com/atscan/atscanner/internal/monitor" 19 - "github.com/atscan/atscanner/internal/plc" 20 - "github.com/atscan/atscanner/internal/storage" 14 + "github.com/atscan/atscand/internal/log" 15 + "github.com/atscan/atscand/internal/monitor" 16 + "github.com/atscan/atscand/internal/plc" 17 + "github.com/atscan/atscand/internal/storage" 21 18 "github.com/gorilla/mux" 19 + "tangled.org/atscan.net/plcbundle" 22 20 ) 23 21 24 22 // ===== RESPONSE HELPERS ===== ··· 40 38 http.Error(r.w, msg, code) 41 39 } 42 40 43 - func (r *response) bundleHeaders(bundle *storage.PLCBundle) { 41 + func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) { 44 42 r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber)) 45 43 r.w.Header().Set("X-Bundle-Hash", bundle.Hash) 46 44 r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash) ··· 76 74 } 77 75 78 76 // ===== FORMATTING HELPERS ===== 79 - 80 - func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} { 81 - return map[string]interface{}{ 82 - "plc_bundle_number": bundle.BundleNumber, 83 - "start_time": bundle.StartTime, 84 - "end_time": bundle.EndTime, 85 - "operation_count": plc.BUNDLE_SIZE, 86 - "did_count": bundle.DIDCount, // Use DIDCount instead of len(DIDs) 87 - "hash": bundle.Hash, 88 - "compressed_hash": bundle.CompressedHash, 89 - "compressed_size": bundle.CompressedSize, 90 - "uncompressed_size": bundle.UncompressedSize, 91 - "compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize), 92 - "cursor": bundle.Cursor, 93 - "prev_bundle_hash": bundle.PrevBundleHash, 94 - "created_at": bundle.CreatedAt, 95 - } 96 - } 97 77 98 78 func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} { 99 79 response := map[string]interface{}{ ··· 268 248 "endpoint": pds.Endpoint, 269 249 "discovered_at": pds.DiscoveredAt, 270 250 "status": statusToString(pds.Status), 251 + "valid": pds.Valid, // NEW 271 252 } 272 253 273 254 // Add server_did if available ··· 703 684 return 704 685 } 705 686 706 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 707 - if err != nil { 708 - resp.error(err.Error(), http.StatusInternalServerError) 709 - return 710 - } 711 - 687 + lastBundle := s.bundleManager.GetLastBundleNumber() 712 688 resp.json(map[string]interface{}{ 713 689 "total_unique_dids": totalDIDs, 714 690 "last_bundle": lastBundle, ··· 719 695 720 696 func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) { 721 697 resp := newResponse(w) 722 - 723 698 bundleNum, err := getBundleNumber(r) 724 699 if err != nil { 725 700 resp.error("invalid bundle number", http.StatusBadRequest) 726 701 return 727 702 } 728 703 729 - // Try to get existing bundle 730 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 731 - if err == nil { 732 - // Bundle exists, return it normally 733 - resp.json(formatBundleResponse(bundle)) 734 - return 735 - } 736 - 737 - // Bundle not found - check if it's the next upcoming bundle 738 - lastBundle, err := s.db.GetLastBundleNumber(r.Context()) 704 + // Get from library's index 705 + index := s.bundleManager.GetIndex() 706 + bundleMeta, err := index.GetBundle(bundleNum) 739 707 if err != nil { 740 - resp.error("bundle not found", 
http.StatusNotFound) 741 - return 742 - } 743 - 744 - if bundleNum == lastBundle+1 { 745 - // This is the upcoming bundle - return preview based on mempool 746 - upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum) 747 - if err != nil { 748 - resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError) 708 + // Check if it's upcoming bundle 709 + lastBundle := index.GetLastBundle() 710 + if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 { 711 + upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum) 712 + if err != nil { 713 + resp.error(err.Error(), http.StatusInternalServerError) 714 + return 715 + } 716 + resp.json(upcomingBundle) 749 717 return 750 718 } 751 - resp.json(upcomingBundle) 719 + resp.error("bundle not found", http.StatusNotFound) 752 720 return 753 721 } 754 722 755 - // Not an upcoming bundle, just not found 756 - resp.error("bundle not found", http.StatusNotFound) 723 + resp.json(formatBundleMetadata(bundleMeta)) 757 724 } 758 725 759 - func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) { 760 - // Get mempool stats 761 - mempoolCount, err := s.db.GetMempoolCount(ctx) 762 - if err != nil { 763 - return nil, err 726 + // Helper to format library's BundleMetadata 727 + func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} { 728 + return map[string]interface{}{ 729 + "plc_bundle_number": meta.BundleNumber, 730 + "start_time": meta.StartTime, 731 + "end_time": meta.EndTime, 732 + "operation_count": meta.OperationCount, 733 + "did_count": meta.DIDCount, 734 + "hash": meta.Hash, // Chain hash (primary) 735 + "content_hash": meta.ContentHash, // Content hash 736 + "parent": meta.Parent, // Parent chain hash 737 + "compressed_hash": meta.CompressedHash, 738 + "compressed_size": meta.CompressedSize, 739 + "uncompressed_size": meta.UncompressedSize, 740 + "compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize), 741 + "cursor": meta.Cursor, 742 + "created_at": meta.CreatedAt, 764 743 } 744 + } 765 745 766 - if mempoolCount == 0 { 746 + func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) { 747 + // Get mempool stats from library via wrapper 748 + stats := s.bundleManager.GetMempoolStats() 749 + 750 + count, ok := stats["count"].(int) 751 + if !ok || count == 0 { 767 752 return map[string]interface{}{ 768 753 "plc_bundle_number": bundleNum, 769 754 "is_upcoming": true, ··· 773 758 }, nil 774 759 } 775 760 776 - // Get first and last operations for time range 777 - firstOp, err := s.db.GetFirstMempoolOperation(ctx) 778 - if err != nil { 779 - return nil, err 761 + // Build response 762 + result := map[string]interface{}{ 763 + "plc_bundle_number": bundleNum, 764 + "is_upcoming": true, 765 + "status": "filling", 766 + "operation_count": count, 767 + "did_count": stats["did_count"], 768 + "target_operation_count": 10000, 769 + "progress_percent": float64(count) / 100.0, 770 + "operations_needed": 10000 - count, 780 771 } 781 772 782 - lastOp, err := s.db.GetLastMempoolOperation(ctx) 783 - if err != nil { 784 - return nil, err 773 + if count >= 10000 { 774 + result["status"] = "ready" 785 775 } 786 776 787 - // Get unique DID count 788 - uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx) 789 - if err != nil { 790 - return nil, err 777 + // Add time range if available 778 + if firstTime, ok := stats["first_time"]; ok { 
779 + result["start_time"] = firstTime 791 780 } 792 - 793 - // Get uncompressed size estimate 794 - uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx) 795 - if err != nil { 796 - return nil, err 781 + if lastTime, ok := stats["last_time"]; ok { 782 + result["current_end_time"] = lastTime 797 783 } 798 784 799 - // Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data) 800 - estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12) 801 - 802 - // Calculate completion estimate 803 - var estimatedCompletionTime *time.Time 804 - var operationsNeeded int 805 - var currentRate float64 806 - 807 - operationsNeeded = plc.BUNDLE_SIZE - mempoolCount 808 - 809 - if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 { 810 - timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds() 811 - if timeSpan > 0 { 812 - currentRate = float64(mempoolCount) / timeSpan 813 - if currentRate > 0 { 814 - secondsNeeded := float64(operationsNeeded) / currentRate 815 - completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 816 - estimatedCompletionTime = &completionTime 817 - } 818 - } 785 + // Add size info if available 786 + if sizeBytes, ok := stats["size_bytes"]; ok { 787 + result["uncompressed_size"] = sizeBytes 788 + result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12) 819 789 } 820 790 821 - // Get previous bundle for cursor context 822 - var prevBundleHash string 823 - var cursor string 791 + // Get previous bundle info 824 792 if bundleNum > 1 { 825 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1) 826 - if err == nil { 827 - prevBundleHash = prevBundle.Hash 828 - cursor = prevBundle.EndTime.Format(time.RFC3339Nano) 829 - } 830 - } 831 - 832 - // Determine bundle status 833 - status := "filling" 834 - if mempoolCount >= plc.BUNDLE_SIZE { 835 - status = "ready" 836 - } 837 - 838 - // Build upcoming bundle response 839 - result := map[string]interface{}{ 840 - "plc_bundle_number": bundleNum, 841 - "is_upcoming": true, 842 - "status": status, 843 - "operation_count": mempoolCount, 844 - "target_operation_count": plc.BUNDLE_SIZE, 845 - "progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100, 846 - "operations_needed": operationsNeeded, 847 - "did_count": uniqueDIDCount, 848 - "start_time": firstOp.CreatedAt, 849 - "current_end_time": lastOp.CreatedAt, 850 - "uncompressed_size": uncompressedSize, 851 - "estimated_compressed_size": estimatedCompressedSize, 852 - "compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize), 853 - "prev_bundle_hash": prevBundleHash, 854 - "cursor": cursor, 855 - } 856 - 857 - if estimatedCompletionTime != nil { 858 - result["estimated_completion_time"] = *estimatedCompletionTime 859 - result["current_rate_per_second"] = currentRate 860 - } 861 - 862 - // Get actual mempool operations if requested (for DIDs list) 863 - if r.URL.Query().Get("include_dids") == "true" { 864 - ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE) 865 - if err == nil { 866 - // Extract unique DIDs 867 - didSet := make(map[string]bool) 868 - for _, op := range ops { 869 - didSet[op.DID] = true 870 - } 871 - dids := make([]string, 0, len(didSet)) 872 - for did := range didSet { 873 - dids = append(dids, did) 874 - } 875 - result["dids"] = dids 793 + if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil { 794 + result["parent"] = prevBundle.Hash // Parent chain hash 795 + result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano) 876 796 } 
877 797 } 878 798 ··· 888 808 return 889 809 } 890 810 891 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 811 + // Get from library 812 + dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum) 892 813 if err != nil { 893 814 resp.error("bundle not found", http.StatusNotFound) 894 815 return 895 816 } 896 817 897 - // Query DIDs from dids table instead 898 - dids, err := s.db.GetDIDsForBundle(r.Context(), bundleNum) 899 - if err != nil { 900 - resp.error(fmt.Sprintf("failed to get DIDs: %v", err), http.StatusInternalServerError) 901 - return 902 - } 903 - 904 818 resp.json(map[string]interface{}{ 905 - "plc_bundle_number": bundle.BundleNumber, 906 - "did_count": bundle.DIDCount, 819 + "plc_bundle_number": bundleNum, 820 + "did_count": didCount, 907 821 "dids": dids, 908 822 }) 909 823 } ··· 919 833 920 834 compressed := r.URL.Query().Get("compressed") != "false" 921 835 922 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 836 + bundle, err := s.bundleManager.GetBundleMetadata(bundleNum) 923 837 if err == nil { 924 838 // Bundle exists, serve it normally 925 839 resp.bundleHeaders(bundle) ··· 933 847 } 934 848 935 849 // Bundle not found - check if it's the upcoming bundle 936 - lastBundle, err := s.db.GetLastBundleNumber(r.Context()) 937 - if err != nil { 938 - resp.error("bundle not found", http.StatusNotFound) 939 - return 940 - } 941 - 850 + lastBundle := s.bundleManager.GetLastBundleNumber() 942 851 if bundleNum == lastBundle+1 { 943 852 // This is the upcoming bundle - serve from mempool 944 - s.serveUpcomingBundle(w, r, bundleNum) 853 + s.serveUpcomingBundle(w, bundleNum) 945 854 return 946 855 } 947 856 ··· 949 858 resp.error("bundle not found", http.StatusNotFound) 950 859 } 951 860 952 - func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) { 953 - ctx := r.Context() 954 - 955 - // Get mempool count 956 - mempoolCount, err := s.db.GetMempoolCount(ctx) 957 - if err != nil { 958 - http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError) 959 - return 960 - } 861 + func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) { 862 + // Get mempool stats 863 + stats := s.bundleManager.GetMempoolStats() 864 + count, ok := stats["count"].(int) 961 865 962 - if mempoolCount == 0 { 866 + if !ok || count == 0 { 963 867 http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound) 964 868 return 965 869 } 966 870 967 - // Get mempool operations (up to BUNDLE_SIZE) 968 - mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE) 871 + // Get operations from mempool 872 + ops, err := s.bundleManager.GetMempoolOperations() 969 873 if err != nil { 970 874 http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError) 971 875 return 972 876 } 973 877 974 - if len(mempoolOps) == 0 { 975 - http.Error(w, "upcoming bundle is empty", http.StatusNotFound) 878 + if len(ops) == 0 { 879 + http.Error(w, "no operations in mempool", http.StatusNotFound) 976 880 return 977 881 } 978 882 979 - // Get time range 980 - firstOp := mempoolOps[0] 981 - lastOp := mempoolOps[len(mempoolOps)-1] 883 + // Calculate times 884 + firstOp := ops[0] 885 + lastOp := ops[len(ops)-1] 982 886 983 887 // Extract unique DIDs 984 888 didSet := make(map[string]bool) 985 - for _, op := range mempoolOps { 889 + for _, op := range ops { 986 890 didSet[op.DID] = true 987 891 } 988 892 893 + // Calculate 
uncompressed size 894 + uncompressedSize := int64(0) 895 + for _, op := range ops { 896 + uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline 897 + } 898 + 989 899 // Get previous bundle hash 990 900 prevBundleHash := "" 991 901 if bundleNum > 1 { 992 - if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 902 + if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil { 993 903 prevBundleHash = prevBundle.Hash 994 904 } 995 905 } 996 906 997 - // Serialize operations to JSONL 998 - var buf []byte 999 - for _, mop := range mempoolOps { 1000 - buf = append(buf, []byte(mop.Operation)...) 1001 - buf = append(buf, '\n') 1002 - } 1003 - 1004 - // Calculate size 1005 - uncompressedSize := int64(len(buf)) 1006 - 1007 907 // Set headers 1008 908 w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum)) 1009 909 w.Header().Set("X-Bundle-Is-Upcoming", "true") 1010 910 w.Header().Set("X-Bundle-Status", "preview") 1011 911 w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano)) 1012 912 w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano)) 1013 - w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps))) 1014 - w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE)) 1015 - w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100)) 913 + w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops))) 914 + w.Header().Set("X-Bundle-Target-Count", "10000") 915 + w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0)) 1016 916 w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet))) 1017 917 w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash) 918 + w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize)) 1018 919 1019 920 w.Header().Set("Content-Type", "application/jsonl") 1020 921 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum)) 1021 - w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize)) 1022 - w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize)) 1023 922 923 + // Stream operations as JSONL 1024 924 w.WriteHeader(http.StatusOK) 1025 - w.Write(buf) 925 + 926 + for _, op := range ops { 927 + // Use RawJSON if available (preserves exact format) 928 + if len(op.RawJSON) > 0 { 929 + w.Write(op.RawJSON) 930 + } else { 931 + // Fallback to marshaling 932 + data, _ := json.Marshal(op) 933 + w.Write(data) 934 + } 935 + w.Write([]byte("\n")) 936 + } 1026 937 } 1027 938 1028 - func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) { 939 + func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) { 1029 940 resp := newResponse(w) 1030 - path := bundle.GetFilePath(s.plcBundleDir) 1031 941 1032 - file, err := os.Open(path) 942 + // Use the new streaming API for compressed data 943 + reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber) 1033 944 if err != nil { 1034 - resp.error("bundle file not found on disk", http.StatusNotFound) 945 + resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError) 1035 946 return 1036 947 } 1037 - defer file.Close() 1038 - 1039 - fileInfo, _ := file.Stat() 948 + defer reader.Close() 1040 949 1041 950 w.Header().Set("Content-Type", 
"application/zstd") 1042 951 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber)) 1043 - w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size())) 1044 - w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size())) 952 + w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize)) 953 + w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize)) 1045 954 1046 - http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file) 955 + // Stream the data directly to the response 956 + w.WriteHeader(http.StatusOK) 957 + io.Copy(w, reader) 1047 958 } 1048 959 1049 - func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) { 960 + func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) { 1050 961 resp := newResponse(w) 1051 962 1052 - ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber) 963 + // Use the new streaming API for decompressed data 964 + reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber) 1053 965 if err != nil { 1054 - resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError) 966 + resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError) 1055 967 return 1056 968 } 1057 - 1058 - // Serialize to JSONL 1059 - var buf []byte 1060 - for _, op := range ops { 1061 - buf = append(buf, op.RawJSON...) 1062 - buf = append(buf, '\n') 1063 - } 1064 - 1065 - fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir)) 1066 - compressedSize := int64(0) 1067 - if fileInfo != nil { 1068 - compressedSize = fileInfo.Size() 1069 - } 969 + defer reader.Close() 1070 970 1071 971 w.Header().Set("Content-Type", "application/jsonl") 1072 972 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber)) 1073 - w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf))) 1074 - w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize)) 1075 - w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf))) 1076 - if compressedSize > 0 { 1077 - w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize))) 973 + w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize)) 974 + w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize)) 975 + w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize)) 976 + if bundle.CompressedSize > 0 { 977 + w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize))) 1078 978 } 1079 979 980 + // Stream the data directly to the response 1080 981 w.WriteHeader(http.StatusOK) 1081 - w.Write(buf) 982 + io.Copy(w, reader) 1082 983 } 1083 984 1084 985 func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) { 1085 986 resp := newResponse(w) 1086 987 limit := getQueryInt(r, "limit", 50) 1087 988 1088 - bundles, err := s.db.GetBundles(r.Context(), limit) 1089 - if err != nil { 1090 - resp.error(err.Error(), http.StatusInternalServerError) 1091 - return 1092 - } 989 + bundles := s.bundleManager.GetBundles(limit) 1093 990 1094 991 response := make([]map[string]interface{}, len(bundles)) 1095 992 for i, bundle := range bundles { 1096 - response[i] = formatBundleResponse(bundle) 993 + response[i] = 
formatBundleMetadata(bundle) 1097 994 } 1098 995 1099 996 resp.json(response) ··· 1102 999 func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) { 1103 1000 resp := newResponse(w) 1104 1001 1105 - count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context()) 1106 - if err != nil { 1107 - resp.error(err.Error(), http.StatusInternalServerError) 1108 - return 1109 - } 1002 + stats := s.bundleManager.GetBundleStats() 1003 + 1004 + bundleCount := stats["bundle_count"].(int64) 1005 + totalSize := stats["total_size"].(int64) 1006 + totalUncompressedSize := stats["total_uncompressed_size"].(int64) 1007 + lastBundle := stats["last_bundle"].(int64) 1110 1008 1111 1009 resp.json(map[string]interface{}{ 1112 - "plc_bundle_count": count, 1113 - "last_bundle_number": lastBundle, 1114 - "total_compressed_size": compressedSize, 1115 - "total_compressed_size_mb": float64(compressedSize) / 1024 / 1024, 1116 - "total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024, 1117 - "total_uncompressed_size": uncompressedSize, 1118 - "total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 1119 - "total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024, 1120 - "compression_ratio": float64(uncompressedSize) / float64(compressedSize), 1010 + "plc_bundle_count": bundleCount, 1011 + "last_bundle_number": lastBundle, 1012 + "total_compressed_size": totalSize, 1013 + "total_uncompressed_size": totalUncompressedSize, 1014 + "overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize), 1121 1015 }) 1122 1016 } 1123 1017 ··· 1125 1019 1126 1020 func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) { 1127 1021 resp := newResponse(w) 1128 - ctx := r.Context() 1129 1022 1130 - count, err := s.db.GetMempoolCount(ctx) 1131 - if err != nil { 1132 - resp.error(err.Error(), http.StatusInternalServerError) 1133 - return 1134 - } 1023 + // Get stats from library's mempool via wrapper method 1024 + stats := s.bundleManager.GetMempoolStats() 1135 1025 1136 - uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx) 1137 - if err != nil { 1138 - resp.error(err.Error(), http.StatusInternalServerError) 1139 - return 1026 + // Convert to API response format 1027 + result := map[string]interface{}{ 1028 + "operation_count": stats["count"], 1029 + "can_create_bundle": stats["can_create_bundle"], 1140 1030 } 1141 1031 1142 - uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx) 1143 - if err != nil { 1144 - resp.error(err.Error(), http.StatusInternalServerError) 1145 - return 1032 + // Add size information 1033 + if sizeBytes, ok := stats["size_bytes"]; ok { 1034 + result["uncompressed_size"] = sizeBytes 1035 + result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024 1146 1036 } 1147 1037 1148 - result := map[string]interface{}{ 1149 - "operation_count": count, 1150 - "unique_did_count": uniqueDIDCount, 1151 - "uncompressed_size": uncompressedSize, 1152 - "uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 1153 - "can_create_bundle": count >= plc.BUNDLE_SIZE, 1154 - } 1038 + // Add time range and calculate estimated completion 1039 + if count, ok := stats["count"].(int); ok && count > 0 { 1040 + if firstTime, ok := stats["first_time"].(time.Time); ok { 1041 + result["mempool_start_time"] = firstTime 1155 1042 1156 - if count > 0 { 1157 - if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil { 1158 - result["mempool_start_time"] = 
firstOp.CreatedAt 1043 + if lastTime, ok := stats["last_time"].(time.Time); ok { 1044 + result["mempool_end_time"] = lastTime 1159 1045 1160 - if count < plc.BUNDLE_SIZE { 1161 - if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil { 1162 - timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds() 1046 + // Calculate estimated next bundle time if not complete 1047 + if count < 10000 { 1048 + timeSpan := lastTime.Sub(firstTime).Seconds() 1163 1049 if timeSpan > 0 { 1164 1050 opsPerSecond := float64(count) / timeSpan 1165 1051 if opsPerSecond > 0 { 1166 - remainingOps := plc.BUNDLE_SIZE - count 1052 + remainingOps := 10000 - count 1167 1053 secondsNeeded := float64(remainingOps) / opsPerSecond 1168 - result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second) 1169 - result["operations_needed"] = remainingOps 1054 + estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 1055 + 1056 + result["estimated_next_bundle_time"] = estimatedTime 1170 1057 result["current_rate_per_second"] = opsPerSecond 1058 + result["operations_needed"] = remainingOps 1171 1059 } 1172 1060 } 1061 + result["progress_percent"] = float64(count) / 100.0 1062 + } else { 1063 + // Ready to create bundle 1064 + result["estimated_next_bundle_time"] = time.Now() 1065 + result["operations_needed"] = 0 1173 1066 } 1174 - } else { 1175 - result["estimated_next_bundle_time"] = time.Now() 1176 - result["operations_needed"] = 0 1177 1067 } 1178 1068 } 1179 1069 } else { 1070 + // Empty mempool 1180 1071 result["mempool_start_time"] = nil 1181 1072 result["estimated_next_bundle_time"] = nil 1182 1073 } ··· 1201 1092 1202 1093 // ===== VERIFICATION HANDLERS ===== 1203 1094 1204 - func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) { 1205 - resp := newResponse(w) 1206 - vars := mux.Vars(r) 1207 - 1208 - bundleNumber, err := strconv.Atoi(vars["bundleNumber"]) 1209 - if err != nil { 1210 - resp.error("Invalid bundle number", http.StatusBadRequest) 1211 - return 1212 - } 1213 - 1214 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber) 1215 - if err != nil { 1216 - resp.error("Bundle not found", http.StatusNotFound) 1217 - return 1218 - } 1219 - 1220 - // Fetch from PLC and verify 1221 - remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber) 1222 - if err != nil { 1223 - resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError) 1224 - return 1225 - } 1226 - 1227 - remoteHash := computeOperationsHash(remoteOps) 1228 - verified := bundle.Hash == remoteHash 1229 - 1230 - resp.json(map[string]interface{}{ 1231 - "bundle_number": bundleNumber, 1232 - "verified": verified, 1233 - "local_hash": bundle.Hash, 1234 - "remote_hash": remoteHash, 1235 - "local_op_count": plc.BUNDLE_SIZE, 1236 - "remote_op_count": len(remoteOps), 1237 - "boundary_cids_used": len(prevCIDs), 1238 - }) 1239 - } 1240 - 1241 - func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) { 1242 - var after string 1243 - var prevBoundaryCIDs map[string]bool 1244 - 1245 - if bundleNum > 1 { 1246 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1) 1247 - if err != nil { 1248 - return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err) 1249 - } 1250 - 1251 - after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z") 1252 - 1253 - if len(prevBundle.BoundaryCIDs) > 0 { 1254 - prevBoundaryCIDs = 
make(map[string]bool) 1255 - for _, cid := range prevBundle.BoundaryCIDs { 1256 - prevBoundaryCIDs[cid] = true 1257 - } 1258 - } 1259 - } 1260 - 1261 - var allRemoteOps []plc.PLCOperation 1262 - seenCIDs := make(map[string]bool) 1263 - 1264 - for cid := range prevBoundaryCIDs { 1265 - seenCIDs[cid] = true 1266 - } 1267 - 1268 - currentAfter := after 1269 - maxFetches := 20 1270 - 1271 - for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ { 1272 - batch, err := s.plcClient.Export(ctx, plc.ExportOptions{ 1273 - Count: 1000, 1274 - After: currentAfter, 1275 - }) 1276 - if err != nil || len(batch) == 0 { 1277 - break 1278 - } 1279 - 1280 - for _, op := range batch { 1281 - if !seenCIDs[op.CID] { 1282 - seenCIDs[op.CID] = true 1283 - allRemoteOps = append(allRemoteOps, op) 1284 - if len(allRemoteOps) >= plc.BUNDLE_SIZE { 1285 - break 1286 - } 1287 - } 1288 - } 1289 - 1290 - if len(batch) > 0 { 1291 - lastOp := batch[len(batch)-1] 1292 - currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z") 1293 - } 1294 - 1295 - if len(batch) < 1000 { 1296 - break 1297 - } 1298 - } 1299 - 1300 - if len(allRemoteOps) > plc.BUNDLE_SIZE { 1301 - allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE] 1302 - } 1303 - 1304 - return allRemoteOps, prevBoundaryCIDs, nil 1305 - } 1306 - 1307 1095 func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) { 1308 1096 resp := newResponse(w) 1309 - ctx := r.Context() 1310 1097 1311 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 1312 - if err != nil { 1313 - resp.error(err.Error(), http.StatusInternalServerError) 1314 - return 1315 - } 1316 - 1098 + lastBundle := s.bundleManager.GetLastBundleNumber() 1317 1099 if lastBundle == 0 { 1318 1100 resp.json(map[string]interface{}{ 1319 1101 "status": "empty", ··· 1327 1109 var errorMsg string 1328 1110 1329 1111 for i := 1; i <= lastBundle; i++ { 1330 - bundle, err := s.db.GetBundleByNumber(ctx, i) 1112 + bundle, err := s.bundleManager.GetBundleMetadata(i) 1331 1113 if err != nil { 1332 1114 valid = false 1333 1115 brokenAt = i ··· 1336 1118 } 1337 1119 1338 1120 if i > 1 { 1339 - prevBundle, err := s.db.GetBundleByNumber(ctx, i-1) 1121 + prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1) 1340 1122 if err != nil { 1341 1123 valid = false 1342 1124 brokenAt = i ··· 1344 1126 break 1345 1127 } 1346 1128 1347 - if bundle.PrevBundleHash != prevBundle.Hash { 1129 + if bundle.Parent != prevBundle.Hash { 1348 1130 valid = false 1349 1131 brokenAt = i 1350 - errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1) 1132 + errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1) 1351 1133 break 1352 1134 } 1353 1135 } ··· 1368 1150 1369 1151 func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) { 1370 1152 resp := newResponse(w) 1371 - ctx := r.Context() 1372 1153 1373 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 1374 - if err != nil { 1375 - resp.error(err.Error(), http.StatusInternalServerError) 1376 - return 1377 - } 1378 - 1154 + lastBundle := s.bundleManager.GetLastBundleNumber() 1379 1155 if lastBundle == 0 { 1380 1156 resp.json(map[string]interface{}{ 1381 1157 "chain_length": 0, ··· 1384 1160 return 1385 1161 } 1386 1162 1387 - firstBundle, _ := s.db.GetBundleByNumber(ctx, 1) 1388 - lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle) 1389 - 1390 - // Updated to receive 5 values instead of 3 1391 - count, compressedSize, uncompressedSize, _, 
err := s.db.GetBundleStats(ctx) 1392 - if err != nil { 1393 - resp.error(err.Error(), http.StatusInternalServerError) 1394 - return 1395 - } 1163 + firstBundle, _ := s.bundleManager.GetBundleMetadata(1) 1164 + lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle) 1165 + stats := s.bundleManager.GetBundleStats() 1396 1166 1397 1167 resp.json(map[string]interface{}{ 1398 - "chain_length": lastBundle, 1399 - "total_bundles": count, 1400 - "total_compressed_size": compressedSize, 1401 - "total_compressed_size_mb": float64(compressedSize) / 1024 / 1024, 1402 - "total_uncompressed_size": uncompressedSize, 1403 - "total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 1404 - "compression_ratio": float64(uncompressedSize) / float64(compressedSize), 1405 - "chain_start_time": firstBundle.StartTime, 1406 - "chain_end_time": lastBundleData.EndTime, 1407 - "chain_head_hash": lastBundleData.Hash, 1408 - "first_prev_hash": firstBundle.PrevBundleHash, 1409 - "last_prev_hash": lastBundleData.PrevBundleHash, 1168 + "chain_length": lastBundle, 1169 + "total_bundles": stats["bundle_count"], 1170 + "total_compressed_size": stats["total_size"], 1171 + "total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024, 1172 + "chain_start_time": firstBundle.StartTime, 1173 + "chain_end_time": lastBundleData.EndTime, 1174 + "chain_head_hash": lastBundleData.Hash, 1175 + "first_parent": firstBundle.Parent, 1176 + "last_parent": lastBundleData.Parent, 1410 1177 }) 1411 1178 } 1412 1179 ··· 1427 1194 return 1428 1195 } 1429 1196 1430 - startBundle := s.findStartBundle(ctx, afterTime) 1197 + startBundle := s.findStartBundle(afterTime) 1431 1198 ops := s.collectOperations(ctx, startBundle, afterTime, count) 1432 1199 1433 1200 w.Header().Set("Content-Type", "application/jsonl") ··· 1467 1234 return time.Time{}, fmt.Errorf("invalid timestamp format") 1468 1235 } 1469 1236 1470 - func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int { 1237 + func (s *Server) findStartBundle(afterTime time.Time) int { 1471 1238 if afterTime.IsZero() { 1472 1239 return 1 1473 1240 } 1474 1241 1475 - foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime) 1476 - if err != nil { 1477 - return 1 1478 - } 1479 - 1242 + foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime) 1480 1243 if foundBundle > 1 { 1481 1244 return foundBundle - 1 1482 1245 } ··· 1487 1250 var allOps []plc.PLCOperation 1488 1251 seenCIDs := make(map[string]bool) 1489 1252 1490 - lastBundle, _ := s.db.GetLastBundleNumber(ctx) 1253 + lastBundle := s.bundleManager.GetLastBundleNumber() 1491 1254 1492 1255 for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ { 1493 1256 ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum) ··· 1647 1410 limit := getQueryInt(r, "limit", 0) 1648 1411 fromBundle := getQueryInt(r, "from", 1) 1649 1412 1650 - history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle) 1413 + // Use BundleManager instead of database 1414 + history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle) 1651 1415 if err != nil { 1652 1416 resp.error(err.Error(), http.StatusInternalServerError) 1653 1417 return ··· 1719 1483 }) 1720 1484 } 1721 1485 1722 - // ===== UTILITY FUNCTIONS ===== 1486 + func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) { 1487 + resp := newResponse(w) 1723 1488 1724 - func computeOperationsHash(ops []plc.PLCOperation) string { 1725 - var jsonlData []byte 1726 - 
for _, op := range ops { 1727 - jsonlData = append(jsonlData, op.RawJSON...) 1728 - jsonlData = append(jsonlData, '\n') 1489 + bundleNum, err := getBundleNumber(r) 1490 + if err != nil { 1491 + resp.error("invalid bundle number", http.StatusBadRequest) 1492 + return 1729 1493 } 1730 - hash := sha256.Sum256(jsonlData) 1731 - return hex.EncodeToString(hash[:]) 1494 + 1495 + labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum) 1496 + if err != nil { 1497 + resp.error(err.Error(), http.StatusInternalServerError) 1498 + return 1499 + } 1500 + 1501 + resp.json(map[string]interface{}{ 1502 + "bundle": bundleNum, 1503 + "count": len(labels), 1504 + "labels": labels, 1505 + }) 1732 1506 } 1507 + 1508 + // ===== UTILITY FUNCTIONS ===== 1733 1509 1734 1510 func normalizeEndpoint(endpoint string) string { 1735 1511 endpoint = strings.TrimPrefix(endpoint, "https://")
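Note on the handler changes above: the bundle endpoints now read metadata from the BundleManager's in-memory index instead of querying the database. The `getBundleNumber` helper used by `handleGetBundleLabels` is not part of this diff; a plausible sketch, assuming it reads the `{number}` path variable registered with gorilla/mux:

```go
package api

import (
	"fmt"
	"net/http"
	"strconv"

	"github.com/gorilla/mux"
)

// getBundleNumber parses the {number} path variable into a 1-based bundle
// number. Hypothetical helper: the real implementation is not shown here.
func getBundleNumber(r *http.Request) (int, error) {
	raw, ok := mux.Vars(r)["number"]
	if !ok {
		return 0, fmt.Errorf("missing bundle number")
	}
	n, err := strconv.Atoi(raw)
	if err != nil || n < 1 {
		return 0, fmt.Errorf("invalid bundle number: %q", raw)
	}
	return n, nil
}
```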
+7 -11
internal/api/server.go
··· 6 6 "net/http" 7 7 "time" 8 8 9 - "github.com/atscan/atscanner/internal/config" 10 - "github.com/atscan/atscanner/internal/log" 11 - "github.com/atscan/atscanner/internal/plc" 12 - "github.com/atscan/atscanner/internal/storage" 9 + "github.com/atscan/atscand/internal/config" 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/atscan/atscand/internal/plc" 12 + "github.com/atscan/atscand/internal/storage" 13 13 "github.com/gorilla/handlers" 14 14 "github.com/gorilla/mux" 15 15 ) ··· 18 18 router *mux.Router 19 19 server *http.Server 20 20 db storage.Database 21 - plcClient *plc.Client 22 21 plcBundleDir string 23 22 bundleManager *plc.BundleManager 24 23 plcIndexDIDs bool 25 24 } 26 25 27 - func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server { 28 - bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs) 29 - 26 + func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server { 30 27 s := &Server{ 31 28 router: mux.NewRouter(), 32 29 db: db, 33 - plcClient: plc.NewClient(plcCfg.DirectoryURL), 34 30 plcBundleDir: plcCfg.BundleDir, 35 - bundleManager: bundleManager, 31 + bundleManager: bundleManager, // Use provided shared instance 36 32 plcIndexDIDs: plcCfg.IndexDIDs, 37 33 } 38 34 ··· 88 84 api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET") 89 85 api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET") 90 86 api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET") 91 - api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST") 87 + api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET") 92 88 93 89 // PLC history/metrics 94 90 api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
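With `NewServer` no longer constructing its own BundleManager, the caller owns the instance and its lifecycle. A minimal wiring sketch, assuming a `config.Config` struct with `PLC` and `API` fields; the constructor signatures are the ones visible in this diff:

```go
package main

import (
	"github.com/atscan/atscand/internal/api"
	"github.com/atscan/atscand/internal/config"
	"github.com/atscan/atscand/internal/plc"
	"github.com/atscan/atscand/internal/storage"
)

// buildServer creates one shared BundleManager and injects it; the caller
// is responsible for calling Close on the returned manager at shutdown.
func buildServer(cfg *config.Config, db storage.Database) (*api.Server, *plc.BundleManager, error) {
	bm, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
	if err != nil {
		return nil, nil, err
	}
	return api.NewServer(db, cfg.API, cfg.PLC, bm), bm, nil
}
```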
+2 -2
internal/log/log.go
··· 28 28 errorLog = log.New(os.Stderr, "", 0) 29 29 } 30 30 31 - // timestamp returns current time in ISO 8601 format 31 + // timestamp returns current time with milliseconds (local time, no timezone) 32 32 func timestamp() string { 33 - return time.Now().Format(time.RFC3339) 33 + return time.Now().Format("2006-01-02T15:04:05.000") 34 34 } 35 35 36 36 func Verbose(format string, v ...interface{}) {
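Go time layouts use the reference time `Mon Jan 2 15:04:05 MST 2006`; the new layout keeps milliseconds and drops the zone suffix. A quick comparison of the two formats:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.Date(2025, 1, 2, 15, 4, 5, 678_000_000, time.UTC)
	fmt.Println(t.Format(time.RFC3339))              // 2025-01-02T15:04:05Z (old: zone, no ms)
	fmt.Println(t.Format("2006-01-02T15:04:05.000")) // 2025-01-02T15:04:05.678 (new: ms, no zone)
}
```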
+44 -45
internal/pds/client.go
··· 84 84 } 85 85 86 86 // DescribeServer fetches com.atproto.server.describeServer 87 - func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) { 87 + // Returns: description, responseTime, usedIP, error 88 + func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) { 89 + startTime := time.Now() 88 90 url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint) 89 91 90 - //fmt.Println(url) 92 + // Track which IP was used 93 + var usedIP string 94 + transport := &http.Transport{ 95 + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 96 + conn, err := (&net.Dialer{ 97 + Timeout: 30 * time.Second, 98 + KeepAlive: 30 * time.Second, 99 + }).DialContext(ctx, network, addr) 100 + 101 + if err == nil && conn != nil { 102 + if remoteAddr := conn.RemoteAddr(); remoteAddr != nil { 103 + if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok { 104 + usedIP = tcpAddr.IP.String() 105 + } 106 + } 107 + } 108 + return conn, err 109 + }, 110 + } 111 + 112 + client := &http.Client{ 113 + Timeout: c.httpClient.Timeout, 114 + Transport: transport, 115 + } 91 116 92 117 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 93 118 if err != nil { 94 - return nil, err 119 + return nil, 0, "", err 95 120 } 96 121 97 - resp, err := c.httpClient.Do(req) 122 + resp, err := client.Do(req) 123 + responseTime := time.Since(startTime) 124 + 98 125 if err != nil { 99 - return nil, err 126 + return nil, responseTime, usedIP, err 100 127 } 101 128 defer resp.Body.Close() 102 129 103 130 if resp.StatusCode != http.StatusOK { 104 - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 131 + return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 105 132 } 106 133 107 134 var desc ServerDescription 108 135 if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil { 109 - return nil, err 136 + return nil, responseTime, usedIP, err 110 137 } 111 138 112 - return &desc, nil 139 + return &desc, responseTime, usedIP, nil 113 140 } 114 141 115 142 // CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version" 116 - // Returns: available, responseTime, version, usedIP, error 117 - func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, string, error) { 143 + // Returns: available, responseTime, version, error 144 + func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) { 118 145 startTime := time.Now() 119 146 120 147 url := fmt.Sprintf("%s/xrpc/_health", endpoint) 121 148 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 122 149 if err != nil { 123 - return false, 0, "", "", err 124 - } 125 - 126 - // Create a custom dialer to track which IP was actually used 127 - var usedIP string 128 - transport := &http.Transport{ 129 - DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 130 - conn, err := (&net.Dialer{ 131 - Timeout: 30 * time.Second, 132 - KeepAlive: 30 * time.Second, 133 - }).DialContext(ctx, network, addr) 134 - 135 - if err == nil && conn != nil { 136 - if remoteAddr := conn.RemoteAddr(); remoteAddr != nil { 137 - // Extract IP from "ip:port" format 138 - if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok { 139 - usedIP = tcpAddr.IP.String() 140 - } 141 - } 142 - } 143 - 144 - return conn, err 145 - }, 150 + return false, 0, "", err 146 151 } 147 152 148 - // 
Create a client with our custom transport 149 - client := &http.Client{ 150 - Timeout: c.httpClient.Timeout, 151 - Transport: transport, 152 - } 153 - 154 - resp, err := client.Do(req) 153 + resp, err := c.httpClient.Do(req) 155 154 duration := time.Since(startTime) 156 155 157 156 if err != nil { 158 - return false, duration, "", usedIP, err 157 + return false, duration, "", err 159 158 } 160 159 defer resp.Body.Close() 161 160 162 161 if resp.StatusCode != http.StatusOK { 163 - return false, duration, "", usedIP, fmt.Errorf("health check returned status %d", resp.StatusCode) 162 + return false, duration, "", fmt.Errorf("health check returned status %d", resp.StatusCode) 164 163 } 165 164 166 165 // Decode the JSON response and check for "version" ··· 169 168 } 170 169 171 170 if err := json.NewDecoder(resp.Body).Decode(&healthResponse); err != nil { 172 - return false, duration, "", usedIP, fmt.Errorf("failed to decode health JSON: %w", err) 171 + return false, duration, "", fmt.Errorf("failed to decode health JSON: %w", err) 173 172 } 174 173 175 174 if healthResponse.Version == "" { 176 - return false, duration, "", usedIP, fmt.Errorf("health JSON response missing 'version' field") 175 + return false, duration, "", fmt.Errorf("health JSON response missing 'version' field") 177 176 } 178 177 179 178 // All checks passed 180 - return true, duration, healthResponse.Version, usedIP, nil 179 + return true, duration, healthResponse.Version, nil 181 180 }
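The rewritten `DescribeServer` builds a fresh `http.Transport` per call so its custom `DialContext` can record the dialed IP; the trade-off is that each call gives up connection reuse. A hedged alternative sketch (not what the project does) that captures the peer address per request with `net/http/httptrace` while keeping one shared client:

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptrace"
)

// remoteAddr performs a GET and reports the peer address ("ip:port") the
// request actually used, without a throwaway Transport per call.
func remoteAddr(ctx context.Context, client *http.Client, url string) (string, error) {
	var addr string
	trace := &httptrace.ClientTrace{
		GotConn: func(info httptrace.GotConnInfo) {
			addr = info.Conn.RemoteAddr().String()
		},
	}
	req, err := http.NewRequestWithContext(httptrace.WithClientTrace(ctx, trace), "GET", url, nil)
	if err != nil {
		return "", err
	}
	resp, err := client.Do(req)
	if err != nil {
		return addr, err
	}
	resp.Body.Close()
	return addr, nil
}

func main() {
	addr, err := remoteAddr(context.Background(), http.DefaultClient, "https://example.com")
	fmt.Println(addr, err)
}
```

With pooling enabled, GotConn may report a reused connection rather than a fresh dial, which is usually the right answer to "which IP served this request".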
+36 -32
internal/pds/scanner.go
··· 8 8 "sync/atomic" 9 9 "time" 10 10 11 - "github.com/acarl005/stripansi" 12 - "github.com/atscan/atscanner/internal/config" 13 - "github.com/atscan/atscanner/internal/ipinfo" 14 - "github.com/atscan/atscanner/internal/log" 15 - "github.com/atscan/atscanner/internal/monitor" 16 - "github.com/atscan/atscanner/internal/storage" 11 + "github.com/atscan/atscand/internal/config" 12 + "github.com/atscan/atscand/internal/ipinfo" 13 + "github.com/atscan/atscand/internal/log" 14 + "github.com/atscan/atscand/internal/monitor" 15 + "github.com/atscan/atscand/internal/storage" 17 16 ) 18 17 19 18 type Scanner struct { ··· 40 39 servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{ 41 40 Type: "pds", 42 41 OnlyStale: true, 42 + OnlyValid: true, 43 43 RecheckInterval: s.config.RecheckInterval, 44 44 }) 45 45 if err != nil { ··· 127 127 // STEP 1: Resolve IPs (both IPv4 and IPv6) 128 128 ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint) 129 129 if err != nil { 130 - // Mark as offline due to DNS failure 131 130 s.saveScanResult(ctx, ep.ID, &ScanResult{ 132 131 Status: storage.EndpointStatusOffline, 133 132 ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err), ··· 146 145 go s.updateIPInfoIfNeeded(ctx, ips.IPv6) 147 146 } 148 147 149 - // STEP 2: Health check (now returns which IP was used) 150 - available, responseTime, version, usedIP, err := s.client.CheckHealth(ctx, ep.Endpoint) 151 - if err != nil || !available { 152 - errMsg := "health check failed" 153 - if err != nil { 154 - errMsg = err.Error() 155 - } 148 + // STEP 2: Call describeServer (primary health check + metadata) 149 + desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint) 150 + if err != nil { 156 151 s.saveScanResult(ctx, ep.ID, &ScanResult{ 157 152 Status: storage.EndpointStatusOffline, 158 - ResponseTime: responseTime, 159 - ErrorMessage: errMsg, 160 - UsedIP: usedIP, // Save even if failed 153 + ResponseTime: descResponseTime, 154 + ErrorMessage: fmt.Sprintf("describeServer failed: %v", err), 155 + UsedIP: usedIP, 161 156 }) 162 157 return 163 158 } 164 159 165 - // STEP 3: Fetch PDS-specific data 166 - desc, err := s.client.DescribeServer(ctx, ep.Endpoint) 167 - if err != nil { 168 - log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err) 169 - } else if desc != nil && desc.DID != "" { 160 + // Update server DID immediately 161 + if desc.DID != "" { 170 162 s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID) 171 163 } 172 164 173 - // Fetch repos with full info 165 + // STEP 3: Call _health to get version 166 + available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) 167 + if err != nil || !available { 168 + log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err) 169 + // Server is online (describeServer worked) but _health failed 170 + // Continue with empty version 171 + version = "" 172 + } 173 + 174 + // Calculate average response time from both calls 175 + avgResponseTime := descResponseTime 176 + if available { 177 + avgResponseTime = (descResponseTime + healthResponseTime) / 2 178 + } 179 + 180 + // STEP 4: Fetch repos 174 181 repoList, err := s.client.ListRepos(ctx, ep.Endpoint) 175 182 if err != nil { 176 183 log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err) 177 184 repoList = []Repo{} 178 185 } 179 186 180 - // Convert to DIDs for backward compatibility 187 + // Convert to DIDs 181 188 dids := make([]string, len(repoList)) 182 189 for i, repo := range repoList { 183 
190 dids[i] = repo.DID 184 191 } 185 192 186 - // STEP 4: SAVE scan result 193 + // STEP 5: SAVE scan result 187 194 s.saveScanResult(ctx, ep.ID, &ScanResult{ 188 195 Status: storage.EndpointStatusOnline, 189 - ResponseTime: responseTime, 196 + ResponseTime: avgResponseTime, 190 197 Description: desc, 191 198 DIDs: dids, 192 199 Version: version, 193 - UsedIP: usedIP, // NEW: Save which IP was used 200 + UsedIP: usedIP, // Only from describeServer 194 201 }) 195 202 196 - // Save repos in batches (only tracks changes) 203 + // STEP 6: Save repos in batches (only tracks changes) 197 204 if len(repoList) > 0 { 198 - batchSize := 10000 205 + batchSize := 100_000 199 206 200 207 log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint) 201 208 ··· 235 242 236 243 log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint) 237 244 } 238 - 239 - // IP info fetch already started at the beginning (step 1.5) 240 - // It will complete in the background 241 245 } 242 246 243 247 func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
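The repo save path now processes up to 100,000 repos per batch. The batching itself is plain slicing; a self-contained sketch of the pattern (the real loop persists each batch through the storage layer):

```go
package main

import "fmt"

// batches splits items into consecutive slices of at most n elements.
func batches(items []string, n int) [][]string {
	var out [][]string
	for len(items) > 0 {
		end := n
		if len(items) < end {
			end = len(items)
		}
		out = append(out, items[:end])
		items = items[end:]
	}
	return out
}

func main() {
	dids := []string{"did:plc:a", "did:plc:b", "did:plc:c"}
	for i, b := range batches(dids, 2) {
		fmt.Printf("batch %d: %v\n", i+1, b)
	}
}
```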
-696
internal/plc/bundle.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "bytes" 6 - "context" 7 - "crypto/sha256" 8 - "encoding/hex" 9 - "encoding/json" 10 - "fmt" 11 - "os" 12 - "path/filepath" 13 - "time" 14 - 15 - "github.com/atscan/atscanner/internal/log" 16 - "github.com/atscan/atscanner/internal/storage" 17 - "github.com/klauspost/compress/zstd" 18 - ) 19 - 20 - const BUNDLE_SIZE = 10000 21 - 22 - type BundleManager struct { 23 - dir string 24 - enabled bool 25 - encoder *zstd.Encoder 26 - decoder *zstd.Decoder 27 - db storage.Database 28 - indexDIDs bool 29 - } 30 - 31 - // ===== INITIALIZATION ===== 32 - 33 - func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) { 34 - if !enabled { 35 - log.Verbose("BundleManager disabled (enabled=false)") 36 - return &BundleManager{enabled: false}, nil 37 - } 38 - 39 - if err := os.MkdirAll(dir, 0755); err != nil { 40 - return nil, fmt.Errorf("failed to create bundle dir: %w", err) 41 - } 42 - 43 - encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) 44 - if err != nil { 45 - return nil, err 46 - } 47 - 48 - decoder, err := zstd.NewReader(nil) 49 - if err != nil { 50 - return nil, err 51 - } 52 - 53 - log.Verbose("BundleManager initialized: enabled=%v, indexDIDs=%v, dir=%s", enabled, indexDIDs, dir) 54 - 55 - return &BundleManager{ 56 - dir: dir, 57 - enabled: enabled, 58 - encoder: encoder, 59 - decoder: decoder, 60 - db: db, 61 - indexDIDs: indexDIDs, 62 - }, nil 63 - } 64 - 65 - func (bm *BundleManager) Close() { 66 - if bm.encoder != nil { 67 - bm.encoder.Close() 68 - } 69 - if bm.decoder != nil { 70 - bm.decoder.Close() 71 - } 72 - } 73 - 74 - // ===== BUNDLE FILE ABSTRACTION ===== 75 - 76 - type bundleFile struct { 77 - path string 78 - operations []PLCOperation 79 - uncompressedHash string 80 - compressedHash string 81 - } 82 - 83 - func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile { 84 - return &bundleFile{ 85 - path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)), 86 - } 87 - } 88 - 89 - func (bf *bundleFile) exists() bool { 90 - _, err := os.Stat(bf.path) 91 - return err == nil 92 - } 93 - 94 - func (bm *BundleManager) load(bf *bundleFile) error { 95 - compressed, err := os.ReadFile(bf.path) 96 - if err != nil { 97 - return fmt.Errorf("read failed: %w", err) 98 - } 99 - 100 - decompressed, err := bm.decoder.DecodeAll(compressed, nil) 101 - if err != nil { 102 - return fmt.Errorf("decompress failed: %w", err) 103 - } 104 - 105 - bf.operations = bm.parseJSONL(decompressed) 106 - return nil 107 - } 108 - 109 - func (bm *BundleManager) save(bf *bundleFile) error { 110 - jsonlData := bm.serializeJSONL(bf.operations) 111 - bf.uncompressedHash = bm.hash(jsonlData) 112 - 113 - compressed := bm.encoder.EncodeAll(jsonlData, nil) 114 - bf.compressedHash = bm.hash(compressed) 115 - 116 - return os.WriteFile(bf.path, compressed, 0644) 117 - } 118 - 119 - func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation { 120 - var ops []PLCOperation 121 - scanner := bufio.NewScanner(bytes.NewReader(data)) 122 - 123 - for scanner.Scan() { 124 - line := scanner.Bytes() 125 - if len(line) == 0 { 126 - continue 127 - } 128 - 129 - var op PLCOperation 130 - if err := json.Unmarshal(line, &op); err == nil { 131 - op.RawJSON = append([]byte(nil), line...) 
132 - ops = append(ops, op) 133 - } 134 - } 135 - 136 - return ops 137 - } 138 - 139 - func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte { 140 - var buf []byte 141 - for _, op := range ops { 142 - buf = append(buf, op.RawJSON...) 143 - buf = append(buf, '\n') 144 - } 145 - return buf 146 - } 147 - 148 - // ===== BUNDLE FETCHING ===== 149 - 150 - type bundleFetcher struct { 151 - client *Client 152 - seenCIDs map[string]bool 153 - currentAfter string 154 - fetchCount int 155 - } 156 - 157 - func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher { 158 - seen := make(map[string]bool) 159 - for cid := range prevBoundaryCIDs { 160 - seen[cid] = true 161 - } 162 - 163 - return &bundleFetcher{ 164 - client: client, 165 - seenCIDs: seen, 166 - currentAfter: afterTime, 167 - } 168 - } 169 - 170 - func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) { 171 - var ops []PLCOperation 172 - maxFetches := (target / 900) + 5 173 - 174 - for len(ops) < target && bf.fetchCount < maxFetches { 175 - bf.fetchCount++ 176 - batchSize := bf.calculateBatchSize(target - len(ops)) 177 - 178 - log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize) 179 - 180 - batch, shouldContinue := bf.fetchBatch(ctx, batchSize) 181 - 182 - for _, op := range batch { 183 - if !bf.seenCIDs[op.CID] { 184 - bf.seenCIDs[op.CID] = true 185 - ops = append(ops, op) 186 - 187 - if len(ops) >= target { 188 - return ops[:target], true 189 - } 190 - } 191 - } 192 - 193 - if !shouldContinue { 194 - break 195 - } 196 - } 197 - 198 - return ops, len(ops) >= target 199 - } 200 - 201 - func (bf *bundleFetcher) calculateBatchSize(remaining int) int { 202 - if bf.fetchCount == 0 { 203 - return 1000 204 - } 205 - if remaining < 100 { 206 - return 50 207 - } 208 - if remaining < 500 { 209 - return 200 210 - } 211 - return 1000 212 - } 213 - 214 - func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) { 215 - ops, err := bf.client.Export(ctx, ExportOptions{ 216 - Count: size, 217 - After: bf.currentAfter, 218 - }) 219 - 220 - if err != nil || len(ops) == 0 { 221 - return nil, false 222 - } 223 - 224 - if len(ops) > 0 { 225 - bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano) 226 - } 227 - 228 - return ops, len(ops) >= size 229 - } 230 - 231 - // ===== MAIN BUNDLE LOADING ===== 232 - 233 - func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) { 234 - if !bm.enabled { 235 - return nil, false, fmt.Errorf("bundle manager disabled") 236 - } 237 - 238 - bf := bm.newBundleFile(bundleNum) 239 - 240 - // Try local file first 241 - if bf.exists() { 242 - return bm.loadFromFile(ctx, bundleNum, bf) 243 - } 244 - 245 - // Fetch from PLC 246 - return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient) 247 - } 248 - 249 - func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) { 250 - log.Verbose("→ Loading bundle %06d from local file", bundleNum) 251 - 252 - // Verify hash if bundle is in DB 253 - if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil { 254 - if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil { 255 - log.Error("⚠ Hash mismatch for bundle %06d! 
Re-fetching...", bundleNum) 256 - os.Remove(bf.path) 257 - return nil, false, fmt.Errorf("hash mismatch") 258 - } 259 - log.Verbose("✓ Hash verified for bundle %06d", bundleNum) 260 - } 261 - 262 - if err := bm.load(bf); err != nil { 263 - return nil, false, err 264 - } 265 - 266 - // Index if not in DB 267 - if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil { 268 - bf.compressedHash = bm.hashFile(bf.path) 269 - bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations)) 270 - 271 - // Calculate cursor from previous bundle 272 - cursor := bm.calculateCursor(ctx, bundleNum) 273 - 274 - bm.indexBundle(ctx, bundleNum, bf, cursor) 275 - } 276 - 277 - return bf.operations, true, nil 278 - } 279 - 280 - func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) { 281 - log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum) 282 - 283 - afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum) 284 - fetcher := newBundleFetcher(client, afterTime, prevCIDs) 285 - 286 - ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE) 287 - 288 - log.Info(" Collected %d unique operations after %d fetches (complete=%v)", 289 - len(ops), fetcher.fetchCount, isComplete) 290 - 291 - if isComplete { 292 - bf.operations = ops 293 - if err := bm.save(bf); err != nil { 294 - log.Error("Warning: failed to save bundle: %v", err) 295 - } else { 296 - // The cursor is the afterTime that was used to fetch this bundle 297 - cursor := afterTime 298 - bm.indexBundle(ctx, bundleNum, bf, cursor) 299 - log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]", 300 - bundleNum, len(ops), bf.uncompressedHash[:16], cursor) 301 - } 302 - } 303 - 304 - return ops, isComplete, nil 305 - } 306 - 307 - func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) { 308 - if bundleNum == 1 { 309 - return "", nil 310 - } 311 - 312 - prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1) 313 - if err != nil { 314 - return "", nil 315 - } 316 - 317 - afterTime := prevBundle.EndTime.Format(time.RFC3339Nano) 318 - 319 - // Return stored boundary CIDs if available 320 - if len(prevBundle.BoundaryCIDs) > 0 { 321 - cids := make(map[string]bool) 322 - for _, cid := range prevBundle.BoundaryCIDs { 323 - cids[cid] = true 324 - } 325 - return afterTime, cids 326 - } 327 - 328 - // Fallback: compute from file 329 - bf := bm.newBundleFile(bundleNum - 1) 330 - if bf.exists() { 331 - if err := bm.load(bf); err == nil { 332 - _, cids := GetBoundaryCIDs(bf.operations) 333 - return afterTime, cids 334 - } 335 - } 336 - 337 - return afterTime, nil 338 - } 339 - 340 - // ===== BUNDLE INDEXING ===== 341 - 342 - func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error { 343 - log.Verbose("indexBundle called for bundle %06d: indexDIDs=%v", bundleNum, bm.indexDIDs) 344 - 345 - prevHash := "" 346 - if bundleNum > 1 { 347 - if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 348 - prevHash = prev.Hash 349 - } 350 - } 351 - 352 - dids := bm.extractUniqueDIDs(bf.operations) 353 - log.Verbose("Extracted %d unique DIDs from bundle %06d", len(dids), bundleNum) 354 - 355 - compressedFileSize := bm.getFileSize(bf.path) 356 - 357 - // Calculate uncompressed size 358 - uncompressedSize := int64(0) 359 - for _, op := range bf.operations { 360 - uncompressedSize += int64(len(op.RawJSON)) + 1 361 - } 362 - 363 - // 
Get time range from operations 364 - firstSeenAt := bf.operations[0].CreatedAt 365 - lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt 366 - 367 - bundle := &storage.PLCBundle{ 368 - BundleNumber: bundleNum, 369 - StartTime: firstSeenAt, 370 - EndTime: lastSeenAt, 371 - DIDCount: len(dids), 372 - Hash: bf.uncompressedHash, 373 - CompressedHash: bf.compressedHash, 374 - CompressedSize: compressedFileSize, 375 - UncompressedSize: uncompressedSize, 376 - Cursor: cursor, 377 - PrevBundleHash: prevHash, 378 - Compressed: true, 379 - CreatedAt: time.Now().UTC(), 380 - } 381 - 382 - log.Verbose("About to create bundle %06d in database (DIDCount=%d)", bundleNum, bundle.DIDCount) 383 - 384 - // Create bundle first 385 - if err := bm.db.CreateBundle(ctx, bundle); err != nil { 386 - log.Error("Failed to create bundle %06d in database: %v", bundleNum, err) 387 - return err 388 - } 389 - 390 - log.Verbose("Bundle %06d created successfully in database", bundleNum) 391 - 392 - // Index DIDs if enabled 393 - if bm.indexDIDs { 394 - start := time.Now() 395 - log.Verbose("Starting DID indexing for bundle %06d: %d unique DIDs", bundleNum, len(dids)) 396 - 397 - // Extract handle and PDS for each DID 398 - didInfoMap := ExtractDIDInfoMap(bf.operations) 399 - log.Verbose("Extracted info for %d DIDs from operations", len(didInfoMap)) 400 - 401 - successCount := 0 402 - errorCount := 0 403 - invalidHandleCount := 0 404 - 405 - // Upsert each DID with handle, pds, and bundle number 406 - for did, info := range didInfoMap { 407 - validHandle := ValidateHandle(info.Handle) 408 - if info.Handle != "" && validHandle == "" { 409 - //log.Verbose("Bundle %06d: Skipping invalid handle for DID %s (length: %d)", bundleNum, did, len(info.Handle)) 410 - invalidHandleCount++ 411 - } 412 - 413 - if err := bm.db.UpsertDID(ctx, did, bundleNum, validHandle, info.PDS); err != nil { 414 - log.Error("Failed to index DID %s for bundle %06d: %v", did, bundleNum, err) 415 - errorCount++ 416 - } else { 417 - successCount++ 418 - } 419 - } 420 - 421 - elapsed := time.Since(start) 422 - log.Info("✓ Indexed bundle %06d: %d DIDs succeeded, %d errors, %d invalid handles in %v", 423 - bundleNum, successCount, errorCount, invalidHandleCount, elapsed) 424 - } else { 425 - log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum) 426 - } 427 - 428 - return nil 429 - } 430 - 431 - func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string { 432 - didSet := make(map[string]bool) 433 - for _, op := range ops { 434 - didSet[op.DID] = true 435 - } 436 - 437 - dids := make([]string, 0, len(didSet)) 438 - for did := range didSet { 439 - dids = append(dids, did) 440 - } 441 - return dids 442 - } 443 - 444 - // ===== MEMPOOL BUNDLE CREATION ===== 445 - 446 - func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) { 447 - if !bm.enabled { 448 - return 0, fmt.Errorf("bundle manager disabled") 449 - } 450 - 451 - if len(operations) != BUNDLE_SIZE { 452 - return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations)) 453 - } 454 - 455 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 456 - if err != nil { 457 - return 0, err 458 - } 459 - bundleNum := lastBundle + 1 460 - 461 - bf := bm.newBundleFile(bundleNum) 462 - bf.operations = operations 463 - 464 - if err := bm.save(bf); err != nil { 465 - return 0, err 466 - } 467 - 468 - if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil { 469 - 
return 0, err 470 - } 471 - 472 - log.Info("✓ Created bundle %06d from mempool (hash: %s...)", 473 - bundleNum, bf.uncompressedHash[:16]) 474 - 475 - return bundleNum, nil 476 - } 477 - 478 - // ===== VERIFICATION ===== 479 - 480 - func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error { 481 - if !bm.enabled { 482 - return fmt.Errorf("bundle manager disabled") 483 - } 484 - 485 - log.Info("Verifying bundle chain from 1 to %06d...", endBundle) 486 - 487 - for i := 1; i <= endBundle; i++ { 488 - bundle, err := bm.db.GetBundleByNumber(ctx, i) 489 - if err != nil { 490 - return fmt.Errorf("bundle %06d not found: %w", i, err) 491 - } 492 - 493 - // Verify file hash 494 - path := bm.newBundleFile(i).path 495 - if err := bm.verifyHash(path, bundle.CompressedHash); err != nil { 496 - return fmt.Errorf("bundle %06d hash verification failed: %w", i, err) 497 - } 498 - 499 - // Verify chain link 500 - if i > 1 { 501 - prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1) 502 - if err != nil { 503 - return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i) 504 - } 505 - 506 - if bundle.PrevBundleHash != prevBundle.Hash { 507 - return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s", 508 - i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16]) 509 - } 510 - } 511 - 512 - if i%100 == 0 { 513 - log.Verbose(" ✓ Verified bundles 1-%06d", i) 514 - } 515 - } 516 - 517 - log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle) 518 - return nil 519 - } 520 - 521 - func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error { 522 - if !bm.enabled { 523 - return nil 524 - } 525 - 526 - for i := 1; i < targetBundle; i++ { 527 - if !bm.newBundleFile(i).exists() { 528 - if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil { 529 - return fmt.Errorf("bundle %06d is missing (required for continuity)", i) 530 - } 531 - } 532 - } 533 - 534 - return nil 535 - } 536 - 537 - // ===== UTILITY METHODS ===== 538 - 539 - func (bm *BundleManager) hash(data []byte) string { 540 - h := sha256.Sum256(data) 541 - return hex.EncodeToString(h[:]) 542 - } 543 - 544 - func (bm *BundleManager) hashFile(path string) string { 545 - data, _ := os.ReadFile(path) 546 - return bm.hash(data) 547 - } 548 - 549 - func (bm *BundleManager) verifyHash(path, expectedHash string) error { 550 - if expectedHash == "" { 551 - return nil 552 - } 553 - 554 - actualHash := bm.hashFile(path) 555 - if actualHash != expectedHash { 556 - return fmt.Errorf("hash mismatch") 557 - } 558 - return nil 559 - } 560 - 561 - func (bm *BundleManager) getFileSize(path string) int64 { 562 - if info, err := os.Stat(path); err == nil { 563 - return info.Size() 564 - } 565 - return 0 566 - } 567 - 568 - func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) { 569 - if !bm.enabled { 570 - return 0, 0, 0, 0, nil 571 - } 572 - return bm.db.GetBundleStats(ctx) 573 - } 574 - 575 - func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) { 576 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 577 - if err != nil { 578 - return nil, err 579 - } 580 - 581 - if lastBundle == 0 { 582 - return map[string]interface{}{ 583 - "chain_length": 0, 584 - "status": "empty", 585 - }, nil 586 - } 587 - 588 - firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1) 589 - lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle) 590 - 591 - return map[string]interface{}{ 592 - "chain_length": lastBundle, 
593 - "first_bundle": 1, 594 - "last_bundle": lastBundle, 595 - "chain_start_time": firstBundle.StartTime, 596 - "chain_end_time": lastBundleData.EndTime, 597 - "chain_head_hash": lastBundleData.Hash, 598 - }, nil 599 - } 600 - 601 - // ===== EXPORTED HELPERS ===== 602 - 603 - func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) { 604 - if len(operations) == 0 { 605 - return time.Time{}, nil 606 - } 607 - 608 - lastOp := operations[len(operations)-1] 609 - boundaryTime := lastOp.CreatedAt 610 - cidSet := make(map[string]bool) 611 - 612 - for i := len(operations) - 1; i >= 0; i-- { 613 - op := operations[i] 614 - if op.CreatedAt.Equal(boundaryTime) { 615 - cidSet[op.CID] = true 616 - } else { 617 - break 618 - } 619 - } 620 - 621 - return boundaryTime, cidSet 622 - } 623 - 624 - func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation { 625 - if len(operations) == 0 { 626 - return operations 627 - } 628 - 629 - boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp) 630 - if err != nil { 631 - return operations 632 - } 633 - 634 - startIdx := 0 635 - for startIdx < len(operations) { 636 - op := operations[startIdx] 637 - 638 - if op.CreatedAt.After(boundaryTime) { 639 - break 640 - } 641 - 642 - if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] { 643 - startIdx++ 644 - continue 645 - } 646 - 647 - break 648 - } 649 - 650 - return operations[startIdx:] 651 - } 652 - 653 - // LoadBundleOperations is a public method for external access (e.g., API handlers) 654 - func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) { 655 - if !bm.enabled { 656 - return nil, fmt.Errorf("bundle manager disabled") 657 - } 658 - 659 - bf := bm.newBundleFile(bundleNum) 660 - 661 - if !bf.exists() { 662 - return nil, fmt.Errorf("bundle %06d not found", bundleNum) 663 - } 664 - 665 - if err := bm.load(bf); err != nil { 666 - return nil, err 667 - } 668 - 669 - return bf.operations, nil 670 - } 671 - 672 - // calculateCursor determines the cursor value for a given bundle 673 - // For bundle 1: returns empty string 674 - // For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format 675 - func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string { 676 - if bundleNum == 1 { 677 - return "" 678 - } 679 - 680 - // Try to get cursor from previous bundle in DB 681 - if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 682 - return prevBundle.EndTime.Format(time.RFC3339Nano) 683 - } 684 - 685 - // If previous bundle not in DB, try to load it from file 686 - prevBf := bm.newBundleFile(bundleNum - 1) 687 - if prevBf.exists() { 688 - if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 { 689 - // Return the createdAt of the last operation in previous bundle 690 - lastOp := prevBf.operations[len(prevBf.operations)-1] 691 - return lastOp.CreatedAt.Format(time.RFC3339Nano) 692 - } 693 - } 694 - 695 - return "" 696 - }
-237
internal/plc/client.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "context" 6 - "encoding/json" 7 - "fmt" 8 - "io" 9 - "net/http" 10 - "strconv" 11 - "time" 12 - 13 - "github.com/atscan/atscanner/internal/log" 14 - ) 15 - 16 - type Client struct { 17 - baseURL string 18 - httpClient *http.Client 19 - rateLimiter *RateLimiter 20 - } 21 - 22 - func NewClient(baseURL string) *Client { 23 - // Rate limit: 90 requests per minute (leaving buffer below 100/min limit) 24 - rateLimiter := NewRateLimiter(90, time.Minute) 25 - 26 - return &Client{ 27 - baseURL: baseURL, 28 - httpClient: &http.Client{ 29 - Timeout: 60 * time.Second, 30 - }, 31 - rateLimiter: rateLimiter, 32 - } 33 - } 34 - 35 - func (c *Client) Close() { 36 - if c.rateLimiter != nil { 37 - c.rateLimiter.Stop() 38 - } 39 - } 40 - 41 - type ExportOptions struct { 42 - Count int 43 - After string // ISO 8601 datetime string 44 - } 45 - 46 - // Export fetches export data from PLC directory with rate limiting and retry 47 - func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) { 48 - return c.exportWithRetry(ctx, opts, 5) 49 - } 50 - 51 - // exportWithRetry implements retry logic with exponential backoff for rate limits 52 - func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) { 53 - var lastErr error 54 - backoff := 1 * time.Second 55 - 56 - for attempt := 1; attempt <= maxRetries; attempt++ { 57 - // Wait for rate limiter token 58 - if err := c.rateLimiter.Wait(ctx); err != nil { 59 - return nil, err 60 - } 61 - 62 - operations, retryAfter, err := c.doExport(ctx, opts) 63 - 64 - if err == nil { 65 - return operations, nil 66 - } 67 - 68 - lastErr = err 69 - 70 - // Check if it's a rate limit error (429) 71 - if retryAfter > 0 { 72 - log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d", 73 - retryAfter, attempt, maxRetries) 74 - 75 - select { 76 - case <-time.After(retryAfter): 77 - continue 78 - case <-ctx.Done(): 79 - return nil, ctx.Err() 80 - } 81 - } 82 - 83 - // Other errors - exponential backoff 84 - if attempt < maxRetries { 85 - log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v", 86 - attempt, maxRetries, err, backoff) 87 - 88 - select { 89 - case <-time.After(backoff): 90 - backoff *= 2 // Exponential backoff 91 - case <-ctx.Done(): 92 - return nil, ctx.Err() 93 - } 94 - } 95 - } 96 - 97 - return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr) 98 - } 99 - 100 - // doExport performs the actual HTTP request 101 - func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) { 102 - url := fmt.Sprintf("%s/export", c.baseURL) 103 - 104 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 105 - if err != nil { 106 - return nil, 0, err 107 - } 108 - 109 - // Add query parameters 110 - q := req.URL.Query() 111 - if opts.Count > 0 { 112 - q.Add("count", fmt.Sprintf("%d", opts.Count)) 113 - } 114 - if opts.After != "" { 115 - q.Add("after", opts.After) 116 - } 117 - req.URL.RawQuery = q.Encode() 118 - 119 - resp, err := c.httpClient.Do(req) 120 - if err != nil { 121 - return nil, 0, fmt.Errorf("request failed: %w", err) 122 - } 123 - defer resp.Body.Close() 124 - 125 - // Handle rate limiting (429) 126 - if resp.StatusCode == http.StatusTooManyRequests { 127 - retryAfter := parseRetryAfter(resp) 128 - 129 - // Also check x-ratelimit headers for info 130 - if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" { 131 - log.Verbose("Rate limit: 
%s", limit) 132 - } 133 - 134 - return nil, retryAfter, fmt.Errorf("rate limited (429)") 135 - } 136 - 137 - if resp.StatusCode != http.StatusOK { 138 - body, _ := io.ReadAll(resp.Body) 139 - return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 140 - } 141 - 142 - var operations []PLCOperation 143 - 144 - // PLC export returns newline-delimited JSON 145 - scanner := bufio.NewScanner(resp.Body) 146 - buf := make([]byte, 0, 64*1024) 147 - scanner.Buffer(buf, 1024*1024) 148 - 149 - lineCount := 0 150 - for scanner.Scan() { 151 - lineCount++ 152 - line := scanner.Bytes() 153 - 154 - if len(line) == 0 { 155 - continue 156 - } 157 - 158 - var op PLCOperation 159 - if err := json.Unmarshal(line, &op); err != nil { 160 - log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err) 161 - continue 162 - } 163 - 164 - // CRITICAL: Store the original raw JSON bytes 165 - op.RawJSON = make([]byte, len(line)) 166 - copy(op.RawJSON, line) 167 - 168 - operations = append(operations, op) 169 - } 170 - 171 - if err := scanner.Err(); err != nil { 172 - return nil, 0, fmt.Errorf("error reading response: %w", err) 173 - } 174 - 175 - return operations, 0, nil 176 - 177 - } 178 - 179 - // parseRetryAfter parses the Retry-After header 180 - func parseRetryAfter(resp *http.Response) time.Duration { 181 - retryAfter := resp.Header.Get("Retry-After") 182 - if retryAfter == "" { 183 - // Default to 5 minutes if no header 184 - return 5 * time.Minute 185 - } 186 - 187 - // Try parsing as seconds 188 - if seconds, err := strconv.Atoi(retryAfter); err == nil { 189 - return time.Duration(seconds) * time.Second 190 - } 191 - 192 - // Try parsing as HTTP date 193 - if t, err := http.ParseTime(retryAfter); err == nil { 194 - return time.Until(t) 195 - } 196 - 197 - // Default 198 - return 5 * time.Minute 199 - } 200 - 201 - // GetDID fetches a specific DID document from PLC 202 - func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) { 203 - // Wait for rate limiter 204 - if err := c.rateLimiter.Wait(ctx); err != nil { 205 - return nil, err 206 - } 207 - 208 - url := fmt.Sprintf("%s/%s", c.baseURL, did) 209 - 210 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 211 - if err != nil { 212 - return nil, err 213 - } 214 - 215 - resp, err := c.httpClient.Do(req) 216 - if err != nil { 217 - return nil, err 218 - } 219 - defer resp.Body.Close() 220 - 221 - if resp.StatusCode == http.StatusTooManyRequests { 222 - retryAfter := parseRetryAfter(resp) 223 - return nil, fmt.Errorf("rate limited, retry after %v", retryAfter) 224 - } 225 - 226 - if resp.StatusCode != http.StatusOK { 227 - body, _ := io.ReadAll(resp.Body) 228 - return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 229 - } 230 - 231 - var doc DIDDocument 232 - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { 233 - return nil, err 234 - } 235 - 236 - return &doc, nil 237 - }
+522
internal/plc/manager.go
··· 1 + package plc
2 +
3 + import (
4 + "context"
5 + "encoding/csv"
6 + "fmt"
7 + "io"
8 + "os"
9 + "path/filepath"
10 + "sort"
11 + "strconv"
12 + "strings"
13 + "time"
14 +
15 + "github.com/atscan/atscand/internal/log"
16 + "github.com/atscan/atscand/internal/storage"
17 + "github.com/klauspost/compress/zstd"
18 + plcbundle "tangled.org/atscan.net/plcbundle"
19 + )
20 +
21 + // BundleManager wraps the library's manager with database integration
22 + type BundleManager struct {
23 + libManager *plcbundle.Manager
24 + db storage.Database
25 + bundleDir string
26 + indexDIDs bool
27 + }
28 +
29 + func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30 + // Create library config
31 + config := plcbundle.DefaultConfig(bundleDir)
32 +
33 + // Create PLC client
34 + var client *plcbundle.PLCClient
35 + if plcURL != "" {
36 + client = plcbundle.NewPLCClient(plcURL)
37 + }
38 +
39 + // Create library manager
40 + libMgr, err := plcbundle.NewManager(config, client)
41 + if err != nil {
42 + return nil, fmt.Errorf("failed to create library manager: %w", err)
43 + }
44 +
45 + return &BundleManager{
46 + libManager: libMgr,
47 + db: db,
48 + bundleDir: bundleDir,
49 + indexDIDs: indexDIDs,
50 + }, nil
51 + }
52 +
53 + func (bm *BundleManager) Close() {
54 + if bm.libManager != nil {
55 + bm.libManager.Close()
56 + }
57 + }
58 +
59 + // LoadBundleOperations loads a bundle via the library and returns only its operations
60 + func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62 + if err != nil {
63 + return nil, err
64 + }
65 + return bundle.Operations, nil
66 + }
67 +
68 + // LoadBundle loads a full bundle with metadata
69 + func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70 + return bm.libManager.LoadBundle(ctx, bundleNum)
71 + }
72 +
73 + // FetchAndSaveBundle fetches the next bundle from PLC and saves it
74 + func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75 + // Fetch from PLC using library
76 + bundle, err := bm.libManager.FetchNextBundle(ctx)
77 + if err != nil {
78 + return nil, err
79 + }
80 +
81 + // Save to disk (library handles this)
82 + if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83 + return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84 + }
85 +
86 + // Index DIDs if enabled (still use database for this)
87 + if bm.indexDIDs && len(bundle.Operations) > 0 {
88 + if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89 + log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90 + }
91 + }
92 +
93 + log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94 +
95 + return bundle, nil
96 + }
97 +
98 + // indexBundleDIDs indexes DIDs from a bundle into the database
99 + func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100 + start := time.Now()
101 + log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102 +
103 + // Extract DID info from operations
104 + didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105 +
106 + successCount := 0
107 + errorCount := 0
108 + invalidHandleCount := 0
109 +
110 + // Upsert each DID
111 + for did, info := range didInfoMap {
112 + validHandle := ValidateHandle(info.Handle)
113 + if info.Handle != "" && validHandle == "" {
114 + invalidHandleCount++
115 + }
116 +
117 + if err := bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118 + log.Error("Failed to index DID %s: %v", did, err)
119 + errorCount++
120 + } else {
121 + successCount++
122 + }
123 + }
124 +
125 + elapsed := time.Since(start)
126 + log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127 + successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128 +
129 + return nil
130 + }
131 +
132 + // VerifyChain verifies bundle chain integrity (the library always checks the full chain; endBundle is currently unused)
133 + func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134 + result, err := bm.libManager.VerifyChain(ctx)
135 + if err != nil {
136 + return err
137 + }
138 +
139 + if !result.Valid {
140 + return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141 + }
142 +
143 + return nil
144 + }
145 +
146 + // GetChainInfo returns chain information
147 + func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148 + return bm.libManager.GetInfo(), nil
149 + }
150 +
151 + // GetMempoolStats returns mempool statistics from the library
152 + func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153 + return bm.libManager.GetMempoolStats()
154 + }
155 +
156 + // GetMempoolOperations returns all operations currently in mempool
157 + func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158 + return bm.libManager.GetMempoolOperations()
159 + }
160 +
161 + // GetIndex returns the library's bundle index
162 + func (bm *BundleManager) GetIndex() *plcbundle.Index {
163 + return bm.libManager.GetIndex()
164 + }
165 +
166 + // GetLastBundleNumber returns the last bundle number
167 + func (bm *BundleManager) GetLastBundleNumber() int {
168 + index := bm.libManager.GetIndex()
169 + lastBundle := index.GetLastBundle()
170 + if lastBundle == nil {
171 + return 0
172 + }
173 + return lastBundle.BundleNumber
174 + }
175 +
176 + // GetBundleMetadata gets bundle metadata by number
177 + func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178 + index := bm.libManager.GetIndex()
179 + return index.GetBundle(bundleNum)
180 + }
181 +
182 + // GetBundles returns the most recent bundles (newest first)
183 + func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184 + index := bm.libManager.GetIndex()
185 + allBundles := index.GetBundles()
186 +
187 + // Determine how many bundles to return
188 + count := limit
189 + if count <= 0 || count > len(allBundles) {
190 + count = len(allBundles)
191 + }
192 +
193 + // Build result in reverse order (newest first)
194 + result := make([]*plcbundle.BundleMetadata, count)
195 + for i := 0; i < count; i++ {
196 + result[i] = allBundles[len(allBundles)-1-i]
197 + }
198 +
199 + return result
200 + }
201 +
202 + // GetBundleStats returns bundle statistics
203 + func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204 + index := bm.libManager.GetIndex()
205 + stats := index.GetStats()
206 +
207 + // Convert to expected format (the index reports last_bundle as an int, or nil when empty)
208 + lastBundle := int64(0)
209 + if n, ok := stats["last_bundle"].(int); ok {
210 + lastBundle = int64(n)
211 + }
212 +
213 + // Calculate total uncompressed size by iterating through all bundles
214 + totalUncompressedSize := int64(0)
215 + allBundles := index.GetBundles()
216 + for _, bundle := range allBundles {
217 + totalUncompressedSize += bundle.UncompressedSize
218 + }
219 +
220 + return map[string]interface{}{
221 + "bundle_count": int64(stats["bundle_count"].(int)),
222 + "total_size": stats["total_size"].(int64),
223 + "total_uncompressed_size": totalUncompressedSize,
224 + "last_bundle": lastBundle,
225 + }
226 + }
227 +
228 + // GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229 + func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231 + if err != nil {
232 + return nil, 0, err
233 + }
234 +
235 + // Extract unique DIDs
236 + didSet := make(map[string]bool)
237 + for _, op := range bundle.Operations {
238 + didSet[op.DID] = true
239 + }
240 +
241 + dids := make([]string, 0, len(didSet))
242 + for did := range didSet {
243 + dids = append(dids, did)
244 + }
245 +
246 + return dids, bundle.DIDCount, nil
247 + }
248 +
249 + // FindBundleForTimestamp finds the bundle containing a timestamp
250 + func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251 + index := bm.libManager.GetIndex()
252 + bundles := index.GetBundles()
253 +
254 + // Find bundle containing this time
255 + for _, bundle := range bundles {
256 + if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257 + (bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258 + return bundle.BundleNumber
259 + }
260 + }
261 +
262 + // Return closest bundle before this time
263 + for i := len(bundles) - 1; i >= 0; i-- {
264 + if bundles[i].EndTime.Before(afterTime) {
265 + return bundles[i].BundleNumber
266 + }
267 + }
268 +
269 + return 1 // Default to first bundle
270 + }
271 +
272 + // StreamRaw streams raw compressed bundle data
273 + func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274 + return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275 + }
276 +
277 + // StreamDecompressed streams decompressed bundle data
278 + func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279 + return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280 + }
281 +
282 + // GetPLCHistory calculates historical statistics from the bundle index
283 + func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284 + index := bm.libManager.GetIndex()
285 + allBundles := index.GetBundles()
286 +
287 + // Filter bundles >= fromBundle
288 + var filtered []*plcbundle.BundleMetadata
289 + for _, b := range allBundles {
290 + if b.BundleNumber >= fromBundle {
291 + filtered = append(filtered, b)
292 + }
293 + }
294 +
295 + if len(filtered) == 0 {
296 + return []*storage.PLCHistoryPoint{}, nil
297 + }
298 +
299 + // Sort bundles by bundle number to ensure proper cumulative calculation
300 + sort.Slice(filtered, func(i, j int) bool {
301 + return filtered[i].BundleNumber < filtered[j].BundleNumber
302 + })
303 +
304 + // Group by date
305 + type dailyStat struct {
306 + lastBundle int
307 + bundleCount int
308 + totalUncompressed int64
309 + totalCompressed int64
310 + }
311 +
312 + dailyStats := make(map[string]*dailyStat)
313 +
314 + // Map to store the cumulative values at the end of each date
315 + dateCumulatives := make(map[string]struct {
316 + uncompressed int64
317 + compressed int64
318 + })
319 +
320 + // Calculate cumulative totals as we iterate through sorted bundles
321 + cumulativeUncompressed := int64(0)
322 + cumulativeCompressed := int64(0)
323 +
324 + for _, bundle := range filtered {
325 + dateStr := bundle.StartTime.Format("2006-01-02")
326 +
327 + // Update cumulative totals
328 + cumulativeUncompressed += bundle.UncompressedSize
329 + cumulativeCompressed += bundle.CompressedSize
330 +
331 + if stat, exists := dailyStats[dateStr]; exists {
332 + // Update existing day
333 + if bundle.BundleNumber > stat.lastBundle {
334 + stat.lastBundle = bundle.BundleNumber
335 + }
336 + stat.bundleCount++
337 + stat.totalUncompressed += bundle.UncompressedSize
338 + stat.totalCompressed += bundle.CompressedSize
339 + } else {
340 + // Create new day entry
341 + dailyStats[dateStr] = &dailyStat{
342 + lastBundle: bundle.BundleNumber,
343 + bundleCount: 1,
344 + totalUncompressed: bundle.UncompressedSize,
345 + totalCompressed: bundle.CompressedSize,
346 + }
347 + }
348 +
349 + // Store the cumulative values at the end of this date
350 + // (will be overwritten if there are multiple bundles on the same day)
351 + dateCumulatives[dateStr] = struct {
352 + uncompressed int64
353 + compressed int64
354 + }{
355 + uncompressed: cumulativeUncompressed,
356 + compressed: cumulativeCompressed,
357 + }
358 + }
359 +
360 + // Convert map to sorted slice by date
361 + var dates []string
362 + for date := range dailyStats {
363 + dates = append(dates, date)
364 + }
365 + sort.Strings(dates)
366 +
367 + // Build history points with cumulative operations
368 + var history []*storage.PLCHistoryPoint
369 + cumulativeOps := 0
370 +
371 + for _, date := range dates {
372 + stat := dailyStats[date]
373 + cumulativeOps += stat.bundleCount * 10000 // bundles hold a fixed 10,000 operations each
374 + cumulative := dateCumulatives[date]
375 +
376 + history = append(history, &storage.PLCHistoryPoint{
377 + Date: date,
378 + BundleNumber: stat.lastBundle,
379 + OperationCount: cumulativeOps,
380 + UncompressedSize: stat.totalUncompressed,
381 + CompressedSize: stat.totalCompressed,
382 + CumulativeUncompressed: cumulative.uncompressed,
383 + CumulativeCompressed: cumulative.compressed,
384 + })
385 + }
386 +
387 + // Apply limit if specified
388 + if limit > 0 && len(history) > limit {
389 + history = history[:limit]
390 + }
391 +
392 + return history, nil
393 + }
394 +
395 + // GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396 + func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397 + // Define the path to the labels file
398 + labelsDir := filepath.Join(bm.bundleDir, "labels")
399 + labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400 +
401 + // Check if file exists
402 + if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403 + log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404 + // Return empty, not an error
405 + return []*PLCOpLabel{}, nil
406 + }
407 +
408 + // Open the Zstd-compressed file
409 + file, err := os.Open(labelsFile)
410 + if err != nil {
411 + return nil, fmt.Errorf("failed to open labels file: %w", err)
412 + }
413 + defer file.Close()
414 +
415 + // Create a Zstd reader
416 + zstdReader, err := zstd.NewReader(file)
417 + if err != nil {
418 + return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419 + }
420 + defer zstdReader.Close()
421 +
422 + // Create a CSV reader
423 + csvReader := csv.NewReader(zstdReader)
424 + // The labels file has no header row, so records are read directly;
425 + // each record must have exactly 6 fields (parseLabelRecord validates this)
426 + //csvReader.FieldsPerRecord = 6
427 +
428 + var labels []*PLCOpLabel
429 +
430 + // Read all records
431 + for {
432 + // Check for context cancellation
433 + if err := ctx.Err(); err != nil {
434 + return nil, err
435 + }
436 +
437 + record, err := csvReader.Read()
438 + if err == io.EOF {
439 + break // End of file
440 + }
441 + if err != nil {
442 + log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443 + continue // Skip bad line
444 + }
445 +
446 + // Parse the CSV record (which is []string)
447 + label, err := parseLabelRecord(record)
448 + if err != nil {
449 + log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450 + continue // Skip bad data
451 + }
452 +
453 + labels = append(labels, label)
454 + }
455 +
456 + return labels, nil
457 + }
458 +
459 + // parseLabelRecord converts a CSV record in the 6-column label format into a PLCOpLabel
460 + func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461 + // Format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462 + if len(record) != 6 {
463 + err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464 + // Log the raw record so malformed rows are easy to locate
465 + log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466 +
467 + return nil, err
468 + }
469 +
470 + // 0:bundle
471 + bundle, err := strconv.Atoi(record[0])
472 + if err != nil {
473 +
474 + log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475 +
476 + return nil, fmt.Errorf("parsing 'bundle': %w", err)
477 + }
478 +
479 + // 1:position
480 + position, err := strconv.Atoi(record[1])
481 + if err != nil {
482 +
483 + log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484 +
485 + return nil, fmt.Errorf("parsing 'position': %w", err)
486 + }
487 +
488 + // 2:cid(short)
489 + shortCID := record[2]
490 +
491 + // 3:size
492 + size, err := strconv.Atoi(record[3])
493 + if err != nil {
494 +
495 + log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496 +
497 + return nil, fmt.Errorf("parsing 'size': %w", err)
498 + }
499 +
500 + // 4:confidence
501 + confidence, err := strconv.ParseFloat(record[4], 64)
502 + if err != nil {
503 +
504 + log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505 +
506 + return nil, fmt.Errorf("parsing 'confidence': %w", err)
507 + }
508 +
509 + // 5:labels
510 + detectors := strings.Split(record[5], ";")
511 +
512 + label := &PLCOpLabel{
513 + Bundle: bundle,
514 + Position: position,
515 + CID: shortCID,
516 + Size: size,
517 + Confidence: confidence,
518 + Detectors: detectors,
519 + }
520 +
521 + return label, nil
522 + }
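For reference, a label row in the 6-column format `GetBundleLabels` consumes, parsed with the standard library the same way the method does. The sample values are invented for illustration:

```go
package main

import (
	"encoding/csv"
	"fmt"
	"strings"
)

// Hypothetical row: bundle,position,cid(short),size,confidence,labels
// where labels is a ;-separated list of detector names.
const sample = "42,1337,bafyreib3,2048,0.93,spam;burst"

func main() {
	rec, err := csv.NewReader(strings.NewReader(sample)).Read()
	if err != nil {
		panic(err)
	}
	fmt.Printf("bundle=%s pos=%s cid=%s size=%s conf=%s detectors=%v\n",
		rec[0], rec[1], rec[2], rec[3], rec[4], strings.Split(rec[5], ";"))
}
```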
-70
internal/plc/ratelimiter.go
··· 1 - package plc 2 - 3 - import ( 4 - "context" 5 - "time" 6 - ) 7 - 8 - // RateLimiter implements a token bucket rate limiter 9 - type RateLimiter struct { 10 - tokens chan struct{} 11 - refillRate time.Duration 12 - maxTokens int 13 - stopRefill chan struct{} 14 - } 15 - 16 - // NewRateLimiter creates a new rate limiter 17 - // Example: NewRateLimiter(90, time.Minute) = 90 requests per minute 18 - func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter { 19 - rl := &RateLimiter{ 20 - tokens: make(chan struct{}, requestsPerPeriod), 21 - refillRate: period / time.Duration(requestsPerPeriod), 22 - maxTokens: requestsPerPeriod, 23 - stopRefill: make(chan struct{}), 24 - } 25 - 26 - // Fill initially 27 - for i := 0; i < requestsPerPeriod; i++ { 28 - rl.tokens <- struct{}{} 29 - } 30 - 31 - // Start refill goroutine 32 - go rl.refill() 33 - 34 - return rl 35 - } 36 - 37 - // refill adds tokens at the specified rate 38 - func (rl *RateLimiter) refill() { 39 - ticker := time.NewTicker(rl.refillRate) 40 - defer ticker.Stop() 41 - 42 - for { 43 - select { 44 - case <-ticker.C: 45 - select { 46 - case rl.tokens <- struct{}{}: 47 - // Token added 48 - default: 49 - // Buffer full, skip 50 - } 51 - case <-rl.stopRefill: 52 - return 53 - } 54 - } 55 - } 56 - 57 - // Wait blocks until a token is available 58 - func (rl *RateLimiter) Wait(ctx context.Context) error { 59 - select { 60 - case <-rl.tokens: 61 - return nil 62 - case <-ctx.Done(): 63 - return ctx.Err() 64 - } 65 - } 66 - 67 - // Stop stops the rate limiter 68 - func (rl *RateLimiter) Stop() { 69 - close(rl.stopRefill) 70 - }
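
The hand-rolled token bucket is dropped, presumably because fetching now goes through the plcbundle library (see scanner.go below). If a standalone limiter is ever needed again, golang.org/x/time/rate covers the same ground in a few lines; a minimal sketch — the 90-per-minute figure mirrors the deleted example and is an assumption, not a statement about actual PLC limits:

```go
package main

import (
	"context"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// One token every minute/90 ≈ 667ms, burst of 1,
	// equivalent to the deleted NewRateLimiter(90, time.Minute)
	limiter := rate.NewLimiter(rate.Every(time.Minute/90), 1)

	ctx := context.Background()
	for i := 0; i < 3; i++ {
		// Wait blocks until a token is available or the context is cancelled
		if err := limiter.Wait(ctx); err != nil {
			return
		}
		// ... perform one PLC directory request here ...
	}
}
```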
+91 -432
internal/plc/scanner.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "encoding/json" 6 5 "fmt" 7 6 "strings" 8 7 "time" 9 8 10 - "github.com/acarl005/stripansi" 11 - "github.com/atscan/atscanner/internal/config" 12 - "github.com/atscan/atscanner/internal/log" 13 - "github.com/atscan/atscanner/internal/storage" 9 + "github.com/atscan/atscand/internal/config" 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/atscan/atscand/internal/storage" 14 12 ) 15 13 16 14 type Scanner struct { 17 - client *Client 15 + bundleManager *BundleManager 18 16 db storage.Database 19 17 config config.PLCConfig 20 - bundleManager *BundleManager 21 18 } 22 19 23 - func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner { 20 + func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner { 24 21 log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs) 25 22 26 - bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) 27 - if err != nil { 28 - log.Error("Warning: failed to initialize bundle manager: %v", err) 29 - bundleManager = &BundleManager{enabled: false} 30 - } 31 - 32 23 return &Scanner{ 33 - client: NewClient(cfg.DirectoryURL), 24 + bundleManager: bundleManager, // Use provided instance 34 25 db: db, 35 26 config: cfg, 36 - bundleManager: bundleManager, 37 27 } 38 28 } 39 29 40 30 func (s *Scanner) Close() { 41 - if s.bundleManager != nil { 42 - s.bundleManager.Close() 43 - } 44 - } 45 - 46 - // ScanMetrics tracks scan progress 47 - type ScanMetrics struct { 48 - totalFetched int64 // Total ops fetched from PLC/bundles 49 - totalProcessed int64 // Unique ops processed (after dedup) 50 - newEndpoints int64 // New endpoints discovered 51 - endpointCounts map[string]int64 52 - currentBundle int 53 - startTime time.Time 54 - } 55 - 56 - func newMetrics(startBundle int) *ScanMetrics { 57 - return &ScanMetrics{ 58 - endpointCounts: make(map[string]int64), 59 - currentBundle: startBundle, 60 - startTime: time.Now(), 61 - } 62 - } 63 - 64 - func (m *ScanMetrics) logSummary() { 65 - summary := formatEndpointCounts(m.endpointCounts) 66 - if m.newEndpoints > 0 { 67 - log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v", 68 - m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime)) 69 - } else { 70 - log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v", 71 - m.totalProcessed, m.totalFetched, time.Since(m.startTime)) 72 - } 31 + // Don't close bundleManager here - it's shared 73 32 } 74 33 75 34 func (s *Scanner) Scan(ctx context.Context) error { 76 35 log.Info("Starting PLC directory scan...") 77 - log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes") 78 36 79 37 cursor, err := s.db.GetScanCursor(ctx, "plc_directory") 80 38 if err != nil { 81 39 return fmt.Errorf("failed to get scan cursor: %w", err) 82 40 } 83 41 84 - startBundle := s.calculateStartBundle(cursor.LastBundleNumber) 85 - metrics := newMetrics(startBundle) 86 - 87 - if startBundle > 1 { 88 - if err := s.ensureContinuity(ctx, startBundle); err != nil { 89 - return err 90 - } 91 - } 42 + metrics := newMetrics(cursor.LastBundleNumber + 1) 92 43 93 - // Handle existing mempool first 94 - if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool { 95 - return s.handleMempoolOnly(ctx, metrics) 96 - } 97 - 98 - // Process bundles until incomplete or error 44 + // Main processing loop 99 45 for { 100 46 if err := ctx.Err(); err != nil { 101 47 return err 102 48 } 103 49 104 - if err := 
s.processSingleBundle(ctx, metrics); err != nil { 105 - if s.shouldRetry(err) { 106 - continue 107 - } 108 - break 109 - } 110 - 111 - if err := s.updateCursor(ctx, cursor, metrics); err != nil { 112 - log.Error("Warning: failed to update cursor: %v", err) 113 - } 114 - } 115 - 116 - // Try to finalize mempool 117 - s.finalizeMempool(ctx, metrics) 118 - 119 - metrics.logSummary() 120 - return nil 121 - } 122 - 123 - func (s *Scanner) calculateStartBundle(lastBundle int) int { 124 - if lastBundle == 0 { 125 - return 1 126 - } 127 - return lastBundle + 1 128 - } 129 - 130 - func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error { 131 - log.Info("Checking bundle continuity...") 132 - if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil { 133 - return fmt.Errorf("bundle continuity check failed: %w", err) 134 - } 135 - return nil 136 - } 137 - 138 - func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) { 139 - count, err := s.db.GetMempoolCount(ctx) 140 - if err != nil { 141 - return false, err 142 - } 143 - return count > 0, nil 144 - } 145 - 146 - func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error { 147 - count, _ := s.db.GetMempoolCount(ctx) 148 - log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count) 149 - 150 - if err := s.fillMempool(ctx, m); err != nil { 151 - return err 152 - } 153 - 154 - if err := s.processMempool(ctx, m); err != nil { 155 - log.Error("Error processing mempool: %v", err) 156 - } 157 - 158 - m.logSummary() 159 - return nil 160 - } 161 - 162 - func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error { 163 - log.Verbose("→ Processing bundle %06d...", m.currentBundle) 164 - 165 - ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client) 166 - if err != nil { 167 - return s.handleBundleError(err, m) 168 - } 169 - 170 - if isComplete { 171 - return s.handleCompleteBundle(ctx, ops, m) 172 - } 173 - return s.handleIncompleteBundle(ctx, ops, m) 174 - } 175 - 176 - func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error { 177 - log.Error("Failed to load bundle %06d: %v", m.currentBundle, err) 178 - 179 - if strings.Contains(err.Error(), "rate limited") { 180 - log.Info("⚠ Rate limit hit, pausing for 5 minutes...") 181 - time.Sleep(5 * time.Minute) 182 - return fmt.Errorf("retry") 183 - } 184 - 185 - if m.currentBundle > 1 { 186 - log.Info("→ Reached end of available data") 187 - } 188 - return err 189 - } 190 - 191 - func (s *Scanner) shouldRetry(err error) bool { 192 - return err != nil && err.Error() == "retry" 193 - } 194 - 195 - func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error { 196 - counts, err := s.processBatch(ctx, ops) 197 - if err != nil { 198 - return err 199 - } 200 - 201 - s.mergeCounts(m.endpointCounts, counts) 202 - m.totalProcessed += int64(len(ops)) // Unique ops after dedup 203 - m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints 204 - 205 - batchTotal := sumCounts(counts) 206 - log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints", 207 - m.currentBundle, len(ops), batchTotal) 208 - 209 - m.currentBundle++ 210 - return nil 211 - } 212 - 213 - func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error { 214 - log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops)) 215 - 216 - if err := 
s.addToMempool(ctx, ops, m.endpointCounts); err != nil { 217 - return err 218 - } 219 - 220 - s.finalizeMempool(ctx, m) 221 - return fmt.Errorf("incomplete") // Signal end of processing 222 - } 223 - 224 - func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) { 225 - if err := s.fillMempool(ctx, m); err != nil { 226 - log.Error("Error filling mempool: %v", err) 227 - } 228 - if err := s.processMempool(ctx, m); err != nil { 229 - log.Error("Error processing mempool: %v", err) 230 - } 231 - } 232 - 233 - func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error { 234 - const fetchLimit = 1000 235 - 236 - for { 237 - count, err := s.db.GetMempoolCount(ctx) 50 + // Fetch and save bundle (library handles mempool internally) 51 + bundle, err := s.bundleManager.FetchAndSaveBundle(ctx) 238 52 if err != nil { 239 - return err 240 - } 53 + if isInsufficientOpsError(err) { 54 + // Show mempool status 55 + stats := s.bundleManager.libManager.GetMempoolStats() 56 + mempoolCount := stats["count"].(int) 241 57 242 - if count >= BUNDLE_SIZE { 243 - log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE) 244 - return nil 245 - } 58 + if mempoolCount > 0 { 59 + log.Info("→ Waiting for more operations (mempool has %d/%d ops)", 60 + mempoolCount, BUNDLE_SIZE) 61 + } else { 62 + log.Info("→ Caught up! No operations available") 63 + } 64 + break 65 + } 246 66 247 - log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE) 248 - 249 - // ✅ Fix: Don't capture unused 'ops' variable 250 - shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m) 251 - if err != nil { 252 - return err 253 - } 67 + if strings.Contains(err.Error(), "rate limited") { 68 + log.Info("⚠ Rate limited, pausing for 5 minutes...") 69 + time.Sleep(5 * time.Minute) 70 + continue 71 + } 254 72 255 - if !shouldContinue { 256 - finalCount, _ := s.db.GetMempoolCount(ctx) 257 - log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE) 258 - return nil 73 + return fmt.Errorf("failed to fetch bundle: %w", err) 259 74 } 260 - } 261 - } 262 75 263 - func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) { 264 - lastOp, err := s.db.GetLastMempoolOperation(ctx) 265 - if err != nil { 266 - return false, err 267 - } 268 - 269 - var after string 270 - if lastOp != nil { 271 - after = lastOp.CreatedAt.Format(time.RFC3339Nano) 272 - log.Verbose(" Using cursor: %s", after) 273 - } 274 - 275 - ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after}) 276 - if err != nil { 277 - return false, fmt.Errorf("failed to fetch from PLC: %w", err) 278 - } 279 - 280 - fetchedCount := len(ops) 281 - m.totalFetched += int64(fetchedCount) // Track all fetched 282 - log.Verbose(" Fetched %d operations from PLC", fetchedCount) 283 - 284 - if fetchedCount == 0 { 285 - count, _ := s.db.GetMempoolCount(ctx) 286 - log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE) 287 - return false, nil 288 - } 289 - 290 - beforeCount, err := s.db.GetMempoolCount(ctx) 291 - if err != nil { 292 - return false, err 293 - } 294 - 295 - endpointsBefore := sumCounts(m.endpointCounts) 296 - if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil { 297 - return false, err 298 - } 299 - endpointsAfter := sumCounts(m.endpointCounts) 300 - m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found 301 - 302 - afterCount, err := s.db.GetMempoolCount(ctx) 
303 - if err != nil { 304 - return false, err 305 - } 306 - 307 - uniqueAdded := int64(afterCount - beforeCount) // Cast to int64 308 - m.totalProcessed += uniqueAdded // Track unique ops processed 309 - 310 - log.Verbose(" Added %d new unique operations to mempool (%d were duplicates)", 311 - uniqueAdded, int64(fetchedCount)-uniqueAdded) 312 - 313 - // Continue only if got full batch 314 - shouldContinue := fetchedCount >= limit 315 - if !shouldContinue { 316 - log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit) 317 - } 318 - 319 - return shouldContinue, nil 320 - } 321 - 322 - func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error { 323 - mempoolOps := make([]storage.MempoolOperation, len(ops)) 324 - for i, op := range ops { 325 - mempoolOps[i] = storage.MempoolOperation{ 326 - DID: op.DID, 327 - Operation: string(op.RawJSON), 328 - CID: op.CID, 329 - CreatedAt: op.CreatedAt, 330 - } 331 - } 332 - 333 - if err := s.db.AddToMempool(ctx, mempoolOps); err != nil { 334 - return err 335 - } 336 - 337 - // NEW: Create/update DID records immediately when adding to mempool 338 - for _, op := range ops { 339 - info := ExtractDIDInfo(&op) 340 - 341 - // Validate handle length before saving 342 - validHandle := ValidateHandle(info.Handle) 343 - if info.Handle != "" && validHandle == "" { 344 - log.Verbose("Skipping invalid handle for DID %s (length: %d)", op.DID, len(info.Handle)) 345 - } 346 - 347 - if err := s.db.UpsertDIDFromMempool(ctx, op.DID, validHandle, info.PDS); err != nil { 348 - log.Error("Failed to upsert DID %s in mempool: %v", op.DID, err) 349 - // Don't fail the whole operation, just log 350 - } 351 - } 352 - 353 - // Process for endpoint discovery 354 - batchCounts, err := s.processBatch(ctx, ops) 355 - s.mergeCounts(counts, batchCounts) 356 - return err 357 - } 358 - 359 - func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error { 360 - for { 361 - count, err := s.db.GetMempoolCount(ctx) 76 + // Process operations for endpoint discovery 77 + counts, err := s.processBatch(ctx, bundle.Operations) 362 78 if err != nil { 363 - return err 79 + log.Error("Failed to process batch: %v", err) 80 + // Continue anyway 364 81 } 365 82 366 - log.Verbose("Mempool contains %d operations", count) 83 + // Update metrics 84 + s.mergeCounts(metrics.endpointCounts, counts) 85 + metrics.totalProcessed += int64(len(bundle.Operations)) 86 + metrics.newEndpoints += sumCounts(counts) 87 + metrics.currentBundle = bundle.BundleNumber 367 88 368 - if count < BUNDLE_SIZE { 369 - log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE) 370 - return nil 371 - } 89 + log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints", 90 + bundle.BundleNumber, len(bundle.Operations), sumCounts(counts)) 372 91 373 - log.Info("→ Creating bundle from mempool (%d operations available)...", count) 374 - 375 - // Updated to receive 4 values instead of 3 376 - bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx) 377 - if err != nil { 378 - return err 379 - } 380 - 381 - // Process and update metrics 382 - countsBefore := sumCounts(m.endpointCounts) 383 - counts, _ := s.processBatch(ctx, ops) 384 - s.mergeCounts(m.endpointCounts, counts) 385 - newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore 386 - 387 - m.totalProcessed += int64(len(ops)) 388 - m.newEndpoints += newEndpointsFound 389 - m.currentBundle = bundleNum 390 - 391 - if err := 
s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil { 92 + // Update cursor 93 + if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil { 392 94 log.Error("Warning: failed to update cursor: %v", err) 393 95 } 394 - 395 - log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor) 396 96 } 397 - } 398 97 399 - func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) { 400 - mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE) 401 - if err != nil { 402 - return 0, nil, "", err 98 + // Show final mempool status 99 + stats := s.bundleManager.libManager.GetMempoolStats() 100 + if count, ok := stats["count"].(int); ok && count > 0 { 101 + log.Info("Mempool contains %d operations (%.1f%% of next bundle)", 102 + count, float64(count)/float64(BUNDLE_SIZE)*100) 403 103 } 404 104 405 - ops, ids := s.deduplicateMempool(mempoolOps) 406 - if len(ops) < BUNDLE_SIZE { 407 - return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE) 408 - } 409 - 410 - // Determine cursor from last bundle 411 - cursor := "" 412 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 413 - if err == nil && lastBundle > 0 { 414 - if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil { 415 - cursor = bundle.EndTime.Format(time.RFC3339Nano) 416 - } 417 - } 418 - 419 - bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor) 420 - if err != nil { 421 - return 0, nil, "", err 422 - } 423 - 424 - if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil { 425 - return 0, nil, "", err 426 - } 427 - 428 - return bundleNum, ops, cursor, nil 105 + metrics.logSummary() 106 + return nil 429 107 } 430 108 431 - func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) { 432 - ops := make([]PLCOperation, 0, BUNDLE_SIZE) 433 - ids := make([]int64, 0, BUNDLE_SIZE) 434 - seenCIDs := make(map[string]bool) 435 - 436 - for _, mop := range mempoolOps { 437 - if seenCIDs[mop.CID] { 438 - ids = append(ids, mop.ID) 439 - continue 440 - } 441 - seenCIDs[mop.CID] = true 442 - 443 - var op PLCOperation 444 - json.Unmarshal([]byte(mop.Operation), &op) 445 - op.RawJSON = []byte(mop.Operation) 446 - 447 - ops = append(ops, op) 448 - ids = append(ids, mop.ID) 449 - 450 - if len(ops) >= BUNDLE_SIZE { 451 - break 452 - } 453 - } 454 - 455 - return ops, ids 456 - } 457 - 109 + // processBatch extracts endpoints from operations 458 110 func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) { 459 111 counts := make(map[string]int64) 460 112 seen := make(map[string]*PLCOperation) 461 113 462 114 // Collect unique endpoints 463 - for _, op := range ops { 115 + for i := range ops { 116 + op := &ops[i] 117 + 464 118 if op.IsNullified() { 465 119 continue 466 120 } 467 - for _, ep := range s.extractEndpointsFromOperation(op) { 121 + 122 + for _, ep := range s.extractEndpointsFromOperation(*op) { 468 123 key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint) 469 124 if _, exists := seen[key]; !exists { 470 - seen[key] = &op 125 + seen[key] = op 471 126 } 472 127 } 473 128 } ··· 483 138 } 484 139 485 140 if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil { 486 - log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err) 141 + log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err) 487 142 continue 488 
143 } 489 144 490 - log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint)) 145 + log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint) 491 146 counts[epType]++ 492 147 } 493 148 494 149 return counts, nil 495 - } 496 - 497 - func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error { 498 - return s.db.UpsertEndpoint(ctx, &storage.Endpoint{ 499 - EndpointType: epType, 500 - Endpoint: endpoint, 501 - DiscoveredAt: discoveredAt, 502 - LastChecked: time.Time{}, 503 - Status: storage.EndpointStatusUnknown, 504 - }) 505 150 } 506 151 507 152 func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo { ··· 544 189 return nil 545 190 } 546 191 547 - func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error { 548 - return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{ 549 - Source: "plc_directory", 550 - LastBundleNumber: m.currentBundle - 1, 551 - LastScanTime: time.Now().UTC(), 552 - RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed, 192 + func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error { 193 + valid := validateEndpoint(endpoint) 194 + return s.db.UpsertEndpoint(ctx, &storage.Endpoint{ 195 + EndpointType: epType, 196 + Endpoint: endpoint, 197 + DiscoveredAt: discoveredAt, 198 + LastChecked: time.Time{}, 199 + Status: storage.EndpointStatusUnknown, 200 + Valid: valid, 553 201 }) 554 202 } 555 203 ··· 577 225 return total 578 226 } 579 227 580 - func formatEndpointCounts(counts map[string]int64) string { 581 - if len(counts) == 0 { 582 - return "0 new endpoints" 583 - } 228 + func isInsufficientOpsError(err error) bool { 229 + return err != nil && strings.Contains(err.Error(), "insufficient operations") 230 + } 584 231 585 - total := sumCounts(counts) 232 + // ScanMetrics tracks scan progress 233 + type ScanMetrics struct { 234 + totalProcessed int64 235 + newEndpoints int64 236 + endpointCounts map[string]int64 237 + currentBundle int 238 + startTime time.Time 239 + } 586 240 587 - if len(counts) == 1 { 588 - for typ, count := range counts { 589 - return fmt.Sprintf("%d new %s endpoint(s)", count, typ) 590 - } 241 + func newMetrics(startBundle int) *ScanMetrics { 242 + return &ScanMetrics{ 243 + endpointCounts: make(map[string]int64), 244 + currentBundle: startBundle, 245 + startTime: time.Now(), 591 246 } 247 + } 592 248 593 - parts := make([]string, 0, len(counts)) 594 - for typ, count := range counts { 595 - parts = append(parts, fmt.Sprintf("%d %s", count, typ)) 249 + func (m *ScanMetrics) logSummary() { 250 + if m.newEndpoints > 0 { 251 + log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v", 252 + m.totalProcessed, m.newEndpoints, time.Since(m.startTime)) 253 + } else { 254 + log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v", 255 + m.totalProcessed, time.Since(m.startTime)) 596 256 } 597 - return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", ")) 598 257 }
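
`isInsufficientOpsError` matches on the error text because the condition originates inside the plcbundle library. The string match is fine as long as the library's message stays stable; if it ever exported a sentinel, `errors.Is` would be more robust. A sketch of that pattern — `ErrInsufficientOps` here is hypothetical, not an actual plcbundle export:

```go
package main

import (
	"errors"
	"fmt"
)

// Hypothetical sentinel; the plcbundle library is not known to export one.
var ErrInsufficientOps = errors.New("insufficient operations")

func fetchNext() error {
	// Wrapping with %w keeps the sentinel reachable through the chain
	return fmt.Errorf("fetch bundle: %w", ErrInsufficientOps)
}

func main() {
	err := fetchNext()
	fmt.Println(errors.Is(err, ErrInsufficientOps)) // true, no string matching needed
}
```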
+68 -55
internal/plc/types.go
··· 1 1 package plc
2 2
3 - import "time"
4 -
5 - type PLCOperation struct {
6 - DID string `json:"did"`
7 - Operation map[string]interface{} `json:"operation"`
8 - CID string `json:"cid"`
9 - Nullified interface{} `json:"nullified,omitempty"`
10 - CreatedAt time.Time `json:"createdAt"`
11 -
12 - RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13 - }
3 + import (
4 + "net/url"
5 + "strings"
14 6
15 - // Helper method to check if nullified
16 - func (op *PLCOperation) IsNullified() bool {
17 - if op.Nullified == nil {
18 - return false
19 - }
20 -
21 - switch v := op.Nullified.(type) {
22 - case bool:
23 - return v
24 - case string:
25 - return v != ""
26 - default:
27 - return false
28 - }
29 - }
30 -
31 - // Get nullifying CID if available
32 - func (op *PLCOperation) GetNullifyingCID() string {
33 - if s, ok := op.Nullified.(string); ok {
34 - return s
35 - }
36 - return ""
37 - }
7 + plclib "tangled.org/atscan.net/plcbundle/plc"
8 + )
38 9
39 - type DIDDocument struct {
40 - Context []string `json:"@context"`
41 - ID string `json:"id"`
42 - AlsoKnownAs []string `json:"alsoKnownAs"`
43 - VerificationMethod []VerificationMethod `json:"verificationMethod"`
44 - Service []Service `json:"service"`
45 - }
10 + // Re-export library types
11 + type PLCOperation = plclib.PLCOperation
12 + type DIDDocument = plclib.DIDDocument
13 + type Client = plclib.Client
14 + type ExportOptions = plclib.ExportOptions
46 15
47 - type VerificationMethod struct {
48 - ID string `json:"id"`
49 - Type string `json:"type"`
50 - Controller string `json:"controller"`
51 - PublicKeyMultibase string `json:"publicKeyMultibase"`
52 - }
16 + // Types that remain local to atscand
17 + const BUNDLE_SIZE = 10000
53 18
54 - type Service struct {
55 - ID string `json:"id"`
56 - Type string `json:"type"`
57 - ServiceEndpoint string `json:"serviceEndpoint"`
58 - }
59 -
60 - // DIDHistoryEntry represents a single operation in DID history
61 19 type DIDHistoryEntry struct {
62 20 Operation PLCOperation `json:"operation"`
63 21 PLCBundle string `json:"plc_bundle,omitempty"`
64 22 }
65 23
66 - // DIDHistory represents the full history of a DID
67 24 type DIDHistory struct {
68 25 DID string `json:"did"`
69 26 Current *PLCOperation `json:"current"`
··· 74 31 Type string
75 32 Endpoint string
76 33 }
34 +
35 + // PLCOpLabel holds metadata from the label CSV file
36 + type PLCOpLabel struct {
37 + Bundle int `json:"bundle"`
38 + Position int `json:"position"`
39 + CID string `json:"cid"`
40 + Size int `json:"size"`
41 + Confidence float64 `json:"confidence"`
42 + Detectors []string `json:"detectors"`
43 + }
44 +
45 + // validateEndpoint checks if endpoint is in correct format: https://<domain>
46 + func validateEndpoint(endpoint string) bool {
47 + // Must not be empty
48 + if endpoint == "" {
49 + return false
50 + }
51 +
52 + // Must not have trailing slash
53 + if strings.HasSuffix(endpoint, "/") {
54 + return false
55 + }
56 +
57 + // Parse URL
58 + u, err := url.Parse(endpoint)
59 + if err != nil {
60 + return false
61 + }
62 +
63 + // Must use https scheme
64 + if u.Scheme != "https" {
65 + return false
66 + }
67 +
68 + // Must have a host
69 + if u.Host == "" {
70 + return false
71 + }
72 +
73 + // Must not have a path
74 + if u.Path != "" && u.Path != "/" {
75 + return false
76 + }
77 +
78 + // Must not have query parameters
79 + if u.RawQuery != "" {
80 + return false
81 + }
82 +
83 + // Must not have fragment
84 + if u.Fragment != "" {
85 + return false
86 + }
87 +
88 + return true
89 + }
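
A quick table-driven test pins down the rules `validateEndpoint` enforces; a sketch with hypothetical hostnames:

```go
package plc

import "testing"

func TestValidateEndpoint(t *testing.T) {
	cases := []struct {
		endpoint string
		want     bool
	}{
		{"https://pds.example.com", true},
		{"https://pds.example.com:2583", true},  // a port is part of the host, allowed
		{"http://pds.example.com", false},       // must be https
		{"https://pds.example.com/", false},     // trailing slash rejected
		{"https://pds.example.com/xrpc", false}, // no path
		{"https://pds.example.com?x=1", false},  // no query parameters
		{"https://pds.example.com#f", false},    // no fragment
		{"", false},                             // empty string rejected
	}
	for _, c := range cases {
		if got := validateEndpoint(c.endpoint); got != c.want {
			t.Errorf("validateEndpoint(%q) = %v, want %v", c.endpoint, got, c.want)
		}
	}
}
```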
-21
internal/storage/db.go
··· 50 50 GetScanCursor(ctx context.Context, source string) (*ScanCursor, error) 51 51 UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error 52 52 53 - // Bundle operations 54 - CreateBundle(ctx context.Context, bundle *PLCBundle) error 55 - GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) 56 - GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) 57 - GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) 58 - GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, error) 59 - GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error) 60 - GetLastBundleNumber(ctx context.Context) (int, error) 61 - GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) 62 - GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) 63 - 64 - // Mempool operations 65 - AddToMempool(ctx context.Context, ops []MempoolOperation) error 66 - GetMempoolCount(ctx context.Context) (int, error) 67 - GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) 68 - DeleteFromMempool(ctx context.Context, ids []int64) error 69 - GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) 70 - GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) 71 - GetMempoolUniqueDIDCount(ctx context.Context) (int, error) 72 - GetMempoolUncompressedSize(ctx context.Context) (int64, error) 73 - 74 53 // Metrics 75 54 StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error 76 55 GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
+58 -566
internal/storage/postgres.go
··· 5 5 "database/sql" 6 6 "encoding/json" 7 7 "fmt" 8 - "strings" 9 8 "time" 10 9 11 - "github.com/atscan/atscanner/internal/log" 10 + "github.com/atscan/atscand/internal/log" 12 11 "github.com/jackc/pgx/v5" 13 12 "github.com/jackc/pgx/v5/pgxpool" 14 13 _ "github.com/jackc/pgx/v5/stdlib" ··· 85 84 ip TEXT, 86 85 ipv6 TEXT, 87 86 ip_resolved_at TIMESTAMP, 87 + valid BOOLEAN DEFAULT true, 88 88 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 89 89 UNIQUE(endpoint_type, endpoint) 90 90 ); ··· 96 96 CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6); 97 97 CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did); 98 98 CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at); 99 + CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid); 99 100 100 101 -- IP infos table (IP as PRIMARY KEY) 101 102 CREATE TABLE IF NOT EXISTS ip_infos ( ··· 157 158 records_processed BIGINT DEFAULT 0 158 159 ); 159 160 160 - CREATE TABLE IF NOT EXISTS plc_bundles ( 161 - bundle_number INTEGER PRIMARY KEY, 162 - start_time TIMESTAMP NOT NULL, 163 - end_time TIMESTAMP NOT NULL, 164 - did_count INTEGER NOT NULL DEFAULT 0, 165 - hash TEXT NOT NULL, 166 - compressed_hash TEXT NOT NULL, 167 - compressed_size BIGINT NOT NULL, 168 - uncompressed_size BIGINT NOT NULL, 169 - cumulative_compressed_size BIGINT NOT NULL, 170 - cumulative_uncompressed_size BIGINT NOT NULL, 171 - cursor TEXT, 172 - prev_bundle_hash TEXT, 173 - compressed BOOLEAN DEFAULT true, 174 - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 175 - ); 176 - 177 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time); 178 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash); 179 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash); 180 - CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC); 181 - 182 - CREATE TABLE IF NOT EXISTS plc_mempool ( 183 - id BIGSERIAL PRIMARY KEY, 184 - did TEXT NOT NULL, 185 - operation TEXT NOT NULL, 186 - cid TEXT NOT NULL UNIQUE, 187 - created_at TIMESTAMP NOT NULL, 188 - added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 189 - ); 190 - 191 - CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at); 192 - CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did); 193 - CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid); 194 - 195 161 -- Minimal dids table 196 162 CREATE TABLE IF NOT EXISTS dids ( 197 163 did TEXT PRIMARY KEY, ··· 244 210 245 211 func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error { 246 212 query := ` 247 - INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at) 248 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) 213 + INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid) 214 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) 249 215 ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET 250 216 last_checked = EXCLUDED.last_checked, 251 217 status = EXCLUDED.status, ··· 261 227 WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at 262 228 ELSE endpoints.ip_resolved_at 263 229 END, 230 + valid = EXCLUDED.valid, 264 231 updated_at = CURRENT_TIMESTAMP 265 232 RETURNING id 266 233 ` 267 234 err := p.db.QueryRowContext(ctx, query, 268 235 
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
269 - endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236 + endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
270 237 return err
271 238 }
272 239
··· 287 254 func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
288 255 query := `
289 256 SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
290 - ip, ipv6, ip_resolved_at, updated_at
257 + ip, ipv6, ip_resolved_at, valid, updated_at
291 258 FROM endpoints
292 259 WHERE endpoint = $1 AND endpoint_type = $2
293 260 `
··· 298 265
299 266 err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
300 267 &ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
301 - &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
268 + &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
302 269 )
303 270 if err != nil {
304 271 return nil, err
··· 324 291 query := `
325 292 SELECT DISTINCT ON (COALESCE(server_did, id::text))
326 293 id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
327 - ip, ipv6, ip_resolved_at, updated_at
294 + ip, ipv6, ip_resolved_at, valid, updated_at
328 295 FROM endpoints
329 296 WHERE 1=1
330 297 `
··· 336 303 query += fmt.Sprintf(" AND endpoint_type = $%d", argIdx)
337 304 args = append(args, filter.Type)
338 305 argIdx++
339 306 }
307 +
308 + // NEW: Filter by valid flag (no bind value, so no placeholder and no argIdx bump)
309 + if filter.OnlyValid {
310 + query += " AND valid = true"
311 + }
340 312 if filter.Status != "" {
341 313 statusInt := EndpointStatusUnknown
··· 602 574 last_checked,
603 575 status,
604 576 ip,
605 - ipv6
577 + ipv6,
578 + valid
606 579 FROM endpoints
607 580 WHERE endpoint_type = 'pds'
608 581 ORDER BY COALESCE(server_did, id::text), discovered_at ASC
609 582 )
610 583 SELECT
611 - e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6,
584 + e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
612 585 latest.user_count, latest.response_time, latest.version, latest.scanned_at,
613 586 i.city, i.country, i.country_code, i.asn, i.asn_org,
614 587 i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
··· 679 652 var scannedAt sql.NullTime
680 653
681 654 err := rows.Scan(
682 - &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6,
655 + &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
683 656 &userCount, &responseTime, &version, &scannedAt,
684 657 &city, &country, &countryCode, &asn, &asnOrg,
685 658 &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
··· 741 714
742 715 func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
743 716 query := `
744 - WITH target_endpoint AS (
717 + WITH target_endpoint AS MATERIALIZED (
745 718 SELECT
746 719 e.id,
747 720 e.endpoint,
··· 750 723 e.last_checked,
751 724 e.status,
752 725 e.ip,
753 - e.ipv6
726 + e.ipv6,
727 + e.valid
754 728 FROM endpoints e
755 - WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
756 - ),
757 - aliases_agg AS (
758 - SELECT
759 - te.server_did,
760 - array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
761 - MIN(e.discovered_at) as
first_discovered_at 762 - FROM target_endpoint te 763 - LEFT JOIN endpoints e ON te.server_did = e.server_did 764 - AND e.endpoint_type = 'pds' 765 - AND te.server_did IS NOT NULL 766 - GROUP BY te.server_did 729 + WHERE e.endpoint = $1 730 + AND e.endpoint_type = 'pds' 731 + LIMIT 1 767 732 ) 768 733 SELECT 769 734 te.id, ··· 774 739 te.status, 775 740 te.ip, 776 741 te.ipv6, 742 + te.valid, 777 743 latest.user_count, 778 744 latest.response_time, 779 745 latest.version, ··· 783 749 i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy, 784 750 i.latitude, i.longitude, 785 751 i.raw_data, 786 - COALESCE(aa.aliases, ARRAY[]::text[]) as aliases, 787 - aa.first_discovered_at 752 + COALESCE( 753 + ARRAY( 754 + SELECT e2.endpoint 755 + FROM endpoints e2 756 + WHERE e2.server_did = te.server_did 757 + AND e2.endpoint_type = 'pds' 758 + AND e2.endpoint != te.endpoint 759 + AND te.server_did IS NOT NULL 760 + ORDER BY e2.discovered_at 761 + ), 762 + ARRAY[]::text[] 763 + ) as aliases, 764 + CASE 765 + WHEN te.server_did IS NOT NULL THEN ( 766 + SELECT MIN(e3.discovered_at) 767 + FROM endpoints e3 768 + WHERE e3.server_did = te.server_did 769 + AND e3.endpoint_type = 'pds' 770 + ) 771 + ELSE NULL 772 + END as first_discovered_at 788 773 FROM target_endpoint te 789 - LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did 790 774 LEFT JOIN LATERAL ( 791 - SELECT scan_data, response_time, version, scanned_at, user_count 792 - FROM endpoint_scans 793 - WHERE endpoint_id = te.id 794 - ORDER BY scanned_at DESC 775 + SELECT 776 + es.scan_data, 777 + es.response_time, 778 + es.version, 779 + es.scanned_at, 780 + es.user_count 781 + FROM endpoint_scans es 782 + WHERE es.endpoint_id = te.id 783 + ORDER BY es.scanned_at DESC 795 784 LIMIT 1 796 785 ) latest ON true 797 - LEFT JOIN ip_infos i ON te.ip = i.ip 786 + LEFT JOIN ip_infos i ON te.ip = i.ip; 798 787 ` 799 788 800 789 detail := &PDSDetail{} ··· 812 801 var firstDiscoveredAt sql.NullTime 813 802 814 803 err := p.db.QueryRowContext(ctx, query, endpoint).Scan( 815 - &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, 804 + &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid, 816 805 &userCount, &responseTime, &version, &serverInfoJSON, &scannedAt, 817 806 &city, &country, &countryCode, &asn, &asnOrg, 818 807 &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy, ··· 839 828 // Set aliases and is_primary 840 829 detail.Aliases = aliases 841 830 if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid { 842 - // Has server_did - check if this is the first discovered 843 831 detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) || 844 832 detail.DiscoveredAt.Before(firstDiscoveredAt.Time) 845 833 } else { 846 - // No server_did means unique server 847 834 detail.IsPrimary = true 848 835 } 849 836 ··· 1174 1161 } 1175 1162 } 1176 1163 return 0 1177 - } 1178 - 1179 - // ===== BUNDLE OPERATIONS ===== 1180 - 1181 - func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error { 1182 - // Calculate cumulative sizes from previous bundle 1183 - if bundle.BundleNumber > 1 { 1184 - prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1) 1185 - if err == nil && prevBundle != nil { 1186 - bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize 1187 - bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + 
bundle.UncompressedSize 1188 - } else { 1189 - bundle.CumulativeCompressedSize = bundle.CompressedSize 1190 - bundle.CumulativeUncompressedSize = bundle.UncompressedSize 1191 - } 1192 - } else { 1193 - bundle.CumulativeCompressedSize = bundle.CompressedSize 1194 - bundle.CumulativeUncompressedSize = bundle.UncompressedSize 1195 - } 1196 - 1197 - query := ` 1198 - INSERT INTO plc_bundles ( 1199 - bundle_number, start_time, end_time, did_count, 1200 - hash, compressed_hash, compressed_size, uncompressed_size, 1201 - cumulative_compressed_size, cumulative_uncompressed_size, 1202 - cursor, prev_bundle_hash, compressed 1203 - ) 1204 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) 1205 - ON CONFLICT(bundle_number) DO UPDATE SET 1206 - start_time = EXCLUDED.start_time, 1207 - end_time = EXCLUDED.end_time, 1208 - did_count = EXCLUDED.did_count, 1209 - hash = EXCLUDED.hash, 1210 - compressed_hash = EXCLUDED.compressed_hash, 1211 - compressed_size = EXCLUDED.compressed_size, 1212 - uncompressed_size = EXCLUDED.uncompressed_size, 1213 - cumulative_compressed_size = EXCLUDED.cumulative_compressed_size, 1214 - cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size, 1215 - cursor = EXCLUDED.cursor, 1216 - prev_bundle_hash = EXCLUDED.prev_bundle_hash, 1217 - compressed = EXCLUDED.compressed 1218 - ` 1219 - _, err := p.db.ExecContext(ctx, query, 1220 - bundle.BundleNumber, bundle.StartTime, bundle.EndTime, 1221 - bundle.DIDCount, bundle.Hash, bundle.CompressedHash, 1222 - bundle.CompressedSize, bundle.UncompressedSize, 1223 - bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize, 1224 - bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed, 1225 - ) 1226 - 1227 - return err 1228 - } 1229 - 1230 - func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) { 1231 - query := ` 1232 - SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash, 1233 - compressed_size, uncompressed_size, cumulative_compressed_size, 1234 - cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at 1235 - FROM plc_bundles 1236 - WHERE bundle_number = $1 1237 - ` 1238 - 1239 - var bundle PLCBundle 1240 - var prevHash sql.NullString 1241 - var cursor sql.NullString 1242 - 1243 - err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan( 1244 - &bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime, 1245 - &bundle.DIDCount, &bundle.Hash, &bundle.CompressedHash, 1246 - &bundle.CompressedSize, &bundle.UncompressedSize, 1247 - &bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize, 1248 - &cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt, 1249 - ) 1250 - if err != nil { 1251 - return nil, err 1252 - } 1253 - 1254 - if prevHash.Valid { 1255 - bundle.PrevBundleHash = prevHash.String 1256 - } 1257 - if cursor.Valid { 1258 - bundle.Cursor = cursor.String 1259 - } 1260 - 1261 - return &bundle, nil 1262 - } 1263 - 1264 - func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) { 1265 - query := ` 1266 - SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash, 1267 - compressed_size, uncompressed_size, cumulative_compressed_size, 1268 - cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at 1269 - FROM plc_bundles 1270 - ORDER BY bundle_number DESC 1271 - LIMIT $1 1272 - ` 1273 - 1274 - rows, err := p.db.QueryContext(ctx, query, limit) 1275 - if err != nil { 1276 - return nil, err 1277 - } 1278 - defer rows.Close() 1279 - 
1280 - return p.scanBundles(rows) 1281 - } 1282 - 1283 - func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) { 1284 - // Get bundle numbers from dids table 1285 - var bundleNumbersJSON []byte 1286 - err := p.db.QueryRowContext(ctx, ` 1287 - SELECT bundle_numbers FROM dids WHERE did = $1 1288 - `, did).Scan(&bundleNumbersJSON) 1289 - 1290 - if err == sql.ErrNoRows { 1291 - return []*PLCBundle{}, nil 1292 - } 1293 - if err != nil { 1294 - return nil, err 1295 - } 1296 - 1297 - var bundleNumbers []int 1298 - if err := json.Unmarshal(bundleNumbersJSON, &bundleNumbers); err != nil { 1299 - return nil, err 1300 - } 1301 - 1302 - if len(bundleNumbers) == 0 { 1303 - return []*PLCBundle{}, nil 1304 - } 1305 - 1306 - // Build query with IN clause 1307 - placeholders := make([]string, len(bundleNumbers)) 1308 - args := make([]interface{}, len(bundleNumbers)) 1309 - for i, num := range bundleNumbers { 1310 - placeholders[i] = fmt.Sprintf("$%d", i+1) 1311 - args[i] = num 1312 - } 1313 - 1314 - query := fmt.Sprintf(` 1315 - SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash, 1316 - compressed_size, uncompressed_size, cumulative_compressed_size, 1317 - cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at 1318 - FROM plc_bundles 1319 - WHERE bundle_number IN (%s) 1320 - ORDER BY bundle_number ASC 1321 - `, strings.Join(placeholders, ",")) 1322 - 1323 - rows, err := p.db.QueryContext(ctx, query, args...) 1324 - if err != nil { 1325 - return nil, err 1326 - } 1327 - defer rows.Close() 1328 - 1329 - return p.scanBundles(rows) 1330 - } 1331 - 1332 - func (p *PostgresDB) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, error) { 1333 - query := ` 1334 - SELECT did 1335 - FROM dids 1336 - WHERE bundle_numbers @> $1::jsonb 1337 - ORDER BY did 1338 - ` 1339 - 1340 - rows, err := p.db.QueryContext(ctx, query, fmt.Sprintf("[%d]", bundleNum)) 1341 - if err != nil { 1342 - return nil, err 1343 - } 1344 - defer rows.Close() 1345 - 1346 - var dids []string 1347 - for rows.Next() { 1348 - var did string 1349 - if err := rows.Scan(&did); err != nil { 1350 - return nil, err 1351 - } 1352 - dids = append(dids, did) 1353 - } 1354 - 1355 - return dids, rows.Err() 1356 - } 1357 - 1358 - func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) { 1359 - var bundles []*PLCBundle 1360 - 1361 - for rows.Next() { 1362 - var bundle PLCBundle 1363 - var prevHash sql.NullString 1364 - var cursor sql.NullString 1365 - 1366 - if err := rows.Scan( 1367 - &bundle.BundleNumber, 1368 - &bundle.StartTime, 1369 - &bundle.EndTime, 1370 - &bundle.DIDCount, 1371 - &bundle.Hash, 1372 - &bundle.CompressedHash, 1373 - &bundle.CompressedSize, 1374 - &bundle.UncompressedSize, 1375 - &bundle.CumulativeCompressedSize, 1376 - &bundle.CumulativeUncompressedSize, 1377 - &cursor, 1378 - &prevHash, 1379 - &bundle.Compressed, 1380 - &bundle.CreatedAt, 1381 - ); err != nil { 1382 - return nil, err 1383 - } 1384 - 1385 - if prevHash.Valid { 1386 - bundle.PrevBundleHash = prevHash.String 1387 - } 1388 - if cursor.Valid { 1389 - bundle.Cursor = cursor.String 1390 - } 1391 - 1392 - bundles = append(bundles, &bundle) 1393 - } 1394 - 1395 - return bundles, rows.Err() 1396 - } 1397 - 1398 - func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) { 1399 - var count, lastBundleNum int64 1400 - err := p.db.QueryRowContext(ctx, ` 1401 - SELECT COUNT(*), COALESCE(MAX(bundle_number), 0) 1402 - FROM plc_bundles 1403 - 
`).Scan(&count, &lastBundleNum) 1404 - if err != nil { 1405 - return 0, 0, 0, 0, err 1406 - } 1407 - 1408 - if lastBundleNum == 0 { 1409 - return 0, 0, 0, 0, nil 1410 - } 1411 - 1412 - var compressedSize, uncompressedSize int64 1413 - err = p.db.QueryRowContext(ctx, ` 1414 - SELECT cumulative_compressed_size, cumulative_uncompressed_size 1415 - FROM plc_bundles 1416 - WHERE bundle_number = $1 1417 - `, lastBundleNum).Scan(&compressedSize, &uncompressedSize) 1418 - if err != nil { 1419 - return 0, 0, 0, 0, err 1420 - } 1421 - 1422 - return count, compressedSize, uncompressedSize, lastBundleNum, nil 1423 - } 1424 - 1425 - func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) { 1426 - query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles" 1427 - var num int 1428 - err := p.db.QueryRowContext(ctx, query).Scan(&num) 1429 - return num, err 1430 - } 1431 - 1432 - func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) { 1433 - query := ` 1434 - SELECT bundle_number 1435 - FROM plc_bundles 1436 - WHERE start_time <= $1 AND end_time >= $1 1437 - ORDER BY bundle_number ASC 1438 - LIMIT 1 1439 - ` 1440 - 1441 - var bundleNum int 1442 - err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum) 1443 - if err == sql.ErrNoRows { 1444 - query = ` 1445 - SELECT bundle_number 1446 - FROM plc_bundles 1447 - WHERE end_time < $1 1448 - ORDER BY bundle_number DESC 1449 - LIMIT 1 1450 - ` 1451 - err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum) 1452 - if err == sql.ErrNoRows { 1453 - return 1, nil 1454 - } 1455 - if err != nil { 1456 - return 0, err 1457 - } 1458 - return bundleNum, nil 1459 - } 1460 - if err != nil { 1461 - return 0, err 1462 - } 1463 - 1464 - return bundleNum, nil 1465 - } 1466 - 1467 - func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) { 1468 - query := ` 1469 - WITH daily_stats AS ( 1470 - SELECT 1471 - DATE(start_time) as date, 1472 - MAX(bundle_number) as last_bundle, 1473 - COUNT(*) as bundle_count, 1474 - SUM(uncompressed_size) as total_uncompressed, 1475 - SUM(compressed_size) as total_compressed, 1476 - MAX(cumulative_uncompressed_size) as cumulative_uncompressed, 1477 - MAX(cumulative_compressed_size) as cumulative_compressed 1478 - FROM plc_bundles 1479 - WHERE bundle_number >= $1 1480 - GROUP BY DATE(start_time) 1481 - ) 1482 - SELECT 1483 - date::text, 1484 - last_bundle, 1485 - SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations, 1486 - total_uncompressed, 1487 - total_compressed, 1488 - cumulative_uncompressed, 1489 - cumulative_compressed 1490 - FROM daily_stats 1491 - ORDER BY date ASC 1492 - ` 1493 - 1494 - if limit > 0 { 1495 - query += fmt.Sprintf(" LIMIT %d", limit) 1496 - } 1497 - 1498 - rows, err := p.db.QueryContext(ctx, query, fromBundle) 1499 - if err != nil { 1500 - return nil, err 1501 - } 1502 - defer rows.Close() 1503 - 1504 - var history []*PLCHistoryPoint 1505 - for rows.Next() { 1506 - var point PLCHistoryPoint 1507 - var cumulativeOps int64 1508 - 1509 - err := rows.Scan( 1510 - &point.Date, 1511 - &point.BundleNumber, 1512 - &cumulativeOps, 1513 - &point.UncompressedSize, 1514 - &point.CompressedSize, 1515 - &point.CumulativeUncompressed, 1516 - &point.CumulativeCompressed, 1517 - ) 1518 - if err != nil { 1519 - return nil, err 1520 - } 1521 - 1522 - point.OperationCount = int(cumulativeOps) 1523 - 1524 - history = append(history, &point) 1525 - } 1526 - 1527 - return history, 
rows.Err() 1528 - } 1529 - 1530 - // ===== MEMPOOL OPERATIONS ===== 1531 - 1532 - func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error { 1533 - if len(ops) == 0 { 1534 - return nil 1535 - } 1536 - 1537 - tx, err := p.db.BeginTx(ctx, nil) 1538 - if err != nil { 1539 - return err 1540 - } 1541 - defer tx.Rollback() 1542 - 1543 - stmt, err := tx.PrepareContext(ctx, ` 1544 - INSERT INTO plc_mempool (did, operation, cid, created_at) 1545 - VALUES ($1, $2, $3, $4) 1546 - ON CONFLICT(cid) DO NOTHING 1547 - `) 1548 - if err != nil { 1549 - return err 1550 - } 1551 - defer stmt.Close() 1552 - 1553 - for _, op := range ops { 1554 - _, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt) 1555 - if err != nil { 1556 - return err 1557 - } 1558 - } 1559 - 1560 - return tx.Commit() 1561 - } 1562 - 1563 - func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) { 1564 - query := "SELECT COUNT(*) FROM plc_mempool" 1565 - var count int 1566 - err := p.db.QueryRowContext(ctx, query).Scan(&count) 1567 - return count, err 1568 - } 1569 - 1570 - func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) { 1571 - query := ` 1572 - SELECT id, did, operation, cid, created_at, added_at 1573 - FROM plc_mempool 1574 - ORDER BY created_at ASC 1575 - LIMIT $1 1576 - ` 1577 - 1578 - rows, err := p.db.QueryContext(ctx, query, limit) 1579 - if err != nil { 1580 - return nil, err 1581 - } 1582 - defer rows.Close() 1583 - 1584 - var ops []MempoolOperation 1585 - for rows.Next() { 1586 - var op MempoolOperation 1587 - err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt) 1588 - if err != nil { 1589 - return nil, err 1590 - } 1591 - ops = append(ops, op) 1592 - } 1593 - 1594 - return ops, rows.Err() 1595 - } 1596 - 1597 - func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error { 1598 - if len(ids) == 0 { 1599 - return nil 1600 - } 1601 - 1602 - placeholders := make([]string, len(ids)) 1603 - args := make([]interface{}, len(ids)) 1604 - for i, id := range ids { 1605 - placeholders[i] = fmt.Sprintf("$%d", i+1) 1606 - args[i] = id 1607 - } 1608 - 1609 - query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)", 1610 - strings.Join(placeholders, ",")) 1611 - 1612 - _, err := p.db.ExecContext(ctx, query, args...) 
1613 - return err 1614 - } 1615 - 1616 - func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) { 1617 - query := ` 1618 - SELECT id, did, operation, cid, created_at, added_at 1619 - FROM plc_mempool 1620 - ORDER BY created_at ASC, id ASC 1621 - LIMIT 1 1622 - ` 1623 - 1624 - var op MempoolOperation 1625 - err := p.db.QueryRowContext(ctx, query).Scan( 1626 - &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt, 1627 - ) 1628 - if err == sql.ErrNoRows { 1629 - return nil, nil 1630 - } 1631 - if err != nil { 1632 - return nil, err 1633 - } 1634 - 1635 - return &op, nil 1636 - } 1637 - 1638 - func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) { 1639 - query := ` 1640 - SELECT id, did, operation, cid, created_at, added_at 1641 - FROM plc_mempool 1642 - ORDER BY created_at DESC, id DESC 1643 - LIMIT 1 1644 - ` 1645 - 1646 - var op MempoolOperation 1647 - err := p.db.QueryRowContext(ctx, query).Scan( 1648 - &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt, 1649 - ) 1650 - if err == sql.ErrNoRows { 1651 - return nil, nil 1652 - } 1653 - if err != nil { 1654 - return nil, err 1655 - } 1656 - 1657 - return &op, nil 1658 - } 1659 - 1660 - func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) { 1661 - query := "SELECT COUNT(DISTINCT did) FROM plc_mempool" 1662 - var count int 1663 - err := p.db.QueryRowContext(ctx, query).Scan(&count) 1664 - return count, err 1665 - } 1666 - 1667 - func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) { 1668 - query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool" 1669 - var size int64 1670 - err := p.db.QueryRowContext(ctx, query).Scan(&size) 1671 - return size, err 1672 1164 } 1673 1165 1674 1166 // ===== CURSOR OPERATIONS =====
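
The list queries build PostgreSQL placeholders with a running `argIdx`; flags that add no bind value (like `OnlyValid` above) must append plain SQL rather than `fmt.Sprintf` with an unused argument, which would inject a `%!(EXTRA ...)` marker into the query. A condensed, self-contained sketch of the pattern (table and column names taken from the diff, the rest illustrative):

```go
package main

import "fmt"

// buildQuery mirrors the dynamic WHERE-builder style used in postgres.go.
func buildQuery(epType string, onlyValid bool) (string, []interface{}) {
	query := "SELECT id, endpoint FROM endpoints WHERE 1=1"
	var args []interface{}
	argIdx := 1

	if epType != "" {
		// each bound value gets the next $n placeholder
		query += fmt.Sprintf(" AND endpoint_type = $%d", argIdx)
		args = append(args, epType)
		argIdx++
	}
	if onlyValid {
		// boolean flag: plain SQL, no bind value, no argIdx bump
		query += " AND valid = true"
	}
	return query, args
}

func main() {
	q, args := buildQuery("pds", true)
	fmt.Println(q, args)
}
```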
+7 -14
internal/storage/types.go
··· 26 26 LastChecked time.Time 27 27 Status int 28 28 IP string 29 - IPv6 string // NEW 29 + IPv6 string 30 30 IPResolvedAt time.Time 31 + Valid bool 31 32 UpdatedAt time.Time 32 33 } 33 34 ··· 76 77 77 78 // EndpointFilter for querying endpoints 78 79 type EndpointFilter struct { 79 - Type string // "pds", "labeler", etc. 80 + Type string 80 81 Status string 81 82 MinUserCount int64 82 83 OnlyStale bool 84 + OnlyValid bool 83 85 RecheckInterval time.Duration 84 - Random bool // NEW: Return results in random order 86 + Random bool 85 87 Limit int 86 88 Offset int 87 89 } ··· 153 155 CumulativeCompressed int64 `json:"cumulative_compressed"` 154 156 } 155 157 156 - // MempoolOperation represents an operation waiting to be bundled 157 - type MempoolOperation struct { 158 - ID int64 159 - DID string 160 - Operation string 161 - CID string 162 - CreatedAt time.Time 163 - AddedAt time.Time 164 - } 165 - 166 158 // ScanCursor stores scanning progress 167 159 type ScanCursor struct { 168 160 Source string ··· 223 215 LastChecked time.Time 224 216 Status int 225 217 IP string 226 - IPv6 string // NEW 218 + IPv6 string 219 + Valid bool // NEW 227 220 228 221 // From latest endpoint_scans (via JOIN) 229 222 LatestScan *struct {
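
With the `Valid` flag threaded through storage, callers can restrict queries to well-formed endpoints. A sketch of filter usage — `GetEndpoints` and its signature are assumptions (the list method's name is not shown in this diff); only `EndpointFilter` and its fields are real:

```go
package main

import (
	"context"
	"fmt"

	"github.com/atscan/atscand/internal/storage"
)

// listValidPDS is illustrative only; GetEndpoints is a hypothetical method name.
func listValidPDS(ctx context.Context, db storage.Database) error {
	endpoints, err := db.GetEndpoints(ctx, storage.EndpointFilter{
		Type:      "pds",
		OnlyValid: true, // translated to "AND valid = true" by the SQL builder
		Limit:     50,
	})
	if err != nil {
		return err
	}
	for _, ep := range endpoints {
		fmt.Println(ep.Endpoint)
	}
	return nil
}

func main() {} // placeholder so the sketch compiles standalone
```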
+2 -2
internal/worker/scheduler.go
··· 5 5 "sync" 6 6 "time" 7 7 8 - "github.com/atscan/atscanner/internal/log" 9 - "github.com/atscan/atscanner/internal/monitor" 8 + "github.com/atscan/atscand/internal/log" 9 + "github.com/atscan/atscand/internal/monitor" 10 10 ) 11 11 12 12 type Job struct {
+123
utils/import-labels.js
··· 1 + import { file, write } from "bun";
2 + import { join } from "path";
3 + import { mkdir } from "fs/promises";
4 + import { init, compress } from "@bokuweb/zstd-wasm";
5 +
6 + // --- Configuration ---
7 + const CSV_FILE = process.argv[2];
8 + const CONFIG_FILE = "config.yaml";
9 + const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
10 + // ---------------------
11 +
12 + if (!CSV_FILE) {
13 + console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
14 + process.exit(1);
15 + }
16 +
17 + console.log("========================================");
18 + console.log("PLC Operation Labels Import (Bun + WASM)");
19 + console.log("========================================");
20 +
21 + // 1. Read and parse config
22 + console.log(`Loading config from ${CONFIG_FILE}...`);
23 + const configFile = await file(CONFIG_FILE).text();
24 + const config = Bun.YAML.parse(configFile);
25 + const bundleDir = config?.plc?.bundle_dir;
26 +
27 + if (!bundleDir) {
28 + console.error("Error: Could not parse plc.bundle_dir from config.yaml");
29 + process.exit(1);
30 + }
31 +
32 + const FINAL_LABELS_DIR = join(bundleDir, "labels");
33 + await mkdir(FINAL_LABELS_DIR, { recursive: true });
34 +
35 + console.log(`CSV File: ${CSV_FILE}`);
36 + console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
37 + console.log("");
38 +
39 + // 2. Initialize Zstd WASM module
40 + await init();
41 +
42 + // --- Pass 1: Read entire file into memory and group by bundle ---
43 + console.log("Pass 1/2: Reading and grouping all lines by bundle...");
44 + console.warn("This will use a large amount of RAM!");
45 +
46 + const startTime = Date.now();
47 + const bundles = new Map(); // Map<string, string[]>
48 + let lineCount = 0;
49 +
50 + const inputFile = file(CSV_FILE);
51 + const fileStream = inputFile.stream();
52 + const decoder = new TextDecoder();
53 + let remainder = "";
54 +
55 + for await (const chunk of fileStream) {
56 + const text = remainder + decoder.decode(chunk, { stream: true });
57 + const lines = text.split("\n");
58 + remainder = lines.pop() || "";
59 +
60 + for (const line of lines) {
61 + if (line === "") continue;
62 + lineCount++;
63 +
64 + if (lineCount === 1 && line.startsWith("bundle,")) {
65 + continue; // Skip header
66 + }
67 +
68 + const firstCommaIndex = line.indexOf(",");
69 + if (firstCommaIndex === -1) {
70 + console.warn(`Skipping malformed line: ${line}`);
71 + continue;
72 + }
73 + const bundleNumStr = line.substring(0, firstCommaIndex);
74 + const bundleKey = bundleNumStr.padStart(6, "0");
75 +
76 + // Add line to the correct bundle's array
77 + if (!bundles.has(bundleKey)) {
78 + bundles.set(bundleKey, []);
79 + }
80 + bundles.get(bundleKey).push(line);
81 + }
82 + }
83 + // Flush the decoder and handle a final line when the file lacks a trailing newline
84 + remainder += decoder.decode();
85 + if (remainder !== "") {
86 + const firstCommaIndex = remainder.indexOf(",");
87 + if (firstCommaIndex > 0) {
88 + const bundleKey = remainder.substring(0, firstCommaIndex).padStart(6, "0");
89 + if (!bundles.has(bundleKey)) bundles.set(bundleKey, []);
90 + bundles.get(bundleKey).push(remainder);
91 + lineCount++;
92 + }
93 + }
94 +
95 + console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
96 + console.log(`Found ${bundles.size} unique bundles.`);
97 +
98 + // --- Pass 2: Compress and write each bundle ---
99 + console.log("\nPass 2/2: Compressing and writing bundle files...");
100 + let i = 0;
101 + for (const [bundleKey, lines] of bundles.entries()) {
102 + i++;
103 + console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
104 +
105 + // Join all lines for this bundle into one big string
106 + const content = lines.join("\n");
107 +
108 + // Compress the string
109 + const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
110 +
111 + // Write the compressed data to the file
112 + const outPath = join(FINAL_LABELS_DIR,
`${bundleKey}.csv.zst`); 103 + await write(outPath, compressedData); 104 + } 105 + 106 + // 3. Clean up 107 + const totalTime = (Date.now() - startTime) / 1000; 108 + console.log("\n========================================"); 109 + console.log("Import Summary"); 110 + console.log("========================================"); 111 + console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`); 112 + console.log(`Total lines processed: ${lineCount.toLocaleString()}`); 113 + console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
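For anyone consuming these label bundles, reading one back is the mirror image of the write path above. The following is a minimal sketch, assuming the same @bokuweb/zstd-wasm package; LABELS_DIR and the bundle number are illustrative placeholders, not values taken from a real config:

import { file } from "bun";
import { join } from "path";
import { init, decompress } from "@bokuweb/zstd-wasm";

// Illustrative values; in practice derive LABELS_DIR from config.yaml's plc.bundle_dir
const LABELS_DIR = "plc_bundles/labels";
const bundleKey = "000001";

await init();

// Read the compressed bundle and decompress it back to CSV text
const compressed = await file(join(LABELS_DIR, `${bundleKey}.csv.zst`)).arrayBuffer();
const csv = new TextDecoder().decode(decompress(new Uint8Array(compressed)));

// Each row still carries the bundle number as its first column
for (const line of csv.split("\n")) {
  if (line !== "") console.log(line);
}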
+91
utils/import-labels.sh
··· 1 + #!/bin/bash
2 + # utils/import-labels.sh -- single-pass sorted-pipe label import
3 + 
4 + set -e
5 + 
6 + if [ $# -lt 1 ]; then
7 +   echo "Usage: ./utils/import-labels.sh <csv-file>"
8 +   exit 1
9 + fi
10 + 
11 + CSV_FILE="$1"
12 + CONFIG_FILE="config.yaml"
13 + 
14 + [ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
15 + [ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
16 + 
17 + # Extract bundle directory path
18 + BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
19 + 
20 + [ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
21 + 
22 + FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
23 + 
24 + echo "========================================"
25 + echo "PLC Operation Labels Import (Sorted Pipe)"
26 + echo "========================================"
27 + echo "CSV File: $CSV_FILE"
28 + echo "Output Dir: $FINAL_LABELS_DIR"
29 + echo ""
30 + 
31 + # Ensure the final directory exists
32 + mkdir -p "$FINAL_LABELS_DIR"
33 + 
34 + echo "Streaming, sorting, and compressing on the fly..."
35 + echo "This will take time. 'pv' shows progress of the tail stage only."
36 + echo "'sort' must buffer all input, so output starts after sorting completes."
37 + echo ""
38 + 
39 + # This is the single-pass pipeline
40 + tail -n +2 "$CSV_FILE" | \
41 +   pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
42 +   sort -t, -k1,1n | \
43 +   awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
44 +   # This awk script EXPECTS input sorted by bundle number (col 1)
45 +   BEGIN {
46 +     # last_bundle_num tracks the bundle we are currently writing
47 +     last_bundle_num = -1
48 +     # cmd holds the current zstd pipe command
49 +     cmd = ""
50 +   }
51 +   {
52 +     current_bundle_num = $1
53 + 
54 +     # Check if the bundle number has changed
55 +     if (current_bundle_num != last_bundle_num) {
56 + 
57 +       # If it changed, and we have an old pipe open, close it
58 +       if (last_bundle_num != -1) {
59 +         close(cmd)
60 +       }
61 + 
62 +       # Create the new pipe command, writing to the final .zst file
63 +       outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
64 +       cmd = "zstd -q -f -T0 -o \"" outfile "\""  # -q -f: quiet, overwrite on re-runs
65 + 
66 +       # Update the tracker
67 +       last_bundle_num = current_bundle_num
68 + 
69 +       # Print progress to stderr
70 +       printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
71 +     }
72 + 
73 +     # Print the current line ($0) to the open pipe
74 +     # The first time this runs for a bundle, it opens the pipe
75 +     # Subsequent times, it writes to the already-open pipe
76 +     print $0 | cmd
77 +   }
78 +   # END block: close the very last pipe
79 +   END {
80 +     if (last_bundle_num != -1) {
81 +       close(cmd)
82 +     }
83 +     printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
84 +   }'
85 + 
86 + echo ""
87 + echo "========================================"
88 + echo "Import Summary"
89 + echo "========================================"
90 + echo "✓ Import completed successfully!"
91 + echo "Label files are stored in: $FINAL_LABELS_DIR"
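The awk stage above depends on its input arriving sorted by bundle number, since it keeps only one zstd pipe open at a time. A rough post-run sanity check is to total the decompressed line counts and compare against awk's "Finished. Total lines" report. A sketch in Bun, under the same illustrative LABELS_DIR assumption as before:

import { file } from "bun";
import { join } from "path";
import { readdir } from "fs/promises";
import { init, decompress } from "@bokuweb/zstd-wasm";

const LABELS_DIR = "plc_bundles/labels"; // illustrative; use <plc.bundle_dir>/labels

await init();
let total = 0;
for (const name of (await readdir(LABELS_DIR)).sort()) {
  if (!name.endsWith(".csv.zst")) continue;
  const raw = await file(join(LABELS_DIR, name)).arrayBuffer();
  const text = new TextDecoder().decode(decompress(new Uint8Array(raw)));
  // One CSV row per newline-terminated line
  const count = text.split("\n").filter((l) => l !== "").length;
  total += count;
  console.log(`${name}: ${count.toLocaleString()} lines`);
}
console.log(`Total lines across bundles: ${total.toLocaleString()}`);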
+2 -2
utils/migrate-ipinfo.sh
··· 4 4 # Configuration (edit these)
5 5 DB_HOST="localhost"
6 6 DB_PORT="5432"
7 - DB_NAME="atscanner"
8 - DB_USER="atscanner"
7 + DB_NAME="atscand"
8 + DB_USER="atscand"
9 9 DB_PASSWORD="CHANGE_ME" # set locally; do not commit real credentials
10 10 
11 11 # Colors for output