Compare changes

+2 -1
.gitignore
···
  .DS_Store
  plc_cache\.tmp/*
  plc_bundles*
- config.yaml
+ config.yaml
+ /atscand
+39 -5
Makefile
···
- all: run
+ .PHONY: all build install test clean fmt lint help
+
+ # Binary name
+ BINARY_NAME=atscand
+ INSTALL_PATH=$(GOPATH)/bin
+
+ # Go commands
+ GOCMD=go
+ GOBUILD=$(GOCMD) build
+ GOINSTALL=$(GOCMD) install
+ GOCLEAN=$(GOCMD) clean
+ GOTEST=$(GOCMD) test
+ GOGET=$(GOCMD) get
+ GOFMT=$(GOCMD) fmt
+ GOMOD=$(GOCMD) mod
+ GORUN=$(GOCMD) run
+
+ # Default target
+ all: build
+
+ # Build the CLI tool
+ build:
+ 	@echo "Building $(BINARY_NAME)..."
+ 	$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
+
+ # Install the CLI tool globally
+ install:
+ 	@echo "Installing $(BINARY_NAME)..."
+ 	$(GOINSTALL) ./cmd/atscand

  run:
- 	go run cmd/atscanner.go -verbose
+ 	$(GORUN) cmd/atscand/main.go -verbose

- clean-db:
- 	dropdb -U atscanner atscanner
- 	createdb atscanner -O atscanner
+ update-plcbundle:
+ 	GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
+
+ # Show help
+ help:
+ 	@echo "Available targets:"
+ 	@echo "  make build    - Build the binary"
+ 	@echo "  make install  - Install binary globally"
+ 	@echo "  make run      - Run app"
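Note on the new update-plcbundle target: GOPROXY=direct tells go get to fetch the module straight from its origin repository at tangled.org rather than through the Go module proxy, which helps right after a new tag is pushed, before the proxy has cached it.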
+159
cmd/atscand/main.go
···
+ package main
+
+ import (
+ 	"context"
+ 	"flag"
+ 	"fmt"
+ 	"os"
+ 	"os/signal"
+ 	"syscall"
+ 	"time"
+
+ 	"github.com/atscan/atscand/internal/api"
+ 	"github.com/atscan/atscand/internal/config"
+ 	"github.com/atscan/atscand/internal/log"
+ 	"github.com/atscan/atscand/internal/pds"
+ 	"github.com/atscan/atscand/internal/plc"
+ 	"github.com/atscan/atscand/internal/storage"
+ 	"github.com/atscan/atscand/internal/worker"
+ )
+
+ const VERSION = "1.0.0"
+
+ func main() {
+ 	configPath := flag.String("config", "config.yaml", "path to config file")
+ 	verbose := flag.Bool("verbose", false, "enable verbose logging")
+ 	flag.Parse()
+
+ 	// Load configuration
+ 	cfg, err := config.Load(*configPath)
+ 	if err != nil {
+ 		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	// Override verbose setting if flag is provided
+ 	if *verbose {
+ 		cfg.API.Verbose = true
+ 	}
+
+ 	// Initialize logger
+ 	log.Init(cfg.API.Verbose)
+
+ 	// Print banner
+ 	log.Banner(VERSION)
+
+ 	// Print configuration summary
+ 	log.PrintConfig(map[string]string{
+ 		"Database Type":     cfg.Database.Type,
+ 		"Database Path":     cfg.Database.Path, // Will be auto-redacted
+ 		"PLC Directory":     cfg.PLC.DirectoryURL,
+ 		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
+ 		"PLC Bundle Dir":    cfg.PLC.BundleDir,
+ 		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
+ 		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
+ 		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
+ 		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
+ 		"PDS Timeout":       cfg.PDS.Timeout.String(),
+ 		"API Host":          cfg.API.Host,
+ 		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
+ 		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
+ 	})
+
+ 	// Initialize database using factory pattern
+ 	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
+ 	if err != nil {
+ 		log.Fatal("Failed to initialize database: %v", err)
+ 	}
+ 	defer func() {
+ 		log.Info("Closing database connection...")
+ 		db.Close()
+ 	}()
+
+ 	// Set scan retention from config
+ 	if cfg.PDS.ScanRetention > 0 {
+ 		db.SetScanRetention(cfg.PDS.ScanRetention)
+ 		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
+ 	}
+
+ 	// Run migrations
+ 	if err := db.Migrate(); err != nil {
+ 		log.Fatal("Failed to run migrations: %v", err)
+ 	}
+
+ 	ctx, cancel := context.WithCancel(context.Background())
+ 	defer cancel()
+
+ 	// Initialize workers
+ 	log.Info("Initializing scanners...")
+
+ 	bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
+ 	if err != nil {
+ 		log.Fatal("Failed to create bundle manager: %v", err)
+ 	}
+ 	defer bundleManager.Close()
+ 	log.Verbose("✓ Bundle manager initialized (shared)")
+
+ 	plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
+ 	defer plcScanner.Close()
+ 	log.Verbose("✓ PLC scanner initialized")
+
+ 	pdsScanner := pds.NewScanner(db, cfg.PDS)
+ 	log.Verbose("✓ PDS scanner initialized")
+
+ 	scheduler := worker.NewScheduler()
+
+ 	// Schedule PLC directory scan
+ 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
+ 		if err := plcScanner.Scan(ctx); err != nil {
+ 			log.Error("PLC scan error: %v", err)
+ 		}
+ 	})
+ 	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
+
+ 	// Schedule PDS availability checks
+ 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
+ 		if err := pdsScanner.ScanAll(ctx); err != nil {
+ 			log.Error("PDS scan error: %v", err)
+ 		}
+ 	})
+ 	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
+
+ 	// Start API server
+ 	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
+ 	apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
+ 	go func() {
+ 		if err := apiServer.Start(); err != nil {
+ 			log.Fatal("API server error: %v", err)
+ 		}
+ 	}()
+
+ 	// Give the API server a moment to start
+ 	time.Sleep(100 * time.Millisecond)
+ 	log.Info("✓ API server started successfully")
+ 	log.Info("")
+ 	log.Info("🚀 ATScanner is running!")
+ 	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
+ 	log.Info("   Press Ctrl+C to stop")
+ 	log.Info("")
+
+ 	// Start scheduler
+ 	scheduler.Start(ctx)
+
+ 	// Wait for interrupt
+ 	sigChan := make(chan os.Signal, 1)
+ 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+ 	<-sigChan
+
+ 	log.Info("")
+ 	log.Info("Shutting down gracefully...")
+ 	cancel()
+
+ 	log.Info("Stopping API server...")
+ 	apiServer.Shutdown(context.Background())
+
+ 	log.Info("Waiting for active tasks to complete...")
+ 	time.Sleep(2 * time.Second)
+
+ 	log.Info("✓ Shutdown complete. Goodbye!")
+ }
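The internal/worker package is not part of this diff. As a reading aid for the NewScheduler/AddJob/Start calls above, here is a minimal scheduler with the same shape; it is a sketch under the assumption that each job runs on its own ticker until the shared context is cancelled, and the real implementation may differ:

package worker

import (
	"context"
	"time"
)

// job is a named function run on a fixed interval.
type job struct {
	name     string
	interval time.Duration
	fn       func()
}

// Scheduler matches how main.go uses it: AddJob registers work,
// Start fires each job until the context is cancelled.
type Scheduler struct {
	jobs []job
}

func NewScheduler() *Scheduler { return &Scheduler{} }

func (s *Scheduler) AddJob(name string, interval time.Duration, fn func()) {
	s.jobs = append(s.jobs, job{name, interval, fn})
}

func (s *Scheduler) Start(ctx context.Context) {
	for _, j := range s.jobs {
		go func(j job) {
			ticker := time.NewTicker(j.interval)
			defer ticker.Stop()
			j.fn() // assumption: run once immediately, then on every tick
			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					j.fn()
				}
			}
		}(j)
	}
}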
-152
cmd/atscanner.go
···
- package main
-
- import (
- 	"context"
- 	"flag"
- 	"fmt"
- 	"os"
- 	"os/signal"
- 	"syscall"
- 	"time"
-
- 	"github.com/atscan/atscanner/internal/api"
- 	"github.com/atscan/atscanner/internal/config"
- 	"github.com/atscan/atscanner/internal/log"
- 	"github.com/atscan/atscanner/internal/pds"
- 	"github.com/atscan/atscanner/internal/plc"
- 	"github.com/atscan/atscanner/internal/storage"
- 	"github.com/atscan/atscanner/internal/worker"
- )
-
- const VERSION = "1.0.0"
-
- func main() {
- 	configPath := flag.String("config", "config.yaml", "path to config file")
- 	verbose := flag.Bool("verbose", false, "enable verbose logging")
- 	flag.Parse()
-
- 	// Load configuration
- 	cfg, err := config.Load(*configPath)
- 	if err != nil {
- 		fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
- 		os.Exit(1)
- 	}
-
- 	// Override verbose setting if flag is provided
- 	if *verbose {
- 		cfg.API.Verbose = true
- 	}
-
- 	// Initialize logger
- 	log.Init(cfg.API.Verbose)
-
- 	// Print banner
- 	log.Banner(VERSION)
-
- 	// Print configuration summary
- 	log.PrintConfig(map[string]string{
- 		"Database Type":     cfg.Database.Type,
- 		"Database Path":     cfg.Database.Path, // Will be auto-redacted
- 		"PLC Directory":     cfg.PLC.DirectoryURL,
- 		"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
- 		"PLC Bundle Dir":    cfg.PLC.BundleDir,
- 		"PLC Cache":         fmt.Sprintf("%v", cfg.PLC.UseCache),
- 		"PLC Index DIDs":    fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
- 		"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
- 		"PDS Workers":       fmt.Sprintf("%d", cfg.PDS.Workers),
- 		"PDS Timeout":       cfg.PDS.Timeout.String(),
- 		"API Host":          cfg.API.Host,
- 		"API Port":          fmt.Sprintf("%d", cfg.API.Port),
- 		"Verbose Logging":   fmt.Sprintf("%v", cfg.API.Verbose),
- 	})
-
- 	// Initialize database using factory pattern
- 	db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
- 	if err != nil {
- 		log.Fatal("Failed to initialize database: %v", err)
- 	}
- 	defer func() {
- 		log.Info("Closing database connection...")
- 		db.Close()
- 	}()
-
- 	// Set scan retention from config
- 	if cfg.PDS.ScanRetention > 0 {
- 		db.SetScanRetention(cfg.PDS.ScanRetention)
- 		log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
- 	}
-
- 	// Run migrations
- 	if err := db.Migrate(); err != nil {
- 		log.Fatal("Failed to run migrations: %v", err)
- 	}
-
- 	ctx, cancel := context.WithCancel(context.Background())
- 	defer cancel()
-
- 	// Initialize workers
- 	log.Info("Initializing scanners...")
-
- 	plcScanner := plc.NewScanner(db, cfg.PLC)
- 	defer plcScanner.Close()
- 	log.Verbose("✓ PLC scanner initialized")
-
- 	pdsScanner := pds.NewScanner(db, cfg.PDS)
- 	log.Verbose("✓ PDS scanner initialized")
-
- 	scheduler := worker.NewScheduler()
-
- 	// Schedule PLC directory scan
- 	scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
- 		if err := plcScanner.Scan(ctx); err != nil {
- 			log.Error("PLC scan error: %v", err)
- 		}
- 	})
- 	log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
-
- 	// Schedule PDS availability checks
- 	scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
- 		if err := pdsScanner.ScanAll(ctx); err != nil {
- 			log.Error("PDS scan error: %v", err)
- 		}
- 	})
- 	log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
-
- 	// Start API server
- 	log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
- 	apiServer := api.NewServer(db, cfg.API, cfg.PLC)
- 	go func() {
- 		if err := apiServer.Start(); err != nil {
- 			log.Fatal("API server error: %v", err)
- 		}
- 	}()
-
- 	// Give the API server a moment to start
- 	time.Sleep(100 * time.Millisecond)
- 	log.Info("✓ API server started successfully")
- 	log.Info("")
- 	log.Info("🚀 ATScanner is running!")
- 	log.Info("   API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
- 	log.Info("   Press Ctrl+C to stop")
- 	log.Info("")
-
- 	// Start scheduler
- 	scheduler.Start(ctx)
-
- 	// Wait for interrupt
- 	sigChan := make(chan os.Signal, 1)
- 	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
- 	<-sigChan
-
- 	log.Info("")
- 	log.Info("Shutting down gracefully...")
- 	cancel()
-
- 	log.Info("Stopping API server...")
- 	apiServer.Shutdown(context.Background())
-
- 	log.Info("Waiting for active tasks to complete...")
- 	time.Sleep(2 * time.Second)
-
- 	log.Info("✓ Shutdown complete. Goodbye!")
- }
+168
cmd/import-labels/main.go
···
+ package main
+
+ import (
+ 	"bufio"
+ 	"flag"
+ 	"fmt"
+ 	"os"
+ 	"path/filepath"
+ 	"strings"
+ 	"time"
+
+ 	"github.com/klauspost/compress/zstd"
+ 	"gopkg.in/yaml.v3"
+ )
+
+ type Config struct {
+ 	PLC struct {
+ 		BundleDir string `yaml:"bundle_dir"`
+ 	} `yaml:"plc"`
+ }
+
+ var CONFIG_FILE = "config.yaml"
+
+ // ---------------------
+
+ func main() {
+ 	// Define a new flag for changing the directory
+ 	workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
+ 	flag.Usage = func() { // Custom usage message
+ 		fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
+ 		fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
+ 		flag.PrintDefaults()
+ 	}
+ 	flag.Parse() // Parse all defined flags
+
+ 	// Change directory if the flag was used
+ 	if *workDir != "." {
+ 		fmt.Printf("Changing working directory to %s...\n", *workDir)
+ 		if err := os.Chdir(*workDir); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
+ 			os.Exit(1)
+ 		}
+ 	}
+
+ 	// --- REMOVED UNUSED CODE ---
+ 	// The csvFilePath variable and NArg check were removed
+ 	// as the script now reads from stdin.
+ 	// ---------------------------
+
+ 	fmt.Println("========================================")
+ 	fmt.Println("PLC Operation Labels Import (Go STDIN)")
+ 	fmt.Println("========================================")
+
+ 	// 1. Read config (will now read from the new CWD)
+ 	fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
+ 	configData, err := os.ReadFile(CONFIG_FILE)
+ 	if err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	var config Config
+ 	if err := yaml.Unmarshal(configData, &config); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	if config.PLC.BundleDir == "" {
+ 		fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
+ 		os.Exit(1)
+ 	}
+
+ 	finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
+ 	if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+ 		os.Exit(1)
+ 	}
+
+ 	fmt.Printf("Output Dir: %s\n", finalLabelsDir)
+ 	fmt.Println("Waiting for sorted data from stdin...")
+
+ 	// 2. Process sorted data from stdin
+ 	// This script *requires* the input to be sorted by bundle number.
+
+ 	var currentWriter *zstd.Encoder
+ 	var currentFile *os.File
+ 	var lastBundleKey string = ""
+
+ 	lineCount := 0
+ 	startTime := time.Now()
+
+ 	scanner := bufio.NewScanner(os.Stdin)
+ 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+ 	for scanner.Scan() {
+ 		line := scanner.Text()
+ 		lineCount++
+
+ 		parts := strings.SplitN(line, ",", 2)
+ 		if len(parts) < 1 {
+ 			continue // Skip empty/bad lines
+ 		}
+
+ 		bundleNumStr := parts[0]
+ 		bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
+
+ 		// If the bundle key is new, close the old writer and open a new one.
+ 		if bundleKey != lastBundleKey {
+ 			// Close the previous writer/file
+ 			if currentWriter != nil {
+ 				if err := currentWriter.Close(); err != nil {
+ 					fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
+ 				}
+ 				currentFile.Close()
+ 			}
+
+ 			// Start the new one
+ 			fmt.Printf("  -> Writing bundle %s\n", bundleKey)
+ 			outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
+
+ 			file, err := os.Create(outPath)
+ 			if err != nil {
+ 				fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
+ 				os.Exit(1)
+ 			}
+ 			currentFile = file
+
+ 			writer, err := zstd.NewWriter(file)
+ 			if err != nil {
+ 				fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
+ 				os.Exit(1)
+ 			}
+ 			currentWriter = writer
+ 			lastBundleKey = bundleKey
+ 		}
+
+ 		// Write the line to the currently active writer
+ 		if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
+ 		}
+
+ 		// Progress update
+ 		if lineCount%100000 == 0 {
+ 			elapsed := time.Since(startTime).Seconds()
+ 			rate := float64(lineCount) / elapsed
+ 			fmt.Printf("  ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
+ 		}
+ 	}
+
+ 	// 3. Close the very last writer
+ 	if currentWriter != nil {
+ 		if err := currentWriter.Close(); err != nil {
+ 			fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
+ 		}
+ 		currentFile.Close()
+ 	}
+
+ 	if err := scanner.Err(); err != nil {
+ 		fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
+ 	}
+
+ 	totalTime := time.Since(startTime)
+ 	fmt.Println("\n========================================")
+ 	fmt.Println("Import Summary")
+ 	fmt.Println("========================================")
+ 	fmt.Printf("✓ Import completed in %v\n", totalTime)
+ 	fmt.Printf("Total lines processed: %d\n", lineCount)
+ }
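The importer writes one zstd-compressed CSV per bundle under <bundle_dir>/labels/, and expects its stdin to be pre-sorted by the leading bundle-number column. To sanity-check one of the produced files, it can be read back with the same github.com/klauspost/compress/zstd package; a minimal sketch (the path is illustrative and depends on plc.bundle_dir):

package main

import (
	"bufio"
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Bundle 1's labels would live at <bundle_dir>/labels/000001.csv.zst.
	f, err := os.Open("plc_bundles/labels/000001.csv.zst")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// zstd.NewReader returns a Decoder that implements io.Reader.
	dec, err := zstd.NewReader(f)
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	sc := bufio.NewScanner(dec)
	for sc.Scan() {
		fmt.Println(sc.Text()) // one "bundleNumber,..." CSV row per line
	}
	if err := sc.Err(); err != nil {
		panic(err)
	}
}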
+1 -1
config.sample.yaml
···
  database:
    type: "postgres" # or "sqlite"
-   path: "postgres://atscanner:YOUR_PASSWORD@localhost:5432/atscanner?sslmode=disable"
+   path: "postgres://atscand:YOUR_PASSWORD@localhost:5432/atscand?sslmode=disable"
    # For SQLite: path: "atscan.db"

  plc:
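The internal/config package that consumes this file is not part of this diff; under that caveat, a struct that gopkg.in/yaml.v3 would unmarshal the database section into could look like this sketch:

package main

import (
	"fmt"
	"os"

	"gopkg.in/yaml.v3"
)

// Sketch only: the real internal/config types are not shown in this diff.
type Config struct {
	Database struct {
		Type string `yaml:"type"` // "postgres" or "sqlite"
		Path string `yaml:"path"` // DSN for postgres, file path for sqlite
	} `yaml:"database"`
}

func main() {
	data, err := os.ReadFile("config.sample.yaml")
	if err != nil {
		panic(err)
	}
	var cfg Config
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.Database.Type, cfg.Database.Path)
}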
+6 -5
go.mod
···
- module github.com/atscan/atscanner
+ module github.com/atscan/atscand

  go 1.23.0

  require (
  	github.com/gorilla/mux v1.8.1
  	github.com/lib/pq v1.10.9
- 	github.com/mattn/go-sqlite3 v1.14.18
  	gopkg.in/yaml.v3 v3.0.1
  )

- require github.com/klauspost/compress v1.18.0
+ require github.com/klauspost/compress v1.18.1

  require (
- 	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
  	github.com/gorilla/handlers v1.5.2
+ 	github.com/jackc/pgx/v5 v5.7.6
+ 	tangled.org/atscan.net/plcbundle v0.3.6
  )

  require (
  	github.com/felixge/httpsnoop v1.0.3 // indirect
  	github.com/jackc/pgpassfile v1.0.0 // indirect
  	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
- 	github.com/jackc/pgx/v5 v5.7.6 // indirect
  	github.com/jackc/puddle/v2 v2.2.2 // indirect
+ 	github.com/kr/text v0.2.0 // indirect
+ 	github.com/rogpeppe/go-internal v1.14.1 // indirect
  	golang.org/x/crypto v0.37.0 // indirect
  	golang.org/x/sync v0.13.0 // indirect
  	golang.org/x/text v0.24.0 // indirect
+17 -7
go.sum
···
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
- github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
+ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
  github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
  github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
  github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
  github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE=
···
  github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
  github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
  github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
- github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
- github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+ github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+ github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
+ github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
  github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
  github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
- github.com/mattn/go-sqlite3 v1.14.18 h1:JL0eqdCOq6DJVNPSvArO/bIV9/P7fbGrV00LZHc+5aI=
- github.com/mattn/go-sqlite3 v1.14.18/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
  github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+ github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
  github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
  github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
  github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
  golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
  golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
  golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
  golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
  golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
  golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
- gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
  gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
  gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
  gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
  gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
  gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+ tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
+ tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+275 -451
internal/api/handlers.go
··· 2 2 3 3 import ( 4 4 "context" 5 - "crypto/sha256" 6 5 "database/sql" 7 - "encoding/hex" 8 6 "encoding/json" 9 7 "fmt" 8 + "io" 10 9 "net/http" 11 - "os" 12 - "path/filepath" 13 10 "strconv" 14 11 "strings" 15 12 "time" 16 13 17 - "github.com/atscan/atscanner/internal/log" 18 - "github.com/atscan/atscanner/internal/monitor" 19 - "github.com/atscan/atscanner/internal/plc" 20 - "github.com/atscan/atscanner/internal/storage" 14 + "github.com/atscan/atscand/internal/log" 15 + "github.com/atscan/atscand/internal/monitor" 16 + "github.com/atscan/atscand/internal/plc" 17 + "github.com/atscan/atscand/internal/storage" 21 18 "github.com/gorilla/mux" 19 + "tangled.org/atscan.net/plcbundle" 22 20 ) 23 21 24 22 // ===== RESPONSE HELPERS ===== ··· 40 38 http.Error(r.w, msg, code) 41 39 } 42 40 43 - func (r *response) bundleHeaders(bundle *storage.PLCBundle) { 41 + func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) { 44 42 r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber)) 45 43 r.w.Header().Set("X-Bundle-Hash", bundle.Hash) 46 44 r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash) 47 45 r.w.Header().Set("X-Bundle-Start-Time", bundle.StartTime.Format(time.RFC3339Nano)) 48 46 r.w.Header().Set("X-Bundle-End-Time", bundle.EndTime.Format(time.RFC3339Nano)) 49 47 r.w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE)) 50 - r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(bundle.DIDs))) 48 + r.w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", bundle.DIDCount)) 51 49 } 52 50 53 51 // ===== REQUEST HELPERS ===== ··· 77 75 78 76 // ===== FORMATTING HELPERS ===== 79 77 80 - func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} { 81 - return map[string]interface{}{ 82 - "plc_bundle_number": bundle.BundleNumber, 83 - "start_time": bundle.StartTime, 84 - "end_time": bundle.EndTime, 85 - "operation_count": plc.BUNDLE_SIZE, 86 - "did_count": len(bundle.DIDs), 87 - "hash": bundle.Hash, 88 - "compressed_hash": bundle.CompressedHash, 89 - "compressed_size": bundle.CompressedSize, 90 - "uncompressed_size": bundle.UncompressedSize, 91 - "compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize), 92 - "cursor": bundle.Cursor, 93 - "prev_bundle_hash": bundle.PrevBundleHash, 94 - "created_at": bundle.CreatedAt, 95 - } 96 - } 97 - 98 78 func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} { 99 79 response := map[string]interface{}{ 100 80 "id": ep.ID, ··· 103 83 "discovered_at": ep.DiscoveredAt, 104 84 "last_checked": ep.LastChecked, 105 85 "status": statusToString(ep.Status), 106 - // REMOVED: "user_count": ep.UserCount, // No longer exists 107 86 } 108 87 109 - // Add IP if available 88 + // Add IPs if available 110 89 if ep.IP != "" { 111 90 response["ip"] = ep.IP 112 91 } 113 - 114 - // REMOVED: IP info extraction - no longer in Endpoint struct 115 - // IPInfo is now in separate table, joined only in PDS handlers 92 + if ep.IPv6 != "" { 93 + response["ipv6"] = ep.IPv6 94 + } 116 95 117 96 return response 118 97 } ··· 165 144 resp.json(stats) 166 145 } 167 146 147 + // handleGetRandomEndpoint returns a random endpoint of specified type 148 + func (s *Server) handleGetRandomEndpoint(w http.ResponseWriter, r *http.Request) { 149 + resp := newResponse(w) 150 + 151 + // Get required type parameter 152 + endpointType := r.URL.Query().Get("type") 153 + if endpointType == "" { 154 + resp.error("type parameter is required", http.StatusBadRequest) 155 + return 
156 + } 157 + 158 + // Get optional status parameter 159 + status := r.URL.Query().Get("status") 160 + 161 + filter := &storage.EndpointFilter{ 162 + Type: endpointType, 163 + Status: status, 164 + Random: true, 165 + Limit: 1, 166 + Offset: 0, 167 + } 168 + 169 + endpoints, err := s.db.GetEndpoints(r.Context(), filter) 170 + if err != nil { 171 + resp.error(err.Error(), http.StatusInternalServerError) 172 + return 173 + } 174 + 175 + if len(endpoints) == 0 { 176 + resp.error("no endpoints found matching criteria", http.StatusNotFound) 177 + return 178 + } 179 + 180 + resp.json(formatEndpointResponse(endpoints[0])) 181 + } 182 + 168 183 // ===== PDS HANDLERS ===== 169 184 170 185 func (s *Server) handleGetPDSList(w http.ResponseWriter, r *http.Request) { ··· 233 248 "endpoint": pds.Endpoint, 234 249 "discovered_at": pds.DiscoveredAt, 235 250 "status": statusToString(pds.Status), 251 + "valid": pds.Valid, // NEW 236 252 } 237 253 238 254 // Add server_did if available ··· 257 273 } 258 274 } 259 275 260 - // Add IP if available 276 + // Add IPs if available 261 277 if pds.IP != "" { 262 278 response["ip"] = pds.IP 263 279 } 280 + if pds.IPv6 != "" { 281 + response["ipv6"] = pds.IPv6 282 + } 264 283 265 284 // Add IP info (from ip_infos table via JOIN) 266 285 if pds.IPInfo != nil { ··· 358 377 scanMap["response_time"] = scan.ResponseTime 359 378 } 360 379 361 - // NEW: Add version if available 362 380 if scan.Version != "" { 363 381 scanMap["version"] = scan.Version 364 382 } 365 383 384 + if scan.UsedIP != "" { 385 + scanMap["used_ip"] = scan.UsedIP 386 + } 387 + 366 388 // Use the top-level UserCount field first 367 389 if scan.UserCount > 0 { 368 390 scanMap["user_count"] = scan.UserCount ··· 662 684 return 663 685 } 664 686 665 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 666 - if err != nil { 667 - resp.error(err.Error(), http.StatusInternalServerError) 668 - return 669 - } 670 - 687 + lastBundle := s.bundleManager.GetLastBundleNumber() 671 688 resp.json(map[string]interface{}{ 672 689 "total_unique_dids": totalDIDs, 673 690 "last_bundle": lastBundle, ··· 678 695 679 696 func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) { 680 697 resp := newResponse(w) 681 - 682 698 bundleNum, err := getBundleNumber(r) 683 699 if err != nil { 684 700 resp.error("invalid bundle number", http.StatusBadRequest) 685 701 return 686 702 } 687 703 688 - // Try to get existing bundle 689 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 690 - if err == nil { 691 - // Bundle exists, return it normally 692 - resp.json(formatBundleResponse(bundle)) 693 - return 694 - } 695 - 696 - // Bundle not found - check if it's the next upcoming bundle 697 - lastBundle, err := s.db.GetLastBundleNumber(r.Context()) 704 + // Get from library's index 705 + index := s.bundleManager.GetIndex() 706 + bundleMeta, err := index.GetBundle(bundleNum) 698 707 if err != nil { 699 - resp.error("bundle not found", http.StatusNotFound) 700 - return 701 - } 702 - 703 - if bundleNum == lastBundle+1 { 704 - // This is the upcoming bundle - return preview based on mempool 705 - upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum) 706 - if err != nil { 707 - resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError) 708 + // Check if it's upcoming bundle 709 + lastBundle := index.GetLastBundle() 710 + if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 { 711 + upcomingBundle, err := 
s.createUpcomingBundlePreview(bundleNum) 712 + if err != nil { 713 + resp.error(err.Error(), http.StatusInternalServerError) 714 + return 715 + } 716 + resp.json(upcomingBundle) 708 717 return 709 718 } 710 - resp.json(upcomingBundle) 719 + resp.error("bundle not found", http.StatusNotFound) 711 720 return 712 721 } 713 722 714 - // Not an upcoming bundle, just not found 715 - resp.error("bundle not found", http.StatusNotFound) 723 + resp.json(formatBundleMetadata(bundleMeta)) 716 724 } 717 725 718 - func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) { 719 - // Get mempool stats 720 - mempoolCount, err := s.db.GetMempoolCount(ctx) 721 - if err != nil { 722 - return nil, err 726 + // Helper to format library's BundleMetadata 727 + func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} { 728 + return map[string]interface{}{ 729 + "plc_bundle_number": meta.BundleNumber, 730 + "start_time": meta.StartTime, 731 + "end_time": meta.EndTime, 732 + "operation_count": meta.OperationCount, 733 + "did_count": meta.DIDCount, 734 + "hash": meta.Hash, // Chain hash (primary) 735 + "content_hash": meta.ContentHash, // Content hash 736 + "parent": meta.Parent, // Parent chain hash 737 + "compressed_hash": meta.CompressedHash, 738 + "compressed_size": meta.CompressedSize, 739 + "uncompressed_size": meta.UncompressedSize, 740 + "compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize), 741 + "cursor": meta.Cursor, 742 + "created_at": meta.CreatedAt, 723 743 } 744 + } 724 745 725 - if mempoolCount == 0 { 746 + func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) { 747 + // Get mempool stats from library via wrapper 748 + stats := s.bundleManager.GetMempoolStats() 749 + 750 + count, ok := stats["count"].(int) 751 + if !ok || count == 0 { 726 752 return map[string]interface{}{ 727 753 "plc_bundle_number": bundleNum, 728 754 "is_upcoming": true, ··· 732 758 }, nil 733 759 } 734 760 735 - // Get first and last operations for time range 736 - firstOp, err := s.db.GetFirstMempoolOperation(ctx) 737 - if err != nil { 738 - return nil, err 761 + // Build response 762 + result := map[string]interface{}{ 763 + "plc_bundle_number": bundleNum, 764 + "is_upcoming": true, 765 + "status": "filling", 766 + "operation_count": count, 767 + "did_count": stats["did_count"], 768 + "target_operation_count": 10000, 769 + "progress_percent": float64(count) / 100.0, 770 + "operations_needed": 10000 - count, 739 771 } 740 772 741 - lastOp, err := s.db.GetLastMempoolOperation(ctx) 742 - if err != nil { 743 - return nil, err 773 + if count >= 10000 { 774 + result["status"] = "ready" 744 775 } 745 776 746 - // Get unique DID count 747 - uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx) 748 - if err != nil { 749 - return nil, err 777 + // Add time range if available 778 + if firstTime, ok := stats["first_time"]; ok { 779 + result["start_time"] = firstTime 750 780 } 751 - 752 - // Get uncompressed size estimate 753 - uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx) 754 - if err != nil { 755 - return nil, err 781 + if lastTime, ok := stats["last_time"]; ok { 782 + result["current_end_time"] = lastTime 756 783 } 757 784 758 - // Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data) 759 - estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12) 760 - 761 - // Calculate completion estimate 762 - var estimatedCompletionTime *time.Time 
763 - var operationsNeeded int 764 - var currentRate float64 765 - 766 - operationsNeeded = plc.BUNDLE_SIZE - mempoolCount 767 - 768 - if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 { 769 - timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds() 770 - if timeSpan > 0 { 771 - currentRate = float64(mempoolCount) / timeSpan 772 - if currentRate > 0 { 773 - secondsNeeded := float64(operationsNeeded) / currentRate 774 - completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 775 - estimatedCompletionTime = &completionTime 776 - } 777 - } 785 + // Add size info if available 786 + if sizeBytes, ok := stats["size_bytes"]; ok { 787 + result["uncompressed_size"] = sizeBytes 788 + result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12) 778 789 } 779 790 780 - // Get previous bundle for cursor context 781 - var prevBundleHash string 782 - var cursor string 791 + // Get previous bundle info 783 792 if bundleNum > 1 { 784 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1) 785 - if err == nil { 786 - prevBundleHash = prevBundle.Hash 787 - cursor = prevBundle.EndTime.Format(time.RFC3339Nano) 788 - } 789 - } 790 - 791 - // Determine bundle status 792 - status := "filling" 793 - if mempoolCount >= plc.BUNDLE_SIZE { 794 - status = "ready" 795 - } 796 - 797 - // Build upcoming bundle response 798 - result := map[string]interface{}{ 799 - "plc_bundle_number": bundleNum, 800 - "is_upcoming": true, 801 - "status": status, 802 - "operation_count": mempoolCount, 803 - "target_operation_count": plc.BUNDLE_SIZE, 804 - "progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100, 805 - "operations_needed": operationsNeeded, 806 - "did_count": uniqueDIDCount, 807 - "start_time": firstOp.CreatedAt, // This is FIXED once first op exists 808 - "current_end_time": lastOp.CreatedAt, // This will change as more ops arrive 809 - "uncompressed_size": uncompressedSize, 810 - "estimated_compressed_size": estimatedCompressedSize, 811 - "compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize), 812 - "prev_bundle_hash": prevBundleHash, 813 - "cursor": cursor, 814 - } 815 - 816 - if estimatedCompletionTime != nil { 817 - result["estimated_completion_time"] = *estimatedCompletionTime 818 - result["current_rate_per_second"] = currentRate 819 - } 820 - 821 - // Get actual mempool operations if requested 822 - if r.URL.Query().Get("include_dids") == "true" { 823 - ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE) 824 - if err == nil { 825 - // Extract unique DIDs 826 - didSet := make(map[string]bool) 827 - for _, op := range ops { 828 - didSet[op.DID] = true 829 - } 830 - dids := make([]string, 0, len(didSet)) 831 - for did := range didSet { 832 - dids = append(dids, did) 833 - } 834 - result["dids"] = dids 793 + if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil { 794 + result["parent"] = prevBundle.Hash // Parent chain hash 795 + result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano) 835 796 } 836 797 } 837 798 ··· 847 808 return 848 809 } 849 810 850 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 811 + // Get from library 812 + dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum) 851 813 if err != nil { 852 814 resp.error("bundle not found", http.StatusNotFound) 853 815 return 854 816 } 855 817 856 818 resp.json(map[string]interface{}{ 857 - "plc_bundle_number": bundle.BundleNumber, 858 - "did_count": len(bundle.DIDs), 859 - "dids": 
bundle.DIDs, 819 + "plc_bundle_number": bundleNum, 820 + "did_count": didCount, 821 + "dids": dids, 860 822 }) 861 823 } 862 824 ··· 871 833 872 834 compressed := r.URL.Query().Get("compressed") != "false" 873 835 874 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum) 836 + bundle, err := s.bundleManager.GetBundleMetadata(bundleNum) 875 837 if err == nil { 876 838 // Bundle exists, serve it normally 877 839 resp.bundleHeaders(bundle) ··· 885 847 } 886 848 887 849 // Bundle not found - check if it's the upcoming bundle 888 - lastBundle, err := s.db.GetLastBundleNumber(r.Context()) 889 - if err != nil { 890 - resp.error("bundle not found", http.StatusNotFound) 891 - return 892 - } 893 - 850 + lastBundle := s.bundleManager.GetLastBundleNumber() 894 851 if bundleNum == lastBundle+1 { 895 852 // This is the upcoming bundle - serve from mempool 896 - s.serveUpcomingBundle(w, r, bundleNum) 853 + s.serveUpcomingBundle(w, bundleNum) 897 854 return 898 855 } 899 856 ··· 901 858 resp.error("bundle not found", http.StatusNotFound) 902 859 } 903 860 904 - func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) { 905 - ctx := r.Context() 906 - 907 - // Get mempool count 908 - mempoolCount, err := s.db.GetMempoolCount(ctx) 909 - if err != nil { 910 - http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError) 911 - return 912 - } 861 + func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) { 862 + // Get mempool stats 863 + stats := s.bundleManager.GetMempoolStats() 864 + count, ok := stats["count"].(int) 913 865 914 - if mempoolCount == 0 { 866 + if !ok || count == 0 { 915 867 http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound) 916 868 return 917 869 } 918 870 919 - // Get mempool operations (up to BUNDLE_SIZE) 920 - mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE) 871 + // Get operations from mempool 872 + ops, err := s.bundleManager.GetMempoolOperations() 921 873 if err != nil { 922 874 http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError) 923 875 return 924 876 } 925 877 926 - if len(mempoolOps) == 0 { 927 - http.Error(w, "upcoming bundle is empty", http.StatusNotFound) 878 + if len(ops) == 0 { 879 + http.Error(w, "no operations in mempool", http.StatusNotFound) 928 880 return 929 881 } 930 882 931 - // Get time range 932 - firstOp := mempoolOps[0] 933 - lastOp := mempoolOps[len(mempoolOps)-1] 883 + // Calculate times 884 + firstOp := ops[0] 885 + lastOp := ops[len(ops)-1] 934 886 935 887 // Extract unique DIDs 936 888 didSet := make(map[string]bool) 937 - for _, op := range mempoolOps { 889 + for _, op := range ops { 938 890 didSet[op.DID] = true 939 891 } 940 892 893 + // Calculate uncompressed size 894 + uncompressedSize := int64(0) 895 + for _, op := range ops { 896 + uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline 897 + } 898 + 941 899 // Get previous bundle hash 942 900 prevBundleHash := "" 943 901 if bundleNum > 1 { 944 - if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 902 + if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil { 945 903 prevBundleHash = prevBundle.Hash 946 904 } 947 905 } 948 906 949 - // Serialize operations to JSONL 950 - var buf []byte 951 - for _, mop := range mempoolOps { 952 - buf = append(buf, []byte(mop.Operation)...) 
953 - buf = append(buf, '\n') 954 - } 955 - 956 - // Calculate size 957 - uncompressedSize := int64(len(buf)) 958 - 959 907 // Set headers 960 908 w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum)) 961 909 w.Header().Set("X-Bundle-Is-Upcoming", "true") 962 910 w.Header().Set("X-Bundle-Status", "preview") 963 911 w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano)) 964 912 w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano)) 965 - w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps))) 966 - w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE)) 967 - w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100)) 913 + w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops))) 914 + w.Header().Set("X-Bundle-Target-Count", "10000") 915 + w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0)) 968 916 w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet))) 969 917 w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash) 918 + w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize)) 970 919 971 920 w.Header().Set("Content-Type", "application/jsonl") 972 921 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum)) 973 - w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize)) 974 - w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize)) 975 922 923 + // Stream operations as JSONL 976 924 w.WriteHeader(http.StatusOK) 977 - w.Write(buf) 925 + 926 + for _, op := range ops { 927 + // Use RawJSON if available (preserves exact format) 928 + if len(op.RawJSON) > 0 { 929 + w.Write(op.RawJSON) 930 + } else { 931 + // Fallback to marshaling 932 + data, _ := json.Marshal(op) 933 + w.Write(data) 934 + } 935 + w.Write([]byte("\n")) 936 + } 978 937 } 979 938 980 - func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) { 939 + func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) { 981 940 resp := newResponse(w) 982 - path := bundle.GetFilePath(s.plcBundleDir) 983 941 984 - file, err := os.Open(path) 942 + // Use the new streaming API for compressed data 943 + reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber) 985 944 if err != nil { 986 - resp.error("bundle file not found on disk", http.StatusNotFound) 945 + resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError) 987 946 return 988 947 } 989 - defer file.Close() 990 - 991 - fileInfo, _ := file.Stat() 948 + defer reader.Close() 992 949 993 950 w.Header().Set("Content-Type", "application/zstd") 994 951 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber)) 995 - w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size())) 996 - w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size())) 952 + w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize)) 953 + w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize)) 997 954 998 - http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file) 955 + // Stream the data directly to the response 956 + w.WriteHeader(http.StatusOK) 957 + io.Copy(w, reader) 999 958 } 1000 959 1001 - func (s *Server) 
serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) { 960 + func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) { 1002 961 resp := newResponse(w) 1003 962 1004 - ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber) 963 + // Use the new streaming API for decompressed data 964 + reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber) 1005 965 if err != nil { 1006 - resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError) 966 + resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError) 1007 967 return 1008 968 } 1009 - 1010 - // Serialize to JSONL 1011 - var buf []byte 1012 - for _, op := range ops { 1013 - buf = append(buf, op.RawJSON...) 1014 - buf = append(buf, '\n') 1015 - } 1016 - 1017 - fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir)) 1018 - compressedSize := int64(0) 1019 - if fileInfo != nil { 1020 - compressedSize = fileInfo.Size() 1021 - } 969 + defer reader.Close() 1022 970 1023 971 w.Header().Set("Content-Type", "application/jsonl") 1024 972 w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber)) 1025 - w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf))) 1026 - w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize)) 1027 - w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf))) 1028 - if compressedSize > 0 { 1029 - w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize))) 973 + w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize)) 974 + w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize)) 975 + w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize)) 976 + if bundle.CompressedSize > 0 { 977 + w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize))) 1030 978 } 1031 979 980 + // Stream the data directly to the response 1032 981 w.WriteHeader(http.StatusOK) 1033 - w.Write(buf) 982 + io.Copy(w, reader) 1034 983 } 1035 984 1036 985 func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) { 1037 986 resp := newResponse(w) 1038 987 limit := getQueryInt(r, "limit", 50) 1039 988 1040 - bundles, err := s.db.GetBundles(r.Context(), limit) 1041 - if err != nil { 1042 - resp.error(err.Error(), http.StatusInternalServerError) 1043 - return 1044 - } 989 + bundles := s.bundleManager.GetBundles(limit) 1045 990 1046 991 response := make([]map[string]interface{}, len(bundles)) 1047 992 for i, bundle := range bundles { 1048 - response[i] = formatBundleResponse(bundle) 993 + response[i] = formatBundleMetadata(bundle) 1049 994 } 1050 995 1051 996 resp.json(response) ··· 1054 999 func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) { 1055 1000 resp := newResponse(w) 1056 1001 1057 - count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context()) 1058 - if err != nil { 1059 - resp.error(err.Error(), http.StatusInternalServerError) 1060 - return 1061 - } 1002 + stats := s.bundleManager.GetBundleStats() 1003 + 1004 + bundleCount := stats["bundle_count"].(int64) 1005 + totalSize := stats["total_size"].(int64) 1006 + totalUncompressedSize := stats["total_uncompressed_size"].(int64) 1007 + lastBundle := 
stats["last_bundle"].(int64) 1062 1008 1063 1009 resp.json(map[string]interface{}{ 1064 - "plc_bundle_count": count, 1065 - "last_bundle_number": lastBundle, 1066 - "total_compressed_size": compressedSize, 1067 - "total_compressed_size_mb": float64(compressedSize) / 1024 / 1024, 1068 - "total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024, 1069 - "total_uncompressed_size": uncompressedSize, 1070 - "total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 1071 - "total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024, 1072 - "compression_ratio": float64(uncompressedSize) / float64(compressedSize), 1010 + "plc_bundle_count": bundleCount, 1011 + "last_bundle_number": lastBundle, 1012 + "total_compressed_size": totalSize, 1013 + "total_uncompressed_size": totalUncompressedSize, 1014 + "overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize), 1073 1015 }) 1074 1016 } 1075 1017 ··· 1077 1019 1078 1020 func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) { 1079 1021 resp := newResponse(w) 1080 - ctx := r.Context() 1081 1022 1082 - count, err := s.db.GetMempoolCount(ctx) 1083 - if err != nil { 1084 - resp.error(err.Error(), http.StatusInternalServerError) 1085 - return 1086 - } 1023 + // Get stats from library's mempool via wrapper method 1024 + stats := s.bundleManager.GetMempoolStats() 1087 1025 1088 - uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx) 1089 - if err != nil { 1090 - resp.error(err.Error(), http.StatusInternalServerError) 1091 - return 1026 + // Convert to API response format 1027 + result := map[string]interface{}{ 1028 + "operation_count": stats["count"], 1029 + "can_create_bundle": stats["can_create_bundle"], 1092 1030 } 1093 1031 1094 - uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx) 1095 - if err != nil { 1096 - resp.error(err.Error(), http.StatusInternalServerError) 1097 - return 1032 + // Add size information 1033 + if sizeBytes, ok := stats["size_bytes"]; ok { 1034 + result["uncompressed_size"] = sizeBytes 1035 + result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024 1098 1036 } 1099 1037 1100 - result := map[string]interface{}{ 1101 - "operation_count": count, 1102 - "unique_did_count": uniqueDIDCount, 1103 - "uncompressed_size": uncompressedSize, 1104 - "uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024, 1105 - "can_create_bundle": count >= plc.BUNDLE_SIZE, 1106 - } 1038 + // Add time range and calculate estimated completion 1039 + if count, ok := stats["count"].(int); ok && count > 0 { 1040 + if firstTime, ok := stats["first_time"].(time.Time); ok { 1041 + result["mempool_start_time"] = firstTime 1107 1042 1108 - if count > 0 { 1109 - if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil { 1110 - result["mempool_start_time"] = firstOp.CreatedAt 1043 + if lastTime, ok := stats["last_time"].(time.Time); ok { 1044 + result["mempool_end_time"] = lastTime 1111 1045 1112 - if count < plc.BUNDLE_SIZE { 1113 - if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil { 1114 - timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds() 1046 + // Calculate estimated next bundle time if not complete 1047 + if count < 10000 { 1048 + timeSpan := lastTime.Sub(firstTime).Seconds() 1115 1049 if timeSpan > 0 { 1116 1050 opsPerSecond := float64(count) / timeSpan 1117 1051 if opsPerSecond > 0 { 1118 - remainingOps := plc.BUNDLE_SIZE - count 1052 + remainingOps := 10000 - count 1119 
1053 secondsNeeded := float64(remainingOps) / opsPerSecond 1120 - result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second) 1054 + estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second) 1055 + 1056 + result["estimated_next_bundle_time"] = estimatedTime 1057 + result["current_rate_per_second"] = opsPerSecond 1121 1058 result["operations_needed"] = remainingOps 1122 - result["current_rate_per_second"] = opsPerSecond 1123 1059 } 1124 1060 } 1061 + result["progress_percent"] = float64(count) / 100.0 1062 + } else { 1063 + // Ready to create bundle 1064 + result["estimated_next_bundle_time"] = time.Now() 1065 + result["operations_needed"] = 0 1125 1066 } 1126 - } else { 1127 - result["estimated_next_bundle_time"] = time.Now() 1128 - result["operations_needed"] = 0 1129 1067 } 1130 1068 } 1131 1069 } else { 1070 + // Empty mempool 1132 1071 result["mempool_start_time"] = nil 1133 1072 result["estimated_next_bundle_time"] = nil 1134 1073 } ··· 1153 1092 1154 1093 // ===== VERIFICATION HANDLERS ===== 1155 1094 1156 - func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) { 1157 - resp := newResponse(w) 1158 - vars := mux.Vars(r) 1159 - 1160 - bundleNumber, err := strconv.Atoi(vars["bundleNumber"]) 1161 - if err != nil { 1162 - resp.error("Invalid bundle number", http.StatusBadRequest) 1163 - return 1164 - } 1165 - 1166 - bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber) 1167 - if err != nil { 1168 - resp.error("Bundle not found", http.StatusNotFound) 1169 - return 1170 - } 1171 - 1172 - // Fetch from PLC and verify 1173 - remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber) 1174 - if err != nil { 1175 - resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError) 1176 - return 1177 - } 1178 - 1179 - remoteHash := computeOperationsHash(remoteOps) 1180 - verified := bundle.Hash == remoteHash 1181 - 1182 - resp.json(map[string]interface{}{ 1183 - "bundle_number": bundleNumber, 1184 - "verified": verified, 1185 - "local_hash": bundle.Hash, 1186 - "remote_hash": remoteHash, 1187 - "local_op_count": plc.BUNDLE_SIZE, 1188 - "remote_op_count": len(remoteOps), 1189 - "boundary_cids_used": len(prevCIDs), 1190 - }) 1191 - } 1192 - 1193 - func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) { 1194 - var after string 1195 - var prevBoundaryCIDs map[string]bool 1196 - 1197 - if bundleNum > 1 { 1198 - prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1) 1199 - if err != nil { 1200 - return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err) 1201 - } 1202 - 1203 - after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z") 1204 - 1205 - if len(prevBundle.BoundaryCIDs) > 0 { 1206 - prevBoundaryCIDs = make(map[string]bool) 1207 - for _, cid := range prevBundle.BoundaryCIDs { 1208 - prevBoundaryCIDs[cid] = true 1209 - } 1210 - } 1211 - } 1212 - 1213 - var allRemoteOps []plc.PLCOperation 1214 - seenCIDs := make(map[string]bool) 1215 - 1216 - for cid := range prevBoundaryCIDs { 1217 - seenCIDs[cid] = true 1218 - } 1219 - 1220 - currentAfter := after 1221 - maxFetches := 20 1222 - 1223 - for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ { 1224 - batch, err := s.plcClient.Export(ctx, plc.ExportOptions{ 1225 - Count: 1000, 1226 - After: currentAfter, 1227 - }) 1228 - if err != nil || len(batch) == 0 { 1229 - break 1230 - } 1231 - 
1232 - for _, op := range batch { 1233 - if !seenCIDs[op.CID] { 1234 - seenCIDs[op.CID] = true 1235 - allRemoteOps = append(allRemoteOps, op) 1236 - if len(allRemoteOps) >= plc.BUNDLE_SIZE { 1237 - break 1238 - } 1239 - } 1240 - } 1241 - 1242 - if len(batch) > 0 { 1243 - lastOp := batch[len(batch)-1] 1244 - currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z") 1245 - } 1246 - 1247 - if len(batch) < 1000 { 1248 - break 1249 - } 1250 - } 1251 - 1252 - if len(allRemoteOps) > plc.BUNDLE_SIZE { 1253 - allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE] 1254 - } 1255 - 1256 - return allRemoteOps, prevBoundaryCIDs, nil 1257 - } 1258 - 1259 1095 func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) { 1260 1096 resp := newResponse(w) 1261 - ctx := r.Context() 1262 1097 1263 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 1264 - if err != nil { 1265 - resp.error(err.Error(), http.StatusInternalServerError) 1266 - return 1267 - } 1268 - 1098 + lastBundle := s.bundleManager.GetLastBundleNumber() 1269 1099 if lastBundle == 0 { 1270 1100 resp.json(map[string]interface{}{ 1271 1101 "status": "empty", ··· 1279 1109 var errorMsg string 1280 1110 1281 1111 for i := 1; i <= lastBundle; i++ { 1282 - bundle, err := s.db.GetBundleByNumber(ctx, i) 1112 + bundle, err := s.bundleManager.GetBundleMetadata(i) 1283 1113 if err != nil { 1284 1114 valid = false 1285 1115 brokenAt = i ··· 1288 1118 } 1289 1119 1290 1120 if i > 1 { 1291 - prevBundle, err := s.db.GetBundleByNumber(ctx, i-1) 1121 + prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1) 1292 1122 if err != nil { 1293 1123 valid = false 1294 1124 brokenAt = i ··· 1296 1126 break 1297 1127 } 1298 1128 1299 - if bundle.PrevBundleHash != prevBundle.Hash { 1129 + if bundle.Parent != prevBundle.Hash { 1300 1130 valid = false 1301 1131 brokenAt = i 1302 - errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1) 1132 + errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1) 1303 1133 break 1304 1134 } 1305 1135 } ··· 1320 1150 1321 1151 func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) { 1322 1152 resp := newResponse(w) 1323 - ctx := r.Context() 1324 1153 1325 - lastBundle, err := s.db.GetLastBundleNumber(ctx) 1326 - if err != nil { 1327 - resp.error(err.Error(), http.StatusInternalServerError) 1328 - return 1329 - } 1330 - 1154 + lastBundle := s.bundleManager.GetLastBundleNumber() 1331 1155 if lastBundle == 0 { 1332 1156 resp.json(map[string]interface{}{ 1333 1157 "chain_length": 0, ··· 1336 1160 return 1337 1161 } 1338 1162 1339 - firstBundle, _ := s.db.GetBundleByNumber(ctx, 1) 1340 - lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle) 1341 - 1342 - // Updated to receive 5 values instead of 3 1343 - count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx) 1344 - if err != nil { 1345 - resp.error(err.Error(), http.StatusInternalServerError) 1346 - return 1347 - } 1163 + firstBundle, _ := s.bundleManager.GetBundleMetadata(1) 1164 + lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle) 1165 + stats := s.bundleManager.GetBundleStats() 1348 1166 1349 1167 resp.json(map[string]interface{}{ 1350 - "chain_length": lastBundle, 1351 - "total_bundles": count, 1352 - "total_compressed_size": compressedSize, 1353 - "total_compressed_size_mb": float64(compressedSize) / 1024 / 1024, 1354 - "total_uncompressed_size": uncompressedSize, 1355 - "total_uncompressed_size_mb": 
float64(uncompressedSize) / 1024 / 1024, 1356 - "compression_ratio": float64(uncompressedSize) / float64(compressedSize), 1357 - "chain_start_time": firstBundle.StartTime, 1358 - "chain_end_time": lastBundleData.EndTime, 1359 - "chain_head_hash": lastBundleData.Hash, 1360 - "first_prev_hash": firstBundle.PrevBundleHash, 1361 - "last_prev_hash": lastBundleData.PrevBundleHash, 1168 + "chain_length": lastBundle, 1169 + "total_bundles": stats["bundle_count"], 1170 + "total_compressed_size": stats["total_size"], 1171 + "total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024, 1172 + "chain_start_time": firstBundle.StartTime, 1173 + "chain_end_time": lastBundleData.EndTime, 1174 + "chain_head_hash": lastBundleData.Hash, 1175 + "first_parent": firstBundle.Parent, 1176 + "last_parent": lastBundleData.Parent, 1362 1177 }) 1363 1178 } 1364 1179 ··· 1379 1194 return 1380 1195 } 1381 1196 1382 - startBundle := s.findStartBundle(ctx, afterTime) 1197 + startBundle := s.findStartBundle(afterTime) 1383 1198 ops := s.collectOperations(ctx, startBundle, afterTime, count) 1384 1199 1385 1200 w.Header().Set("Content-Type", "application/jsonl") ··· 1419 1234 return time.Time{}, fmt.Errorf("invalid timestamp format") 1420 1235 } 1421 1236 1422 - func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int { 1237 + func (s *Server) findStartBundle(afterTime time.Time) int { 1423 1238 if afterTime.IsZero() { 1424 1239 return 1 1425 1240 } 1426 1241 1427 - foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime) 1428 - if err != nil { 1429 - return 1 1430 - } 1431 - 1242 + foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime) 1432 1243 if foundBundle > 1 { 1433 1244 return foundBundle - 1 1434 1245 } ··· 1439 1250 var allOps []plc.PLCOperation 1440 1251 seenCIDs := make(map[string]bool) 1441 1252 1442 - lastBundle, _ := s.db.GetLastBundleNumber(ctx) 1253 + lastBundle := s.bundleManager.GetLastBundleNumber() 1443 1254 1444 1255 for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ { 1445 1256 ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum) ··· 1599 1410 limit := getQueryInt(r, "limit", 0) 1600 1411 fromBundle := getQueryInt(r, "from", 1) 1601 1412 1602 - history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle) 1413 + // Use BundleManager instead of database 1414 + history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle) 1603 1415 if err != nil { 1604 1416 resp.error(err.Error(), http.StatusInternalServerError) 1605 1417 return ··· 1671 1483 }) 1672 1484 } 1673 1485 1674 - // ===== UTILITY FUNCTIONS ===== 1486 + func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) { 1487 + resp := newResponse(w) 1488 + 1489 + bundleNum, err := getBundleNumber(r) 1490 + if err != nil { 1491 + resp.error("invalid bundle number", http.StatusBadRequest) 1492 + return 1493 + } 1675 1494 1676 - func computeOperationsHash(ops []plc.PLCOperation) string { 1677 - var jsonlData []byte 1678 - for _, op := range ops { 1679 - jsonlData = append(jsonlData, op.RawJSON...) 
1680 - jsonlData = append(jsonlData, '\n') 1495 + labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum) 1496 + if err != nil { 1497 + resp.error(err.Error(), http.StatusInternalServerError) 1498 + return 1681 1499 } 1682 - hash := sha256.Sum256(jsonlData) 1683 - return hex.EncodeToString(hash[:]) 1500 + 1501 + resp.json(map[string]interface{}{ 1502 + "bundle": bundleNum, 1503 + "count": len(labels), 1504 + "labels": labels, 1505 + }) 1684 1506 } 1507 + 1508 + // ===== UTILITY FUNCTIONS ===== 1685 1509 1686 1510 func normalizeEndpoint(endpoint string) string { 1687 1511 endpoint = strings.TrimPrefix(endpoint, "https://")
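With the verification handlers now reading from the shared bundle index, the chain rule is simply that each bundle's Parent field must equal the previous bundle's Hash. A minimal self-contained sketch of that rule (Meta is a hypothetical stand-in for plcbundle.BundleMetadata; the values in main are invented):

package main

import "fmt"

// Meta is a hypothetical stand-in for plcbundle.BundleMetadata.
type Meta struct {
	BundleNumber int
	Hash         string
	Parent       string
}

// verifyLinks applies the same rule as handleVerifyChain: each bundle's
// Parent must match the previous bundle's Hash.
func verifyLinks(bundles []Meta) error {
	for i := 1; i < len(bundles); i++ {
		if bundles[i].Parent != bundles[i-1].Hash {
			return fmt.Errorf("chain broken: bundle %06d parent doesn't match bundle %06d hash",
				bundles[i].BundleNumber, bundles[i-1].BundleNumber)
		}
	}
	return nil
}

func main() {
	bundles := []Meta{
		{BundleNumber: 1, Hash: "h1", Parent: ""},
		{BundleNumber: 2, Hash: "h2", Parent: "h1"},
		{BundleNumber: 3, Hash: "h3", Parent: "h2"},
	}
	fmt.Println(verifyLinks(bundles)) // <nil>
}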
+8 -11
internal/api/server.go
··· 6 6 "net/http" 7 7 "time" 8 8 9 - "github.com/atscan/atscanner/internal/config" 10 - "github.com/atscan/atscanner/internal/log" 11 - "github.com/atscan/atscanner/internal/plc" 12 - "github.com/atscan/atscanner/internal/storage" 9 + "github.com/atscan/atscand/internal/config" 10 + "github.com/atscan/atscand/internal/log" 11 + "github.com/atscan/atscand/internal/plc" 12 + "github.com/atscan/atscand/internal/storage" 13 13 "github.com/gorilla/handlers" 14 14 "github.com/gorilla/mux" 15 15 ) ··· 18 18 router *mux.Router 19 19 server *http.Server 20 20 db storage.Database 21 - plcClient *plc.Client 22 21 plcBundleDir string 23 22 bundleManager *plc.BundleManager 24 23 plcIndexDIDs bool 25 24 } 26 25 27 - func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server { 28 - bundleManager, _ := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.UseCache, db, plcCfg.IndexDIDs) 29 - 26 + func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server { 30 27 s := &Server{ 31 28 router: mux.NewRouter(), 32 29 db: db, 33 - plcClient: plc.NewClient(plcCfg.DirectoryURL), 34 30 plcBundleDir: plcCfg.BundleDir, 35 - bundleManager: bundleManager, 31 + bundleManager: bundleManager, // Use provided shared instance 36 32 plcIndexDIDs: plcCfg.IndexDIDs, 37 33 } 38 34 ··· 61 57 // Generic endpoints (keep as-is) 62 58 api.HandleFunc("/endpoints", s.handleGetEndpoints).Methods("GET") 63 59 api.HandleFunc("/endpoints/stats", s.handleGetEndpointStats).Methods("GET") 60 + api.HandleFunc("/endpoints/random", s.handleGetRandomEndpoint).Methods("GET") 64 61 65 62 //PDS-specific endpoints (virtual, created via JOINs) 66 63 api.HandleFunc("/pds", s.handleGetPDSList).Methods("GET") ··· 87 84 api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET") 88 85 api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET") 89 86 api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET") 90 - api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST") 87 + api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET") 91 88 92 89 // PLC history/metrics 93 90 api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
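The server no longer builds its own BundleManager (or a plcClient); the single instance created in main.go is injected. An illustrative call under the new signature (the server variable name is mine):

	server := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)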
+2 -2
internal/log/log.go
··· 28 28 errorLog = log.New(os.Stderr, "", 0) 29 29 } 30 30 31 - // timestamp returns current time in ISO 8601 format 31 + // timestamp returns current time with milliseconds (local time, no timezone) 32 32 func timestamp() string { 33 - return time.Now().Format(time.RFC3339) 33 + return time.Now().Format("2006-01-02T15:04:05.000") 34 34 } 35 35 36 36 func Verbose(format string, v ...interface{}) {
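For reference, the two layouts render like this; a tiny runnable sketch (UTC chosen so the RFC 3339 output is deterministic):

package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.Date(2025, 1, 2, 15, 4, 5, 123_000_000, time.UTC)
	fmt.Println(t.Format(time.RFC3339))              // 2025-01-02T15:04:05Z (old layout)
	fmt.Println(t.Format("2006-01-02T15:04:05.000")) // 2025-01-02T15:04:05.123 (new layout)
}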
+37 -8
internal/pds/client.go
··· 4 4 "context" 5 5 "encoding/json" 6 6 "fmt" 7 + "net" 7 8 "net/http" 8 9 "time" 9 10 ) ··· 83 84 } 84 85 85 86 // DescribeServer fetches com.atproto.server.describeServer 86 - func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) { 87 + // Returns: description, responseTime, usedIP, error 88 + func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) { 89 + startTime := time.Now() 87 90 url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint) 88 91 89 - //fmt.Println(url) 92 + // Track which IP was used 93 + var usedIP string 94 + transport := &http.Transport{ 95 + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 96 + conn, err := (&net.Dialer{ 97 + Timeout: 30 * time.Second, 98 + KeepAlive: 30 * time.Second, 99 + }).DialContext(ctx, network, addr) 100 + 101 + if err == nil && conn != nil { 102 + if remoteAddr := conn.RemoteAddr(); remoteAddr != nil { 103 + if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok { 104 + usedIP = tcpAddr.IP.String() 105 + } 106 + } 107 + } 108 + return conn, err 109 + }, 110 + } 111 + 112 + client := &http.Client{ 113 + Timeout: c.httpClient.Timeout, 114 + Transport: transport, 115 + } 90 116 91 117 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 92 118 if err != nil { 93 - return nil, err 119 + return nil, 0, "", err 94 120 } 95 121 96 - resp, err := c.httpClient.Do(req) 122 + resp, err := client.Do(req) 123 + responseTime := time.Since(startTime) 124 + 97 125 if err != nil { 98 - return nil, err 126 + return nil, responseTime, usedIP, err 99 127 } 100 128 defer resp.Body.Close() 101 129 102 130 if resp.StatusCode != http.StatusOK { 103 - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 131 + return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode) 104 132 } 105 133 106 134 var desc ServerDescription 107 135 if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil { 108 - return nil, err 136 + return nil, responseTime, usedIP, err 109 137 } 110 138 111 - return &desc, nil 139 + return &desc, responseTime, usedIP, nil 112 140 } 113 141 114 142 // CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version" 143 + // Returns: available, responseTime, version, error 115 144 func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) { 116 145 startTime := time.Now() 117 146
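A sketch of consuming the new four-value return from inside this module; the wrapper function and endpoint URL are illustrative, and desc.DID comes from ServerDescription as used by the PDS scanner (context, fmt, and pds imports assumed):

// probe is a hypothetical caller showing the new DescribeServer contract.
func probe(ctx context.Context, c *pds.Client, endpoint string) {
	desc, responseTime, usedIP, err := c.DescribeServer(ctx, endpoint)
	if err != nil {
		// responseTime and usedIP stay meaningful on failure, so callers
		// can record them alongside the error.
		fmt.Printf("describeServer failed after %v (ip=%s): %v\n", responseTime, usedIP, err)
		return
	}
	fmt.Printf("server %s answered in %v from %s\n", desc.DID, responseTime, usedIP)
}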
+41 -34
internal/pds/scanner.go
··· 8 8 "sync/atomic" 9 9 "time" 10 10 11 - "github.com/acarl005/stripansi" 12 - "github.com/atscan/atscanner/internal/config" 13 - "github.com/atscan/atscanner/internal/ipinfo" 14 - "github.com/atscan/atscanner/internal/log" 15 - "github.com/atscan/atscanner/internal/monitor" 16 - "github.com/atscan/atscanner/internal/storage" 11 + "github.com/atscan/atscand/internal/config" 12 + "github.com/atscan/atscand/internal/ipinfo" 13 + "github.com/atscan/atscand/internal/log" 14 + "github.com/atscan/atscand/internal/monitor" 15 + "github.com/atscan/atscand/internal/storage" 17 16 ) 18 17 19 18 type Scanner struct { ··· 40 39 servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{ 41 40 Type: "pds", 42 41 OnlyStale: true, 42 + OnlyValid: true, 43 43 RecheckInterval: s.config.RecheckInterval, 44 44 }) 45 45 if err != nil { ··· 127 127 // STEP 1: Resolve IPs (both IPv4 and IPv6) 128 128 ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint) 129 129 if err != nil { 130 - // Mark as offline due to DNS failure 131 130 s.saveScanResult(ctx, ep.ID, &ScanResult{ 132 131 Status: storage.EndpointStatusOffline, 133 132 ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err), ··· 146 145 go s.updateIPInfoIfNeeded(ctx, ips.IPv6) 147 146 } 148 147 149 - // STEP 2: Health check (rest remains the same) 150 - available, responseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) 151 - if err != nil || !available { 152 - errMsg := "health check failed" 153 - if err != nil { 154 - errMsg = err.Error() 155 - } 148 + // STEP 2: Call describeServer (primary health check + metadata) 149 + desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint) 150 + if err != nil { 156 151 s.saveScanResult(ctx, ep.ID, &ScanResult{ 157 152 Status: storage.EndpointStatusOffline, 158 - ResponseTime: responseTime, 159 - ErrorMessage: errMsg, 153 + ResponseTime: descResponseTime, 154 + ErrorMessage: fmt.Sprintf("describeServer failed: %v", err), 155 + UsedIP: usedIP, 160 156 }) 161 157 return 162 158 } 163 159 164 - // STEP 3: Fetch PDS-specific data 165 - desc, err := s.client.DescribeServer(ctx, ep.Endpoint) 166 - if err != nil { 167 - log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err) 168 - } else if desc != nil && desc.DID != "" { 160 + // Update server DID immediately 161 + if desc.DID != "" { 169 162 s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID) 170 163 } 171 164 172 - // Fetch repos with full info 165 + // STEP 3: Call _health to get version 166 + available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint) 167 + if err != nil || !available { 168 + log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err) 169 + // Server is online (describeServer worked) but _health failed 170 + // Continue with empty version 171 + version = "" 172 + } 173 + 174 + // Calculate average response time from both calls 175 + avgResponseTime := descResponseTime 176 + if available { 177 + avgResponseTime = (descResponseTime + healthResponseTime) / 2 178 + } 179 + 180 + // STEP 4: Fetch repos 173 181 repoList, err := s.client.ListRepos(ctx, ep.Endpoint) 174 182 if err != nil { 175 183 log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err) 176 184 repoList = []Repo{} 177 185 } 178 186 179 - // Convert to DIDs for backward compatibility 187 + // Convert to DIDs 180 188 dids := make([]string, len(repoList)) 181 189 for i, repo := range repoList { 182 190 dids[i] = repo.DID 183 191 } 184 192 185 - // STEP 4: SAVE 
scan result 193 + // STEP 5: SAVE scan result 186 194 s.saveScanResult(ctx, ep.ID, &ScanResult{ 187 195 Status: storage.EndpointStatusOnline, 188 - ResponseTime: responseTime, 196 + ResponseTime: avgResponseTime, 189 197 Description: desc, 190 198 DIDs: dids, 191 199 Version: version, 200 + UsedIP: usedIP, // Only from describeServer 192 201 }) 193 202 194 - // Save repos in batches (only tracks changes) 203 + // STEP 6: Save repos in batches (only tracks changes) 195 204 if len(repoList) > 0 { 196 - batchSize := 10000 205 + batchSize := 100_000 197 206 198 207 log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint) 199 208 ··· 233 242 234 243 log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint) 235 244 } 236 - 237 - // IP info fetch already started at the beginning (step 1.5) 238 - // It will complete in the background 239 245 } 240 246 241 247 func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) { ··· 245 251 Metadata: make(map[string]interface{}), 246 252 } 247 253 248 - var userCount int64 // NEW: Declare user count 254 + var userCount int64 249 255 250 256 // Add PDS-specific metadata 251 257 if result.Status == storage.EndpointStatusOnline { 252 - userCount = int64(len(result.DIDs)) // NEW: Get user count 253 - scanData.Metadata["user_count"] = userCount // Keep in JSON for completeness 258 + userCount = int64(len(result.DIDs)) 259 + scanData.Metadata["user_count"] = userCount 254 260 if result.Description != nil { 255 261 scanData.Metadata["server_info"] = result.Description 256 262 } ··· 267 273 Status: result.Status, 268 274 ResponseTime: result.ResponseTime.Seconds() * 1000, // Convert to ms 269 275 UserCount: userCount, 270 - Version: result.Version, // NEW: Set the version field 276 + Version: result.Version, 277 + UsedIP: result.UsedIP, // NEW 271 278 ScanData: scanData, 272 279 ScannedAt: time.Now().UTC(), 273 280 }
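The repo-persisting loop between these hunks is elided ("···"); it follows the plain slice-batching pattern sketched below (saveBatch and the sizes are my assumptions, not the repo's actual code):

package main

import "fmt"

func main() {
	repos := make([]string, 250_000) // stand-in for repoList
	batchSize := 100_000

	for start := 0; start < len(repos); start += batchSize {
		end := start + batchSize
		if end > len(repos) {
			end = len(repos)
		}
		saveBatch(repos[start:end]) // hypothetical persistence call
		fmt.Printf("saved repos %d..%d\n", start, end-1)
	}
}

func saveBatch(batch []string) { /* persistence omitted */ }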
+2 -1
internal/pds/types.go
··· 37 37 ErrorMessage string 38 38 Description *ServerDescription 39 39 DIDs []string 40 - Version string // NEW: Add this field to pass the version 40 + Version string 41 + UsedIP string // NEW 41 42 }
-676
internal/plc/bundle.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "bytes" 6 - "context" 7 - "crypto/sha256" 8 - "encoding/hex" 9 - "encoding/json" 10 - "fmt" 11 - "os" 12 - "path/filepath" 13 - "time" 14 - 15 - "github.com/atscan/atscanner/internal/log" 16 - "github.com/atscan/atscanner/internal/storage" 17 - "github.com/klauspost/compress/zstd" 18 - ) 19 - 20 - const BUNDLE_SIZE = 10000 21 - 22 - type BundleManager struct { 23 - dir string 24 - enabled bool 25 - encoder *zstd.Encoder 26 - decoder *zstd.Decoder 27 - db storage.Database 28 - indexDIDs bool 29 - } 30 - 31 - // ===== INITIALIZATION ===== 32 - 33 - func NewBundleManager(dir string, enabled bool, db storage.Database, indexDIDs bool) (*BundleManager, error) { 34 - if !enabled { 35 - return &BundleManager{enabled: false}, nil 36 - } 37 - 38 - if err := os.MkdirAll(dir, 0755); err != nil { 39 - return nil, fmt.Errorf("failed to create bundle dir: %w", err) 40 - } 41 - 42 - encoder, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) 43 - if err != nil { 44 - return nil, err 45 - } 46 - 47 - decoder, err := zstd.NewReader(nil) 48 - if err != nil { 49 - return nil, err 50 - } 51 - 52 - return &BundleManager{ 53 - dir: dir, 54 - enabled: enabled, 55 - encoder: encoder, 56 - decoder: decoder, 57 - db: db, 58 - indexDIDs: indexDIDs, // NEW 59 - }, nil 60 - } 61 - 62 - func (bm *BundleManager) Close() { 63 - if bm.encoder != nil { 64 - bm.encoder.Close() 65 - } 66 - if bm.decoder != nil { 67 - bm.decoder.Close() 68 - } 69 - } 70 - 71 - // ===== BUNDLE FILE ABSTRACTION ===== 72 - 73 - type bundleFile struct { 74 - path string 75 - operations []PLCOperation 76 - uncompressedHash string 77 - compressedHash string 78 - } 79 - 80 - func (bm *BundleManager) newBundleFile(bundleNum int) *bundleFile { 81 - return &bundleFile{ 82 - path: filepath.Join(bm.dir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)), 83 - } 84 - } 85 - 86 - func (bf *bundleFile) exists() bool { 87 - _, err := os.Stat(bf.path) 88 - return err == nil 89 - } 90 - 91 - func (bm *BundleManager) load(bf *bundleFile) error { 92 - compressed, err := os.ReadFile(bf.path) 93 - if err != nil { 94 - return fmt.Errorf("read failed: %w", err) 95 - } 96 - 97 - decompressed, err := bm.decoder.DecodeAll(compressed, nil) 98 - if err != nil { 99 - return fmt.Errorf("decompress failed: %w", err) 100 - } 101 - 102 - bf.operations = bm.parseJSONL(decompressed) 103 - return nil 104 - } 105 - 106 - func (bm *BundleManager) save(bf *bundleFile) error { 107 - jsonlData := bm.serializeJSONL(bf.operations) 108 - bf.uncompressedHash = bm.hash(jsonlData) 109 - 110 - compressed := bm.encoder.EncodeAll(jsonlData, nil) 111 - bf.compressedHash = bm.hash(compressed) 112 - 113 - return os.WriteFile(bf.path, compressed, 0644) 114 - } 115 - 116 - func (bm *BundleManager) parseJSONL(data []byte) []PLCOperation { 117 - var ops []PLCOperation 118 - scanner := bufio.NewScanner(bytes.NewReader(data)) 119 - 120 - for scanner.Scan() { 121 - line := scanner.Bytes() 122 - if len(line) == 0 { 123 - continue 124 - } 125 - 126 - var op PLCOperation 127 - if err := json.Unmarshal(line, &op); err == nil { 128 - op.RawJSON = append([]byte(nil), line...) 129 - ops = append(ops, op) 130 - } 131 - } 132 - 133 - return ops 134 - } 135 - 136 - func (bm *BundleManager) serializeJSONL(ops []PLCOperation) []byte { 137 - var buf []byte 138 - for _, op := range ops { 139 - buf = append(buf, op.RawJSON...) 
140 - buf = append(buf, '\n') 141 - } 142 - return buf 143 - } 144 - 145 - // ===== BUNDLE FETCHING ===== 146 - 147 - type bundleFetcher struct { 148 - client *Client 149 - seenCIDs map[string]bool 150 - currentAfter string 151 - fetchCount int 152 - } 153 - 154 - func newBundleFetcher(client *Client, afterTime string, prevBoundaryCIDs map[string]bool) *bundleFetcher { 155 - seen := make(map[string]bool) 156 - for cid := range prevBoundaryCIDs { 157 - seen[cid] = true 158 - } 159 - 160 - return &bundleFetcher{ 161 - client: client, 162 - seenCIDs: seen, 163 - currentAfter: afterTime, 164 - } 165 - } 166 - 167 - func (bf *bundleFetcher) fetchUntilComplete(ctx context.Context, target int) ([]PLCOperation, bool) { 168 - var ops []PLCOperation 169 - maxFetches := (target / 900) + 5 170 - 171 - for len(ops) < target && bf.fetchCount < maxFetches { 172 - bf.fetchCount++ 173 - batchSize := bf.calculateBatchSize(target - len(ops)) 174 - 175 - log.Verbose(" Fetch #%d: need %d more, requesting %d", bf.fetchCount, target-len(ops), batchSize) 176 - 177 - batch, shouldContinue := bf.fetchBatch(ctx, batchSize) 178 - 179 - for _, op := range batch { 180 - if !bf.seenCIDs[op.CID] { 181 - bf.seenCIDs[op.CID] = true 182 - ops = append(ops, op) 183 - 184 - if len(ops) >= target { 185 - return ops[:target], true 186 - } 187 - } 188 - } 189 - 190 - if !shouldContinue { 191 - break 192 - } 193 - } 194 - 195 - return ops, len(ops) >= target 196 - } 197 - 198 - func (bf *bundleFetcher) calculateBatchSize(remaining int) int { 199 - if bf.fetchCount == 0 { 200 - return 1000 201 - } 202 - if remaining < 100 { 203 - return 50 204 - } 205 - if remaining < 500 { 206 - return 200 207 - } 208 - return 1000 209 - } 210 - 211 - func (bf *bundleFetcher) fetchBatch(ctx context.Context, size int) ([]PLCOperation, bool) { 212 - ops, err := bf.client.Export(ctx, ExportOptions{ 213 - Count: size, 214 - After: bf.currentAfter, 215 - }) 216 - 217 - if err != nil || len(ops) == 0 { 218 - return nil, false 219 - } 220 - 221 - if len(ops) > 0 { 222 - bf.currentAfter = ops[len(ops)-1].CreatedAt.Format(time.RFC3339Nano) 223 - } 224 - 225 - return ops, len(ops) >= size 226 - } 227 - 228 - // ===== MAIN BUNDLE LOADING ===== 229 - 230 - func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int, plcClient *Client) ([]PLCOperation, bool, error) { 231 - if !bm.enabled { 232 - return nil, false, fmt.Errorf("bundle manager disabled") 233 - } 234 - 235 - bf := bm.newBundleFile(bundleNum) 236 - 237 - // Try local file first 238 - if bf.exists() { 239 - return bm.loadFromFile(ctx, bundleNum, bf) 240 - } 241 - 242 - // Fetch from PLC 243 - return bm.fetchFromPLC(ctx, bundleNum, bf, plcClient) 244 - } 245 - 246 - func (bm *BundleManager) loadFromFile(ctx context.Context, bundleNum int, bf *bundleFile) ([]PLCOperation, bool, error) { 247 - log.Verbose("→ Loading bundle %06d from local file", bundleNum) 248 - 249 - // Verify hash if bundle is in DB 250 - if dbBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum); err == nil && dbBundle != nil { 251 - if err := bm.verifyHash(bf.path, dbBundle.CompressedHash); err != nil { 252 - log.Error("⚠ Hash mismatch for bundle %06d! 
Re-fetching...", bundleNum) 253 - os.Remove(bf.path) 254 - return nil, false, fmt.Errorf("hash mismatch") 255 - } 256 - log.Verbose("✓ Hash verified for bundle %06d", bundleNum) 257 - } 258 - 259 - if err := bm.load(bf); err != nil { 260 - return nil, false, err 261 - } 262 - 263 - // Index if not in DB 264 - if _, err := bm.db.GetBundleByNumber(ctx, bundleNum); err != nil { 265 - bf.compressedHash = bm.hashFile(bf.path) 266 - bf.uncompressedHash = bm.hash(bm.serializeJSONL(bf.operations)) 267 - 268 - // Calculate cursor from previous bundle 269 - cursor := bm.calculateCursor(ctx, bundleNum) 270 - 271 - bm.indexBundle(ctx, bundleNum, bf, cursor) 272 - } 273 - 274 - return bf.operations, true, nil 275 - } 276 - 277 - func (bm *BundleManager) fetchFromPLC(ctx context.Context, bundleNum int, bf *bundleFile, client *Client) ([]PLCOperation, bool, error) { 278 - log.Info("→ Bundle %06d not found locally, fetching from PLC directory...", bundleNum) 279 - 280 - afterTime, prevCIDs := bm.getBoundaryInfo(ctx, bundleNum) 281 - fetcher := newBundleFetcher(client, afterTime, prevCIDs) 282 - 283 - ops, isComplete := fetcher.fetchUntilComplete(ctx, BUNDLE_SIZE) 284 - 285 - log.Info(" Collected %d unique operations after %d fetches (complete=%v)", 286 - len(ops), fetcher.fetchCount, isComplete) 287 - 288 - if isComplete { 289 - bf.operations = ops 290 - if err := bm.save(bf); err != nil { 291 - log.Error("Warning: failed to save bundle: %v", err) 292 - } else { 293 - // The cursor is the afterTime that was used to fetch this bundle 294 - cursor := afterTime 295 - bm.indexBundle(ctx, bundleNum, bf, cursor) 296 - log.Info("✓ Bundle %06d saved [%d ops, hash: %s..., cursor: %s]", 297 - bundleNum, len(ops), bf.uncompressedHash[:16], cursor) 298 - } 299 - } 300 - 301 - return ops, isComplete, nil 302 - } 303 - 304 - func (bm *BundleManager) getBoundaryInfo(ctx context.Context, bundleNum int) (string, map[string]bool) { 305 - if bundleNum == 1 { 306 - return "", nil 307 - } 308 - 309 - prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1) 310 - if err != nil { 311 - return "", nil 312 - } 313 - 314 - afterTime := prevBundle.EndTime.Format(time.RFC3339Nano) 315 - 316 - // Return stored boundary CIDs if available 317 - if len(prevBundle.BoundaryCIDs) > 0 { 318 - cids := make(map[string]bool) 319 - for _, cid := range prevBundle.BoundaryCIDs { 320 - cids[cid] = true 321 - } 322 - return afterTime, cids 323 - } 324 - 325 - // Fallback: compute from file 326 - bf := bm.newBundleFile(bundleNum - 1) 327 - if bf.exists() { 328 - if err := bm.load(bf); err == nil { 329 - _, cids := GetBoundaryCIDs(bf.operations) 330 - return afterTime, cids 331 - } 332 - } 333 - 334 - return afterTime, nil 335 - } 336 - 337 - // ===== BUNDLE INDEXING ===== 338 - 339 - func (bm *BundleManager) indexBundle(ctx context.Context, bundleNum int, bf *bundleFile, cursor string) error { 340 - prevHash := "" 341 - if bundleNum > 1 { 342 - if prev, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 343 - prevHash = prev.Hash 344 - } 345 - } 346 - 347 - dids := bm.extractUniqueDIDs(bf.operations) 348 - compressedFileSize := bm.getFileSize(bf.path) 349 - 350 - // Calculate uncompressed size 351 - uncompressedSize := int64(0) 352 - for _, op := range bf.operations { 353 - uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline 354 - } 355 - 356 - // Get time range from operations 357 - firstSeenAt := bf.operations[0].CreatedAt 358 - lastSeenAt := bf.operations[len(bf.operations)-1].CreatedAt 359 - 360 - bundle := 
&storage.PLCBundle{ 361 - BundleNumber: bundleNum, 362 - StartTime: firstSeenAt, 363 - EndTime: lastSeenAt, 364 - DIDs: dids, 365 - Hash: bf.uncompressedHash, 366 - CompressedHash: bf.compressedHash, 367 - CompressedSize: compressedFileSize, 368 - UncompressedSize: uncompressedSize, 369 - Cursor: cursor, 370 - PrevBundleHash: prevHash, 371 - Compressed: true, 372 - CreatedAt: time.Now().UTC(), 373 - } 374 - 375 - // Create bundle first 376 - if err := bm.db.CreateBundle(ctx, bundle); err != nil { 377 - return err 378 - } 379 - 380 - // NEW: Only index DIDs if enabled 381 - if bm.indexDIDs { 382 - start := time.Now() 383 - 384 - // Extract handle and PDS for each DID using centralized helper 385 - didInfoMap := ExtractDIDInfoMap(bf.operations) 386 - 387 - if err := bm.db.AddBundleDIDs(ctx, bundleNum, dids); err != nil { 388 - log.Error("Failed to index DIDs for bundle %06d: %v", bundleNum, err) 389 - // Don't return error - bundle is already created 390 - } else { 391 - // Update handle and PDS for each DID 392 - for did, info := range didInfoMap { 393 - // Validate handle length before saving 394 - validHandle := ValidateHandle(info.Handle) 395 - 396 - if err := bm.db.UpsertDID(ctx, did, bundleNum, validHandle, info.PDS); err != nil { 397 - log.Error("Failed to update DID %s metadata: %v", did, err) 398 - } 399 - } 400 - 401 - elapsed := time.Since(start) 402 - log.Verbose("✓ Indexed %d unique DIDs for bundle %06d in %v", len(dids), bundleNum, elapsed) 403 - } 404 - } else { 405 - log.Verbose("⊘ Skipped DID indexing for bundle %06d (disabled in config)", bundleNum) 406 - } 407 - 408 - return nil 409 - } 410 - 411 - func (bm *BundleManager) extractUniqueDIDs(ops []PLCOperation) []string { 412 - didSet := make(map[string]bool) 413 - for _, op := range ops { 414 - didSet[op.DID] = true 415 - } 416 - 417 - dids := make([]string, 0, len(didSet)) 418 - for did := range didSet { 419 - dids = append(dids, did) 420 - } 421 - return dids 422 - } 423 - 424 - // ===== MEMPOOL BUNDLE CREATION ===== 425 - 426 - func (bm *BundleManager) CreateBundleFromMempool(ctx context.Context, operations []PLCOperation, cursor string) (int, error) { 427 - if !bm.enabled { 428 - return 0, fmt.Errorf("bundle manager disabled") 429 - } 430 - 431 - if len(operations) != BUNDLE_SIZE { 432 - return 0, fmt.Errorf("bundle must have exactly %d operations, got %d", BUNDLE_SIZE, len(operations)) 433 - } 434 - 435 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 436 - if err != nil { 437 - return 0, err 438 - } 439 - bundleNum := lastBundle + 1 440 - 441 - bf := bm.newBundleFile(bundleNum) 442 - bf.operations = operations 443 - 444 - if err := bm.save(bf); err != nil { 445 - return 0, err 446 - } 447 - 448 - if err := bm.indexBundle(ctx, bundleNum, bf, cursor); err != nil { 449 - return 0, err 450 - } 451 - 452 - log.Info("✓ Created bundle %06d from mempool (hash: %s...)", 453 - bundleNum, bf.uncompressedHash[:16]) 454 - 455 - return bundleNum, nil 456 - } 457 - 458 - // ===== VERIFICATION ===== 459 - 460 - func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error { 461 - if !bm.enabled { 462 - return fmt.Errorf("bundle manager disabled") 463 - } 464 - 465 - log.Info("Verifying bundle chain from 1 to %06d...", endBundle) 466 - 467 - for i := 1; i <= endBundle; i++ { 468 - bundle, err := bm.db.GetBundleByNumber(ctx, i) 469 - if err != nil { 470 - return fmt.Errorf("bundle %06d not found: %w", i, err) 471 - } 472 - 473 - // Verify file hash 474 - path := bm.newBundleFile(i).path 475 - if err := 
bm.verifyHash(path, bundle.CompressedHash); err != nil { 476 - return fmt.Errorf("bundle %06d hash verification failed: %w", i, err) 477 - } 478 - 479 - // Verify chain link 480 - if i > 1 { 481 - prevBundle, err := bm.db.GetBundleByNumber(ctx, i-1) 482 - if err != nil { 483 - return fmt.Errorf("bundle %06d missing (required by %06d)", i-1, i) 484 - } 485 - 486 - if bundle.PrevBundleHash != prevBundle.Hash { 487 - return fmt.Errorf("bundle %06d chain broken! Expected prev_hash=%s, got=%s", 488 - i, prevBundle.Hash[:16], bundle.PrevBundleHash[:16]) 489 - } 490 - } 491 - 492 - if i%100 == 0 { 493 - log.Verbose(" ✓ Verified bundles 1-%06d", i) 494 - } 495 - } 496 - 497 - log.Info("✓ Chain verification complete: bundles 1-%06d are valid and continuous", endBundle) 498 - return nil 499 - } 500 - 501 - func (bm *BundleManager) EnsureBundleContinuity(ctx context.Context, targetBundle int) error { 502 - if !bm.enabled { 503 - return nil 504 - } 505 - 506 - for i := 1; i < targetBundle; i++ { 507 - if !bm.newBundleFile(i).exists() { 508 - if _, err := bm.db.GetBundleByNumber(ctx, i); err != nil { 509 - return fmt.Errorf("bundle %06d is missing (required for continuity)", i) 510 - } 511 - } 512 - } 513 - 514 - return nil 515 - } 516 - 517 - // ===== UTILITY METHODS ===== 518 - 519 - func (bm *BundleManager) hash(data []byte) string { 520 - h := sha256.Sum256(data) 521 - return hex.EncodeToString(h[:]) 522 - } 523 - 524 - func (bm *BundleManager) hashFile(path string) string { 525 - data, _ := os.ReadFile(path) 526 - return bm.hash(data) 527 - } 528 - 529 - func (bm *BundleManager) verifyHash(path, expectedHash string) error { 530 - if expectedHash == "" { 531 - return nil 532 - } 533 - 534 - actualHash := bm.hashFile(path) 535 - if actualHash != expectedHash { 536 - return fmt.Errorf("hash mismatch") 537 - } 538 - return nil 539 - } 540 - 541 - func (bm *BundleManager) getFileSize(path string) int64 { 542 - if info, err := os.Stat(path); err == nil { 543 - return info.Size() 544 - } 545 - return 0 546 - } 547 - 548 - func (bm *BundleManager) GetStats(ctx context.Context) (int64, int64, int64, int64, error) { 549 - if !bm.enabled { 550 - return 0, 0, 0, 0, nil 551 - } 552 - return bm.db.GetBundleStats(ctx) 553 - } 554 - 555 - func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) { 556 - lastBundle, err := bm.db.GetLastBundleNumber(ctx) 557 - if err != nil { 558 - return nil, err 559 - } 560 - 561 - if lastBundle == 0 { 562 - return map[string]interface{}{ 563 - "chain_length": 0, 564 - "status": "empty", 565 - }, nil 566 - } 567 - 568 - firstBundle, _ := bm.db.GetBundleByNumber(ctx, 1) 569 - lastBundleData, _ := bm.db.GetBundleByNumber(ctx, lastBundle) 570 - 571 - return map[string]interface{}{ 572 - "chain_length": lastBundle, 573 - "first_bundle": 1, 574 - "last_bundle": lastBundle, 575 - "chain_start_time": firstBundle.StartTime, 576 - "chain_end_time": lastBundleData.EndTime, 577 - "chain_head_hash": lastBundleData.Hash, 578 - }, nil 579 - } 580 - 581 - // ===== EXPORTED HELPERS ===== 582 - 583 - func GetBoundaryCIDs(operations []PLCOperation) (time.Time, map[string]bool) { 584 - if len(operations) == 0 { 585 - return time.Time{}, nil 586 - } 587 - 588 - lastOp := operations[len(operations)-1] 589 - boundaryTime := lastOp.CreatedAt 590 - cidSet := make(map[string]bool) 591 - 592 - for i := len(operations) - 1; i >= 0; i-- { 593 - op := operations[i] 594 - if op.CreatedAt.Equal(boundaryTime) { 595 - cidSet[op.CID] = true 596 - } else { 597 - break 598 - } 599 - 
} 600 - 601 - return boundaryTime, cidSet 602 - } 603 - 604 - func StripBoundaryDuplicates(operations []PLCOperation, boundaryTimestamp string, prevBoundaryCIDs map[string]bool) []PLCOperation { 605 - if len(operations) == 0 { 606 - return operations 607 - } 608 - 609 - boundaryTime, err := time.Parse(time.RFC3339Nano, boundaryTimestamp) 610 - if err != nil { 611 - return operations 612 - } 613 - 614 - startIdx := 0 615 - for startIdx < len(operations) { 616 - op := operations[startIdx] 617 - 618 - if op.CreatedAt.After(boundaryTime) { 619 - break 620 - } 621 - 622 - if op.CreatedAt.Equal(boundaryTime) && prevBoundaryCIDs[op.CID] { 623 - startIdx++ 624 - continue 625 - } 626 - 627 - break 628 - } 629 - 630 - return operations[startIdx:] 631 - } 632 - 633 - // LoadBundleOperations is a public method for external access (e.g., API handlers) 634 - func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) { 635 - if !bm.enabled { 636 - return nil, fmt.Errorf("bundle manager disabled") 637 - } 638 - 639 - bf := bm.newBundleFile(bundleNum) 640 - 641 - if !bf.exists() { 642 - return nil, fmt.Errorf("bundle %06d not found", bundleNum) 643 - } 644 - 645 - if err := bm.load(bf); err != nil { 646 - return nil, err 647 - } 648 - 649 - return bf.operations, nil 650 - } 651 - 652 - // calculateCursor determines the cursor value for a given bundle 653 - // For bundle 1: returns empty string 654 - // For bundle N: returns the end_time of bundle N-1 in RFC3339Nano format 655 - func (bm *BundleManager) calculateCursor(ctx context.Context, bundleNum int) string { 656 - if bundleNum == 1 { 657 - return "" 658 - } 659 - 660 - // Try to get cursor from previous bundle in DB 661 - if prevBundle, err := bm.db.GetBundleByNumber(ctx, bundleNum-1); err == nil { 662 - return prevBundle.EndTime.Format(time.RFC3339Nano) 663 - } 664 - 665 - // If previous bundle not in DB, try to load it from file 666 - prevBf := bm.newBundleFile(bundleNum - 1) 667 - if prevBf.exists() { 668 - if err := bm.load(prevBf); err == nil && len(prevBf.operations) > 0 { 669 - // Return the createdAt of the last operation in previous bundle 670 - lastOp := prevBf.operations[len(prevBf.operations)-1] 671 - return lastOp.CreatedAt.Format(time.RFC3339Nano) 672 - } 673 - } 674 - 675 - return "" 676 - }
-237
internal/plc/client.go
··· 1 - package plc 2 - 3 - import ( 4 - "bufio" 5 - "context" 6 - "encoding/json" 7 - "fmt" 8 - "io" 9 - "net/http" 10 - "strconv" 11 - "time" 12 - 13 - "github.com/atscan/atscanner/internal/log" 14 - ) 15 - 16 - type Client struct { 17 - baseURL string 18 - httpClient *http.Client 19 - rateLimiter *RateLimiter 20 - } 21 - 22 - func NewClient(baseURL string) *Client { 23 - // Rate limit: 90 requests per minute (leaving buffer below 100/min limit) 24 - rateLimiter := NewRateLimiter(90, time.Minute) 25 - 26 - return &Client{ 27 - baseURL: baseURL, 28 - httpClient: &http.Client{ 29 - Timeout: 60 * time.Second, 30 - }, 31 - rateLimiter: rateLimiter, 32 - } 33 - } 34 - 35 - func (c *Client) Close() { 36 - if c.rateLimiter != nil { 37 - c.rateLimiter.Stop() 38 - } 39 - } 40 - 41 - type ExportOptions struct { 42 - Count int 43 - After string // ISO 8601 datetime string 44 - } 45 - 46 - // Export fetches export data from PLC directory with rate limiting and retry 47 - func (c *Client) Export(ctx context.Context, opts ExportOptions) ([]PLCOperation, error) { 48 - return c.exportWithRetry(ctx, opts, 5) 49 - } 50 - 51 - // exportWithRetry implements retry logic with exponential backoff for rate limits 52 - func (c *Client) exportWithRetry(ctx context.Context, opts ExportOptions, maxRetries int) ([]PLCOperation, error) { 53 - var lastErr error 54 - backoff := 1 * time.Second 55 - 56 - for attempt := 1; attempt <= maxRetries; attempt++ { 57 - // Wait for rate limiter token 58 - if err := c.rateLimiter.Wait(ctx); err != nil { 59 - return nil, err 60 - } 61 - 62 - operations, retryAfter, err := c.doExport(ctx, opts) 63 - 64 - if err == nil { 65 - return operations, nil 66 - } 67 - 68 - lastErr = err 69 - 70 - // Check if it's a rate limit error (429) 71 - if retryAfter > 0 { 72 - log.Info("⚠ Rate limited by PLC directory, waiting %v before retry %d/%d", 73 - retryAfter, attempt, maxRetries) 74 - 75 - select { 76 - case <-time.After(retryAfter): 77 - continue 78 - case <-ctx.Done(): 79 - return nil, ctx.Err() 80 - } 81 - } 82 - 83 - // Other errors - exponential backoff 84 - if attempt < maxRetries { 85 - log.Verbose("Request failed (attempt %d/%d): %v, retrying in %v", 86 - attempt, maxRetries, err, backoff) 87 - 88 - select { 89 - case <-time.After(backoff): 90 - backoff *= 2 // Exponential backoff 91 - case <-ctx.Done(): 92 - return nil, ctx.Err() 93 - } 94 - } 95 - } 96 - 97 - return nil, fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr) 98 - } 99 - 100 - // doExport performs the actual HTTP request 101 - func (c *Client) doExport(ctx context.Context, opts ExportOptions) ([]PLCOperation, time.Duration, error) { 102 - url := fmt.Sprintf("%s/export", c.baseURL) 103 - 104 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 105 - if err != nil { 106 - return nil, 0, err 107 - } 108 - 109 - // Add query parameters 110 - q := req.URL.Query() 111 - if opts.Count > 0 { 112 - q.Add("count", fmt.Sprintf("%d", opts.Count)) 113 - } 114 - if opts.After != "" { 115 - q.Add("after", opts.After) 116 - } 117 - req.URL.RawQuery = q.Encode() 118 - 119 - resp, err := c.httpClient.Do(req) 120 - if err != nil { 121 - return nil, 0, fmt.Errorf("request failed: %w", err) 122 - } 123 - defer resp.Body.Close() 124 - 125 - // Handle rate limiting (429) 126 - if resp.StatusCode == http.StatusTooManyRequests { 127 - retryAfter := parseRetryAfter(resp) 128 - 129 - // Also check x-ratelimit headers for info 130 - if limit := resp.Header.Get("x-ratelimit-limit"); limit != "" { 131 - log.Verbose("Rate limit: 
%s", limit) 132 - } 133 - 134 - return nil, retryAfter, fmt.Errorf("rate limited (429)") 135 - } 136 - 137 - if resp.StatusCode != http.StatusOK { 138 - body, _ := io.ReadAll(resp.Body) 139 - return nil, 0, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 140 - } 141 - 142 - var operations []PLCOperation 143 - 144 - // PLC export returns newline-delimited JSON 145 - scanner := bufio.NewScanner(resp.Body) 146 - buf := make([]byte, 0, 64*1024) 147 - scanner.Buffer(buf, 1024*1024) 148 - 149 - lineCount := 0 150 - for scanner.Scan() { 151 - lineCount++ 152 - line := scanner.Bytes() 153 - 154 - if len(line) == 0 { 155 - continue 156 - } 157 - 158 - var op PLCOperation 159 - if err := json.Unmarshal(line, &op); err != nil { 160 - log.Error("Warning: failed to parse operation on line %d: %v", lineCount, err) 161 - continue 162 - } 163 - 164 - // CRITICAL: Store the original raw JSON bytes 165 - op.RawJSON = make([]byte, len(line)) 166 - copy(op.RawJSON, line) 167 - 168 - operations = append(operations, op) 169 - } 170 - 171 - if err := scanner.Err(); err != nil { 172 - return nil, 0, fmt.Errorf("error reading response: %w", err) 173 - } 174 - 175 - return operations, 0, nil 176 - 177 - } 178 - 179 - // parseRetryAfter parses the Retry-After header 180 - func parseRetryAfter(resp *http.Response) time.Duration { 181 - retryAfter := resp.Header.Get("Retry-After") 182 - if retryAfter == "" { 183 - // Default to 5 minutes if no header 184 - return 5 * time.Minute 185 - } 186 - 187 - // Try parsing as seconds 188 - if seconds, err := strconv.Atoi(retryAfter); err == nil { 189 - return time.Duration(seconds) * time.Second 190 - } 191 - 192 - // Try parsing as HTTP date 193 - if t, err := http.ParseTime(retryAfter); err == nil { 194 - return time.Until(t) 195 - } 196 - 197 - // Default 198 - return 5 * time.Minute 199 - } 200 - 201 - // GetDID fetches a specific DID document from PLC 202 - func (c *Client) GetDID(ctx context.Context, did string) (*DIDDocument, error) { 203 - // Wait for rate limiter 204 - if err := c.rateLimiter.Wait(ctx); err != nil { 205 - return nil, err 206 - } 207 - 208 - url := fmt.Sprintf("%s/%s", c.baseURL, did) 209 - 210 - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 211 - if err != nil { 212 - return nil, err 213 - } 214 - 215 - resp, err := c.httpClient.Do(req) 216 - if err != nil { 217 - return nil, err 218 - } 219 - defer resp.Body.Close() 220 - 221 - if resp.StatusCode == http.StatusTooManyRequests { 222 - retryAfter := parseRetryAfter(resp) 223 - return nil, fmt.Errorf("rate limited, retry after %v", retryAfter) 224 - } 225 - 226 - if resp.StatusCode != http.StatusOK { 227 - body, _ := io.ReadAll(resp.Body) 228 - return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) 229 - } 230 - 231 - var doc DIDDocument 232 - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { 233 - return nil, err 234 - } 235 - 236 - return &doc, nil 237 - }
+20 -2
internal/plc/helpers.go
··· 1 1 package plc 2 2 3 - import "strings" 3 + import ( 4 + "regexp" 5 + "strings" 6 + ) 4 7 5 8 // MaxHandleLength is the maximum allowed handle length for database storage 6 9 const MaxHandleLength = 500 10 + 11 + // Handle validation regex per AT Protocol spec 12 + // Ensures proper domain format: alphanumeric labels separated by dots, TLD starts with letter 13 + var handleRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`) 7 14 8 15 // ExtractHandle safely extracts the handle from a PLC operation 9 16 func ExtractHandle(op *PLCOperation) string { ··· 29 36 } 30 37 31 38 // ValidateHandle checks if a handle is valid for database storage 32 - // Returns empty string if handle is too long 39 + // Returns empty string if handle is invalid (too long or wrong format) 33 40 func ValidateHandle(handle string) string { 41 + if handle == "" { 42 + return "" 43 + } 44 + 45 + // Check length first (faster) 34 46 if len(handle) > MaxHandleLength { 35 47 return "" 36 48 } 49 + 50 + // Validate format using regex 51 + if !handleRegex.MatchString(handle) { 52 + return "" 53 + } 54 + 37 55 return handle 38 56 } 39 57
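To make the new validation concrete, here is how ValidateHandle behaves on a few illustrative inputs (the handles are invented examples, not taken from the repo; fmt and strings imports assumed):

// In package plc:
func ExampleValidateHandle() {
	fmt.Printf("%q\n", ValidateHandle("alice.bsky.social"))      // "alice.bsky.social" (well-formed domain)
	fmt.Printf("%q\n", ValidateHandle("not_a_domain"))           // "" (no dot, and "_" is not allowed)
	fmt.Printf("%q\n", ValidateHandle("foo.123"))                // "" (TLD must start with a letter)
	fmt.Printf("%q\n", ValidateHandle(strings.Repeat("a", 501))) // "" (exceeds MaxHandleLength)
}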
+522
internal/plc/manager.go
···
1 + package plc
2 + 
3 + import (
4 + "context"
5 + "encoding/csv"
6 + "fmt"
7 + "io"
8 + "os"
9 + "path/filepath"
10 + "sort"
11 + "strconv"
12 + "strings"
13 + "time"
14 + 
15 + "github.com/atscan/atscand/internal/log"
16 + "github.com/atscan/atscand/internal/storage"
17 + "github.com/klauspost/compress/zstd"
18 + plcbundle "tangled.org/atscan.net/plcbundle"
19 + )
20 + 
21 + // BundleManager wraps the library's manager with database integration
22 + type BundleManager struct {
23 + libManager *plcbundle.Manager
24 + db storage.Database
25 + bundleDir string
26 + indexDIDs bool
27 + }
28 + 
29 + func NewBundleManager(bundleDir string, plcURL string, db storage.Database, indexDIDs bool) (*BundleManager, error) {
30 + // Create library config
31 + config := plcbundle.DefaultConfig(bundleDir)
32 + 
33 + // Create PLC client
34 + var client *plcbundle.PLCClient
35 + if plcURL != "" {
36 + client = plcbundle.NewPLCClient(plcURL)
37 + }
38 + 
39 + // Create library manager
40 + libMgr, err := plcbundle.NewManager(config, client)
41 + if err != nil {
42 + return nil, fmt.Errorf("failed to create library manager: %w", err)
43 + }
44 + 
45 + return &BundleManager{
46 + libManager: libMgr,
47 + db: db,
48 + bundleDir: bundleDir,
49 + indexDIDs: indexDIDs,
50 + }, nil
51 + }
52 + 
53 + func (bm *BundleManager) Close() {
54 + if bm.libManager != nil {
55 + bm.libManager.Close()
56 + }
57 + }
58 + 
59 + // LoadBundleOperations loads a bundle via the library and returns its operations
60 + func (bm *BundleManager) LoadBundleOperations(ctx context.Context, bundleNum int) ([]PLCOperation, error) {
61 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
62 + if err != nil {
63 + return nil, err
64 + }
65 + return bundle.Operations, nil
66 + }
67 + 
68 + // LoadBundle loads a full bundle with metadata
69 + func (bm *BundleManager) LoadBundle(ctx context.Context, bundleNum int) (*plcbundle.Bundle, error) {
70 + return bm.libManager.LoadBundle(ctx, bundleNum)
71 + }
72 + 
73 + // FetchAndSaveBundle fetches the next bundle from PLC and saves it
74 + func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
75 + // Fetch from PLC using library
76 + bundle, err := bm.libManager.FetchNextBundle(ctx)
77 + if err != nil {
78 + return nil, err
79 + }
80 + 
81 + // Save to disk (library handles this)
82 + if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
83 + return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
84 + }
85 + 
86 + // Index DIDs if enabled (still use database for this)
87 + if bm.indexDIDs && len(bundle.Operations) > 0 {
88 + if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
89 + log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
90 + }
91 + }
92 + 
93 + log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94 + 
95 + return bundle, nil
96 + }
97 + 
98 + // indexBundleDIDs indexes DIDs from a bundle into the database
99 + func (bm *BundleManager) indexBundleDIDs(ctx context.Context, bundle *plcbundle.Bundle) error {
100 + start := time.Now()
101 + log.Verbose("Indexing DIDs for bundle %06d...", bundle.BundleNumber)
102 + 
103 + // Extract DID info from operations
104 + didInfoMap := ExtractDIDInfoMap(bundle.Operations)
105 + 
106 + successCount := 0
107 + errorCount := 0
108 + invalidHandleCount := 0
109 + 
110 + // Upsert each DID
111 + for did, info := range didInfoMap {
112 + validHandle := ValidateHandle(info.Handle)
113 + if info.Handle != "" && validHandle == "" {
114 + invalidHandleCount++
115 + }
116 + 
117 + if err := 
bm.db.UpsertDID(ctx, did, bundle.BundleNumber, validHandle, info.PDS); err != nil {
118 + log.Error("Failed to index DID %s: %v", did, err)
119 + errorCount++
120 + } else {
121 + successCount++
122 + }
123 + }
124 + 
125 + elapsed := time.Since(start)
126 + log.Info("✓ Indexed %d DIDs for bundle %06d (%d errors, %d invalid handles) in %v",
127 + successCount, bundle.BundleNumber, errorCount, invalidHandleCount, elapsed)
128 + 
129 + return nil
130 + }
131 + 
132 + // VerifyChain verifies bundle chain integrity
133 + func (bm *BundleManager) VerifyChain(ctx context.Context, endBundle int) error {
134 + result, err := bm.libManager.VerifyChain(ctx)
135 + if err != nil {
136 + return err
137 + }
138 + 
139 + if !result.Valid {
140 + return fmt.Errorf("chain verification failed at bundle %d: %s", result.BrokenAt, result.Error)
141 + }
142 + 
143 + return nil
144 + }
145 + 
146 + // GetChainInfo returns chain information
147 + func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
148 + return bm.libManager.GetInfo(), nil
149 + }
150 + 
151 + // GetMempoolStats returns mempool statistics from the library
152 + func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153 + return bm.libManager.GetMempoolStats()
154 + }
155 + 
156 + // GetMempoolOperations returns all operations currently in mempool
157 + func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158 + return bm.libManager.GetMempoolOperations()
159 + }
160 + 
161 + // GetIndex returns the library's bundle index
162 + func (bm *BundleManager) GetIndex() *plcbundle.Index {
163 + return bm.libManager.GetIndex()
164 + }
165 + 
166 + // GetLastBundleNumber returns the last bundle number
167 + func (bm *BundleManager) GetLastBundleNumber() int {
168 + index := bm.libManager.GetIndex()
169 + lastBundle := index.GetLastBundle()
170 + if lastBundle == nil {
171 + return 0
172 + }
173 + return lastBundle.BundleNumber
174 + }
175 + 
176 + // GetBundleMetadata gets bundle metadata by number
177 + func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178 + index := bm.libManager.GetIndex()
179 + return index.GetBundle(bundleNum)
180 + }
181 + 
182 + // GetBundles returns the most recent bundles (newest first)
183 + func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184 + index := bm.libManager.GetIndex()
185 + allBundles := index.GetBundles()
186 + 
187 + // Determine how many bundles to return
188 + count := limit
189 + if count <= 0 || count > len(allBundles) {
190 + count = len(allBundles)
191 + }
192 + 
193 + // Build result in reverse order (newest first)
194 + result := make([]*plcbundle.BundleMetadata, count)
195 + for i := 0; i < count; i++ {
196 + result[i] = allBundles[len(allBundles)-1-i]
197 + }
198 + 
199 + return result
200 + }
201 + 
202 + // GetBundleStats returns bundle statistics
203 + func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204 + index := bm.libManager.GetIndex()
205 + stats := index.GetStats()
206 + 
207 + // Convert to expected format
208 + lastBundle := stats["last_bundle"]
209 + if lastBundle == nil {
210 + lastBundle = 0 // default as int so the .(int) assertion below cannot panic
211 + }
212 + 
213 + // Calculate total uncompressed size by iterating through all bundles
214 + totalUncompressedSize := int64(0)
215 + allBundles := index.GetBundles()
216 + for _, bundle := range allBundles {
217 + totalUncompressedSize += bundle.UncompressedSize
218 + }
219 + 
220 + return map[string]interface{}{
221 + "bundle_count": 
int64(stats["bundle_count"].(int)), 222 + "total_size": stats["total_size"].(int64), 223 + "total_uncompressed_size": totalUncompressedSize, 224 + "last_bundle": int64(lastBundle.(int)), 225 + } 226 + } 227 + 228 + // GetDIDsForBundle gets DIDs from a bundle (loads and extracts) 229 + func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) { 230 + bundle, err := bm.libManager.LoadBundle(ctx, bundleNum) 231 + if err != nil { 232 + return nil, 0, err 233 + } 234 + 235 + // Extract unique DIDs 236 + didSet := make(map[string]bool) 237 + for _, op := range bundle.Operations { 238 + didSet[op.DID] = true 239 + } 240 + 241 + dids := make([]string, 0, len(didSet)) 242 + for did := range didSet { 243 + dids = append(dids, did) 244 + } 245 + 246 + return dids, bundle.DIDCount, nil 247 + } 248 + 249 + // FindBundleForTimestamp finds bundle containing a timestamp 250 + func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int { 251 + index := bm.libManager.GetIndex() 252 + bundles := index.GetBundles() 253 + 254 + // Find bundle containing this time 255 + for _, bundle := range bundles { 256 + if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) && 257 + (bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) { 258 + return bundle.BundleNumber 259 + } 260 + } 261 + 262 + // Return closest bundle before this time 263 + for i := len(bundles) - 1; i >= 0; i-- { 264 + if bundles[i].EndTime.Before(afterTime) { 265 + return bundles[i].BundleNumber 266 + } 267 + } 268 + 269 + return 1 // Default to first bundle 270 + } 271 + 272 + // StreamRaw streams raw compressed bundle data 273 + func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) { 274 + return bm.libManager.StreamBundleRaw(ctx, bundleNumber) 275 + } 276 + 277 + // StreamDecompressed streams decompressed bundle data 278 + func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) { 279 + return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber) 280 + } 281 + 282 + // GetPLCHistory calculates historical statistics from the bundle index 283 + func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) { 284 + index := bm.libManager.GetIndex() 285 + allBundles := index.GetBundles() 286 + 287 + // Filter bundles >= fromBundle 288 + var filtered []*plcbundle.BundleMetadata 289 + for _, b := range allBundles { 290 + if b.BundleNumber >= fromBundle { 291 + filtered = append(filtered, b) 292 + } 293 + } 294 + 295 + if len(filtered) == 0 { 296 + return []*storage.PLCHistoryPoint{}, nil 297 + } 298 + 299 + // Sort bundles by bundle number to ensure proper cumulative calculation 300 + sort.Slice(filtered, func(i, j int) bool { 301 + return filtered[i].BundleNumber < filtered[j].BundleNumber 302 + }) 303 + 304 + // Group by date 305 + type dailyStat struct { 306 + lastBundle int 307 + bundleCount int 308 + totalUncompressed int64 309 + totalCompressed int64 310 + } 311 + 312 + dailyStats := make(map[string]*dailyStat) 313 + 314 + // Map to store the cumulative values at the end of each date 315 + dateCumulatives := make(map[string]struct { 316 + uncompressed int64 317 + compressed int64 318 + }) 319 + 320 + // Calculate cumulative totals as we iterate through sorted bundles 321 + cumulativeUncompressed := int64(0) 322 + cumulativeCompressed := int64(0) 323 + 324 + for _, bundle := range filtered { 325 + 
dateStr := bundle.StartTime.Format("2006-01-02")
326 + 
327 + // Update cumulative totals
328 + cumulativeUncompressed += bundle.UncompressedSize
329 + cumulativeCompressed += bundle.CompressedSize
330 + 
331 + if stat, exists := dailyStats[dateStr]; exists {
332 + // Update existing day
333 + if bundle.BundleNumber > stat.lastBundle {
334 + stat.lastBundle = bundle.BundleNumber
335 + }
336 + stat.bundleCount++
337 + stat.totalUncompressed += bundle.UncompressedSize
338 + stat.totalCompressed += bundle.CompressedSize
339 + } else {
340 + // Create new day entry
341 + dailyStats[dateStr] = &dailyStat{
342 + lastBundle: bundle.BundleNumber,
343 + bundleCount: 1,
344 + totalUncompressed: bundle.UncompressedSize,
345 + totalCompressed: bundle.CompressedSize,
346 + }
347 + }
348 + 
349 + // Store the cumulative values at the end of this date
350 + // (will be overwritten if there are multiple bundles on the same day)
351 + dateCumulatives[dateStr] = struct {
352 + uncompressed int64
353 + compressed int64
354 + }{
355 + uncompressed: cumulativeUncompressed,
356 + compressed: cumulativeCompressed,
357 + }
358 + }
359 + 
360 + // Convert map to sorted slice by date
361 + var dates []string
362 + for date := range dailyStats {
363 + dates = append(dates, date)
364 + }
365 + sort.Strings(dates)
366 + 
367 + // Build history points with cumulative operations
368 + var history []*storage.PLCHistoryPoint
369 + cumulativeOps := 0
370 + 
371 + for _, date := range dates {
372 + stat := dailyStats[date]
373 + cumulativeOps += stat.bundleCount * 10000
374 + cumulative := dateCumulatives[date]
375 + 
376 + history = append(history, &storage.PLCHistoryPoint{
377 + Date: date,
378 + BundleNumber: stat.lastBundle,
379 + OperationCount: cumulativeOps,
380 + UncompressedSize: stat.totalUncompressed,
381 + CompressedSize: stat.totalCompressed,
382 + CumulativeUncompressed: cumulative.uncompressed,
383 + CumulativeCompressed: cumulative.compressed,
384 + })
385 + }
386 + 
387 + // Apply limit if specified
388 + if limit > 0 && len(history) > limit {
389 + history = history[:limit]
390 + }
391 + 
392 + return history, nil
393 + }
394 + 
395 + // GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396 + func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397 + // Define the path to the labels file
398 + labelsDir := filepath.Join(bm.bundleDir, "labels")
399 + labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400 + 
401 + // Check if file exists
402 + if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403 + log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404 + // Return empty, not an error
405 + return []*PLCOpLabel{}, nil
406 + }
407 + 
408 + // Open the Zstd-compressed file
409 + file, err := os.Open(labelsFile)
410 + if err != nil {
411 + return nil, fmt.Errorf("failed to open labels file: %w", err)
412 + }
413 + defer file.Close()
414 + 
415 + // Create a Zstd reader
416 + zstdReader, err := zstd.NewReader(file)
417 + if err != nil {
418 + return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419 + }
420 + defer zstdReader.Close()
421 + 
422 + // Create a CSV reader
423 + csvReader := csv.NewReader(zstdReader)
424 + // The label files have no header row, so records are read directly;
425 + // the field count (6) is validated in parseLabelRecord instead of here:
426 + //csvReader.FieldsPerRecord = 6
427 + 
428 + var labels []*PLCOpLabel
429 + 
430 + // Read all records
431 + for {
432 + // Check for context cancellation
433 + if 
err := ctx.Err(); err != nil {
434 + return nil, err
435 + }
436 + 
437 + record, err := csvReader.Read()
438 + if err == io.EOF {
439 + break // End of file
440 + }
441 + if err != nil {
442 + log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443 + continue // Skip bad line
444 + }
445 + 
446 + // Parse the CSV record (which is []string)
447 + label, err := parseLabelRecord(record)
448 + if err != nil {
449 + log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450 + continue // Skip bad data
451 + }
452 + 
453 + labels = append(labels, label)
454 + }
455 + 
456 + return labels, nil
457 + }
458 + 
459 + // parseLabelRecord converts a new-format CSV record into a PLCOpLabel struct
460 + func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461 + // New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462 + if len(record) != 6 {
463 + err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464 + // Log the full record so malformed lines are easy to trace
465 + log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466 + 
467 + return nil, err
468 + }
469 + 
470 + // 0:bundle
471 + bundle, err := strconv.Atoi(record[0])
472 + if err != nil {
473 + // Log before skipping so the bad line is visible in logs
474 + log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475 + 
476 + return nil, fmt.Errorf("parsing 'bundle': %w", err)
477 + }
478 + 
479 + // 1:position
480 + position, err := strconv.Atoi(record[1])
481 + if err != nil {
482 + // Log before skipping so the bad line is visible in logs
483 + log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484 + 
485 + return nil, fmt.Errorf("parsing 'position': %w", err)
486 + }
487 + 
488 + // 2:cid(short)
489 + shortCID := record[2]
490 + 
491 + // 3:size
492 + size, err := strconv.Atoi(record[3])
493 + if err != nil {
494 + // Log before skipping so the bad line is visible in logs
495 + log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496 + 
497 + return nil, fmt.Errorf("parsing 'size': %w", err)
498 + }
499 + 
500 + // 4:confidence
501 + confidence, err := strconv.ParseFloat(record[4], 64)
502 + if err != nil {
503 + // Log before skipping so the bad line is visible in logs
504 + log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505 + 
506 + return nil, fmt.Errorf("parsing 'confidence': %w", err)
507 + }
508 + 
509 + // 5:labels
510 + detectors := strings.Split(record[5], ";")
511 + 
512 + label := &PLCOpLabel{
513 + Bundle: bundle,
514 + Position: position,
515 + CID: shortCID,
516 + Size: size,
517 + Confidence: confidence,
518 + Detectors: detectors,
519 + }
520 + 
521 + return label, nil
522 + }
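For clarity, a hypothetical record in the new label format and the struct it parses into (all values invented for illustration):

// CSV line: 142,3917,bafyreiabc,512,0.93,spam;burst
record := []string{"142", "3917", "bafyreiabc", "512", "0.93", "spam;burst"}
label, _ := parseLabelRecord(record)
// label == &PLCOpLabel{Bundle: 142, Position: 3917, CID: "bafyreiabc",
//     Size: 512, Confidence: 0.93, Detectors: []string{"spam", "burst"}}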
-70
internal/plc/ratelimiter.go
···
1 - package plc
2 -
3 - import (
4 -   "context"
5 -   "time"
6 - )
7 -
8 - // RateLimiter implements a token bucket rate limiter
9 - type RateLimiter struct {
10 -   tokens chan struct{}
11 -   refillRate time.Duration
12 -   maxTokens int
13 -   stopRefill chan struct{}
14 - }
15 -
16 - // NewRateLimiter creates a new rate limiter
17 - // Example: NewRateLimiter(90, time.Minute) = 90 requests per minute
18 - func NewRateLimiter(requestsPerPeriod int, period time.Duration) *RateLimiter {
19 -   rl := &RateLimiter{
20 -     tokens: make(chan struct{}, requestsPerPeriod),
21 -     refillRate: period / time.Duration(requestsPerPeriod),
22 -     maxTokens: requestsPerPeriod,
23 -     stopRefill: make(chan struct{}),
24 -   }
25 -
26 -   // Fill initially
27 -   for i := 0; i < requestsPerPeriod; i++ {
28 -     rl.tokens <- struct{}{}
29 -   }
30 -
31 -   // Start refill goroutine
32 -   go rl.refill()
33 -
34 -   return rl
35 - }
36 -
37 - // refill adds tokens at the specified rate
38 - func (rl *RateLimiter) refill() {
39 -   ticker := time.NewTicker(rl.refillRate)
40 -   defer ticker.Stop()
41 -
42 -   for {
43 -     select {
44 -     case <-ticker.C:
45 -       select {
46 -       case rl.tokens <- struct{}{}:
47 -         // Token added
48 -       default:
49 -         // Buffer full, skip
50 -       }
51 -     case <-rl.stopRefill:
52 -       return
53 -     }
54 -   }
55 - }
56 -
57 - // Wait blocks until a token is available
58 - func (rl *RateLimiter) Wait(ctx context.Context) error {
59 -   select {
60 -   case <-rl.tokens:
61 -     return nil
62 -   case <-ctx.Done():
63 -     return ctx.Err()
64 -   }
65 - }
66 -
67 - // Stop stops the rate limiter
68 - func (rl *RateLimiter) Stop() {
69 -   close(rl.stopRefill)
70 - }
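
The hand-rolled token bucket goes away here; rate limiting presumably now lives inside the plcbundle library, since the new scanner only reacts to "rate limited" errors. If a local limiter is ever needed again, golang.org/x/time/rate covers the same ground in a few lines — a sketch, not part of this diff (imports: context, time, golang.org/x/time/rate):

    // waitPLC blocks until the next plc.directory request is allowed.
    // 500 requests per 5 minutes is the limit the old scanner warned about.
    var plcLimiter = rate.NewLimiter(rate.Every(5*time.Minute/500), 1)

    func waitPLC(ctx context.Context) error {
        return plcLimiter.Wait(ctx) // returns early if ctx is cancelled
    }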
+92 -431
internal/plc/scanner.go
···
2 2
3 3 import (
4 4   "context"
5 -   "encoding/json"
6 5   "fmt"
7 6   "strings"
8 7   "time"
9 8
10 -   "github.com/acarl005/stripansi"
11 -   "github.com/atscan/atscanner/internal/config"
12 -   "github.com/atscan/atscanner/internal/log"
13 -   "github.com/atscan/atscanner/internal/storage"
9 +   "github.com/atscan/atscand/internal/config"
10 +   "github.com/atscan/atscand/internal/log"
11 +   "github.com/atscan/atscand/internal/storage"
14 12 )
15 13
16 14 type Scanner struct {
17 -   client *Client
15 +   bundleManager *BundleManager
18 16   db storage.Database
19 17   config config.PLCConfig
20 -   bundleManager *BundleManager
21 18 }
22 19
23 - func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
24 -   bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db, cfg.IndexDIDs) // NEW: pass IndexDIDs
25 -   if err != nil {
26 -     log.Error("Warning: failed to initialize bundle manager: %v", err)
27 -     bundleManager = &BundleManager{enabled: false}
28 -   }
20 + func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
21 +   log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
29 22
30 23   return &Scanner{
31 -     client: NewClient(cfg.DirectoryURL),
24 +     bundleManager: bundleManager, // Use provided instance
32 25     db: db,
33 26     config: cfg,
34 -     bundleManager: bundleManager,
35 27   }
36 28 }
37 29
38 30 func (s *Scanner) Close() {
39 -   if s.bundleManager != nil {
40 -     s.bundleManager.Close()
41 -   }
42 - }
43 -
44 - // ScanMetrics tracks scan progress
45 - type ScanMetrics struct {
46 -   totalFetched int64 // Total ops fetched from PLC/bundles
47 -   totalProcessed int64 // Unique ops processed (after dedup)
48 -   newEndpoints int64 // New endpoints discovered
49 -   endpointCounts map[string]int64
50 -   currentBundle int
51 -   startTime time.Time
52 - }
53 -
54 - func newMetrics(startBundle int) *ScanMetrics {
55 -   return &ScanMetrics{
56 -     endpointCounts: make(map[string]int64),
57 -     currentBundle: startBundle,
58 -     startTime: time.Now(),
59 -   }
60 - }
61 -
62 - func (m *ScanMetrics) logSummary() {
63 -   summary := formatEndpointCounts(m.endpointCounts)
64 -   if m.newEndpoints > 0 {
65 -     log.Info("PLC scan completed: %d operations processed (%d fetched), %s in %v",
66 -       m.totalProcessed, m.totalFetched, summary, time.Since(m.startTime))
67 -   } else {
68 -     log.Info("PLC scan completed: %d operations processed (%d fetched), 0 new endpoints in %v",
69 -       m.totalProcessed, m.totalFetched, time.Since(m.startTime))
70 -   }
31 +   // Don't close bundleManager here - it's shared
71 32 }
72 33
73 34 func (s *Scanner) Scan(ctx context.Context) error {
74 35   log.Info("Starting PLC directory scan...")
75 -   log.Info("⚠ Note: PLC directory has rate limit of 500 requests per 5 minutes")
76 36
77 37   cursor, err := s.db.GetScanCursor(ctx, "plc_directory")
78 38   if err != nil {
79 39     return fmt.Errorf("failed to get scan cursor: %w", err)
80 40   }
81 41
82 -   startBundle := s.calculateStartBundle(cursor.LastBundleNumber)
83 -   metrics := newMetrics(startBundle)
84 -
85 -   if startBundle > 1 {
86 -     if err := s.ensureContinuity(ctx, startBundle); err != nil {
87 -       return err
88 -     }
89 -   }
42 +   metrics := newMetrics(cursor.LastBundleNumber + 1)
90 43
91 -   // Handle existing mempool first
92 -   if hasMempool, _ := s.hasSufficientMempool(ctx); hasMempool {
93 -     return s.handleMempoolOnly(ctx, metrics)
94 -   }
95 -
96 -   // Process bundles until incomplete or error
44 +   // Main processing loop
97 45   for {
98 46     if err := ctx.Err(); err != nil {
99 47       return err
100 48     }
101 49
102 -     if err := s.processSingleBundle(ctx, metrics); err != nil {
103 -       if s.shouldRetry(err) {
104 -         continue
105 -       }
106 -       break
107 -     }
108 -
109 -     if err := s.updateCursor(ctx, cursor, metrics); err != nil {
110 -       log.Error("Warning: failed to update cursor: %v", err)
111 -     }
112 -   }
113 -
114 -   // Try to finalize mempool
115 -   s.finalizeMempool(ctx, metrics)
116 -
117 -   metrics.logSummary()
118 -   return nil
119 - }
120 -
121 - func (s *Scanner) calculateStartBundle(lastBundle int) int {
122 -   if lastBundle == 0 {
123 -     return 1
124 -   }
125 -   return lastBundle + 1
126 - }
127 -
128 - func (s *Scanner) ensureContinuity(ctx context.Context, bundle int) error {
129 -   log.Info("Checking bundle continuity...")
130 -   if err := s.bundleManager.EnsureBundleContinuity(ctx, bundle); err != nil {
131 -     return fmt.Errorf("bundle continuity check failed: %w", err)
132 -   }
133 -   return nil
134 - }
135 -
136 - func (s *Scanner) hasSufficientMempool(ctx context.Context) (bool, error) {
137 -   count, err := s.db.GetMempoolCount(ctx)
138 -   if err != nil {
139 -     return false, err
140 -   }
141 -   return count > 0, nil
142 - }
143 -
144 - func (s *Scanner) handleMempoolOnly(ctx context.Context, m *ScanMetrics) error {
145 -   count, _ := s.db.GetMempoolCount(ctx)
146 -   log.Info("→ Mempool has %d operations, continuing to fill it before fetching new bundles", count)
147 -
148 -   if err := s.fillMempool(ctx, m); err != nil {
149 -     return err
150 -   }
151 -
152 -   if err := s.processMempool(ctx, m); err != nil {
153 -     log.Error("Error processing mempool: %v", err)
154 -   }
155 -
156 -   m.logSummary()
157 -   return nil
158 - }
159 -
160 - func (s *Scanner) processSingleBundle(ctx context.Context, m *ScanMetrics) error {
161 -   log.Verbose("→ Processing bundle %06d...", m.currentBundle)
162 -
163 -   ops, isComplete, err := s.bundleManager.LoadBundle(ctx, m.currentBundle, s.client)
164 -   if err != nil {
165 -     return s.handleBundleError(err, m)
166 -   }
167 -
168 -   if isComplete {
169 -     return s.handleCompleteBundle(ctx, ops, m)
170 -   }
171 -   return s.handleIncompleteBundle(ctx, ops, m)
172 - }
173 -
174 - func (s *Scanner) handleBundleError(err error, m *ScanMetrics) error {
175 -   log.Error("Failed to load bundle %06d: %v", m.currentBundle, err)
176 -
177 -   if strings.Contains(err.Error(), "rate limited") {
178 -     log.Info("⚠ Rate limit hit, pausing for 5 minutes...")
179 -     time.Sleep(5 * time.Minute)
180 -     return fmt.Errorf("retry")
181 -   }
182 -
183 -   if m.currentBundle > 1 {
184 -     log.Info("→ Reached end of available data")
185 -   }
186 -   return err
187 - }
188 -
189 - func (s *Scanner) shouldRetry(err error) bool {
190 -   return err != nil && err.Error() == "retry"
191 - }
192 -
193 - func (s *Scanner) handleCompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
194 -   counts, err := s.processBatch(ctx, ops)
195 -   if err != nil {
196 -     return err
197 -   }
198 -
199 -   s.mergeCounts(m.endpointCounts, counts)
200 -   m.totalProcessed += int64(len(ops)) // Unique ops after dedup
201 -   m.newEndpoints += sumCounts(counts) // NEW: Track new endpoints
202 -
203 -   batchTotal := sumCounts(counts)
204 -   log.Verbose("✓ Processed bundle %06d: %d operations (after dedup), %d new endpoints",
205 -     m.currentBundle, len(ops), batchTotal)
206 -
207 -   m.currentBundle++
208 -   return nil
209 - }
210 -
211 - func (s *Scanner) handleIncompleteBundle(ctx context.Context, ops []PLCOperation, m *ScanMetrics) error {
212 -   log.Info("→ Bundle %06d incomplete (%d ops), adding to mempool", m.currentBundle, len(ops))
213 -
214 -   if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
215 -     return err
216 -   }
217 -
218 -   s.finalizeMempool(ctx, m)
219 -   return fmt.Errorf("incomplete") // Signal end of processing
220 - }
221 -
222 - func (s *Scanner) finalizeMempool(ctx context.Context, m *ScanMetrics) {
223 -   if err := s.fillMempool(ctx, m); err != nil {
224 -     log.Error("Error filling mempool: %v", err)
225 -   }
226 -   if err := s.processMempool(ctx, m); err != nil {
227 -     log.Error("Error processing mempool: %v", err)
228 -   }
229 - }
230 -
231 - func (s *Scanner) fillMempool(ctx context.Context, m *ScanMetrics) error {
232 -   const fetchLimit = 1000
233 -
234 -   for {
235 -     count, err := s.db.GetMempoolCount(ctx)
50 +     // Fetch and save bundle (library handles mempool internally)
51 +     bundle, err := s.bundleManager.FetchAndSaveBundle(ctx)
236 52     if err != nil {
237 -       return err
238 -     }
53 +       if isInsufficientOpsError(err) {
54 +         // Show mempool status
55 +         stats := s.bundleManager.libManager.GetMempoolStats()
56 +         mempoolCount, _ := stats["count"].(int) // comma-ok guards the assertion; zero if absent
239 57
240 -     if count >= BUNDLE_SIZE {
241 -       log.Info("✓ Mempool filled to %d operations (target: %d)", count, BUNDLE_SIZE)
242 -       return nil
243 -     }
58 +         if mempoolCount > 0 {
59 +           log.Info("→ Waiting for more operations (mempool has %d/%d ops)",
60 +             mempoolCount, BUNDLE_SIZE)
61 +         } else {
62 +           log.Info("→ Caught up! No operations available")
63 +         }
64 +         break
65 +       }
244 66
245 -     log.Info("→ Mempool has %d/%d operations, fetching more from PLC directory...", count, BUNDLE_SIZE)
246 -
247 -     // ✅ Fix: Don't capture unused 'ops' variable
248 -     shouldContinue, err := s.fetchNextBatch(ctx, fetchLimit, m)
249 -     if err != nil {
250 -       return err
251 -     }
67 +       if strings.Contains(err.Error(), "rate limited") {
68 +         log.Info("⚠ Rate limited, pausing for 5 minutes...")
69 +         time.Sleep(5 * time.Minute)
70 +         continue
71 +       }
252 72
253 -     if !shouldContinue {
254 -       finalCount, _ := s.db.GetMempoolCount(ctx)
255 -       log.Info("→ Stopping fill, mempool has %d/%d operations", finalCount, BUNDLE_SIZE)
256 -       return nil
73 +       return fmt.Errorf("failed to fetch bundle: %w", err)
257 74     }
258 -   }
259 - }
260 -
261 - func (s *Scanner) fetchNextBatch(ctx context.Context, limit int, m *ScanMetrics) (bool, error) {
262 -   lastOp, err := s.db.GetLastMempoolOperation(ctx)
263 -   if err != nil {
264 -     return false, err
265 -   }
266 -
267 -   var after string
268 -   if lastOp != nil {
269 -     after = lastOp.CreatedAt.Format(time.RFC3339Nano)
270 -     log.Verbose("  Using cursor: %s", after)
271 -   }
272 -
273 -   ops, err := s.client.Export(ctx, ExportOptions{Count: limit, After: after})
274 -   if err != nil {
275 -     return false, fmt.Errorf("failed to fetch from PLC: %w", err)
276 -   }
277 -
278 -   fetchedCount := len(ops)
279 -   m.totalFetched += int64(fetchedCount) // Track all fetched
280 -   log.Verbose("  Fetched %d operations from PLC", fetchedCount)
281 -
282 -   if fetchedCount == 0 {
283 -     count, _ := s.db.GetMempoolCount(ctx)
284 -     log.Info("→ No more data available from PLC directory (mempool has %d/%d)", count, BUNDLE_SIZE)
285 -     return false, nil
286 -   }
287 -
288 -   beforeCount, err := s.db.GetMempoolCount(ctx)
289 -   if err != nil {
290 -     return false, err
291 -   }
292 -
293 -   endpointsBefore := sumCounts(m.endpointCounts)
294 -   if err := s.addToMempool(ctx, ops, m.endpointCounts); err != nil {
295 -     return false, err
296 -   }
297 -   endpointsAfter := sumCounts(m.endpointCounts)
298 -   m.newEndpoints += (endpointsAfter - endpointsBefore) // Add new endpoints found
299 -
300 -   afterCount, err := s.db.GetMempoolCount(ctx)
301 -   if err != nil {
302 -     return false, err
303 -   }
304 -
305 -   uniqueAdded := int64(afterCount - beforeCount) // Cast to int64
306 -   m.totalProcessed += uniqueAdded // Track unique ops processed
307 -
308 -   log.Verbose("  Added %d new unique operations to mempool (%d were duplicates)",
309 -     uniqueAdded, int64(fetchedCount)-uniqueAdded)
310 -
311 -   // Continue only if got full batch
312 -   shouldContinue := fetchedCount >= limit
313 -   if !shouldContinue {
314 -     log.Info("→ Received incomplete batch (%d/%d), caught up to latest data", fetchedCount, limit)
315 -   }
316 -
317 -   return shouldContinue, nil
318 - }
319 -
320 - func (s *Scanner) addToMempool(ctx context.Context, ops []PLCOperation, counts map[string]int64) error {
321 -   mempoolOps := make([]storage.MempoolOperation, len(ops))
322 -   for i, op := range ops {
323 -     mempoolOps[i] = storage.MempoolOperation{
324 -       DID: op.DID,
325 -       Operation: string(op.RawJSON),
326 -       CID: op.CID,
327 -       CreatedAt: op.CreatedAt,
328 -     }
329 -   }
330 -
331 -   if err := s.db.AddToMempool(ctx, mempoolOps); err != nil {
332 -     return err
333 -   }
334 -
335 -   // NEW: Create/update DID records immediately when adding to mempool
336 -   for _, op := range ops {
337 -     info := ExtractDIDInfo(&op)
338 -
339 -     // Validate handle length before saving
340 -     validHandle := ValidateHandle(info.Handle)
341 -     if info.Handle != "" && validHandle == "" {
342 -       log.Verbose("Skipping invalid handle for DID %s (length: %d)", op.DID, len(info.Handle))
343 -     }
344 -
345 -     if err := s.db.UpsertDIDFromMempool(ctx, op.DID, validHandle, info.PDS); err != nil {
346 -       log.Error("Failed to upsert DID %s in mempool: %v", op.DID, err)
347 -       // Don't fail the whole operation, just log
348 -     }
349 -   }
350 -
351 -   // Process for endpoint discovery
352 -   batchCounts, err := s.processBatch(ctx, ops)
353 -   s.mergeCounts(counts, batchCounts)
354 -   return err
355 - }
356 -
357 - func (s *Scanner) processMempool(ctx context.Context, m *ScanMetrics) error {
358 -   for {
359 -     count, err := s.db.GetMempoolCount(ctx)
76 +     // Process operations for endpoint discovery
77 +     counts, err := s.processBatch(ctx, bundle.Operations)
360 78     if err != nil {
361 -       return err
79 +       log.Error("Failed to process batch: %v", err)
80 +       // Continue anyway
362 81     }
363 82
364 -     log.Verbose("Mempool contains %d operations", count)
83 +     // Update metrics
84 +     s.mergeCounts(metrics.endpointCounts, counts)
85 +     metrics.totalProcessed += int64(len(bundle.Operations))
86 +     metrics.newEndpoints += sumCounts(counts)
87 +     metrics.currentBundle = bundle.BundleNumber
365 88
366 -     if count < BUNDLE_SIZE {
367 -       log.Info("Mempool has %d/%d operations, cannot create bundle yet", count, BUNDLE_SIZE)
368 -       return nil
369 -     }
89 +     log.Info("✓ Processed bundle %06d: %d operations, %d new endpoints",
90 +       bundle.BundleNumber, len(bundle.Operations), sumCounts(counts))
370 91
371 -     log.Info("→ Creating bundle from mempool (%d operations available)...", count)
372 -
373 -     // Updated to receive 4 values instead of 3
374 -     bundleNum, ops, cursor, err := s.createBundleFromMempool(ctx)
375 -     if err != nil {
376 -       return err
377 -     }
378 -
379 -     // Process and update metrics
380 -     countsBefore := sumCounts(m.endpointCounts)
381 -     counts, _ := s.processBatch(ctx, ops)
382 -     s.mergeCounts(m.endpointCounts, counts)
383 -     newEndpointsFound := sumCounts(m.endpointCounts) - countsBefore
384 -
385 -     m.totalProcessed += int64(len(ops))
386 -     m.newEndpoints += newEndpointsFound
387 -     m.currentBundle = bundleNum
388 -
389 -     if err := s.updateCursorForBundle(ctx, bundleNum, m.totalProcessed); err != nil {
92 +     // Update cursor
93 +     if err := s.updateCursorForBundle(ctx, bundle.BundleNumber, metrics.totalProcessed); err != nil {
390 94       log.Error("Warning: failed to update cursor: %v", err)
391 95     }
392 -
393 -     log.Info("✓ Created bundle %06d from mempool (cursor: %s)", bundleNum, cursor)
394 96   }
395 - }
396 97
397 - func (s *Scanner) createBundleFromMempool(ctx context.Context) (int, []PLCOperation, string, error) {
398 -   mempoolOps, err := s.db.GetMempoolOperations(ctx, BUNDLE_SIZE)
399 -   if err != nil {
400 -     return 0, nil, "", err
98 +   // Show final mempool status
99 +   stats := s.bundleManager.libManager.GetMempoolStats()
100 +   if count, ok := stats["count"].(int); ok && count > 0 {
101 +     log.Info("Mempool contains %d operations (%.1f%% of next bundle)",
102 +       count, float64(count)/float64(BUNDLE_SIZE)*100)
401 103   }
402 104
403 -   ops, ids := s.deduplicateMempool(mempoolOps)
404 -   if len(ops) < BUNDLE_SIZE {
405 -     return 0, nil, "", fmt.Errorf("only got %d unique operations from mempool, need %d", len(ops), BUNDLE_SIZE)
406 -   }
407 -
408 -   // Determine cursor from last bundle
409 -   cursor := ""
410 -   lastBundle, err := s.db.GetLastBundleNumber(ctx)
411 -   if err == nil && lastBundle > 0 {
412 -     if bundle, err := s.db.GetBundleByNumber(ctx, lastBundle); err == nil {
413 -       cursor = bundle.EndTime.Format(time.RFC3339Nano)
414 -     }
415 -   }
416 -
417 -   bundleNum, err := s.bundleManager.CreateBundleFromMempool(ctx, ops, cursor)
418 -   if err != nil {
419 -     return 0, nil, "", err
420 -   }
421 -
422 -   if err := s.db.DeleteFromMempool(ctx, ids[:len(ops)]); err != nil {
423 -     return 0, nil, "", err
424 -   }
425 -
426 -   return bundleNum, ops, cursor, nil
105 +   metrics.logSummary()
106 +   return nil
427 107 }
428 108
429 - func (s *Scanner) deduplicateMempool(mempoolOps []storage.MempoolOperation) ([]PLCOperation, []int64) {
430 -   ops := make([]PLCOperation, 0, BUNDLE_SIZE)
431 -   ids := make([]int64, 0, BUNDLE_SIZE)
432 -   seenCIDs := make(map[string]bool)
433 -
434 -   for _, mop := range mempoolOps {
435 -     if seenCIDs[mop.CID] {
436 -       ids = append(ids, mop.ID)
437 -       continue
438 -     }
439 -     seenCIDs[mop.CID] = true
440 -
441 -     var op PLCOperation
442 -     json.Unmarshal([]byte(mop.Operation), &op)
443 -     op.RawJSON = []byte(mop.Operation)
444 -
445 -     ops = append(ops, op)
446 -     ids = append(ids, mop.ID)
447 -
448 -     if len(ops) >= BUNDLE_SIZE {
449 -       break
450 -     }
451 -   }
452 -
453 -   return ops, ids
454 - }
455 -
109 + // processBatch extracts endpoints from operations
456 110 func (s *Scanner) processBatch(ctx context.Context, ops []PLCOperation) (map[string]int64, error) {
457 111   counts := make(map[string]int64)
458 112   seen := make(map[string]*PLCOperation)
459 113
460 114   // Collect unique endpoints
461 -   for _, op := range ops {
115 +   for i := range ops {
116 +     op := &ops[i]
117 +
462 118     if op.IsNullified() {
463 119       continue
464 120     }
465 -     for _, ep := range s.extractEndpointsFromOperation(op) {
121 +
122 +     for _, ep := range s.extractEndpointsFromOperation(*op) {
466 123       key := fmt.Sprintf("%s:%s", ep.Type, ep.Endpoint)
467 124       if _, exists := seen[key]; !exists {
468 -         seen[key] = &op
125 +         seen[key] = op
469 126       }
470 127     }
471 128   }
···
481 138   }
482 139
483 140     if err := s.storeEndpoint(ctx, epType, endpoint, firstOp.CreatedAt); err != nil {
484 -       log.Error("Error storing %s endpoint %s: %v", epType, stripansi.Strip(endpoint), err)
141 +       log.Error("Error storing %s endpoint %s: %v", epType, endpoint, err)
485 142       continue
486 143     }
487 144
488 -     log.Info("✓ Discovered new %s endpoint: %s", epType, stripansi.Strip(endpoint))
145 +     log.Info("✓ Discovered new %s endpoint: %s", epType, endpoint)
489 146     counts[epType]++
490 147   }
491 148
492 149   return counts, nil
493 - }
494 -
495 - func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
496 -   return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
497 -     EndpointType: epType,
498 -     Endpoint: endpoint,
499 -     DiscoveredAt: discoveredAt,
500 -     LastChecked: time.Time{},
501 -     Status: storage.EndpointStatusUnknown,
502 -   })
503 150 }
504 151
505 152 func (s *Scanner) extractEndpointsFromOperation(op PLCOperation) []EndpointInfo {
···
542 189   return nil
543 190 }
544 191
545 - func (s *Scanner) updateCursor(ctx context.Context, cursor *storage.ScanCursor, m *ScanMetrics) error {
546 -   return s.db.UpdateScanCursor(ctx, &storage.ScanCursor{
547 -     Source: "plc_directory",
548 -     LastBundleNumber: m.currentBundle - 1,
549 -     LastScanTime: time.Now().UTC(),
550 -     RecordsProcessed: cursor.RecordsProcessed + m.totalProcessed,
192 + func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193 +   valid := validateEndpoint(endpoint)
194 +   return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
195 +     EndpointType: epType,
196 +     Endpoint: endpoint,
197 +     DiscoveredAt: discoveredAt,
198 +     LastChecked: time.Time{},
199 +     Status: storage.EndpointStatusUnknown,
200 +     Valid: valid,
551 201   })
552 202 }
553 203
···
575 225   return total
576 226 }
577 227
578 - func formatEndpointCounts(counts map[string]int64) string {
579 -   if len(counts) == 0 {
580 -     return "0 new endpoints"
581 -   }
228 + func isInsufficientOpsError(err error) bool {
229 +   return err != nil && strings.Contains(err.Error(), "insufficient operations")
230 + }
582 231
583 -   total := sumCounts(counts)
232 + // ScanMetrics tracks scan progress
233 + type ScanMetrics struct {
234 +   totalProcessed int64
235 +   newEndpoints int64
236 +   endpointCounts map[string]int64
237 +   currentBundle int
238 +   startTime time.Time
239 + }
584 240
585 -   if len(counts) == 1 {
586 -     for typ, count := range counts {
587 -       return fmt.Sprintf("%d new %s endpoint(s)", count, typ)
588 -     }
241 + func newMetrics(startBundle int) *ScanMetrics {
242 +   return &ScanMetrics{
243 +     endpointCounts: make(map[string]int64),
244 +     currentBundle: startBundle,
245 +     startTime: time.Now(),
589 246   }
247 + }
590 248
591 -   parts := make([]string, 0, len(counts))
592 -   for typ, count := range counts {
593 -     parts = append(parts, fmt.Sprintf("%d %s", count, typ))
249 + func (m *ScanMetrics) logSummary() {
250 +   if m.newEndpoints > 0 {
251 +     log.Info("PLC scan completed: %d operations processed, %d new endpoints in %v",
252 +       m.totalProcessed, m.newEndpoints, time.Since(m.startTime))
253 +   } else {
254 +     log.Info("PLC scan completed: %d operations processed, 0 new endpoints in %v",
255 +       m.totalProcessed, time.Since(m.startTime))
594 256   }
595 -   return fmt.Sprintf("%d new endpoints (%s)", total, strings.Join(parts, ", "))
596 257 }
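
The scan loop reads mempool status from the library's untyped stats map in two places, both now using the comma-ok form. A small helper would centralize that — a sketch, assuming GetMempoolStats returns map[string]interface{} with "count" holding an int, as this diff implies:

    // mempoolCount safely extracts the "count" stat from GetMempoolStats().
    func mempoolCount(stats map[string]interface{}) int {
        if n, ok := stats["count"].(int); ok {
            return n
        }
        return 0 // key missing or unexpected type
    }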
+68 -55
internal/plc/types.go
···
1 1 package plc
2 2
3 - import "time"
4 -
5 - type PLCOperation struct {
6 -   DID string `json:"did"`
7 -   Operation map[string]interface{} `json:"operation"`
8 -   CID string `json:"cid"`
9 -   Nullified interface{} `json:"nullified,omitempty"`
10 -   CreatedAt time.Time `json:"createdAt"`
11 -
12 -   RawJSON []byte `json:"-"` // ✅ Exported (capital R)
13 - }
3 + import (
4 +   "net/url"
5 +   "strings"
14 6
15 - // Helper method to check if nullified
16 - func (op *PLCOperation) IsNullified() bool {
17 -   if op.Nullified == nil {
18 -     return false
19 -   }
20 -
21 -   switch v := op.Nullified.(type) {
22 -   case bool:
23 -     return v
24 -   case string:
25 -     return v != ""
26 -   default:
27 -     return false
28 -   }
29 - }
30 -
31 - // Get nullifying CID if available
32 - func (op *PLCOperation) GetNullifyingCID() string {
33 -   if s, ok := op.Nullified.(string); ok {
34 -     return s
35 -   }
36 -   return ""
37 - }
7 +   plclib "tangled.org/atscan.net/plcbundle/plc"
8 + )
38 9
39 - type DIDDocument struct {
40 -   Context []string `json:"@context"`
41 -   ID string `json:"id"`
42 -   AlsoKnownAs []string `json:"alsoKnownAs"`
43 -   VerificationMethod []VerificationMethod `json:"verificationMethod"`
44 -   Service []Service `json:"service"`
45 - }
10 + // Re-export library types
11 + type PLCOperation = plclib.PLCOperation
12 + type DIDDocument = plclib.DIDDocument
13 + type Client = plclib.Client
14 + type ExportOptions = plclib.ExportOptions
46 15
47 - type VerificationMethod struct {
48 -   ID string `json:"id"`
49 -   Type string `json:"type"`
50 -   Controller string `json:"controller"`
51 -   PublicKeyMultibase string `json:"publicKeyMultibase"`
52 - }
16 + // Local types and constants
17 + const BUNDLE_SIZE = 10000
53 18
54 - type Service struct {
55 -   ID string `json:"id"`
56 -   Type string `json:"type"`
57 -   ServiceEndpoint string `json:"serviceEndpoint"`
58 - }
59 -
60 - // DIDHistoryEntry represents a single operation in DID history
61 19 type DIDHistoryEntry struct {
62 20   Operation PLCOperation `json:"operation"`
63 21   PLCBundle string `json:"plc_bundle,omitempty"`
64 22 }
65 23
66 - // DIDHistory represents the full history of a DID
67 24 type DIDHistory struct {
68 25   DID string `json:"did"`
69 26   Current *PLCOperation `json:"current"`
···
74 31   Type string
75 32   Endpoint string
76 33 }
34 +
35 + // PLCOpLabel holds metadata from the label CSV file
36 + type PLCOpLabel struct {
37 +   Bundle int `json:"bundle"`
38 +   Position int `json:"position"`
39 +   CID string `json:"cid"`
40 +   Size int `json:"size"`
41 +   Confidence float64 `json:"confidence"`
42 +   Detectors []string `json:"detectors"`
43 + }
44 +
45 + // validateEndpoint checks if endpoint is in correct format: https://<domain>
46 + func validateEndpoint(endpoint string) bool {
47 +   // Must not be empty
48 +   if endpoint == "" {
49 +     return false
50 +   }
51 +
52 +   // Must not have trailing slash
53 +   if strings.HasSuffix(endpoint, "/") {
54 +     return false
55 +   }
56 +
57 +   // Parse URL
58 +   u, err := url.Parse(endpoint)
59 +   if err != nil {
60 +     return false
61 +   }
62 +
63 +   // Must use https scheme
64 +   if u.Scheme != "https" {
65 +     return false
66 +   }
67 +
68 +   // Must have a host
69 +   if u.Host == "" {
70 +     return false
71 +   }
72 +
73 +   // Must not have path (except empty)
74 +   if u.Path != "" && u.Path != "/" {
75 +     return false
76 +   }
77 +
78 +   // Must not have query parameters
79 +   if u.RawQuery != "" {
80 +     return false
81 +   }
82 +
83 +   // Must not have fragment
84 +   if u.Fragment != "" {
85 +     return false
86 +   }
87 +
88 +   return true
89 + }
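
validateEndpoint is pure, so it pins down nicely with a table-driven test. A sketch — file placement and cases are suggestions, not part of this change:

    package plc

    import "testing"

    func TestValidateEndpoint(t *testing.T) {
        cases := map[string]bool{
            "https://pds.example.com":      true,
            "https://pds.example.com/":     false, // trailing slash
            "http://pds.example.com":       false, // not https
            "https://pds.example.com/xrpc": false, // has a path
            "https://pds.example.com?x=1":  false, // query string
            "https://pds.example.com#frag": false, // fragment
            "":                             false, // empty
        }
        for input, want := range cases {
            if got := validateEndpoint(input); got != want {
                t.Errorf("validateEndpoint(%q) = %v, want %v", input, got, want)
            }
        }
    }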
-20
internal/storage/db.go
···
50 50   GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
51 51   UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
52 52
53 -   // Bundle operations
54 -   CreateBundle(ctx context.Context, bundle *PLCBundle) error
55 -   GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
56 -   GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
57 -   GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
58 -   GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
59 -   GetLastBundleNumber(ctx context.Context) (int, error)
60 -   GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
61 -   GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
62 -
63 -   // Mempool operations
64 -   AddToMempool(ctx context.Context, ops []MempoolOperation) error
65 -   GetMempoolCount(ctx context.Context) (int, error)
66 -   GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
67 -   DeleteFromMempool(ctx context.Context, ids []int64) error
68 -   GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
69 -   GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
70 -   GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
71 -   GetMempoolUncompressedSize(ctx context.Context) (int64, error)
72 -
73 53   // Metrics
74 54   StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
75 55   GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
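
With bundle and mempool persistence removed from the Database interface (the plcbundle library owns that data now), the PLC-side surface is essentially the cursor pair. A round-trip sketch, modeled on how the scanner uses it in this diff — the helper name is illustrative:

    // advanceCursor records that bundle n has been processed.
    // Assumes the storage, context, and time packages are imported.
    func advanceCursor(ctx context.Context, db storage.Database, n int, processed int64) error {
        cur, err := db.GetScanCursor(ctx, "plc_directory")
        if err != nil {
            return err
        }
        return db.UpdateScanCursor(ctx, &storage.ScanCursor{
            Source:           "plc_directory",
            LastBundleNumber: n,
            LastScanTime:     time.Now().UTC(),
            RecordsProcessed: cur.RecordsProcessed + processed,
        })
    }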
+120 -554
internal/storage/postgres.go
···
5 5   "database/sql"
6 6   "encoding/json"
7 7   "fmt"
8 -   "strings"
9 8   "time"
10 9
11 -   "github.com/atscan/atscanner/internal/log"
10 +   "github.com/atscan/atscand/internal/log"
12 11   "github.com/jackc/pgx/v5"
13 12   "github.com/jackc/pgx/v5/pgxpool"
14 13   _ "github.com/jackc/pgx/v5/stdlib"
···
73 72   log.Info("Running database migrations...")
74 73
75 74   schema := `
76 -   -- Endpoints table (NO user_count, NO ip_info)
77 -   CREATE TABLE IF NOT EXISTS endpoints (
78 -     id BIGSERIAL PRIMARY KEY,
79 -     endpoint_type TEXT NOT NULL DEFAULT 'pds',
80 -     endpoint TEXT NOT NULL,
81 -     server_did TEXT,
82 -     discovered_at TIMESTAMP NOT NULL,
83 -     last_checked TIMESTAMP,
84 -     status INTEGER DEFAULT 0,
85 -     ip TEXT,
86 -     ipv6 TEXT,
87 -     ip_resolved_at TIMESTAMP,
88 -     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
89 -     UNIQUE(endpoint_type, endpoint)
90 -   );
75 +   -- Endpoints table (with IPv6 support)
76 +   CREATE TABLE IF NOT EXISTS endpoints (
77 +     id BIGSERIAL PRIMARY KEY,
78 +     endpoint_type TEXT NOT NULL DEFAULT 'pds',
79 +     endpoint TEXT NOT NULL,
80 +     server_did TEXT,
81 +     discovered_at TIMESTAMP NOT NULL,
82 +     last_checked TIMESTAMP,
83 +     status INTEGER DEFAULT 0,
84 +     ip TEXT,
85 +     ipv6 TEXT,
86 +     ip_resolved_at TIMESTAMP,
87 +     valid BOOLEAN DEFAULT true,
88 +     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
89 +     UNIQUE(endpoint_type, endpoint)
90 +   );
91 91
92 -   CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
93 -   CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
94 -   CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
95 -   CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96 -   CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97 -   CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98 -   CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
92 +   CREATE INDEX IF NOT EXISTS idx_endpoints_type_endpoint ON endpoints(endpoint_type, endpoint);
93 +   CREATE INDEX IF NOT EXISTS idx_endpoints_status ON endpoints(status);
94 +   CREATE INDEX IF NOT EXISTS idx_endpoints_type ON endpoints(endpoint_type);
95 +   CREATE INDEX IF NOT EXISTS idx_endpoints_ip ON endpoints(ip);
96 +   CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97 +   CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98 +   CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99 +   CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
99 100
100 101   -- IP infos table (IP as PRIMARY KEY)
101 102   CREATE TABLE IF NOT EXISTS ip_infos (
···
120 121   CREATE INDEX IF NOT EXISTS idx_ip_infos_country_code ON ip_infos(country_code);
121 122   CREATE INDEX IF NOT EXISTS idx_ip_infos_asn ON ip_infos(asn);
122 123
123 -   -- Endpoint scans (renamed from pds_scans)
124 +   -- Endpoint scans
124 125   CREATE TABLE IF NOT EXISTS endpoint_scans (
125 126     id BIGSERIAL PRIMARY KEY,
126 127     endpoint_id BIGINT NOT NULL,
···
128 129     response_time DOUBLE PRECISION,
129 130     user_count BIGINT,
130 131     version TEXT,
132 +     used_ip TEXT,
131 133     scan_data JSONB,
132 134     scanned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
133 135     FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE
···
136 138   CREATE INDEX IF NOT EXISTS idx_endpoint_scans_endpoint_status_scanned ON endpoint_scans(endpoint_id, status, scanned_at DESC);
137 139   CREATE INDEX IF NOT EXISTS idx_endpoint_scans_scanned_at ON endpoint_scans(scanned_at);
138 140   CREATE INDEX IF NOT EXISTS idx_endpoint_scans_user_count ON endpoint_scans(user_count DESC NULLS LAST);
141 +   CREATE INDEX IF NOT EXISTS idx_endpoint_scans_used_ip ON endpoint_scans(used_ip);
142 +
139 143
140 144   CREATE TABLE IF NOT EXISTS plc_metrics (
141 145     id BIGSERIAL PRIMARY KEY,
···
154 158     records_processed BIGINT DEFAULT 0
155 159   );
156 160
157 -   CREATE TABLE IF NOT EXISTS plc_bundles (
158 -     bundle_number INTEGER PRIMARY KEY,
159 -     start_time TIMESTAMP NOT NULL,
160 -     end_time TIMESTAMP NOT NULL,
161 -     dids JSONB NOT NULL,
162 -     hash TEXT NOT NULL,
163 -     compressed_hash TEXT NOT NULL,
164 -     compressed_size BIGINT NOT NULL,
165 -     uncompressed_size BIGINT NOT NULL,
166 -     cumulative_compressed_size BIGINT NOT NULL,
167 -     cumulative_uncompressed_size BIGINT NOT NULL,
168 -     cursor TEXT,
169 -     prev_bundle_hash TEXT,
170 -     compressed BOOLEAN DEFAULT true,
171 -     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
172 -   );
173 -
174 -   CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
175 -   CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
176 -   CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
177 -   CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
178 -   CREATE INDEX IF NOT EXISTS idx_plc_bundles_dids ON plc_bundles USING gin(dids);
179 -
180 -   CREATE TABLE IF NOT EXISTS plc_mempool (
181 -     id BIGSERIAL PRIMARY KEY,
182 -     did TEXT NOT NULL,
183 -     operation TEXT NOT NULL,
184 -     cid TEXT NOT NULL UNIQUE,
185 -     created_at TIMESTAMP NOT NULL,
186 -     added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
187 -   );
188 -
189 -   CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
190 -   CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
191 -   CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
192 -
193 161   -- Minimal dids table
194 162   CREATE TABLE IF NOT EXISTS dids (
195 163     did TEXT PRIMARY KEY,
···
242 210
243 211 func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
244 212   query := `
245 -     INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at)
246 -     VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
213 +     INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214 +     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
247 215     ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
248 216       last_checked = EXCLUDED.last_checked,
249 217       status = EXCLUDED.status,
···
259 227       WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
260 228       ELSE endpoints.ip_resolved_at
261 229     END,
230 +       valid = EXCLUDED.valid,
262 231       updated_at = CURRENT_TIMESTAMP
263 232     RETURNING id
264 233   `
265 234   err := p.db.QueryRowContext(ctx, query,
266 235     endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
267 -     endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236 +     endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
268 237   return err
269 238 }
···
285 254
286 255 func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
287 256   query := `
288 257     SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
289 -       ip, ipv6, ip_resolved_at, updated_at
258 +       ip, ipv6, ip_resolved_at, valid, updated_at
290 259     FROM endpoints
291 260     WHERE endpoint = $1 AND endpoint_type = $2
292 261   `
···
296 265
297 266   err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
298 267     &ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
299 -     &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
268 +     &ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
300 269   )
301 270   if err != nil {
302 271     return nil, err
···
322 291   query := `
323 292     SELECT DISTINCT ON (COALESCE(server_did, id::text))
324 293       id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
325 -       ip, ipv6, ip_resolved_at, updated_at
294 +       ip, ipv6, ip_resolved_at, valid, updated_at
326 295     FROM endpoints
327 296     WHERE 1=1
328 297   `
···
335 304     args = append(args, filter.Type)
336 305     argIdx++
337 306   }
307 +
308 +   // NEW: Filter by valid flag
309 +   if filter.OnlyValid {
310 +     query += " AND valid = true"
311 +   }
338 312   if filter.Status != "" {
339 313     statusInt := EndpointStatusUnknown
340 314     switch filter.Status {
···
357 331     }
358 332   }
359 333
360 -   // NEW: Order by server_did and discovered_at to get primary endpoints
361 -   query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
334 +   // NEW: Choose ordering strategy
335 +   if filter != nil && filter.Random {
336 +     // For random selection, we need to wrap in a subquery
337 +     query = fmt.Sprintf(`
338 +       WITH filtered_endpoints AS (
339 +         %s
340 +       )
341 +       SELECT * FROM filtered_endpoints
342 +       ORDER BY RANDOM()
343 +     `, query)
344 +   } else {
345 +     // Original ordering for non-random queries
346 +     query += " ORDER BY COALESCE(server_did, id::text), discovered_at ASC"
347 +   }
362 348
363 349   if filter != nil && filter.Limit > 0 {
364 350     query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argIdx, argIdx+1)
···
490 476   defer tx.Rollback()
491 477
492 478   query := `
493 -     INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, scan_data, scanned_at)
494 -     VALUES ($1, $2, $3, $4, $5, $6, $7)
479 +     INSERT INTO endpoint_scans (endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at)
480 +     VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
495 481   `
496 -   _, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scanDataJSON, scan.ScannedAt)
482 +   _, err = tx.ExecContext(ctx, query, scan.EndpointID, scan.Status, scan.ResponseTime, scan.UserCount, scan.Version, scan.UsedIP, scanDataJSON, scan.ScannedAt)
497 483   if err != nil {
498 484     return err
499 485   }
···
520 506
521 507 func (p *PostgresDB) GetEndpointScans(ctx context.Context, endpointID int64, limit int) ([]*EndpointScan, error) {
522 508   query := `
523 -     SELECT id, endpoint_id, status, response_time, user_count, version, scan_data, scanned_at
509 +     SELECT id, endpoint_id, status, response_time, user_count, version, used_ip, scan_data, scanned_at
524 510     FROM endpoint_scans
525 511     WHERE endpoint_id = $1
526 512     ORDER BY scanned_at DESC
···
538 524     var scan EndpointScan
539 525     var responseTime sql.NullFloat64
540 526     var userCount sql.NullInt64
541 -     var version sql.NullString // NEW
527 +     var version, usedIP sql.NullString
542 528     var scanDataJSON []byte
543 529
544 -     err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &scanDataJSON, &scan.ScannedAt)
530 +     err := rows.Scan(&scan.ID, &scan.EndpointID, &scan.Status, &responseTime, &userCount, &version, &usedIP, &scanDataJSON, &scan.ScannedAt)
545 531     if err != nil {
546 532       return nil, err
547 533     }
···
554 540       scan.UserCount = userCount.Int64
555 541     }
556 542
557 -     if version.Valid { // NEW
543 +     if version.Valid {
558 544       scan.Version = version.String
559 545     }
560 546
547 +     if usedIP.Valid {
548 +       scan.UsedIP = usedIP.String
549 +     }
550 +
561 551     if len(scanDataJSON) > 0 {
562 552       var scanData EndpointScanData
563 553       if err := json.Unmarshal(scanDataJSON, &scanData); err == nil {
···
583 573       discovered_at,
584 574       last_checked,
585 575       status,
586 -       ip
576 +       ip,
577 +       ipv6,
578 +       valid
587 579     FROM endpoints
588 580     WHERE endpoint_type = 'pds'
589 581     ORDER BY COALESCE(server_did, id::text), discovered_at ASC
590 582   )
591 583   SELECT
592 -     e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip,
584 +     e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
593 585     latest.user_count, latest.response_time, latest.version, latest.scanned_at,
594 586     i.city, i.country, i.country_code, i.asn, i.asn_org,
595 587     i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
···
650 642   var items []*PDSListItem
651 643   for rows.Next() {
652 644     item := &PDSListItem{}
653 -     var ip, serverDID, city, country, countryCode, asnOrg sql.NullString
645 +     var ip, ipv6, serverDID, city, country, countryCode, asnOrg sql.NullString
654 646     var asn sql.NullInt32
655 647     var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
656 648     var lat, lon sql.NullFloat64
···
660 652     var scannedAt sql.NullTime
661 653
662 654     err := rows.Scan(
663 -       &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip,
655 +       &item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
664 656       &userCount, &responseTime, &version, &scannedAt,
665 657       &city, &country, &countryCode, &asn, &asnOrg,
666 658       &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
672 664
673 665     if ip.Valid {
674 666       item.IP = ip.String
675 667     }
668 +     if ipv6.Valid {
669 +       item.IPv6 = ipv6.String
670 +     }
676 671     if serverDID.Valid {
677 672       item.ServerDID = serverDID.String
···
719 714
720 715 func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
721 716   query := `
722 -     WITH target_endpoint AS (
717 +     WITH target_endpoint AS MATERIALIZED (
723 718       SELECT
724 719         e.id,
725 720         e.endpoint,
···
727 722         e.discovered_at,
728 723         e.last_checked,
729 724         e.status,
730 -         e.ip
725 +         e.ip,
726 +         e.ipv6,
727 +         e.valid
731 728       FROM endpoints e
732 -       WHERE e.endpoint = $1 AND e.endpoint_type = 'pds'
733 -     ),
734 -     aliases_agg AS (
735 -       SELECT
736 -         te.server_did,
737 -         array_agg(e.endpoint ORDER BY e.discovered_at) FILTER (WHERE e.endpoint != te.endpoint) as aliases,
738 -         MIN(e.discovered_at) as first_discovered_at
739 -       FROM target_endpoint te
740 -       LEFT JOIN endpoints e ON te.server_did = e.server_did
741 -         AND e.endpoint_type = 'pds'
742 -         AND te.server_did IS NOT NULL
743 -       GROUP BY te.server_did
729 +       WHERE e.endpoint = $1
730 +         AND e.endpoint_type = 'pds'
731 +       LIMIT 1
744 732     )
745 733     SELECT
746 734       te.id,
···
750 738       te.last_checked,
751 739       te.status,
752 740       te.ip,
741 +       te.ipv6,
742 +       te.valid,
753 743       latest.user_count,
754 744       latest.response_time,
755 745       latest.version,
···
759 749       i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
760 750       i.latitude, i.longitude,
761 751       i.raw_data,
762 -       COALESCE(aa.aliases, ARRAY[]::text[]) as aliases,
763 -       aa.first_discovered_at
752 +       COALESCE(
753 +         ARRAY(
754 +           SELECT e2.endpoint
755 +           FROM endpoints e2
756 +           WHERE e2.server_did = te.server_did
757 +             AND e2.endpoint_type = 'pds'
758 +             AND e2.endpoint != te.endpoint
759 +             AND te.server_did IS NOT NULL
760 +           ORDER BY e2.discovered_at
761 +         ),
762 +         ARRAY[]::text[]
763 +       ) as aliases,
764 +       CASE
765 +         WHEN te.server_did IS NOT NULL THEN (
766 +           SELECT MIN(e3.discovered_at)
767 +           FROM endpoints e3
768 +           WHERE e3.server_did = te.server_did
769 +             AND e3.endpoint_type = 'pds'
770 +         )
771 +         ELSE NULL
772 +       END as first_discovered_at
764 773     FROM target_endpoint te
765 -     LEFT JOIN aliases_agg aa ON te.server_did = aa.server_did
766 774     LEFT JOIN LATERAL (
767 -       SELECT scan_data, response_time, version, scanned_at, user_count
768 -       FROM endpoint_scans
769 -       WHERE endpoint_id = te.id
770 -       ORDER BY scanned_at DESC
775 +       SELECT
776 +         es.scan_data,
777 +         es.response_time,
778 +         es.version,
779 +         es.scanned_at,
780 +         es.user_count
781 +       FROM endpoint_scans es
782 +       WHERE es.endpoint_id = te.id
783 +       ORDER BY es.scanned_at DESC
771 784       LIMIT 1
772 785     ) latest ON true
773 -     LEFT JOIN ip_infos i ON te.ip = i.ip
786 +     LEFT JOIN ip_infos i ON te.ip = i.ip;
774 787   `
775 788
776 789   detail := &PDSDetail{}
777 -   var ip, city, country, countryCode, asnOrg, serverDID sql.NullString
790 +   var ip, ipv6, city, country, countryCode, asnOrg, serverDID sql.NullString
778 791   var asn sql.NullInt32
779 792   var isDatacenter, isVPN, isCrawler, isTor, isProxy sql.NullBool
780 793   var lat, lon sql.NullFloat64
···
788 801   var firstDiscoveredAt sql.NullTime
789 802
790 803   err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
791 -     &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip,
804 +     &detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
792 805     &userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
793 806     &city, &country, &countryCode, &asn, &asnOrg,
794 807     &isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
804 817   if ip.Valid {
805 818     detail.IP = ip.String
806 819   }
820 +   if ipv6.Valid {
821 +     detail.IPv6 = ipv6.String
822 +   }
807 823
808 824   if serverDID.Valid {
809 825     detail.ServerDID = serverDID.String
···
812 828   // Set aliases and is_primary
813 829   detail.Aliases = aliases
814 830   if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
815 -     // Has server_did - check if this is the first discovered
816 831     detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
817 832       detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
818 833   } else {
819 -     // No server_did means unique server
820 834     detail.IsPrimary = true
821 835   }
···
1147 1161     }
1148 1162   }
1149 1163   return 0
1150 - }
1151 -
1152 - // ===== BUNDLE OPERATIONS =====
1153 -
1154 - func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1155 -   didsJSON, err := json.Marshal(bundle.DIDs)
1156 -   if err != nil {
1157 -     return err
1158 -   }
1159 -
1160 -   // Calculate cumulative sizes from previous bundle
1161 -   if bundle.BundleNumber > 1 {
1162 -     prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1163 -     if err == nil && prevBundle != nil {
1164 -       bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1165 -       bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1166 -     } else {
1167 -       bundle.CumulativeCompressedSize = bundle.CompressedSize
1168 -       bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1169 -     }
1170 -   } else {
1171 -     bundle.CumulativeCompressedSize = bundle.CompressedSize
1172 -     bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1173 -   }
1174 -
1175 -   query := `
1176 -     INSERT INTO plc_bundles (
1177 -       bundle_number, start_time, end_time, dids,
1178 -       hash, compressed_hash, compressed_size, uncompressed_size,
1179 -       cumulative_compressed_size, cumulative_uncompressed_size,
1180 -       cursor, prev_bundle_hash, compressed
1181 -     )
1182 -     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1183 -     ON CONFLICT(bundle_number) DO UPDATE SET
1184 -       start_time = EXCLUDED.start_time,
1185 -       end_time = EXCLUDED.end_time,
1186 -       dids = EXCLUDED.dids,
1187 -       hash = EXCLUDED.hash,
1188 -       compressed_hash = EXCLUDED.compressed_hash,
1189 -       compressed_size = EXCLUDED.compressed_size,
1190 -       uncompressed_size = EXCLUDED.uncompressed_size,
1191 -       cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1192 -       cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1193 -       cursor = EXCLUDED.cursor,
1194 -       prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1195 -       compressed = EXCLUDED.compressed
1196 -   `
1197 -   _, err = p.db.ExecContext(ctx, query,
1198 -     bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1199 -     didsJSON, bundle.Hash, bundle.CompressedHash,
1200 -     bundle.CompressedSize, bundle.UncompressedSize,
1201 -     bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1202 -     bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1203 -   )
1204 -
1205 -   return err
1206 - }
1207 -
1208 - func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1209 -   query := `
1210 -     SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1211 -       compressed_size, uncompressed_size, cumulative_compressed_size,
1212 -       cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1213 -     FROM plc_bundles
1214 -     WHERE bundle_number = $1
1215 -   `
1216 -
1217 -   var bundle PLCBundle
1218 -   var didsJSON []byte
1219 -   var prevHash sql.NullString
1220 -   var cursor sql.NullString
1221 -
1222 -   err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1223 -     &bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1224 -     &didsJSON, &bundle.Hash, &bundle.CompressedHash,
1225 -     &bundle.CompressedSize, &bundle.UncompressedSize,
1226 -     &bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1227 -     &cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1228 -   )
1229 -   if err != nil {
1230 -     return nil, err
1231 -   }
1232 -
1233 -   if prevHash.Valid {
1234 -     bundle.PrevBundleHash = prevHash.String
1235 -   }
1236 -   if cursor.Valid {
1237 -     bundle.Cursor = cursor.String
1238 -   }
1239 -
1240 -   json.Unmarshal(didsJSON, &bundle.DIDs)
1241 -   return &bundle, nil
1242 - }
1243 -
1244 - func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1245 -   query := `
1246 -     SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1247 -       compressed_size, uncompressed_size, cumulative_compressed_size,
1248 -       cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1249 -     FROM plc_bundles
1250 -     ORDER BY bundle_number DESC
1251 -     LIMIT $1
1252 -   `
1253 -
1254 -   rows, err := p.db.QueryContext(ctx, query, limit)
1255 -   if err != nil {
1256 -     return nil, err
1257 -   }
1258 -   defer rows.Close()
1259 -
1260 -   return p.scanBundles(rows)
1261 - }
1262 -
1263 - func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1264 -   query := `
1265 -     SELECT bundle_number, start_time, end_time, dids, hash, compressed_hash,
1266 -       compressed_size, uncompressed_size, cumulative_compressed_size,
1267 -       cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1268 -     FROM plc_bundles
1269 -     WHERE dids ? $1
1270 -     ORDER BY bundle_number ASC
1271 -   `
1272 -
1273 -   rows, err := p.db.QueryContext(ctx, query, did)
1274 -   if err != nil {
1275 -     return nil, err
1276 -   }
1277 -   defer rows.Close()
1278 -
1279 -   return p.scanBundles(rows)
1280 - }
1281 -
1282 - func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1283 -   var bundles []*PLCBundle
1284 -
1285 -   for rows.Next() {
1286 -     var bundle PLCBundle
1287 -     var didsJSON []byte
1288 -     var prevHash sql.NullString
1289 -     var cursor sql.NullString
1290 -
1291 -     if err := rows.Scan(
1292 -       &bundle.BundleNumber,
1293 -       &bundle.StartTime,
1294 -       &bundle.EndTime,
1295 -       &didsJSON,
1296 -       &bundle.Hash,
1297 -       &bundle.CompressedHash,
1298 -       &bundle.CompressedSize,
1299 -       &bundle.UncompressedSize,
1300 -       &bundle.CumulativeCompressedSize,
1301 -       &bundle.CumulativeUncompressedSize,
1302 -       &cursor,
1303 -       &prevHash,
1304 -       &bundle.Compressed,
1305 -       &bundle.CreatedAt,
1306 -     ); err != nil {
1307 -       return nil, err
1308 -     }
1309 -
1310 -     if prevHash.Valid {
1311 -       bundle.PrevBundleHash = prevHash.String
1312 -     }
1313 -     if cursor.Valid {
1314 -       bundle.Cursor = cursor.String
1315 -     }
1316 -
1317 -     json.Unmarshal(didsJSON, &bundle.DIDs)
1318 -     bundles = append(bundles, &bundle)
1319 -   }
1320 -
1321 -   return bundles, rows.Err()
1322 - }
1323 -
1324 - func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1325 -   var count, lastBundleNum int64
1326 -   err := p.db.QueryRowContext(ctx, `
1327 -     SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1328 -     FROM plc_bundles
1329 -   `).Scan(&count, &lastBundleNum)
1330 -   if err != nil {
1331 -     return 0, 0, 0, 0, err
1332 -   }
1333 -
1334 -   if lastBundleNum == 0 {
1335 -     return 0, 0, 0, 0, nil
1336 -   }
1337 -
1338 -   var compressedSize, uncompressedSize int64
1339 -   err = p.db.QueryRowContext(ctx, `
1340 -     SELECT cumulative_compressed_size, cumulative_uncompressed_size
1341 -     FROM plc_bundles
1342 -     WHERE bundle_number = $1
1343 -   `, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1344 -   if err != nil {
1345 -     return 0, 0, 0, 0, err
1346 -   }
1347 -
1348 -   return count, compressedSize, uncompressedSize, lastBundleNum, nil
1349 - }
1350 -
1351 - func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1352 -   query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1353 -   var num int
1354 -   err := p.db.QueryRowContext(ctx, query).Scan(&num)
1355 -   return num, err
1356 - }
1357 -
1358 - func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1359 -   query := `
1360 -     SELECT bundle_number
1361 -     FROM plc_bundles
1362 -     WHERE start_time <= $1 AND end_time >= $1
1363 -     ORDER BY bundle_number ASC
1364 -     LIMIT 1
1365 -   `
1366 -
1367 -   var bundleNum int
1368 -   err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1369 -   if err == sql.ErrNoRows {
1370 -     query = `
1371 -       SELECT bundle_number
1372 -       FROM plc_bundles
1373 -       WHERE end_time < $1
1374 -       ORDER BY bundle_number DESC
1375 -       LIMIT 1
1376 -     `
1377 -     err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1378 -     if err == sql.ErrNoRows {
1379 -       return 1, nil
1380 -     }
1381 -     if err != nil {
1382 -       return 0, err
1383 -     }
1384 -     return bundleNum, nil
1385 -   }
1386 -   if err != nil {
1387 -     return 0, err
1388 -   }
1389 -
1390 -   return bundleNum, nil
1391 - }
1392 -
1393 - func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1394 -   query := `
1395 -     WITH daily_stats AS (
1396 -       SELECT
1397 -         DATE(start_time) as date,
1398 -         MAX(bundle_number) as last_bundle,
1399 -         COUNT(*) as bundle_count,
1400 -         SUM(uncompressed_size) as total_uncompressed,
1401 -         SUM(compressed_size) as total_compressed,
1402 -         MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1403 -         MAX(cumulative_compressed_size) as cumulative_compressed
1404 -       FROM plc_bundles
1405 -       WHERE bundle_number >= $1
1406 -       GROUP BY DATE(start_time)
1407 -     )
1408 -     SELECT
1409 -       date::text,
1410 -       last_bundle,
1411 -       SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1412 -       total_uncompressed,
1413 -       total_compressed,
1414 -       cumulative_uncompressed,
1415 -       cumulative_compressed
1416 -     FROM daily_stats
1417 -     ORDER BY date ASC
1418 -   `
1419 -
1420 -   if limit > 0 {
1421 -     query += fmt.Sprintf(" LIMIT %d", limit)
1422 -   }
1423 -
1424 -   rows, err := p.db.QueryContext(ctx, query, fromBundle)
1425 -   if err != nil {
1426 -     return nil, err
1427 -   }
1428 -   defer rows.Close()
1429 -
1430 -   var history []*PLCHistoryPoint
1431 -   for rows.Next() {
1432 -     var point PLCHistoryPoint
1433 -     var cumulativeOps int64
1434 -
1435 -     err := rows.Scan(
1436 -       &point.Date,
1437 -       &point.BundleNumber,
1438 -       &cumulativeOps,
1439 -       &point.UncompressedSize,
1440 -       &point.CompressedSize,
1441 -       &point.CumulativeUncompressed,
1442 -       &point.CumulativeCompressed,
1443 -     )
1444 -     if err != nil {
1445 -       return nil, err
1446 -     }
1447 -
1448 -     point.OperationCount = int(cumulativeOps)
1449 -
1450 -     history = append(history, &point)
1451 -   }
1452 -
1453 -   return history, rows.Err()
1454 - }
1455 -
1456 - // ===== MEMPOOL OPERATIONS =====
1457 -
1458 - func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1459 -   if len(ops) == 0 {
1460 -     return nil
1461 -   }
1462 -
1463 -   tx, err := p.db.BeginTx(ctx, nil)
1464 -   if err != nil {
1465 -     return err
1466 -   }
1467 -   defer tx.Rollback()
1468 -
1469 -   stmt, err := tx.PrepareContext(ctx, `
1470 -     INSERT INTO plc_mempool (did, operation, cid, created_at)
1471 -     VALUES ($1, $2, $3, $4)
1472 -     ON CONFLICT(cid) DO NOTHING
1473 -   `)
1474 -   if err != nil {
1475 -     return err
1476 -   }
1477 -   defer stmt.Close()
1478 -
1479 -   for _, op := range ops {
1480 -     _, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1481 -     if err != nil {
1482 -       return err
1483 -     }
1484 -   }
1485 -
1486 -   return tx.Commit()
1487 - }
1488 -
1489 - func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1490 -   query := "SELECT COUNT(*) FROM plc_mempool"
1491 -   var count int
1492 -   err := p.db.QueryRowContext(ctx, query).Scan(&count)
1493 -   return count, err
1494 - }
1495 -
1496 - func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1497 -   query := `
1498 -     SELECT id, did, operation, cid, created_at, added_at
1499 -     FROM plc_mempool
1500 -     ORDER BY created_at ASC
1501 -     LIMIT $1
1502 -   `
1503 -
1504 -   rows, err := p.db.QueryContext(ctx, query, limit)
1505 -   if err != nil {
1506 -     return nil, err
1507 -   }
1508 -   defer rows.Close()
1509 -
1510 -   var ops []MempoolOperation
1511 -   for rows.Next() {
1512 -     var op MempoolOperation
1513 -     err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1514 -     if err != nil {
1515 -       return nil, err
1516 -     }
1517 -     ops = append(ops, op)
1518 -   }
1519 -
1520 -   return ops, rows.Err()
1521 - }
1522 -
1523 - func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1524 -   if len(ids) == 0 {
1525 -     return nil
1526 -   }
1527 -
1528 -   placeholders := make([]string, len(ids))
1529 -   args := make([]interface{}, len(ids))
1530 -   for i, id := range ids {
1531 -     placeholders[i] = fmt.Sprintf("$%d", i+1)
1532 -     args[i] = id
1533 -   }
1534 -
1535 -   query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1536 -     strings.Join(placeholders, ","))
1537 -
1538 -   _, err := p.db.ExecContext(ctx, query, args...)
1539 -   return err
1540 - }
1541 -
1542 - func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1543 -   query := `
1544 -     SELECT id, did, operation, cid, created_at, added_at
1545 -     FROM plc_mempool
1546 -     ORDER BY created_at ASC, id ASC
1547 -     LIMIT 1
1548 -   `
1549 -
1550 -   var op MempoolOperation
1551 -   err := p.db.QueryRowContext(ctx, query).Scan(
1552 -     &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1553 -   )
1554 -   if err == sql.ErrNoRows {
1555 -     return nil, nil
1556 -   }
1557 -   if err != nil {
1558 -     return nil, err
1559 -   }
1560 -
1561 -   return &op, nil
1562 - }
1563 -
1564 - func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1565 -   query := `
1566 -     SELECT id, did, operation, cid, created_at, added_at
1567 -     FROM plc_mempool
1568 -     ORDER BY created_at DESC, id DESC
1569 -     LIMIT 1
1570 -   `
1571 -
1572 -   var op MempoolOperation
1573 -   err := p.db.QueryRowContext(ctx, query).Scan(
1574 -     &op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1575 -   )
1576 -   if err == sql.ErrNoRows {
1577 -     return nil, nil
1578 -   }
1579 -   if err != nil {
1580 -     return nil, err
1581 -   }
1582 -
1583 -   return &op, nil
1584 - }
1585 -
1586 - func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1587 -   query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1588 -   var count int
1589 -   err := p.db.QueryRowContext(ctx, query).Scan(&count)
1590 -   return count, err
1591 - }
1592 -
1593 - func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1594 -   query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1595 -   var size int64
1596 -   err := p.db.QueryRowContext(ctx, query).Scan(&size)
1597 -   return size, err
1598 1164 }
1599 1165
1600 1166 // ===== CURSOR OPERATIONS =====
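
endpoint_scans now records which address was actually dialed. A caller-side sketch filling the new column — the StoreEndpointScan method name is an assumption (only the insert body appears in this hunk), and all values are illustrative:

    scan := &storage.EndpointScan{
        EndpointID:   ep.ID,
        Status:       storage.EndpointStatusUnknown, // illustrative
        ResponseTime: 0.042,                         // illustrative
        UserCount:    1234,
        Version:      "0.4.0",
        UsedIP:       "203.0.113.7", // address actually dialed (TEST-NET-3, illustrative)
        ScannedAt:    time.Now().UTC(),
    }
    if err := db.StoreEndpointScan(ctx, scan); err != nil {
        log.Error("store scan: %v", err)
    }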
+13 -17
internal/storage/types.go
···
26 26     LastChecked time.Time
27 27     Status      int
28 28     IP          string
29 -     IPv6        string // NEW
29 +     IPv6        string
30 30     IPResolvedAt time.Time
31 +     Valid       bool
31 32     UpdatedAt   time.Time
32 33 }
···
54 55     Status       int
55 56     ResponseTime float64
56 57     UserCount    int64
57 -     Version string // NEW: Add this field
58 +     Version string
59 +     UsedIP  string // NEW: Track which IP was actually used
58 60     ScanData  *EndpointScanData
59 61     ScannedAt time.Time
60 62 }
···
75 77 
76 78 // EndpointFilter for querying endpoints
77 79 type EndpointFilter struct {
78 -     Type string // "pds", "labeler", etc.
80 +     Type string
79 81     Status string
80 82     MinUserCount int64
81 -     OnlyStale bool // NEW: Only return endpoints that need re-checking
82 -     RecheckInterval time.Duration // NEW: How long before an endpoint is considered stale
83 +     OnlyStale bool
84 +     OnlyValid bool
85 +     RecheckInterval time.Duration
86 +     Random bool
83 87     Limit int
84 88     Offset int
85 89 }
···
118 122     StartTime time.Time
119 123     EndTime time.Time
120 124     BoundaryCIDs []string
121 -     DIDs []string
125 +     DIDCount int // Changed from DIDs []string
122 126     Hash string
123 127     CompressedHash string
124 128     CompressedSize int64
···
149 153     CompressedSize int64 `json:"size_compressed"`
150 154     CumulativeUncompressed int64 `json:"cumulative_uncompressed"`
151 155     CumulativeCompressed int64 `json:"cumulative_compressed"`
152 -  }
153 - 
154 -  // MempoolOperation represents an operation waiting to be bundled
155 -  type MempoolOperation struct {
156 -     ID int64
157 -     DID string
158 -     Operation string
159 -     CID string
160 -     CreatedAt time.Time
161 -     AddedAt time.Time
162 156 }
163 157 
164 158 // ScanCursor stores scanning progress
···
216 210     // From endpoints table
217 211     ID int64
218 212     Endpoint string
219 -     ServerDID string // NEW: Add this
213 +     ServerDID string
220 214     DiscoveredAt time.Time
221 215     LastChecked time.Time
222 216     Status int
223 217     IP string
218 +     IPv6 string
219 +     Valid bool // NEW
224 220 
225 221     // From latest endpoint_scans (via JOIN)
226 222     LatestScan *struct {
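The new EndpointFilter fields suggest the scanner can now ask for only valid, stale endpoints in random order. A hypothetical construction of such a filter; the struct is copied from the diff above so the snippet stands alone, and the query method that consumes it is not part of this change:

    package main

    import (
        "fmt"
        "time"
    )

    // EndpointFilter mirrors the updated struct in internal/storage/types.go.
    type EndpointFilter struct {
        Type            string
        Status          string
        MinUserCount    int64
        OnlyStale       bool
        OnlyValid       bool
        RecheckInterval time.Duration
        Random          bool
        Limit           int
        Offset          int
    }

    func main() {
        // Hypothetical: up to 100 valid PDS endpoints whose last check is
        // older than 15 minutes, returned in random order.
        f := EndpointFilter{
            Type:            "pds",
            OnlyStale:       true,
            OnlyValid:       true,
            RecheckInterval: 15 * time.Minute,
            Random:          true,
            Limit:           100,
        }
        fmt.Printf("%+v\n", f)
    }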
+2 -2
internal/worker/scheduler.go
···
5 5     "sync"
6 6     "time"
7 7 
8 -     "github.com/atscan/atscanner/internal/log"
9 -     "github.com/atscan/atscanner/internal/monitor"
8 +     "github.com/atscan/atscand/internal/log"
9 +     "github.com/atscan/atscand/internal/monitor"
10 10 )
11 11 
12 12 type Job struct {
+124
utils/import-labels.js
···
1 + import { file, write } from "bun";
2 + import { join } from "path";
3 + import { mkdir } from "fs/promises";
4 + import { init, compress } from "@bokuweb/zstd-wasm";
5 + 
6 + // --- Configuration ---
7 + const CSV_FILE = process.argv[2];
8 + const CONFIG_FILE = "config.yaml";
9 + const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
10 + // ---------------------
11 + 
12 + if (!CSV_FILE) {
13 +     console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
14 +     process.exit(1);
15 + }
16 + 
17 + console.log("========================================");
18 + console.log("PLC Operation Labels Import (Bun + WASM)");
19 + console.log("========================================");
20 + 
21 + // 1. Read and parse config
22 + console.log(`Loading config from ${CONFIG_FILE}...`);
23 + const configFile = await file(CONFIG_FILE).text();
24 + const config = Bun.YAML.parse(configFile);
25 + const bundleDir = config?.plc?.bundle_dir;
26 + 
27 + if (!bundleDir) {
28 +     console.error("Error: Could not parse plc.bundle_dir from config.yaml");
29 +     process.exit(1);
30 + }
31 + 
32 + const FINAL_LABELS_DIR = join(bundleDir, "labels");
33 + await mkdir(FINAL_LABELS_DIR, { recursive: true });
34 + 
35 + console.log(`CSV File: ${CSV_FILE}`);
36 + console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
37 + console.log("");
38 + 
39 + // 2. Initialize Zstd WASM module
40 + await init();
41 + 
42 + // --- Pass 1: Read entire file into memory and group by bundle ---
43 + console.log("Pass 1/2: Reading and grouping all lines by bundle...");
44 + console.warn("This will use a large amount of RAM!");
45 + 
46 + const startTime = Date.now();
47 + const bundles = new Map(); // Map<string, string[]>
48 + let lineCount = 0;
49 + 
50 + const inputFile = file(CSV_FILE);
51 + const fileStream = inputFile.stream();
52 + const decoder = new TextDecoder();
53 + let remainder = "";
54 + 
55 + for await (const chunk of fileStream) {
56 +     const text = remainder + decoder.decode(chunk, { stream: true });
57 +     const lines = text.split("\n");
58 +     remainder = lines.pop() || "";
59 + 
60 +     for (const line of lines) {
61 +         if (line === "") continue;
62 +         lineCount++;
63 + 
64 +         if (lineCount === 1 && line.startsWith("bundle,")) {
65 +             continue; // Skip header
66 +         }
67 + 
68 +         const firstCommaIndex = line.indexOf(",");
69 +         if (firstCommaIndex === -1) {
70 +             console.warn(`Skipping malformed line: ${line}`);
71 +             continue;
72 +         }
73 +         const bundleNumStr = line.substring(0, firstCommaIndex);
74 +         const bundleKey = bundleNumStr.padStart(6, "0");
75 + 
76 +         // Add line to the correct bundle's array
77 +         if (!bundles.has(bundleKey)) {
78 +             bundles.set(bundleKey, []);
79 +         }
80 +         bundles.get(bundleKey).push(line);
81 +     }
82 + }
83 + 
84 + // Handle a final line without a trailing newline, so it isn't silently dropped
85 + const tail = remainder + decoder.decode();
86 + if (tail !== "") {
87 +     lineCount++;
88 +     const firstCommaIndex = tail.indexOf(",");
89 +     if (firstCommaIndex !== -1) {
90 +         const bundleKey = tail.substring(0, firstCommaIndex).padStart(6, "0");
91 +         if (!bundles.has(bundleKey)) bundles.set(bundleKey, []);
92 +         bundles.get(bundleKey).push(tail);
93 +     }
94 + }
95 + 
96 + console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
97 + console.log(`Found ${bundles.size} unique bundles.`);
98 + 
99 + // --- Pass 2: Compress and write each bundle ---
100 + console.log("\nPass 2/2: Compressing and writing bundle files...");
101 + let i = 0;
102 + for (const [bundleKey, lines] of bundles.entries()) {
103 +     i++;
104 +     console.log(`  (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
105 + 
106 +     // Join all lines for this bundle into one big string
107 +     const content = lines.join("\n");
108 + 
109 +     // Compress the string
110 +     const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
111 + 
112 +     // Write the compressed data to the file
113 +     const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
114 +     await write(outPath, compressedData);
115 + }
116 + 
117 + // 3. Summary
118 + const totalTime = (Date.now() - startTime) / 1000;
119 + console.log("\n========================================");
120 + console.log("Import Summary");
121 + console.log("========================================");
122 + console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
123 + console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
124 + console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
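Both import paths produce zstd-compressed per-bundle CSVs under the bundle directory's labels/ subfolder. How atscand consumes them is not part of this diff; a sketch of reading one back in Go, assuming the github.com/klauspost/compress/zstd package and an illustrative file name:

    package main

    import (
        "bufio"
        "fmt"
        "os"

        "github.com/klauspost/compress/zstd"
    )

    func main() {
        // Open one label bundle produced by the import scripts
        // (the path is illustrative).
        f, err := os.Open("labels/000042.csv.zst")
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        defer f.Close()

        // Decompress on the fly; the Decoder implements io.Reader.
        dec, err := zstd.NewReader(f)
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        defer dec.Close()

        scanner := bufio.NewScanner(dec)
        for scanner.Scan() {
            fmt.Println(scanner.Text()) // one CSV row: bundle,<label data>
        }
        if err := scanner.Err(); err != nil {
            fmt.Fprintln(os.Stderr, err)
        }
    }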
+91
utils/import-labels.sh
···
1 + #!/bin/bash
2 + # utils/import-labels.sh -- sorted single-pass pipe variant
3 + 
4 + set -e
5 + 
6 + if [ $# -lt 1 ]; then
7 +     echo "Usage: ./utils/import-labels.sh <csv-file>"
8 +     exit 1
9 + fi
10 + 
11 + CSV_FILE="$1"
12 + CONFIG_FILE="config.yaml"
13 + 
14 + [ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
15 + [ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
16 + 
17 + # Extract bundle directory path
18 + BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
19 + 
20 + [ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
21 + 
22 + FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
23 + 
24 + echo "========================================"
25 + echo "PLC Operation Labels Import (Sorted Pipe)"
26 + echo "========================================"
27 + echo "CSV File: $CSV_FILE"
28 + echo "Output Dir: $FINAL_LABELS_DIR"
29 + echo ""
30 + 
31 + # Ensure the final directory exists
32 + mkdir -p "$FINAL_LABELS_DIR"
33 + 
34 + echo "Streaming, sorting, and compressing on the fly..."
35 + echo "This will take time. 'pv' will show progress of the tail command."
36 + echo "The 'sort' command will run after 'pv' is complete."
37 + echo ""
38 + 
39 + # This is the single-pass pipeline
40 + tail -n +2 "$CSV_FILE" | \
41 + pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
42 + sort -t, -k1,1n | \
43 + awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
44 + # This awk script EXPECTS input sorted by bundle number (col 1)
45 + BEGIN {
46 +     # last_bundle_num tracks the bundle we are currently writing
47 +     last_bundle_num = -1
48 +     # cmd holds the current zstd pipe command
49 +     cmd = ""
50 + }
51 + {
52 +     current_bundle_num = $1
53 + 
54 +     # Check if the bundle number has changed
55 +     if (current_bundle_num != last_bundle_num) {
56 + 
57 +         # If it changed, and we have an old pipe open, close it
58 +         if (last_bundle_num != -1) {
59 +             close(cmd)
60 +         }
61 + 
62 +         # New pipe to the final .zst file; -f overwrites stale output non-interactively
63 +         outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
64 +         cmd = "zstd -q -T0 -f -o " outfile
65 + 
66 +         # Update the tracker
67 +         last_bundle_num = current_bundle_num
68 + 
69 +         # Print progress to stderr
70 +         printf "  -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
71 +     }
72 + 
73 +     # Print the current line ($0) to the open pipe
74 +     # The first time this runs for a bundle, it opens the pipe
75 +     # Subsequent times, it writes to the already-open pipe
76 +     print $0 | cmd
77 + }
78 + # END block: close the very last pipe
79 + END {
80 +     if (last_bundle_num != -1) {
81 +         close(cmd)
82 +     }
83 +     printf "  Finished. Total lines: %d\n", NR > "/dev/stderr"
84 + }'
85 + 
86 + echo ""
87 + echo "========================================"
88 + echo "Import Summary"
89 + echo "========================================"
90 + echo "✓ Import completed successfully!"
91 + echo "Label files are stored in: $FINAL_LABELS_DIR"
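The awk stage relies on a writer-rotation invariant: because sort groups each bundle's rows together, only one output stream is ever open at a time. The same pattern sketched in Go, reading the already-sorted CSV from stdin and writing plain (uncompressed, for brevity) files into a hypothetical labels/ directory:

    package main

    import (
        "bufio"
        "fmt"
        "os"
        "strconv"
        "strings"
    )

    func main() {
        // Input must already be sorted by bundle number (column 1),
        // exactly as the sort -t, -k1,1n stage guarantees.
        if err := os.MkdirAll("labels", 0o755); err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        in := bufio.NewScanner(os.Stdin)
        var cur *os.File
        last := -1
        for in.Scan() {
            line := in.Text()
            key, _, ok := strings.Cut(line, ",")
            if !ok {
                continue // skip malformed lines, like the scripts do
            }
            n, err := strconv.Atoi(key)
            if err != nil {
                continue
            }
            if n != last {
                if cur != nil {
                    cur.Close() // rotate: the previous bundle is complete
                }
                cur, err = os.Create(fmt.Sprintf("labels/%06d.csv", n))
                if err != nil {
                    fmt.Fprintln(os.Stderr, err)
                    os.Exit(1)
                }
                last = n
            }
            fmt.Fprintln(cur, line)
        }
        if cur != nil {
            cur.Close()
        }
    }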
+2 -2
utils/migrate-ipinfo.sh
···
4 4 # Configuration (edit these)
5 5 DB_HOST="localhost"
6 6 DB_PORT="5432"
7 - DB_NAME="atscanner"
8 - DB_USER="atscanner"
7 + DB_NAME="atscand"
8 + DB_USER="atscand"
9 9 DB_PASSWORD="Noor1kooz5eeFai9leZagh5ua5eihai4"
10 10 
11 11 # Colors for output