+2
-1
.gitignore
+2
-1
.gitignore
+39
-5
Makefile
+39
-5
Makefile
···
1
-
all: run
1
+
.PHONY: all build install test clean fmt lint help
2
+
3
+
# Binary name
4
+
BINARY_NAME=atscand
5
+
INSTALL_PATH=$(GOPATH)/bin
6
+
7
+
# Go commands
8
+
GOCMD=go
9
+
GOBUILD=$(GOCMD) build
10
+
GOINSTALL=$(GOCMD) install
11
+
GOCLEAN=$(GOCMD) clean
12
+
GOTEST=$(GOCMD) test
13
+
GOGET=$(GOCMD) get
14
+
GOFMT=$(GOCMD) fmt
15
+
GOMOD=$(GOCMD) mod
16
+
GORUN=$(GOCMD) run
17
+
18
+
# Default target
19
+
all: build
20
+
21
+
# Build the CLI tool
22
+
build:
23
+
@echo "Building $(BINARY_NAME)..."
24
+
$(GOBUILD) -o $(BINARY_NAME) ./cmd/atscand
25
+
26
+
# Install the CLI tool globally
27
+
install:
28
+
@echo "Installing $(BINARY_NAME)..."
29
+
$(GOINSTALL) ./cmd/atscand
2
30
3
31
run:
4
-
go run cmd/atscanner.go -verbose
32
+
$(GORUN) cmd/atscand/main.go -verbose
5
33
6
-
clean-db:
7
-
dropdb -U atscanner atscanner
8
-
createdb atscanner -O atscanner
34
+
update-plcbundle:
35
+
GOPROXY=direct go get -u tangled.org/atscan.net/plcbundle@latest
36
+
37
+
# Show help
38
+
help:
39
+
@echo "Available targets:"
40
+
@echo " make build - Build the binary"
41
+
@echo " make install - Install binary globally"
42
+
@echo " make run - Run app"
+159
cmd/atscand/main.go
+159
cmd/atscand/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"context"
5
+
"flag"
6
+
"fmt"
7
+
"os"
8
+
"os/signal"
9
+
"syscall"
10
+
"time"
11
+
12
+
"github.com/atscan/atscand/internal/api"
13
+
"github.com/atscan/atscand/internal/config"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/pds"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
18
+
"github.com/atscan/atscand/internal/worker"
19
+
)
20
+
21
+
const VERSION = "1.0.0"
22
+
23
+
func main() {
24
+
configPath := flag.String("config", "config.yaml", "path to config file")
25
+
verbose := flag.Bool("verbose", false, "enable verbose logging")
26
+
flag.Parse()
27
+
28
+
// Load configuration
29
+
cfg, err := config.Load(*configPath)
30
+
if err != nil {
31
+
fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
32
+
os.Exit(1)
33
+
}
34
+
35
+
// Override verbose setting if flag is provided
36
+
if *verbose {
37
+
cfg.API.Verbose = true
38
+
}
39
+
40
+
// Initialize logger
41
+
log.Init(cfg.API.Verbose)
42
+
43
+
// Print banner
44
+
log.Banner(VERSION)
45
+
46
+
// Print configuration summary
47
+
log.PrintConfig(map[string]string{
48
+
"Database Type": cfg.Database.Type,
49
+
"Database Path": cfg.Database.Path, // Will be auto-redacted
50
+
"PLC Directory": cfg.PLC.DirectoryURL,
51
+
"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
52
+
"PLC Bundle Dir": cfg.PLC.BundleDir,
53
+
"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
54
+
"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
55
+
"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
56
+
"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
57
+
"PDS Timeout": cfg.PDS.Timeout.String(),
58
+
"API Host": cfg.API.Host,
59
+
"API Port": fmt.Sprintf("%d", cfg.API.Port),
60
+
"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
61
+
})
62
+
63
+
// Initialize database using factory pattern
64
+
db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
65
+
if err != nil {
66
+
log.Fatal("Failed to initialize database: %v", err)
67
+
}
68
+
defer func() {
69
+
log.Info("Closing database connection...")
70
+
db.Close()
71
+
}()
72
+
73
+
// Set scan retention from config
74
+
if cfg.PDS.ScanRetention > 0 {
75
+
db.SetScanRetention(cfg.PDS.ScanRetention)
76
+
log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
77
+
}
78
+
79
+
// Run migrations
80
+
if err := db.Migrate(); err != nil {
81
+
log.Fatal("Failed to run migrations: %v", err)
82
+
}
83
+
84
+
ctx, cancel := context.WithCancel(context.Background())
85
+
defer cancel()
86
+
87
+
// Initialize workers
88
+
log.Info("Initializing scanners...")
89
+
90
+
bundleManager, err := plc.NewBundleManager(cfg.PLC.BundleDir, cfg.PLC.DirectoryURL, db, cfg.PLC.IndexDIDs)
91
+
if err != nil {
92
+
log.Fatal("Failed to create bundle manager: %v", err)
93
+
}
94
+
defer bundleManager.Close()
95
+
log.Verbose("✓ Bundle manager initialized (shared)")
96
+
97
+
plcScanner := plc.NewScanner(db, cfg.PLC, bundleManager)
98
+
defer plcScanner.Close()
99
+
log.Verbose("✓ PLC scanner initialized")
100
+
101
+
pdsScanner := pds.NewScanner(db, cfg.PDS)
102
+
log.Verbose("✓ PDS scanner initialized")
103
+
104
+
scheduler := worker.NewScheduler()
105
+
106
+
// Schedule PLC directory scan
107
+
scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
108
+
if err := plcScanner.Scan(ctx); err != nil {
109
+
log.Error("PLC scan error: %v", err)
110
+
}
111
+
})
112
+
log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
113
+
114
+
// Schedule PDS availability checks
115
+
scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
116
+
if err := pdsScanner.ScanAll(ctx); err != nil {
117
+
log.Error("PDS scan error: %v", err)
118
+
}
119
+
})
120
+
log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
121
+
122
+
// Start API server
123
+
log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
124
+
apiServer := api.NewServer(db, cfg.API, cfg.PLC, bundleManager)
125
+
go func() {
126
+
if err := apiServer.Start(); err != nil {
127
+
log.Fatal("API server error: %v", err)
128
+
}
129
+
}()
130
+
131
+
// Give the API server a moment to start
132
+
time.Sleep(100 * time.Millisecond)
133
+
log.Info("✓ API server started successfully")
134
+
log.Info("")
135
+
log.Info("🚀 ATScanner is running!")
136
+
log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
137
+
log.Info(" Press Ctrl+C to stop")
138
+
log.Info("")
139
+
140
+
// Start scheduler
141
+
scheduler.Start(ctx)
142
+
143
+
// Wait for interrupt
144
+
sigChan := make(chan os.Signal, 1)
145
+
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
146
+
<-sigChan
147
+
148
+
log.Info("")
149
+
log.Info("Shutting down gracefully...")
150
+
cancel()
151
+
152
+
log.Info("Stopping API server...")
153
+
apiServer.Shutdown(context.Background())
154
+
155
+
log.Info("Waiting for active tasks to complete...")
156
+
time.Sleep(2 * time.Second)
157
+
158
+
log.Info("✓ Shutdown complete. Goodbye!")
159
+
}
-152
cmd/atscanner.go
-152
cmd/atscanner.go
···
1
-
package main
2
-
3
-
import (
4
-
"context"
5
-
"flag"
6
-
"fmt"
7
-
"os"
8
-
"os/signal"
9
-
"syscall"
10
-
"time"
11
-
12
-
"github.com/atscan/atscanner/internal/api"
13
-
"github.com/atscan/atscanner/internal/config"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/pds"
16
-
"github.com/atscan/atscanner/internal/plc"
17
-
"github.com/atscan/atscanner/internal/storage"
18
-
"github.com/atscan/atscanner/internal/worker"
19
-
)
20
-
21
-
const VERSION = "1.0.0"
22
-
23
-
func main() {
24
-
configPath := flag.String("config", "config.yaml", "path to config file")
25
-
verbose := flag.Bool("verbose", false, "enable verbose logging")
26
-
flag.Parse()
27
-
28
-
// Load configuration
29
-
cfg, err := config.Load(*configPath)
30
-
if err != nil {
31
-
fmt.Fprintf(os.Stderr, "Failed to load config: %v\n", err)
32
-
os.Exit(1)
33
-
}
34
-
35
-
// Override verbose setting if flag is provided
36
-
if *verbose {
37
-
cfg.API.Verbose = true
38
-
}
39
-
40
-
// Initialize logger
41
-
log.Init(cfg.API.Verbose)
42
-
43
-
// Print banner
44
-
log.Banner(VERSION)
45
-
46
-
// Print configuration summary
47
-
log.PrintConfig(map[string]string{
48
-
"Database Type": cfg.Database.Type,
49
-
"Database Path": cfg.Database.Path, // Will be auto-redacted
50
-
"PLC Directory": cfg.PLC.DirectoryURL,
51
-
"PLC Scan Interval": cfg.PLC.ScanInterval.String(),
52
-
"PLC Bundle Dir": cfg.PLC.BundleDir,
53
-
"PLC Cache": fmt.Sprintf("%v", cfg.PLC.UseCache),
54
-
"PLC Index DIDs": fmt.Sprintf("%v", cfg.PLC.IndexDIDs),
55
-
"PDS Scan Interval": cfg.PDS.ScanInterval.String(),
56
-
"PDS Workers": fmt.Sprintf("%d", cfg.PDS.Workers),
57
-
"PDS Timeout": cfg.PDS.Timeout.String(),
58
-
"API Host": cfg.API.Host,
59
-
"API Port": fmt.Sprintf("%d", cfg.API.Port),
60
-
"Verbose Logging": fmt.Sprintf("%v", cfg.API.Verbose),
61
-
})
62
-
63
-
// Initialize database using factory pattern
64
-
db, err := storage.NewDatabase(cfg.Database.Type, cfg.Database.Path)
65
-
if err != nil {
66
-
log.Fatal("Failed to initialize database: %v", err)
67
-
}
68
-
defer func() {
69
-
log.Info("Closing database connection...")
70
-
db.Close()
71
-
}()
72
-
73
-
// Set scan retention from config
74
-
if cfg.PDS.ScanRetention > 0 {
75
-
db.SetScanRetention(cfg.PDS.ScanRetention)
76
-
log.Verbose("Scan retention set to %d scans per endpoint", cfg.PDS.ScanRetention)
77
-
}
78
-
79
-
// Run migrations
80
-
if err := db.Migrate(); err != nil {
81
-
log.Fatal("Failed to run migrations: %v", err)
82
-
}
83
-
84
-
ctx, cancel := context.WithCancel(context.Background())
85
-
defer cancel()
86
-
87
-
// Initialize workers
88
-
log.Info("Initializing scanners...")
89
-
90
-
plcScanner := plc.NewScanner(db, cfg.PLC)
91
-
defer plcScanner.Close()
92
-
log.Verbose("✓ PLC scanner initialized")
93
-
94
-
pdsScanner := pds.NewScanner(db, cfg.PDS)
95
-
log.Verbose("✓ PDS scanner initialized")
96
-
97
-
scheduler := worker.NewScheduler()
98
-
99
-
// Schedule PLC directory scan
100
-
scheduler.AddJob("plc_scan", cfg.PLC.ScanInterval, func() {
101
-
if err := plcScanner.Scan(ctx); err != nil {
102
-
log.Error("PLC scan error: %v", err)
103
-
}
104
-
})
105
-
log.Verbose("✓ PLC scan job scheduled (interval: %s)", cfg.PLC.ScanInterval)
106
-
107
-
// Schedule PDS availability checks
108
-
scheduler.AddJob("pds_scan", cfg.PDS.ScanInterval, func() {
109
-
if err := pdsScanner.ScanAll(ctx); err != nil {
110
-
log.Error("PDS scan error: %v", err)
111
-
}
112
-
})
113
-
log.Verbose("✓ PDS scan job scheduled (interval: %s)", cfg.PDS.ScanInterval)
114
-
115
-
// Start API server
116
-
log.Info("Starting API server on %s:%d...", cfg.API.Host, cfg.API.Port)
117
-
apiServer := api.NewServer(db, cfg.API, cfg.PLC)
118
-
go func() {
119
-
if err := apiServer.Start(); err != nil {
120
-
log.Fatal("API server error: %v", err)
121
-
}
122
-
}()
123
-
124
-
// Give the API server a moment to start
125
-
time.Sleep(100 * time.Millisecond)
126
-
log.Info("✓ API server started successfully")
127
-
log.Info("")
128
-
log.Info("🚀 ATScanner is running!")
129
-
log.Info(" API available at: http://%s:%d", cfg.API.Host, cfg.API.Port)
130
-
log.Info(" Press Ctrl+C to stop")
131
-
log.Info("")
132
-
133
-
// Start scheduler
134
-
scheduler.Start(ctx)
135
-
136
-
// Wait for interrupt
137
-
sigChan := make(chan os.Signal, 1)
138
-
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
139
-
<-sigChan
140
-
141
-
log.Info("")
142
-
log.Info("Shutting down gracefully...")
143
-
cancel()
144
-
145
-
log.Info("Stopping API server...")
146
-
apiServer.Shutdown(context.Background())
147
-
148
-
log.Info("Waiting for active tasks to complete...")
149
-
time.Sleep(2 * time.Second)
150
-
151
-
log.Info("✓ Shutdown complete. Goodbye!")
152
-
}
+168
cmd/import-labels/main.go
+168
cmd/import-labels/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"bufio"
5
+
"flag"
6
+
"fmt"
7
+
"os"
8
+
"path/filepath"
9
+
"strings"
10
+
"time"
11
+
12
+
"github.com/klauspost/compress/zstd"
13
+
"gopkg.in/yaml.v3"
14
+
)
15
+
16
+
type Config struct {
17
+
PLC struct {
18
+
BundleDir string `yaml:"bundle_dir"`
19
+
} `yaml:"plc"`
20
+
}
21
+
22
+
var CONFIG_FILE = "config.yaml"
23
+
24
+
// ---------------------
25
+
26
+
func main() {
27
+
// Define a new flag for changing the directory
28
+
workDir := flag.String("C", ".", "Change to this directory before running (for finding config.yaml)")
29
+
flag.Usage = func() { // Custom usage message
30
+
fmt.Fprintf(os.Stderr, "Usage: ... | %s [-C /path/to/dir]\n", os.Args[0])
31
+
fmt.Fprintln(os.Stderr, "Reads sorted CSV from stdin and writes compressed bundle files.")
32
+
flag.PrintDefaults()
33
+
}
34
+
flag.Parse() // Parse all defined flags
35
+
36
+
// Change directory if the flag was used
37
+
if *workDir != "." {
38
+
fmt.Printf("Changing working directory to %s...\n", *workDir)
39
+
if err := os.Chdir(*workDir); err != nil {
40
+
fmt.Fprintf(os.Stderr, "Error changing directory to %s: %v\n", *workDir, err)
41
+
os.Exit(1)
42
+
}
43
+
}
44
+
45
+
// --- REMOVED UNUSED CODE ---
46
+
// The csvFilePath variable and NArg check were removed
47
+
// as the script now reads from stdin.
48
+
// ---------------------------
49
+
50
+
fmt.Println("========================================")
51
+
fmt.Println("PLC Operation Labels Import (Go STDIN)")
52
+
fmt.Println("========================================")
53
+
54
+
// 1. Read config (will now read from the new CWD)
55
+
fmt.Printf("Loading config from %s...\n", CONFIG_FILE)
56
+
configData, err := os.ReadFile(CONFIG_FILE)
57
+
if err != nil {
58
+
fmt.Fprintf(os.Stderr, "Error reading config file: %v\n", err)
59
+
os.Exit(1)
60
+
}
61
+
62
+
var config Config
63
+
if err := yaml.Unmarshal(configData, &config); err != nil {
64
+
fmt.Fprintf(os.Stderr, "Error parsing config.yaml: %v\n", err)
65
+
os.Exit(1)
66
+
}
67
+
68
+
if config.PLC.BundleDir == "" {
69
+
fmt.Fprintln(os.Stderr, "Error: Could not parse plc.bundle_dir from config.yaml")
70
+
os.Exit(1)
71
+
}
72
+
73
+
finalLabelsDir := filepath.Join(config.PLC.BundleDir, "labels")
74
+
if err := os.MkdirAll(finalLabelsDir, 0755); err != nil {
75
+
fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
76
+
os.Exit(1)
77
+
}
78
+
79
+
fmt.Printf("Output Dir: %s\n", finalLabelsDir)
80
+
fmt.Println("Waiting for sorted data from stdin...")
81
+
82
+
// 2. Process sorted data from stdin
83
+
// This script *requires* the input to be sorted by bundle number.
84
+
85
+
var currentWriter *zstd.Encoder
86
+
var currentFile *os.File
87
+
var lastBundleKey string = ""
88
+
89
+
lineCount := 0
90
+
startTime := time.Now()
91
+
92
+
scanner := bufio.NewScanner(os.Stdin)
93
+
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
94
+
95
+
for scanner.Scan() {
96
+
line := scanner.Text()
97
+
lineCount++
98
+
99
+
parts := strings.SplitN(line, ",", 2)
100
+
if len(parts) < 1 {
101
+
continue // Skip empty/bad lines
102
+
}
103
+
104
+
bundleNumStr := parts[0]
105
+
bundleKey := fmt.Sprintf("%06s", bundleNumStr) // Pad with zeros
106
+
107
+
// If the bundle key is new, close the old writer and open a new one.
108
+
if bundleKey != lastBundleKey {
109
+
// Close the previous writer/file
110
+
if currentWriter != nil {
111
+
if err := currentWriter.Close(); err != nil {
112
+
fmt.Fprintf(os.Stderr, "Error closing writer for %s: %v\n", lastBundleKey, err)
113
+
}
114
+
currentFile.Close()
115
+
}
116
+
117
+
// Start the new one
118
+
fmt.Printf(" -> Writing bundle %s\n", bundleKey)
119
+
outPath := filepath.Join(finalLabelsDir, fmt.Sprintf("%s.csv.zst", bundleKey))
120
+
121
+
file, err := os.Create(outPath)
122
+
if err != nil {
123
+
fmt.Fprintf(os.Stderr, "Error creating file %s: %v\n", outPath, err)
124
+
os.Exit(1)
125
+
}
126
+
currentFile = file
127
+
128
+
writer, err := zstd.NewWriter(file)
129
+
if err != nil {
130
+
fmt.Fprintf(os.Stderr, "Error creating zstd writer: %v\n", err)
131
+
os.Exit(1)
132
+
}
133
+
currentWriter = writer
134
+
lastBundleKey = bundleKey
135
+
}
136
+
137
+
// Write the line to the currently active writer
138
+
if _, err := currentWriter.Write([]byte(line + "\n")); err != nil {
139
+
fmt.Fprintf(os.Stderr, "Error writing line: %v\n", err)
140
+
}
141
+
142
+
// Progress update
143
+
if lineCount%100000 == 0 {
144
+
elapsed := time.Since(startTime).Seconds()
145
+
rate := float64(lineCount) / elapsed
146
+
fmt.Printf(" ... processed %d lines (%.0f lines/sec)\n", lineCount, rate)
147
+
}
148
+
}
149
+
150
+
// 3. Close the very last writer
151
+
if currentWriter != nil {
152
+
if err := currentWriter.Close(); err != nil {
153
+
fmt.Fprintf(os.Stderr, "Error closing final writer: %v\n", err)
154
+
}
155
+
currentFile.Close()
156
+
}
157
+
158
+
if err := scanner.Err(); err != nil {
159
+
fmt.Fprintf(os.Stderr, "Error reading stdin: %v\n", err)
160
+
}
161
+
162
+
totalTime := time.Since(startTime)
163
+
fmt.Println("\n========================================")
164
+
fmt.Println("Import Summary")
165
+
fmt.Println("========================================")
166
+
fmt.Printf("✓ Import completed in %v\n", totalTime)
167
+
fmt.Printf("Total lines processed: %d\n", lineCount)
168
+
}
+1
-1
config.sample.yaml
+1
-1
config.sample.yaml
+3
-4
go.mod
+3
-4
go.mod
···
1
-
module github.com/atscan/atscanner
1
+
module github.com/atscan/atscand
2
2
3
3
go 1.23.0
4
4
···
8
8
gopkg.in/yaml.v3 v3.0.1
9
9
)
10
10
11
-
require github.com/klauspost/compress v1.18.1 // indirect
11
+
require github.com/klauspost/compress v1.18.1
12
12
13
13
require (
14
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
15
14
github.com/gorilla/handlers v1.5.2
16
15
github.com/jackc/pgx/v5 v5.7.6
16
+
tangled.org/atscan.net/plcbundle v0.3.6
17
17
)
18
18
19
19
require (
20
-
github.com/atscan/plcbundle v0.0.0-20251027193653-3678d57c1dee // indirect
21
20
github.com/felixge/httpsnoop v1.0.3 // indirect
22
21
github.com/jackc/pgpassfile v1.0.0 // indirect
23
22
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
+2
-6
go.sum
+2
-6
go.sum
···
1
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
2
-
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
3
-
github.com/atscan/plcbundle v0.0.0-20251027192009-9350d30fd185 h1:E/fQ1jsaydY6x5JRv+gBiMZVHxKEGD4cK+JxZUZuskU=
4
-
github.com/atscan/plcbundle v0.0.0-20251027192009-9350d30fd185/go.mod h1:vqyqs+zyaxFYtIp6I4+zSQD76oiylnGenzD7ZeA4cxs=
5
-
github.com/atscan/plcbundle v0.0.0-20251027193653-3678d57c1dee h1:wepjgNZxBJGuWmVpplG2BTcoICGafaHALiQoXJV1Iwk=
6
-
github.com/atscan/plcbundle v0.0.0-20251027193653-3678d57c1dee/go.mod h1:vqyqs+zyaxFYtIp6I4+zSQD76oiylnGenzD7ZeA4cxs=
7
1
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
8
2
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
9
3
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
···
51
45
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
52
46
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
53
47
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
48
+
tangled.org/atscan.net/plcbundle v0.3.6 h1:8eSOxEwHRRT7cLhOTUxut80fYLAi+jodR9UTshofIvY=
49
+
tangled.org/atscan.net/plcbundle v0.3.6/go.mod h1:XUzSi6wmqAECCLThmuBTzYV5mEd0q/J1wE45cagoBqs=
+225
-449
internal/api/handlers.go
+225
-449
internal/api/handlers.go
···
2
2
3
3
import (
4
4
"context"
5
-
"crypto/sha256"
6
5
"database/sql"
7
-
"encoding/hex"
8
6
"encoding/json"
9
7
"fmt"
8
+
"io"
10
9
"net/http"
11
-
"os"
12
-
"path/filepath"
13
10
"strconv"
14
11
"strings"
15
12
"time"
16
13
17
-
"github.com/atscan/atscanner/internal/log"
18
-
"github.com/atscan/atscanner/internal/monitor"
19
-
"github.com/atscan/atscanner/internal/plc"
20
-
"github.com/atscan/atscanner/internal/storage"
14
+
"github.com/atscan/atscand/internal/log"
15
+
"github.com/atscan/atscand/internal/monitor"
16
+
"github.com/atscan/atscand/internal/plc"
17
+
"github.com/atscan/atscand/internal/storage"
21
18
"github.com/gorilla/mux"
19
+
"tangled.org/atscan.net/plcbundle"
22
20
)
23
21
24
22
// ===== RESPONSE HELPERS =====
···
40
38
http.Error(r.w, msg, code)
41
39
}
42
40
43
-
func (r *response) bundleHeaders(bundle *storage.PLCBundle) {
41
+
func (r *response) bundleHeaders(bundle *plcbundle.BundleMetadata) {
44
42
r.w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundle.BundleNumber))
45
43
r.w.Header().Set("X-Bundle-Hash", bundle.Hash)
46
44
r.w.Header().Set("X-Bundle-Compressed-Hash", bundle.CompressedHash)
···
76
74
}
77
75
78
76
// ===== FORMATTING HELPERS =====
79
-
80
-
func formatBundleResponse(bundle *storage.PLCBundle) map[string]interface{} {
81
-
return map[string]interface{}{
82
-
"plc_bundle_number": bundle.BundleNumber,
83
-
"start_time": bundle.StartTime,
84
-
"end_time": bundle.EndTime,
85
-
"operation_count": plc.BUNDLE_SIZE,
86
-
"did_count": bundle.DIDCount, // Use DIDCount instead of len(DIDs)
87
-
"hash": bundle.Hash,
88
-
"compressed_hash": bundle.CompressedHash,
89
-
"compressed_size": bundle.CompressedSize,
90
-
"uncompressed_size": bundle.UncompressedSize,
91
-
"compression_ratio": float64(bundle.UncompressedSize) / float64(bundle.CompressedSize),
92
-
"cursor": bundle.Cursor,
93
-
"prev_bundle_hash": bundle.PrevBundleHash,
94
-
"created_at": bundle.CreatedAt,
95
-
}
96
-
}
97
77
98
78
func formatEndpointResponse(ep *storage.Endpoint) map[string]interface{} {
99
79
response := map[string]interface{}{
···
268
248
"endpoint": pds.Endpoint,
269
249
"discovered_at": pds.DiscoveredAt,
270
250
"status": statusToString(pds.Status),
251
+
"valid": pds.Valid, // NEW
271
252
}
272
253
273
254
// Add server_did if available
···
703
684
return
704
685
}
705
686
706
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
707
-
if err != nil {
708
-
resp.error(err.Error(), http.StatusInternalServerError)
709
-
return
710
-
}
711
-
687
+
lastBundle := s.bundleManager.GetLastBundleNumber()
712
688
resp.json(map[string]interface{}{
713
689
"total_unique_dids": totalDIDs,
714
690
"last_bundle": lastBundle,
···
719
695
720
696
func (s *Server) handleGetPLCBundle(w http.ResponseWriter, r *http.Request) {
721
697
resp := newResponse(w)
722
-
723
698
bundleNum, err := getBundleNumber(r)
724
699
if err != nil {
725
700
resp.error("invalid bundle number", http.StatusBadRequest)
726
701
return
727
702
}
728
703
729
-
// Try to get existing bundle
730
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
731
-
if err == nil {
732
-
// Bundle exists, return it normally
733
-
resp.json(formatBundleResponse(bundle))
734
-
return
735
-
}
736
-
737
-
// Bundle not found - check if it's the next upcoming bundle
738
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
704
+
// Get from library's index
705
+
index := s.bundleManager.GetIndex()
706
+
bundleMeta, err := index.GetBundle(bundleNum)
739
707
if err != nil {
740
-
resp.error("bundle not found", http.StatusNotFound)
741
-
return
742
-
}
743
-
744
-
if bundleNum == lastBundle+1 {
745
-
// This is the upcoming bundle - return preview based on mempool
746
-
upcomingBundle, err := s.createUpcomingBundlePreview(r.Context(), r, bundleNum)
747
-
if err != nil {
748
-
resp.error(fmt.Sprintf("failed to create upcoming bundle preview: %v", err), http.StatusInternalServerError)
708
+
// Check if it's upcoming bundle
709
+
lastBundle := index.GetLastBundle()
710
+
if lastBundle != nil && bundleNum == lastBundle.BundleNumber+1 {
711
+
upcomingBundle, err := s.createUpcomingBundlePreview(bundleNum)
712
+
if err != nil {
713
+
resp.error(err.Error(), http.StatusInternalServerError)
714
+
return
715
+
}
716
+
resp.json(upcomingBundle)
749
717
return
750
718
}
751
-
resp.json(upcomingBundle)
719
+
resp.error("bundle not found", http.StatusNotFound)
752
720
return
753
721
}
754
722
755
-
// Not an upcoming bundle, just not found
756
-
resp.error("bundle not found", http.StatusNotFound)
723
+
resp.json(formatBundleMetadata(bundleMeta))
757
724
}
758
725
759
-
func (s *Server) createUpcomingBundlePreview(ctx context.Context, r *http.Request, bundleNum int) (map[string]interface{}, error) {
760
-
// Get mempool stats
761
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
762
-
if err != nil {
763
-
return nil, err
726
+
// Helper to format library's BundleMetadata
727
+
func formatBundleMetadata(meta *plcbundle.BundleMetadata) map[string]interface{} {
728
+
return map[string]interface{}{
729
+
"plc_bundle_number": meta.BundleNumber,
730
+
"start_time": meta.StartTime,
731
+
"end_time": meta.EndTime,
732
+
"operation_count": meta.OperationCount,
733
+
"did_count": meta.DIDCount,
734
+
"hash": meta.Hash, // Chain hash (primary)
735
+
"content_hash": meta.ContentHash, // Content hash
736
+
"parent": meta.Parent, // Parent chain hash
737
+
"compressed_hash": meta.CompressedHash,
738
+
"compressed_size": meta.CompressedSize,
739
+
"uncompressed_size": meta.UncompressedSize,
740
+
"compression_ratio": float64(meta.UncompressedSize) / float64(meta.CompressedSize),
741
+
"cursor": meta.Cursor,
742
+
"created_at": meta.CreatedAt,
764
743
}
744
+
}
765
745
766
-
if mempoolCount == 0 {
746
+
func (s *Server) createUpcomingBundlePreview(bundleNum int) (map[string]interface{}, error) {
747
+
// Get mempool stats from library via wrapper
748
+
stats := s.bundleManager.GetMempoolStats()
749
+
750
+
count, ok := stats["count"].(int)
751
+
if !ok || count == 0 {
767
752
return map[string]interface{}{
768
753
"plc_bundle_number": bundleNum,
769
754
"is_upcoming": true,
···
773
758
}, nil
774
759
}
775
760
776
-
// Get first and last operations for time range
777
-
firstOp, err := s.db.GetFirstMempoolOperation(ctx)
778
-
if err != nil {
779
-
return nil, err
761
+
// Build response
762
+
result := map[string]interface{}{
763
+
"plc_bundle_number": bundleNum,
764
+
"is_upcoming": true,
765
+
"status": "filling",
766
+
"operation_count": count,
767
+
"did_count": stats["did_count"],
768
+
"target_operation_count": 10000,
769
+
"progress_percent": float64(count) / 100.0,
770
+
"operations_needed": 10000 - count,
780
771
}
781
772
782
-
lastOp, err := s.db.GetLastMempoolOperation(ctx)
783
-
if err != nil {
784
-
return nil, err
773
+
if count >= 10000 {
774
+
result["status"] = "ready"
785
775
}
786
776
787
-
// Get unique DID count
788
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
789
-
if err != nil {
790
-
return nil, err
777
+
// Add time range if available
778
+
if firstTime, ok := stats["first_time"]; ok {
779
+
result["start_time"] = firstTime
791
780
}
792
-
793
-
// Get uncompressed size estimate
794
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
795
-
if err != nil {
796
-
return nil, err
781
+
if lastTime, ok := stats["last_time"]; ok {
782
+
result["current_end_time"] = lastTime
797
783
}
798
784
799
-
// Estimate compressed size (typical ratio is ~0.1-0.15 for PLC data)
800
-
estimatedCompressedSize := int64(float64(uncompressedSize) * 0.12)
801
-
802
-
// Calculate completion estimate
803
-
var estimatedCompletionTime *time.Time
804
-
var operationsNeeded int
805
-
var currentRate float64
806
-
807
-
operationsNeeded = plc.BUNDLE_SIZE - mempoolCount
808
-
809
-
if mempoolCount < plc.BUNDLE_SIZE && mempoolCount > 0 {
810
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
811
-
if timeSpan > 0 {
812
-
currentRate = float64(mempoolCount) / timeSpan
813
-
if currentRate > 0 {
814
-
secondsNeeded := float64(operationsNeeded) / currentRate
815
-
completionTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
816
-
estimatedCompletionTime = &completionTime
817
-
}
818
-
}
785
+
// Add size info if available
786
+
if sizeBytes, ok := stats["size_bytes"]; ok {
787
+
result["uncompressed_size"] = sizeBytes
788
+
result["estimated_compressed_size"] = int64(float64(sizeBytes.(int)) * 0.12)
819
789
}
820
790
821
-
// Get previous bundle for cursor context
822
-
var prevBundleHash string
823
-
var cursor string
791
+
// Get previous bundle info
824
792
if bundleNum > 1 {
825
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
826
-
if err == nil {
827
-
prevBundleHash = prevBundle.Hash
828
-
cursor = prevBundle.EndTime.Format(time.RFC3339Nano)
829
-
}
830
-
}
831
-
832
-
// Determine bundle status
833
-
status := "filling"
834
-
if mempoolCount >= plc.BUNDLE_SIZE {
835
-
status = "ready"
836
-
}
837
-
838
-
// Build upcoming bundle response
839
-
result := map[string]interface{}{
840
-
"plc_bundle_number": bundleNum,
841
-
"is_upcoming": true,
842
-
"status": status,
843
-
"operation_count": mempoolCount,
844
-
"target_operation_count": plc.BUNDLE_SIZE,
845
-
"progress_percent": float64(mempoolCount) / float64(plc.BUNDLE_SIZE) * 100,
846
-
"operations_needed": operationsNeeded,
847
-
"did_count": uniqueDIDCount,
848
-
"start_time": firstOp.CreatedAt,
849
-
"current_end_time": lastOp.CreatedAt,
850
-
"uncompressed_size": uncompressedSize,
851
-
"estimated_compressed_size": estimatedCompressedSize,
852
-
"compression_ratio": float64(uncompressedSize) / float64(estimatedCompressedSize),
853
-
"prev_bundle_hash": prevBundleHash,
854
-
"cursor": cursor,
855
-
}
856
-
857
-
if estimatedCompletionTime != nil {
858
-
result["estimated_completion_time"] = *estimatedCompletionTime
859
-
result["current_rate_per_second"] = currentRate
860
-
}
861
-
862
-
// Get actual mempool operations if requested (for DIDs list)
863
-
if r.URL.Query().Get("include_dids") == "true" {
864
-
ops, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
865
-
if err == nil {
866
-
// Extract unique DIDs
867
-
didSet := make(map[string]bool)
868
-
for _, op := range ops {
869
-
didSet[op.DID] = true
870
-
}
871
-
dids := make([]string, 0, len(didSet))
872
-
for did := range didSet {
873
-
dids = append(dids, did)
874
-
}
875
-
result["dids"] = dids
793
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
794
+
result["parent"] = prevBundle.Hash // Parent chain hash
795
+
result["cursor"] = prevBundle.EndTime.Format(time.RFC3339Nano)
876
796
}
877
797
}
878
798
···
888
808
return
889
809
}
890
810
891
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
811
+
// Get from library
812
+
dids, didCount, err := s.bundleManager.GetDIDsForBundle(r.Context(), bundleNum)
892
813
if err != nil {
893
814
resp.error("bundle not found", http.StatusNotFound)
894
815
return
895
816
}
896
817
897
-
// Query DIDs from dids table instead
898
-
dids, err := s.db.GetDIDsForBundle(r.Context(), bundleNum)
899
-
if err != nil {
900
-
resp.error(fmt.Sprintf("failed to get DIDs: %v", err), http.StatusInternalServerError)
901
-
return
902
-
}
903
-
904
818
resp.json(map[string]interface{}{
905
-
"plc_bundle_number": bundle.BundleNumber,
906
-
"did_count": bundle.DIDCount,
819
+
"plc_bundle_number": bundleNum,
820
+
"did_count": didCount,
907
821
"dids": dids,
908
822
})
909
823
}
···
919
833
920
834
compressed := r.URL.Query().Get("compressed") != "false"
921
835
922
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNum)
836
+
bundle, err := s.bundleManager.GetBundleMetadata(bundleNum)
923
837
if err == nil {
924
838
// Bundle exists, serve it normally
925
839
resp.bundleHeaders(bundle)
···
933
847
}
934
848
935
849
// Bundle not found - check if it's the upcoming bundle
936
-
lastBundle, err := s.db.GetLastBundleNumber(r.Context())
937
-
if err != nil {
938
-
resp.error("bundle not found", http.StatusNotFound)
939
-
return
940
-
}
941
-
850
+
lastBundle := s.bundleManager.GetLastBundleNumber()
942
851
if bundleNum == lastBundle+1 {
943
852
// This is the upcoming bundle - serve from mempool
944
-
s.serveUpcomingBundle(w, r, bundleNum)
853
+
s.serveUpcomingBundle(w, bundleNum)
945
854
return
946
855
}
947
856
···
949
858
resp.error("bundle not found", http.StatusNotFound)
950
859
}
951
860
952
-
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, r *http.Request, bundleNum int) {
953
-
ctx := r.Context()
954
-
955
-
// Get mempool count
956
-
mempoolCount, err := s.db.GetMempoolCount(ctx)
957
-
if err != nil {
958
-
http.Error(w, fmt.Sprintf("failed to get mempool count: %v", err), http.StatusInternalServerError)
959
-
return
960
-
}
861
+
func (s *Server) serveUpcomingBundle(w http.ResponseWriter, bundleNum int) {
862
+
// Get mempool stats
863
+
stats := s.bundleManager.GetMempoolStats()
864
+
count, ok := stats["count"].(int)
961
865
962
-
if mempoolCount == 0 {
866
+
if !ok || count == 0 {
963
867
http.Error(w, "upcoming bundle is empty (no operations in mempool)", http.StatusNotFound)
964
868
return
965
869
}
966
870
967
-
// Get mempool operations (up to BUNDLE_SIZE)
968
-
mempoolOps, err := s.db.GetMempoolOperations(ctx, plc.BUNDLE_SIZE)
871
+
// Get operations from mempool
872
+
ops, err := s.bundleManager.GetMempoolOperations()
969
873
if err != nil {
970
874
http.Error(w, fmt.Sprintf("failed to get mempool operations: %v", err), http.StatusInternalServerError)
971
875
return
972
876
}
973
877
974
-
if len(mempoolOps) == 0 {
975
-
http.Error(w, "upcoming bundle is empty", http.StatusNotFound)
878
+
if len(ops) == 0 {
879
+
http.Error(w, "no operations in mempool", http.StatusNotFound)
976
880
return
977
881
}
978
882
979
-
// Get time range
980
-
firstOp := mempoolOps[0]
981
-
lastOp := mempoolOps[len(mempoolOps)-1]
883
+
// Calculate times
884
+
firstOp := ops[0]
885
+
lastOp := ops[len(ops)-1]
982
886
983
887
// Extract unique DIDs
984
888
didSet := make(map[string]bool)
985
-
for _, op := range mempoolOps {
889
+
for _, op := range ops {
986
890
didSet[op.DID] = true
987
891
}
988
892
893
+
// Calculate uncompressed size
894
+
uncompressedSize := int64(0)
895
+
for _, op := range ops {
896
+
uncompressedSize += int64(len(op.RawJSON)) + 1 // +1 for newline
897
+
}
898
+
989
899
// Get previous bundle hash
990
900
prevBundleHash := ""
991
901
if bundleNum > 1 {
992
-
if prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1); err == nil {
902
+
if prevBundle, err := s.bundleManager.GetBundleMetadata(bundleNum - 1); err == nil {
993
903
prevBundleHash = prevBundle.Hash
994
904
}
995
905
}
996
906
997
-
// Serialize operations to JSONL
998
-
var buf []byte
999
-
for _, mop := range mempoolOps {
1000
-
buf = append(buf, []byte(mop.Operation)...)
1001
-
buf = append(buf, '\n')
1002
-
}
1003
-
1004
-
// Calculate size
1005
-
uncompressedSize := int64(len(buf))
1006
-
1007
907
// Set headers
1008
908
w.Header().Set("X-Bundle-Number", fmt.Sprintf("%d", bundleNum))
1009
909
w.Header().Set("X-Bundle-Is-Upcoming", "true")
1010
910
w.Header().Set("X-Bundle-Status", "preview")
1011
911
w.Header().Set("X-Bundle-Start-Time", firstOp.CreatedAt.Format(time.RFC3339Nano))
1012
912
w.Header().Set("X-Bundle-Current-End-Time", lastOp.CreatedAt.Format(time.RFC3339Nano))
1013
-
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(mempoolOps)))
1014
-
w.Header().Set("X-Bundle-Target-Count", fmt.Sprintf("%d", plc.BUNDLE_SIZE))
1015
-
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(mempoolOps))/float64(plc.BUNDLE_SIZE)*100))
913
+
w.Header().Set("X-Bundle-Operation-Count", fmt.Sprintf("%d", len(ops)))
914
+
w.Header().Set("X-Bundle-Target-Count", "10000")
915
+
w.Header().Set("X-Bundle-Progress-Percent", fmt.Sprintf("%.2f", float64(len(ops))/100.0))
1016
916
w.Header().Set("X-Bundle-DID-Count", fmt.Sprintf("%d", len(didSet)))
1017
917
w.Header().Set("X-Bundle-Prev-Hash", prevBundleHash)
918
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
1018
919
1019
920
w.Header().Set("Content-Type", "application/jsonl")
1020
921
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d-upcoming.jsonl", bundleNum))
1021
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", uncompressedSize))
1022
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", uncompressedSize))
1023
922
923
+
// Stream operations as JSONL
1024
924
w.WriteHeader(http.StatusOK)
1025
-
w.Write(buf)
925
+
926
+
for _, op := range ops {
927
+
// Use RawJSON if available (preserves exact format)
928
+
if len(op.RawJSON) > 0 {
929
+
w.Write(op.RawJSON)
930
+
} else {
931
+
// Fallback to marshaling
932
+
data, _ := json.Marshal(op)
933
+
w.Write(data)
934
+
}
935
+
w.Write([]byte("\n"))
936
+
}
1026
937
}
1027
938
1028
-
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
939
+
func (s *Server) serveCompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
1029
940
resp := newResponse(w)
1030
-
path := bundle.GetFilePath(s.plcBundleDir)
1031
941
1032
-
file, err := os.Open(path)
942
+
// Use the new streaming API for compressed data
943
+
reader, err := s.bundleManager.StreamRaw(r.Context(), bundle.BundleNumber)
1033
944
if err != nil {
1034
-
resp.error("bundle file not found on disk", http.StatusNotFound)
945
+
resp.error(fmt.Sprintf("error streaming compressed bundle: %v", err), http.StatusInternalServerError)
1035
946
return
1036
947
}
1037
-
defer file.Close()
1038
-
1039
-
fileInfo, _ := file.Stat()
948
+
defer reader.Close()
1040
949
1041
950
w.Header().Set("Content-Type", "application/zstd")
1042
951
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl.zst", bundle.BundleNumber))
1043
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", fileInfo.Size()))
1044
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", fileInfo.Size()))
952
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.CompressedSize))
953
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
1045
954
1046
-
http.ServeContent(w, r, filepath.Base(path), bundle.CreatedAt, file)
955
+
// Stream the data directly to the response
956
+
w.WriteHeader(http.StatusOK)
957
+
io.Copy(w, reader)
1047
958
}
1048
959
1049
-
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *storage.PLCBundle) {
960
+
func (s *Server) serveUncompressedBundle(w http.ResponseWriter, r *http.Request, bundle *plcbundle.BundleMetadata) {
1050
961
resp := newResponse(w)
1051
962
1052
-
ops, err := s.bundleManager.LoadBundleOperations(r.Context(), bundle.BundleNumber)
963
+
// Use the new streaming API for decompressed data
964
+
reader, err := s.bundleManager.StreamDecompressed(r.Context(), bundle.BundleNumber)
1053
965
if err != nil {
1054
-
resp.error(fmt.Sprintf("error loading bundle: %v", err), http.StatusInternalServerError)
966
+
resp.error(fmt.Sprintf("error streaming decompressed bundle: %v", err), http.StatusInternalServerError)
1055
967
return
1056
968
}
1057
-
1058
-
// Serialize to JSONL
1059
-
var buf []byte
1060
-
for _, op := range ops {
1061
-
buf = append(buf, op.RawJSON...)
1062
-
buf = append(buf, '\n')
1063
-
}
1064
-
1065
-
fileInfo, _ := os.Stat(bundle.GetFilePath(s.plcBundleDir))
1066
-
compressedSize := int64(0)
1067
-
if fileInfo != nil {
1068
-
compressedSize = fileInfo.Size()
1069
-
}
969
+
defer reader.Close()
1070
970
1071
971
w.Header().Set("Content-Type", "application/jsonl")
1072
972
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%06d.jsonl", bundle.BundleNumber))
1073
-
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(buf)))
1074
-
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", compressedSize))
1075
-
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", len(buf)))
1076
-
if compressedSize > 0 {
1077
-
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(len(buf))/float64(compressedSize)))
973
+
w.Header().Set("Content-Length", fmt.Sprintf("%d", bundle.UncompressedSize))
974
+
w.Header().Set("X-Compressed-Size", fmt.Sprintf("%d", bundle.CompressedSize))
975
+
w.Header().Set("X-Uncompressed-Size", fmt.Sprintf("%d", bundle.UncompressedSize))
976
+
if bundle.CompressedSize > 0 {
977
+
w.Header().Set("X-Compression-Ratio", fmt.Sprintf("%.2f", float64(bundle.UncompressedSize)/float64(bundle.CompressedSize)))
1078
978
}
1079
979
980
+
// Stream the data directly to the response
1080
981
w.WriteHeader(http.StatusOK)
1081
-
w.Write(buf)
982
+
io.Copy(w, reader)
1082
983
}
1083
984
1084
985
func (s *Server) handleGetPLCBundles(w http.ResponseWriter, r *http.Request) {
1085
986
resp := newResponse(w)
1086
987
limit := getQueryInt(r, "limit", 50)
1087
988
1088
-
bundles, err := s.db.GetBundles(r.Context(), limit)
1089
-
if err != nil {
1090
-
resp.error(err.Error(), http.StatusInternalServerError)
1091
-
return
1092
-
}
989
+
bundles := s.bundleManager.GetBundles(limit)
1093
990
1094
991
response := make([]map[string]interface{}, len(bundles))
1095
992
for i, bundle := range bundles {
1096
-
response[i] = formatBundleResponse(bundle)
993
+
response[i] = formatBundleMetadata(bundle)
1097
994
}
1098
995
1099
996
resp.json(response)
···
1102
999
func (s *Server) handleGetPLCBundleStats(w http.ResponseWriter, r *http.Request) {
1103
1000
resp := newResponse(w)
1104
1001
1105
-
count, compressedSize, uncompressedSize, lastBundle, err := s.db.GetBundleStats(r.Context())
1106
-
if err != nil {
1107
-
resp.error(err.Error(), http.StatusInternalServerError)
1108
-
return
1109
-
}
1002
+
stats := s.bundleManager.GetBundleStats()
1003
+
1004
+
bundleCount := stats["bundle_count"].(int64)
1005
+
totalSize := stats["total_size"].(int64)
1006
+
totalUncompressedSize := stats["total_uncompressed_size"].(int64)
1007
+
lastBundle := stats["last_bundle"].(int64)
1110
1008
1111
1009
resp.json(map[string]interface{}{
1112
-
"plc_bundle_count": count,
1113
-
"last_bundle_number": lastBundle,
1114
-
"total_compressed_size": compressedSize,
1115
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1116
-
"total_compressed_size_gb": float64(compressedSize) / 1024 / 1024 / 1024,
1117
-
"total_uncompressed_size": uncompressedSize,
1118
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1119
-
"total_uncompressed_size_gb": float64(uncompressedSize) / 1024 / 1024 / 1024,
1120
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1010
+
"plc_bundle_count": bundleCount,
1011
+
"last_bundle_number": lastBundle,
1012
+
"total_compressed_size": totalSize,
1013
+
"total_uncompressed_size": totalUncompressedSize,
1014
+
"overall_compression_ratio": float64(totalUncompressedSize) / float64(totalSize),
1121
1015
})
1122
1016
}
1123
1017
···
1125
1019
1126
1020
func (s *Server) handleGetMempoolStats(w http.ResponseWriter, r *http.Request) {
1127
1021
resp := newResponse(w)
1128
-
ctx := r.Context()
1129
1022
1130
-
count, err := s.db.GetMempoolCount(ctx)
1131
-
if err != nil {
1132
-
resp.error(err.Error(), http.StatusInternalServerError)
1133
-
return
1134
-
}
1023
+
// Get stats from library's mempool via wrapper method
1024
+
stats := s.bundleManager.GetMempoolStats()
1135
1025
1136
-
uniqueDIDCount, err := s.db.GetMempoolUniqueDIDCount(ctx)
1137
-
if err != nil {
1138
-
resp.error(err.Error(), http.StatusInternalServerError)
1139
-
return
1026
+
// Convert to API response format
1027
+
result := map[string]interface{}{
1028
+
"operation_count": stats["count"],
1029
+
"can_create_bundle": stats["can_create_bundle"],
1140
1030
}
1141
1031
1142
-
uncompressedSize, err := s.db.GetMempoolUncompressedSize(ctx)
1143
-
if err != nil {
1144
-
resp.error(err.Error(), http.StatusInternalServerError)
1145
-
return
1032
+
// Add size information
1033
+
if sizeBytes, ok := stats["size_bytes"]; ok {
1034
+
result["uncompressed_size"] = sizeBytes
1035
+
result["uncompressed_size_mb"] = float64(sizeBytes.(int)) / 1024 / 1024
1146
1036
}
1147
1037
1148
-
result := map[string]interface{}{
1149
-
"operation_count": count,
1150
-
"unique_did_count": uniqueDIDCount,
1151
-
"uncompressed_size": uncompressedSize,
1152
-
"uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1153
-
"can_create_bundle": count >= plc.BUNDLE_SIZE,
1154
-
}
1038
+
// Add time range and calculate estimated completion
1039
+
if count, ok := stats["count"].(int); ok && count > 0 {
1040
+
if firstTime, ok := stats["first_time"].(time.Time); ok {
1041
+
result["mempool_start_time"] = firstTime
1155
1042
1156
-
if count > 0 {
1157
-
if firstOp, err := s.db.GetFirstMempoolOperation(ctx); err == nil && firstOp != nil {
1158
-
result["mempool_start_time"] = firstOp.CreatedAt
1043
+
if lastTime, ok := stats["last_time"].(time.Time); ok {
1044
+
result["mempool_end_time"] = lastTime
1159
1045
1160
-
if count < plc.BUNDLE_SIZE {
1161
-
if lastOp, err := s.db.GetLastMempoolOperation(ctx); err == nil && lastOp != nil {
1162
-
timeSpan := lastOp.CreatedAt.Sub(firstOp.CreatedAt).Seconds()
1046
+
// Calculate estimated next bundle time if not complete
1047
+
if count < 10000 {
1048
+
timeSpan := lastTime.Sub(firstTime).Seconds()
1163
1049
if timeSpan > 0 {
1164
1050
opsPerSecond := float64(count) / timeSpan
1165
1051
if opsPerSecond > 0 {
1166
-
remainingOps := plc.BUNDLE_SIZE - count
1052
+
remainingOps := 10000 - count
1167
1053
secondsNeeded := float64(remainingOps) / opsPerSecond
1168
-
result["estimated_next_bundle_time"] = time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1169
-
result["operations_needed"] = remainingOps
1054
+
estimatedTime := time.Now().Add(time.Duration(secondsNeeded) * time.Second)
1055
+
1056
+
result["estimated_next_bundle_time"] = estimatedTime
1170
1057
result["current_rate_per_second"] = opsPerSecond
1058
+
result["operations_needed"] = remainingOps
1171
1059
}
1172
1060
}
1061
+
result["progress_percent"] = float64(count) / 100.0
1062
+
} else {
1063
+
// Ready to create bundle
1064
+
result["estimated_next_bundle_time"] = time.Now()
1065
+
result["operations_needed"] = 0
1173
1066
}
1174
-
} else {
1175
-
result["estimated_next_bundle_time"] = time.Now()
1176
-
result["operations_needed"] = 0
1177
1067
}
1178
1068
}
1179
1069
} else {
1070
+
// Empty mempool
1180
1071
result["mempool_start_time"] = nil
1181
1072
result["estimated_next_bundle_time"] = nil
1182
1073
}
···
1201
1092
1202
1093
// ===== VERIFICATION HANDLERS =====
1203
1094
1204
-
func (s *Server) handleVerifyPLCBundle(w http.ResponseWriter, r *http.Request) {
1205
-
resp := newResponse(w)
1206
-
vars := mux.Vars(r)
1207
-
1208
-
bundleNumber, err := strconv.Atoi(vars["bundleNumber"])
1209
-
if err != nil {
1210
-
resp.error("Invalid bundle number", http.StatusBadRequest)
1211
-
return
1212
-
}
1213
-
1214
-
bundle, err := s.db.GetBundleByNumber(r.Context(), bundleNumber)
1215
-
if err != nil {
1216
-
resp.error("Bundle not found", http.StatusNotFound)
1217
-
return
1218
-
}
1219
-
1220
-
// Fetch from PLC and verify
1221
-
remoteOps, prevCIDs, err := s.fetchRemoteBundleOps(r.Context(), bundleNumber)
1222
-
if err != nil {
1223
-
resp.error(fmt.Sprintf("Failed to fetch from PLC directory: %v", err), http.StatusInternalServerError)
1224
-
return
1225
-
}
1226
-
1227
-
remoteHash := computeOperationsHash(remoteOps)
1228
-
verified := bundle.Hash == remoteHash
1229
-
1230
-
resp.json(map[string]interface{}{
1231
-
"bundle_number": bundleNumber,
1232
-
"verified": verified,
1233
-
"local_hash": bundle.Hash,
1234
-
"remote_hash": remoteHash,
1235
-
"local_op_count": plc.BUNDLE_SIZE,
1236
-
"remote_op_count": len(remoteOps),
1237
-
"boundary_cids_used": len(prevCIDs),
1238
-
})
1239
-
}
1240
-
1241
-
func (s *Server) fetchRemoteBundleOps(ctx context.Context, bundleNum int) ([]plc.PLCOperation, map[string]bool, error) {
1242
-
var after string
1243
-
var prevBoundaryCIDs map[string]bool
1244
-
1245
-
if bundleNum > 1 {
1246
-
prevBundle, err := s.db.GetBundleByNumber(ctx, bundleNum-1)
1247
-
if err != nil {
1248
-
return nil, nil, fmt.Errorf("failed to get previous bundle: %w", err)
1249
-
}
1250
-
1251
-
after = prevBundle.EndTime.Format("2006-01-02T15:04:05.000Z")
1252
-
1253
-
if len(prevBundle.BoundaryCIDs) > 0 {
1254
-
prevBoundaryCIDs = make(map[string]bool)
1255
-
for _, cid := range prevBundle.BoundaryCIDs {
1256
-
prevBoundaryCIDs[cid] = true
1257
-
}
1258
-
}
1259
-
}
1260
-
1261
-
var allRemoteOps []plc.PLCOperation
1262
-
seenCIDs := make(map[string]bool)
1263
-
1264
-
for cid := range prevBoundaryCIDs {
1265
-
seenCIDs[cid] = true
1266
-
}
1267
-
1268
-
currentAfter := after
1269
-
maxFetches := 20
1270
-
1271
-
for fetchNum := 0; fetchNum < maxFetches && len(allRemoteOps) < plc.BUNDLE_SIZE; fetchNum++ {
1272
-
batch, err := s.plcClient.Export(ctx, plc.ExportOptions{
1273
-
Count: 1000,
1274
-
After: currentAfter,
1275
-
})
1276
-
if err != nil || len(batch) == 0 {
1277
-
break
1278
-
}
1279
-
1280
-
for _, op := range batch {
1281
-
if !seenCIDs[op.CID] {
1282
-
seenCIDs[op.CID] = true
1283
-
allRemoteOps = append(allRemoteOps, op)
1284
-
if len(allRemoteOps) >= plc.BUNDLE_SIZE {
1285
-
break
1286
-
}
1287
-
}
1288
-
}
1289
-
1290
-
if len(batch) > 0 {
1291
-
lastOp := batch[len(batch)-1]
1292
-
currentAfter = lastOp.CreatedAt.Format("2006-01-02T15:04:05.000Z")
1293
-
}
1294
-
1295
-
if len(batch) < 1000 {
1296
-
break
1297
-
}
1298
-
}
1299
-
1300
-
if len(allRemoteOps) > plc.BUNDLE_SIZE {
1301
-
allRemoteOps = allRemoteOps[:plc.BUNDLE_SIZE]
1302
-
}
1303
-
1304
-
return allRemoteOps, prevBoundaryCIDs, nil
1305
-
}
1306
-
1307
1095
func (s *Server) handleVerifyChain(w http.ResponseWriter, r *http.Request) {
1308
1096
resp := newResponse(w)
1309
-
ctx := r.Context()
1310
1097
1311
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1312
-
if err != nil {
1313
-
resp.error(err.Error(), http.StatusInternalServerError)
1314
-
return
1315
-
}
1316
-
1098
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1317
1099
if lastBundle == 0 {
1318
1100
resp.json(map[string]interface{}{
1319
1101
"status": "empty",
···
1327
1109
var errorMsg string
1328
1110
1329
1111
for i := 1; i <= lastBundle; i++ {
1330
-
bundle, err := s.db.GetBundleByNumber(ctx, i)
1112
+
bundle, err := s.bundleManager.GetBundleMetadata(i)
1331
1113
if err != nil {
1332
1114
valid = false
1333
1115
brokenAt = i
···
1336
1118
}
1337
1119
1338
1120
if i > 1 {
1339
-
prevBundle, err := s.db.GetBundleByNumber(ctx, i-1)
1121
+
prevBundle, err := s.bundleManager.GetBundleMetadata(i - 1)
1340
1122
if err != nil {
1341
1123
valid = false
1342
1124
brokenAt = i
···
1344
1126
break
1345
1127
}
1346
1128
1347
-
if bundle.PrevBundleHash != prevBundle.Hash {
1129
+
if bundle.Parent != prevBundle.Hash {
1348
1130
valid = false
1349
1131
brokenAt = i
1350
-
errorMsg = fmt.Sprintf("Chain broken: bundle %06d prev_hash doesn't match bundle %06d hash", i, i-1)
1132
+
errorMsg = fmt.Sprintf("Chain broken: bundle %06d parent doesn't match bundle %06d hash", i, i-1)
1351
1133
break
1352
1134
}
1353
1135
}
···
1368
1150
1369
1151
func (s *Server) handleGetChainInfo(w http.ResponseWriter, r *http.Request) {
1370
1152
resp := newResponse(w)
1371
-
ctx := r.Context()
1372
1153
1373
-
lastBundle, err := s.db.GetLastBundleNumber(ctx)
1374
-
if err != nil {
1375
-
resp.error(err.Error(), http.StatusInternalServerError)
1376
-
return
1377
-
}
1378
-
1154
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1379
1155
if lastBundle == 0 {
1380
1156
resp.json(map[string]interface{}{
1381
1157
"chain_length": 0,
···
1384
1160
return
1385
1161
}
1386
1162
1387
-
firstBundle, _ := s.db.GetBundleByNumber(ctx, 1)
1388
-
lastBundleData, _ := s.db.GetBundleByNumber(ctx, lastBundle)
1389
-
1390
-
// Updated to receive 5 values instead of 3
1391
-
count, compressedSize, uncompressedSize, _, err := s.db.GetBundleStats(ctx)
1392
-
if err != nil {
1393
-
resp.error(err.Error(), http.StatusInternalServerError)
1394
-
return
1395
-
}
1163
+
firstBundle, _ := s.bundleManager.GetBundleMetadata(1)
1164
+
lastBundleData, _ := s.bundleManager.GetBundleMetadata(lastBundle)
1165
+
stats := s.bundleManager.GetBundleStats()
1396
1166
1397
1167
resp.json(map[string]interface{}{
1398
-
"chain_length": lastBundle,
1399
-
"total_bundles": count,
1400
-
"total_compressed_size": compressedSize,
1401
-
"total_compressed_size_mb": float64(compressedSize) / 1024 / 1024,
1402
-
"total_uncompressed_size": uncompressedSize,
1403
-
"total_uncompressed_size_mb": float64(uncompressedSize) / 1024 / 1024,
1404
-
"compression_ratio": float64(uncompressedSize) / float64(compressedSize),
1405
-
"chain_start_time": firstBundle.StartTime,
1406
-
"chain_end_time": lastBundleData.EndTime,
1407
-
"chain_head_hash": lastBundleData.Hash,
1408
-
"first_prev_hash": firstBundle.PrevBundleHash,
1409
-
"last_prev_hash": lastBundleData.PrevBundleHash,
1168
+
"chain_length": lastBundle,
1169
+
"total_bundles": stats["bundle_count"],
1170
+
"total_compressed_size": stats["total_size"],
1171
+
"total_compressed_size_mb": float64(stats["total_size"].(int64)) / 1024 / 1024,
1172
+
"chain_start_time": firstBundle.StartTime,
1173
+
"chain_end_time": lastBundleData.EndTime,
1174
+
"chain_head_hash": lastBundleData.Hash,
1175
+
"first_parent": firstBundle.Parent,
1176
+
"last_parent": lastBundleData.Parent,
1410
1177
})
1411
1178
}
1412
1179
···
1427
1194
return
1428
1195
}
1429
1196
1430
-
startBundle := s.findStartBundle(ctx, afterTime)
1197
+
startBundle := s.findStartBundle(afterTime)
1431
1198
ops := s.collectOperations(ctx, startBundle, afterTime, count)
1432
1199
1433
1200
w.Header().Set("Content-Type", "application/jsonl")
···
1467
1234
return time.Time{}, fmt.Errorf("invalid timestamp format")
1468
1235
}
1469
1236
1470
-
func (s *Server) findStartBundle(ctx context.Context, afterTime time.Time) int {
1237
+
func (s *Server) findStartBundle(afterTime time.Time) int {
1471
1238
if afterTime.IsZero() {
1472
1239
return 1
1473
1240
}
1474
1241
1475
-
foundBundle, err := s.db.GetBundleForTimestamp(ctx, afterTime)
1476
-
if err != nil {
1477
-
return 1
1478
-
}
1479
-
1242
+
foundBundle := s.bundleManager.FindBundleForTimestamp(afterTime)
1480
1243
if foundBundle > 1 {
1481
1244
return foundBundle - 1
1482
1245
}
···
1487
1250
var allOps []plc.PLCOperation
1488
1251
seenCIDs := make(map[string]bool)
1489
1252
1490
-
lastBundle, _ := s.db.GetLastBundleNumber(ctx)
1253
+
lastBundle := s.bundleManager.GetLastBundleNumber()
1491
1254
1492
1255
for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ {
1493
1256
ops, err := s.bundleManager.LoadBundleOperations(ctx, bundleNum)
···
1647
1410
limit := getQueryInt(r, "limit", 0)
1648
1411
fromBundle := getQueryInt(r, "from", 1)
1649
1412
1650
-
history, err := s.db.GetPLCHistory(r.Context(), limit, fromBundle)
1413
+
// Use BundleManager instead of database
1414
+
history, err := s.bundleManager.GetPLCHistory(r.Context(), limit, fromBundle)
1651
1415
if err != nil {
1652
1416
resp.error(err.Error(), http.StatusInternalServerError)
1653
1417
return
···
1719
1483
})
1720
1484
}
1721
1485
1722
-
// ===== UTILITY FUNCTIONS =====
1486
+
func (s *Server) handleGetBundleLabels(w http.ResponseWriter, r *http.Request) {
1487
+
resp := newResponse(w)
1723
1488
1724
-
func computeOperationsHash(ops []plc.PLCOperation) string {
1725
-
var jsonlData []byte
1726
-
for _, op := range ops {
1727
-
jsonlData = append(jsonlData, op.RawJSON...)
1728
-
jsonlData = append(jsonlData, '\n')
1489
+
bundleNum, err := getBundleNumber(r)
1490
+
if err != nil {
1491
+
resp.error("invalid bundle number", http.StatusBadRequest)
1492
+
return
1729
1493
}
1730
-
hash := sha256.Sum256(jsonlData)
1731
-
return hex.EncodeToString(hash[:])
1494
+
1495
+
labels, err := s.bundleManager.GetBundleLabels(r.Context(), bundleNum)
1496
+
if err != nil {
1497
+
resp.error(err.Error(), http.StatusInternalServerError)
1498
+
return
1499
+
}
1500
+
1501
+
resp.json(map[string]interface{}{
1502
+
"bundle": bundleNum,
1503
+
"count": len(labels),
1504
+
"labels": labels,
1505
+
})
1732
1506
}
1507
+
1508
+
// ===== UTILITY FUNCTIONS =====
1733
1509
1734
1510
func normalizeEndpoint(endpoint string) string {
1735
1511
endpoint = strings.TrimPrefix(endpoint, "https://")
+7
-13
internal/api/server.go
+7
-13
internal/api/server.go
···
6
6
"net/http"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/plc"
12
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/plc"
12
+
"github.com/atscan/atscand/internal/storage"
13
13
"github.com/gorilla/handlers"
14
14
"github.com/gorilla/mux"
15
15
)
···
18
18
router *mux.Router
19
19
server *http.Server
20
20
db storage.Database
21
-
plcClient *plc.Client
22
21
plcBundleDir string
23
22
bundleManager *plc.BundleManager
24
23
plcIndexDIDs bool
25
24
}
26
25
27
-
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server {
28
-
bundleManager, err := plc.NewBundleManager(plcCfg.BundleDir, plcCfg.DirectoryURL, db, plcCfg.IndexDIDs)
29
-
if err != nil {
30
-
log.Fatal("Failed to create bundle manager: %v", err)
31
-
}
32
-
26
+
func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig, bundleManager *plc.BundleManager) *Server {
33
27
s := &Server{
34
28
router: mux.NewRouter(),
35
29
db: db,
36
30
plcBundleDir: plcCfg.BundleDir,
37
-
bundleManager: bundleManager,
31
+
bundleManager: bundleManager, // Use provided shared instance
38
32
plcIndexDIDs: plcCfg.IndexDIDs,
39
33
}
40
34
···
90
84
api.HandleFunc("/plc/bundles/{number}", s.handleGetPLCBundle).Methods("GET")
91
85
api.HandleFunc("/plc/bundles/{number}/dids", s.handleGetPLCBundleDIDs).Methods("GET")
92
86
api.HandleFunc("/plc/bundles/{number}/download", s.handleDownloadPLCBundle).Methods("GET")
93
-
api.HandleFunc("/plc/bundles/{bundleNumber}/verify", s.handleVerifyPLCBundle).Methods("POST")
87
+
api.HandleFunc("/plc/bundles/{number}/labels", s.handleGetBundleLabels).Methods("GET")
94
88
95
89
// PLC history/metrics
96
90
api.HandleFunc("/plc/history", s.handleGetPLCHistory).Methods("GET")
+44
-45
internal/pds/client.go
+44
-45
internal/pds/client.go
···
84
84
}
85
85
86
86
// DescribeServer fetches com.atproto.server.describeServer
87
-
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, error) {
87
+
// Returns: description, responseTime, usedIP, error
88
+
func (c *Client) DescribeServer(ctx context.Context, endpoint string) (*ServerDescription, time.Duration, string, error) {
89
+
startTime := time.Now()
88
90
url := fmt.Sprintf("%s/xrpc/com.atproto.server.describeServer", endpoint)
89
91
90
-
//fmt.Println(url)
92
+
// Track which IP was used
93
+
var usedIP string
94
+
transport := &http.Transport{
95
+
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
96
+
conn, err := (&net.Dialer{
97
+
Timeout: 30 * time.Second,
98
+
KeepAlive: 30 * time.Second,
99
+
}).DialContext(ctx, network, addr)
100
+
101
+
if err == nil && conn != nil {
102
+
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
103
+
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
104
+
usedIP = tcpAddr.IP.String()
105
+
}
106
+
}
107
+
}
108
+
return conn, err
109
+
},
110
+
}
111
+
112
+
client := &http.Client{
113
+
Timeout: c.httpClient.Timeout,
114
+
Transport: transport,
115
+
}
91
116
92
117
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
93
118
if err != nil {
94
-
return nil, err
119
+
return nil, 0, "", err
95
120
}
96
121
97
-
resp, err := c.httpClient.Do(req)
122
+
resp, err := client.Do(req)
123
+
responseTime := time.Since(startTime)
124
+
98
125
if err != nil {
99
-
return nil, err
126
+
return nil, responseTime, usedIP, err
100
127
}
101
128
defer resp.Body.Close()
102
129
103
130
if resp.StatusCode != http.StatusOK {
104
-
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
131
+
return nil, responseTime, usedIP, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
105
132
}
106
133
107
134
var desc ServerDescription
108
135
if err := json.NewDecoder(resp.Body).Decode(&desc); err != nil {
109
-
return nil, err
136
+
return nil, responseTime, usedIP, err
110
137
}
111
138
112
-
return &desc, nil
139
+
return &desc, responseTime, usedIP, nil
113
140
}
114
141
115
142
// CheckHealth performs a basic health check, ensuring the endpoint returns JSON with a "version"
116
-
// Returns: available, responseTime, version, usedIP, error
117
-
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, string, error) {
143
+
// Returns: available, responseTime, version, error
144
+
func (c *Client) CheckHealth(ctx context.Context, endpoint string) (bool, time.Duration, string, error) {
118
145
startTime := time.Now()
119
146
120
147
url := fmt.Sprintf("%s/xrpc/_health", endpoint)
121
148
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
122
149
if err != nil {
123
-
return false, 0, "", "", err
124
-
}
125
-
126
-
// Create a custom dialer to track which IP was actually used
127
-
var usedIP string
128
-
transport := &http.Transport{
129
-
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
130
-
conn, err := (&net.Dialer{
131
-
Timeout: 30 * time.Second,
132
-
KeepAlive: 30 * time.Second,
133
-
}).DialContext(ctx, network, addr)
134
-
135
-
if err == nil && conn != nil {
136
-
if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
137
-
// Extract IP from "ip:port" format
138
-
if tcpAddr, ok := remoteAddr.(*net.TCPAddr); ok {
139
-
usedIP = tcpAddr.IP.String()
140
-
}
141
-
}
142
-
}
143
-
144
-
return conn, err
145
-
},
150
+
return false, 0, "", err
146
151
}
147
152
148
-
// Create a client with our custom transport
149
-
client := &http.Client{
150
-
Timeout: c.httpClient.Timeout,
151
-
Transport: transport,
152
-
}
153
-
154
-
resp, err := client.Do(req)
153
+
resp, err := c.httpClient.Do(req)
155
154
duration := time.Since(startTime)
156
155
157
156
if err != nil {
158
-
return false, duration, "", usedIP, err
157
+
return false, duration, "", err
159
158
}
160
159
defer resp.Body.Close()
161
160
162
161
if resp.StatusCode != http.StatusOK {
163
-
return false, duration, "", usedIP, fmt.Errorf("health check returned status %d", resp.StatusCode)
162
+
return false, duration, "", fmt.Errorf("health check returned status %d", resp.StatusCode)
164
163
}
165
164
166
165
// Decode the JSON response and check for "version"
···
169
168
}
170
169
171
170
if err := json.NewDecoder(resp.Body).Decode(&healthResponse); err != nil {
172
-
return false, duration, "", usedIP, fmt.Errorf("failed to decode health JSON: %w", err)
171
+
return false, duration, "", fmt.Errorf("failed to decode health JSON: %w", err)
173
172
}
174
173
175
174
if healthResponse.Version == "" {
176
-
return false, duration, "", usedIP, fmt.Errorf("health JSON response missing 'version' field")
175
+
return false, duration, "", fmt.Errorf("health JSON response missing 'version' field")
177
176
}
178
177
179
178
// All checks passed
180
-
return true, duration, healthResponse.Version, usedIP, nil
179
+
return true, duration, healthResponse.Version, nil
181
180
}
+36
-32
internal/pds/scanner.go
+36
-32
internal/pds/scanner.go
···
8
8
"sync/atomic"
9
9
"time"
10
10
11
-
"github.com/acarl005/stripansi"
12
-
"github.com/atscan/atscanner/internal/config"
13
-
"github.com/atscan/atscanner/internal/ipinfo"
14
-
"github.com/atscan/atscanner/internal/log"
15
-
"github.com/atscan/atscanner/internal/monitor"
16
-
"github.com/atscan/atscanner/internal/storage"
11
+
"github.com/atscan/atscand/internal/config"
12
+
"github.com/atscan/atscand/internal/ipinfo"
13
+
"github.com/atscan/atscand/internal/log"
14
+
"github.com/atscan/atscand/internal/monitor"
15
+
"github.com/atscan/atscand/internal/storage"
17
16
)
18
17
19
18
type Scanner struct {
···
40
39
servers, err := s.db.GetEndpoints(ctx, &storage.EndpointFilter{
41
40
Type: "pds",
42
41
OnlyStale: true,
42
+
OnlyValid: true,
43
43
RecheckInterval: s.config.RecheckInterval,
44
44
})
45
45
if err != nil {
···
127
127
// STEP 1: Resolve IPs (both IPv4 and IPv6)
128
128
ips, err := ipinfo.ExtractIPsFromEndpoint(ep.Endpoint)
129
129
if err != nil {
130
-
// Mark as offline due to DNS failure
131
130
s.saveScanResult(ctx, ep.ID, &ScanResult{
132
131
Status: storage.EndpointStatusOffline,
133
132
ErrorMessage: fmt.Sprintf("DNS resolution failed: %v", err),
···
146
145
go s.updateIPInfoIfNeeded(ctx, ips.IPv6)
147
146
}
148
147
149
-
// STEP 2: Health check (now returns which IP was used)
150
-
available, responseTime, version, usedIP, err := s.client.CheckHealth(ctx, ep.Endpoint)
151
-
if err != nil || !available {
152
-
errMsg := "health check failed"
153
-
if err != nil {
154
-
errMsg = err.Error()
155
-
}
148
+
// STEP 2: Call describeServer (primary health check + metadata)
149
+
desc, descResponseTime, usedIP, err := s.client.DescribeServer(ctx, ep.Endpoint)
150
+
if err != nil {
156
151
s.saveScanResult(ctx, ep.ID, &ScanResult{
157
152
Status: storage.EndpointStatusOffline,
158
-
ResponseTime: responseTime,
159
-
ErrorMessage: errMsg,
160
-
UsedIP: usedIP, // Save even if failed
153
+
ResponseTime: descResponseTime,
154
+
ErrorMessage: fmt.Sprintf("describeServer failed: %v", err),
155
+
UsedIP: usedIP,
161
156
})
162
157
return
163
158
}
164
159
165
-
// STEP 3: Fetch PDS-specific data
166
-
desc, err := s.client.DescribeServer(ctx, ep.Endpoint)
167
-
if err != nil {
168
-
log.Verbose("Warning: failed to describe server %s: %v", stripansi.Strip(ep.Endpoint), err)
169
-
} else if desc != nil && desc.DID != "" {
160
+
// Update server DID immediately
161
+
if desc.DID != "" {
170
162
s.db.UpdateEndpointServerDID(ctx, ep.ID, desc.DID)
171
163
}
172
164
173
-
// Fetch repos with full info
165
+
// STEP 3: Call _health to get version
166
+
available, healthResponseTime, version, err := s.client.CheckHealth(ctx, ep.Endpoint)
167
+
if err != nil || !available {
168
+
log.Verbose("Warning: _health check failed for %s: %v", ep.Endpoint, err)
169
+
// Server is online (describeServer worked) but _health failed
170
+
// Continue with empty version
171
+
version = ""
172
+
}
173
+
174
+
// Calculate average response time from both calls
175
+
avgResponseTime := descResponseTime
176
+
if available {
177
+
avgResponseTime = (descResponseTime + healthResponseTime) / 2
178
+
}
179
+
180
+
// STEP 4: Fetch repos
174
181
repoList, err := s.client.ListRepos(ctx, ep.Endpoint)
175
182
if err != nil {
176
183
log.Verbose("Warning: failed to list repos for %s: %v", ep.Endpoint, err)
177
184
repoList = []Repo{}
178
185
}
179
186
180
-
// Convert to DIDs for backward compatibility
187
+
// Convert to DIDs
181
188
dids := make([]string, len(repoList))
182
189
for i, repo := range repoList {
183
190
dids[i] = repo.DID
184
191
}
185
192
186
-
// STEP 4: SAVE scan result
193
+
// STEP 5: SAVE scan result
187
194
s.saveScanResult(ctx, ep.ID, &ScanResult{
188
195
Status: storage.EndpointStatusOnline,
189
-
ResponseTime: responseTime,
196
+
ResponseTime: avgResponseTime,
190
197
Description: desc,
191
198
DIDs: dids,
192
199
Version: version,
193
-
UsedIP: usedIP, // NEW: Save which IP was used
200
+
UsedIP: usedIP, // Only from describeServer
194
201
})
195
202
196
-
// Save repos in batches (only tracks changes)
203
+
// STEP 6: Save repos in batches (only tracks changes)
197
204
if len(repoList) > 0 {
198
-
batchSize := 200000
205
+
batchSize := 100_000
199
206
200
207
log.Verbose("Processing %d repos for %s (tracking changes only)", len(repoList), ep.Endpoint)
201
208
···
235
242
236
243
log.Verbose("✓ Processed %d repos for %s", len(repoList), ep.Endpoint)
237
244
}
238
-
239
-
// IP info fetch already started at the beginning (step 1.5)
240
-
// It will complete in the background
241
245
}
242
246
243
247
func (s *Scanner) saveScanResult(ctx context.Context, endpointID int64, result *ScanResult) {
+390
-41
internal/plc/manager.go
+390
-41
internal/plc/manager.go
···
2
2
3
3
import (
4
4
"context"
5
+
"encoding/csv"
5
6
"fmt"
7
+
"io"
8
+
"os"
9
+
"path/filepath"
10
+
"sort"
11
+
"strconv"
12
+
"strings"
6
13
"time"
7
14
8
-
"github.com/atscan/atscanner/internal/log"
9
-
"github.com/atscan/atscanner/internal/storage"
10
-
plcbundle "github.com/atscan/plcbundle"
15
+
"github.com/atscan/atscand/internal/log"
16
+
"github.com/atscan/atscand/internal/storage"
17
+
"github.com/klauspost/compress/zstd"
18
+
plcbundle "tangled.org/atscan.net/plcbundle"
11
19
)
12
20
13
21
// BundleManager wraps the library's manager with database integration
···
62
70
return bm.libManager.LoadBundle(ctx, bundleNum)
63
71
}
64
72
65
-
// FetchAndSaveBundle fetches next bundle from PLC and saves to both disk and DB
73
+
// FetchAndSaveBundle fetches next bundle from PLC and saves
66
74
func (bm *BundleManager) FetchAndSaveBundle(ctx context.Context) (*plcbundle.Bundle, error) {
67
75
// Fetch from PLC using library
68
76
bundle, err := bm.libManager.FetchNextBundle(ctx)
···
70
78
return nil, err
71
79
}
72
80
73
-
// Save to disk (library)
81
+
// Save to disk (library handles this)
74
82
if err := bm.libManager.SaveBundle(ctx, bundle); err != nil {
75
83
return nil, fmt.Errorf("failed to save bundle to disk: %w", err)
76
84
}
77
85
78
-
// Save to database
79
-
if err := bm.saveBundleToDatabase(ctx, bundle); err != nil {
80
-
return nil, fmt.Errorf("failed to save bundle to database: %w", err)
81
-
}
82
-
83
-
log.Info("✓ Saved bundle %06d (disk + database)", bundle.BundleNumber)
84
-
85
-
return bundle, nil
86
-
}
87
-
88
-
// saveBundleToDatabase saves bundle metadata to PostgreSQL
89
-
func (bm *BundleManager) saveBundleToDatabase(ctx context.Context, bundle *plcbundle.Bundle) error {
90
-
// Convert library bundle to storage bundle
91
-
dbBundle := &storage.PLCBundle{
92
-
BundleNumber: bundle.BundleNumber,
93
-
StartTime: bundle.StartTime,
94
-
EndTime: bundle.EndTime,
95
-
DIDCount: bundle.DIDCount,
96
-
Hash: bundle.Hash,
97
-
CompressedHash: bundle.CompressedHash,
98
-
CompressedSize: bundle.CompressedSize,
99
-
UncompressedSize: bundle.UncompressedSize,
100
-
Cursor: bundle.Cursor,
101
-
PrevBundleHash: bundle.PrevBundleHash,
102
-
Compressed: bundle.Compressed,
103
-
CreatedAt: bundle.CreatedAt,
104
-
}
105
-
106
-
// Save to database
107
-
if err := bm.db.CreateBundle(ctx, dbBundle); err != nil {
108
-
return err
109
-
}
110
-
111
-
// Index DIDs if enabled
86
+
// Index DIDs if enabled (still use database for this)
112
87
if bm.indexDIDs && len(bundle.Operations) > 0 {
113
88
if err := bm.indexBundleDIDs(ctx, bundle); err != nil {
114
89
log.Error("Failed to index DIDs for bundle %d: %v", bundle.BundleNumber, err)
115
-
// Don't fail the entire operation
116
90
}
117
91
}
118
92
119
-
return nil
93
+
log.Info("✓ Saved bundle %06d", bundle.BundleNumber)
94
+
95
+
return bundle, nil
120
96
}
121
97
122
98
// indexBundleDIDs indexes DIDs from a bundle into the database
···
171
147
func (bm *BundleManager) GetChainInfo(ctx context.Context) (map[string]interface{}, error) {
172
148
return bm.libManager.GetInfo(), nil
173
149
}
150
+
151
+
// GetMempoolStats returns mempool statistics from the library
152
+
func (bm *BundleManager) GetMempoolStats() map[string]interface{} {
153
+
return bm.libManager.GetMempoolStats()
154
+
}
155
+
156
+
// GetMempoolOperations returns all operations currently in mempool
157
+
func (bm *BundleManager) GetMempoolOperations() ([]PLCOperation, error) {
158
+
return bm.libManager.GetMempoolOperations()
159
+
}
160
+
161
+
// GetIndex returns the library's bundle index
162
+
func (bm *BundleManager) GetIndex() *plcbundle.Index {
163
+
return bm.libManager.GetIndex()
164
+
}
165
+
166
+
// GetLastBundleNumber returns the last bundle number
167
+
func (bm *BundleManager) GetLastBundleNumber() int {
168
+
index := bm.libManager.GetIndex()
169
+
lastBundle := index.GetLastBundle()
170
+
if lastBundle == nil {
171
+
return 0
172
+
}
173
+
return lastBundle.BundleNumber
174
+
}
175
+
176
+
// GetBundleMetadata gets bundle metadata by number
177
+
func (bm *BundleManager) GetBundleMetadata(bundleNum int) (*plcbundle.BundleMetadata, error) {
178
+
index := bm.libManager.GetIndex()
179
+
return index.GetBundle(bundleNum)
180
+
}
181
+
182
+
// GetBundles returns the most recent bundles (newest first)
183
+
func (bm *BundleManager) GetBundles(limit int) []*plcbundle.BundleMetadata {
184
+
index := bm.libManager.GetIndex()
185
+
allBundles := index.GetBundles()
186
+
187
+
// Determine how many bundles to return
188
+
count := limit
189
+
if count <= 0 || count > len(allBundles) {
190
+
count = len(allBundles)
191
+
}
192
+
193
+
// Build result in reverse order (newest first)
194
+
result := make([]*plcbundle.BundleMetadata, count)
195
+
for i := 0; i < count; i++ {
196
+
result[i] = allBundles[len(allBundles)-1-i]
197
+
}
198
+
199
+
return result
200
+
}
201
+
202
+
// GetBundleStats returns bundle statistics
203
+
func (bm *BundleManager) GetBundleStats() map[string]interface{} {
204
+
index := bm.libManager.GetIndex()
205
+
stats := index.GetStats()
206
+
207
+
// Convert to expected format
208
+
lastBundle := stats["last_bundle"]
209
+
if lastBundle == nil {
210
+
lastBundle = int64(0)
211
+
}
212
+
213
+
// Calculate total uncompressed size by iterating through all bundles
214
+
totalUncompressedSize := int64(0)
215
+
allBundles := index.GetBundles()
216
+
for _, bundle := range allBundles {
217
+
totalUncompressedSize += bundle.UncompressedSize
218
+
}
219
+
220
+
return map[string]interface{}{
221
+
"bundle_count": int64(stats["bundle_count"].(int)),
222
+
"total_size": stats["total_size"].(int64),
223
+
"total_uncompressed_size": totalUncompressedSize,
224
+
"last_bundle": int64(lastBundle.(int)),
225
+
}
226
+
}
227
+
228
+
// GetDIDsForBundle gets DIDs from a bundle (loads and extracts)
229
+
func (bm *BundleManager) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, int, error) {
230
+
bundle, err := bm.libManager.LoadBundle(ctx, bundleNum)
231
+
if err != nil {
232
+
return nil, 0, err
233
+
}
234
+
235
+
// Extract unique DIDs
236
+
didSet := make(map[string]bool)
237
+
for _, op := range bundle.Operations {
238
+
didSet[op.DID] = true
239
+
}
240
+
241
+
dids := make([]string, 0, len(didSet))
242
+
for did := range didSet {
243
+
dids = append(dids, did)
244
+
}
245
+
246
+
return dids, bundle.DIDCount, nil
247
+
}
248
+
249
+
// FindBundleForTimestamp finds bundle containing a timestamp
250
+
func (bm *BundleManager) FindBundleForTimestamp(afterTime time.Time) int {
251
+
index := bm.libManager.GetIndex()
252
+
bundles := index.GetBundles()
253
+
254
+
// Find bundle containing this time
255
+
for _, bundle := range bundles {
256
+
if (bundle.StartTime.Before(afterTime) || bundle.StartTime.Equal(afterTime)) &&
257
+
(bundle.EndTime.After(afterTime) || bundle.EndTime.Equal(afterTime)) {
258
+
return bundle.BundleNumber
259
+
}
260
+
}
261
+
262
+
// Return closest bundle before this time
263
+
for i := len(bundles) - 1; i >= 0; i-- {
264
+
if bundles[i].EndTime.Before(afterTime) {
265
+
return bundles[i].BundleNumber
266
+
}
267
+
}
268
+
269
+
return 1 // Default to first bundle
270
+
}
271
+
272
+
// StreamRaw streams raw compressed bundle data
273
+
func (bm *BundleManager) StreamRaw(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
274
+
return bm.libManager.StreamBundleRaw(ctx, bundleNumber)
275
+
}
276
+
277
+
// StreamDecompressed streams decompressed bundle data
278
+
func (bm *BundleManager) StreamDecompressed(ctx context.Context, bundleNumber int) (io.ReadCloser, error) {
279
+
return bm.libManager.StreamBundleDecompressed(ctx, bundleNumber)
280
+
}
281
+
282
+
// GetPLCHistory calculates historical statistics from the bundle index
283
+
func (bm *BundleManager) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*storage.PLCHistoryPoint, error) {
284
+
index := bm.libManager.GetIndex()
285
+
allBundles := index.GetBundles()
286
+
287
+
// Filter bundles >= fromBundle
288
+
var filtered []*plcbundle.BundleMetadata
289
+
for _, b := range allBundles {
290
+
if b.BundleNumber >= fromBundle {
291
+
filtered = append(filtered, b)
292
+
}
293
+
}
294
+
295
+
if len(filtered) == 0 {
296
+
return []*storage.PLCHistoryPoint{}, nil
297
+
}
298
+
299
+
// Sort bundles by bundle number to ensure proper cumulative calculation
300
+
sort.Slice(filtered, func(i, j int) bool {
301
+
return filtered[i].BundleNumber < filtered[j].BundleNumber
302
+
})
303
+
304
+
// Group by date
305
+
type dailyStat struct {
306
+
lastBundle int
307
+
bundleCount int
308
+
totalUncompressed int64
309
+
totalCompressed int64
310
+
}
311
+
312
+
dailyStats := make(map[string]*dailyStat)
313
+
314
+
// Map to store the cumulative values at the end of each date
315
+
dateCumulatives := make(map[string]struct {
316
+
uncompressed int64
317
+
compressed int64
318
+
})
319
+
320
+
// Calculate cumulative totals as we iterate through sorted bundles
321
+
cumulativeUncompressed := int64(0)
322
+
cumulativeCompressed := int64(0)
323
+
324
+
for _, bundle := range filtered {
325
+
dateStr := bundle.StartTime.Format("2006-01-02")
326
+
327
+
// Update cumulative totals
328
+
cumulativeUncompressed += bundle.UncompressedSize
329
+
cumulativeCompressed += bundle.CompressedSize
330
+
331
+
if stat, exists := dailyStats[dateStr]; exists {
332
+
// Update existing day
333
+
if bundle.BundleNumber > stat.lastBundle {
334
+
stat.lastBundle = bundle.BundleNumber
335
+
}
336
+
stat.bundleCount++
337
+
stat.totalUncompressed += bundle.UncompressedSize
338
+
stat.totalCompressed += bundle.CompressedSize
339
+
} else {
340
+
// Create new day entry
341
+
dailyStats[dateStr] = &dailyStat{
342
+
lastBundle: bundle.BundleNumber,
343
+
bundleCount: 1,
344
+
totalUncompressed: bundle.UncompressedSize,
345
+
totalCompressed: bundle.CompressedSize,
346
+
}
347
+
}
348
+
349
+
// Store the cumulative values at the end of this date
350
+
// (will be overwritten if there are multiple bundles on the same day)
351
+
dateCumulatives[dateStr] = struct {
352
+
uncompressed int64
353
+
compressed int64
354
+
}{
355
+
uncompressed: cumulativeUncompressed,
356
+
compressed: cumulativeCompressed,
357
+
}
358
+
}
359
+
360
+
// Convert map to sorted slice by date
361
+
var dates []string
362
+
for date := range dailyStats {
363
+
dates = append(dates, date)
364
+
}
365
+
sort.Strings(dates)
366
+
367
+
// Build history points with cumulative operations
368
+
var history []*storage.PLCHistoryPoint
369
+
cumulativeOps := 0
370
+
371
+
for _, date := range dates {
372
+
stat := dailyStats[date]
373
+
cumulativeOps += stat.bundleCount * 10000
374
+
cumulative := dateCumulatives[date]
375
+
376
+
history = append(history, &storage.PLCHistoryPoint{
377
+
Date: date,
378
+
BundleNumber: stat.lastBundle,
379
+
OperationCount: cumulativeOps,
380
+
UncompressedSize: stat.totalUncompressed,
381
+
CompressedSize: stat.totalCompressed,
382
+
CumulativeUncompressed: cumulative.uncompressed,
383
+
CumulativeCompressed: cumulative.compressed,
384
+
})
385
+
}
386
+
387
+
// Apply limit if specified
388
+
if limit > 0 && len(history) > limit {
389
+
history = history[:limit]
390
+
}
391
+
392
+
return history, nil
393
+
}
394
+
395
+
// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396
+
func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397
+
// Define the path to the labels file
398
+
labelsDir := filepath.Join(bm.bundleDir, "labels")
399
+
labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400
+
401
+
// Check if file exists
402
+
if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403
+
log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404
+
// Return empty, not an error
405
+
return []*PLCOpLabel{}, nil
406
+
}
407
+
408
+
// Open the Zstd-compressed file
409
+
file, err := os.Open(labelsFile)
410
+
if err != nil {
411
+
return nil, fmt.Errorf("failed to open labels file: %w", err)
412
+
}
413
+
defer file.Close()
414
+
415
+
// Create a Zstd reader
416
+
zstdReader, err := zstd.NewReader(file)
417
+
if err != nil {
418
+
return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419
+
}
420
+
defer zstdReader.Close()
421
+
422
+
// Create a CSV reader
423
+
csvReader := csv.NewReader(zstdReader)
424
+
// We skipped the header, so no header read needed
425
+
// Set FieldsPerRecord to 7 for validation
426
+
//csvReader.FieldsPerRecord = 7
427
+
428
+
var labels []*PLCOpLabel
429
+
430
+
// Read all records
431
+
for {
432
+
// Check for context cancellation
433
+
if err := ctx.Err(); err != nil {
434
+
return nil, err
435
+
}
436
+
437
+
record, err := csvReader.Read()
438
+
if err == io.EOF {
439
+
break // End of file
440
+
}
441
+
if err != nil {
442
+
log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443
+
continue // Skip bad line
444
+
}
445
+
446
+
// Parse the CSV record (which is []string)
447
+
label, err := parseLabelRecord(record)
448
+
if err != nil {
449
+
log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450
+
continue // Skip bad data
451
+
}
452
+
453
+
labels = append(labels, label)
454
+
}
455
+
456
+
return labels, nil
457
+
}
458
+
459
+
// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
460
+
func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461
+
// New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462
+
if len(record) != 6 {
463
+
err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464
+
// --- ADDED LOG ---
465
+
log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466
+
// ---
467
+
return nil, err
468
+
}
469
+
470
+
// 0:bundle
471
+
bundle, err := strconv.Atoi(record[0])
472
+
if err != nil {
473
+
// --- ADDED LOG ---
474
+
log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475
+
// ---
476
+
return nil, fmt.Errorf("parsing 'bundle': %w", err)
477
+
}
478
+
479
+
// 1:position
480
+
position, err := strconv.Atoi(record[1])
481
+
if err != nil {
482
+
// --- ADDED LOG ---
483
+
log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484
+
// ---
485
+
return nil, fmt.Errorf("parsing 'position': %w", err)
486
+
}
487
+
488
+
// 2:cid(short)
489
+
shortCID := record[2]
490
+
491
+
// 3:size
492
+
size, err := strconv.Atoi(record[3])
493
+
if err != nil {
494
+
// --- ADDED LOG ---
495
+
log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496
+
// ---
497
+
return nil, fmt.Errorf("parsing 'size': %w", err)
498
+
}
499
+
500
+
// 4:confidence
501
+
confidence, err := strconv.ParseFloat(record[4], 64)
502
+
if err != nil {
503
+
// --- ADDED LOG ---
504
+
log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505
+
// ---
506
+
return nil, fmt.Errorf("parsing 'confidence': %w", err)
507
+
}
508
+
509
+
// 5:labels
510
+
detectors := strings.Split(record[5], ";")
511
+
512
+
label := &PLCOpLabel{
513
+
Bundle: bundle,
514
+
Position: position,
515
+
CID: shortCID,
516
+
Size: size,
517
+
Confidence: confidence,
518
+
Detectors: detectors,
519
+
}
520
+
521
+
return label, nil
522
+
}
+8
-15
internal/plc/scanner.go
+8
-15
internal/plc/scanner.go
···
6
6
"strings"
7
7
"time"
8
8
9
-
"github.com/atscan/atscanner/internal/config"
10
-
"github.com/atscan/atscanner/internal/log"
11
-
"github.com/atscan/atscanner/internal/storage"
9
+
"github.com/atscan/atscand/internal/config"
10
+
"github.com/atscan/atscand/internal/log"
11
+
"github.com/atscan/atscand/internal/storage"
12
12
)
13
13
14
14
type Scanner struct {
···
17
17
config config.PLCConfig
18
18
}
19
19
20
-
func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner {
20
+
func NewScanner(db storage.Database, cfg config.PLCConfig, bundleManager *BundleManager) *Scanner {
21
21
log.Verbose("NewScanner: IndexDIDs config = %v", cfg.IndexDIDs)
22
22
23
-
bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.DirectoryURL, db, cfg.IndexDIDs)
24
-
if err != nil {
25
-
log.Error("Failed to initialize bundle manager: %v", err)
26
-
return nil
27
-
}
28
-
29
23
return &Scanner{
30
-
bundleManager: bundleManager,
24
+
bundleManager: bundleManager, // Use provided instance
31
25
db: db,
32
26
config: cfg,
33
27
}
34
28
}
35
29
36
30
func (s *Scanner) Close() {
37
-
if s.bundleManager != nil {
38
-
s.bundleManager.Close()
39
-
}
31
+
// Don't close bundleManager here - it's shared
40
32
}
41
33
42
34
func (s *Scanner) Scan(ctx context.Context) error {
···
198
190
}
199
191
200
192
func (s *Scanner) storeEndpoint(ctx context.Context, epType, endpoint string, discoveredAt time.Time) error {
193
+
valid := validateEndpoint(endpoint)
201
194
return s.db.UpsertEndpoint(ctx, &storage.Endpoint{
202
195
EndpointType: epType,
203
196
Endpoint: endpoint,
204
197
DiscoveredAt: discoveredAt,
205
198
LastChecked: time.Time{},
206
199
Status: storage.EndpointStatusUnknown,
200
+
Valid: valid,
207
201
})
208
202
}
209
203
···
237
231
238
232
// ScanMetrics tracks scan progress
239
233
type ScanMetrics struct {
240
-
totalFetched int64
241
234
totalProcessed int64
242
235
newEndpoints int64
243
236
endpointCounts map[string]int64
+60
-1
internal/plc/types.go
+60
-1
internal/plc/types.go
···
1
1
package plc
2
2
3
3
import (
4
-
plclib "github.com/atscan/plcbundle/plc"
4
+
"net/url"
5
+
"strings"
6
+
7
+
plclib "tangled.org/atscan.net/plcbundle/plc"
5
8
)
6
9
7
10
// Re-export library types
···
28
31
Type string
29
32
Endpoint string
30
33
}
34
+
35
+
// PLCOpLabel is one row of a bundle's label CSV file, as produced by
// parseLabelRecord. Field order matches the CSV column order.
type PLCOpLabel struct {
	// Bundle is CSV column 0.
	Bundle int `json:"bundle"`
	// Position is CSV column 1.
	Position int `json:"position"`
	// CID is CSV column 2, described in the parser as a short CID.
	CID string `json:"cid"`
	// Size is CSV column 3.
	Size int `json:"size"`
	// Confidence is CSV column 4.
	Confidence float64 `json:"confidence"`
	// Detectors is CSV column 5 split on ';'.
	Detectors []string `json:"detectors"`
}
44
+
45
+
// validateEndpoint reports whether endpoint has the bare form
// https://<host>: non-empty, no trailing slash, parseable as a URL, scheme
// exactly "https", a non-empty host, and no path, query string or fragment.
func validateEndpoint(endpoint string) bool {
	// Cheap textual rejections first.
	if endpoint == "" || strings.HasSuffix(endpoint, "/") {
		return false
	}

	parsed, err := url.Parse(endpoint)
	if err != nil {
		return false
	}

	switch {
	case parsed.Scheme != "https", parsed.Host == "":
		return false
	case parsed.Path != "" && parsed.Path != "/":
		return false
	case parsed.RawQuery != "", parsed.Fragment != "":
		return false
	}

	return true
}
-21
internal/storage/db.go
-21
internal/storage/db.go
···
50
50
GetScanCursor(ctx context.Context, source string) (*ScanCursor, error)
51
51
UpdateScanCursor(ctx context.Context, cursor *ScanCursor) error
52
52
53
-
// Bundle operations
54
-
CreateBundle(ctx context.Context, bundle *PLCBundle) error
55
-
GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error)
56
-
GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error)
57
-
GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error)
58
-
GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, error)
59
-
GetBundleStats(ctx context.Context) (count, compressedSize, uncompressedSize, lastBundle int64, err error)
60
-
GetLastBundleNumber(ctx context.Context) (int, error)
61
-
GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error)
62
-
GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error)
63
-
64
-
// Mempool operations
65
-
AddToMempool(ctx context.Context, ops []MempoolOperation) error
66
-
GetMempoolCount(ctx context.Context) (int, error)
67
-
GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error)
68
-
DeleteFromMempool(ctx context.Context, ids []int64) error
69
-
GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error)
70
-
GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error)
71
-
GetMempoolUniqueDIDCount(ctx context.Context) (int, error)
72
-
GetMempoolUncompressedSize(ctx context.Context) (int64, error)
73
-
74
53
// Metrics
75
54
StorePLCMetrics(ctx context.Context, metrics *PLCMetrics) error
76
55
GetPLCMetrics(ctx context.Context, limit int) ([]*PLCMetrics, error)
+25
-549
internal/storage/postgres.go
+25
-549
internal/storage/postgres.go
···
5
5
"database/sql"
6
6
"encoding/json"
7
7
"fmt"
8
-
"strings"
9
8
"time"
10
9
11
-
"github.com/atscan/atscanner/internal/log"
10
+
"github.com/atscan/atscand/internal/log"
12
11
"github.com/jackc/pgx/v5"
13
12
"github.com/jackc/pgx/v5/pgxpool"
14
13
_ "github.com/jackc/pgx/v5/stdlib"
···
85
84
ip TEXT,
86
85
ipv6 TEXT,
87
86
ip_resolved_at TIMESTAMP,
87
+
valid BOOLEAN DEFAULT true,
88
88
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
89
89
UNIQUE(endpoint_type, endpoint)
90
90
);
···
96
96
CREATE INDEX IF NOT EXISTS idx_endpoints_ipv6 ON endpoints(ipv6);
97
97
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did ON endpoints(server_did);
98
98
CREATE INDEX IF NOT EXISTS idx_endpoints_server_did_type_discovered ON endpoints(server_did, endpoint_type, discovered_at);
99
+
CREATE INDEX IF NOT EXISTS idx_endpoints_valid ON endpoints(valid);
99
100
100
101
-- IP infos table (IP as PRIMARY KEY)
101
102
CREATE TABLE IF NOT EXISTS ip_infos (
···
157
158
records_processed BIGINT DEFAULT 0
158
159
);
159
160
160
-
CREATE TABLE IF NOT EXISTS plc_bundles (
161
-
bundle_number INTEGER PRIMARY KEY,
162
-
start_time TIMESTAMP NOT NULL,
163
-
end_time TIMESTAMP NOT NULL,
164
-
did_count INTEGER NOT NULL DEFAULT 0,
165
-
hash TEXT NOT NULL,
166
-
compressed_hash TEXT NOT NULL,
167
-
compressed_size BIGINT NOT NULL,
168
-
uncompressed_size BIGINT NOT NULL,
169
-
cumulative_compressed_size BIGINT NOT NULL,
170
-
cumulative_uncompressed_size BIGINT NOT NULL,
171
-
cursor TEXT,
172
-
prev_bundle_hash TEXT,
173
-
compressed BOOLEAN DEFAULT true,
174
-
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
175
-
);
176
-
177
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_time ON plc_bundles(start_time, end_time);
178
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_hash ON plc_bundles(hash);
179
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_prev ON plc_bundles(prev_bundle_hash);
180
-
CREATE INDEX IF NOT EXISTS idx_plc_bundles_number_desc ON plc_bundles(bundle_number DESC);
181
-
182
-
CREATE TABLE IF NOT EXISTS plc_mempool (
183
-
id BIGSERIAL PRIMARY KEY,
184
-
did TEXT NOT NULL,
185
-
operation TEXT NOT NULL,
186
-
cid TEXT NOT NULL UNIQUE,
187
-
created_at TIMESTAMP NOT NULL,
188
-
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
189
-
);
190
-
191
-
CREATE INDEX IF NOT EXISTS idx_mempool_created_at ON plc_mempool(created_at);
192
-
CREATE INDEX IF NOT EXISTS idx_mempool_did ON plc_mempool(did);
193
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_mempool_cid ON plc_mempool(cid);
194
-
195
161
-- Minimal dids table
196
162
CREATE TABLE IF NOT EXISTS dids (
197
163
did TEXT PRIMARY KEY,
···
244
210
245
211
func (p *PostgresDB) UpsertEndpoint(ctx context.Context, endpoint *Endpoint) error {
246
212
query := `
247
-
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at)
248
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
213
+
INSERT INTO endpoints (endpoint_type, endpoint, discovered_at, last_checked, status, ip, ipv6, ip_resolved_at, valid)
214
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
249
215
ON CONFLICT(endpoint_type, endpoint) DO UPDATE SET
250
216
last_checked = EXCLUDED.last_checked,
251
217
status = EXCLUDED.status,
···
261
227
WHEN (EXCLUDED.ip IS NOT NULL AND EXCLUDED.ip != '') OR (EXCLUDED.ipv6 IS NOT NULL AND EXCLUDED.ipv6 != '') THEN EXCLUDED.ip_resolved_at
262
228
ELSE endpoints.ip_resolved_at
263
229
END,
230
+
valid = EXCLUDED.valid,
264
231
updated_at = CURRENT_TIMESTAMP
265
232
RETURNING id
266
233
`
267
234
err := p.db.QueryRowContext(ctx, query,
268
235
endpoint.EndpointType, endpoint.Endpoint, endpoint.DiscoveredAt,
269
-
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt).Scan(&endpoint.ID)
236
+
endpoint.LastChecked, endpoint.Status, endpoint.IP, endpoint.IPv6, endpoint.IPResolvedAt, endpoint.Valid).Scan(&endpoint.ID)
270
237
return err
271
238
}
272
239
···
287
254
func (p *PostgresDB) GetEndpoint(ctx context.Context, endpoint string, endpointType string) (*Endpoint, error) {
288
255
query := `
289
256
SELECT id, endpoint_type, endpoint, discovered_at, last_checked, status,
290
-
ip, ipv6, ip_resolved_at, updated_at
257
+
ip, ipv6, ip_resolved_at, valid, updated_at
291
258
FROM endpoints
292
259
WHERE endpoint = $1 AND endpoint_type = $2
293
260
`
···
298
265
299
266
err := p.db.QueryRowContext(ctx, query, endpoint, endpointType).Scan(
300
267
&ep.ID, &ep.EndpointType, &ep.Endpoint, &ep.DiscoveredAt, &lastChecked,
301
-
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.UpdatedAt,
268
+
&ep.Status, &ip, &ipv6, &ipResolvedAt, &ep.Valid, &ep.UpdatedAt,
302
269
)
303
270
if err != nil {
304
271
return nil, err
···
324
291
query := `
325
292
SELECT DISTINCT ON (COALESCE(server_did, id::text))
326
293
id, endpoint_type, endpoint, server_did, discovered_at, last_checked, status,
327
-
ip, ipv6, ip_resolved_at, updated_at
294
+
ip, ipv6, ip_resolved_at, valid, updated_at
328
295
FROM endpoints
329
296
WHERE 1=1
330
297
`
···
337
304
args = append(args, filter.Type)
338
305
argIdx++
339
306
}
307
+
308
+
// NEW: Filter by valid flag
309
+
if filter.OnlyValid {
310
+
query += fmt.Sprintf(" AND valid = true", argIdx)
311
+
}
340
312
if filter.Status != "" {
341
313
statusInt := EndpointStatusUnknown
342
314
switch filter.Status {
···
602
574
last_checked,
603
575
status,
604
576
ip,
605
-
ipv6
577
+
ipv6,
578
+
valid
606
579
FROM endpoints
607
580
WHERE endpoint_type = 'pds'
608
581
ORDER BY COALESCE(server_did, id::text), discovered_at ASC
609
582
)
610
583
SELECT
611
-
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6,
584
+
e.id, e.endpoint, e.server_did, e.discovered_at, e.last_checked, e.status, e.ip, e.ipv6, e.valid,
612
585
latest.user_count, latest.response_time, latest.version, latest.scanned_at,
613
586
i.city, i.country, i.country_code, i.asn, i.asn_org,
614
587
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
···
679
652
var scannedAt sql.NullTime
680
653
681
654
err := rows.Scan(
682
-
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6,
655
+
&item.ID, &item.Endpoint, &serverDID, &item.DiscoveredAt, &item.LastChecked, &item.Status, &ip, &ipv6, &item.Valid,
683
656
&userCount, &responseTime, &version, &scannedAt,
684
657
&city, &country, &countryCode, &asn, &asnOrg,
685
658
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
741
714
742
715
func (p *PostgresDB) GetPDSDetail(ctx context.Context, endpoint string) (*PDSDetail, error) {
743
716
query := `
744
-
WITH target_endpoint AS MATERIALIZED ( -- MATERIALIZED fence for optimization
717
+
WITH target_endpoint AS MATERIALIZED (
745
718
SELECT
746
719
e.id,
747
720
e.endpoint,
···
750
723
e.last_checked,
751
724
e.status,
752
725
e.ip,
753
-
e.ipv6
726
+
e.ipv6,
727
+
e.valid
754
728
FROM endpoints e
755
729
WHERE e.endpoint = $1
756
730
AND e.endpoint_type = 'pds'
757
-
LIMIT 1 -- Early termination since we expect exactly 1 row
731
+
LIMIT 1
758
732
)
759
733
SELECT
760
734
te.id,
···
765
739
te.status,
766
740
te.ip,
767
741
te.ipv6,
742
+
te.valid,
768
743
latest.user_count,
769
744
latest.response_time,
770
745
latest.version,
···
774
749
i.is_datacenter, i.is_vpn, i.is_crawler, i.is_tor, i.is_proxy,
775
750
i.latitude, i.longitude,
776
751
i.raw_data,
777
-
-- Inline aliases aggregation (avoid second CTE)
778
752
COALESCE(
779
753
ARRAY(
780
754
SELECT e2.endpoint
···
787
761
),
788
762
ARRAY[]::text[]
789
763
) as aliases,
790
-
-- Inline first_discovered_at (avoid aggregation)
791
764
CASE
792
765
WHEN te.server_did IS NOT NULL THEN (
793
766
SELECT MIN(e3.discovered_at)
···
828
801
var firstDiscoveredAt sql.NullTime
829
802
830
803
err := p.db.QueryRowContext(ctx, query, endpoint).Scan(
831
-
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6,
804
+
&detail.ID, &detail.Endpoint, &serverDID, &detail.DiscoveredAt, &detail.LastChecked, &detail.Status, &ip, &ipv6, &detail.Valid,
832
805
&userCount, &responseTime, &version, &serverInfoJSON, &scannedAt,
833
806
&city, &country, &countryCode, &asn, &asnOrg,
834
807
&isDatacenter, &isVPN, &isCrawler, &isTor, &isProxy,
···
855
828
// Set aliases and is_primary
856
829
detail.Aliases = aliases
857
830
if serverDID.Valid && serverDID.String != "" && firstDiscoveredAt.Valid {
858
-
// Has server_did - check if this is the first discovered
859
831
detail.IsPrimary = detail.DiscoveredAt.Equal(firstDiscoveredAt.Time) ||
860
832
detail.DiscoveredAt.Before(firstDiscoveredAt.Time)
861
833
} else {
862
-
// No server_did means unique server
863
834
detail.IsPrimary = true
864
835
}
865
836
···
1190
1161
}
1191
1162
}
1192
1163
return 0
1193
-
}
1194
-
1195
-
// ===== BUNDLE OPERATIONS =====
1196
-
1197
-
func (p *PostgresDB) CreateBundle(ctx context.Context, bundle *PLCBundle) error {
1198
-
// Calculate cumulative sizes from previous bundle
1199
-
if bundle.BundleNumber > 1 {
1200
-
prevBundle, err := p.GetBundleByNumber(ctx, bundle.BundleNumber-1)
1201
-
if err == nil && prevBundle != nil {
1202
-
bundle.CumulativeCompressedSize = prevBundle.CumulativeCompressedSize + bundle.CompressedSize
1203
-
bundle.CumulativeUncompressedSize = prevBundle.CumulativeUncompressedSize + bundle.UncompressedSize
1204
-
} else {
1205
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1206
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1207
-
}
1208
-
} else {
1209
-
bundle.CumulativeCompressedSize = bundle.CompressedSize
1210
-
bundle.CumulativeUncompressedSize = bundle.UncompressedSize
1211
-
}
1212
-
1213
-
query := `
1214
-
INSERT INTO plc_bundles (
1215
-
bundle_number, start_time, end_time, did_count,
1216
-
hash, compressed_hash, compressed_size, uncompressed_size,
1217
-
cumulative_compressed_size, cumulative_uncompressed_size,
1218
-
cursor, prev_bundle_hash, compressed
1219
-
)
1220
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
1221
-
ON CONFLICT(bundle_number) DO UPDATE SET
1222
-
start_time = EXCLUDED.start_time,
1223
-
end_time = EXCLUDED.end_time,
1224
-
did_count = EXCLUDED.did_count,
1225
-
hash = EXCLUDED.hash,
1226
-
compressed_hash = EXCLUDED.compressed_hash,
1227
-
compressed_size = EXCLUDED.compressed_size,
1228
-
uncompressed_size = EXCLUDED.uncompressed_size,
1229
-
cumulative_compressed_size = EXCLUDED.cumulative_compressed_size,
1230
-
cumulative_uncompressed_size = EXCLUDED.cumulative_uncompressed_size,
1231
-
cursor = EXCLUDED.cursor,
1232
-
prev_bundle_hash = EXCLUDED.prev_bundle_hash,
1233
-
compressed = EXCLUDED.compressed
1234
-
`
1235
-
_, err := p.db.ExecContext(ctx, query,
1236
-
bundle.BundleNumber, bundle.StartTime, bundle.EndTime,
1237
-
bundle.DIDCount, bundle.Hash, bundle.CompressedHash,
1238
-
bundle.CompressedSize, bundle.UncompressedSize,
1239
-
bundle.CumulativeCompressedSize, bundle.CumulativeUncompressedSize,
1240
-
bundle.Cursor, bundle.PrevBundleHash, bundle.Compressed,
1241
-
)
1242
-
1243
-
return err
1244
-
}
1245
-
1246
-
func (p *PostgresDB) GetBundleByNumber(ctx context.Context, bundleNumber int) (*PLCBundle, error) {
1247
-
query := `
1248
-
SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash,
1249
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1250
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1251
-
FROM plc_bundles
1252
-
WHERE bundle_number = $1
1253
-
`
1254
-
1255
-
var bundle PLCBundle
1256
-
var prevHash sql.NullString
1257
-
var cursor sql.NullString
1258
-
1259
-
err := p.db.QueryRowContext(ctx, query, bundleNumber).Scan(
1260
-
&bundle.BundleNumber, &bundle.StartTime, &bundle.EndTime,
1261
-
&bundle.DIDCount, &bundle.Hash, &bundle.CompressedHash,
1262
-
&bundle.CompressedSize, &bundle.UncompressedSize,
1263
-
&bundle.CumulativeCompressedSize, &bundle.CumulativeUncompressedSize,
1264
-
&cursor, &prevHash, &bundle.Compressed, &bundle.CreatedAt,
1265
-
)
1266
-
if err != nil {
1267
-
return nil, err
1268
-
}
1269
-
1270
-
if prevHash.Valid {
1271
-
bundle.PrevBundleHash = prevHash.String
1272
-
}
1273
-
if cursor.Valid {
1274
-
bundle.Cursor = cursor.String
1275
-
}
1276
-
1277
-
return &bundle, nil
1278
-
}
1279
-
1280
-
func (p *PostgresDB) GetBundles(ctx context.Context, limit int) ([]*PLCBundle, error) {
1281
-
query := `
1282
-
SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash,
1283
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1284
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1285
-
FROM plc_bundles
1286
-
ORDER BY bundle_number DESC
1287
-
LIMIT $1
1288
-
`
1289
-
1290
-
rows, err := p.db.QueryContext(ctx, query, limit)
1291
-
if err != nil {
1292
-
return nil, err
1293
-
}
1294
-
defer rows.Close()
1295
-
1296
-
return p.scanBundles(rows)
1297
-
}
1298
-
1299
-
func (p *PostgresDB) GetBundlesForDID(ctx context.Context, did string) ([]*PLCBundle, error) {
1300
-
// Get bundle numbers from dids table
1301
-
var bundleNumbersJSON []byte
1302
-
err := p.db.QueryRowContext(ctx, `
1303
-
SELECT bundle_numbers FROM dids WHERE did = $1
1304
-
`, did).Scan(&bundleNumbersJSON)
1305
-
1306
-
if err == sql.ErrNoRows {
1307
-
return []*PLCBundle{}, nil
1308
-
}
1309
-
if err != nil {
1310
-
return nil, err
1311
-
}
1312
-
1313
-
var bundleNumbers []int
1314
-
if err := json.Unmarshal(bundleNumbersJSON, &bundleNumbers); err != nil {
1315
-
return nil, err
1316
-
}
1317
-
1318
-
if len(bundleNumbers) == 0 {
1319
-
return []*PLCBundle{}, nil
1320
-
}
1321
-
1322
-
// Build query with IN clause
1323
-
placeholders := make([]string, len(bundleNumbers))
1324
-
args := make([]interface{}, len(bundleNumbers))
1325
-
for i, num := range bundleNumbers {
1326
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1327
-
args[i] = num
1328
-
}
1329
-
1330
-
query := fmt.Sprintf(`
1331
-
SELECT bundle_number, start_time, end_time, did_count, hash, compressed_hash,
1332
-
compressed_size, uncompressed_size, cumulative_compressed_size,
1333
-
cumulative_uncompressed_size, cursor, prev_bundle_hash, compressed, created_at
1334
-
FROM plc_bundles
1335
-
WHERE bundle_number IN (%s)
1336
-
ORDER BY bundle_number ASC
1337
-
`, strings.Join(placeholders, ","))
1338
-
1339
-
rows, err := p.db.QueryContext(ctx, query, args...)
1340
-
if err != nil {
1341
-
return nil, err
1342
-
}
1343
-
defer rows.Close()
1344
-
1345
-
return p.scanBundles(rows)
1346
-
}
1347
-
1348
-
func (p *PostgresDB) GetDIDsForBundle(ctx context.Context, bundleNum int) ([]string, error) {
1349
-
query := `
1350
-
SELECT did
1351
-
FROM dids
1352
-
WHERE bundle_numbers @> $1::jsonb
1353
-
ORDER BY did
1354
-
`
1355
-
1356
-
rows, err := p.db.QueryContext(ctx, query, fmt.Sprintf("[%d]", bundleNum))
1357
-
if err != nil {
1358
-
return nil, err
1359
-
}
1360
-
defer rows.Close()
1361
-
1362
-
var dids []string
1363
-
for rows.Next() {
1364
-
var did string
1365
-
if err := rows.Scan(&did); err != nil {
1366
-
return nil, err
1367
-
}
1368
-
dids = append(dids, did)
1369
-
}
1370
-
1371
-
return dids, rows.Err()
1372
-
}
1373
-
1374
-
func (p *PostgresDB) scanBundles(rows *sql.Rows) ([]*PLCBundle, error) {
1375
-
var bundles []*PLCBundle
1376
-
1377
-
for rows.Next() {
1378
-
var bundle PLCBundle
1379
-
var prevHash sql.NullString
1380
-
var cursor sql.NullString
1381
-
1382
-
if err := rows.Scan(
1383
-
&bundle.BundleNumber,
1384
-
&bundle.StartTime,
1385
-
&bundle.EndTime,
1386
-
&bundle.DIDCount,
1387
-
&bundle.Hash,
1388
-
&bundle.CompressedHash,
1389
-
&bundle.CompressedSize,
1390
-
&bundle.UncompressedSize,
1391
-
&bundle.CumulativeCompressedSize,
1392
-
&bundle.CumulativeUncompressedSize,
1393
-
&cursor,
1394
-
&prevHash,
1395
-
&bundle.Compressed,
1396
-
&bundle.CreatedAt,
1397
-
); err != nil {
1398
-
return nil, err
1399
-
}
1400
-
1401
-
if prevHash.Valid {
1402
-
bundle.PrevBundleHash = prevHash.String
1403
-
}
1404
-
if cursor.Valid {
1405
-
bundle.Cursor = cursor.String
1406
-
}
1407
-
1408
-
bundles = append(bundles, &bundle)
1409
-
}
1410
-
1411
-
return bundles, rows.Err()
1412
-
}
1413
-
1414
-
func (p *PostgresDB) GetBundleStats(ctx context.Context) (int64, int64, int64, int64, error) {
1415
-
var count, lastBundleNum int64
1416
-
err := p.db.QueryRowContext(ctx, `
1417
-
SELECT COUNT(*), COALESCE(MAX(bundle_number), 0)
1418
-
FROM plc_bundles
1419
-
`).Scan(&count, &lastBundleNum)
1420
-
if err != nil {
1421
-
return 0, 0, 0, 0, err
1422
-
}
1423
-
1424
-
if lastBundleNum == 0 {
1425
-
return 0, 0, 0, 0, nil
1426
-
}
1427
-
1428
-
var compressedSize, uncompressedSize int64
1429
-
err = p.db.QueryRowContext(ctx, `
1430
-
SELECT cumulative_compressed_size, cumulative_uncompressed_size
1431
-
FROM plc_bundles
1432
-
WHERE bundle_number = $1
1433
-
`, lastBundleNum).Scan(&compressedSize, &uncompressedSize)
1434
-
if err != nil {
1435
-
return 0, 0, 0, 0, err
1436
-
}
1437
-
1438
-
return count, compressedSize, uncompressedSize, lastBundleNum, nil
1439
-
}
1440
-
1441
-
func (p *PostgresDB) GetLastBundleNumber(ctx context.Context) (int, error) {
1442
-
query := "SELECT COALESCE(MAX(bundle_number), 0) FROM plc_bundles"
1443
-
var num int
1444
-
err := p.db.QueryRowContext(ctx, query).Scan(&num)
1445
-
return num, err
1446
-
}
1447
-
1448
-
func (p *PostgresDB) GetBundleForTimestamp(ctx context.Context, afterTime time.Time) (int, error) {
1449
-
query := `
1450
-
SELECT bundle_number
1451
-
FROM plc_bundles
1452
-
WHERE start_time <= $1 AND end_time >= $1
1453
-
ORDER BY bundle_number ASC
1454
-
LIMIT 1
1455
-
`
1456
-
1457
-
var bundleNum int
1458
-
err := p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1459
-
if err == sql.ErrNoRows {
1460
-
query = `
1461
-
SELECT bundle_number
1462
-
FROM plc_bundles
1463
-
WHERE end_time < $1
1464
-
ORDER BY bundle_number DESC
1465
-
LIMIT 1
1466
-
`
1467
-
err = p.db.QueryRowContext(ctx, query, afterTime).Scan(&bundleNum)
1468
-
if err == sql.ErrNoRows {
1469
-
return 1, nil
1470
-
}
1471
-
if err != nil {
1472
-
return 0, err
1473
-
}
1474
-
return bundleNum, nil
1475
-
}
1476
-
if err != nil {
1477
-
return 0, err
1478
-
}
1479
-
1480
-
return bundleNum, nil
1481
-
}
1482
-
1483
-
func (p *PostgresDB) GetPLCHistory(ctx context.Context, limit int, fromBundle int) ([]*PLCHistoryPoint, error) {
1484
-
query := `
1485
-
WITH daily_stats AS (
1486
-
SELECT
1487
-
DATE(start_time) as date,
1488
-
MAX(bundle_number) as last_bundle,
1489
-
COUNT(*) as bundle_count,
1490
-
SUM(uncompressed_size) as total_uncompressed,
1491
-
SUM(compressed_size) as total_compressed,
1492
-
MAX(cumulative_uncompressed_size) as cumulative_uncompressed,
1493
-
MAX(cumulative_compressed_size) as cumulative_compressed
1494
-
FROM plc_bundles
1495
-
WHERE bundle_number >= $1
1496
-
GROUP BY DATE(start_time)
1497
-
)
1498
-
SELECT
1499
-
date::text,
1500
-
last_bundle,
1501
-
SUM(bundle_count * 10000) OVER (ORDER BY date) as cumulative_operations,
1502
-
total_uncompressed,
1503
-
total_compressed,
1504
-
cumulative_uncompressed,
1505
-
cumulative_compressed
1506
-
FROM daily_stats
1507
-
ORDER BY date ASC
1508
-
`
1509
-
1510
-
if limit > 0 {
1511
-
query += fmt.Sprintf(" LIMIT %d", limit)
1512
-
}
1513
-
1514
-
rows, err := p.db.QueryContext(ctx, query, fromBundle)
1515
-
if err != nil {
1516
-
return nil, err
1517
-
}
1518
-
defer rows.Close()
1519
-
1520
-
var history []*PLCHistoryPoint
1521
-
for rows.Next() {
1522
-
var point PLCHistoryPoint
1523
-
var cumulativeOps int64
1524
-
1525
-
err := rows.Scan(
1526
-
&point.Date,
1527
-
&point.BundleNumber,
1528
-
&cumulativeOps,
1529
-
&point.UncompressedSize,
1530
-
&point.CompressedSize,
1531
-
&point.CumulativeUncompressed,
1532
-
&point.CumulativeCompressed,
1533
-
)
1534
-
if err != nil {
1535
-
return nil, err
1536
-
}
1537
-
1538
-
point.OperationCount = int(cumulativeOps)
1539
-
1540
-
history = append(history, &point)
1541
-
}
1542
-
1543
-
return history, rows.Err()
1544
-
}
1545
-
1546
-
// ===== MEMPOOL OPERATIONS =====
1547
-
1548
-
func (p *PostgresDB) AddToMempool(ctx context.Context, ops []MempoolOperation) error {
1549
-
if len(ops) == 0 {
1550
-
return nil
1551
-
}
1552
-
1553
-
tx, err := p.db.BeginTx(ctx, nil)
1554
-
if err != nil {
1555
-
return err
1556
-
}
1557
-
defer tx.Rollback()
1558
-
1559
-
stmt, err := tx.PrepareContext(ctx, `
1560
-
INSERT INTO plc_mempool (did, operation, cid, created_at)
1561
-
VALUES ($1, $2, $3, $4)
1562
-
ON CONFLICT(cid) DO NOTHING
1563
-
`)
1564
-
if err != nil {
1565
-
return err
1566
-
}
1567
-
defer stmt.Close()
1568
-
1569
-
for _, op := range ops {
1570
-
_, err := stmt.ExecContext(ctx, op.DID, op.Operation, op.CID, op.CreatedAt)
1571
-
if err != nil {
1572
-
return err
1573
-
}
1574
-
}
1575
-
1576
-
return tx.Commit()
1577
-
}
1578
-
1579
-
func (p *PostgresDB) GetMempoolCount(ctx context.Context) (int, error) {
1580
-
query := "SELECT COUNT(*) FROM plc_mempool"
1581
-
var count int
1582
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1583
-
return count, err
1584
-
}
1585
-
1586
-
func (p *PostgresDB) GetMempoolOperations(ctx context.Context, limit int) ([]MempoolOperation, error) {
1587
-
query := `
1588
-
SELECT id, did, operation, cid, created_at, added_at
1589
-
FROM plc_mempool
1590
-
ORDER BY created_at ASC
1591
-
LIMIT $1
1592
-
`
1593
-
1594
-
rows, err := p.db.QueryContext(ctx, query, limit)
1595
-
if err != nil {
1596
-
return nil, err
1597
-
}
1598
-
defer rows.Close()
1599
-
1600
-
var ops []MempoolOperation
1601
-
for rows.Next() {
1602
-
var op MempoolOperation
1603
-
err := rows.Scan(&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt)
1604
-
if err != nil {
1605
-
return nil, err
1606
-
}
1607
-
ops = append(ops, op)
1608
-
}
1609
-
1610
-
return ops, rows.Err()
1611
-
}
1612
-
1613
-
func (p *PostgresDB) DeleteFromMempool(ctx context.Context, ids []int64) error {
1614
-
if len(ids) == 0 {
1615
-
return nil
1616
-
}
1617
-
1618
-
placeholders := make([]string, len(ids))
1619
-
args := make([]interface{}, len(ids))
1620
-
for i, id := range ids {
1621
-
placeholders[i] = fmt.Sprintf("$%d", i+1)
1622
-
args[i] = id
1623
-
}
1624
-
1625
-
query := fmt.Sprintf("DELETE FROM plc_mempool WHERE id IN (%s)",
1626
-
strings.Join(placeholders, ","))
1627
-
1628
-
_, err := p.db.ExecContext(ctx, query, args...)
1629
-
return err
1630
-
}
1631
-
1632
-
func (p *PostgresDB) GetFirstMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1633
-
query := `
1634
-
SELECT id, did, operation, cid, created_at, added_at
1635
-
FROM plc_mempool
1636
-
ORDER BY created_at ASC, id ASC
1637
-
LIMIT 1
1638
-
`
1639
-
1640
-
var op MempoolOperation
1641
-
err := p.db.QueryRowContext(ctx, query).Scan(
1642
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1643
-
)
1644
-
if err == sql.ErrNoRows {
1645
-
return nil, nil
1646
-
}
1647
-
if err != nil {
1648
-
return nil, err
1649
-
}
1650
-
1651
-
return &op, nil
1652
-
}
1653
-
1654
-
func (p *PostgresDB) GetLastMempoolOperation(ctx context.Context) (*MempoolOperation, error) {
1655
-
query := `
1656
-
SELECT id, did, operation, cid, created_at, added_at
1657
-
FROM plc_mempool
1658
-
ORDER BY created_at DESC, id DESC
1659
-
LIMIT 1
1660
-
`
1661
-
1662
-
var op MempoolOperation
1663
-
err := p.db.QueryRowContext(ctx, query).Scan(
1664
-
&op.ID, &op.DID, &op.Operation, &op.CID, &op.CreatedAt, &op.AddedAt,
1665
-
)
1666
-
if err == sql.ErrNoRows {
1667
-
return nil, nil
1668
-
}
1669
-
if err != nil {
1670
-
return nil, err
1671
-
}
1672
-
1673
-
return &op, nil
1674
-
}
1675
-
1676
-
func (p *PostgresDB) GetMempoolUniqueDIDCount(ctx context.Context) (int, error) {
1677
-
query := "SELECT COUNT(DISTINCT did) FROM plc_mempool"
1678
-
var count int
1679
-
err := p.db.QueryRowContext(ctx, query).Scan(&count)
1680
-
return count, err
1681
-
}
1682
-
1683
-
func (p *PostgresDB) GetMempoolUncompressedSize(ctx context.Context) (int64, error) {
1684
-
query := "SELECT COALESCE(SUM(LENGTH(operation)), 0) FROM plc_mempool"
1685
-
var size int64
1686
-
err := p.db.QueryRowContext(ctx, query).Scan(&size)
1687
-
return size, err
1688
1164
}
1689
1165
1690
1166
// ===== CURSOR OPERATIONS =====
+7
-14
internal/storage/types.go
+7
-14
internal/storage/types.go
···
26
26
LastChecked time.Time
27
27
Status int
28
28
IP string
29
-
IPv6 string // NEW
29
+
IPv6 string
30
30
IPResolvedAt time.Time
31
+
Valid bool
31
32
UpdatedAt time.Time
32
33
}
33
34
···
76
77
77
78
// EndpointFilter for querying endpoints
78
79
type EndpointFilter struct {
79
-
Type string // "pds", "labeler", etc.
80
+
Type string
80
81
Status string
81
82
MinUserCount int64
82
83
OnlyStale bool
84
+
OnlyValid bool
83
85
RecheckInterval time.Duration
84
-
Random bool // NEW: Return results in random order
86
+
Random bool
85
87
Limit int
86
88
Offset int
87
89
}
···
153
155
CumulativeCompressed int64 `json:"cumulative_compressed"`
154
156
}
155
157
156
-
// MempoolOperation represents an operation waiting to be bundled
157
-
type MempoolOperation struct {
158
-
ID int64
159
-
DID string
160
-
Operation string
161
-
CID string
162
-
CreatedAt time.Time
163
-
AddedAt time.Time
164
-
}
165
-
166
158
// ScanCursor stores scanning progress
167
159
type ScanCursor struct {
168
160
Source string
···
223
215
LastChecked time.Time
224
216
Status int
225
217
IP string
226
-
IPv6 string // NEW
218
+
IPv6 string
219
+
Valid bool // NEW
227
220
228
221
// From latest endpoint_scans (via JOIN)
229
222
LatestScan *struct {
+2
-2
internal/worker/scheduler.go
+2
-2
internal/worker/scheduler.go
+113
utils/import-labels.js
+113
utils/import-labels.js
···
import { file, write } from "bun";
import { join } from "path";
import { mkdir } from "fs/promises";
import { init, compress } from "@bokuweb/zstd-wasm";

// Splits a PLC operation-labels CSV (first column = bundle number) into one
// zstd-compressed CSV per bundle, written to <plc.bundle_dir>/labels.
//
// Usage: bun run utils/import-labels.js <path-to-csv-file>
//
// All data lines are grouped in memory before anything is written, so peak
// RAM usage is roughly the size of the input file.

// --- Configuration ---
const CSV_FILE = process.argv[2];
const CONFIG_FILE = "config.yaml";
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
// ---------------------

if (!CSV_FILE) {
  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
  process.exit(1);
}

console.log("========================================");
console.log("PLC Operation Labels Import (Bun + WASM)");
console.log("========================================");

// 1. Read and parse config
console.log(`Loading config from ${CONFIG_FILE}...`);
const configFile = await file(CONFIG_FILE).text();
const config = Bun.YAML.parse(configFile);
const bundleDir = config?.plc?.bundle_dir;

if (!bundleDir) {
  console.error("Error: Could not parse plc.bundle_dir from config.yaml");
  process.exit(1);
}

const FINAL_LABELS_DIR = join(bundleDir, "labels");
await mkdir(FINAL_LABELS_DIR, { recursive: true });

console.log(`CSV File: ${CSV_FILE}`);
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
console.log("");

// 2. Initialize Zstd WASM module
await init();

// --- Pass 1: Read entire file into memory and group by bundle ---
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
console.warn("This will use a large amount of RAM!");

const startTime = Date.now();
const bundles = new Map(); // Map<zero-padded bundle number, string[] of CSV lines>
let lineCount = 0;

/**
 * Files one complete CSV line under its bundle.
 *
 * Empty lines are ignored. The first non-empty line is dropped when it is the
 * "bundle,..." header. Lines without a comma, or whose first field is not a
 * plain decimal number, are reported and skipped.
 *
 * @param {string} line - One line of the CSV without its trailing "\n".
 */
function collectLine(line) {
  if (line === "") return;
  lineCount++;

  if (lineCount === 1 && line.startsWith("bundle,")) {
    return; // Skip header
  }

  const firstCommaIndex = line.indexOf(",");
  if (firstCommaIndex === -1) {
    console.warn(`Skipping malformed line: ${line}`);
    return;
  }

  const bundleNumStr = line.substring(0, firstCommaIndex);
  // The first field becomes part of the output file name, so accept digits
  // only; anything else (stray text, path separators) is treated as malformed.
  if (!/^\d+$/.test(bundleNumStr)) {
    console.warn(`Skipping malformed line: ${line}`);
    return;
  }
  const bundleKey = bundleNumStr.padStart(6, "0");

  // Add line to the correct bundle's array
  let bundleLines = bundles.get(bundleKey);
  if (bundleLines === undefined) {
    bundleLines = [];
    bundles.set(bundleKey, bundleLines);
  }
  bundleLines.push(line);
}

const inputFile = file(CSV_FILE);
const fileStream = inputFile.stream();
const decoder = new TextDecoder();
let remainder = "";

for await (const chunk of fileStream) {
  // stream: true makes the decoder buffer an incomplete multi-byte UTF-8
  // sequence at the end of a chunk instead of emitting replacement characters.
  const text = remainder + decoder.decode(chunk, { stream: true });
  const lines = text.split("\n");
  // The last piece is either "" (chunk ended on a newline) or a partial line
  // that is completed by the next chunk.
  remainder = lines.pop() ?? "";

  for (const line of lines) {
    collectLine(line);
  }
}

// Flush any bytes still buffered in the decoder, then handle the final line:
// a file that does not end with "\n" leaves its last record in `remainder`.
remainder += decoder.decode();
collectLine(remainder);

console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
console.log(`Found ${bundles.size} unique bundles.`);

// --- Pass 2: Compress and write each bundle ---
console.log("\nPass 2/2: Compressing and writing bundle files...");
let i = 0;
for (const [bundleKey, lines] of bundles.entries()) {
  i++;
  console.log(`  (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);

  // Join all lines for this bundle into one big string
  const content = lines.join("\n");

  // Compress the string
  const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);

  // Write the compressed data to the file
  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
  await write(outPath, compressedData);
}

// 3. Summary
const totalTime = (Date.now() - startTime) / 1000;
console.log("\n========================================");
console.log("Import Summary");
console.log("========================================");
console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
+91
utils/import-labels.sh
+91
utils/import-labels.sh
···
#!/bin/bash
# import-labels.sh
#
# Splits a PLC operation-labels CSV (first column = bundle number) into one
# zstd-compressed CSV per bundle: <plc.bundle_dir>/labels/NNNNNN.csv.zst.
# The first line of the CSV is always dropped as a header.
#
# Usage:    ./utils/import-labels.sh <csv-file>
# Requires: pv, sort, awk, zstd, and config.yaml in the current directory.

# -e: stop at the first failing command; -u: unset variables are errors;
# pipefail: a failure in any stage of a pipeline fails the whole pipeline
# instead of being hidden behind the exit status of the last stage.
set -euo pipefail

if [ $# -lt 1 ]; then
    echo "Usage: ./utils/import-labels.sh <csv-file>"
    exit 1
fi

CSV_FILE="$1"
CONFIG_FILE="config.yaml"

if [ ! -f "$CSV_FILE" ]; then
    echo "Error: CSV file not found" >&2
    exit 1
fi
if [ ! -f "$CONFIG_FILE" ]; then
    echo "Error: config.yaml not found" >&2
    exit 1
fi

# Fail early with a clear message rather than halfway through the pipeline.
for tool in pv sort awk zstd; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        echo "Error: required tool not found in PATH: $tool" >&2
        exit 1
    fi
done

# Extract bundle directory path: the first "bundle_dir:" key within the five
# lines following the top-level "plc:" key. Accepts double-quoted,
# single-quoted and unquoted values, and drops a trailing " # comment".
# "|| true" keeps set -e/pipefail from aborting when nothing matches, so the
# explicit error below is reported instead.
BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" \
    | grep -E "^[[:space:]]*bundle_dir:" \
    | head -1 \
    | sed -E \
        -e 's/^[[:space:]]*bundle_dir:[[:space:]]*//' \
        -e 's/[[:space:]]+#.*$//' \
        -e 's/[[:space:]]+$//' \
        -e 's/^"(.*)"$/\1/' \
        -e "s/^'(.*)'\$/\\1/" \
    || true)

if [ -z "$BUNDLE_DIR" ]; then
    echo "Error: Could not parse plc.bundle_dir from config.yaml" >&2
    exit 1
fi

FINAL_LABELS_DIR="$BUNDLE_DIR/labels"

echo "========================================"
echo "PLC Operation Labels Import (Sorted Pipe)"
echo "========================================"
echo "CSV File: $CSV_FILE"
echo "Output Dir: $FINAL_LABELS_DIR"
echo ""

# Ensure the final directory exists
mkdir -p "$FINAL_LABELS_DIR"

# Single quotes on purpose: inside double quotes the backticks would be
# command substitutions and would actually run pv and sort.
echo "Streaming, sorting, and compressing on the fly..."
echo 'This will take time. `pv` will show progress of the TAIL command.'
echo 'The `sort` command will run after `pv` is complete.'
echo ""

# Number of data lines (header excluded), used by pv to size its progress bar.
TOTAL_LINES=$(tail -n +2 "$CSV_FILE" | wc -l | tr -d '[:space:]')

# This is the single-pass pipeline:
#   tail  - drop the header line
#   pv    - progress, counted in lines
#   sort  - numeric sort on column 1; -s keeps the input order of lines that
#           share a bundle number
#   awk   - stream each run of equal bundle numbers into its own zstd process
# NOTE: the awk program is wrapped in single quotes, so it must not contain
# any single-quote character; \047 is used where a quote is needed.
tail -n +2 "$CSV_FILE" | \
    pv -l -s "$TOTAL_LINES" | \
    sort -s -t, -k1,1n | \
    awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
    # Closes the currently open zstd pipe, if any, and aborts when zstd
    # reported a failure so that a broken import is never reported as success.
    function close_bundle(    status) {
        if (cmd == "") {
            return
        }
        status = close(cmd)
        cmd = ""
        if (status != 0) {
            printf "Error: zstd failed for bundle %06d (does the output file already exist?)\n", last_bundle_num > "/dev/stderr"
            failed = 1
            exit 1
        }
    }

    # This awk script EXPECTS input sorted by bundle number (col 1)
    BEGIN {
        # last_bundle_num tracks the bundle we are currently writing
        last_bundle_num = -1
        # cmd holds the current zstd pipe command ("" when no pipe is open)
        cmd = ""
        failed = 0
    }
    {
        current_bundle_num = $1

        # Check if the bundle number has changed
        if (current_bundle_num != last_bundle_num) {

            # If it changed, and we have an old pipe open, close it
            close_bundle()

            # Create the new pipe command, writing to the final .zst file.
            # The path is wrapped in single quotes (\047) so that directories
            # containing spaces survive the shell that awk spawns.
            outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
            cmd = "zstd -T0 -o \047" outfile "\047"

            # Update the tracker
            last_bundle_num = current_bundle_num

            # Print progress to stderr
            printf "  -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
        }

        # Print the current line ($0) to the open pipe
        # The first time this runs for a bundle, it opens the pipe
        # Subsequent times, it writes to the already-open pipe
        print $0 | cmd
    }
    # END block: close the very last pipe
    END {
        close_bundle()
        if (!failed) {
            printf "  Finished. Total lines: %d\n", NR > "/dev/stderr"
        }
    }'

echo ""
echo "========================================"
echo "Import Summary"
echo "========================================"
echo "✓ Import completed successfully!"
echo "Label files are stored in: $FINAL_LABELS_DIR"