···2233import (
44 "context"
55+ "encoding/csv"
56 "fmt"
67 "io"
88+ "os"
99+ "path/filepath"
710 "sort"
1111+ "strconv"
1212+ "strings"
813 "time"
9141015 "github.com/atscan/atscand/internal/log"
1116 "github.com/atscan/atscand/internal/storage"
1717+ "github.com/klauspost/compress/zstd"
1218 plcbundle "tangled.org/atscan.net/plcbundle"
1319)
1420···385391386392 return history, nil
387393}
394394+395395+// GetBundleLabels reads labels from a compressed CSV file for a specific bundle
396396+func (bm *BundleManager) GetBundleLabels(ctx context.Context, bundleNum int) ([]*PLCOpLabel, error) {
397397+ // Define the path to the labels file
398398+ labelsDir := filepath.Join(bm.bundleDir, "labels")
399399+ labelsFile := filepath.Join(labelsDir, fmt.Sprintf("%06d.csv.zst", bundleNum))
400400+401401+ // Check if file exists
402402+ if _, err := os.Stat(labelsFile); os.IsNotExist(err) {
403403+ log.Verbose("No labels file found for bundle %d at %s", bundleNum, labelsFile)
404404+ // Return empty, not an error
405405+ return []*PLCOpLabel{}, nil
406406+ }
407407+408408+ // Open the Zstd-compressed file
409409+ file, err := os.Open(labelsFile)
410410+ if err != nil {
411411+ return nil, fmt.Errorf("failed to open labels file: %w", err)
412412+ }
413413+ defer file.Close()
414414+415415+ // Create a Zstd reader
416416+ zstdReader, err := zstd.NewReader(file)
417417+ if err != nil {
418418+ return nil, fmt.Errorf("failed to create zstd reader: %w", err)
419419+ }
420420+ defer zstdReader.Close()
421421+422422+ // Create a CSV reader
423423+ csvReader := csv.NewReader(zstdReader)
424424+ // We skipped the header, so no header read needed
425425+ // Set FieldsPerRecord to 7 for validation
426426+ //csvReader.FieldsPerRecord = 7
427427+428428+ var labels []*PLCOpLabel
429429+430430+ // Read all records
431431+ for {
432432+ // Check for context cancellation
433433+ if err := ctx.Err(); err != nil {
434434+ return nil, err
435435+ }
436436+437437+ record, err := csvReader.Read()
438438+ if err == io.EOF {
439439+ break // End of file
440440+ }
441441+ if err != nil {
442442+ log.Error("Error reading CSV record in %s: %v", labelsFile, err)
443443+ continue // Skip bad line
444444+ }
445445+446446+ // Parse the CSV record (which is []string)
447447+ label, err := parseLabelRecord(record)
448448+ if err != nil {
449449+ log.Error("Error parsing CSV data for bundle %d: %v", bundleNum, err)
450450+ continue // Skip bad data
451451+ }
452452+453453+ labels = append(labels, label)
454454+ }
455455+456456+ return labels, nil
457457+}
458458+459459+// parseLabelRecord converts a new format CSV record into a PLCOpLabel struct
460460+func parseLabelRecord(record []string) (*PLCOpLabel, error) {
461461+ // New format: 0:bundle, 1:position, 2:cid(short), 3:size, 4:confidence, 5:labels
462462+ if len(record) != 6 {
463463+ err := fmt.Errorf("invalid record length: expected 6, got %d", len(record))
464464+ // --- ADDED LOG ---
465465+ log.Warn("Skipping malformed CSV line: %v (data: %s)", err, strings.Join(record, ","))
466466+ // ---
467467+ return nil, err
468468+ }
469469+470470+ // 0:bundle
471471+ bundle, err := strconv.Atoi(record[0])
472472+ if err != nil {
473473+ // --- ADDED LOG ---
474474+ log.Warn("Skipping malformed CSV line: 'bundle' column: %v (data: %s)", err, strings.Join(record, ","))
475475+ // ---
476476+ return nil, fmt.Errorf("parsing 'bundle': %w", err)
477477+ }
478478+479479+ // 1:position
480480+ position, err := strconv.Atoi(record[1])
481481+ if err != nil {
482482+ // --- ADDED LOG ---
483483+ log.Warn("Skipping malformed CSV line: 'position' column: %v (data: %s)", err, strings.Join(record, ","))
484484+ // ---
485485+ return nil, fmt.Errorf("parsing 'position': %w", err)
486486+ }
487487+488488+ // 2:cid(short)
489489+ shortCID := record[2]
490490+491491+ // 3:size
492492+ size, err := strconv.Atoi(record[3])
493493+ if err != nil {
494494+ // --- ADDED LOG ---
495495+ log.Warn("Skipping malformed CSV line: 'size' column: %v (data: %s)", err, strings.Join(record, ","))
496496+ // ---
497497+ return nil, fmt.Errorf("parsing 'size': %w", err)
498498+ }
499499+500500+ // 4:confidence
501501+ confidence, err := strconv.ParseFloat(record[4], 64)
502502+ if err != nil {
503503+ // --- ADDED LOG ---
504504+ log.Warn("Skipping malformed CSV line: 'confidence' column: %v (data: %s)", err, strings.Join(record, ","))
505505+ // ---
506506+ return nil, fmt.Errorf("parsing 'confidence': %w", err)
507507+ }
508508+509509+ // 5:labels
510510+ detectors := strings.Split(record[5], ";")
511511+512512+ label := &PLCOpLabel{
513513+ Bundle: bundle,
514514+ Position: position,
515515+ CID: shortCID,
516516+ Size: size,
517517+ Confidence: confidence,
518518+ Detectors: detectors,
519519+ }
520520+521521+ return label, nil
522522+}
+10
internal/plc/types.go
···2828 Type string
2929 Endpoint string
3030}
3131+3232+// PLCOpLabel holds metadata from the label CSV file
3333+type PLCOpLabel struct {
3434+ Bundle int `json:"bundle"`
3535+ Position int `json:"position"`
3636+ CID string `json:"cid"`
3737+ Size int `json:"size"`
3838+ Confidence float64 `json:"confidence"`
3939+ Detectors []string `json:"detectors"`
4040+}
+113
utils/import-labels.js
···11+import { file, write } from "bun";
22+import { join } from "path";
33+import { mkdir } from "fs/promises";
44+import { init, compress } from "@bokuweb/zstd-wasm";
55+66+// --- Configuration ---
77+const CSV_FILE = process.argv[2];
88+const CONFIG_FILE = "config.yaml";
99+const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
1010+// ---------------------
1111+1212+if (!CSV_FILE) {
1313+ console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
1414+ process.exit(1);
1515+}
1616+1717+console.log("========================================");
1818+console.log("PLC Operation Labels Import (Bun + WASM)");
1919+console.log("========================================");
2020+2121+// 1. Read and parse config
2222+console.log(`Loading config from ${CONFIG_FILE}...`);
2323+const configFile = await file(CONFIG_FILE).text();
2424+const config = Bun.YAML.parse(configFile);
2525+const bundleDir = config?.plc?.bundle_dir;
2626+2727+if (!bundleDir) {
2828+ console.error("Error: Could not parse plc.bundle_dir from config.yaml");
2929+ process.exit(1);
3030+}
3131+3232+const FINAL_LABELS_DIR = join(bundleDir, "labels");
3333+await mkdir(FINAL_LABELS_DIR, { recursive: true });
3434+3535+console.log(`CSV File: ${CSV_FILE}`);
3636+console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
3737+console.log("");
3838+3939+// 2. Initialize Zstd WASM module
4040+await init();
4141+4242+// --- Pass 1: Read entire file into memory and group by bundle ---
4343+console.log("Pass 1/2: Reading and grouping all lines by bundle...");
4444+console.warn("This will use a large amount of RAM!");
4545+4646+const startTime = Date.now();
4747+const bundles = new Map(); // Map<string, string[]>
4848+let lineCount = 0;
4949+5050+const inputFile = file(CSV_FILE);
5151+const fileStream = inputFile.stream();
5252+const decoder = new TextDecoder();
5353+let remainder = "";
5454+5555+for await (const chunk of fileStream) {
5656+ const text = remainder + decoder.decode(chunk);
5757+ const lines = text.split("\n");
5858+ remainder = lines.pop() || "";
5959+6060+ for (const line of lines) {
6161+ if (line === "") continue;
6262+ lineCount++;
6363+6464+ if (lineCount === 1 && line.startsWith("bundle,")) {
6565+ continue; // Skip header
6666+ }
6767+6868+ const firstCommaIndex = line.indexOf(",");
6969+ if (firstCommaIndex === -1) {
7070+ console.warn(`Skipping malformed line: ${line}`);
7171+ continue;
7272+ }
7373+ const bundleNumStr = line.substring(0, firstCommaIndex);
7474+ const bundleKey = bundleNumStr.padStart(6, "0");
7575+7676+ // Add line to the correct bundle's array
7777+ if (!bundles.has(bundleKey)) {
7878+ bundles.set(bundleKey, []);
7979+ }
8080+ bundles.get(bundleKey).push(line);
8181+ }
8282+}
8383+// Note: We ignore any final `remainder` as it's likely an empty line
8484+8585+console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
8686+console.log(`Found ${bundles.size} unique bundles.`);
8787+8888+// --- Pass 2: Compress and write each bundle ---
8989+console.log("\nPass 2/2: Compressing and writing bundle files...");
9090+let i = 0;
9191+for (const [bundleKey, lines] of bundles.entries()) {
9292+ i++;
9393+ console.log(` (${i}/${bundles.size}) Compressing bundle ${bundleKey}...`);
9494+9595+ // Join all lines for this bundle into one big string
9696+ const content = lines.join("\n");
9797+9898+ // Compress the string
9999+ const compressedData = compress(Buffer.from(content), COMPRESSION_LEVEL);
100100+101101+ // Write the compressed data to the file
102102+ const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);
103103+ await write(outPath, compressedData);
104104+}
105105+106106+// 3. Clean up
107107+const totalTime = (Date.now() - startTime) / 1000;
108108+console.log("\n========================================");
109109+console.log("Import Summary");
110110+console.log("========================================");
111111+console.log(`✓ Import completed in ${totalTime.toFixed(2)} seconds.`);
112112+console.log(`Total lines processed: ${lineCount.toLocaleString()}`);
113113+console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);
+91
utils/import-labels.sh
···11+#!/bin/bash
22+# import-labels-v4-sorted-pipe.sh
33+44+set -e
55+66+if [ $# -lt 1 ]; then
77+ echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>"
88+ exit 1
99+fi
1010+1111+CSV_FILE="$1"
1212+CONFIG_FILE="config.yaml"
1313+1414+[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
1515+[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
1616+1717+# Extract bundle directory path
1818+BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
1919+2020+[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
2121+2222+FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
2323+2424+echo "========================================"
2525+echo "PLC Operation Labels Import (Sorted Pipe)"
2626+echo "========================================"
2727+echo "CSV File: $CSV_FILE"
2828+echo "Output Dir: $FINAL_LABELS_DIR"
2929+echo ""
3030+3131+# Ensure the final directory exists
3232+mkdir -p "$FINAL_LABELS_DIR"
3333+3434+echo "Streaming, sorting, and compressing on the fly..."
3535+echo "This will take time. `pv` will show progress of the TAIL command."
3636+echo "The `sort` command will run after `pv` is complete."
3737+echo ""
3838+3939+# This is the single-pass pipeline
4040+tail -n +2 "$CSV_FILE" | \
4141+ pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
4242+ sort -t, -k1,1n | \
4343+ awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
4444+ # This awk script EXPECTS input sorted by bundle number (col 1)
4545+ BEGIN {
4646+ # last_bundle_num tracks the bundle we are currently writing
4747+ last_bundle_num = -1
4848+ # cmd holds the current zstd pipe command
4949+ cmd = ""
5050+ }
5151+ {
5252+ current_bundle_num = $1
5353+5454+ # Check if the bundle number has changed
5555+ if (current_bundle_num != last_bundle_num) {
5656+5757+ # If it changed, and we have an old pipe open, close it
5858+ if (last_bundle_num != -1) {
5959+ close(cmd)
6060+ }
6161+6262+ # Create the new pipe command, writing to the final .zst file
6363+ outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
6464+ cmd = "zstd -T0 -o " outfile
6565+6666+ # Update the tracker
6767+ last_bundle_num = current_bundle_num
6868+6969+ # Print progress to stderr
7070+ printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
7171+ }
7272+7373+ # Print the current line ($0) to the open pipe
7474+ # The first time this runs for a bundle, it opens the pipe
7575+ # Subsequent times, it writes to the already-open pipe
7676+ print $0 | cmd
7777+ }
7878+ # END block: close the very last pipe
7979+ END {
8080+ if (last_bundle_num != -1) {
8181+ close(cmd)
8282+ }
8383+ printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
8484+ }'
8585+8686+echo ""
8787+echo "========================================"
8888+echo "Import Summary"
8989+echo "========================================"
9090+echo "✓ Import completed successfully!"
9191+echo "Label files are stored in: $FINAL_LABELS_DIR"