wip
1import { file, write } from "bun";
2import { join } from "path";
3import { mkdir } from "fs/promises";
4import { init, compress } from "@bokuweb/zstd-wasm";
5
// --- Configuration ---
// The CSV path is the third entry of process.argv, i.e. the argument that
// follows the script path in the usage string printed below.
const [, , CSV_FILE] = process.argv;
const CONFIG_FILE = "config.yaml";
const COMPRESSION_LEVEL = 5; // zstd level 1-22 (5 is a good balance)
// ---------------------

// Without an input path there is nothing to import: print usage and bail out.
if (CSV_FILE === undefined || CSV_FILE === "") {
  console.error("Usage: bun run utils/import-labels.js <path-to-csv-file>");
  process.exit(1);
}
16
console.log("========================================");
console.log("PLC Operation Labels Import (Bun + WASM)");
console.log("========================================");

// 1. Read and parse config
console.log(`Loading config from ${CONFIG_FILE}...`);
const configFile = await file(CONFIG_FILE).text();
const config = Bun.YAML.parse(configFile);
const bundleDir = config?.plc?.bundle_dir;

// Require a non-empty string: join() below throws a TypeError on anything
// else (e.g. a YAML number), which would hide the real problem from the user.
if (typeof bundleDir !== "string" || bundleDir === "") {
  console.error(`Error: Could not parse plc.bundle_dir from ${CONFIG_FILE}`);
  process.exit(1);
}

// Per-bundle label files are written to <bundle_dir>/labels; create the
// directory (and any missing parents) up front.
const FINAL_LABELS_DIR = join(bundleDir, "labels");
await mkdir(FINAL_LABELS_DIR, { recursive: true });

console.log(`CSV File: ${CSV_FILE}`);
console.log(`Output Dir: ${FINAL_LABELS_DIR}`);
console.log("");

// 2. Initialize Zstd WASM module
await init();
41
// --- Pass 1: Read entire file into memory and group by bundle ---
console.log("Pass 1/2: Reading and grouping all lines by bundle...");
console.warn("This will use a large amount of RAM!");

const startTime = Date.now();
const bundles = new Map(); // Map<string, string[]>
let lineCount = 0;

/**
 * Records one CSV line under its bundle key.
 *
 * Empty lines are ignored. Every other line increments `lineCount`; the
 * first counted line is skipped if it is the `bundle,...` header, and lines
 * without a comma are reported and skipped. The bundle key is the text
 * before the first comma, left-padded with zeros to 6 characters.
 *
 * @param {string} line - A single line of the CSV, without its "\n".
 * @returns {void}
 */
function addLineToBundle(line) {
  if (line === "") return;
  lineCount++;

  if (lineCount === 1 && line.startsWith("bundle,")) {
    return; // Skip header
  }

  const firstCommaIndex = line.indexOf(",");
  if (firstCommaIndex === -1) {
    console.warn(`Skipping malformed line: ${line}`);
    return;
  }

  const bundleKey = line.substring(0, firstCommaIndex).padStart(6, "0");
  const bucket = bundles.get(bundleKey);
  if (bucket) {
    bucket.push(line);
  } else {
    bundles.set(bundleKey, [line]);
  }
}

const inputFile = file(CSV_FILE);
const fileStream = inputFile.stream();
const decoder = new TextDecoder();
let remainder = "";

for await (const chunk of fileStream) {
  // `stream: true` makes the decoder buffer an incomplete multi-byte UTF-8
  // sequence at the end of a chunk instead of emitting U+FFFD for it.
  const text = remainder + decoder.decode(chunk, { stream: true });
  const lines = text.split("\n");
  // The last piece may be a partial line; carry it into the next chunk.
  remainder = lines.pop() ?? "";

  for (const line of lines) {
    addLineToBundle(line);
  }
}

// Flush the decoder, then process whatever is left: a file that does not end
// with a newline still has its final record sitting in `remainder`.
remainder += decoder.decode();
if (remainder !== "") {
  addLineToBundle(remainder);
}

console.log(`Finished reading ${lineCount.toLocaleString()} lines.`);
console.log(`Found ${bundles.size} unique bundles.`);
87
// --- Pass 2: Compress and write each bundle ---
console.log("\nPass 2/2: Compressing and writing bundle files...");
let i = 0;
for (const [bundleKey, bundleLines] of bundles) {
  i += 1;
  const progress = `(${i}/${bundles.size})`;
  console.log(` ${progress} Compressing bundle ${bundleKey}...`);

  // Each bundle becomes one file named after its key.
  const outPath = join(FINAL_LABELS_DIR, `${bundleKey}.csv.zst`);

  // Re-assemble the bundle's lines into a single newline-separated payload,
  // compress it, and write the result.
  const payload = Buffer.from(bundleLines.join("\n"));
  await write(outPath, compress(payload, COMPRESSION_LEVEL));
}
105
// 3. Clean up
// Elapsed wall-clock time since pass 1 started, in seconds.
const totalTime = (Date.now() - startTime) / 1000;

// Print the closing report one entry per console.log call.
const summaryLines = [
  "\n========================================",
  "Import Summary",
  "========================================",
  `✓ Import completed in ${totalTime.toFixed(2)} seconds.`,
  `Total lines processed: ${lineCount.toLocaleString()}`,
  `Label files are stored in: ${FINAL_LABELS_DIR}`,
];
for (const summaryLine of summaryLines) {
  console.log(summaryLine);
}
113console.log(`Label files are stored in: ${FINAL_LABELS_DIR}`);