wip
1#!/bin/bash
2# import-labels-v4-sorted-pipe.sh
3
4set -e
5
6if [ $# -lt 1 ]; then
7 echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>"
8 exit 1
9fi
10
11CSV_FILE="$1"
12CONFIG_FILE="config.yaml"
13
14[ ! -f "$CSV_FILE" ] && echo "Error: CSV file not found" && exit 1
15[ ! -f "$CONFIG_FILE" ] && echo "Error: config.yaml not found" && exit 1
16
17# Extract bundle directory path
18BUNDLE_DIR=$(grep -A 5 "^plc:" "$CONFIG_FILE" | grep "bundle_dir:" | sed 's/.*bundle_dir: *"//' | sed 's/".*//' | head -1)
19
20[ -z "$BUNDLE_DIR" ] && echo "Error: Could not parse plc.bundle_dir from config.yaml" && exit 1
21
22FINAL_LABELS_DIR="$BUNDLE_DIR/labels"
23
24echo "========================================"
25echo "PLC Operation Labels Import (Sorted Pipe)"
26echo "========================================"
27echo "CSV File: $CSV_FILE"
28echo "Output Dir: $FINAL_LABELS_DIR"
29echo ""
30
31# Ensure the final directory exists
32mkdir -p "$FINAL_LABELS_DIR"
33
34echo "Streaming, sorting, and compressing on the fly..."
35echo "This will take time. `pv` will show progress of the TAIL command."
36echo "The `sort` command will run after `pv` is complete."
37echo ""
38
39# This is the single-pass pipeline
40tail -n +2 "$CSV_FILE" | \
41 pv -l -s $(tail -n +2 "$CSV_FILE" | wc -l) | \
42 sort -t, -k1,1n | \
43 awk -F',' -v final_dir="$FINAL_LABELS_DIR" '
44 # This awk script EXPECTS input sorted by bundle number (col 1)
45 BEGIN {
46 # last_bundle_num tracks the bundle we are currently writing
47 last_bundle_num = -1
48 # cmd holds the current zstd pipe command
49 cmd = ""
50 }
51 {
52 current_bundle_num = $1
53
54 # Check if the bundle number has changed
55 if (current_bundle_num != last_bundle_num) {
56
57 # If it changed, and we have an old pipe open, close it
58 if (last_bundle_num != -1) {
59 close(cmd)
60 }
61
62 # Create the new pipe command, writing to the final .zst file
63 outfile = sprintf("%s/%06d.csv.zst", final_dir, current_bundle_num)
64 cmd = "zstd -T0 -o " outfile
65
66 # Update the tracker
67 last_bundle_num = current_bundle_num
68
69 # Print progress to stderr
70 printf " -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
71 }
72
73 # Print the current line ($0) to the open pipe
74 # The first time this runs for a bundle, it opens the pipe
75 # Subsequent times, it writes to the already-open pipe
76 print $0 | cmd
77 }
78 # END block: close the very last pipe
79 END {
80 if (last_bundle_num != -1) {
81 close(cmd)
82 }
83 printf " Finished. Total lines: %d\n", NR > "/dev/stderr"
84 }'
85
86echo ""
87echo "========================================"
88echo "Import Summary"
89echo "========================================"
90echo "✓ Import completed successfully!"
91echo "Label files are stored in: $FINAL_LABELS_DIR"