at main 3.0 kB view raw
#!/bin/bash
# import-labels-v4-sorted-pipe.sh
#
# Splits a CSV of PLC operation labels into one zstd-compressed CSV per bundle.
#
# Usage:
#   ./utils/import-labels-v4-sorted-pipe.sh <csv-file>
#
# Input:
#   <csv-file>   Comma-separated file. The first line is skipped and column 1
#                of every remaining row is used as the bundle number.
#   config.yaml  Read from the current directory; the double-quoted
#                `bundle_dir:` value found within five lines after the
#                `plc:` line selects the bundle directory.
#
# Output:
#   <bundle_dir>/labels/NNNNNN.csv.zst, one file per distinct bundle number
#   (zero-padded to six digits). Progress is written to stderr.
#
# Requires: pv, zstd (plus the usual tail, sort, awk, grep, sed).
# Exit status: 0 on success, 1 on any usage, configuration or pipeline error.

# -e: stop on unhandled errors; -u: reject unset variables;
# pipefail: a failing stage anywhere in the import pipeline fails the pipeline,
# so the success summary is never printed after a broken run.
set -euo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

if [[ $# -lt 1 ]]; then
  echo "Usage: ./utils/import-labels-v4-sorted-pipe.sh <csv-file>" >&2
  exit 1
fi

CSV_FILE="$1"
CONFIG_FILE="config.yaml"

[[ -f "$CSV_FILE" ]] || die "CSV file not found"
[[ -f "$CONFIG_FILE" ]] || die "config.yaml not found"

# Fail early if an external tool the pipeline depends on is missing.
for tool in pv zstd; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool '$tool' is not installed"
done

# Extract bundle directory path.
# `|| true`: when grep matches nothing the pipeline fails under pipefail; let
# the emptiness check below report that with a readable message instead.
BUNDLE_DIR=$(
  grep -A 5 "^plc:" "$CONFIG_FILE" \
    | grep "bundle_dir:" \
    | sed 's/.*bundle_dir: *"//' \
    | sed 's/".*//' \
    | head -1
) || true

[[ -n "$BUNDLE_DIR" ]] || die "Could not parse plc.bundle_dir from config.yaml"

FINAL_LABELS_DIR="$BUNDLE_DIR/labels"

echo "========================================"
echo "PLC Operation Labels Import (Sorted Pipe)"
echo "========================================"
echo "CSV File:    $CSV_FILE"
echo "Output Dir:  $FINAL_LABELS_DIR"
echo ""

# Ensure the final directory exists
mkdir -p "$FINAL_LABELS_DIR"

echo "Streaming, sorting, and compressing on the fly..."
# Single-quoted on purpose: inside double quotes the backticks would be command
# substitutions and would actually execute `pv` and `sort`.
echo 'This will take time. `pv` will show progress of the TAIL command.'
echo 'The `sort` command will run after `pv` is complete.'
echo ""

# Number of data rows (header excluded), used as the expected size for pv.
TOTAL_LINES=$(tail -n +2 "$CSV_FILE" | wc -l)
# Some wc implementations pad the count with blanks; strip them.
TOTAL_LINES=${TOTAL_LINES//[[:space:]]/}

# This is the single-pass pipeline:
#   tail  drops the header row
#   pv    shows line-based progress of the read
#   sort  orders rows numerically by bundle number (column 1)
#   awk   streams each run of equal bundle numbers into its own zstd process
#
# The output directory is handed to awk through the environment rather than
# being spliced into the command string: the shell that awk spawns for each
# zstd command expands "$LABELS_OUT_DIR" itself, so directory names containing
# spaces or shell metacharacters are passed through intact.
tail -n +2 "$CSV_FILE" \
  | pv -l -s "$TOTAL_LINES" \
  | sort -t, -k1,1n \
  | LABELS_OUT_DIR="$FINAL_LABELS_DIR" awk -F',' '
    # This awk script EXPECTS input sorted by bundle number (col 1).

    # Close the currently open zstd pipe, if any, and record a failure when
    # the compressor reported a non-zero status.
    function finish_bundle() {
      if (cmd != "" && close(cmd) != 0) {
        printf "Error: compressing bundle %06d failed\n", last_bundle_num > "/dev/stderr"
        failed = 1
      }
      cmd = ""
    }

    BEGIN {
      # cmd holds the current zstd pipe command; empty means no pipe is open
      cmd = ""
      # have_bundle becomes 1 once the first row has been seen
      have_bundle = 0
      failed = 0
    }

    {
      current_bundle_num = $1

      # Start a new output file whenever the bundle number changes
      if (!have_bundle || current_bundle_num != last_bundle_num) {
        finish_bundle()
        if (failed) {
          exit 1
        }

        # The file name consists of digits only; the directory part is
        # expanded (quoted) by the shell that runs the command.
        cmd = sprintf("zstd -T0 -o \"$LABELS_OUT_DIR\"/%06d.csv.zst", current_bundle_num)

        last_bundle_num = current_bundle_num
        have_bundle = 1

        # Print progress to stderr
        printf "  -> Writing bundle %06d\n", current_bundle_num > "/dev/stderr"
      }

      # The first print for a bundle opens the pipe; later prints reuse it
      print $0 | cmd
    }

    END {
      # Close the very last pipe; a no-op when an earlier failure already
      # closed it.
      finish_bundle()
      if (failed) {
        exit 1
      }
      printf "  Finished. Total lines: %d\n", NR > "/dev/stderr"
    }
  ' || die "Import pipeline failed; files in $FINAL_LABELS_DIR may be incomplete"

echo ""
echo "========================================"
echo "Import Summary"
echo "========================================"
echo "✓ Import completed successfully!"
echo "Label files are stored in: $FINAL_LABELS_DIR"