update

Changed files
+163 -57
internal
utils
+2 -1
.gitignore
··· 3 3 *.db-* 4 4 atscanner 5 5 .DS_Store 6 - plc_cache\.tmp/* 6 + plc_cache\.tmp/* 7 + plc_bundles*
+2 -2
config.yaml
··· 5 5 plc: 6 6 directory_url: "https://plc.directory" 7 7 scan_interval: "6h" 8 - cache_dir: "./plc_cache" # Cache directory 9 - use_cache: true # Enable caching 8 + bundle_dir: "./plc_bundles" 9 + use_cache: true 10 10 11 11 pds: 12 12 scan_interval: "15m"
+3 -3
internal/api/handlers.go
··· 121 121 lastBundle := bundles[len(bundles)-1] 122 122 123 123 // Compute file path 124 - filePath := filepath.Join(s.plcCacheDir, fmt.Sprintf("%06d.jsonl.zst", lastBundle.BundleNumber)) 124 + filePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", lastBundle.BundleNumber)) 125 125 126 126 operations, err := s.loadBundleOperations(filePath) 127 127 if err != nil { ··· 167 167 168 168 for _, bundle := range bundles { 169 169 // Compute file path 170 - filePath := filepath.Join(s.plcCacheDir, fmt.Sprintf("%06d.jsonl.zst", bundle.BundleNumber)) 170 + filePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", bundle.BundleNumber)) 171 171 172 172 operations, err := s.loadBundleOperations(filePath) 173 173 if err != nil { ··· 676 676 lastBundle, _ := s.db.GetLastBundleNumber(ctx) 677 677 678 678 for bundleNum := startBundle; bundleNum <= lastBundle && len(allOps) < count; bundleNum++ { 679 - bundlePath := filepath.Join(s.plcCacheDir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)) 679 + bundlePath := filepath.Join(s.plcBundleDir, fmt.Sprintf("%06d.jsonl.zst", bundleNum)) 680 680 681 681 ops, err := s.loadBundleOperations(bundlePath) 682 682 if err != nil {
+9 -9
internal/api/server.go
··· 14 14 ) 15 15 16 16 type Server struct { 17 - router *mux.Router 18 - server *http.Server 19 - db storage.Database 20 - plcClient *plc.Client 21 - plcCacheDir string // NEW: Store cache dir 17 + router *mux.Router 18 + server *http.Server 19 + db storage.Database 20 + plcClient *plc.Client 21 + plcBundleDir string // NEW: Store bundle dir 22 22 } 23 23 24 24 func NewServer(db storage.Database, apiCfg config.APIConfig, plcCfg config.PLCConfig) *Server { 25 25 s := &Server{ 26 - router: mux.NewRouter(), 27 - db: db, 28 - plcClient: plc.NewClient(plcCfg.DirectoryURL), 29 - plcCacheDir: plcCfg.CacheDir, // NEW 26 + router: mux.NewRouter(), 27 + db: db, 28 + plcClient: plc.NewClient(plcCfg.DirectoryURL), 29 + plcBundleDir: plcCfg.BundleDir, // NEW 30 30 } 31 31 32 32 s.setupRoutes()
+4 -4
internal/config/config.go
··· 23 23 DirectoryURL string `yaml:"directory_url"` 24 24 ScanInterval time.Duration `yaml:"scan_interval"` 25 25 BatchSize int `yaml:"batch_size"` 26 - CacheDir string `yaml:"cache_dir"` // NEW: Cache directory 27 - UseCache bool `yaml:"use_cache"` // NEW: Enable/disable cache 26 + BundleDir string `yaml:"bundle_dir"` // NEW: Bundle directory (must match the `bundle_dir` key in config.yaml) 27 + UseCache bool `yaml:"use_cache"` // NEW: Enable/disable cache 28 28 } 29 29 30 30 type PDSConfig struct { ··· 61 61 if cfg.PLC.BatchSize == 0 { 62 62 cfg.PLC.BatchSize = 1000 63 63 } 64 - if cfg.PLC.CacheDir == "" { 65 - cfg.PLC.CacheDir = "./plc_cache" 64 + if cfg.PLC.BundleDir == "" { 65 + cfg.PLC.BundleDir = "./plc_bundles" 66 66 } 67 67 if cfg.PDS.ScanInterval == 0 { 68 68 cfg.PDS.ScanInterval = 15 * time.Minute
+1 -1
internal/plc/scanner.go
··· 20 20 } 21 21 22 22 func NewScanner(db storage.Database, cfg config.PLCConfig) *Scanner { 23 - bundleManager, err := NewBundleManager(cfg.CacheDir, cfg.UseCache, db) 23 + bundleManager, err := NewBundleManager(cfg.BundleDir, cfg.UseCache, db) 24 24 if err != nil { 25 25 log.Error("Warning: failed to initialize bundle manager: %v", err) 26 26 bundleManager = &BundleManager{enabled: false}
+2 -2
internal/storage/types.go
··· 101 101 } 102 102 103 103 // GetFilePath returns the computed file path for this bundle 104 - func (b *PLCBundle) GetFilePath(cacheDir string) string { 105 - return filepath.Join(cacheDir, fmt.Sprintf("%06d.jsonl.zst", b.BundleNumber)) 104 + func (b *PLCBundle) GetFilePath(bundleDir string) string { 105 + return filepath.Join(bundleDir, fmt.Sprintf("%06d.jsonl.zst", b.BundleNumber)) 106 106 } 107 107 108 108 // OperationCount() returns 1000 (all bundles have exactly 1000 operations)
+140 -35
utils/verify-export.sh
··· 1 1 #!/bin/bash 2 - # verify-export.sh - Verify local PLC export endpoint against plc.directory 2 + # verify-export.sh - Verify local PLC export endpoint against multiple remotes 3 3 # Usage: ./verify-export.sh [after_timestamp] [count] 4 4 5 5 AFTER="${1:-}" 6 - COUNT="${2:-50}" 6 + COUNT="${2:-1000}" 7 7 LOCAL_URL="http://localhost:8080/api/v1/plc/export" 8 - REMOTE_URL="https://plc.directory/export" 8 + 9 + # Remote URLs (one per line: "name|url") 10 + REMOTES=( 11 + "plc.directory|https://plc.directory/export" 12 + "plc.wtf|https://plc.wtf/export" 13 + ) 9 14 10 15 echo "=== PLC Export Verification ===" 11 16 echo "Count: $COUNT" ··· 22 27 PARAMS="${PARAMS}&after=${AFTER}" 23 28 fi 24 29 30 + # Fetch from local 25 31 echo "Fetching from local API..." 26 32 echo "curl -s \"${LOCAL_URL}?${PARAMS}\"" 27 33 LOCAL_DATA=$(curl -s "${LOCAL_URL}?${PARAMS}") ··· 32 38 echo " Hash: $LOCAL_HASH" 33 39 echo "" 34 40 35 - echo "Fetching from plc.directory..." 36 - REMOTE_DATA=$(curl -s "${REMOTE_URL}?${PARAMS}") 37 - REMOTE_COUNT=$(echo "$REMOTE_DATA" | wc -l | tr -d ' ') 38 - REMOTE_HASH=$(echo "$REMOTE_DATA" | shasum -a 256 | cut -d' ' -f1) 41 + # Arrays to store remote data 42 + REMOTE_NAMES=() 43 + REMOTE_URLS=() 44 + REMOTE_DATA_ARR=() 45 + REMOTE_COUNT_ARR=() 46 + REMOTE_HASH_ARR=() 39 47 40 - echo " Operations: $REMOTE_COUNT" 41 - echo " Hash: $REMOTE_HASH" 42 - echo "" 48 + # Fetch from all remotes 49 + for i in "${!REMOTES[@]}"; do 50 + IFS='|' read -r name url <<< "${REMOTES[$i]}" 51 + 52 + echo "Fetching from ${name}..." 
53 + echo "curl -s \"${url}?${PARAMS}\"" 54 + 55 + data=$(curl -s "${url}?${PARAMS}") 56 + count=$(echo "$data" | wc -l | tr -d ' ') 57 + hash=$(echo "$data" | shasum -a 256 | cut -d' ' -f1) 58 + 59 + REMOTE_NAMES+=("$name") 60 + REMOTE_URLS+=("$url") 61 + REMOTE_DATA_ARR+=("$data") 62 + REMOTE_COUNT_ARR+=("$count") 63 + REMOTE_HASH_ARR+=("$hash") 64 + 65 + echo " Operations: $count" 66 + echo " Hash: $hash" 67 + echo "" 68 + done 43 69 44 70 # Compare 45 71 echo "=== COMPARISON ===" 46 - if [ "$LOCAL_HASH" = "$REMOTE_HASH" ]; then 47 - echo "✅ MATCH! Hashes are identical" 72 + 73 + # Check local vs each remote 74 + ALL_MATCH=true 75 + MATCHES=() 76 + for i in "${!REMOTE_NAMES[@]}"; do 77 + name="${REMOTE_NAMES[$i]}" 78 + hash="${REMOTE_HASH_ARR[$i]}" 79 + 80 + if [ "$LOCAL_HASH" = "$hash" ]; then 81 + echo "Local vs ${name}: ✅ MATCH" 82 + MATCHES+=("true") 83 + else 84 + echo "Local vs ${name}: ❌ MISMATCH" 85 + MATCHES+=("false") 86 + ALL_MATCH=false 87 + fi 88 + done 89 + 90 + echo "" 91 + 92 + # Check remotes against each other 93 + REMOTES_MATCH=true 94 + if [ ${#REMOTE_NAMES[@]} -gt 1 ]; then 95 + for ((i=0; i<${#REMOTE_NAMES[@]}-1; i++)); do 96 + for ((j=i+1; j<${#REMOTE_NAMES[@]}; j++)); do 97 + name1="${REMOTE_NAMES[$i]}" 98 + name2="${REMOTE_NAMES[$j]}" 99 + hash1="${REMOTE_HASH_ARR[$i]}" 100 + hash2="${REMOTE_HASH_ARR[$j]}" 101 + 102 + if [ "$hash1" = "$hash2" ]; then 103 + echo "${name1} vs ${name2}: ✅ MATCH" 104 + else 105 + echo "${name1} vs ${name2}: ❌ MISMATCH" 106 + REMOTES_MATCH=false 107 + fi 108 + done 109 + done 48 110 echo "" 49 - echo "Local and remote exports are in sync! 🎯" 111 + fi 112 + 113 + if [ "$ALL_MATCH" = "true" ]; then 114 + echo "🎉 ALL MATCH! All endpoints are in perfect sync! 🎯" 50 115 exit 0 51 116 else 52 - echo "❌ MISMATCH! 
Hashes differ" 117 + echo "❌ DISCREPANCIES DETECTED" 53 118 echo "" 54 119 55 - # Show counts 56 - if [ "$LOCAL_COUNT" != "$REMOTE_COUNT" ]; then 57 - echo "⚠️ Operation count differs:" 58 - echo " Local: $LOCAL_COUNT operations" 59 - echo " Remote: $REMOTE_COUNT operations" 60 - echo " Diff: $((REMOTE_COUNT - LOCAL_COUNT))" 61 - echo "" 62 - fi 120 + # Show counts comparison 121 + echo "=== OPERATION COUNTS ===" 122 + echo "Local: $LOCAL_COUNT operations" 123 + for i in "${!REMOTE_NAMES[@]}"; do 124 + name="${REMOTE_NAMES[$i]}" 125 + count="${REMOTE_COUNT_ARR[$i]}" 126 + diff=$((count - LOCAL_COUNT)) 127 + echo "${name}: ${count} operations (diff: ${diff})" 128 + done 129 + echo "" 63 130 64 - # Sample first and last operations 65 - echo "First operation (local):" 131 + # Sample operations from each source 132 + echo "=== FIRST OPERATIONS ===" 133 + echo "Local:" 66 134 echo "$LOCAL_DATA" | head -1 | jq -r '[.did, .cid, .createdAt] | @tsv' 2>/dev/null || echo "(parse error)" 67 135 echo "" 68 136 69 - echo "First operation (remote):" 70 - echo "$REMOTE_DATA" | head -1 | jq -r '[.did, .cid, .createdAt] | @tsv' 2>/dev/null || echo "(parse error)" 71 - echo "" 137 + for i in "${!REMOTE_NAMES[@]}"; do 138 + name="${REMOTE_NAMES[$i]}" 139 + data="${REMOTE_DATA_ARR[$i]}" 140 + echo "${name}:" 141 + echo "$data" | head -1 | jq -r '[.did, .cid, .createdAt] | @tsv' 2>/dev/null || echo "(parse error)" 142 + echo "" 143 + done 72 144 73 - echo "Last operation (local):" 145 + echo "=== LAST OPERATIONS ===" 146 + echo "Local:" 74 147 echo "$LOCAL_DATA" | tail -1 | jq -r '[.did, .cid, .createdAt] | @tsv' 2>/dev/null || echo "(parse error)" 75 148 echo "" 76 149 77 - echo "Last operation (remote):" 78 - echo "$REMOTE_DATA" | tail -1 | jq -r '[.did, .cid, .createdAt] | @tsv' 2>/dev/null || echo "(parse error)" 79 - echo "" 150 + for i in "${!REMOTE_NAMES[@]}"; do 151 + name="${REMOTE_NAMES[$i]}" 152 + data="${REMOTE_DATA_ARR[$i]}" 153 + echo "${name}:" 154 + echo "$data" | tail 
-1 | jq -r '[.did, .cid, .createdAt] | @tsv' 2>/dev/null || echo "(parse error)" 155 + echo "" 156 + done 80 157 81 - # Find first difference 82 - echo "Finding first difference..." 83 - diff <(echo "$LOCAL_DATA" | jq -r '.cid' 2>/dev/null | head -20) \ 84 - <(echo "$REMOTE_DATA" | jq -r '.cid' 2>/dev/null | head -20) || true 158 + # Find first differences for mismatches 159 + for i in "${!REMOTE_NAMES[@]}"; do 160 + if [ "${MATCHES[$i]}" = "false" ]; then 161 + name="${REMOTE_NAMES[$i]}" 162 + data="${REMOTE_DATA_ARR[$i]}" 163 + echo "=== FIRST DIFFERENCES (Local vs ${name}) ===" 164 + diff <(echo "$LOCAL_DATA" | jq -r '.cid' 2>/dev/null | head -20) \ 165 + <(echo "$data" | jq -r '.cid' 2>/dev/null | head -20) || true 166 + echo "" 167 + fi 168 + done 169 + 170 + # Check differences between remotes 171 + if [ "$REMOTES_MATCH" = "false" ] && [ ${#REMOTE_NAMES[@]} -gt 1 ]; then 172 + for ((i=0; i<${#REMOTE_NAMES[@]}-1; i++)); do 173 + for ((j=i+1; j<${#REMOTE_NAMES[@]}; j++)); do 174 + name1="${REMOTE_NAMES[$i]}" 175 + name2="${REMOTE_NAMES[$j]}" 176 + hash1="${REMOTE_HASH_ARR[$i]}" 177 + hash2="${REMOTE_HASH_ARR[$j]}" 178 + 179 + if [ "$hash1" != "$hash2" ]; then 180 + data1="${REMOTE_DATA_ARR[$i]}" 181 + data2="${REMOTE_DATA_ARR[$j]}" 182 + echo "=== FIRST DIFFERENCES (${name1} vs ${name2}) ===" 183 + diff <(echo "$data1" | jq -r '.cid' 2>/dev/null | head -20) \ 184 + <(echo "$data2" | jq -r '.cid' 2>/dev/null | head -20) || true 185 + echo "" 186 + fi 187 + done 188 + done 189 + fi 85 190 86 191 exit 1 87 192 fi