A Transparent and Verifiable Way to Sync the AT Protocol's PLC Directory

Compare changes

Choose any two refs to compare.

Changed files
+965 -11
bundle
cmd
plcbundle
scripts
+138
bundle/manager.go
··· 1205 1205 } 1206 1206 return filtered 1207 1207 } 1208 + 1209 + // ValidateBundle validates all operations in a bundle using go-didplc 1210 + func (m *Manager) ValidateBundle(ctx context.Context, bundleNumber int) error { 1211 + bundle, err := m.LoadBundle(ctx, bundleNumber) 1212 + if err != nil { 1213 + return err 1214 + } 1215 + 1216 + validator := NewValidator(m.logger) 1217 + return validator.ValidateBundleOperations(bundle.Operations) 1218 + } 1219 + 1220 + // ValidateAllBundles validates all bundles in the repository 1221 + func (m *Manager) ValidateAllBundles(ctx context.Context, progressFunc func(current, total int)) error { 1222 + index := m.GetIndex() 1223 + bundles := index.GetBundles() 1224 + 1225 + m.logger.Printf("Validating %d bundles...", len(bundles)) 1226 + 1227 + validator := NewValidator(m.logger) 1228 + errors := 0 1229 + 1230 + for i, meta := range bundles { 1231 + if progressFunc != nil { 1232 + progressFunc(i+1, len(bundles)) 1233 + } 1234 + 1235 + bundle, err := m.LoadBundle(ctx, meta.BundleNumber) 1236 + if err != nil { 1237 + m.logger.Printf("Failed to load bundle %d: %v", meta.BundleNumber, err) 1238 + errors++ 1239 + continue 1240 + } 1241 + 1242 + if err := validator.ValidateBundleOperations(bundle.Operations); err != nil { 1243 + m.logger.Printf("Bundle %d validation failed: %v", meta.BundleNumber, err) 1244 + errors++ 1245 + } 1246 + } 1247 + 1248 + if errors > 0 { 1249 + return fmt.Errorf("%d bundles failed validation", errors) 1250 + } 1251 + 1252 + return nil 1253 + } 1254 + 1255 + // ValidateBundleWithDetails validates a bundle and returns invalid operations 1256 + func (m *Manager) ValidateBundleWithDetails(ctx context.Context, bundleNumber int) ([]InvalidOperation, error) { 1257 + bundle, err := m.LoadBundle(ctx, bundleNumber) 1258 + if err != nil { 1259 + return nil, err 1260 + } 1261 + 1262 + validator := NewValidator(m.logger) 1263 + return validator.ValidateBundleOperationsWithDetails(bundle.Operations) 1264 + } 1265 + 1266 + // ValidateAllBundlesWithDetails validates all bundles and returns all invalid operations 1267 + func (m *Manager) ValidateAllBundlesWithDetails(ctx context.Context, progressFunc func(current, total int)) ([]InvalidOperation, error) { 1268 + index := m.GetIndex() 1269 + bundles := index.GetBundles() 1270 + 1271 + validator := NewValidator(m.logger) 1272 + var allInvalid []InvalidOperation 1273 + errors := 0 1274 + 1275 + for i, meta := range bundles { 1276 + if progressFunc != nil { 1277 + progressFunc(i+1, len(bundles)) 1278 + } 1279 + 1280 + bundle, err := m.LoadBundle(ctx, meta.BundleNumber) 1281 + if err != nil { 1282 + m.logger.Printf("Failed to load bundle %d: %v", meta.BundleNumber, err) 1283 + errors++ 1284 + continue 1285 + } 1286 + 1287 + invalid, err := validator.ValidateBundleOperationsWithDetails(bundle.Operations) 1288 + if err != nil { 1289 + m.logger.Printf("Bundle %d validation failed: %v", meta.BundleNumber, err) 1290 + errors++ 1291 + } 1292 + 1293 + allInvalid = append(allInvalid, invalid...) 1294 + } 1295 + 1296 + if errors > 0 { 1297 + return allInvalid, fmt.Errorf("%d bundles failed validation", errors) 1298 + } 1299 + 1300 + return allInvalid, nil 1301 + } 1302 + 1303 + // ValidateBundleStreaming validates a bundle and streams invalid operations 1304 + func (m *Manager) ValidateBundleStreaming(ctx context.Context, bundleNumber int, callback InvalidCallback) error { 1305 + bundle, err := m.LoadBundle(ctx, bundleNumber) 1306 + if err != nil { 1307 + return err 1308 + } 1309 + 1310 + validator := NewValidator(m.logger) 1311 + return validator.ValidateBundleOperationsStreaming(bundle.Operations, callback) 1312 + } 1313 + 1314 + // ValidateAllBundlesStreaming validates all bundles and streams invalid operations 1315 + func (m *Manager) ValidateAllBundlesStreaming(ctx context.Context, callback InvalidCallback, progressFunc func(current, total int)) error { 1316 + index := m.GetIndex() 1317 + bundles := index.GetBundles() 1318 + 1319 + validator := NewValidator(m.logger) 1320 + errors := 0 1321 + 1322 + for i, meta := range bundles { 1323 + if progressFunc != nil { 1324 + progressFunc(i+1, len(bundles)) 1325 + } 1326 + 1327 + bundle, err := m.LoadBundle(ctx, meta.BundleNumber) 1328 + if err != nil { 1329 + m.logger.Printf("Failed to load bundle %d: %v", meta.BundleNumber, err) 1330 + errors++ 1331 + continue 1332 + } 1333 + 1334 + if err := validator.ValidateBundleOperationsStreaming(bundle.Operations, callback); err != nil { 1335 + // Errors already streamed via callback, just count them 1336 + errors++ 1337 + } 1338 + } 1339 + 1340 + if errors > 0 { 1341 + return fmt.Errorf("%d bundles had validation errors", errors) 1342 + } 1343 + 1344 + return nil 1345 + }
+174
bundle/validator.go
··· 1 + package bundle 2 + 3 + import ( 4 + "encoding/json" 5 + "fmt" 6 + "time" 7 + 8 + didplc "github.com/did-method-plc/go-didplc" 9 + "tangled.org/atscan.net/plcbundle/plc" 10 + ) 11 + 12 + // Validator validates PLC operations using go-didplc 13 + type Validator struct { 14 + logger Logger 15 + } 16 + 17 + // InvalidOperation represents an operation that failed validation 18 + type InvalidOperation struct { 19 + CID string 20 + DID string 21 + Reason string 22 + } 23 + 24 + // InvalidCallback is called immediately when an invalid operation is found 25 + type InvalidCallback func(InvalidOperation) 26 + 27 + // NewValidator creates a new validator 28 + func NewValidator(logger Logger) *Validator { 29 + return &Validator{ 30 + logger: logger, 31 + } 32 + } 33 + 34 + // ValidateBundleOperations validates all operations (simple version, returns error only) 35 + func (v *Validator) ValidateBundleOperations(ops []plc.PLCOperation) error { 36 + _, err := v.ValidateBundleOperationsWithDetails(ops) 37 + return err 38 + } 39 + 40 + // ValidateBundleOperationsWithDetails validates and returns details about invalid operations 41 + func (v *Validator) ValidateBundleOperationsWithDetails(ops []plc.PLCOperation) ([]InvalidOperation, error) { 42 + var invalid []InvalidOperation 43 + 44 + // Use streaming validation but collect results 45 + err := v.ValidateBundleOperationsStreaming(ops, func(inv InvalidOperation) { 46 + invalid = append(invalid, inv) 47 + }) 48 + 49 + return invalid, err 50 + } 51 + 52 + // ValidateBundleOperationsStreaming validates and streams invalid operations via callback 53 + func (v *Validator) ValidateBundleOperationsStreaming(ops []plc.PLCOperation, callback InvalidCallback) error { 54 + if len(ops) == 0 { 55 + return nil 56 + } 57 + 58 + // First pass: validate each operation individually and parse 59 + opsByDID := make(map[string][]didplc.LogEntry) 60 + opCIDMap := make(map[string]string) // CID -> DID mapping 61 + parseErrors := 0 62 + validationErrors := 0 63 + 64 + for _, op := range ops { 65 + opCIDMap[op.CID] = op.DID 66 + 67 + // Try to parse operation 68 + var opEnum didplc.OpEnum 69 + if err := parseOperationToEnum(op, &opEnum); err != nil { 70 + if callback != nil { 71 + callback(InvalidOperation{ 72 + CID: op.CID, 73 + DID: op.DID, 74 + Reason: fmt.Sprintf("parse error: %v", err), 75 + }) 76 + } 77 + parseErrors++ 78 + continue 79 + } 80 + 81 + // Create log entry 82 + logEntry := didplc.LogEntry{ 83 + DID: op.DID, 84 + CID: op.CID, 85 + CreatedAt: op.CreatedAt.Format(time.RFC3339Nano), 86 + Nullified: op.IsNullified(), 87 + Operation: opEnum, 88 + } 89 + 90 + // Validate individual entry (checks CID match, signature for genesis, etc.) 91 + if err := logEntry.Validate(); err != nil { 92 + if callback != nil { 93 + callback(InvalidOperation{ 94 + CID: op.CID, 95 + DID: op.DID, 96 + Reason: fmt.Sprintf("validation error: %v", err), 97 + }) 98 + } 99 + validationErrors++ 100 + // Still add to chain for chain validation (some errors might be at chain level) 101 + } 102 + 103 + opsByDID[op.DID] = append(opsByDID[op.DID], logEntry) 104 + } 105 + 106 + // Second pass: validate chains (chronological order, nullification, etc.) 107 + chainErrors := 0 108 + for did, entries := range opsByDID { 109 + if err := didplc.VerifyOpLog(entries); err != nil { 110 + // Chain validation failed - report which specific operations are affected 111 + // Try to be more specific about which operations caused the failure 112 + errMsg := err.Error() 113 + 114 + if callback != nil { 115 + // For chain errors, report all operations in the chain 116 + // (we don't know which specific one caused it without more detailed analysis) 117 + for _, entry := range entries { 118 + callback(InvalidOperation{ 119 + CID: entry.CID, 120 + DID: did, 121 + Reason: fmt.Sprintf("chain error: %v", errMsg), 122 + }) 123 + } 124 + } 125 + chainErrors++ 126 + } 127 + } 128 + 129 + totalErrors := parseErrors + validationErrors + chainErrors 130 + if totalErrors > 0 { 131 + return fmt.Errorf("%d parse errors, %d validation errors, %d chain errors", 132 + parseErrors, validationErrors, chainErrors) 133 + } 134 + 135 + return nil 136 + } 137 + 138 + // parseOperationToEnum converts plc.PLCOperation to didplc.OpEnum 139 + func parseOperationToEnum(op plc.PLCOperation, opEnum *didplc.OpEnum) error { 140 + // Try to use RawJSON first for exact parsing 141 + if len(op.RawJSON) > 0 { 142 + // Extract just the operation part from the full record 143 + var fullRecord map[string]interface{} 144 + if err := json.Unmarshal(op.RawJSON, &fullRecord); err != nil { 145 + return fmt.Errorf("failed to unmarshal RawJSON: %w", err) 146 + } 147 + 148 + // Get the "operation" field 149 + if opData, ok := fullRecord["operation"]; ok { 150 + // Re-marshal just the operation data 151 + data, err := json.Marshal(opData) 152 + if err != nil { 153 + return fmt.Errorf("failed to marshal operation: %w", err) 154 + } 155 + 156 + if err := json.Unmarshal(data, opEnum); err != nil { 157 + return fmt.Errorf("failed to unmarshal into OpEnum: %w", err) 158 + } 159 + return nil 160 + } 161 + } 162 + 163 + // Fallback: use the Operation map 164 + data, err := json.Marshal(op.Operation) 165 + if err != nil { 166 + return fmt.Errorf("failed to marshal operation: %w", err) 167 + } 168 + 169 + if err := json.Unmarshal(data, opEnum); err != nil { 170 + return fmt.Errorf("failed to unmarshal into OpEnum: %w", err) 171 + } 172 + 173 + return nil 174 + }
+156 -10
cmd/plcbundle/main.go
··· 82 82 cmdServe() 83 83 case "compare": 84 84 cmdCompare() 85 + case "validate-plc": 86 + cmdValidatePLC() 85 87 case "version": 86 88 fmt.Printf("plcbundle version %s\n", version) 87 89 fmt.Printf(" commit: %s\n", gitCommit) ··· 867 869 } 868 870 869 871 ctx := context.Background() 870 - ops, err := mgr.ExportOperations(ctx, afterTime, *count) 871 - if err != nil { 872 - fmt.Fprintf(os.Stderr, "Export failed: %v\n", err) 873 - os.Exit(1) 874 - } 875 872 876 - // Output as JSONL 877 - for _, op := range ops { 878 - if len(op.RawJSON) > 0 { 879 - fmt.Println(string(op.RawJSON)) 873 + // OLD WAY (loads everything into memory): 874 + // ops, err := mgr.ExportOperations(ctx, afterTime, *count) 875 + 876 + // NEW WAY (streams immediately): 877 + index := mgr.GetIndex() 878 + bundles := index.GetBundles() 879 + 880 + exported := 0 881 + 882 + for _, meta := range bundles { 883 + // Stop if we hit the count 884 + if *count > 0 && exported >= *count { 885 + break 886 + } 887 + 888 + // Skip bundles before afterTime 889 + if !afterTime.IsZero() && meta.EndTime.Before(afterTime) { 890 + continue 891 + } 892 + 893 + // Load bundle 894 + bundle, err := mgr.LoadBundle(ctx, meta.BundleNumber) 895 + if err != nil { 896 + fmt.Fprintf(os.Stderr, "Warning: failed to load bundle %d: %v\n", meta.BundleNumber, err) 897 + continue 898 + } 899 + 900 + // Output operations immediately (streaming) 901 + for _, op := range bundle.Operations { 902 + if !afterTime.IsZero() && op.CreatedAt.Before(afterTime) { 903 + continue 904 + } 905 + 906 + if len(op.RawJSON) > 0 { 907 + fmt.Println(string(op.RawJSON)) 908 + } 909 + 910 + exported++ 911 + if *count > 0 && exported >= *count { 912 + break 913 + } 880 914 } 881 915 } 882 916 883 - fmt.Fprintf(os.Stderr, "Exported %d operations\n", len(ops)) 917 + fmt.Fprintf(os.Stderr, "Exported %d operations\n", exported) 884 918 } 885 919 886 920 func cmdBackfill() { ··· 985 1019 fmt.Fprintf(os.Stderr, " Range: %06d - %06d\n", *startFrom, currentBundle-1) 986 1020 } 987 1021 1022 + func cmdValidatePLC() { 1023 + fs := flag.NewFlagSet("validate-plc", flag.ExitOnError) 1024 + bundleNum := fs.Int("bundle", 0, "specific bundle to validate (0 = all)") 1025 + csvOutput := fs.Bool("csv", false, "output invalid operations as CSV (cid,reason)") 1026 + fs.Parse(os.Args[2:]) 1027 + 1028 + mgr, _, err := getManager("") 1029 + if err != nil { 1030 + fmt.Fprintf(os.Stderr, "Error: %v\n", err) 1031 + os.Exit(1) 1032 + } 1033 + defer mgr.Close() 1034 + 1035 + ctx := context.Background() 1036 + 1037 + // CSV callback - outputs immediately to stdout 1038 + invalidCount := 0 1039 + csvCallback := func(inv bundle.InvalidOperation) { 1040 + if invalidCount == 0 { 1041 + // Print header once 1042 + fmt.Println("cid,reason") 1043 + } 1044 + invalidCount++ 1045 + 1046 + // Escape reason for CSV 1047 + reason := inv.Reason 1048 + if strings.Contains(reason, ",") || strings.Contains(reason, "\"") { 1049 + reason = "\"" + strings.ReplaceAll(reason, "\"", "\"\"") + "\"" 1050 + } 1051 + 1052 + // Output immediately to stdout 1053 + fmt.Printf("%s,%s\n", inv.CID, reason) 1054 + } 1055 + 1056 + if *bundleNum > 0 { 1057 + // Validate single bundle 1058 + if !*csvOutput { 1059 + fmt.Fprintf(os.Stderr, "Validating bundle %06d using go-didplc...\n", *bundleNum) 1060 + } 1061 + 1062 + start := time.Now() 1063 + var err error 1064 + 1065 + if *csvOutput { 1066 + err = mgr.ValidateBundleStreaming(ctx, *bundleNum, csvCallback) 1067 + } else { 1068 + err = mgr.ValidateBundleStreaming(ctx, *bundleNum, nil) 1069 + } 1070 + 1071 + elapsed := time.Since(start) 1072 + 1073 + if *csvOutput { 1074 + if invalidCount == 0 { 1075 + // No invalid operations, still print header 1076 + fmt.Println("cid,reason") 1077 + } 1078 + fmt.Fprintf(os.Stderr, "# Validation complete: %d invalid operations (took %s)\n", 1079 + invalidCount, elapsed.Round(time.Millisecond)) 1080 + } else { 1081 + if err != nil { 1082 + fmt.Fprintf(os.Stderr, "โœ— Validation failed: %v\n", err) 1083 + os.Exit(1) 1084 + } 1085 + fmt.Fprintf(os.Stderr, "โœ“ Bundle %06d is valid (took %s)\n", 1086 + *bundleNum, elapsed.Round(time.Millisecond)) 1087 + } 1088 + } else { 1089 + // Validate all bundles 1090 + index := mgr.GetIndex() 1091 + total := index.Count() 1092 + 1093 + if !*csvOutput { 1094 + fmt.Fprintf(os.Stderr, "Validating all %d bundles using go-didplc...\n\n", total) 1095 + } else { 1096 + fmt.Fprintf(os.Stderr, "# Validating %d bundles...\n", total) 1097 + } 1098 + 1099 + start := time.Now() 1100 + 1101 + err := mgr.ValidateAllBundlesStreaming(ctx, 1102 + func(inv bundle.InvalidOperation) { 1103 + if *csvOutput { 1104 + csvCallback(inv) 1105 + } 1106 + }, 1107 + func(current, total int) { 1108 + // Progress to stderr so it doesn't interfere with CSV 1109 + if current%10 == 0 || current == total { 1110 + fmt.Fprintf(os.Stderr, "\r# Progress: %d/%d (%.1f%%) ", 1111 + current, total, float64(current)/float64(total)*100) 1112 + } 1113 + }) 1114 + 1115 + elapsed := time.Since(start) 1116 + 1117 + if *csvOutput { 1118 + if invalidCount == 0 { 1119 + fmt.Println("cid,reason") 1120 + } 1121 + fmt.Fprintf(os.Stderr, "\n# Validation complete: %d invalid operations (took %s)\n", 1122 + invalidCount, elapsed.Round(time.Second)) 1123 + } else { 1124 + fmt.Fprintf(os.Stderr, "\n") 1125 + if err != nil { 1126 + fmt.Fprintf(os.Stderr, "โœ— Validation failed: %v\n", err) 1127 + os.Exit(1) 1128 + } 1129 + fmt.Fprintf(os.Stderr, "โœ“ All bundles valid (took %s)\n", elapsed.Round(time.Second)) 1130 + } 1131 + } 1132 + } 1133 + 988 1134 func cmdMempool() { 989 1135 fs := flag.NewFlagSet("mempool", flag.ExitOnError) 990 1136 clear := fs.Bool("clear", false, "clear the mempool")
+45
cmd/plcbundle/server.go
··· 299 299 }) 300 300 } 301 301 302 + // Validation endpoint 303 + mux.HandleFunc("/validate/", func(w http.ResponseWriter, r *http.Request) { 304 + handleValidate(w, r, mgr) 305 + }) 306 + 302 307 return mux 303 308 } 304 309 ··· 829 834 time.Sleep(500 * time.Millisecond) 830 835 } 831 836 } 837 + 838 + // handleValidate validates a bundle via HTTP 839 + func handleValidate(w http.ResponseWriter, r *http.Request, mgr *bundle.Manager) { 840 + // Extract bundle number from URL 841 + path := strings.TrimPrefix(r.URL.Path, "/validate/") 842 + 843 + var bundleNum int 844 + if _, err := fmt.Sscanf(path, "%d", &bundleNum); err != nil { 845 + http.Error(w, "Invalid bundle number", http.StatusBadRequest) 846 + return 847 + } 848 + 849 + ctx := r.Context() 850 + 851 + // Validate the bundle 852 + start := time.Now() 853 + err := mgr.ValidateBundle(ctx, bundleNum) 854 + elapsed := time.Since(start) 855 + 856 + w.Header().Set("Content-Type", "application/json") 857 + 858 + if err != nil { 859 + result := map[string]interface{}{ 860 + "bundle_number": bundleNum, 861 + "valid": false, 862 + "error": err.Error(), 863 + "elapsed_ms": elapsed.Milliseconds(), 864 + } 865 + w.WriteHeader(http.StatusOK) // Still 200, just invalid result 866 + json.NewEncoder(w).Encode(result) 867 + return 868 + } 869 + 870 + result := map[string]interface{}{ 871 + "bundle_number": bundleNum, 872 + "valid": true, 873 + "elapsed_ms": elapsed.Milliseconds(), 874 + } 875 + json.NewEncoder(w).Encode(result) 876 + }
+28 -1
go.mod
··· 1 1 module tangled.org/atscan.net/plcbundle 2 2 3 - go 1.23 3 + go 1.24 4 4 5 5 require github.com/klauspost/compress v1.18.1 6 6 7 7 require github.com/gorilla/websocket v1.5.3 8 + 9 + require ( 10 + github.com/bluesky-social/indigo v0.0.0-20251009212240-20524de167fe // indirect 11 + github.com/did-method-plc/go-didplc v0.0.0-20251009212921-7b7a252b8019 // indirect 12 + github.com/ipfs/go-block-format v0.2.0 // indirect 13 + github.com/ipfs/go-cid v0.4.1 // indirect 14 + github.com/ipfs/go-ipfs-util v0.0.3 // indirect 15 + github.com/ipfs/go-ipld-cbor v0.1.0 // indirect 16 + github.com/ipfs/go-ipld-format v0.6.0 // indirect 17 + github.com/klauspost/cpuid/v2 v2.2.7 // indirect 18 + github.com/minio/sha256-simd v1.0.1 // indirect 19 + github.com/mr-tron/base58 v1.2.0 // indirect 20 + github.com/multiformats/go-base32 v0.1.0 // indirect 21 + github.com/multiformats/go-base36 v0.2.0 // indirect 22 + github.com/multiformats/go-multibase v0.2.0 // indirect 23 + github.com/multiformats/go-multihash v0.2.3 // indirect 24 + github.com/multiformats/go-varint v0.0.7 // indirect 25 + github.com/polydawn/refmt v0.89.1-0.20221221234430-40501e09de1f // indirect 26 + github.com/spaolacci/murmur3 v1.1.0 // indirect 27 + github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e // indirect 28 + gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b // indirect 29 + gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02 // indirect 30 + golang.org/x/crypto v0.21.0 // indirect 31 + golang.org/x/sys v0.22.0 // indirect 32 + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect 33 + lukechampine.com/blake3 v1.2.1 // indirect 34 + )
+68
go.sum
··· 1 + github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 2 + github.com/bluesky-social/indigo v0.0.0-20251009212240-20524de167fe h1:VBhaqE5ewQgXbY5SfSWFZC/AwHFo7cHxZKFYi2ce9Yo= 3 + github.com/bluesky-social/indigo v0.0.0-20251009212240-20524de167fe/go.mod h1:RuQVrCGm42QNsgumKaR6se+XkFKfCPNwdCiTvqKRUck= 4 + github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= 5 + github.com/did-method-plc/go-didplc v0.0.0-20251009212921-7b7a252b8019 h1:MhDee1P3Zar8u72U6RtOKvzSd7dBAU3l2hhrOLQsfB0= 6 + github.com/did-method-plc/go-didplc v0.0.0-20251009212921-7b7a252b8019/go.mod h1:dBm0+R8Diqo90As3Q6p2wXAdrGXJgPEWBKUnpV5SUzI= 7 + github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= 8 + github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= 1 9 github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= 2 10 github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 11 + github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= 12 + github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= 13 + github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s= 14 + github.com/ipfs/go-cid v0.4.1/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk= 15 + github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0= 16 + github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs= 17 + github.com/ipfs/go-ipld-cbor v0.1.0 h1:dx0nS0kILVivGhfWuB6dUpMa/LAwElHPw1yOGYopoYs= 18 + github.com/ipfs/go-ipld-cbor v0.1.0/go.mod h1:U2aYlmVrJr2wsUBU67K4KgepApSZddGRDWBYR0H4sCk= 19 + github.com/ipfs/go-ipld-format v0.6.0 h1:VEJlA2kQ3LqFSIm5Vu6eIlSxD/Ze90xtc4Meten1F5U= 20 + github.com/ipfs/go-ipld-format v0.6.0/go.mod h1:g4QVMTn3marU3qXchwjpKPKgJv+zF+OlaKMyhJ4LHPg= 21 + github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= 3 22 github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= 4 23 github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= 24 + github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= 25 + github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 26 + github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= 27 + github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= 28 + github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= 29 + github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= 30 + github.com/multiformats/go-base32 v0.1.0 h1:pVx9xoSPqEIQG8o+UbAe7DNi51oej1NtK+aGkbLYxPE= 31 + github.com/multiformats/go-base32 v0.1.0/go.mod h1:Kj3tFY6zNr+ABYMqeUNeGvkIC/UYgtWibDcT0rExnbI= 32 + github.com/multiformats/go-base36 v0.2.0 h1:lFsAbNOGeKtuKozrtBsAkSVhv1p9D0/qedU9rQyccr0= 33 + github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a1UV0xHgWc0hkp4= 34 + github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= 35 + github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= 36 + github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= 37 + github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= 38 + github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/nEGOHFS8= 39 + github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU= 40 + github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 41 + github.com/polydawn/refmt v0.89.1-0.20221221234430-40501e09de1f h1:VXTQfuJj9vKR4TCkEuWIckKvdHFeJH/huIFJ9/cXOB0= 42 + github.com/polydawn/refmt v0.89.1-0.20221221234430-40501e09de1f/go.mod h1:/zvteZs/GwLtCgZ4BL6CBsk9IKIlexP43ObX9AxTqTw= 43 + github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 44 + github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= 45 + github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= 46 + github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM= 47 + github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 48 + github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 49 + github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= 50 + github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= 51 + github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e h1:28X54ciEwwUxyHn9yrZfl5ojgF4CBNLWX7LR0rvBkf4= 52 + github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= 53 + gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b h1:CzigHMRySiX3drau9C6Q5CAbNIApmLdat5jPMqChvDA= 54 + gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b/go.mod h1:/y/V339mxv2sZmYYR64O07VuCpdNZqCTwO8ZcouTMI8= 55 + gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02 h1:qwDnMxjkyLmAFgcfgTnfJrmYKWhHnci3GjDqcZp1M3Q= 56 + gitlab.com/yawning/tuplehash v0.0.0-20230713102510-df83abbf9a02/go.mod h1:JTnUj0mpYiAsuZLmKjTx/ex3AtMowcCgnE7YNyCEP0I= 57 + golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 58 + golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= 59 + golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= 60 + golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 61 + golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 62 + golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 63 + golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= 64 + golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 65 + golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 66 + golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 67 + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= 68 + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= 69 + gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 70 + gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 71 + lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= 72 + lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=
+356
scripts/detect-invalid-ops.py
··· 1 + #!/usr/bin/env python3 2 + """ 3 + detect-invalid-ops.py - Detect invalid PLC operations (optimized for scale) 4 + 5 + Validates PLC operations according to did:plc specification v0.2.1 6 + Optimized for processing millions of operations efficiently. 7 + """ 8 + 9 + import sys 10 + import json 11 + import csv 12 + import base64 13 + import time 14 + from typing import List, Optional, Dict, Any 15 + from collections import defaultdict 16 + 17 + try: 18 + from cryptography.hazmat.primitives.asymmetric import ec 19 + from cryptography.hazmat.backends import default_backend 20 + HAS_CRYPTO = True 21 + except ImportError: 22 + HAS_CRYPTO = False 23 + 24 + # Performance: Pre-compile valid field sets 25 + VALID_TOP_FIELDS = frozenset({'did', 'operation', 'cid', 'nullified', 'createdAt'}) 26 + VALID_PLC_OP_FIELDS = frozenset({'type', 'rotationKeys', 'verificationMethods', 27 + 'alsoKnownAs', 'services', 'prev', 'sig'}) 28 + VALID_LEGACY_FIELDS = frozenset({'type', 'signingKey', 'recoveryKey', 29 + 'handle', 'service', 'prev', 'sig'}) 30 + VALID_TOMBSTONE_FIELDS = frozenset({'type', 'prev', 'sig'}) 31 + VALID_BASE64URL_CHARS = frozenset('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_') 32 + 33 + # EC curve orders for High-S check 34 + P256_ORDER = 0xFFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551 35 + K256_ORDER = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141 36 + P256_HALF_ORDER = P256_ORDER // 2 37 + K256_HALF_ORDER = K256_ORDER // 2 38 + 39 + 40 + class OperationValidator: 41 + def __init__(self, skip_high_s=False): 42 + self.op_count = 0 43 + self.invalid_count = 0 44 + self.skip_high_s = skip_high_s or not HAS_CRYPTO 45 + self.start_time = time.time() 46 + self.last_progress_time = time.time() 47 + self.reason_counts = defaultdict(int) 48 + 49 + def log_progress(self, message: str): 50 + print(f"[PROGRESS] {message}", file=sys.stderr, flush=True) 51 + 52 + def check_extra_fields(self, operation: Dict[str, Any]) -> Optional[str]: 53 + """Check for extra fields - optimized""" 54 + # Fast path: check top-level first (most common) 55 + extra_top = set(operation.keys()) - VALID_TOP_FIELDS 56 + if extra_top: 57 + return f"extra-top-level:{','.join(sorted(extra_top))}" 58 + 59 + op_data = operation.get('operation', {}) 60 + op_type = op_data.get('type') 61 + 62 + # Fast path: direct field set comparison 63 + op_keys = set(op_data.keys()) 64 + 65 + if op_type == 'plc_operation': 66 + extra = op_keys - VALID_PLC_OP_FIELDS 67 + if extra: 68 + return f"extra-plc-operation:{','.join(sorted(extra))}" 69 + 70 + # Only check VM structure if we have verificationMethods 71 + vm = op_data.get('verificationMethods') 72 + if vm: 73 + for key, value in vm.items(): 74 + if isinstance(value, dict): 75 + return f"invalid-verification-method:{key}" 76 + 77 + elif op_type == 'create': 78 + extra = op_keys - VALID_LEGACY_FIELDS 79 + if extra: 80 + return f"extra-legacy-create:{','.join(sorted(extra))}" 81 + 82 + elif op_type == 'plc_tombstone': 83 + extra = op_keys - VALID_TOMBSTONE_FIELDS 84 + if extra: 85 + return f"extra-tombstone:{','.join(sorted(extra))}" 86 + 87 + return None 88 + 89 + def check_signature_encoding(self, operation: Dict[str, Any]) -> Optional[str]: 90 + """Check signature encoding - optimized""" 91 + sig = operation.get('operation', {}).get('sig') 92 + 93 + if not sig: 94 + return None 95 + 96 + # Fast checks first 97 + sig_len = len(sig) 98 + if sig_len != 86: 99 + return f"sig-wrong-length:{sig_len}" 100 + 101 + if '=' in sig: 102 + return "sig-has-padding" 103 + 104 + # Check valid chars (using set for O(1) lookup) 105 + if not set(sig).issubset(VALID_BASE64URL_CHARS): 106 + return "sig-invalid-chars" 107 + 108 + return None 109 + 110 + def check_high_s_signature(self, operation: Dict[str, Any]) -> Optional[str]: 111 + """Check for High-S signatures - optimized""" 112 + if self.skip_high_s: 113 + return None 114 + 115 + sig = operation.get('operation', {}).get('sig') 116 + if not sig: 117 + return None 118 + 119 + try: 120 + # Fast decode with minimal validation 121 + sig_bytes = base64.urlsafe_b64decode(sig + '==') 122 + 123 + if len(sig_bytes) != 64: 124 + return None # Already caught by encoding check 125 + 126 + # Extract s value (second 32 bytes) 127 + s = int.from_bytes(sig_bytes[32:], 'big') 128 + 129 + # Check against both curve half-orders 130 + if s > P256_HALF_ORDER or s > K256_HALF_ORDER: 131 + return "high-s-signature" 132 + 133 + except Exception: 134 + return None # Encoding errors caught elsewhere 135 + 136 + return None 137 + 138 + def check_duplicate_rotation_keys(self, operation: Dict[str, Any]) -> Optional[str]: 139 + """Check for duplicate rotation keys - optimized""" 140 + op_data = operation.get('operation', {}) 141 + 142 + if op_data.get('type') != 'plc_operation': 143 + return None 144 + 145 + keys = op_data.get('rotationKeys') 146 + if not keys: 147 + return None 148 + 149 + keys_len = len(keys) 150 + unique_len = len(set(keys)) 151 + 152 + if unique_len < keys_len: 153 + return f"duplicate-rotation-keys:{keys_len}-total-{unique_len}-unique" 154 + 155 + return None 156 + 157 + def check_rotation_keys_count(self, operation: Dict[str, Any]) -> Optional[str]: 158 + """Check rotation keys count - optimized""" 159 + op_data = operation.get('operation', {}) 160 + 161 + if op_data.get('type') != 'plc_operation': 162 + return None 163 + 164 + keys = op_data.get('rotationKeys') 165 + if not keys: 166 + return None 167 + 168 + count = len(keys) 169 + if count < 1 or count > 5: 170 + return f"invalid-rotation-keys-count:{count}" 171 + 172 + return None 173 + 174 + def check_verification_methods_count(self, operation: Dict[str, Any]) -> Optional[str]: 175 + """Check verification methods count - optimized""" 176 + op_data = operation.get('operation', {}) 177 + 178 + if op_data.get('type') != 'plc_operation': 179 + return None 180 + 181 + vm = op_data.get('verificationMethods') 182 + if not vm: 183 + return None 184 + 185 + count = len(vm) 186 + if count > 10: 187 + return f"too-many-verification-methods:{count}" 188 + 189 + return None 190 + 191 + def validate_operation(self, operation: Dict[str, Any]) -> List[str]: 192 + """Run all validations - optimized hot path""" 193 + reasons = [] 194 + 195 + # Inline checks for performance (avoid function call overhead) 196 + result = self.check_extra_fields(operation) 197 + if result: 198 + reasons.append(result) 199 + 200 + result = self.check_signature_encoding(operation) 201 + if result: 202 + reasons.append(result) 203 + 204 + if not self.skip_high_s: 205 + result = self.check_high_s_signature(operation) 206 + if result: 207 + reasons.append(result) 208 + 209 + result = self.check_duplicate_rotation_keys(operation) 210 + if result: 211 + reasons.append(result) 212 + 213 + result = self.check_rotation_keys_count(operation) 214 + if result: 215 + reasons.append(result) 216 + 217 + result = self.check_verification_methods_count(operation) 218 + if result: 219 + reasons.append(result) 220 + 221 + return reasons 222 + 223 + def process_stream(self): 224 + """Process operations from stdin - optimized""" 225 + writer = csv.writer(sys.stdout, lineterminator='\n') 226 + writer.writerow(['bundle', 'position', 'reason', 'opRaw']) 227 + 228 + # Buffer for batch writing (improves I/O performance) 229 + buffer = [] 230 + buffer_size = 100 231 + 232 + for line in sys.stdin: 233 + line = line.strip() 234 + if not line: 235 + continue 236 + 237 + self.op_count += 1 238 + 239 + # Calculate bundle and position 240 + bundle = (self.op_count - 1) // 10000 + 1 241 + position = (self.op_count - 1) % 10000 242 + 243 + try: 244 + operation = json.loads(line) 245 + except json.JSONDecodeError: 246 + continue 247 + 248 + # Validate 249 + reasons = self.validate_operation(operation) 250 + 251 + if reasons: 252 + self.invalid_count += 1 253 + reason_str = '|'.join(reasons) 254 + 255 + # Track reason statistics 256 + for reason in reasons: 257 + self.reason_counts[reason] += 1 258 + 259 + # Compact JSON (no spaces) 260 + op_raw = json.dumps(operation, separators=(',', ':')) 261 + 262 + buffer.append([bundle, position, reason_str, op_raw]) 263 + 264 + # Batch write for performance 265 + if len(buffer) >= buffer_size: 266 + writer.writerows(buffer) 267 + buffer.clear() 268 + 269 + # Progress (throttled to once per second) 270 + if self.op_count % 10000 == 0: 271 + current_time = time.time() 272 + if current_time - self.last_progress_time >= 1.0: 273 + elapsed = current_time - self.start_time 274 + rate = self.op_count / elapsed 275 + self.log_progress( 276 + f"Processed {self.op_count:,} ops " 277 + f"({rate:,.0f} ops/sec) | " 278 + f"Invalid: {self.invalid_count:,}" 279 + ) 280 + self.last_progress_time = current_time 281 + 282 + # Flush remaining buffer 283 + if buffer: 284 + writer.writerows(buffer) 285 + 286 + # Final summary 287 + elapsed = time.time() - self.start_time 288 + if self.op_count > 0: 289 + percentage = (self.invalid_count / self.op_count) * 100 290 + rate = self.op_count / elapsed 291 + self.log_progress("=" * 60) 292 + self.log_progress(f"Complete: {self.op_count:,} operations in {elapsed:.1f}s") 293 + self.log_progress(f"Rate: {rate:,.0f} ops/sec") 294 + self.log_progress(f"Invalid: {self.invalid_count:,} ({percentage:.4f}%)") 295 + 296 + # Show top invalid reasons 297 + if self.reason_counts: 298 + self.log_progress("\nTop invalid reasons:") 299 + sorted_reasons = sorted( 300 + self.reason_counts.items(), 301 + key=lambda x: x[1], 302 + reverse=True 303 + )[:5] 304 + for reason, count in sorted_reasons: 305 + self.log_progress(f" {reason}: {count:,}") 306 + else: 307 + self.log_progress("No operations processed") 308 + 309 + 310 + def main(): 311 + import argparse 312 + 313 + parser = argparse.ArgumentParser( 314 + description='Detect invalid PLC operations', 315 + formatter_class=argparse.RawDescriptionHelpFormatter, 316 + epilog=''' 317 + Examples: 318 + plcbundle export -count 10000 | %(prog)s > invalid.csv 319 + plcbundle backfill | %(prog)s > invalid.csv 320 + 321 + # Skip High-S check for speed 322 + plcbundle backfill | %(prog)s --skip-high-s > invalid.csv 323 + 324 + Performance: 325 + ~50,000-100,000 ops/sec on modern hardware 326 + ~1-2 hours for 10 million operations 327 + Memory usage: <100MB 328 + ''' 329 + ) 330 + 331 + parser.add_argument( 332 + '--skip-high-s', 333 + action='store_true', 334 + help='Skip High-S validation (faster)' 335 + ) 336 + 337 + args = parser.parse_args() 338 + 339 + if not HAS_CRYPTO and not args.skip_high_s: 340 + print("[WARNING] cryptography not installed. High-S validation disabled.", file=sys.stderr) 341 + print("[WARNING] Install with: pip install cryptography", file=sys.stderr) 342 + args.skip_high_s = True 343 + 344 + validator = OperationValidator(skip_high_s=args.skip_high_s) 345 + 346 + try: 347 + validator.process_stream() 348 + except KeyboardInterrupt: 349 + print("\n[INTERRUPTED]", file=sys.stderr) 350 + sys.exit(1) 351 + except BrokenPipeError: 352 + sys.exit(0) 353 + 354 + 355 + if __name__ == '__main__': 356 + main()