A Transparent and Verifiable Way to Sync the AT Protocol's PLC Directory

update detection

+114 -22
+11 -13
cmd/plcbundle/detector.go
··· 10 10 "os" 11 11 "sort" 12 12 "strings" 13 - "time" 14 13 15 14 "tangled.org/atscan.net/plcbundle/detector" 16 15 "tangled.org/atscan.net/plcbundle/plc" ··· 422 421 ctx := context.Background() 423 422 424 423 // Write CSV header to stdout 425 - fmt.Println("bundle,position,cid,detectors,confidence,detected_at,size") 424 + fmt.Println("bundle,position,cid,size,confidence,labels") 426 425 427 426 // Track statistics 428 427 totalOps := 0 ··· 471 470 // Collect all matches for this operation 472 471 var matchedDetectors []string 473 472 var maxConfidence float64 474 - var detectedAt time.Time 475 473 476 474 // Run all detectors on this operation 477 475 for _, det := range detectors { ··· 493 491 if match.Confidence > maxConfidence { 494 492 maxConfidence = match.Confidence 495 493 } 496 - 497 - // Use current time for first match 498 - if detectedAt.IsZero() { 499 - detectedAt = time.Now() 500 - } 501 494 } 502 495 503 496 // Output only if at least one detector matched ··· 505 498 matchCount++ 506 499 matchedBytes += int64(opSize) 507 500 508 - fmt.Printf("%d,%d,%s,%s,%.2f,%s,%d\n", 501 + // Extract last 4 chars of CID 502 + cidShort := op.CID 503 + if len(cidShort) > 4 { 504 + cidShort = cidShort[len(cidShort)-4:] 505 + } 506 + 507 + fmt.Printf("%d,%d,%s,%d,%.2f,%s\n", 509 508 bundleNum, 510 509 position, 511 - op.CID, 512 - strings.Join(matchedDetectors, ";"), 510 + cidShort, 511 + opSize, 513 512 maxConfidence, 514 - detectedAt.Format("2006-01-02T15:04:05Z"), 515 - opSize, 513 + strings.Join(matchedDetectors, ";"), 516 514 ) 517 515 } 518 516 }
+103 -9
detector/builtin.go
··· 11 11 12 12 // InvalidHandleDetector detects operations with invalid handle patterns 13 13 type InvalidHandleDetector struct { 14 - // Valid handle regex: lowercase letters, numbers, hyphens, dots only 14 + // Valid handle regex based on AT Protocol handle specification 15 15 validHandlePattern *regexp.Regexp 16 16 } 17 17 18 18 func NewInvalidHandleDetector() *InvalidHandleDetector { 19 19 return &InvalidHandleDetector{ 20 - // Valid handle: alphanumeric, hyphens, dots (no underscores!) 21 - validHandlePattern: regexp.MustCompile(`^at://[a-z0-9][a-z0-9-]*(\.[a-z0-9][a-z0-9-]*)*\.[a-z]+$`), 20 + // Valid handle pattern: domain segments + TLD 21 + // Each segment: alphanumeric start/end, hyphens allowed in middle, max 63 chars per segment 22 + // TLD must start with letter 23 + validHandlePattern: regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`), 22 24 } 23 25 } 24 26 25 27 func (d *InvalidHandleDetector) Name() string { return "invalid_handle" } 26 28 func (d *InvalidHandleDetector) Description() string { 27 - return "Detects operations with invalid handle patterns (underscores, invalid chars)" 29 + return "Detects operations with invalid handle patterns (underscores, invalid chars, malformed)" 28 30 } 29 31 func (d *InvalidHandleDetector) Version() string { return "1.0.0" } 30 32 ··· 37 39 continue 38 40 } 39 41 42 + // Extract handle (remove at:// prefix) 43 + handle := strings.TrimPrefix(str, "at://") 44 + 45 + // Remove any path component (e.g., at://user.bsky.social/profile -> user.bsky.social) 46 + if idx := strings.Index(handle, "/"); idx > 0 { 47 + handle = handle[:idx] 48 + } 49 + 40 50 // Check for underscore (invalid in Bluesky handles) 41 - if strings.Contains(str, "_") { 51 + if strings.Contains(handle, "_") { 42 52 return &Match{ 43 53 Reason: "underscore_in_handle", 44 54 Category: "invalid_handle", 45 55 Confidence: 0.99, 46 - Note: "Handle contains underscore which is invalid in Bluesky", 56 + Note: "Handle contains underscore which is invalid in AT Protocol", 47 57 Metadata: map[string]interface{}{ 48 58 "invalid_handle": str, 59 + "extracted": handle, 49 60 "violation": "underscore_character", 50 61 }, 51 62 }, nil 52 63 } 53 64 54 - // Check if handle matches valid pattern 55 - if !d.validHandlePattern.MatchString(str) { 65 + // Check for other invalid characters (anything not alphanumeric, hyphen, or dot) 66 + invalidChars := regexp.MustCompile(`[^a-zA-Z0-9.-]`) 67 + if invalidChars.MatchString(handle) { 68 + return &Match{ 69 + Reason: "invalid_characters", 70 + Category: "invalid_handle", 71 + Confidence: 0.99, 72 + Note: "Handle contains invalid characters", 73 + Metadata: map[string]interface{}{ 74 + "invalid_handle": str, 75 + "extracted": handle, 76 + "violation": "invalid_characters", 77 + }, 78 + }, nil 79 + } 80 + 81 + // Check if handle matches valid AT Protocol pattern 82 + if !d.validHandlePattern.MatchString(handle) { 56 83 return &Match{ 57 84 Reason: "invalid_handle_pattern", 58 85 Category: "invalid_handle", 59 86 Confidence: 0.95, 60 - Note: "Handle does not match valid Bluesky handle pattern", 87 + Note: "Handle does not match valid AT Protocol handle pattern", 61 88 Metadata: map[string]interface{}{ 62 89 "invalid_handle": str, 90 + "extracted": handle, 63 91 "violation": "pattern_mismatch", 92 + }, 93 + }, nil 94 + } 95 + 96 + // Additional checks: handle length 97 + if len(handle) > 253 { // DNS maximum 98 + return &Match{ 99 + Reason: "handle_too_long", 100 + Category: "invalid_handle", 101 + Confidence: 0.98, 102 + Note: "Handle exceeds maximum length (253 characters)", 103 + Metadata: map[string]interface{}{ 104 + "invalid_handle": str, 105 + "extracted": handle, 106 + "length": len(handle), 107 + "violation": "exceeds_max_length", 108 + }, 109 + }, nil 110 + } 111 + 112 + // Check segment lengths (each part between dots should be max 63 chars) 113 + segments := strings.Split(handle, ".") 114 + for i, segment := range segments { 115 + if len(segment) == 0 { 116 + return &Match{ 117 + Reason: "empty_segment", 118 + Category: "invalid_handle", 119 + Confidence: 0.99, 120 + Note: "Handle contains empty segment (consecutive dots)", 121 + Metadata: map[string]interface{}{ 122 + "invalid_handle": str, 123 + "extracted": handle, 124 + "violation": "empty_segment", 125 + }, 126 + }, nil 127 + } 128 + if len(segment) > 63 { 129 + return &Match{ 130 + Reason: "segment_too_long", 131 + Category: "invalid_handle", 132 + Confidence: 0.98, 133 + Note: "Handle segment exceeds maximum length (63 characters)", 134 + Metadata: map[string]interface{}{ 135 + "invalid_handle": str, 136 + "extracted": handle, 137 + "segment": i, 138 + "segment_value": segment, 139 + "length": len(segment), 140 + "violation": "segment_exceeds_max_length", 141 + }, 142 + }, nil 143 + } 144 + } 145 + 146 + // Check minimum segments (at least 2: subdomain.tld) 147 + if len(segments) < 2 { 148 + return &Match{ 149 + Reason: "insufficient_segments", 150 + Category: "invalid_handle", 151 + Confidence: 0.99, 152 + Note: "Handle must have at least 2 segments (subdomain.tld)", 153 + Metadata: map[string]interface{}{ 154 + "invalid_handle": str, 155 + "extracted": handle, 156 + "segments": len(segments), 157 + "violation": "insufficient_segments", 64 158 }, 65 159 }, nil 66 160 }