tangled
alpha
login
or
join now
atscan.net
/
plcbundle
A Transparent and Verifiable Way to Sync the AT Protocol's PLC Directory
14
fork
atom
overview
issues
2
pulls
pipelines
update detection
tree.fail
3 months ago
a8a9a293
40336cb2
+114
-22
2 changed files
expand all
collapse all
unified
split
cmd
plcbundle
detector.go
detector
builtin.go
+11
-13
cmd/plcbundle/detector.go
···
10
10
"os"
11
11
"sort"
12
12
"strings"
13
13
-
"time"
14
13
15
14
"tangled.org/atscan.net/plcbundle/detector"
16
15
"tangled.org/atscan.net/plcbundle/plc"
···
422
421
ctx := context.Background()
423
422
424
423
// Write CSV header to stdout
425
425
-
fmt.Println("bundle,position,cid,detectors,confidence,detected_at,size")
424
424
+
fmt.Println("bundle,position,cid,size,confidence,labels")
426
425
427
426
// Track statistics
428
427
totalOps := 0
···
471
470
// Collect all matches for this operation
472
471
var matchedDetectors []string
473
472
var maxConfidence float64
474
474
-
var detectedAt time.Time
475
473
476
474
// Run all detectors on this operation
477
475
for _, det := range detectors {
···
493
491
if match.Confidence > maxConfidence {
494
492
maxConfidence = match.Confidence
495
493
}
496
496
-
497
497
-
// Use current time for first match
498
498
-
if detectedAt.IsZero() {
499
499
-
detectedAt = time.Now()
500
500
-
}
501
494
}
502
495
503
496
// Output only if at least one detector matched
···
505
498
matchCount++
506
499
matchedBytes += int64(opSize)
507
500
508
508
-
fmt.Printf("%d,%d,%s,%s,%.2f,%s,%d\n",
501
501
+
// Extract last 4 chars of CID
502
502
+
cidShort := op.CID
503
503
+
if len(cidShort) > 4 {
504
504
+
cidShort = cidShort[len(cidShort)-4:]
505
505
+
}
506
506
+
507
507
+
fmt.Printf("%d,%d,%s,%d,%.2f,%s\n",
509
508
bundleNum,
510
509
position,
511
511
-
op.CID,
512
512
-
strings.Join(matchedDetectors, ";"),
510
510
+
cidShort,
511
511
+
opSize,
513
512
maxConfidence,
514
514
-
detectedAt.Format("2006-01-02T15:04:05Z"),
515
515
-
opSize,
513
513
+
strings.Join(matchedDetectors, ";"),
516
514
)
517
515
}
518
516
}
+103
-9
detector/builtin.go
···
11
11
12
12
// InvalidHandleDetector detects operations with invalid handle patterns
13
13
type InvalidHandleDetector struct {
14
14
-
// Valid handle regex: lowercase letters, numbers, hyphens, dots only
14
14
+
// Valid handle regex based on AT Protocol handle specification
15
15
validHandlePattern *regexp.Regexp
16
16
}
17
17
18
18
func NewInvalidHandleDetector() *InvalidHandleDetector {
19
19
return &InvalidHandleDetector{
20
20
-
// Valid handle: alphanumeric, hyphens, dots (no underscores!)
21
21
-
validHandlePattern: regexp.MustCompile(`^at://[a-z0-9][a-z0-9-]*(\.[a-z0-9][a-z0-9-]*)*\.[a-z]+$`),
20
20
+
// Valid handle pattern: domain segments + TLD
21
21
+
// Each segment: alphanumeric start/end, hyphens allowed in middle, max 63 chars per segment
22
22
+
// TLD must start with a letter
23
23
+
validHandlePattern: regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`),
22
24
}
23
25
}
24
26
25
27
func (d *InvalidHandleDetector) Name() string { return "invalid_handle" }
26
28
func (d *InvalidHandleDetector) Description() string {
27
27
-
return "Detects operations with invalid handle patterns (underscores, invalid chars)"
29
29
+
return "Detects operations with invalid handle patterns (underscores, invalid chars, malformed)"
28
30
}
29
31
func (d *InvalidHandleDetector) Version() string { return "1.0.0" }
30
32
···
37
39
continue
38
40
}
39
41
42
42
+
// Extract handle (remove at:// prefix)
43
43
+
handle := strings.TrimPrefix(str, "at://")
44
44
+
45
45
+
// Remove any path component (e.g., at://user.bsky.social/profile -> user.bsky.social)
46
46
+
if idx := strings.Index(handle, "/"); idx > 0 {
47
47
+
handle = handle[:idx]
48
48
+
}
49
49
+
40
50
// Check for underscore (invalid in Bluesky handles)
41
41
-
if strings.Contains(str, "_") {
51
51
+
if strings.Contains(handle, "_") {
42
52
return &Match{
43
53
Reason: "underscore_in_handle",
44
54
Category: "invalid_handle",
45
55
Confidence: 0.99,
46
46
-
Note: "Handle contains underscore which is invalid in Bluesky",
56
56
+
Note: "Handle contains underscore which is invalid in AT Protocol",
47
57
Metadata: map[string]interface{}{
48
58
"invalid_handle": str,
59
59
+
"extracted": handle,
49
60
"violation": "underscore_character",
50
61
},
51
62
}, nil
52
63
}
53
64
54
54
-
// Check if handle matches valid pattern
55
55
-
if !d.validHandlePattern.MatchString(str) {
65
65
+
// Check for other invalid characters (anything not alphanumeric, hyphen, or dot)
66
66
+
invalidChars := regexp.MustCompile(`[^a-zA-Z0-9.-]`)
67
67
+
if invalidChars.MatchString(handle) {
68
68
+
return &Match{
69
69
+
Reason: "invalid_characters",
70
70
+
Category: "invalid_handle",
71
71
+
Confidence: 0.99,
72
72
+
Note: "Handle contains invalid characters",
73
73
+
Metadata: map[string]interface{}{
74
74
+
"invalid_handle": str,
75
75
+
"extracted": handle,
76
76
+
"violation": "invalid_characters",
77
77
+
},
78
78
+
}, nil
79
79
+
}
80
80
+
81
81
+
// Check if handle matches valid AT Protocol pattern
82
82
+
if !d.validHandlePattern.MatchString(handle) {
56
83
return &Match{
57
84
Reason: "invalid_handle_pattern",
58
85
Category: "invalid_handle",
59
86
Confidence: 0.95,
60
60
-
Note: "Handle does not match valid Bluesky handle pattern",
87
87
+
Note: "Handle does not match valid AT Protocol handle pattern",
61
88
Metadata: map[string]interface{}{
62
89
"invalid_handle": str,
90
90
+
"extracted": handle,
63
91
"violation": "pattern_mismatch",
92
92
+
},
93
93
+
}, nil
94
94
+
}
95
95
+
96
96
+
// Additional check: total handle length must not exceed the DNS maximum (253 chars)
97
97
+
if len(handle) > 253 { // DNS maximum
98
98
+
return &Match{
99
99
+
Reason: "handle_too_long",
100
100
+
Category: "invalid_handle",
101
101
+
Confidence: 0.98,
102
102
+
Note: "Handle exceeds maximum length (253 characters)",
103
103
+
Metadata: map[string]interface{}{
104
104
+
"invalid_handle": str,
105
105
+
"extracted": handle,
106
106
+
"length": len(handle),
107
107
+
"violation": "exceeds_max_length",
108
108
+
},
109
109
+
}, nil
110
110
+
}
111
111
+
112
112
+
// Check each dot-separated segment: it must be non-empty and at most 63 chars
113
113
+
segments := strings.Split(handle, ".")
114
114
+
for i, segment := range segments {
115
115
+
if len(segment) == 0 {
116
116
+
return &Match{
117
117
+
Reason: "empty_segment",
118
118
+
Category: "invalid_handle",
119
119
+
Confidence: 0.99,
120
120
+
Note: "Handle contains empty segment (consecutive dots)",
121
121
+
Metadata: map[string]interface{}{
122
122
+
"invalid_handle": str,
123
123
+
"extracted": handle,
124
124
+
"violation": "empty_segment",
125
125
+
},
126
126
+
}, nil
127
127
+
}
128
128
+
if len(segment) > 63 {
129
129
+
return &Match{
130
130
+
Reason: "segment_too_long",
131
131
+
Category: "invalid_handle",
132
132
+
Confidence: 0.98,
133
133
+
Note: "Handle segment exceeds maximum length (63 characters)",
134
134
+
Metadata: map[string]interface{}{
135
135
+
"invalid_handle": str,
136
136
+
"extracted": handle,
137
137
+
"segment": i,
138
138
+
"segment_value": segment,
139
139
+
"length": len(segment),
140
140
+
"violation": "segment_exceeds_max_length",
141
141
+
},
142
142
+
}, nil
143
143
+
}
144
144
+
}
145
145
+
146
146
+
// Check minimum segments (at least 2: subdomain.tld)
147
147
+
if len(segments) < 2 {
148
148
+
return &Match{
149
149
+
Reason: "insufficient_segments",
150
150
+
Category: "invalid_handle",
151
151
+
Confidence: 0.99,
152
152
+
Note: "Handle must have at least 2 segments (subdomain.tld)",
153
153
+
Metadata: map[string]interface{}{
154
154
+
"invalid_handle": str,
155
155
+
"extracted": handle,
156
156
+
"segments": len(segments),
157
157
+
"violation": "insufficient_segments",
64
158
},
65
159
}, nil
66
160
}