wip
# Report runs of consecutive records in plc_cache/*.jsonl.zst that share the
# same createdAt value.
#
# Pipeline stages:
#   pv       streams the compressed files into the pipe
#   zstdcat  decompresses them
#   jq       flattens each record to "createdAt<TAB>cid<TAB>did"
#   awk      groups ADJACENT rows with an identical createdAt
#
# Only neighbouring rows are compared, so equal timestamps that are not
# adjacent in the input are counted as separate runs. Every run of at least
# min_streak rows is listed, followed by a summary. All report output is
# written to stderr; nothing is written to stdout.
#
# Tunable: change "-v min_streak=8" to report shorter or longer runs.
pv plc_cache/*.jsonl.zst | zstdcat | \
  jq -r '[.createdAt, .cid, .did] | @tsv' | \
  awk -F'\t' -v min_streak=8 '
    # NOTE: no apostrophes inside this program, it is single-quoted by the shell.
    #
    # FS is a literal tab because jq @tsv emits tab-separated columns and
    # renders null as an empty column. Default whitespace splitting would
    # collapse an empty column and shift the remaining fields left.

    # Append one row to the current run, pre-formatted for the report.
    function remember(created_at, cid, did) {
      streak_rows[++same_streak] = sprintf(" %s | CID: %s | DID: %s", created_at, cid, did)
    }

    # Report the run that just ended if it is long enough, then reset the run
    # state. Used by both the main rule and END so the final run is handled
    # exactly like every other run.
    function flush_streak(    i) {
      if (same_streak >= min_streak) {
        groups_found++
        printf "\n=== Found %d items with same createdAt: %s ===\n", same_streak, streak_time > "/dev/stderr"
        for (i = 1; i <= same_streak; i++) {
          printf "%s\n", streak_rows[i] > "/dev/stderr"
        }

        # Track the largest reported run for the summary.
        if (same_streak > max_streak) {
          max_streak = same_streak
          max_time = streak_time
        }
      }
      same_streak = 0
      delete streak_rows
    }

    # The first row has no predecessor to compare against, hence NR > 1.
    NR > 1 {
      # Concatenating "" forces a string comparison even if both values
      # happen to look numeric.
      if (($1 "") == (prev_time "")) {
        if (same_streak == 0) {
          # A run begins with the previous row, which has not been stored yet.
          streak_time = $1
          remember(prev_time, prev_cid, prev_did)
        }
        remember($1, $2, $3)
      } else {
        flush_streak()
      }
    }

    # Remember this row so the next one can be compared against it.
    { prev_time = $1; prev_cid = $2; prev_did = $3 }

    END {
      # The last run has no following row to terminate it, so flush it here.
      flush_streak()

      printf "\n=== SUMMARY ===\n" > "/dev/stderr"
      printf "Total groups of %d+ items with same createdAt: %d\n", min_streak, groups_found > "/dev/stderr"
      if (max_streak > 0) {
        printf "Largest group: %d items at %s\n", max_streak, max_time > "/dev/stderr"
      }
    }
  '