like malachite (atproto-lastfm-importer) but in go and bluer
go spotify tealfm lastfm atproto
at main 473 lines 18 kB view raw
1package sync 2 3import ( 4 "fmt" 5 "testing" 6 "time" 7 8 "tangled.org/karitham.dev/lazuli/kway" 9) 10 11func TestCreateRecordKey(t *testing.T) { 12 tests := []struct { 13 name string 14 record *PlayRecord 15 expected string 16 }{ 17 { 18 name: "basic TID", 19 record: &PlayRecord{ 20 TrackName: "Test Track", 21 Artists: []PlayRecordArtist{{ArtistName: "Test Artist"}}, 22 PlayedTime: Timestamp{Time: time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)}, 23 }, 24 expected: "3kiz7zjhak222", 25 }, 26 } 27 28 for _, tt := range tests { 29 t.Run(tt.name, func(t *testing.T) { 30 result := CreateRecordKey(tt.record) 31 if result != tt.expected { 32 t.Errorf("CreateRecordKey() = %q, want %q", result, tt.expected) 33 } 34 }) 35 } 36} 37 38func TestCreateRecordKeys(t *testing.T) { 39 baseTime := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC) 40 records := []*PlayRecord{ 41 {TrackName: "A", PlayedTime: Timestamp{Time: baseTime}}, 42 {TrackName: "B", PlayedTime: Timestamp{Time: baseTime}}, 43 {TrackName: "C", PlayedTime: Timestamp{Time: baseTime.Add(time.Second)}}, 44 } 45 46 keys := CreateRecordKeys(records) 47 if len(keys) != 3 { 48 t.Fatalf("expected 3 keys, got %d", len(keys)) 49 } 50 51 if keys[0] == keys[1] { 52 t.Errorf("expected unique keys for same timestamp, got duplicate %q", keys[0]) 53 } 54 55 if keys[0] >= keys[1] { 56 t.Errorf("expected keys to be sortable, got %q >= %q", keys[0], keys[1]) 57 } 58 59 if keys[1] >= keys[2] { 60 t.Errorf("expected keys to be sortable by time, got %q >= %q", keys[1], keys[2]) 61 } 62} 63 64func TestSelectBetterRecord(t *testing.T) { 65 tests := []struct { 66 name string 67 r1 PlayRecord 68 r2 PlayRecord 69 r1IsLastFM bool 70 r2IsLastFM bool 71 expectedService string 72 }{ 73 { 74 name: "lastfm wins over spotify", 75 r1: PlayRecord{ 76 TrackName: "Test", 77 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 78 MusicServiceBaseDomain: MusicServiceLastFM, 79 }, 80 r2: PlayRecord{ 81 TrackName: "Test", 82 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 83 MusicServiceBaseDomain: MusicServiceSpotify, 84 }, 85 r1IsLastFM: true, 86 r2IsLastFM: false, 87 expectedService: MusicServiceLastFM, 88 }, 89 { 90 name: "spotify loses to lastfm even with mbid", 91 r1: PlayRecord{ 92 TrackName: "Test", 93 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 94 MusicServiceBaseDomain: MusicServiceLastFM, 95 }, 96 r2: PlayRecord{ 97 TrackName: "Test", 98 Artists: []PlayRecordArtist{{ArtistName: "Artist", ArtistMbId: "mbid-spotify"}}, 99 MusicServiceBaseDomain: MusicServiceSpotify, 100 }, 101 r1IsLastFM: true, 102 r2IsLastFM: false, 103 expectedService: MusicServiceLastFM, 104 }, 105 { 106 name: "lastfm with mbid wins over spotify without mbid", 107 r1: PlayRecord{ 108 TrackName: "Test", 109 Artists: []PlayRecordArtist{{ArtistName: "Artist", ArtistMbId: "mbid-123"}}, 110 MusicServiceBaseDomain: MusicServiceLastFM, 111 }, 112 r2: PlayRecord{ 113 TrackName: "Test", 114 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 115 MusicServiceBaseDomain: MusicServiceSpotify, 116 }, 117 r1IsLastFM: true, 118 r2IsLastFM: false, 119 expectedService: MusicServiceLastFM, 120 }, 121 { 122 name: "spotify without mbid loses to lastfm with mbid", 123 r1: PlayRecord{ 124 TrackName: "Test", 125 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 126 MusicServiceBaseDomain: MusicServiceLastFM, 127 }, 128 r2: PlayRecord{ 129 TrackName: "Test", 130 Artists: []PlayRecordArtist{{ArtistName: "Artist", ArtistMbId: "mbid-123"}}, 131 MusicServiceBaseDomain: MusicServiceSpotify, 132 }, 133 r1IsLastFM: true, 134 r2IsLastFM: false, 135 expectedService: MusicServiceLastFM, 136 }, 137 { 138 name: "recording mbid takes precedence", 139 r1: PlayRecord{ 140 TrackName: "Test", 141 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 142 RecordingMbId: "mbid-recording", 143 MusicServiceBaseDomain: MusicServiceLastFM, 144 }, 145 r2: PlayRecord{ 146 TrackName: "Test", 147 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 148 MusicServiceBaseDomain: MusicServiceLastFM, 149 }, 150 r1IsLastFM: true, 151 r2IsLastFM: true, 152 expectedService: MusicServiceLastFM, 153 }, 154 { 155 name: "both spotify same source", 156 r1: PlayRecord{ 157 TrackName: "Test", 158 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 159 MusicServiceBaseDomain: MusicServiceSpotify, 160 }, 161 r2: PlayRecord{ 162 TrackName: "Test", 163 Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, 164 MusicServiceBaseDomain: MusicServiceSpotify, 165 }, 166 r1IsLastFM: false, 167 r2IsLastFM: false, 168 expectedService: MusicServiceSpotify, 169 }, 170 } 171 172 for _, tt := range tests { 173 t.Run(tt.name, func(t *testing.T) { 174 result := tt.r1.betterThan(&tt.r2) 175 var resultService string 176 if result { 177 resultService = tt.r1.MusicServiceBaseDomain 178 } else { 179 resultService = tt.r2.MusicServiceBaseDomain 180 } 181 182 if resultService != tt.expectedService { 183 t.Errorf("BetterThan() result service = %q, want %q", resultService, tt.expectedService) 184 } 185 }) 186 } 187} 188 189func TestMergeRecordsComprehensive(t *testing.T) { 190 baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 191 192 tests := []struct { 193 name string 194 lastfm []*PlayRecord 195 spotify []*PlayRecord 196 tolerance time.Duration 197 expectedLen int 198 expectedMergedTotal int 199 expectedFirstTrack string 200 expectedOrder []string // track names in expected order 201 }{ 202 { 203 name: "both slices empty", 204 lastfm: []*PlayRecord{}, 205 spotify: []*PlayRecord{}, 206 tolerance: 0, 207 expectedLen: 0, 208 expectedMergedTotal: 0, 209 }, 210 { 211 name: "only lastfm records", 212 lastfm: []*PlayRecord{ 213 {TrackName: "Song A", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 214 {TrackName: "Song B", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 215 }, 216 spotify: []*PlayRecord{}, 217 tolerance: 0, 218 expectedLen: 2, 219 expectedMergedTotal: 2, 220 expectedOrder: []string{"Song A", "Song B"}, 221 }, 222 { 223 name: "only spotify records", 224 lastfm: []*PlayRecord{}, 225 spotify: []*PlayRecord{ 226 {TrackName: "Song X", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 227 {TrackName: "Song Y", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceSpotify}, 228 }, 229 tolerance: 0, 230 expectedLen: 2, 231 expectedMergedTotal: 2, 232 expectedOrder: []string{"Song X", "Song Y"}, 233 }, 234 { 235 name: "zero tolerance no duplicates", 236 lastfm: []*PlayRecord{ 237 {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 238 }, 239 spotify: []*PlayRecord{ 240 {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 241 }, 242 tolerance: 0, 243 expectedLen: 2, 244 expectedMergedTotal: 2, 245 }, 246 { 247 name: "zero tolerance exact duplicate", 248 lastfm: []*PlayRecord{ 249 {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 250 }, 251 spotify: []*PlayRecord{ 252 {TrackName: "Same Song", Artists: []PlayRecordArtist{{ArtistName: "Same Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceSpotify}, 253 }, 254 tolerance: 0, 255 expectedLen: 1, 256 expectedMergedTotal: 1, 257 expectedFirstTrack: "Same Song", 258 }, 259 { 260 name: "within tolerance duplicate", 261 lastfm: []*PlayRecord{ 262 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 263 }, 264 spotify: []*PlayRecord{ 265 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 266 }, 267 tolerance: 30 * time.Second, 268 expectedLen: 1, 269 expectedMergedTotal: 1, 270 expectedFirstTrack: "Song", 271 }, 272 { 273 name: "outside tolerance no duplicate", 274 lastfm: []*PlayRecord{ 275 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 276 }, 277 spotify: []*PlayRecord{ 278 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(60 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 279 }, 280 tolerance: 30 * time.Second, 281 expectedLen: 2, 282 expectedMergedTotal: 2, 283 }, 284 { 285 name: "time bucket boundary exact", 286 lastfm: []*PlayRecord{ 287 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 288 }, 289 spotify: []*PlayRecord{ 290 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(29 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 291 }, 292 tolerance: 30 * time.Second, 293 expectedLen: 1, 294 expectedMergedTotal: 1, 295 }, 296 { 297 name: "time bucket boundary crossed", 298 lastfm: []*PlayRecord{ 299 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 300 }, 301 spotify: []*PlayRecord{ 302 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 303 }, 304 tolerance: 30 * time.Second, 305 expectedLen: 2, 306 }, 307 { 308 name: "lastfm priority over spotify", 309 lastfm: []*PlayRecord{ 310 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 311 }, 312 spotify: []*PlayRecord{ 313 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 314 }, 315 tolerance: 30 * time.Second, 316 expectedLen: 1, 317 expectedMergedTotal: 1, 318 expectedFirstTrack: "Song", 319 }, 320 { 321 name: "same source with mbid preferred", 322 lastfm: []*PlayRecord{ 323 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM, RecordingMbId: "mbid-123"}, 324 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 325 }, 326 spotify: []*PlayRecord{}, 327 tolerance: 30 * time.Second, 328 expectedLen: 1, 329 expectedMergedTotal: 1, 330 expectedFirstTrack: "Song", 331 }, 332 { 333 name: "case insensitive duplicate detection", 334 lastfm: []*PlayRecord{ 335 {TrackName: "song title", Artists: []PlayRecordArtist{{ArtistName: "artist name"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 336 }, 337 spotify: []*PlayRecord{ 338 {TrackName: "SONG TITLE", Artists: []PlayRecordArtist{{ArtistName: "ARTIST NAME"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 339 }, 340 tolerance: 30 * time.Second, 341 expectedLen: 1, 342 expectedMergedTotal: 1, 343 }, 344 { 345 name: "multiple duplicates across time buckets", 346 lastfm: []*PlayRecord{ 347 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 348 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(2 * time.Minute)}, MusicServiceBaseDomain: MusicServiceLastFM}, 349 }, 350 spotify: []*PlayRecord{ 351 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 352 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(2*time.Minute + 10*time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 353 }, 354 tolerance: 30 * time.Second, 355 expectedLen: 2, 356 expectedMergedTotal: 2, 357 }, 358 { 359 name: "sorted by time then track name", 360 lastfm: []*PlayRecord{ 361 {TrackName: "A Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 362 {TrackName: "B Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(time.Hour)}, MusicServiceBaseDomain: MusicServiceLastFM}, 363 }, 364 spotify: []*PlayRecord{ 365 {TrackName: "A Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(30 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 366 }, 367 tolerance: 30 * time.Second, 368 expectedLen: 2, 369 expectedMergedTotal: 2, 370 expectedOrder: []string{"A Song", "B Song"}, 371 }, 372 { 373 name: "many duplicates in same bucket", 374 lastfm: []*PlayRecord{ 375 {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime}, MusicServiceBaseDomain: MusicServiceLastFM}, 376 {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(5 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 377 {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(10 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 378 }, 379 spotify: []*PlayRecord{ 380 {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(15 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 381 {TrackName: "Popular Song", Artists: []PlayRecordArtist{{ArtistName: "Popular Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(20 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 382 }, 383 tolerance: 30 * time.Second, 384 expectedLen: 1, 385 expectedMergedTotal: 1, 386 }, 387 { 388 name: "adjacent bucket detection works", 389 lastfm: []*PlayRecord{ 390 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(29 * time.Second)}, MusicServiceBaseDomain: MusicServiceLastFM}, 391 }, 392 spotify: []*PlayRecord{ 393 {TrackName: "Song", Artists: []PlayRecordArtist{{ArtistName: "Artist"}}, PlayedTime: Timestamp{Time: baseTime.Add(31 * time.Second)}, MusicServiceBaseDomain: MusicServiceSpotify}, 394 }, 395 tolerance: 5 * time.Second, 396 expectedLen: 1, 397 expectedMergedTotal: 1, 398 }, 399 } 400 401 for _, tt := range tests { 402 t.Run(tt.name, func(t *testing.T) { 403 result := kway.Merge([][]*PlayRecord{tt.lastfm, tt.spotify}, tt.tolerance) 404 405 if len(result) != tt.expectedLen { 406 t.Errorf("MergeRecords() length = %d, want %d", len(result), tt.expectedLen) 407 } 408 409 if tt.expectedFirstTrack != "" && len(result) > 0 { 410 if result[0].TrackName != tt.expectedFirstTrack { 411 t.Errorf("MergeRecords() first track = %q, want %q", result[0].TrackName, tt.expectedFirstTrack) 412 } 413 } 414 415 if len(tt.expectedOrder) > 0 { 416 if len(result) != len(tt.expectedOrder) { 417 t.Errorf("MergeRecords() order length mismatch, got %d, want %d", len(result), len(tt.expectedOrder)) 418 } else { 419 for i, expectedTrack := range tt.expectedOrder { 420 if i < len(result) && result[i].TrackName != expectedTrack { 421 t.Errorf("MergeRecords() order[%d] = %q, want %q", i, result[i].TrackName, expectedTrack) 422 } 423 } 424 } 425 } 426 427 // Verify sorting is correct 428 for i := 1; i < len(result); i++ { 429 prev, curr := result[i-1], result[i] 430 if prev.PlayedTime.After(curr.PlayedTime.Time) { 431 t.Errorf("MergeRecords() sorting failed: %q at %v should be after %q at %v", 432 prev.TrackName, prev.PlayedTime.Time, curr.TrackName, curr.PlayedTime.Time) 433 } 434 if prev.PlayedTime.Equal(curr.PlayedTime.Time) && prev.TrackName > curr.TrackName { 435 t.Errorf("MergeRecords() same-time sorting failed: %q should be before %q", 436 prev.TrackName, curr.TrackName) 437 } 438 } 439 }) 440 } 441} 442 443func BenchmarkMergeRecords(b *testing.B) { 444 // Generate test data with multiple sources and items 445 numSources := 10 446 itemsPerSource := 1000 447 tolerance := 10 * time.Minute 448 baseTime := time.Date(2024, 1, 15, 10, 0, 0, 0, time.UTC) 449 450 sources := make([][]*PlayRecord, numSources) 451 for i := range numSources { 452 sources[i] = make([]*PlayRecord, itemsPerSource) 453 for j := range itemsPerSource { 454 sources[i][j] = &PlayRecord{ 455 Type: "app.bsky.feed.post", 456 TrackName: fmt.Sprintf("Song %d", (i+j)%100), 457 Artists: []PlayRecordArtist{{ArtistName: fmt.Sprintf("Artist %d", i%20)}}, 458 PlayedTime: Timestamp{Time: baseTime.Add(time.Duration(i*itemsPerSource+j) * time.Minute)}, 459 SubmissionClientAgent: DefaultClientAgent, 460 MusicServiceBaseDomain: []string{MusicServiceLastFM, MusicServiceSpotify}[i%2], 461 OriginUrl: "https://example.com", 462 MsPlayed: 180000, 463 } 464 if (i+j)%3 == 0 { 465 sources[i][j].RecordingMbId = "mbid-123" 466 } 467 } 468 } 469 470 for b.Loop() { 471 kway.Merge(sources, tolerance) 472 } 473}