package sync import ( "bytes" "fmt" "time" "unicode" "unicode/utf8" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/fxamacker/cbor/v2" ) type ExistingRecord struct { URI string CID string Value *PlayRecord } func normalizeToBytes(s string) []byte { b := make([]byte, 0, len(s)) for _, r := range s { r = unicode.ToLower(r) if r >= 128 || unicode.IsLetter(r) || unicode.IsNumber(r) { b = utf8.AppendRune(b, r) } } return b } type PlayRecord struct { Type string `json:"$type"` TrackName string `json:"trackName"` Artists []PlayRecordArtist `json:"artists"` PlayedTime Timestamp `json:"playedTime"` SubmissionClientAgent string `json:"submissionClientAgent"` MusicServiceBaseDomain string `json:"musicServiceBaseDomain"` ReleaseName string `json:"releaseName,omitempty"` ReleaseMbId string `json:"releaseMbId,omitempty"` RecordingMbId string `json:"recordingMbId,omitempty"` OriginUrl string `json:"originUrl"` MsPlayed int `json:"msPlayed,omitempty"` normalizedTrack []byte `json:"-"` normalizedArtist []byte `json:"-"` } func (r *PlayRecord) ArtistName() string { if len(r.Artists) > 0 { return r.Artists[0].ArtistName } return "Unknown Artist" } func (r *PlayRecord) normalizeArtist() []byte { if len(r.normalizedArtist) > 0 { return r.normalizedArtist } r.normalizedArtist = normalizeToBytes(r.ArtistName()) return r.normalizedArtist } func (r *PlayRecord) normalizeTrack() []byte { if len(r.normalizedTrack) != 0 { return r.normalizedTrack } r.normalizedTrack = normalizeToBytes(r.TrackName) return r.normalizedTrack } func (r *PlayRecord) hasMBID() bool { for _, a := range r.Artists { if a.ArtistMbId != "" { return true } } return r.RecordingMbId != "" } func (r *PlayRecord) isLastFM() bool { return r.MusicServiceBaseDomain == MusicServiceLastFM } func (r *PlayRecord) betterThan(other *PlayRecord) bool { return (r.hasMBID() && !other.hasMBID()) || (r.isLastFM() && !other.isLastFM()) } func (r *PlayRecord) IsDuplicate(other *PlayRecord, tolerance time.Duration) (bool, bool) { return r.sameAs(other, tolerance), r.betterThan(other) } func (r *PlayRecord) sameAs(other *PlayRecord, tolerance time.Duration) bool { if !bytes.Equal(r.normalizeTrack(), other.normalizeTrack()) { return false } if !bytes.Equal(r.normalizeArtist(), other.normalizeArtist()) { return false } diff := r.PlayedTime.Sub(other.PlayedTime.Time) return max(diff, -diff) <= tolerance } func (r *PlayRecord) Time() time.Time { return r.PlayedTime.Time } type PlayRecordArtist struct { ArtistName string `json:"artistName"` ArtistMbId string `json:"artistMbId,omitempty"` } const ( MusicServiceLastFM = "last.fm" MusicServiceSpotify = "spotify.com" TimeBucketSize = 30 * time.Second MinListenDuration = 30 * time.Second DefaultClientAgent = "lazuli/dev" ) func CreateRecordKey(record *PlayRecord) string { return string(syntax.NewTIDFromTime(record.PlayedTime.Time, 0)) } func CreateRecordKeys(records []*PlayRecord) []string { keys := make([]string, len(records)) usedTIDs := make(map[string]int) for i, rec := range records { t := rec.PlayedTime.Time tid := syntax.NewTIDFromTime(t, 0) for usedTIDs[string(tid)] > 0 { usedTIDs[string(tid)]++ tid = syntax.NewTIDFromTime(t, uint(usedTIDs[string(tid)]-1)) } usedTIDs[string(tid)]++ keys[i] = string(tid) } return keys } func FilterNew(records []*PlayRecord, existing []ExistingRecord, processed map[string]bool, tolerance time.Duration) []*PlayRecord { existingSet := make(map[*PlayRecord]bool) for _, rec := range existing { existingSet[rec.Value] = true } var newRecords []*PlayRecord for _, record := range records { if processed != nil && processed[CreateRecordKey(record)] { continue } if len(existingSet) > 0 { isDup := false for existingRec := range existingSet { if record.sameAs(existingRec, tolerance) { isDup = true break } } if isDup { continue } } newRecords = append(newRecords, record) } return newRecords } func FindDuplicates(records []ExistingRecord) map[string][]ExistingRecord { groups := make(map[string][]ExistingRecord) for _, rec := range records { key := CreateRecordKey(rec.Value) if key == "|||" { continue } groups[key] = append(groups[key], rec) } duplicates := make(map[string][]ExistingRecord) for key, group := range groups { if len(group) >= 2 { duplicates[key] = group } } return duplicates } type Timestamp struct { time.Time } func (t Timestamp) MarshalJSON() ([]byte, error) { return []byte(`"` + t.Format(time.RFC3339Nano) + `"`), nil } func (t *Timestamp) UnmarshalCBOR(data []byte) error { var s string err := cbor.Unmarshal(data, &s) if err != nil { return fmt.Errorf("failed to decode timestamp cbor value: %w", err) } return t.parse(s) } func (t *Timestamp) UnmarshalJSON(data []byte) error { if string(data) == "null" { *t = Timestamp{} return nil } return t.parse(string(data[1 : len(data)-1])) } func (t *Timestamp) parse(s string) error { tm, err := time.Parse(time.RFC3339Nano, s) if err != nil { tm, err = time.Parse(time.RFC3339, s) if err != nil { return fmt.Errorf("failed to parse timestamp %q: %w", s, err) } } *t = Timestamp{Time: tm} return nil }