Like malachite (atproto-lastfm-importer), but in Go and bluer.
go spotify tealfm lastfm atproto
at main 239 lines 5.6 kB view raw
1package sync 2 3import ( 4 "bytes" 5 "fmt" 6 "time" 7 "unicode" 8 "unicode/utf8" 9 10 "github.com/bluesky-social/indigo/atproto/syntax" 11 "github.com/fxamacker/cbor/v2" 12) 13 14type ExistingRecord struct { 15 URI string 16 CID string 17 Value *PlayRecord 18} 19 20func normalizeToBytes(s string) []byte { 21 b := make([]byte, 0, len(s)) 22 23 for _, r := range s { 24 r = unicode.ToLower(r) 25 26 if r >= 128 || unicode.IsLetter(r) || unicode.IsNumber(r) { 27 b = utf8.AppendRune(b, r) 28 } 29 } 30 31 return b 32} 33 34type PlayRecord struct { 35 Type string `json:"$type"` 36 TrackName string `json:"trackName"` 37 Artists []PlayRecordArtist `json:"artists"` 38 PlayedTime Timestamp `json:"playedTime"` 39 SubmissionClientAgent string `json:"submissionClientAgent"` 40 MusicServiceBaseDomain string `json:"musicServiceBaseDomain"` 41 ReleaseName string `json:"releaseName,omitempty"` 42 ReleaseMbId string `json:"releaseMbId,omitempty"` 43 RecordingMbId string `json:"recordingMbId,omitempty"` 44 OriginUrl string `json:"originUrl"` 45 MsPlayed int `json:"msPlayed,omitempty"` 46 47 normalizedTrack []byte `json:"-"` 48 normalizedArtist []byte `json:"-"` 49} 50 51func (r *PlayRecord) ArtistName() string { 52 if len(r.Artists) > 0 { 53 return r.Artists[0].ArtistName 54 } 55 return "Unknown Artist" 56} 57 58func (r *PlayRecord) normalizeArtist() []byte { 59 if len(r.normalizedArtist) > 0 { 60 return r.normalizedArtist 61 } 62 63 r.normalizedArtist = normalizeToBytes(r.ArtistName()) 64 65 return r.normalizedArtist 66} 67 68func (r *PlayRecord) normalizeTrack() []byte { 69 if len(r.normalizedTrack) != 0 { 70 return r.normalizedTrack 71 } 72 73 r.normalizedTrack = normalizeToBytes(r.TrackName) 74 75 return r.normalizedTrack 76} 77 78func (r *PlayRecord) hasMBID() bool { 79 for _, a := range r.Artists { 80 if a.ArtistMbId != "" { 81 return true 82 } 83 } 84 85 return r.RecordingMbId != "" 86} 87 88func (r *PlayRecord) isLastFM() bool { 89 return r.MusicServiceBaseDomain == 
MusicServiceLastFM 90} 91 92func (r *PlayRecord) betterThan(other *PlayRecord) bool { 93 return (r.hasMBID() && !other.hasMBID()) || (r.isLastFM() && !other.isLastFM()) 94} 95 96func (r *PlayRecord) IsDuplicate(other *PlayRecord, tolerance time.Duration) (bool, bool) { 97 return r.sameAs(other, tolerance), r.betterThan(other) 98} 99 100func (r *PlayRecord) sameAs(other *PlayRecord, tolerance time.Duration) bool { 101 if !bytes.Equal(r.normalizeTrack(), other.normalizeTrack()) { 102 return false 103 } 104 if !bytes.Equal(r.normalizeArtist(), other.normalizeArtist()) { 105 return false 106 } 107 108 diff := r.PlayedTime.Sub(other.PlayedTime.Time) 109 return max(diff, -diff) <= tolerance 110} 111 112func (r *PlayRecord) Time() time.Time { 113 return r.PlayedTime.Time 114} 115 116type PlayRecordArtist struct { 117 ArtistName string `json:"artistName"` 118 ArtistMbId string `json:"artistMbId,omitempty"` 119} 120 121const ( 122 MusicServiceLastFM = "last.fm" 123 MusicServiceSpotify = "spotify.com" 124 125 TimeBucketSize = 30 * time.Second 126 MinListenDuration = 30 * time.Second 127 128 DefaultClientAgent = "lazuli/dev" 129) 130 131func CreateRecordKey(record *PlayRecord) string { 132 return string(syntax.NewTIDFromTime(record.PlayedTime.Time, 0)) 133} 134 135func CreateRecordKeys(records []*PlayRecord) []string { 136 keys := make([]string, len(records)) 137 usedTIDs := make(map[string]int) 138 139 for i, rec := range records { 140 t := rec.PlayedTime.Time 141 tid := syntax.NewTIDFromTime(t, 0) 142 for usedTIDs[string(tid)] > 0 { 143 usedTIDs[string(tid)]++ 144 tid = syntax.NewTIDFromTime(t, uint(usedTIDs[string(tid)]-1)) 145 } 146 usedTIDs[string(tid)]++ 147 keys[i] = string(tid) 148 } 149 return keys 150} 151 152func FilterNew(records []*PlayRecord, existing []ExistingRecord, processed map[string]bool, tolerance time.Duration) []*PlayRecord { 153 existingSet := make(map[*PlayRecord]bool) 154 for _, rec := range existing { 155 existingSet[rec.Value] = true 156 } 157 158 
var newRecords []*PlayRecord 159 for _, record := range records { 160 if processed != nil && processed[CreateRecordKey(record)] { 161 continue 162 } 163 164 if len(existingSet) > 0 { 165 isDup := false 166 for existingRec := range existingSet { 167 if record.sameAs(existingRec, tolerance) { 168 isDup = true 169 break 170 } 171 } 172 if isDup { 173 continue 174 } 175 } 176 newRecords = append(newRecords, record) 177 } 178 return newRecords 179} 180 181func FindDuplicates(records []ExistingRecord) map[string][]ExistingRecord { 182 groups := make(map[string][]ExistingRecord) 183 for _, rec := range records { 184 key := CreateRecordKey(rec.Value) 185 if key == "|||" { 186 continue 187 } 188 groups[key] = append(groups[key], rec) 189 } 190 191 duplicates := make(map[string][]ExistingRecord) 192 for key, group := range groups { 193 if len(group) >= 2 { 194 duplicates[key] = group 195 } 196 } 197 return duplicates 198} 199 200type Timestamp struct { 201 time.Time 202} 203 204func (t Timestamp) MarshalJSON() ([]byte, error) { 205 return []byte(`"` + t.Format(time.RFC3339Nano) + `"`), nil 206} 207 208func (t *Timestamp) UnmarshalCBOR(data []byte) error { 209 var s string 210 211 err := cbor.Unmarshal(data, &s) 212 if err != nil { 213 return fmt.Errorf("failed to decode timestamp cbor value: %w", err) 214 } 215 216 return t.parse(s) 217} 218 219func (t *Timestamp) UnmarshalJSON(data []byte) error { 220 if string(data) == "null" { 221 *t = Timestamp{} 222 return nil 223 } 224 225 return t.parse(string(data[1 : len(data)-1])) 226} 227 228func (t *Timestamp) parse(s string) error { 229 tm, err := time.Parse(time.RFC3339Nano, s) 230 if err != nil { 231 tm, err = time.Parse(time.RFC3339, s) 232 if err != nil { 233 return fmt.Errorf("failed to parse timestamp %q: %w", s, err) 234 } 235 } 236 237 *t = Timestamp{Time: tm} 238 return nil 239}