Like malachite (atproto-lastfm-importer), but in Go and bluer.
go
spotify
tealfm
lastfm
atproto
1package sync
2
3import (
4 "bytes"
5 "fmt"
6 "time"
7 "unicode"
8 "unicode/utf8"
9
10 "github.com/bluesky-social/indigo/atproto/syntax"
11 "github.com/fxamacker/cbor/v2"
12)
13
14type ExistingRecord struct {
15 URI string
16 CID string
17 Value *PlayRecord
18}
19
// normalizeToBytes returns the UTF-8 encoding of s reduced to a form suitable
// for loose comparison. Each rune is lower-cased; a lower-cased rune below
// U+0080 is kept only if it is a letter or a number, while every rune at or
// above U+0080 is kept unconditionally.
//
// The result is always a non-nil slice, even for an empty input.
func normalizeToBytes(s string) []byte {
	out := make([]byte, 0, len(s))
	for _, ch := range s {
		lower := unicode.ToLower(ch)
		// Drop ASCII punctuation, whitespace and control characters.
		if lower < 128 && !unicode.IsLetter(lower) && !unicode.IsNumber(lower) {
			continue
		}
		out = utf8.AppendRune(out, lower)
	}
	return out
}
33
34type PlayRecord struct {
35 Type string `json:"$type"`
36 TrackName string `json:"trackName"`
37 Artists []PlayRecordArtist `json:"artists"`
38 PlayedTime Timestamp `json:"playedTime"`
39 SubmissionClientAgent string `json:"submissionClientAgent"`
40 MusicServiceBaseDomain string `json:"musicServiceBaseDomain"`
41 ReleaseName string `json:"releaseName,omitempty"`
42 ReleaseMbId string `json:"releaseMbId,omitempty"`
43 RecordingMbId string `json:"recordingMbId,omitempty"`
44 OriginUrl string `json:"originUrl"`
45 MsPlayed int `json:"msPlayed,omitempty"`
46
47 normalizedTrack []byte `json:"-"`
48 normalizedArtist []byte `json:"-"`
49}
50
51func (r *PlayRecord) ArtistName() string {
52 if len(r.Artists) > 0 {
53 return r.Artists[0].ArtistName
54 }
55 return "Unknown Artist"
56}
57
58func (r *PlayRecord) normalizeArtist() []byte {
59 if len(r.normalizedArtist) > 0 {
60 return r.normalizedArtist
61 }
62
63 r.normalizedArtist = normalizeToBytes(r.ArtistName())
64
65 return r.normalizedArtist
66}
67
68func (r *PlayRecord) normalizeTrack() []byte {
69 if len(r.normalizedTrack) != 0 {
70 return r.normalizedTrack
71 }
72
73 r.normalizedTrack = normalizeToBytes(r.TrackName)
74
75 return r.normalizedTrack
76}
77
78func (r *PlayRecord) hasMBID() bool {
79 for _, a := range r.Artists {
80 if a.ArtistMbId != "" {
81 return true
82 }
83 }
84
85 return r.RecordingMbId != ""
86}
87
88func (r *PlayRecord) isLastFM() bool {
89 return r.MusicServiceBaseDomain == MusicServiceLastFM
90}
91
92func (r *PlayRecord) betterThan(other *PlayRecord) bool {
93 return (r.hasMBID() && !other.hasMBID()) || (r.isLastFM() && !other.isLastFM())
94}
95
96func (r *PlayRecord) IsDuplicate(other *PlayRecord, tolerance time.Duration) (bool, bool) {
97 return r.sameAs(other, tolerance), r.betterThan(other)
98}
99
100func (r *PlayRecord) sameAs(other *PlayRecord, tolerance time.Duration) bool {
101 if !bytes.Equal(r.normalizeTrack(), other.normalizeTrack()) {
102 return false
103 }
104 if !bytes.Equal(r.normalizeArtist(), other.normalizeArtist()) {
105 return false
106 }
107
108 diff := r.PlayedTime.Sub(other.PlayedTime.Time)
109 return max(diff, -diff) <= tolerance
110}
111
112func (r *PlayRecord) Time() time.Time {
113 return r.PlayedTime.Time
114}
115
// PlayRecordArtist is one artist credited on a PlayRecord.
type PlayRecordArtist struct {
	// ArtistName is always present in the JSON form.
	ArtistName string `json:"artistName"`
	// ArtistMbId is omitted from the JSON form when empty.
	ArtistMbId string `json:"artistMbId,omitempty"`
}
120
// Base domains stored in PlayRecord.MusicServiceBaseDomain.
const (
	MusicServiceLastFM  = "last.fm"
	MusicServiceSpotify = "spotify.com"
)

// Durations used when processing plays.
//
// NOTE(review): neither constant is referenced in this part of the file;
// presumably TimeBucketSize is a deduplication window and MinListenDuration
// the shortest play worth importing — confirm at the call sites.
const (
	TimeBucketSize    = 30 * time.Second
	MinListenDuration = 30 * time.Second
)

// DefaultClientAgent is a client agent string.
//
// NOTE(review): presumably the default for
// PlayRecord.SubmissionClientAgent — confirm.
const DefaultClientAgent = "lazuli/dev"
130
131func CreateRecordKey(record *PlayRecord) string {
132 return string(syntax.NewTIDFromTime(record.PlayedTime.Time, 0))
133}
134
135func CreateRecordKeys(records []*PlayRecord) []string {
136 keys := make([]string, len(records))
137 usedTIDs := make(map[string]int)
138
139 for i, rec := range records {
140 t := rec.PlayedTime.Time
141 tid := syntax.NewTIDFromTime(t, 0)
142 for usedTIDs[string(tid)] > 0 {
143 usedTIDs[string(tid)]++
144 tid = syntax.NewTIDFromTime(t, uint(usedTIDs[string(tid)]-1))
145 }
146 usedTIDs[string(tid)]++
147 keys[i] = string(tid)
148 }
149 return keys
150}
151
152func FilterNew(records []*PlayRecord, existing []ExistingRecord, processed map[string]bool, tolerance time.Duration) []*PlayRecord {
153 existingSet := make(map[*PlayRecord]bool)
154 for _, rec := range existing {
155 existingSet[rec.Value] = true
156 }
157
158 var newRecords []*PlayRecord
159 for _, record := range records {
160 if processed != nil && processed[CreateRecordKey(record)] {
161 continue
162 }
163
164 if len(existingSet) > 0 {
165 isDup := false
166 for existingRec := range existingSet {
167 if record.sameAs(existingRec, tolerance) {
168 isDup = true
169 break
170 }
171 }
172 if isDup {
173 continue
174 }
175 }
176 newRecords = append(newRecords, record)
177 }
178 return newRecords
179}
180
181func FindDuplicates(records []ExistingRecord) map[string][]ExistingRecord {
182 groups := make(map[string][]ExistingRecord)
183 for _, rec := range records {
184 key := CreateRecordKey(rec.Value)
185 if key == "|||" {
186 continue
187 }
188 groups[key] = append(groups[key], rec)
189 }
190
191 duplicates := make(map[string][]ExistingRecord)
192 for key, group := range groups {
193 if len(group) >= 2 {
194 duplicates[key] = group
195 }
196 }
197 return duplicates
198}
199
// Timestamp embeds time.Time so that the methods declared on it can control
// how the value is encoded to JSON and decoded from JSON and CBOR.
type Timestamp struct{ time.Time }
203
204func (t Timestamp) MarshalJSON() ([]byte, error) {
205 return []byte(`"` + t.Format(time.RFC3339Nano) + `"`), nil
206}
207
208func (t *Timestamp) UnmarshalCBOR(data []byte) error {
209 var s string
210
211 err := cbor.Unmarshal(data, &s)
212 if err != nil {
213 return fmt.Errorf("failed to decode timestamp cbor value: %w", err)
214 }
215
216 return t.parse(s)
217}
218
219func (t *Timestamp) UnmarshalJSON(data []byte) error {
220 if string(data) == "null" {
221 *t = Timestamp{}
222 return nil
223 }
224
225 return t.parse(string(data[1 : len(data)-1]))
226}
227
228func (t *Timestamp) parse(s string) error {
229 tm, err := time.Parse(time.RFC3339Nano, s)
230 if err != nil {
231 tm, err = time.Parse(time.RFC3339, s)
232 if err != nil {
233 return fmt.Errorf("failed to parse timestamp %q: %w", s, err)
234 }
235 }
236
237 *t = Timestamp{Time: tm}
238 return nil
239}