Your music, beautifully tracked. All yours. (coming soon) teal.fm
teal-fm atproto
at main 4.2 kB view raw
1-- Migration to add fuzzy text matching capabilities 2-- This enables better artist name matching using trigram similarity 3 4-- Enable pg_trgm extension for trigram similarity matching 5CREATE EXTENSION IF NOT EXISTS pg_trgm; 6 7-- Create indexes for efficient trigram matching on artist names 8CREATE INDEX idx_artists_extended_name_trgm ON artists_extended USING gin (name gin_trgm_ops); 9CREATE INDEX idx_artists_extended_name_normalized_trgm ON artists_extended USING gin (name_normalized gin_trgm_ops); 10 11-- Create a function to calculate comprehensive artist similarity 12CREATE OR REPLACE FUNCTION calculate_artist_similarity( 13 input_name TEXT, 14 existing_name TEXT, 15 input_album TEXT DEFAULT NULL, 16 existing_album TEXT DEFAULT NULL 17) RETURNS FLOAT AS $$ 18DECLARE 19 name_similarity FLOAT; 20 album_similarity FLOAT := 0.0; 21 final_score FLOAT; 22BEGIN 23 -- Calculate trigram similarity for artist names 24 name_similarity := similarity(LOWER(TRIM(input_name)), LOWER(TRIM(existing_name))); 25 26 -- Boost for exact matches after normalization 27 IF LOWER(TRIM(regexp_replace(input_name, '[^a-zA-Z0-9\s]', '', 'g'))) = 28 LOWER(TRIM(regexp_replace(existing_name, '[^a-zA-Z0-9\s]', '', 'g'))) THEN 29 name_similarity := GREATEST(name_similarity, 0.95); 30 END IF; 31 32 -- Factor in album similarity if both are provided 33 IF input_album IS NOT NULL AND existing_album IS NOT NULL THEN 34 album_similarity := similarity(LOWER(TRIM(input_album)), LOWER(TRIM(existing_album))); 35 -- Weight: 80% name, 20% album 36 final_score := (name_similarity * 0.8) + (album_similarity * 0.2); 37 ELSE 38 final_score := name_similarity; 39 END IF; 40 41 RETURN final_score; 42END; 43$$ LANGUAGE plpgsql IMMUTABLE; 44 45-- Create a view for fuzzy artist matching with confidence scores 46CREATE VIEW fuzzy_artist_matches AS 47SELECT DISTINCT 48 ae1.id as query_artist_id, 49 ae1.name as query_artist_name, 50 ae1.mbid_type as query_mbid_type, 51 ae2.id as match_artist_id, 52 ae2.name as match_artist_name, 53 ae2.mbid as match_mbid, 54 ae2.mbid_type as match_mbid_type, 55 similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) as name_similarity, 56 CASE 57 WHEN ae2.mbid_type = 'musicbrainz' THEN 'upgrade_to_mb' 58 WHEN ae1.mbid_type = 'musicbrainz' AND ae2.mbid_type = 'synthetic' THEN 'consolidate_to_mb' 59 ELSE 'merge_synthetic' 60 END as match_action 61FROM artists_extended ae1 62CROSS JOIN artists_extended ae2 63WHERE ae1.id != ae2.id 64AND similarity(LOWER(TRIM(ae1.name)), LOWER(TRIM(ae2.name))) > 0.8 65AND ( 66 ae1.mbid_type = 'synthetic' OR ae2.mbid_type = 'musicbrainz' 67); 68 69-- Add comments 70COMMENT ON EXTENSION pg_trgm IS 'Trigram extension for fuzzy text matching'; 71COMMENT ON INDEX idx_artists_extended_name_trgm IS 'GIN index for trigram similarity on artist names'; 72COMMENT ON FUNCTION calculate_artist_similarity IS 'Calculates similarity score between artists considering name and optional album context'; 73COMMENT ON VIEW fuzzy_artist_matches IS 'Shows potential artist matches with confidence scores and recommended actions'; 74 75-- Create a function to suggest artist consolidations 76CREATE OR REPLACE FUNCTION suggest_artist_consolidations(min_similarity FLOAT DEFAULT 0.9) 77RETURNS TABLE( 78 action TEXT, 79 synthetic_artist TEXT, 80 target_artist TEXT, 81 similarity_score FLOAT, 82 synthetic_plays INTEGER, 83 target_plays INTEGER 84) AS $$ 85BEGIN 86 RETURN QUERY 87 SELECT 88 fam.match_action as action, 89 fam.query_artist_name as synthetic_artist, 90 fam.match_artist_name as target_artist, 91 fam.name_similarity as similarity_score, 92 (SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.query_artist_id) as synthetic_plays, 93 (SELECT COUNT(*)::INTEGER FROM play_to_artists_extended WHERE artist_id = fam.match_artist_id) as target_plays 94 FROM fuzzy_artist_matches fam 95 WHERE fam.name_similarity >= min_similarity 96 AND fam.match_action = 'upgrade_to_mb' 97 ORDER BY fam.name_similarity DESC, synthetic_plays DESC; 98END; 99$$ LANGUAGE plpgsql; 100 101COMMENT ON FUNCTION suggest_artist_consolidations IS 'Returns suggestions for consolidating synthetic artists with MusicBrainz artists based on similarity';