Your music, beautifully tracked. All yours. (coming soon) teal.fm
teal-fm atproto
at main 16 kB view raw
1-- Enhanced discriminant extraction with comprehensive edition/version patterns 2-- This migration improves the auto-population of discriminants for better metadata handling 3 4-- Drop existing functions to replace them with enhanced versions 5DROP FUNCTION IF EXISTS extract_discriminant(TEXT); 6DROP FUNCTION IF EXISTS get_base_name(TEXT); 7 8-- Enhanced function to extract discriminants with comprehensive patterns 9CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$ 10DECLARE 11 -- Comprehensive patterns for discriminant extraction 12 discriminant_patterns TEXT[] := ARRAY[ 13 -- Parentheses patterns 14 '\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)', 15 '\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)', 16 '\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)', 17 '\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)', 18 '\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)', 19 20 -- Brackets patterns 21 '\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]', 22 '\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]', 23 '\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]', 24 '\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]', 25 '\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]', 26 27 -- Braces patterns 28 '\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}', 29 '\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}', 30 '\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}', 31 '\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}', 32 '\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}', 33 34 -- Dash/hyphen patterns (common for editions) 35 '[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$', 36 '[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 37 38 -- Colon patterns (common for subtitles and versions) 39 ':\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$', 40 ':\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 41 ]; 42 43 pattern TEXT; 44 match_result TEXT; 45BEGIN 46 -- Return early if input is null or empty 47 IF name_text IS NULL OR trim(name_text) = '' THEN 48 RETURN NULL; 49 END IF; 50 51 -- Try each pattern to find discriminant information 52 FOREACH pattern IN ARRAY discriminant_patterns 53 LOOP 54 SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result; 55 IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 56 -- Clean up the match result 57 match_result := trim(match_result); 58 -- Remove leading/trailing punctuation 59 match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 60 -- Ensure it's not just whitespace or empty after cleanup 61 IF length(trim(match_result)) > 0 THEN 62 RETURN match_result; 63 END IF; 64 END IF; 65 END LOOP; 66 67 RETURN NULL; 68END; 69$$ LANGUAGE plpgsql IMMUTABLE; 70 71-- Enhanced function to get base name without discriminant 72CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$ 73DECLARE 74 -- Comprehensive cleanup patterns matching the extraction patterns 75 cleanup_patterns TEXT[] := ARRAY[ 76 -- Remove parentheses content 77 '\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*', 78 '\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*', 79 '\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*', 80 '\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*', 81 '\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*', 82 83 -- Remove brackets content 84 '\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*', 85 '\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*', 86 '\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*', 87 '\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*', 88 '\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*', 89 90 -- Remove braces content 91 '\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*', 92 '\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*', 93 '\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*', 94 '\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*', 95 '\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*', 96 97 -- Remove dash/hyphen patterns 98 '\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$', 99 '\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 100 101 -- Remove colon patterns 102 '\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$', 103 '\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 104 ]; 105 106 pattern TEXT; 107 result_text TEXT := name_text; 108BEGIN 109 -- Return early if input is null or empty 110 IF name_text IS NULL OR trim(name_text) = '' THEN 111 RETURN name_text; 112 END IF; 113 114 -- Remove discriminant patterns to get base name 115 FOREACH pattern IN ARRAY cleanup_patterns 116 LOOP 117 result_text := regexp_replace(result_text, pattern, ' ', 'gi'); 118 END LOOP; 119 120 -- Clean up extra whitespace and normalize 121 result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g'); 122 123 -- Remove trailing punctuation that might be left after removal 124 result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g'); 125 result_text := trim(result_text); 126 127 -- Ensure we don't return an empty string 128 IF length(result_text) = 0 THEN 129 RETURN name_text; 130 END IF; 131 132 RETURN result_text; 133END; 134$$ LANGUAGE plpgsql IMMUTABLE; 135 136-- Create function to extract discriminant specifically for editions and versions 137CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$ 138DECLARE 139 -- Focused patterns for edition/version extraction 140 edition_patterns TEXT[] := ARRAY[ 141 -- Edition patterns 142 '\(([^)]*edition[^)]*)\)', 143 '\[([^]]*edition[^]]*)\]', 144 '\{([^}]*edition[^}]*)\}', 145 '[-–—]\s*([^-–—]*edition[^-–—]*)$', 146 ':\s*([^:]*edition[^:]*)$', 147 148 -- Version patterns 149 '\(([^)]*version[^)]*)\)', 150 '\[([^]]*version[^]]*)\]', 151 '\{([^}]*version[^}]*)\}', 152 '[-–—]\s*([^-–—]*version[^-–—]*)$', 153 ':\s*([^:]*version[^:]*)$', 154 155 -- Remaster patterns 156 '\(([^)]*remaster[^)]*)\)', 157 '\[([^]]*remaster[^]]*)\]', 158 '\{([^}]*remaster[^}]*)\}', 159 '[-–—]\s*([^-–—]*remaster[^-–—]*)$', 160 ':\s*([^:]*remaster[^:]*)$', 161 162 -- Year-based patterns 163 '\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)', 164 '\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]', 165 '\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}' 166 ]; 167 168 pattern TEXT; 169 match_result TEXT; 170BEGIN 171 -- Return early if input is null or empty 172 IF name_text IS NULL OR trim(name_text) = '' THEN 173 RETURN NULL; 174 END IF; 175 176 -- Try edition-specific patterns first 177 FOREACH pattern IN ARRAY edition_patterns 178 LOOP 179 SELECT substring(name_text FROM pattern COLLATE "C") INTO match_result; 180 IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 181 match_result := trim(match_result); 182 match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 183 IF length(trim(match_result)) > 0 THEN 184 RETURN match_result; 185 END IF; 186 END IF; 187 END LOOP; 188 189 RETURN NULL; 190END; 191$$ LANGUAGE plpgsql IMMUTABLE; 192 193-- Update recordings table to populate discriminants from existing names 194UPDATE recordings 195SET discriminant = extract_discriminant(name) 196WHERE discriminant IS NULL 197 AND extract_discriminant(name) IS NOT NULL; 198 199-- Update releases table to populate discriminants from existing names 200UPDATE releases 201SET discriminant = extract_discriminant(name) 202WHERE discriminant IS NULL 203 AND extract_discriminant(name) IS NOT NULL; 204 205-- Update plays table to populate discriminants from existing names where not already set 206UPDATE plays 207SET track_discriminant = extract_discriminant(track_name) 208WHERE track_discriminant IS NULL 209 AND extract_discriminant(track_name) IS NOT NULL; 210 211UPDATE plays 212SET release_discriminant = extract_discriminant(release_name) 213WHERE release_discriminant IS NULL 214 AND release_name IS NOT NULL 215 AND extract_discriminant(release_name) IS NOT NULL; 216 217-- Create indexes for efficient discriminant queries 218CREATE INDEX IF NOT EXISTS idx_recordings_name_discriminant ON recordings (name, discriminant); 219CREATE INDEX IF NOT EXISTS idx_releases_name_discriminant ON releases (name, discriminant); 220 221-- Add comments for the new function 222COMMENT ON FUNCTION extract_discriminant IS 'Enhanced discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons'; 223COMMENT ON FUNCTION get_base_name IS 'Enhanced base name extraction removing comprehensive discriminant patterns to enable proper grouping'; 224COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized function for extracting edition and version discriminants with focused patterns'; 225 226-- Create a view to show discriminant extraction results for analysis 227CREATE OR REPLACE VIEW discriminant_analysis AS 228SELECT 229 'recordings' as table_name, 230 name as original_name, 231 discriminant, 232 get_base_name(name) as base_name, 233 extract_discriminant(name) as extracted_discriminant, 234 extract_edition_discriminant(name) as edition_discriminant 235FROM recordings 236WHERE name IS NOT NULL 237UNION ALL 238SELECT 239 'releases' as table_name, 240 name as original_name, 241 discriminant, 242 get_base_name(name) as base_name, 243 extract_discriminant(name) as extracted_discriminant, 244 extract_edition_discriminant(name) as edition_discriminant 245FROM releases 246WHERE name IS NOT NULL; 247 248COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing discriminant extraction results for quality assessment and debugging'; 249 250-- Refresh materialized views to include discriminant information 251REFRESH MATERIALIZED VIEW mv_release_play_counts; 252REFRESH MATERIALIZED VIEW mv_recording_play_counts; 253 254-- Create summary statistics for discriminant usage 255CREATE OR REPLACE VIEW discriminant_stats AS 256SELECT 257 'recordings' as entity_type, 258 COUNT(*) as total_count, 259 COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant, 260 COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant, 261 ROUND( 262 COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2 263 ) as discriminant_percentage 264FROM recordings 265UNION ALL 266SELECT 267 'releases' as entity_type, 268 COUNT(*) as total_count, 269 COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) as with_discriminant, 270 COUNT(CASE WHEN discriminant IS NULL AND extract_discriminant(name) IS NOT NULL THEN 1 END) as extractable_discriminant, 271 ROUND( 272 COUNT(CASE WHEN discriminant IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2 273 ) as discriminant_percentage 274FROM releases; 275 276COMMENT ON VIEW discriminant_stats IS 'Statistics showing discriminant usage and extraction potential across entity types';