Your music, beautifully tracked. All yours. (coming soon) teal.fm
teal-fm atproto
at main 15 kB view raw
1-- Fix case sensitivity in discriminant extraction patterns 2-- This migration updates the discriminant extraction functions to properly handle case-insensitive matching 3 4-- Drop dependent views first, then functions, then recreate everything 5DROP VIEW IF EXISTS discriminant_analysis CASCADE; 6DROP VIEW IF EXISTS discriminant_stats CASCADE; 7 8-- Drop existing functions to replace with case-insensitive versions 9DROP FUNCTION IF EXISTS extract_discriminant(TEXT) CASCADE; 10DROP FUNCTION IF EXISTS get_base_name(TEXT) CASCADE; 11DROP FUNCTION IF EXISTS extract_edition_discriminant(TEXT) CASCADE; 12 13-- Enhanced function to extract discriminants with case-insensitive matching 14CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$ 15DECLARE 16 -- Comprehensive patterns for discriminant extraction with case-insensitive flags 17 discriminant_patterns TEXT[] := ARRAY[ 18 -- Parentheses patterns 19 '(?i)\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\)', 20 '(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\)', 21 '(?i)\(([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\)', 22 '(?i)\(([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\)', 23 '(?i)\(([^)]*(?:from|soundtrack|ost|score|theme).*?)\)', 24 25 -- Brackets patterns 26 '(?i)\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\]', 27 '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\]', 28 '(?i)\[([^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\]', 29 '(?i)\[([^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\]', 30 '(?i)\[([^]]*(?:from|soundtrack|ost|score|theme).*?)\]', 31 32 -- Braces patterns 33 '(?i)\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?)\}', 34 '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?)\}', 35 '(?i)\{([^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?)\}', 36 '(?i)\{([^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?)\}', 37 '(?i)\{([^}]*(?:from|soundtrack|ost|score|theme).*?)\}', 38 39 -- Dash/hyphen patterns (common for editions) 40 '(?i)[-–—]\s*([^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?)$', 41 '(?i)[-–—]\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 42 43 -- Colon patterns (common for subtitles and versions) 44 '(?i):\s*([^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?)$', 45 '(?i):\s*(\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 46 ]; 47 48 pattern TEXT; 49 match_result TEXT; 50BEGIN 51 -- Return early if input is null or empty 52 IF name_text IS NULL OR trim(name_text) = '' THEN 53 RETURN NULL; 54 END IF; 55 56 -- Try each pattern to find discriminant information 57 FOREACH pattern IN ARRAY discriminant_patterns 58 LOOP 59 SELECT substring(name_text FROM pattern) INTO match_result; 60 IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 61 -- Clean up the match result 62 match_result := trim(match_result); 63 -- Remove leading/trailing punctuation 64 match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 65 -- Ensure it's not just whitespace or empty after cleanup 66 IF length(trim(match_result)) > 0 THEN 67 RETURN match_result; 68 END IF; 69 END IF; 70 END LOOP; 71 72 RETURN NULL; 73END; 74$$ LANGUAGE plpgsql IMMUTABLE; 75 76-- Enhanced function to get base name without discriminant with case-insensitive matching 77CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$ 78DECLARE 79 -- Comprehensive cleanup patterns matching the extraction patterns 80 cleanup_patterns TEXT[] := ARRAY[ 81 -- Remove parentheses content 82 '(?i)\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\)\s*', 83 '(?i)\s*\([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\)\s*', 84 '(?i)\s*\([^)]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\)\s*', 85 '(?i)\s*\([^)]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\)\s*', 86 '(?i)\s*\([^)]*(?:from|soundtrack|ost|score|theme).*?\)\s*', 87 88 -- Remove brackets content 89 '(?i)\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\]\s*', 90 '(?i)\s*\[[^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\]\s*', 91 '(?i)\s*\[[^]]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\]\s*', 92 '(?i)\s*\[[^]]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\]\s*', 93 '(?i)\s*\[[^]]*(?:from|soundtrack|ost|score|theme).*?\]\s*', 94 95 -- Remove braces content 96 '(?i)\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray|hdtv|web|retail|promo|single|ep|lp|maxi|mini|radio|club|dance|house|techno|trance|ambient|classical|jazz|folk|country|rock|pop|metal|punk|indie|alternative).*?\}\s*', 97 '(?i)\s*\{[^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?\}\s*', 98 '(?i)\s*\{[^}]*(?:vol\.|volume|pt\.|part|disc|disk|cd)\s*\d+.*?\}\s*', 99 '(?i)\s*\{[^}]*(?:feat\.|featuring|ft\.|with|vs\.|versus|&|and)\s+.*?\}\s*', 100 '(?i)\s*\{[^}]*(?:from|soundtrack|ost|score|theme).*?\}\s*', 101 102 -- Remove dash/hyphen patterns 103 '(?i)\s*[-–—]\s*[^-–—]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive|digital|vinyl|cd|dvd|blu-ray).*?$', 104 '(?i)\s*[-–—]\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$', 105 106 -- Remove colon patterns 107 '(?i)\s*:\s*[^:]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus|edition|special|limited|expanded|director''s|uncut|final|ultimate|platinum|gold|anniversary|collector''s|standard|enhanced|super|mega|ultra|plus|pro|premium|complete|definitive|classic|original|alternate|alternative|unreleased|rare|exclusive).*?$', 108 '(?i)\s*:\s*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release).*?$' 109 ]; 110 111 pattern TEXT; 112 result_text TEXT := name_text; 113BEGIN 114 -- Return early if input is null or empty 115 IF name_text IS NULL OR trim(name_text) = '' THEN 116 RETURN name_text; 117 END IF; 118 119 -- Remove discriminant patterns to get base name 120 FOREACH pattern IN ARRAY cleanup_patterns 121 LOOP 122 result_text := regexp_replace(result_text, pattern, ' ', 'g'); 123 END LOOP; 124 125 -- Clean up extra whitespace and normalize 126 result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g'); 127 128 -- Remove trailing punctuation that might be left after removal 129 result_text := regexp_replace(result_text, '[,;:\-–—]\s*$', '', 'g'); 130 result_text := trim(result_text); 131 132 -- Ensure we don't return an empty string 133 IF length(result_text) = 0 THEN 134 RETURN name_text; 135 END IF; 136 137 RETURN result_text; 138END; 139$$ LANGUAGE plpgsql IMMUTABLE; 140 141-- Enhanced function to extract discriminant specifically for editions and versions with case-insensitive matching 142CREATE OR REPLACE FUNCTION extract_edition_discriminant(name_text TEXT) RETURNS TEXT AS $$ 143DECLARE 144 -- Focused patterns for edition/version extraction with case-insensitive flags 145 edition_patterns TEXT[] := ARRAY[ 146 -- Edition patterns 147 '(?i)\(([^)]*edition[^)]*)\)', 148 '(?i)\[([^]]*edition[^]]*)\]', 149 '(?i)\{([^}]*edition[^}]*)\}', 150 '(?i)[-–—]\s*([^-–—]*edition[^-–—]*)$', 151 '(?i):\s*([^:]*edition[^:]*)$', 152 153 -- Version patterns 154 '(?i)\(([^)]*version[^)]*)\)', 155 '(?i)\[([^]]*version[^]]*)\]', 156 '(?i)\{([^}]*version[^}]*)\}', 157 '(?i)[-–—]\s*([^-–—]*version[^-–—]*)$', 158 '(?i):\s*([^:]*version[^:]*)$', 159 160 -- Remaster patterns 161 '(?i)\(([^)]*remaster[^)]*)\)', 162 '(?i)\[([^]]*remaster[^]]*)\]', 163 '(?i)\{([^}]*remaster[^}]*)\}', 164 '(?i)[-–—]\s*([^-–—]*remaster[^-–—]*)$', 165 '(?i):\s*([^:]*remaster[^:]*)$', 166 167 -- Year-based patterns 168 '(?i)\(([^)]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^)]*)\)', 169 '(?i)\[([^]]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^]]*)\]', 170 '(?i)\{([^}]*(?:\d{4}|\d{2})\s*(?:remaster|edition|version|mix|cut|release)[^}]*)\}' 171 ]; 172 173 pattern TEXT; 174 match_result TEXT; 175BEGIN 176 -- Return early if input is null or empty 177 IF name_text IS NULL OR trim(name_text) = '' THEN 178 RETURN NULL; 179 END IF; 180 181 -- Try edition-specific patterns first 182 FOREACH pattern IN ARRAY edition_patterns 183 LOOP 184 SELECT substring(name_text FROM pattern) INTO match_result; 185 IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN 186 match_result := trim(match_result); 187 match_result := regexp_replace(match_result, '^[^\w]+|[^\w]+$', '', 'g'); 188 IF length(trim(match_result)) > 0 THEN 189 RETURN match_result; 190 END IF; 191 END IF; 192 END LOOP; 193 194 RETURN NULL; 195END; 196$$ LANGUAGE plpgsql IMMUTABLE; 197 198-- Update existing records with newly extracted discriminants (case-insensitive) 199UPDATE recordings 200SET discriminant = extract_discriminant(name) 201WHERE discriminant IS NULL 202 AND extract_discriminant(name) IS NOT NULL; 203 204UPDATE releases 205SET discriminant = extract_discriminant(name) 206WHERE discriminant IS NULL 207 AND extract_discriminant(name) IS NOT NULL; 208 209UPDATE plays 210SET track_discriminant = extract_discriminant(track_name) 211WHERE track_discriminant IS NULL 212 AND extract_discriminant(track_name) IS NOT NULL; 213 214UPDATE plays 215SET release_discriminant = extract_discriminant(release_name) 216WHERE release_discriminant IS NULL 217 AND release_name IS NOT NULL 218 AND extract_discriminant(release_name) IS NOT NULL; 219 220-- Update comments for the enhanced functions 221COMMENT ON FUNCTION extract_discriminant IS 'Enhanced case-insensitive discriminant extraction supporting comprehensive edition/version patterns including parentheses, brackets, braces, dashes, and colons'; 222COMMENT ON FUNCTION get_base_name IS 'Enhanced case-insensitive base name extraction removing comprehensive discriminant patterns to enable proper grouping'; 223COMMENT ON FUNCTION extract_edition_discriminant IS 'Specialized case-insensitive function for extracting edition and version discriminants with focused patterns'; 224 225-- Refresh materialized views to reflect the case-insensitive improvements 226REFRESH MATERIALIZED VIEW mv_release_play_counts; 227REFRESH MATERIALIZED VIEW mv_recording_play_counts; 228 229-- Update discriminant analysis view to include case-insensitive results 230DROP VIEW IF EXISTS discriminant_analysis; 231CREATE OR REPLACE VIEW discriminant_analysis AS 232SELECT 233 'recordings' as table_name, 234 name as original_name, 235 discriminant, 236 get_base_name(name) as base_name, 237 extract_discriminant(name) as extracted_discriminant, 238 extract_edition_discriminant(name) as edition_discriminant 239FROM recordings 240WHERE name IS NOT NULL 241UNION ALL 242SELECT 243 'releases' as table_name, 244 name as original_name, 245 discriminant, 246 get_base_name(name) as base_name, 247 extract_discriminant(name) as extracted_discriminant, 248 extract_edition_discriminant(name) as edition_discriminant 249FROM releases 250WHERE name IS NOT NULL; 251 252COMMENT ON VIEW discriminant_analysis IS 'Analysis view showing case-insensitive discriminant extraction results for quality assessment and debugging';