1-- Migration to add discriminant fields for track and release variants
2-- This enables proper handling of different versions while maintaining grouping capabilities
3
4-- Add discriminant fields to plays table
5ALTER TABLE plays ADD COLUMN track_discriminant TEXT;
6ALTER TABLE plays ADD COLUMN release_discriminant TEXT;
7
8-- Add discriminant field to releases table
9ALTER TABLE releases ADD COLUMN discriminant TEXT;
10
11-- Add discriminant field to recordings table
12ALTER TABLE recordings ADD COLUMN discriminant TEXT;
13
14-- Create indexes for efficient searching and filtering
15CREATE INDEX idx_plays_track_discriminant ON plays (track_discriminant);
16CREATE INDEX idx_plays_release_discriminant ON plays (release_discriminant);
17CREATE INDEX idx_releases_discriminant ON releases (discriminant);
18CREATE INDEX idx_recordings_discriminant ON recordings (discriminant);
19
20-- Create composite indexes for grouping by base name + discriminant
21CREATE INDEX idx_plays_track_name_discriminant ON plays (track_name, track_discriminant);
22CREATE INDEX idx_plays_release_name_discriminant ON plays (release_name, release_discriminant);
23
24-- Update materialized views to include discriminant information
25DROP MATERIALIZED VIEW IF EXISTS mv_release_play_counts;
26CREATE MATERIALIZED VIEW mv_release_play_counts AS
27SELECT
28 r.mbid AS release_mbid,
29 r.name AS release_name,
30 r.discriminant AS release_discriminant,
31 COUNT(p.uri) AS play_count
32FROM
33 releases r
34 LEFT JOIN plays p ON p.release_mbid = r.mbid
35GROUP BY
36 r.mbid, r.name, r.discriminant;
37
38CREATE UNIQUE INDEX idx_mv_release_play_counts_discriminant ON mv_release_play_counts (release_mbid);
39
40DROP MATERIALIZED VIEW IF EXISTS mv_recording_play_counts;
41CREATE MATERIALIZED VIEW mv_recording_play_counts AS
42SELECT
43 rec.mbid AS recording_mbid,
44 rec.name AS recording_name,
45 rec.discriminant AS recording_discriminant,
46 COUNT(p.uri) AS play_count
47FROM
48 recordings rec
49 LEFT JOIN plays p ON p.recording_mbid = rec.mbid
50GROUP BY
51 rec.mbid, rec.name, rec.discriminant;
52
53CREATE UNIQUE INDEX idx_mv_recording_play_counts_discriminant ON mv_recording_play_counts (recording_mbid);
54
55-- Create views for analyzing track/release variants
56CREATE VIEW track_variants AS
57SELECT
58 track_name,
59 track_discriminant,
60 COUNT(*) AS play_count,
61 COUNT(DISTINCT did) AS unique_listeners,
62 COUNT(DISTINCT recording_mbid) AS unique_recordings
63FROM plays
64WHERE track_name IS NOT NULL
65GROUP BY track_name, track_discriminant
66ORDER BY track_name, play_count DESC;
67
68CREATE VIEW release_variants AS
69SELECT
70 release_name,
71 release_discriminant,
72 COUNT(*) AS play_count,
73 COUNT(DISTINCT did) AS unique_listeners,
74 COUNT(DISTINCT release_mbid) AS unique_releases
75FROM plays
76WHERE release_name IS NOT NULL
77GROUP BY release_name, release_discriminant
78ORDER BY release_name, play_count DESC;
79
80-- Create function to extract potential discriminants from existing names
81CREATE OR REPLACE FUNCTION extract_discriminant(name_text TEXT) RETURNS TEXT AS $$
82DECLARE
83 discriminant_patterns TEXT[] := ARRAY[
84 '\(([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\)',
85 '\[([^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\]',
86 '\{([^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?)\}'
87 ];
88 pattern TEXT;
89 match_result TEXT;
90BEGIN
91 -- Try each pattern to find discriminant information
92 FOREACH pattern IN ARRAY discriminant_patterns
93 LOOP
94 SELECT substring(name_text FROM pattern) INTO match_result;
95 IF match_result IS NOT NULL AND length(trim(match_result)) > 0 THEN
96 RETURN trim(match_result);
97 END IF;
98 END LOOP;
99
100 RETURN NULL;
101END;
102$$ LANGUAGE plpgsql IMMUTABLE;
103
104-- Create function to get base name without discriminant
105CREATE OR REPLACE FUNCTION get_base_name(name_text TEXT) RETURNS TEXT AS $$
106DECLARE
107 cleanup_patterns TEXT[] := ARRAY[
108 '\s*\([^)]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\)\s*',
109 '\s*\[[^]]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\]\s*',
110 '\s*\{[^}]*(?:deluxe|remaster|remastered|extended|acoustic|live|radio|edit|version|remix|demo|instrumental|explicit|clean|bonus).*?\}\s*'
111 ];
112 pattern TEXT;
113 result_text TEXT := name_text;
114BEGIN
115 -- Remove discriminant patterns to get base name
116 FOREACH pattern IN ARRAY cleanup_patterns
117 LOOP
118 result_text := regexp_replace(result_text, pattern, ' ', 'gi');
119 END LOOP;
120
121 -- Clean up extra whitespace
122 result_text := regexp_replace(trim(result_text), '\s+', ' ', 'g');
123
124 RETURN result_text;
125END;
126$$ LANGUAGE plpgsql IMMUTABLE;
127
128-- Add comments explaining the discriminant system
129COMMENT ON COLUMN plays.track_discriminant IS 'Distinguishing information for track variants (e.g., "Acoustic Version", "Live at Wembley", "Radio Edit")';
130COMMENT ON COLUMN plays.release_discriminant IS 'Distinguishing information for release variants (e.g., "Deluxe Edition", "Remastered", "2023 Remaster")';
131COMMENT ON COLUMN releases.discriminant IS 'Distinguishing information for release variants to enable proper grouping';
132COMMENT ON COLUMN recordings.discriminant IS 'Distinguishing information for recording variants to enable proper grouping';
133
134COMMENT ON VIEW track_variants IS 'Shows all variants of tracks with their play counts and unique listeners';
135COMMENT ON VIEW release_variants IS 'Shows all variants of releases with their play counts and unique listeners';
136
137COMMENT ON FUNCTION extract_discriminant IS 'Extracts discriminant information from track/release names for migration purposes';
138COMMENT ON FUNCTION get_base_name IS 'Returns the base name without discriminant information for grouping purposes';