1-- Migration to support synthetic MBIDs for artists without MusicBrainz data
2-- This ensures all artists have some form of ID while maintaining uniqueness
3
4-- Enable UUID extension for v5 UUID generation
5CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
6
7-- Add a column to track MBID type (musicbrainz, synthetic, unknown)
8ALTER TABLE artists_extended ADD COLUMN mbid_type TEXT DEFAULT 'unknown' NOT NULL;
9
10-- Add check constraint for valid MBID types
11ALTER TABLE artists_extended ADD CONSTRAINT chk_mbid_type
12 CHECK (mbid_type IN ('musicbrainz', 'synthetic', 'unknown'));
13
14-- Update existing records to set proper MBID type
15UPDATE artists_extended SET mbid_type = 'musicbrainz' WHERE mbid IS NOT NULL;
16
17-- Drop the unique constraint on name_normalized for null MBIDs since we'll handle duplicates differently
18DROP INDEX IF EXISTS idx_artists_extended_name_unique;
19
20-- Add index for efficient querying by MBID type
21CREATE INDEX idx_artists_extended_mbid_type ON artists_extended (mbid_type);
22
23-- Create a view to easily work with different artist types
24CREATE VIEW artists_with_type AS
25SELECT
26 id,
27 mbid,
28 name,
29 mbid_type,
30 play_count,
31 created_at,
32 updated_at,
33 -- For synthetic MBIDs, we can show the source name used for generation
34 CASE
35 WHEN mbid_type = 'synthetic' THEN 'Generated from: ' || name
36 WHEN mbid_type = 'musicbrainz' THEN 'MusicBrainz: ' || mbid::text
37 ELSE 'No MBID available'
38 END as mbid_info
39FROM artists_extended;
40
41-- Update materialized views to include MBID type information
42DROP MATERIALIZED VIEW IF EXISTS mv_artist_play_counts;
43CREATE MATERIALIZED VIEW mv_artist_play_counts AS
44SELECT
45 ae.id AS artist_id,
46 ae.mbid AS artist_mbid,
47 ae.name AS artist_name,
48 ae.mbid_type,
49 COUNT(p.uri) AS play_count
50FROM
51 artists_extended ae
52 LEFT JOIN play_to_artists_extended ptae ON ae.id = ptae.artist_id
53 LEFT JOIN plays p ON p.uri = ptae.play_uri
54GROUP BY
55 ae.id, ae.mbid, ae.name, ae.mbid_type;
56
57CREATE UNIQUE INDEX idx_mv_artist_play_counts_with_type ON mv_artist_play_counts (artist_id);
58
59-- Add comments explaining the synthetic MBID system
60COMMENT ON COLUMN artists_extended.mbid_type IS 'Type of MBID: musicbrainz (real), synthetic (generated), or unknown (legacy data)';
61COMMENT ON COLUMN artists_extended.mbid IS 'MusicBrainz ID (for musicbrainz type) or synthetic UUID (for synthetic type)';
62COMMENT ON VIEW artists_with_type IS 'View that provides human-readable information about artist MBID sources';
63
64-- Add a function to generate synthetic MBIDs
65CREATE OR REPLACE FUNCTION generate_synthetic_mbid(artist_name TEXT) RETURNS UUID AS $$
66DECLARE
67 namespace_uuid UUID := '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; -- DNS namespace
68 result_uuid UUID;
69BEGIN
70 -- Generate deterministic UUID v5 based on artist name
71 SELECT uuid_generate_v5(namespace_uuid, artist_name) INTO result_uuid;
72 RETURN result_uuid;
73END;
74$$ LANGUAGE plpgsql IMMUTABLE;
75
76COMMENT ON FUNCTION generate_synthetic_mbid IS 'Generates a deterministic UUID v5 for artist names without MusicBrainz IDs';