compatibility.sql
edited
1WITH user_top_artists AS (
2 SELECT
3 user_id,
4 artist_id,
5 COUNT(*) as play_count,
6 ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY COUNT(*) DESC) as artist_rank
7 FROM scrobbles s
8 INNER JOIN artists a ON a.id = s.artist_id
9 WHERE s.artist_id IS NOT NULL
10 AND a.name != 'Various Artists'
11 AND user_id IN ('rec_cug4h6ibhfbm7uq5dte0', 'rec_d3tv55lqrj614pp2mgag')
12 GROUP BY user_id, artist_id
13),
14user_totals AS (
15 SELECT
16 user_id,
17 COUNT(DISTINCT artist_id) as total_artists
18 FROM user_top_artists
19 WHERE artist_rank <= 50 -- Use fewer artists for normalization
20 GROUP BY user_id
21),
22shared_weighted AS (
23 SELECT
24 u1.artist_id,
25 u1.artist_rank as user1_rank,
26 u2.artist_rank as user2_rank,
27 (1.0 / u1.artist_rank) * (1.0 / u2.artist_rank) as artist_weight,
28 ROW_NUMBER() OVER (ORDER BY (1.0 / u1.artist_rank) * (1.0 / u2.artist_rank) DESC) as weight_rank
29 FROM user_top_artists u1
30 INNER JOIN user_top_artists u2
31 ON u1.artist_id = u2.artist_id
32 AND u1.user_id = 'rec_cug4h6ibhfbm7uq5dte0'
33 AND u2.user_id = 'rec_d3tv55lqrj614pp2mgag'
34 WHERE u1.artist_rank <= 50 -- Consider fewer artists
35 AND u2.artist_rank <= 50
36),
37compatibility_calc AS (
38 SELECT
39 SUM(sw.artist_weight) as weighted_overlap,
40 COUNT(*) as shared_count,
41 (SELECT total_artists FROM user_totals WHERE user_id = 'rec_cug4h6ibhfbm7uq5dte0') as user1_total,
42 (SELECT total_artists FROM user_totals WHERE user_id = 'rec_d3tv55lqrj614pp2mgag') as user2_total
43 FROM shared_weighted sw
44)
45SELECT
46 -- Simpler Jaccard-like formula (more generous)
47 ROUND(
48 (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100,
49 1
50 ) as compatibility_percentage,
51 CASE
52 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 20 THEN 'Low'
53 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 40 THEN 'Medium'
54 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 60 THEN 'High'
55 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 75 THEN 'Very High'
56 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 90 THEN 'Super'
57 ELSE 'ZOMG!1!'
58 END as compatibility_level,
59 shared_count as shared_artists,
60 user1_total as user1_artist_count,
61 user2_total as user2_artist_count,
62 LIST(a.name ORDER BY sw.artist_weight DESC) FILTER (WHERE sw.weight_rank <= 10) as top_shared_artists
63FROM compatibility_calc
64CROSS JOIN shared_weighted sw
65INNER JOIN artists a ON a.id = sw.artist_id
66GROUP BY weighted_overlap, shared_count, user1_total, user2_total;