compatibility.sql edited
66 lines 2.7 kB view raw
1WITH user_top_artists AS ( 2 SELECT 3 user_id, 4 artist_id, 5 COUNT(*) as play_count, 6 ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY COUNT(*) DESC) as artist_rank 7 FROM scrobbles s 8 INNER JOIN artists a ON a.id = s.artist_id 9 WHERE s.artist_id IS NOT NULL 10 AND a.name != 'Various Artists' 11 AND user_id IN ('rec_cug4h6ibhfbm7uq5dte0', 'rec_d3tv55lqrj614pp2mgag') 12 GROUP BY user_id, artist_id 13), 14user_totals AS ( 15 SELECT 16 user_id, 17 COUNT(DISTINCT artist_id) as total_artists 18 FROM user_top_artists 19 WHERE artist_rank <= 50 -- Use fewer artists for normalization 20 GROUP BY user_id 21), 22shared_weighted AS ( 23 SELECT 24 u1.artist_id, 25 u1.artist_rank as user1_rank, 26 u2.artist_rank as user2_rank, 27 (1.0 / u1.artist_rank) * (1.0 / u2.artist_rank) as artist_weight, 28 ROW_NUMBER() OVER (ORDER BY (1.0 / u1.artist_rank) * (1.0 / u2.artist_rank) DESC) as weight_rank 29 FROM user_top_artists u1 30 INNER JOIN user_top_artists u2 31 ON u1.artist_id = u2.artist_id 32 AND u1.user_id = 'rec_cug4h6ibhfbm7uq5dte0' 33 AND u2.user_id = 'rec_d3tv55lqrj614pp2mgag' 34 WHERE u1.artist_rank <= 50 -- Consider fewer artists 35 AND u2.artist_rank <= 50 36), 37compatibility_calc AS ( 38 SELECT 39 SUM(sw.artist_weight) as weighted_overlap, 40 COUNT(*) as shared_count, 41 (SELECT total_artists FROM user_totals WHERE user_id = 'rec_cug4h6ibhfbm7uq5dte0') as user1_total, 42 (SELECT total_artists FROM user_totals WHERE user_id = 'rec_d3tv55lqrj614pp2mgag') as user2_total 43 FROM shared_weighted sw 44) 45SELECT 46 -- Simpler Jaccard-like formula (more generous) 47 ROUND( 48 (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100, 49 1 50 ) as compatibility_percentage, 51 CASE 52 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 20 THEN 'Low' 53 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 40 THEN 'Medium' 54 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 60 THEN 'High' 55 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 75 THEN 'Very High' 56 WHEN (shared_count * 1.0 / LEAST(user1_total, user2_total)) * 100 < 90 THEN 'Super' 57 ELSE 'ZOMG!1!' 58 END as compatibility_level, 59 shared_count as shared_artists, 60 user1_total as user1_artist_count, 61 user2_total as user2_artist_count, 62 LIST(a.name ORDER BY sw.artist_weight DESC) FILTER (WHERE sw.weight_rank <= 10) as top_shared_artists 63FROM compatibility_calc 64CROSS JOIN shared_weighted sw 65INNER JOIN artists a ON a.id = sw.artist_id 66GROUP BY weighted_overlap, shared_count, user1_total, user2_total;