# A community-based topic aggregation platform built on atproto
# at main · 261 lines · 9.9 kB · view raw
"""
Tests for link_extractor module.
"""
import pytest
from unittest.mock import MagicMock, patch
import requests
from src.link_extractor import LinkExtractor


def _make_entry(link, content=None, description=None, summary=None):
    """Build a mock feed entry with the four fields these tests populate.

    Every field is assigned explicitly: an attribute that is never set on a
    MagicMock is auto-created as a truthy child mock, so "absent" fields must
    be pinned to None by hand.
    """
    entry = MagicMock()
    entry.link = link
    entry.content = content
    entry.description = description
    entry.summary = summary
    return entry


class TestLinkExtractor:
    """Tests for LinkExtractor class."""

    def test_init_default_domains(self):
        """Test default allowed domains."""
        extractor = LinkExtractor()
        assert "streamable.com" in extractor.allowed_domains

    def test_init_custom_domains(self):
        """Test custom allowed domains."""
        extractor = LinkExtractor(allowed_domains=["example.com", "test.org"])
        assert "example.com" in extractor.allowed_domains
        assert "test.org" in extractor.allowed_domains
        # Passing a custom list is expected to replace the defaults.
        assert "streamable.com" not in extractor.allowed_domains


class TestIsAllowedUrl:
    """Tests for is_allowed_url method."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with its default settings."""
        return LinkExtractor()

    def test_streamable_url(self, extractor):
        """Test streamable.com URL detection."""
        assert extractor.is_allowed_url("https://streamable.com/abc123")
        assert extractor.is_allowed_url("http://streamable.com/xyz789")
        assert extractor.is_allowed_url("https://www.streamable.com/test")

    def test_non_streamable_url(self, extractor):
        """Test rejection of non-streamable URLs."""
        assert not extractor.is_allowed_url("https://youtube.com/watch?v=123")
        assert not extractor.is_allowed_url("https://reddit.com/r/nba")
        assert not extractor.is_allowed_url("https://twitter.com/video")

    def test_empty_url(self, extractor):
        """Test empty URL handling."""
        assert not extractor.is_allowed_url("")
        assert not extractor.is_allowed_url(None)

    def test_invalid_url(self, extractor):
        """Test invalid URL handling."""
        assert not extractor.is_allowed_url("not a url")
        assert not extractor.is_allowed_url("streamable.com/abc")  # Missing scheme


class TestExtractVideoUrl:
    """Tests for extract_video_url method."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with its default settings."""
        return LinkExtractor()

    def test_direct_link(self, extractor):
        """Test extraction from direct link."""
        entry = _make_entry("https://streamable.com/abc123")

        result = extractor.extract_video_url(entry)
        assert result == "https://streamable.com/abc123"

    def test_link_in_content(self, extractor):
        """Test extraction from entry content."""
        # The content field is a list of items, each exposing its text as .value.
        content_item = MagicMock()
        content_item.value = 'Check out this play: <a href="https://streamable.com/xyz789">video</a>'
        entry = _make_entry(
            "https://reddit.com/r/nba/comments/123",
            content=[content_item],
        )

        result = extractor.extract_video_url(entry)
        assert result == "https://streamable.com/xyz789"

    def test_link_in_description(self, extractor):
        """Test extraction from entry description."""
        entry = _make_entry(
            "https://reddit.com/r/nba/comments/123",
            description="Amazing dunk! https://streamable.com/dunk99",
        )

        result = extractor.extract_video_url(entry)
        assert result == "https://streamable.com/dunk99"

    def test_link_in_summary(self, extractor):
        """Test extraction from entry summary."""
        entry = _make_entry(
            "https://reddit.com/r/nba/comments/123",
            summary="Game winner: https://streamable.com/winner1",
        )

        result = extractor.extract_video_url(entry)
        assert result == "https://streamable.com/winner1"

    def test_no_video_link(self, extractor):
        """Test when no video link is present."""
        entry = _make_entry(
            "https://reddit.com/r/nba/comments/123",
            description="Just a text post about basketball",
        )

        result = extractor.extract_video_url(entry)
        assert result is None

    def test_multiple_links_returns_first(self, extractor):
        """Test that first video link is returned when multiple present."""
        entry = _make_entry(
            "https://reddit.com/r/nba/comments/123",
            description=(
                "First: https://streamable.com/first "
                "Second: https://streamable.com/second"
            ),
        )

        result = extractor.extract_video_url(entry)
        assert result == "https://streamable.com/first"


class TestNormalizeUrl:
    """Tests for URL normalization."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with its default settings."""
        return LinkExtractor()

    def test_removes_trailing_slash(self, extractor):
        """Test trailing slash removal."""
        url = extractor._normalize_url("https://streamable.com/abc123/")
        assert url == "https://streamable.com/abc123"

    def test_upgrades_http_to_https(self, extractor):
        """Test HTTP to HTTPS upgrade."""
        url = extractor._normalize_url("http://streamable.com/abc123")
        assert url == "https://streamable.com/abc123"


class TestGetVideoId:
    """Tests for get_video_id method."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with its default settings."""
        return LinkExtractor()

    def test_extracts_video_id(self, extractor):
        """Test video ID extraction."""
        video_id = extractor.get_video_id("https://streamable.com/abc123")
        assert video_id == "abc123"

    def test_handles_www_prefix(self, extractor):
        """Test www prefix handling."""
        video_id = extractor.get_video_id("https://www.streamable.com/xyz789")
        assert video_id == "xyz789"

    def test_empty_url(self, extractor):
        """Test empty URL handling."""
        assert extractor.get_video_id("") is None
        assert extractor.get_video_id(None) is None

    def test_url_with_query_params(self, extractor):
        """Test URL with query parameters."""
        video_id = extractor.get_video_id("https://streamable.com/test123?foo=bar")
        assert video_id == "test123"


def _json_response(payload):
    """Return a mock HTTP 200 response whose ``json()`` yields *payload*."""
    response = MagicMock()
    response.status_code = 200
    response.json.return_value = payload
    return response


class TestGetThumbnailUrl:
    """Tests for get_thumbnail_url method."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with its default settings."""
        return LinkExtractor()

    def test_returns_thumbnail_on_success(self, extractor):
        """Test successful thumbnail fetch."""
        mock_response = _json_response(
            {
                "thumbnail_url": "https://cdn.streamable.com/image/abc123.jpg",
                "type": "video",
            }
        )

        with patch("requests.get", return_value=mock_response):
            result = extractor.get_thumbnail_url("https://streamable.com/abc123")
        assert result == "https://cdn.streamable.com/image/abc123.jpg"

    def test_returns_none_on_network_error(self, extractor):
        """Test handling of network errors."""
        with patch("requests.get", side_effect=requests.ConnectionError("Network error")):
            result = extractor.get_thumbnail_url("https://streamable.com/abc123")
        assert result is None

    def test_returns_none_on_http_error(self, extractor):
        """Test handling of HTTP errors."""
        mock_response = MagicMock()
        # Keep the mock consistent with the "404 Not Found" error below rather
        # than leaving status_code as an auto-created MagicMock attribute.
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")

        with patch("requests.get", return_value=mock_response):
            result = extractor.get_thumbnail_url("https://streamable.com/abc123")
        assert result is None

    def test_returns_none_on_invalid_json(self, extractor):
        """Test handling of invalid JSON response."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.side_effect = ValueError("Invalid JSON")

        with patch("requests.get", return_value=mock_response):
            result = extractor.get_thumbnail_url("https://streamable.com/abc123")
        assert result is None

    def test_returns_none_for_empty_url(self, extractor):
        """Test empty URL handling."""
        assert extractor.get_thumbnail_url("") is None
        assert extractor.get_thumbnail_url(None) is None

    def test_returns_none_for_non_allowed_url(self, extractor):
        """Test rejection of non-allowed URLs."""
        # Should not even make a request for non-allowed domains
        with patch("requests.get") as mock_get:
            result = extractor.get_thumbnail_url("https://youtube.com/watch?v=123")
        assert result is None
        mock_get.assert_not_called()

    def test_returns_none_when_thumbnail_missing(self, extractor):
        """Test handling when thumbnail_url is missing from response."""
        mock_response = _json_response({"type": "video", "title": "Test"})

        with patch("requests.get", return_value=mock_response):
            result = extractor.get_thumbnail_url("https://streamable.com/abc123")
        assert result is None

    def test_uses_correct_oembed_url(self, extractor):
        """Test that correct oembed URL is constructed."""
        mock_response = _json_response(
            {"thumbnail_url": "https://example.com/thumb.jpg"}
        )

        with patch("requests.get", return_value=mock_response) as mock_get:
            extractor.get_thumbnail_url("https://streamable.com/xyz789")
        mock_get.assert_called_once()
        # call_args[0] is the tuple of positional arguments; the request URL
        # is read from the first positional argument.
        call_url = mock_get.call_args[0][0]
        assert "api.streamable.com/oembed" in call_url
        assert "url=https://streamable.com/xyz789" in call_url