A community-based topic aggregation platform built on atproto
1"""
2Tests for link_extractor module.
3"""
4import pytest
5from unittest.mock import MagicMock, patch
6import requests
7from src.link_extractor import LinkExtractor
8
9
class TestLinkExtractor:
    """Constructor behaviour of LinkExtractor."""

    def test_init_default_domains(self):
        """A LinkExtractor built without arguments allows streamable.com."""
        default_extractor = LinkExtractor()
        assert "streamable.com" in default_extractor.allowed_domains

    def test_init_custom_domains(self):
        """Passing allowed_domains yields exactly those domains, not the default."""
        custom_extractor = LinkExtractor(allowed_domains=["example.com", "test.org"])
        for domain in ("example.com", "test.org"):
            assert domain in custom_extractor.allowed_domains
        assert "streamable.com" not in custom_extractor.allowed_domains
24
25
class TestIsAllowedUrl:
    """Checks for LinkExtractor.is_allowed_url."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with default arguments."""
        return LinkExtractor()

    def test_streamable_url(self, extractor):
        """streamable.com URLs are accepted over http, https and with www."""
        accepted = (
            "https://streamable.com/abc123",
            "http://streamable.com/xyz789",
            "https://www.streamable.com/test",
        )
        for url in accepted:
            assert extractor.is_allowed_url(url)

    def test_non_streamable_url(self, extractor):
        """URLs on other hosts are rejected."""
        rejected = (
            "https://youtube.com/watch?v=123",
            "https://reddit.com/r/nba",
            "https://twitter.com/video",
        )
        for url in rejected:
            assert not extractor.is_allowed_url(url)

    def test_empty_url(self, extractor):
        """Both the empty string and None are rejected."""
        for missing in ("", None):
            assert not extractor.is_allowed_url(missing)

    def test_invalid_url(self, extractor):
        """Strings that are not well-formed URLs are rejected."""
        malformed = (
            "not a url",
            "streamable.com/abc",  # Missing scheme
        )
        for candidate in malformed:
            assert not extractor.is_allowed_url(candidate)
54
55
class TestExtractVideoUrl:
    """Checks for LinkExtractor.extract_video_url using mocked entries."""

    # Non-video entry link shared by the tests that hide the video URL elsewhere.
    _POST_LINK = "https://reddit.com/r/nba/comments/123"

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with default arguments."""
        return LinkExtractor()

    @staticmethod
    def _entry(link, content=None, description=None, summary=None):
        """Return a MagicMock with link, content, description and summary set."""
        return MagicMock(
            link=link,
            content=content,
            description=description,
            summary=summary,
        )

    def test_direct_link(self, extractor):
        """A streamable URL in entry.link is returned unchanged."""
        entry = self._entry("https://streamable.com/abc123")

        assert extractor.extract_video_url(entry) == "https://streamable.com/abc123"

    def test_link_in_content(self, extractor):
        """A streamable href inside a content item's value is found."""
        html_part = MagicMock(
            value='Check out this play: <a href="https://streamable.com/xyz789">video</a>'
        )
        entry = self._entry(self._POST_LINK, content=[html_part])

        assert extractor.extract_video_url(entry) == "https://streamable.com/xyz789"

    def test_link_in_description(self, extractor):
        """A streamable URL embedded in the description text is found."""
        entry = self._entry(
            self._POST_LINK,
            description="Amazing dunk! https://streamable.com/dunk99",
        )

        assert extractor.extract_video_url(entry) == "https://streamable.com/dunk99"

    def test_link_in_summary(self, extractor):
        """A streamable URL embedded in the summary text is found."""
        entry = self._entry(
            self._POST_LINK,
            summary="Game winner: https://streamable.com/winner1",
        )

        assert extractor.extract_video_url(entry) == "https://streamable.com/winner1"

    def test_no_video_link(self, extractor):
        """None is returned when no field carries an allowed URL."""
        entry = self._entry(
            self._POST_LINK,
            description="Just a text post about basketball",
        )

        assert extractor.extract_video_url(entry) is None

    def test_multiple_links_returns_first(self, extractor):
        """With two streamable URLs in the description, the first one wins."""
        two_links = (
            "First: https://streamable.com/first "
            "Second: https://streamable.com/second"
        )
        entry = self._entry(self._POST_LINK, description=two_links)

        assert extractor.extract_video_url(entry) == "https://streamable.com/first"
134
135
class TestNormalizeUrl:
    """Checks for the private LinkExtractor._normalize_url helper."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with default arguments."""
        return LinkExtractor()

    def test_removes_trailing_slash(self, extractor):
        """A trailing slash is dropped from the URL."""
        normalized = extractor._normalize_url("https://streamable.com/abc123/")
        expected = "https://streamable.com/abc123"
        assert normalized == expected

    def test_upgrades_http_to_https(self, extractor):
        """An http:// URL comes back with the https:// scheme."""
        normalized = extractor._normalize_url("http://streamable.com/abc123")
        expected = "https://streamable.com/abc123"
        assert normalized == expected
152
153
class TestGetVideoId:
    """Checks for LinkExtractor.get_video_id."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with default arguments."""
        return LinkExtractor()

    def test_extracts_video_id(self, extractor):
        """The path segment of a plain streamable URL is the video ID."""
        assert extractor.get_video_id("https://streamable.com/abc123") == "abc123"

    def test_handles_www_prefix(self, extractor):
        """A www. host prefix does not affect the extracted ID."""
        assert extractor.get_video_id("https://www.streamable.com/xyz789") == "xyz789"

    def test_empty_url(self, extractor):
        """Both the empty string and None yield None."""
        for missing in ("", None):
            assert extractor.get_video_id(missing) is None

    def test_url_with_query_params(self, extractor):
        """A query string is not included in the extracted ID."""
        url_with_query = "https://streamable.com/test123?foo=bar"
        assert extractor.get_video_id(url_with_query) == "test123"
180
181
class TestGetThumbnailUrl:
    """Checks for LinkExtractor.get_thumbnail_url with requests.get patched."""

    @pytest.fixture
    def extractor(self):
        """Provide a LinkExtractor built with default arguments."""
        return LinkExtractor()

    @staticmethod
    def _ok_response(payload):
        """Return a MagicMock response with status 200 whose json() gives payload."""
        response = MagicMock(status_code=200)
        response.json.return_value = payload
        return response

    def test_returns_thumbnail_on_success(self, extractor):
        """The thumbnail_url field of the JSON response is returned."""
        response = self._ok_response(
            {
                "thumbnail_url": "https://cdn.streamable.com/image/abc123.jpg",
                "type": "video",
            }
        )

        with patch("requests.get", return_value=response):
            thumbnail = extractor.get_thumbnail_url("https://streamable.com/abc123")

        assert thumbnail == "https://cdn.streamable.com/image/abc123.jpg"

    def test_returns_none_on_network_error(self, extractor):
        """A ConnectionError raised by requests.get results in None."""
        failure = requests.ConnectionError("Network error")

        with patch("requests.get", side_effect=failure):
            thumbnail = extractor.get_thumbnail_url("https://streamable.com/abc123")

        assert thumbnail is None

    def test_returns_none_on_http_error(self, extractor):
        """An HTTPError raised by raise_for_status results in None."""
        response = MagicMock()
        response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")

        with patch("requests.get", return_value=response):
            thumbnail = extractor.get_thumbnail_url("https://streamable.com/abc123")

        assert thumbnail is None

    def test_returns_none_on_invalid_json(self, extractor):
        """A ValueError raised by response.json() results in None."""
        response = MagicMock(status_code=200)
        response.json.side_effect = ValueError("Invalid JSON")

        with patch("requests.get", return_value=response):
            thumbnail = extractor.get_thumbnail_url("https://streamable.com/abc123")

        assert thumbnail is None

    def test_returns_none_for_empty_url(self, extractor):
        """Both the empty string and None yield None."""
        for missing in ("", None):
            assert extractor.get_thumbnail_url(missing) is None

    def test_returns_none_for_non_allowed_url(self, extractor):
        """A URL on a non-allowed domain yields None."""
        with patch("requests.get") as mock_get:
            thumbnail = extractor.get_thumbnail_url("https://youtube.com/watch?v=123")

        assert thumbnail is None
        # No HTTP request should have been issued for a non-allowed domain.
        mock_get.assert_not_called()

    def test_returns_none_when_thumbnail_missing(self, extractor):
        """A JSON response lacking thumbnail_url yields None."""
        response = self._ok_response({"type": "video", "title": "Test"})

        with patch("requests.get", return_value=response):
            thumbnail = extractor.get_thumbnail_url("https://streamable.com/abc123")

        assert thumbnail is None

    def test_uses_correct_oembed_url(self, extractor):
        """The single request targets the oembed endpoint with the video URL."""
        response = self._ok_response({"thumbnail_url": "https://example.com/thumb.jpg"})

        with patch("requests.get", return_value=response) as mock_get:
            extractor.get_thumbnail_url("https://streamable.com/xyz789")

        mock_get.assert_called_once()
        # First positional argument of the recorded call is the request URL.
        requested_url = mock_get.call_args[0][0]
        for fragment in (
            "api.streamable.com/oembed",
            "url=https://streamable.com/xyz789",
        ):
            assert fragment in requested_url