+20
-19
packages/extension/src/content/scrapers/twitter-scraper.ts
+20
-19
packages/extension/src/content/scrapers/twitter-scraper.ts
···
6
6
*/
7
7
export class TwitterScraper extends BaseScraper {
8
8
/**
9
-
* Returns the stable selector for Twitter username elements
10
-
* data-testid="UserName" is used consistently across Twitter's UI
9
+
* Returns the stable selector for Twitter user cells
10
+
* data-testid="UserCell" contains each user row
11
11
*/
12
12
getUsernameSelector(): string {
13
-
return '[data-testid="UserName"]';
13
+
return '[data-testid="UserCell"]';
14
14
}
15
15
16
16
/**
17
-
* Extracts username from Twitter UserName element
18
-
* Structure: <div data-testid="UserName">
19
-
* <div><span>Display Name</span></div>
20
-
* <div><span>@handle</span></div>
21
-
* </div>
17
+
* Extracts username from Twitter UserCell element
18
+
* Each UserCell contains profile links with href="/username"
22
19
*/
23
20
extractUsername(element: Element): string | null {
24
-
// Find all spans within the UserName element
25
-
const spans = element.querySelectorAll('span');
21
+
// Find all links in the cell
22
+
const links = element.querySelectorAll('a');
23
+
24
+
for (const link of links) {
25
+
const href = link.getAttribute('href');
26
26
27
-
for (const span of spans) {
28
-
const text = span.textContent?.trim();
27
+
// Profile links are like /username (not /i/something or /username/status/...)
28
+
if (href && href.startsWith('/') && !href.startsWith('/i/')) {
29
+
const parts = href.split('/');
29
30
30
-
// Look for text starting with @
31
-
if (text && text.startsWith('@')) {
32
-
// Remove @ prefix and convert to lowercase
33
-
const username = text.slice(1).toLowerCase();
31
+
// Should be exactly 2 parts: ['', 'username']
32
+
if (parts.length === 2 && parts[1]) {
33
+
const username = parts[1].toLowerCase();
34
34
35
-
// Validate username format (alphanumeric + underscore)
36
-
if (/^[a-z0-9_]+$/i.test(username)) {
37
-
return username;
35
+
// Validate username format (alphanumeric + underscore)
36
+
if (/^[a-z0-9_]+$/i.test(username)) {
37
+
return username;
38
+
}
38
39
}
39
40
}
40
41
}