A social knowledge tool for researchers built on ATProto
1import { CitoidMetadataService } from '../../infrastructure/CitoidMetadataService';
2import { URL } from '../../domain/value-objects/URL';
3
/**
 * Integration tests for CitoidMetadataService.
 *
 * The service is constructed without mocks and, per the timeouts and comments
 * in the individual cases, the calls depend on live network access. Every
 * case therefore carries an explicit timeout.
 */
describe('CitoidMetadataService Integration Tests', () => {
  // Timeout for cases that perform a metadata fetch over the network.
  const networkTimeoutMs = 10000;
  // Shorter timeout for the availability probe.
  const availabilityTimeoutMs = 5000;

  let service: CitoidMetadataService;

  /**
   * Builds the URL value object for `raw`, failing the current test if the
   * value object rejects it.
   *
   * @param raw - URL string to wrap.
   * @returns The unwrapped URL value object.
   */
  const createUrl = (raw: string) => {
    const urlResult = URL.create(raw);
    expect(urlResult.isOk()).toBe(true);
    return urlResult.unwrap();
  };

  beforeEach(() => {
    service = new CitoidMetadataService();
  });

  describe('fetchMetadata', () => {
    it(
      'should fetch metadata for arXiv paper',
      async () => {
        // Arrange
        const arxivUrl = 'https://arxiv.org/abs/2502.10834';
        const url = createUrl(arxivUrl);

        // Act
        const result = await service.fetchMetadata(url);

        // Assert
        expect(result.isOk()).toBe(true);
        const metadata = result.unwrap();

        // Basic structure checks
        expect(metadata.url).toBe(arxivUrl);
        expect(metadata.retrievedAt).toBeInstanceOf(Date);

        // Content checks - arXiv papers should have these fields
        expect(metadata.title).toBeDefined();
        expect(metadata.title).not.toBe('');

        expect(metadata.author).toBeDefined();
        expect(metadata.author).not.toBe('');

        expect(metadata.description).toBeDefined();
        expect(metadata.description).not.toBe('');

        // arXiv specific checks
        expect(metadata.type).toBe('preprint');
        expect(metadata.siteName).toContain('arXiv');

        // Date should be parsed correctly
        expect(metadata.publishedDate).toBeInstanceOf(Date);

        console.log('Fetched metadata:', {
          title: metadata.title,
          author: metadata.author,
          description: metadata.description?.substring(0, 100) + '...',
          type: metadata.type,
          siteName: metadata.siteName,
          publishedDate: metadata.publishedDate,
        });
      },
      networkTimeoutMs,
    );

    it(
      'should handle invalid URLs gracefully',
      async () => {
        // Arrange
        const invalidUrl = 'https://example.com/nonexistent-page-12345';
        const url = createUrl(invalidUrl);

        // Act
        const result = await service.fetchMetadata(url);

        // Assert
        // Either outcome is acceptable here: a descriptive error, or a
        // success that at least echoes the requested URL.
        if (result.isErr()) {
          expect(result.error.message).toMatch(/metadata|Not Found/);
        } else {
          const metadata = result.unwrap();
          expect(metadata.url).toBe(invalidUrl);
        }
      },
      networkTimeoutMs,
    );

    it(
      'should handle network errors gracefully',
      async () => {
        // Arrange - use a URL that will cause network issues
        const problematicUrl = 'https://this-domain-should-not-exist-12345.com';
        const url = createUrl(problematicUrl);

        // Act
        const result = await service.fetchMetadata(url);

        // Assert
        expect(result.isErr()).toBe(true);
        if (result.isErr()) {
          expect(result.error.message).toContain('Bad Request');
        }
      },
      networkTimeoutMs,
    );
  });

  describe('isAvailable', () => {
    it(
      'should check if Citoid service is available',
      async () => {
        // Act
        const isAvailable = await service.isAvailable();

        // Assert
        expect(typeof isAvailable).toBe('boolean');

        // In most cases, the service should be available,
        // but we don't want to fail the test if Wikipedia is down.
        console.log('Citoid service availability:', isAvailable);
      },
      availabilityTimeoutMs,
    );
  });

  describe('author formatting', () => {
    it(
      'should handle multiple authors correctly',
      async () => {
        // Arrange - a known paper with multiple authors
        const paperUrl = 'https://arxiv.org/abs/1706.03762'; // "Attention Is All You Need"
        const url = createUrl(paperUrl);

        // Act
        const result = await service.fetchMetadata(url);

        // Assert
        // Fail loudly on a failed fetch instead of silently skipping every
        // assertion below.
        expect(result.isOk()).toBe(true);
        const metadata = result.unwrap();

        expect(metadata.author).toBeDefined();
        expect(metadata.author).not.toBe('');

        // Should contain at least one author name: the length must be
        // strictly positive (a `>= 0` check could never fail).
        expect(metadata.author?.length ?? 0).toBeGreaterThan(0);

        console.log('Authors for multi-author paper:', metadata.author);
      },
      networkTimeoutMs,
    );
  });

  describe('date parsing', () => {
    it(
      'should parse publication dates correctly',
      async () => {
        // Arrange
        const arxivUrl = 'https://arxiv.org/abs/2502.10834';
        const url = createUrl(arxivUrl);

        // Fixed lower bound. The arXiv identifier prefix "2502" denotes
        // February 2025, so the publication date cannot precede 2025. A bound
        // relative to "now" (e.g. one year ago) would make this test start
        // failing once the paper gets older than that window.
        const earliestPlausible = new Date(Date.UTC(2025, 0, 1));

        // Act
        const result = await service.fetchMetadata(url);

        // Assert
        expect(result.isOk()).toBe(true);
        const { publishedDate } = result.unwrap();

        expect(publishedDate).toBeInstanceOf(Date);
        if (!(publishedDate instanceof Date)) {
          // Not reached when the assertion above passes; this guard only
          // narrows the type for the compiler.
          return;
        }

        expect(publishedDate.getTime()).not.toBeNaN();

        // Should be a reasonable date (not in the future, not before the
        // paper could have existed).
        const now = new Date();
        expect(publishedDate.getTime()).toBeLessThanOrEqual(now.getTime());
        expect(publishedDate.getTime()).toBeGreaterThanOrEqual(
          earliestPlausible.getTime(),
        );

        console.log('Parsed publication date:', publishedDate.toISOString());
      },
      networkTimeoutMs,
    );
  });
});