src/modules/cards/tests/infrastructure/CitoidMetadataService.integration.test.ts at main

cosmik.network / semble
fork
A social knowledge tool for researchers built on ATProto
fork
semble / src / modules / cards / tests / infrastructure / CitoidMetadataService.integration.test.ts
at main 180 lines 5.8 kB view raw
wrap content
Wesley Finck formatted all files with new formatting convention 9mo ago
e0a3f4b4
  1import { CitoidMetadataService } from '../../infrastructure/CitoidMetadataService';
  2import { URL } from '../../domain/value-objects/URL';
  3
  4describe('CitoidMetadataService Integration Tests', () => {
  5  let service: CitoidMetadataService;
  6
  // Give every test case its own CitoidMetadataService instance.
  beforeEach((): void => {
    service = new CitoidMetadataService();
  });
 10
 11  describe('fetchMetadata', () => {
 12    it('should fetch metadata for arXiv paper', async () => {
 13      // Arrange
 14      const arxivUrl = 'https://arxiv.org/abs/2502.10834';
 15      const urlResult = URL.create(arxivUrl);
 16      expect(urlResult.isOk()).toBe(true);
 17      const url = urlResult.unwrap();
 18
 19      // Act
 20      const result = await service.fetchMetadata(url);
 21
 22      // Assert
 23      expect(result.isOk()).toBe(true);
 24
 25      if (result.isOk()) {
 26        const metadata = result.unwrap();
 27
 28        // Basic structure checks
 29        expect(metadata.url).toBe(arxivUrl);
 30        expect(metadata.retrievedAt).toBeInstanceOf(Date);
 31
 32        // Content checks - arXiv papers should have these fields
 33        expect(metadata.title).toBeDefined();
 34        expect(metadata.title).not.toBe('');
 35
 36        expect(metadata.author).toBeDefined();
 37        expect(metadata.author).not.toBe('');
 38
 39        expect(metadata.description).toBeDefined();
 40        expect(metadata.description).not.toBe('');
 41
 42        // arXiv specific checks
 43        expect(metadata.type).toBe('preprint');
 44        expect(metadata.siteName).toContain('arXiv');
 45
 46        // Date should be parsed correctly
 47        expect(metadata.publishedDate).toBeInstanceOf(Date);
 48
 49        console.log('Fetched metadata:', {
 50          title: metadata.title,
 51          author: metadata.author,
 52          description: metadata.description?.substring(0, 100) + '...',
 53          type: metadata.type,
 54          siteName: metadata.siteName,
 55          publishedDate: metadata.publishedDate,
 56        });
 57      }
 58    }, 10000); // 10 second timeout for network request
 59
 60    it('should handle invalid URLs gracefully', async () => {
 61      // Arrange
 62      const invalidUrl = 'https://example.com/nonexistent-page-12345';
 63      const urlResult = URL.create(invalidUrl);
 64      expect(urlResult.isOk()).toBe(true);
 65      const url = urlResult.unwrap();
 66
 67      // Act
 68      const result = await service.fetchMetadata(url);
 69
 70      // Assert
 71      // Should either succeed with minimal metadata or fail gracefully
 72      if (result.isErr()) {
 73        expect(result.error.message).toMatch(/metadata|Not Found/);
 74      } else {
 75        // If it succeeds, should at least have the URL
 76        const metadata = result.unwrap();
 77        expect(metadata.url).toBe(invalidUrl);
 78      }
 79    }, 10000);
 80
 81    it('should handle network errors gracefully', async () => {
 82      // Arrange - use a URL that will cause network issues
 83      const problematicUrl = 'https://this-domain-should-not-exist-12345.com';
 84      const urlResult = URL.create(problematicUrl);
 85      expect(urlResult.isOk()).toBe(true);
 86      const url = urlResult.unwrap();
 87
 88      // Act
 89      const result = await service.fetchMetadata(url);
 90
 91      // Assert
 92      expect(result.isErr()).toBe(true);
 93      if (result.isErr()) {
 94        expect(result.error.message).toContain('Bad Request');
 95      }
 96    }, 10000);
 97  });
 98
 99  describe('isAvailable', () => {
100    it('should check if Citoid service is available', async () => {
101      // Act
102      const isAvailable = await service.isAvailable();
103
104      // Assert
105      expect(typeof isAvailable).toBe('boolean');
106
107      // In most cases, the service should be available
108      // But we don't want to fail the test if Wikipedia is down
109      console.log('Citoid service availability:', isAvailable);
110    }, 5000);
111  });
112
113  describe('author formatting', () => {
114    it('should handle multiple authors correctly', async () => {
115      // This test uses a known paper with multiple authors
116      const paperUrl = 'https://arxiv.org/abs/1706.03762'; // "Attention Is All You Need"
117      const urlResult = URL.create(paperUrl);
118      expect(urlResult.isOk()).toBe(true);
119      const url = urlResult.unwrap();
120
121      // Act
122      const result = await service.fetchMetadata(url);
123
124      // Assert
125      if (result.isOk()) {
126        const metadata = result.unwrap();
127        expect(metadata.author).toBeDefined();
128        expect(metadata.author).not.toBe('');
129
130        // Should contain at least one author name
131        expect(metadata.author!.length).toBeGreaterThanOrEqual(0);
132
133        console.log('Authors for multi-author paper:', metadata.author);
134      }
135    }, 10000);
136  });
137
138  describe('date parsing', () => {
139    it('should parse publication dates correctly', async () => {
140      // Arrange
141      const arxivUrl = 'https://arxiv.org/abs/2502.10834';
142      const urlResult = URL.create(arxivUrl);
143      expect(urlResult.isOk()).toBe(true);
144      const url = urlResult.unwrap();
145
146      // Act
147      const result = await service.fetchMetadata(url);
148
149      // Assert
150      if (result.isOk()) {
151        const metadata = result.unwrap();
152
153        if (metadata.publishedDate) {
154          expect(metadata.publishedDate).toBeInstanceOf(Date);
155          expect(metadata.publishedDate.getTime()).not.toBeNaN();
156
157          // Should be a reasonable date (not in the future, not too old)
158          const now = new Date();
159          const oneYearAgo = new Date(
160            now.getFullYear() - 1,
161            now.getMonth(),
162            now.getDate(),
163          );
164
165          expect(metadata.publishedDate.getTime()).toBeLessThanOrEqual(
166            now.getTime(),
167          );
168          expect(metadata.publishedDate.getTime()).toBeGreaterThanOrEqual(
169            oneYearAgo.getTime(),
170          );
171
172          console.log(
173            'Parsed publication date:',
174            metadata.publishedDate.toISOString(),
175          );
176        }
177      }
178    }, 10000);
179  });
180});
Configure Feed

Configure Feed