A web scraper built to search for specific information about a given compound (and its pseudonyms)
at develop 63 lines 2.1 kB view raw
import unittest

from scrapy.http import Request

from FourmiCrawler import spider
from FourmiCrawler.sources.NIST import NIST
from FourmiCrawler.sources.source import Source


class TestFoumiSpider(unittest.TestCase):
    """Exercise FourmiSpider: construction, source registration and request generation."""

    def setUp(self):
        # Every test gets its own spider, built from a dummy compound name
        # and two attribute patterns.
        self.compound = "test_compound"
        self.attributes = ["a.*", ".*a"]
        self.spi = spider.FourmiSpider(self.compound, self.attributes)

    def test_init(self):
        # The constructor arguments must be visible on the spider afterwards:
        # the compound among the synonyms, each pattern among the selected
        # attributes.
        self.assertIn(self.compound, self.spi.synonyms)
        for pattern in self.attributes:
            self.assertIn(pattern, self.spi.selected_attributes)

    def test_add_source(self):
        # Registering a single source must make it show up in the spider's
        # source list.
        source = Source()
        self.spi.add_source(source)
        self.assertIn(source, self.spi._sources)

    def test_add_sources(self):
        # Registering several sources in one call must make each of them
        # show up in the spider's source list.
        batch = [Source() for _ in range(3)]
        self.spi.add_sources(batch)

        for source in batch:
            self.assertIn(source, self.spi._sources)

    def test_start_requests(self):
        # Checks the start requests generated for the registered sources.
        # Begin with an empty source list so only the sources added below
        # take part.
        self.spi._sources = []

        # With only a bare Source registered, no start requests are expected.
        self.spi.add_source(Source())
        self.assertEqual(self.spi.start_requests(), [])

        # With NIST registered as well, at least one Request is expected.
        self.spi.add_source(NIST())
        produced = self.spi.start_requests()
        self.assertGreater(len(produced), 0)
        self.assertIsInstance(produced[0], Request)

    def test_synonym_requests(self):
        # Checks the requests generated when a new synonym is reported.
        # Begin with an empty source list so only the sources added below
        # take part.
        self.spi._sources = []

        # With only a bare Source registered, a new synonym is recorded but
        # no requests are expected.
        self.spi.add_source(Source())
        no_requests = self.spi.get_synonym_requests("new_compound")
        self.assertEqual(no_requests, [])
        self.assertIn("new_compound", self.spi.synonyms)

        # With NIST registered as well, a new synonym is expected to produce
        # a Request and to be recorded.
        self.spi.add_source(NIST())
        first_call = self.spi.get_synonym_requests("other_compound")
        self.assertIsInstance(first_call[0], Request)
        self.assertIn("other_compound", self.spi.synonyms)

        # Reporting an already-recorded synonym again is expected to produce
        # nothing.
        repeat_call = self.spi.get_synonym_requests("other_compound")
        self.assertEqual(repeat_call, [])