A web scraper built to search for specific information about a given compound (and its pseudonyms)
1from scrapy import log
2# from scrapy.http import Request
3
4
class Source:
    """
    Skeleton of a data source.

    Declares the hooks a source exposes (parse, new_compound_request,
    set_spider) and gives each of them a do-nothing default.
    """

    # Regex of the URIs this source is able to parse (placeholder value;
    # concrete sources presumably override it).
    website = "http://something/*"
    # Spider this source is attached to; stays None until set_spider() is called.
    _spider = None

    def __init__(self):
        """
        Create a new Source. No per-instance state is set up here.
        """

    def parse(self, response):
        """
        Handle a Scrapy Response whose URL matches the website regex.

        The empty source extracts nothing: it only logs a warning that it was
        called and then returns None.
        :param response: A Scrapy Response object
        :return: Meant to be a list of Result items and new Scrapy Requests;
                 this default returns None
        """
        log.msg(
            "The parse function of the empty source was used.",
            level=log.WARNING
        )
        return None

    def new_compound_request(self, compound):
        """
        Build the Scrapy Request that starts the lookup of a compound.

        The empty source builds nothing and returns None.
        :param compound: A compound name.
        :return: Meant to be a new Scrapy Request; this default returns None
        """
        # Sketch of an implementation kept from the original author:
        # return Request(url=self.website[:-1] + compound, callback=self.parse)
        return None

    def set_spider(self, spider):
        """
        Remember the spider this source is associated with.
        :param spider: A FourmiSpider object
        """
        self._spider = spider