A web scraper built to search for specific information about a given compound (and its pseudonyms)
1from scrapy import log
2# from scrapy.http import Request
3
4
class Source:
    """
    Base (empty) source: holds a configuration and an associated spider, and
    provides placeholder hooks that concrete sources are expected to override.
    """
    # Regex of URI's the source is able to parse
    website = "http://something/.*"
    # Spider associated with this source; assigned through set_spider()
    _spider = None

    def __init__(self, config=None):
        """
        Create a new Source.
        :param config: Optional configuration object; when omitted (None) an empty dict is stored instead.
        """
        self.cfg = {} if config is None else config

    def parse(self, response):
        """
        Hook for parsing Scrapy Response objects whose URL matches the website Regex.
        This base implementation parses nothing: it only logs a warning and returns None.
        :param response: A Scrapy Response object
        :return: None here; an overriding source is meant to return a list of Result items and new Scrapy Requests
        """
        log.msg("The parse function of the empty source was used.", level=log.WARNING)
        return None

    def new_compound_request(self, compound):
        """
        Hook for building a Scrapy Request for the given compound.
        This base implementation builds no request and returns None.
        :param compound: A compound name.
        :return: None here; an overriding source is meant to return a new Scrapy Request
        """
        # Sketch of what an override could return (kept as a hint only):
        # Request(url=self.website[:-2].replace("\\", "") + compound, callback=self.parse)
        return None

    def set_spider(self, spider):
        """
        Remember the spider this source is associated with.
        :param spider: A FourmiSpider object
        """
        self._spider = spider