A web scraper built to search for specific information about a given compound (and its pseudonyms)
at develop 41 lines 1.3 kB view raw
import logging

try:
    # ``scrapy.log`` is Scrapy's legacy logging facade (deprecated since
    # Scrapy 1.0). Keep using it whenever it is importable so that behaviour on
    # old Scrapy versions is unchanged.
    from scrapy import log
except ImportError:
    # The legacy facade is not available (e.g. a Scrapy release that dropped
    # it); fall back to the standard library so this module stays importable.
    log = None

_logger = logging.getLogger(__name__)


class Source:
    """
    Template ("empty") source that concrete sources are meant to override.

    A source declares, through ``website``, a regex of the URIs it can parse,
    turns matching responses into results (``parse``) and builds the first
    request for a compound (``new_compound_request``). This base implementation
    does neither: ``parse`` only logs a warning and ``new_compound_request``
    returns None.
    """

    website = "http://something/.*"  # Regex of URIs the source is able to parse
    _spider = None  # Spider associated through set_spider(); None until then

    def __init__(self, config=None):
        """
        Initiation of a new Source.
        :param config: Optional configuration object, stored as-is in ``self.cfg``;
                       an empty dict is used when it is None.
        """
        self.cfg = {} if config is None else config

    def parse(self, response):
        """
        This function should be able to parse all Scrapy Response objects with a URL matching the website Regex.
        The base implementation parses nothing; it only logs a warning that the empty source was used.
        :param response: A Scrapy Response object
        :return: A list of Result items and new Scrapy Requests (None in this base implementation)
        """
        message = "The parse function of the empty source was used."
        if log is not None:
            log.msg(message, level=log.WARNING)
        else:
            _logger.warning(message)

    def new_compound_request(self, compound):
        """
        This function should return a Scrapy Request for the given compound request.
        The base implementation builds no request and returns None. A typical override
        takes ``website`` without its trailing ``.*``, removes the regex escape
        backslashes, appends the compound name and uses that as the URL of a
        ``scrapy.http.Request`` whose callback is ``self.parse``.
        :param compound: A compound name.
        :return: A new Scrapy Request (None in this base implementation)
        """
        return None

    def set_spider(self, spider):
        """
        A Function to save the associated spider.
        :param spider: A FourmiSpider object
        """
        self._spider = spider