A web scraper built to search for specific information about a given compound (and its pseudonyms)
at feature/executable 38 lines 1.2 kB view raw
from scrapy import log
# from scrapy.http import Request


class Source:
    """
    Empty template for a source.

    Every method here is a stub: `parse` only logs a warning and
    `new_compound_request` produces nothing. The method docstrings describe
    what a real source is expected to do in their place.
    """

    # Regex of the URIs this source is able to parse.
    website = "http://something/*"
    # Spider associated with this source; stays None until set_spider() runs.
    _spider = None

    def __init__(self):
        """
        Create a new Source. Nothing is initialised here.
        """

    def parse(self, response):
        """
        Handle a Scrapy Response whose URL matches the `website` regex.

        A real source should return a list of Result items and new Scrapy
        Requests. This empty implementation only logs a warning.
        :param response: A Scrapy Response object
        :return: None in this template
        """
        log.msg("The parse function of the empty source was used.", level=log.WARNING)
        return None

    def new_compound_request(self, compound):
        """
        Build the Scrapy Request that looks up the given compound.

        A real source should return a new Scrapy Request. This empty
        implementation returns nothing.
        :param compound: A compound name.
        :return: None in this template
        """
        # Example of what an implementation could look like:
        # return Request(url=self.website[:-1] + compound, callback=self.parse)
        return None

    def set_spider(self, spider):
        """
        Remember the spider this source is associated with.
        :param spider: A FourmiSpider object
        """
        self._spider = spider