A web scraper built to search for specific information about a given compound (and its pseudonyms)
#!/usr/bin/env python
"""
Fourmi - An internet webcrawler searching for information on chemical
compounds. [todo] - Add some more useful text here.
"""

from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from FourmiCrawler.spiders.Fourmispider import FourmiSpider
from scrapy.utils.project import get_project_settings


def setup_crawler(searchable):
    """Build a crawler for one compound and start its spider.

    A ``FourmiSpider`` is created for ``searchable`` and handed to a
    ``Crawler`` configured with the project settings. The crawler's
    ``spider_closed`` signal is connected to ``reactor.stop``.

    This function does not run the reactor itself; the caller is
    expected to do so (see ``start``).

    :param searchable: The compound to search for; passed to
        ``FourmiSpider`` as its ``compound`` argument.
    """
    # [TODO] - Initiate all parsers for the different websites and get
    # allowed URLs.
    spider = FourmiSpider(compound=searchable)
    settings = get_project_settings()
    crawler = Crawler(settings)
    # Without this hook nothing here would ever stop the reactor, so
    # reactor.run() in start() would presumably never return.
    crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
    crawler.configure()
    crawler.crawl(spider)
    crawler.start()


def start(searchable="Methane"):
    """Set up a crawl for ``searchable`` and run the Twisted reactor.

    Calls ``setup_crawler``, starts Scrapy logging, then enters
    ``reactor.run()``. The reactor is stopped by the ``spider_closed``
    hook installed in ``setup_crawler``.

    :param searchable: The compound to search for. Defaults to
        ``"Methane"``, the value that used to be hard-coded here, so
        existing ``start()`` calls behave as before.
    """
    setup_crawler(searchable)
    log.start()
    reactor.run()


# NOTE: intentionally left unguarded (no ``if __name__ == "__main__"``)
# so that the module's existing run-on-load behaviour is unchanged.
start()