A web scraper built to search for specific information about a given compound (and its pseudonyms)
at develop 24 lines 677 B view raw
# Scrapy settings for Fourmi project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#

# Name Scrapy uses to identify this bot.
BOT_NAME = 'FourmiCrawler'

# Packages searched for spiders, and the package new spiders are created in.
SPIDER_MODULES = ['FourmiCrawler']
NEWSPIDER_MODULE = 'FourmiCrawler'

# Item pipelines to enable. Per the Scrapy documentation, the integer is the
# run order: pipelines with lower values process each item first.
ITEM_PIPELINES = {
    'FourmiCrawler.pipelines.RemoveNonePipeline': 100,
    'FourmiCrawler.pipelines.AttributeSelectionPipeline': 200,
    'FourmiCrawler.pipelines.DuplicatePipeline': 300,
}

# Feed export: where scraped items are written, and in which serialization
# format.
FEED_URI = 'results.json'
FEED_FORMAT = 'jsonlines'

# Crawl responsibly by identifying yourself (and your website) on the
# user-agent

USER_AGENT = 'Fourmi'