A web scraper built to search for specific information on a given compound (and its pseudonyms)
# Scrapy settings for Fourmi project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#

# Name of the bot implemented by this Scrapy project.
BOT_NAME = 'FourmiCrawler'

# Packages Scrapy searches for spiders, and the package in which newly
# generated spiders are created.
SPIDER_MODULES = ['FourmiCrawler']
NEWSPIDER_MODULE = 'FourmiCrawler'

# Item pipelines, keyed by import path. Scrapy runs them in ascending order
# of the integer value, so items pass through RemoveNonePipeline first and
# DuplicatePipeline last.
ITEM_PIPELINES = {
    'FourmiCrawler.pipelines.RemoveNonePipeline': 100,
    'FourmiCrawler.pipelines.AttributeSelectionPipeline': 200,
    'FourmiCrawler.pipelines.DuplicatePipeline': 300,
}

# Feed export: where scraped items are written and in which serialization
# format.
# NOTE(review): newer Scrapy releases replace FEED_URI/FEED_FORMAT with the
# FEEDS setting -- confirm against the Scrapy version this project targets
# before migrating.
FEED_URI = 'results.json'
FEED_FORMAT = 'jsonlines'

# Crawl responsibly by identifying yourself (and your website) on the
# user-agent
USER_AGENT = 'Fourmi'