A web scraper built to search for specific information about a given compound (and its pseudonyms)
1import ConfigParser
2import os
3import shutil
4
5from scrapy.utils.project import get_project_settings
6
7
class Configurator:
    """
    Helper that holds the Scrapy project settings and adjusts them according
    to the options passed in by one of the Fourmi applications.
    """

    def __init__(self):
        # Settings object whose ``overrides`` mapping the setters below write to.
        self.scrapy_settings = get_project_settings()

    def set_output(self, filename, fileformat, compound):
        """
        This function manipulates the Scrapy output file settings that normally would be set in the settings file.
        In the Fourmi project these are command line arguments.
        :param filename: The filename of the file where the output will be put. The placeholder value
            '<compound>.*format*' means "derive the name from compound and fileformat".
        :param fileformat: The format in which the output will be, or None to leave FEED_FORMAT untouched.
        :param compound: The compound name, used as the base of the derived output filename.
        """
        overrides = self.scrapy_settings.overrides

        if filename != '<compound>.*format*':
            # An explicit filename always wins over the derived one.
            overrides["FEED_URI"] = filename
        elif fileformat == "jsonlines":
            # jsonlines output is written to a file with a .json extension.
            overrides["FEED_URI"] = compound + ".json"
        elif fileformat is not None:
            overrides["FEED_URI"] = compound + "." + fileformat
        # Placeholder filename without a format: FEED_URI is left as it was.

        if fileformat is not None:
            overrides["FEED_FORMAT"] = fileformat

    def set_logging(self, logfile=None, verbose=0):
        """
        This function changes the default settings of Scrapy's logging functionality
        using the settings given by the CLI.
        :param logfile: The location where the logfile will be saved, or None.
        :param verbose: An integer value to switch between loglevels: 0 disables logging,
            1 selects WARNING, 2 selects INFO and every other value selects DEBUG.
        """
        overrides = self.scrapy_settings.overrides

        overrides["LOG_ENABLED"] = verbose != 0

        # Any value other than 1 or 2 (including 0) falls back to DEBUG.
        overrides["LOG_LEVEL"] = {1: "WARNING", 2: "INFO"}.get(verbose, "DEBUG")

        # LOG_STDOUT is switched off only for verbosity levels above 1.
        overrides["LOG_STDOUT"] = not verbose > 1

        # None is stored explicitly when no logfile was given.
        overrides["LOG_FILE"] = logfile

    @staticmethod
    def read_sourceconfiguration():
        """
        This function reads sources.cfg in the main folder for configuration
        variables for sources. When sources.cfg does not exist yet, it is first
        created as a copy of sources.cfg.sample; if that copy fails a warning is
        printed and the returned parser is simply empty.
        :return a ConfigParser object of sources.cfg
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        config_path = current_dir + '/../sources.cfg'
        # [TODO]: location of sources.cfg should be softcoded eventually
        if not os.path.isfile(config_path):
            try:
                shutil.copyfile(current_dir + "/../sources.cfg.sample", config_path)
            except IOError:
                # Single parenthesised argument: identical output whether print
                # is a statement or a function.
                print("WARNING: Source configuration couldn't be found and couldn't be created.")
        config = ConfigParser.ConfigParser()
        # read() silently skips files it cannot open, so a missing file yields
        # an empty configuration rather than an exception.
        config.read(config_path)
        return config

    @staticmethod
    def get_section(config, sourcename):
        """
        This function reads a config section labeled in variable sourcename and
        tests whether the reliability variable is set else set to empty string.
        Return the default section if the labeled config section does not exist.
        The returned dictionary is always a fresh copy, so the config object
        itself is never modified.
        :param config: a ConfigParser object
        :param sourcename: the name of the section to be read
        :return a dictionary of the section in the config labeled in sourcename
        """
        if config.has_section(sourcename):
            section = dict(config.items(sourcename))
        else:
            # defaults() hands back the parser's own mapping; copy it so that
            # adding 'reliability' below does not leak into the shared config
            # (which would change every later lookup and hide the warning).
            section = dict(config.defaults())

        if 'reliability' not in section:
            print('WARNING: Reliability not set for %s' % sourcename)
            section['reliability'] = ''
        return section