A web scraper built to search for specific information on a given compound (and its pseudonyms)

Compare changes

Choose any two refs to compare.

+32 -16
+2 -6
.travis.yml
··· 6 6 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 7 7 install: 8 8 - pip install Scrapy docopt 9 - - pip install coveralls 10 9 11 10 # command to run tests, e.g. python setup.py test 12 11 script: 13 - - nosetests --with-coverage --cover-package=FourmiCrawler tests 12 + - nosetests tests 14 13 15 14 notifications: 16 - slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM 17 - 18 - after_success: 19 - coveralls --verbose 15 + slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
+2 -2
fourmi.py
··· 1 - #!/usr/bin/env python 1 + # !/usr/bin/env python 2 2 """ 3 3 Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms). 4 4 ··· 102 102 103 103 # The start for the Fourmi Command Line interface. 104 104 if __name__ == '__main__': 105 - arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.2') 105 + arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.1') 106 106 loader = SourceLoader() 107 107 108 108 if arguments["--include"]:
+18
setup.py
··· 1 + import sys 2 + from cx_Freeze import setup, Executable 3 + 4 + # After running the setup file (python setup.py build) the scrapy/VERSION file has to be manually put into the 5 + # library.zip, also the FourmiCrawler map has to be copied to both the library and the exe.win32-2.7 folder. after 6 + # putting the files in the library the library has to be zipped and replace the old library. 7 + # Dependencies are automatically detected, but it might need fine tuning. 8 + build_exe_options = {"packages": ["os", "scrapy", "lxml", "w3lib", "pkg_resources", "zope.interface", "twisted.internet"], "excludes": []} 9 + 10 + # GUI applications require a different base on Windows (the default is for a 11 + # console application). 12 + base = None 13 + 14 + setup( name = "Scrapy", 15 + version = "0.1", 16 + description = "My GUI application!", 17 + options = {"build_exe": build_exe_options}, 18 + executables = [Executable("fourmi.py", base=base)])
+10 -8
sourceloader.py
··· 1 1 import inspect 2 + import sys 2 3 import os 3 4 import re 4 5 ··· 9 10 sources = [] 10 11 11 12 def __init__(self, rel_dir="FourmiCrawler/sources"): 12 - """ 13 - The initiation of a SourceLoader, selects and indexes a directory for usable sources. 14 - :param rel_dir: A relative path to a directory. 15 - """ 16 - path = os.path.dirname(os.path.abspath(__file__)) 13 + 14 + if hasattr(sys,'frozen'): 15 + path = os.path.dirname(sys.executable) 16 + else: 17 + path = os.path.dirname(os.path.abspath(__file__)) 18 + 17 19 path += "/" + rel_dir 18 20 known_parser = set() 19 21 20 22 for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']: 21 - mod = __import__('.'.join([rel_dir.replace("/", "."), py]), fromlist=[py]) 23 + mod = __import__('.'.join([rel_dir.replace('/', "."), py]), fromlist=[py]) 22 24 classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))] 23 25 for cls in classes: 24 26 if issubclass(cls, Source) and cls not in known_parser: 25 - self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers? 26 - known_parser.add(cls) 27 + self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers? 28 + # known_parser.add(cls) 27 29 28 30 def include(self, source_names): 29 31 """