···66# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
77install:
88 - pip install Scrapy docopt
99- - pip install coveralls
1091110# command to run tests, e.g. python setup.py test
1211script:
1313- - nosetests --with-coverage --cover-package=FourmiCrawler tests
1212+ - nosetests tests
14131514notifications:
1616- slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
1717-1818-after_success:
1919- coveralls --verbose1515+ slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
+3-3
FourmiCrawler/spider.py
···3535 return source.parse(response)
3636 return None
37373838- def get_synonym_requests(self, compound, force=False):
3838+ def get_synonym_requests(self, compound):
3939 """
4040 A function that generates new Scrapy Request objects for each source, given a new synonym of a compound.
4141 :param compound: A compound name
4242 :return: A list of Scrapy Request objects
4343 """
4444 requests = []
4545- if force or compound not in self.synonyms:
4545+ if compound not in self.synonyms:
4646 self.synonyms.add(compound)
4747 for parser in self._sources:
4848 parser_requests = parser.new_compound_request(compound)
···5757 """
5858 requests = []
5959 for synonym in self.synonyms:
6060- requests.extend(self.get_synonym_requests(synonym, force=True))
6060+ requests.extend(self.get_synonym_requests(synonym))
6161 return requests
62626363 def add_sources(self, sources):
+2-2
fourmi.py
···11-#!/usr/bin/env python
11+# !/usr/bin/env python
22"""
33Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms).
44···102102103103# The start for the Fourmi Command Line interface.
104104if __name__ == '__main__':
105105- arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.2')
105105+ arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.1')
106106 loader = SourceLoader()
107107108108 if arguments["--include"]:
+18
setup.py
···11+import sys
22+from cx_Freeze import setup, Executable
33+44+# After running the setup file (python setup.py build), the scrapy/VERSION file has to be manually put into
55+# library.zip. The FourmiCrawler folder also has to be copied into both the library and the exe.win32-2.7 folder.
66+# After putting the files in the library, the library has to be zipped again to replace the old library.zip.
77+# Dependencies are automatically detected, but it might need fine tuning.
88+build_exe_options = {"packages": ["os", "scrapy", "lxml", "w3lib", "pkg_resources", "zope.interface", "twisted.internet"], "excludes": []}
99+1010+# GUI applications require a different base on Windows (the default is for a
1111+# console application).
1212+base = None
1313+1414+setup( name = "Scrapy",
1515+ version = "0.1",
1616+ description = "My GUI application!",
1717+ options = {"build_exe": build_exe_options},
1818+ executables = [Executable("fourmi.py", base=base)])
+10-8
sourceloader.py
···11import inspect
22+import sys
23import os
34import re
45···910 sources = []
10111112 def __init__(self, rel_dir="FourmiCrawler/sources"):
1212- """
1313- The initiation of a SourceLoader, selects and indexes a directory for usable sources.
1414- :param rel_dir: A relative path to a directory.
1515- """
1616- path = os.path.dirname(os.path.abspath(__file__))
1313+1414+ if hasattr(sys,'frozen'):
1515+ path = os.path.dirname(sys.executable)
1616+ else:
1717+ path = os.path.dirname(os.path.abspath(__file__))
1818+1719 path += "/" + rel_dir
1820 known_parser = set()
19212022 for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']:
2121- mod = __import__('.'.join([rel_dir.replace("/", "."), py]), fromlist=[py])
2323+ mod = __import__('.'.join([rel_dir.replace('/', "."), py]), fromlist=[py])
2224 classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))]
2325 for cls in classes:
2426 if issubclass(cls, Source) and cls not in known_parser:
2525- self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers?
2626- known_parser.add(cls)
2727+ self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers?
2828+ # known_parser.add(cls)
27292830 def include(self, source_names):
2931 """
+1-4
tests/test_spider.py
···43434444 src2 = ChemSpider()
4545 self.spi.add_source(src2)
4646- requests = self.spi.start_requests()
4747- self.assertGreater(len(requests), 0)
4848- self.assertIsInstance(requests[0], Request)
4949-4646+ self.assertIsNotNone(self.spi.start_requests())
50475148 def test_synonym_requests(self):
5249 # A test for the synonym request function