A web scraper built to search for specific information about a given compound (and its pseudonyms).
import os
import shutil
import sys
from Tkinter import *
from tkFileDialog import asksaveasfilename

from configImporter import *


class GUI(object):
    """Tkinter front end that collects the search parameters for the crawler.

    The window is built when the object is constructed. ``run`` shows the
    window and, when the user confirmed with "Start search", hands the
    collected values to the ``search`` callable passed to the constructor.
    """

    def __init__(self, search, config_file='GUI.cfg', sourceloader=None, in_source=True):
        """Boots the window, configuration.

        :param search: callable invoked as ``search(arguments, sourceloader)``
            by ``execute_search``.
        :param config_file: name (or path) of the GUI configuration file.
        :param sourceloader: object passed through unchanged to ``search``.
        :param in_source: when False, ``config_file`` is resolved against the
            parent directory of this module instead of being used as given.

        Exits the program when the configuration file does not exist and the
        sample configuration cannot be copied into its place.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        if not in_source:
            # Join with explicit separators; plain string concatenation glued
            # '..' onto the directory name itself.
            config_file = os.path.join(module_dir, '..', config_file)
        if not os.path.isfile(config_file):
            try:
                # Fall back to a fresh copy of the sample configuration.
                shutil.copyfile(os.path.join(module_dir, '..', 'GUI.cfg.sample'), config_file)
            except IOError:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("GUI configuration couldn't be found and couldn't be created.")
                sys.exit()
        self.configurator = ConfigImporter(config_file)
        self.sourceloader = sourceloader
        self.finish_with_search = False
        self.values = {}
        self.required_variables = ['substance']
        self.search = search
        self.window, self.variables = self.generate_window(self.load_common_attributes(),
                                                           self.load_output_types())

    def load_common_attributes(self):
        """Calls the configuration parser for common attributes.

        :return: list of the comma separated entries, stripped of whitespace.
        """
        return [x.strip() for x in self.configurator.load_common_attributes().split(',')]

    def load_output_types(self):
        """Calls the configuration parser for output types.

        :return: list of the comma separated entries, stripped of whitespace.
        """
        return [x.strip() for x in self.configurator.load_output_types().split(',')]

    def load_always_attributes(self):
        """Calls the configuration parser for attributes that are always used.

        :return: the entries as one comma separated string without padding.
        """
        return ','.join([x.strip() for x in self.configurator.load_always_attributes().split(',')])

    def set_output(self):
        """Asks the user for an output file and shows the choice on the button."""
        filename = asksaveasfilename()
        if not filename:
            # The dialog was cancelled: keep the output file selected before.
            return
        self.variable_output_name.set(filename)
        self.button_output_name.config(text=filename)

    def generate_window(self, common_attributes, output_types):
        """Creates all widgets and variables in the window.

        :param common_attributes: names offered in the "Common Parameters" listbox.
        :param output_types: accepted for the caller's convenience; not used
            while building the window.
        :return: tuple of the Tk root window and a dict mapping value names to
            the Tkinter variables/widgets holding the user's input.

        The order of the ``pack`` calls determines the layout, so it is kept
        exactly as it was.
        """
        window = Tk()
        window.wm_title("Fourmi Crawler")

        variables = {}

        # Substance entry (the only required field).
        variable_substance = StringVar(window)
        frame_substance = Frame(window)
        label_substance = Label(frame_substance, text="Substance: ")
        input_substance = Entry(frame_substance, font=("Helvetica", 12), width=25,
                                textvariable=variable_substance)
        variables["substance"] = variable_substance
        frame_substance.pack(side=TOP)
        label_substance.pack()
        input_substance.pack()
        input_substance.focus()

        # Free-text parameters.
        frame_all_attributes = Frame(window)
        frame_selecting_attributes = Frame(frame_all_attributes)
        frame_new_attributes = Frame(frame_selecting_attributes)
        label_new_attributes = Label(frame_new_attributes, text="Parameters: ")
        input_new_attributes = Text(frame_new_attributes, font=("Helvetica", 8), width=25, height=7,
                                    padx=5, pady=5)
        variables["new_attributes"] = input_new_attributes
        frame_new_attributes.pack(side=LEFT)
        label_new_attributes.pack()
        input_new_attributes.pack()

        # Multi-select list of common parameters with its scrollbar.
        frame_common_attributes = Frame(frame_selecting_attributes)
        label_common_attributes = Label(frame_common_attributes, text="Common Parameters: ")
        input_common_attributes = Listbox(frame_common_attributes, selectmode=MULTIPLE, height=7)
        scrollbar_common_attributes = Scrollbar(frame_common_attributes)
        input_common_attributes.config(yscrollcommand=scrollbar_common_attributes.set)
        scrollbar_common_attributes.config(command=input_common_attributes.yview)
        if common_attributes:
            input_common_attributes.insert(END, *common_attributes)
        variables["common_attributes"] = input_common_attributes
        frame_common_attributes.pack(side=RIGHT)
        label_common_attributes.pack(side=TOP)
        input_common_attributes.pack(side=LEFT)
        scrollbar_common_attributes.pack(side=RIGHT, fill=Y)
        frame_selecting_attributes.pack()

        # Start / cancel buttons.
        frame_last = Frame(window)
        search_button = Button(frame_last, text="Start search", command=self.prepare_search)
        cancel_button = Button(frame_last, text="Cancel", command=window.destroy)
        frame_last.pack(side=BOTTOM)
        search_button.pack(side=LEFT)
        cancel_button.pack(side=RIGHT)

        # Output file selection; kept on self because set_output updates both.
        frame_name = Frame(window)
        frame_output_name = Frame(frame_name)
        label_output_name = Label(frame_output_name, text='Output file:')
        self.variable_output_name = StringVar()
        self.variable_output_name.set('results.csv')
        variables['output_name'] = self.variable_output_name
        self.button_output_name = Button(frame_output_name, command=self.set_output, text="Select file")
        frame_output_name.pack(side=LEFT)
        label_output_name.pack()
        self.button_output_name.pack()
        frame_name.pack(side=BOTTOM)

        # Checkboxes.
        frame_checkboxes = Frame(window)
        frame_checkbox_attributes = Frame(frame_checkboxes)
        variable_all_attributes = BooleanVar()
        variable_all_attributes.set(True)
        input_all_attributes = Checkbutton(frame_checkbox_attributes, text="Search ALL parameters",
                                           variable=variable_all_attributes)
        variables["all_attributes"] = variable_all_attributes
        frame_checkbox_attributes.pack(side=LEFT)
        input_all_attributes.pack()

        frame_logging = Frame(frame_checkboxes)
        variable_logging = BooleanVar()
        variable_logging.set(False)
        input_logging = Checkbutton(frame_logging, text="Verbose logging", variable=variable_logging)
        variables['logging'] = variable_logging
        frame_logging.pack(side=RIGHT)
        frame_checkboxes.pack(side=BOTTOM)
        input_logging.pack()
        frame_all_attributes.pack()

        return window, variables

    @staticmethod
    def _is_blank(value):
        """Tells whether value is a string that is empty or whitespace only."""
        return hasattr(value, 'strip') and not value.strip()

    def prepare_search(self):
        """Saves the values from the window for later retrieval.

        Closes the window and marks the search as confirmed when every entry
        of ``required_variables`` has been filled in; otherwise the window
        stays open so the user can complete the input.
        """
        values = {"Always attributes": self.load_always_attributes()}
        for name, var in self.variables.items():
            if isinstance(var, (StringVar, BooleanVar)):
                values[name] = var.get()
            elif isinstance(var, Text):
                values[name] = str(var.get("1.0", END)).strip()
            elif isinstance(var, Listbox):
                values[name] = ", ".join([var.get(int(i)) for i in var.curselection()])
            else:
                print("No known class, {}, {}".format(name, var))

        values['output_name'] = self.variable_output_name.get()
        values['output_type'] = self.check_output_type(values.get('output_name'))

        self.values = values
        # A required field holding only whitespace counts as not filled in.
        complete = all([not self._is_blank(values.get(name)) for name in self.required_variables])
        self.finish_with_search = complete
        if complete:
            self.window.destroy()
        # Otherwise nothing happens: the user currently gets no feedback
        # about the missing required information.

    def execute_search(self):
        """Calls the Fourmi crawler with the values from the GUI.

        Builds the argument dictionary from ``self.values`` (as stored by
        ``prepare_search``) and calls ``self.search(arguments, self.sourceloader)``.
        """
        if self.values.get('all_attributes'):
            attributes = ".*"
        else:
            # These are the keys under which prepare_search stores the
            # selections. Empty selections are left out so that no empty
            # entries end up in the comma separated list.
            attribute_keys = ['new_attributes', 'common_attributes', 'Always attributes']
            selected = [str(self.values.get(key) or '') for key in attribute_keys]
            attributes = ','.join([entry for entry in selected if entry])

        output_name = str(self.values.get('output_name'))
        if not os.path.isabs(output_name):
            # "file://" followed by a relative name would put the file name in
            # the host position of the URI, so make the path absolute first.
            output_name = os.path.abspath(output_name)
        output_file = "file://" + output_name

        arguments = {'--attributes': attributes,
                     '--exclude': None,
                     '--format': self.values.get('output_type'),
                     '--help': False,
                     '--include': None,
                     '--log': 'log.txt',
                     '--output': output_file,
                     # NOTE(review): 0 when "Verbose logging" is ticked and 3
                     # otherwise looks inverted; confirm against the consumer
                     # of '-v' before changing it.
                     '-v': 0 if self.values.get('logging') else 3,
                     '--version': False,
                     '<compound>': self.values.get('substance'),
                     'list': False,
                     'search': True}

        self.search(arguments, self.sourceloader)

    def run(self):
        """Starts the window and the search."""
        self.window.mainloop()
        if self.finish_with_search:
            self.execute_search()

    def check_output_type(self, filename):
        """Derives the output type from the extension of ``filename``.

        :param filename: name or path of the output file.
        :return: the extension (without the dot) when it is one of the
            configured output types, otherwise the first configured type.
        """
        output_types = self.load_output_types()
        # splitext only looks at the last path component, so a name without
        # an extension is never mistaken for an output type.
        extension = os.path.splitext(str(filename))[1].lstrip('.')
        if extension in output_types:
            return extension
        return output_types[0]