A web scraper built to search specific information for a given compound (and its pseudonyms)
at develop 8.8 kB view raw
from Tkinter import *
import os
import shutil
import sys
from tkFileDialog import asksaveasfilename

from configImporter import *


class GUI():
    """Tkinter front-end that collects search parameters and starts a search.

    The window is built on construction; call run() to show it. When the user
    confirms with "Start search" and all required fields are filled in, the
    `search` callable given to the constructor is invoked with the collected
    arguments.
    """

    def __init__(self, search, config_file='GUI.cfg', sourceloader=None, in_source=True):
        """Boots the window, configuration.

        :param search: callable invoked as search(arguments, sourceloader)
                       once the user has confirmed the search.
        :param config_file: path of the GUI configuration file.
        :param sourceloader: object handed through unchanged to `search`.
        :param in_source: when False, `config_file` is resolved relative to
                          the parent directory of this module's directory.

        If the configuration file does not exist it is created from
        GUI.cfg.sample (located in the parent directory of this module's
        directory); if that copy fails the process exits.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        if not in_source:
            # os.path.join supplies the separator that plain concatenation
            # with '../' was missing (dirname() has no trailing slash).
            config_file = os.path.join(current_dir, '..', config_file)
        if not os.path.isfile(config_file):
            try:
                shutil.copyfile(os.path.join(current_dir, '..', 'GUI.cfg.sample'), config_file)
            except IOError:
                print("GUI configuration couldn't be found and couldn't be created.")
                sys.exit()
        self.configurator = ConfigImporter(config_file)
        self.sourceloader = sourceloader
        self.finish_with_search = False
        self.values = {}
        self.required_variables = ['substance']
        self.search = search
        self.window, self.variables = self.generate_window(self.load_common_attributes(), self.load_output_types())

    def load_common_attributes(self):
        """Calls the configuration parser for common attributes.

        :return: list of attribute names, split on ',' and stripped.
        """
        return [x.strip() for x in self.configurator.load_common_attributes().split(',')]

    def load_output_types(self):
        """Calls the configuration parser for output types.

        :return: list of output type names, split on ',' and stripped.
        """
        return [x.strip() for x in self.configurator.load_output_types().split(',')]

    def load_always_attributes(self):
        """Calls the configuration parser for attributes that are always used.

        :return: the attributes as one comma-separated string with the
                 whitespace around each entry removed.
        """
        return ','.join([x.strip() for x in self.configurator.load_always_attributes().split(',')])

    def set_output(self):
        """Asks the user for an output file and shows the choice on the button.

        A cancelled dialog yields an empty result; in that case the current
        output file is kept instead of being overwritten with nothing.
        """
        filename = asksaveasfilename()
        if not filename:
            return
        self.variable_output_name.set(filename)
        self.button_output_name.config(text=self.variable_output_name.get())

    def generate_window(self, common_attributes, output_types):
        """Creates all widgets and variables in the window.

        :param common_attributes: entries offered in the "Common Parameters"
                                  list box.
        :param output_types: accepted for interface compatibility; not used
                             while building the window.
        :return: tuple (window, variables) where `variables` maps a field name
                 to the Tk variable or widget holding its value:
                 "substance" and "output_name" (StringVar), "new_attributes"
                 (Text), "common_attributes" (Listbox), "all_attributes" and
                 "logging" (BooleanVar).
        """
        # NOTE: the order of widget creation and of the pack() calls is kept
        # as-is on purpose; pack order determines the resulting layout.
        window = Tk()
        window.wm_title("Fourmi Crawler")

        variables = {}

        # Substance entry (the only required field).
        variable_substance = StringVar(window)
        frame_substance = Frame(window)
        label_substance = Label(frame_substance, text="Substance: ")
        input_substance = Entry(frame_substance, font=("Helvetica", 12), width=25, textvariable=variable_substance)
        variables.update({"substance": variable_substance})
        frame_substance.pack(side=TOP)
        label_substance.pack()
        input_substance.pack()
        input_substance.focus()

        # Free-text parameters.
        frame_all_attributes = Frame(window)
        frame_selecting_attributes = Frame(frame_all_attributes)
        frame_new_attributes = Frame(frame_selecting_attributes)
        label_new_attributes = Label(frame_new_attributes, text="Parameters: ")
        input_new_attributes = Text(frame_new_attributes, font=("Helvetica", 8), width=25, height=7, padx=5, pady=5)
        variables.update({"new_attributes": input_new_attributes})
        frame_new_attributes.pack(side=LEFT)
        label_new_attributes.pack()
        input_new_attributes.pack()

        # Multi-select list of the configured common parameters.
        frame_common_attributes = Frame(frame_selecting_attributes)
        label_common_attributes = Label(frame_common_attributes, text="Common Parameters: ")
        input_common_attributes = Listbox(frame_common_attributes, selectmode=MULTIPLE, height=7)
        scrollbar_common_attributes = Scrollbar(frame_common_attributes)
        input_common_attributes.config(yscrollcommand=scrollbar_common_attributes.set)
        scrollbar_common_attributes.config(command=input_common_attributes.yview)
        if common_attributes:
            input_common_attributes.insert(END, *common_attributes)
        variables.update({"common_attributes": input_common_attributes})
        frame_common_attributes.pack(side=RIGHT)
        label_common_attributes.pack(side=TOP)
        input_common_attributes.pack(side=LEFT)
        scrollbar_common_attributes.pack(side=RIGHT, fill=Y)
        frame_selecting_attributes.pack()

        # Start / cancel buttons.
        frame_last = Frame(window)
        search_button = Button(frame_last, text="Start search", command=self.prepare_search)
        cancel_button = Button(frame_last, text="Cancel", command=window.destroy)
        frame_last.pack(side=BOTTOM)
        search_button.pack(side=LEFT)
        cancel_button.pack(side=RIGHT)

        # Output file selection; defaults to results.csv.
        frame_name = Frame(window)
        frame_output_name = Frame(frame_name)
        label_output_name = Label(frame_output_name, text='Output file:')
        self.variable_output_name = StringVar()
        self.variable_output_name.set('results.csv')
        variables.update({'output_name': self.variable_output_name})
        self.button_output_name = Button(frame_output_name, command=self.set_output, text="Select file")
        frame_output_name.pack(side=LEFT)
        label_output_name.pack()
        self.button_output_name.pack()
        frame_name.pack(side=BOTTOM)

        # "Search ALL parameters" checkbox, ticked by default.
        frame_checkboxes = Frame(window)
        frame_checkbox_attributes = Frame(frame_checkboxes)
        variable_all_attributes = BooleanVar()
        variable_all_attributes.set(True)
        input_all_attributes = Checkbutton(frame_checkbox_attributes, text="Search ALL parameters",
                                           variable=variable_all_attributes)
        variables.update({"all_attributes": variable_all_attributes})
        frame_checkbox_attributes.pack(side=LEFT)
        input_all_attributes.pack()

        # "Verbose logging" checkbox, unticked by default.
        frame_logging = Frame(frame_checkboxes)
        variable_logging = BooleanVar()
        variable_logging.set(False)
        input_logging = Checkbutton(frame_logging, text="Verbose logging", variable=variable_logging)
        variables.update({'logging': variable_logging})
        frame_logging.pack(side=RIGHT)
        frame_checkboxes.pack(side=BOTTOM)
        input_logging.pack()
        frame_all_attributes.pack()

        return window, variables

    def prepare_search(self):
        """Saves the values from the window for later retrieval.

        Reads every entry of self.variables into self.values, adds the
        always-used attributes plus the output file name and type, and closes
        the window when every required field is filled in. Otherwise the
        window stays open and finish_with_search is left False.
        """
        values = {"Always attributes": self.load_always_attributes()}
        for name, var in self.variables.items():
            if isinstance(var, (StringVar, BooleanVar)):
                values[name] = var.get()
            elif isinstance(var, Text):
                values[name] = str(var.get("1.0", END)).strip()
            elif isinstance(var, Listbox):
                values[name] = ", ".join([var.get(int(i)) for i in var.curselection()])
            else:
                print("No known class, {}, {}".format(name, var))

        values['output_name'] = self.variable_output_name.get()
        values['output_type'] = self.check_output_type(values.get('output_name'))

        self.values = values
        # A field holding only whitespace counts as not filled in.
        if all((values.get(name) or '').strip() for name in self.required_variables):
            self.finish_with_search = True
            self.window.destroy()
        else:
            self.finish_with_search = False
            # TODO: tell the user that not all required information was entered.

    def execute_search(self):
        """Calls the Fourmi crawler with the values from the GUI.

        Builds the argument dictionary from self.values and passes it,
        together with the sourceloader, to the `search` callable.
        """
        if self.values.get('all_attributes'):
            attributes = ".*"
        else:
            # These keys must match the ones stored by prepare_search();
            # empty selections are skipped so no blank entries are passed on.
            attribute_types = ['new_attributes', 'common_attributes', 'Always attributes']
            selected = [self.values.get(attribute) for attribute in attribute_types]
            attributes = ','.join([str(entry) for entry in selected if entry])
        output_file = "file://" + str(self.values.get('output_name'))  # Dealing with absolute paths

        arguments = {'--attributes': attributes,
                     '--exclude': None,
                     '--format': self.values.get('output_type'),
                     '--help': False,
                     '--include': None,
                     '--log': 'log.txt',
                     '--output': output_file,
                     # NOTE(review): 0 when "Verbose logging" is ticked, 3
                     # otherwise - confirm this matches the crawler's -v scale.
                     '-v': 0 if self.values.get('logging') else 3,
                     '--version': False,
                     '<compound>': self.values.get('substance'),
                     'list': False,
                     'search': True}

        self.search(arguments, self.sourceloader)

    def run(self):
        """Starts the window and the search."""
        self.window.mainloop()
        if self.finish_with_search:
            self.execute_search()

    def check_output_type(self, filename):
        """Determines the output type from the extension of `filename`.

        :param filename: name of the output file.
        :return: the text after the last '.' when it is one of the configured
                 output types, otherwise the first configured output type.
        """
        output_types = self.load_output_types()
        extension = str(filename).split('.')[-1]
        if extension in output_types:
            return extension
        return output_types[0]