A web scraper built to search for specific information about a given compound (and its pseudonyms)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Added comments for the class and functions

RTB 81719a38 472aae86

+21
+21
FourmiCrawler/sources/NIST.py
··· 6 6 import re 7 7 8 8 # [TODO]: values can be '128.', perhaps remove the dot in that case? 9 + # [TODO]: properties have references and comments which do not exist in the 10 + # Result item, but should be included eventually. 9 11 10 12 class NIST(Source): 13 + """NIST Scraper plugin 14 + 15 + This plugin manages searching for a chemical on the NIST website 16 + and parsing the resulting page if the chemical exists on NIST. 17 + """ 11 18 website = "http://webbook.nist.gov/*" 12 19 13 20 search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on' ··· 76 83 return requests 77 84 78 85 def parse_generic_info(self, sel): 86 + """Parses: synonyms, chemical formula, molecular weight, InChI, 87 + InChiKey, CAS number 88 + """ 79 89 ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]') 80 90 li = ul.xpath('li') 81 91 ··· 117 127 return requests 118 128 119 129 def parse_aggregate_data(self, table, symbol_table): 130 + """Parses the table(s) which contain possible links to individual 131 + data points 132 + """ 120 133 results = [] 121 134 for tr in table.xpath('tr[td]'): 122 135 extra_data_url = tr.xpath('td[last()][a="Individual data points"]' ··· 151 164 152 165 @staticmethod 153 166 def parse_transition_data(table, symbol_table): 167 + """Parses the table containing properties regarding phase changes""" 154 168 results = [] 155 169 156 170 name = table.xpath('@summary').extract()[0] ··· 176 190 177 191 @staticmethod 178 192 def parse_generic_data(table): 193 + """Parses the common tables of 4 and 5 rows. 
Assumes they are of the 194 + form: 195 + Symbol (unit)|Temperature (K)|Method|Reference|Comment 196 + Symbol (unit)|Temperature (K)|Reference|Comment 197 + """ 179 198 results = [] 180 199 181 200 name = table.xpath('@summary').extract()[0] ··· 199 218 200 219 @staticmethod 201 220 def parse_antoine_data(table): 221 + """Parse table containing parameters for the Antoine equation""" 202 222 results = [] 203 223 204 224 name = table.xpath('@summary').extract()[0] ··· 217 237 return results 218 238 219 239 def parse_individual_datapoints(self, response): 240 + """Parses the page linked from aggregate data""" 220 241 sel = Selector(response) 221 242 table = sel.xpath('//table[@class="data"]')[0] 222 243