My omnium-gatherum of scripts and source code.
at main 1.6 kB view raw
"""Report average lines per source file for a handful of languages.

Walks ``search_dir``, classifies files by extension using the table in
``Programming_Languages_Extensions.json`` (read from the working directory)
and prints one CSV row for each language in ``important_languages`` that
was found.
"""
import functools
import json
import os
import re

LANGUAGES_FILE = "Programming_Languages_Extensions.json"

search_dir = "."
important_languages = ["C++", "Haskell", "APL", "Python", "Rust", "Hare"]

# Raw strings: "\." and "\w" are not valid string escapes, so the non-raw
# spelling triggers an invalid-escape warning on current Python versions.
_EXTENSION_RE = re.compile(r"\.\w*$")

# NOTE(review): this only accepts names of the form <word chars>.<word chars>,
# so files such as "my-tool.py" or "archive.tar.gz" are never counted.
# Left unchanged because widening it would alter the reported numbers.
regex = re.compile(r"\w+\.\w*$")


@functools.lru_cache(maxsize=None)
def _load_languages():
    """Parse the language table from ``LANGUAGES_FILE`` once, on first use."""
    with open(LANGUAGES_FILE, encoding="utf-8") as user_file:
        return json.load(user_file)


def _find_language(ext, languages=None):
    """Return the first table entry whose "extensions" contains *ext*, else None."""
    if languages is None:
        languages = _load_languages()
    for entry in languages:
        if ext in entry["extensions"]:
            return entry
    return None


def get_extension(s: str) -> str:
    """Return the trailing ``.suffix`` of *s* (dot included), or "" if none.

    Only a final dot followed by zero or more word characters counts, so
    ``"a.tar.gz"`` gives ``".gz"`` and ``"name."`` gives ``"."``.
    """
    m = _EXTENSION_RE.search(s)
    return m.group(0) if m else ""


def get_lang(ext: str, languages=None) -> str:
    """Return the name of the first language listing *ext*, or "NONE".

    Args:
        ext: Extension including the leading dot, e.g. ``".py"``.
        languages: Optional list of table entries (dicts with "name", "type"
            and "extensions" keys). Defaults to the table in
            ``LANGUAGES_FILE``.
    """
    entry = _find_language(ext, languages)
    return "NONE" if entry is None else str(entry["name"])


def get_type(ext: str, languages=None) -> str:
    """Return the "type" of the first language listing *ext*, or "NONE".

    Args:
        ext: Extension including the leading dot, e.g. ``".py"``.
        languages: Optional list of table entries; see ``get_lang``.
    """
    entry = _find_language(ext, languages)
    return "NONE" if entry is None else str(entry["type"])


def _find_candidate_files(directory):
    """Return the paths under *directory* whose file name matches ``regex``."""
    matches = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if regex.match(name):
                matches.append(os.path.join(root, name))
    return matches


def _count_lines(path):
    """Count the lines of *path*, read as bytes so no decoding can fail."""
    with open(path, "rb") as f:
        return sum(1 for _ in f)


def count_lines_by_language(paths, languages=None):
    """Tally lines and file counts per programming language.

    Args:
        paths: Iterable of file paths to classify by extension.
        languages: Optional list of table entries; see ``get_lang``.

    Returns:
        Dict mapping language name to ``(total_lines, file_count)``, in the
        order each language was first encountered. Only entries whose type
        is "programming" are included.
    """
    lang_counts = {}
    for path in paths:
        # One table lookup per file, done before the file is opened, so
        # files that will be discarded anyway are never read.
        entry = _find_language(get_extension(path), languages)
        if entry is None or str(entry["type"]) != "programming":
            continue
        lang = str(entry["name"])
        total_lines, file_count = lang_counts.get(lang, (0, 0))
        lang_counts[lang] = (total_lines + _count_lines(path), file_count + 1)
    return lang_counts


def main() -> None:
    """Scan ``search_dir`` and print the CSV report to stdout."""
    lang_counts = count_lines_by_language(_find_candidate_files(search_dir))

    # A sorted copy of the tallies used to be built here and thrown away.
    # It never affected the output, and its key (files / lines) raised
    # ZeroDivisionError whenever a language had zero lines in total, so it
    # is gone. Rows keep their first-seen order.

    # NOTE(review): the column is labelled "percent" but the value printed
    # is average lines per file. Header kept as-is for existing consumers.
    print("Language,percent")
    for lang, (total_lines, file_count) in lang_counts.items():
        if lang in important_languages:
            print(lang, float(total_lines) / file_count, sep=",")


if __name__ == "__main__":
    main()