"""Report the mean number of lines per source file for selected languages.

Walks ``search_dir`` recursively, maps every file's extension to a language
using the table in ``Programming_Languages_Extensions.json``, counts the lines
of the files whose language has type ``"programming"``, and prints one CSV row
per language listed in ``important_languages``.
"""
import json
import os
import re
import sys

LANGUAGES_FILE = 'Programming_Languages_Extensions.json'

# Directory scanned when the module is run as a script.
search_dir = "."

# Only these languages appear in the printed report.
important_languages = ["C++", "Haskell", "APL", "Python", "Rust", "Hare"]

# Language table; loaded from LANGUAGES_FILE on first use so that importing
# this module performs no I/O.
parsed_json = None

# A dot followed by word characters at the very end of the string.
_EXTENSION_RE = re.compile(r'\.\w*$')


def load_languages(path=LANGUAGES_FILE):
    """Return the parsed language table stored at *path*.

    The file is opened in binary mode so that ``json.load`` detects the
    encoding itself instead of depending on the locale's default.
    """
    with open(path, "rb") as user_file:
        return json.load(user_file)


def _default_languages():
    """Return the module-level language table, loading it on first use."""
    global parsed_json
    if parsed_json is None:
        parsed_json = load_languages()
    return parsed_json


def _find_language(ext, languages=None):
    """Return the first table entry whose "extensions" contains *ext*.

    Entries without an "extensions" key are treated as having none.
    Returns ``None`` when no entry matches.
    """
    if languages is None:
        languages = _default_languages()
    for entry in languages:
        if ext in entry.get("extensions", ()):
            return entry
    return None


def get_extension(s: str) -> str:
    """Return the trailing ``.ext`` of *s* (dot included), or "" if none."""
    m = _EXTENSION_RE.search(s)
    return m.group(0) if m else ""


def get_lang(ext: str, languages=None) -> str:
    """Return the name of the language that owns *ext*, or "NONE".

    *languages* is an optional table to search instead of the one loaded
    from ``LANGUAGES_FILE``.
    """
    entry = _find_language(ext, languages)
    return "NONE" if entry is None else str(entry["name"])


def get_type(ext: str, languages=None) -> str:
    """Return the type of the language that owns *ext*, or "NONE".

    *languages* is an optional table to search instead of the one loaded
    from ``LANGUAGES_FILE``.
    """
    entry = _find_language(ext, languages)
    return "NONE" if entry is None else str(entry["type"])


def count_lines(directory, languages=None):
    """Tally lines and files per programming language under *directory*.

    Returns a dict mapping language name to ``(total_lines, file_count)``.
    Only files whose extension belongs to an entry of type "programming"
    are opened. Files that cannot be read are reported on stderr and
    skipped instead of aborting the whole scan.
    """
    if languages is None:
        languages = _default_languages()

    # Memo: extension -> language name, or None when it is not a
    # programming language; avoids rescanning the table for every file.
    lang_by_ext = {}
    lang_counts = {}

    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # The extension is taken from the bare file name; names with
            # hyphens, spaces or several dots are handled like any other.
            ext = get_extension(filename)
            if not ext:
                continue

            if ext not in lang_by_ext:
                entry = _find_language(ext, languages)
                if entry is not None and entry.get("type") == "programming":
                    lang_by_ext[ext] = str(entry["name"])
                else:
                    lang_by_ext[ext] = None
            lang = lang_by_ext[ext]
            if lang is None:
                continue

            path = os.path.join(root, filename)
            try:
                # Binary mode: lines are counted without decoding, so the
                # file's text encoding does not matter.
                with open(path, "rb") as f:
                    num_lines = sum(1 for _ in f)
            except OSError as err:
                print(f"skipping {path}: {err}", file=sys.stderr)
                continue

            total_lines, file_count = lang_counts.get(lang, (0, 0))
            lang_counts[lang] = (total_lines + num_lines, file_count + 1)

    return lang_counts


def report_rows(lang_counts, languages_of_interest):
    """Return ``(language, mean_lines_per_file)`` rows for the report.

    Only languages in *languages_of_interest* are kept. Rows are ordered by
    mean lines per file, largest first, with the language name breaking
    ties so the order does not depend on directory traversal order.
    """
    rows = [
        (lang, total_lines / file_count)  # file_count is always >= 1
        for lang, (total_lines, file_count) in lang_counts.items()
        if lang in languages_of_interest
    ]
    rows.sort(key=lambda row: (-row[1], row[0]))
    return rows


def main() -> None:
    """Scan ``search_dir`` and print the CSV report to stdout."""
    lang_counts = count_lines(search_dir)
    # NOTE: the header says "percent", but the column holds the mean number
    # of lines per file. The text is kept unchanged for existing consumers.
    print("Language,percent")
    for lang, mean_lines in report_rows(lang_counts, important_languages):
        print(lang, mean_lines, sep=",")


if __name__ == "__main__":
    main()