My omnium-gatherum of scripts and source code.
1import json
2import os
3import re
4
# Load the language table once at start-up; get_lang() and get_type() scan it.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default.
with open('Programming_Languages_Extensions.json', encoding='utf-8') as user_file:
    parsed_json = json.load(user_file)
7
def get_extension(s: str) -> str:
    """Return the trailing extension of *s*, including its leading dot.

    The extension is the last ``.`` that is followed only by word
    characters up to the end of the string, so ``"a/b.tar.gz"`` yields
    ``".gz"`` and a trailing bare dot yields ``"."``.  An empty string is
    returned when *s* has no such suffix.
    """
    # Raw string: "\." and "\w" are regex escapes, not string escapes.
    m = re.search(r'\.\w*$', s)
    return m.group(0) if m else ""
14
def get_lang(ext: str) -> str:
    """Look up the language name registered for extension *ext*.

    Scans ``parsed_json`` in order and returns the ``"name"`` of the first
    entry whose ``"extensions"`` contains *ext*, converted to ``str``.
    Returns ``"NONE"`` when no entry lists the extension.
    """
    names = (
        str(entry["name"])
        for entry in parsed_json
        if ext in entry["extensions"]
    )
    # Only the first hit is consumed; the default covers "no match".
    return next(names, "NONE")
22
def get_type(ext: str) -> str:
    """Look up the language type registered for extension *ext*.

    Scans ``parsed_json`` in order and returns the ``"type"`` of the first
    entry whose ``"extensions"`` contains *ext*, converted to ``str``.
    Returns ``"NONE"`` when no entry lists the extension.
    """
    kinds = (
        str(entry["type"])
        for entry in parsed_json
        if ext in entry["extensions"]
    )
    # Only the first hit is consumed; the default covers "no match".
    return next(kinds, "NONE")
30
search_dir = "."
# A file name qualifies when it is one run of word characters, a dot, and
# then only word characters up to the end of the name.
# NOTE(review): names containing a hyphen, more than one dot, or a leading
# dot do not fit this pattern and are skipped -- confirm that is intended.
regex = re.compile(r'\w+\.\w*$')
matches = []
for root, dirs, files in os.walk(search_dir):
    # os.path.join uses the platform's separator instead of a literal "/".
    matches.extend(
        os.path.join(root, name) for name in files if regex.match(name)
    )
38
39
# Maps a language name to a (total line count, number of files) pair.
lang_counts = {}
for file in matches:
    ext = get_extension(file)
    # Filter on the extension first so that files which are not source code
    # are never opened or read.
    if get_type(ext) != "programming":
        continue
    lang = get_lang(ext)
    # Binary mode: counting lines needs no decoding, so the file's text
    # encoding does not matter.
    with open(file, "rb") as f:
        num_lines = sum(1 for _ in f)
    # Tally under the raw extension when get_lang() reports "NONE".
    key = ext if lang == "NONE" else lang
    total_lines, file_count = lang_counts.get(key, (0, 0))
    lang_counts[key] = (total_lines + num_lines, file_count + 1)
54
important_languages = ["C++", "Haskell", "APL", "Python", "Rust", "Hare"]

# Rank languages by average lines per file, largest first.  The divisor is
# the file count (at least 1 for every entry), so a language whose files are
# all empty cannot raise ZeroDivisionError.
ranked = sorted(
    lang_counts.items(),
    key=lambda item: item[1][0] / item[1][1],
    reverse=True,
)

# NOTE(review): the header says "percent", but the value printed is total
# lines divided by file count (average lines per file) -- confirm which one
# is intended before relying on this column.
print("Language,percent")
for lang, (total_lines, file_count) in ranked:
    if lang in important_languages:
        print(lang, total_lines / file_count, sep=",")