+3
.gitignore
+3
.gitignore
+37
mails/clean-spam.py
+37
mails/clean-spam.py
···
1
+
from pathlib import Path
2
+
import re
3
+
from sys import argv
4
+
import json
5
+
6
+
7
+
# Location of the persisted blocklist: a JSON file stored next to this script.
domains_blocklist_path = Path(__file__).parent / "spamdomains.json"

# JSON has no set type, so the file holds a plain list.  Convert it to a set
# because the rest of the script combines the blocklist with other sets via
# `&` and `|=`, both of which raise TypeError on a list.  A missing file
# (e.g. the very first run) means "nothing blocked yet" instead of a crash,
# so that the `domains` command can create the file.
try:
    domains_blocklist = set(json.loads(domains_blocklist_path.read_text()))
except FileNotFoundError:
    domains_blocklist = set()
9
+
10
+
def domains_of_email(email: str):
    """Collect the host names of every http(s) link found in a mail.

    Args:
        email: Full text of the mail.

    Returns:
        A set with one entry per distinct host; empty when the text
        contains no http:// or https:// link.
    """
    # With a single capture group, findall yields just the captured host
    # strings; the set removes duplicates.
    return set(re.findall(r'https?://([\w.-]+)', email))
15
+
16
+
def is_spam(email: str) -> bool:
    """Decide whether a mail should be trashed.

    A mail counts as spam when it is empty (or whitespace only), or when at
    least one domain linked from it appears in the module-level
    ``domains_blocklist``.

    Args:
        email: Full text of the mail.

    Returns:
        True if the mail is considered spam, False otherwise.
    """
    if not email.strip():
        return True
    # isdisjoint() accepts any iterable, so this works whether the blocklist
    # is a set or the plain list that json.loads() produces; the `&` operator
    # raises TypeError when one operand is a list.
    return not domains_of_email(email).isdisjoint(domains_blocklist)
18
+
19
+
# Directory holding this script, the mail files (*.txt) and the blocklist.
mails_dir = Path(__file__).parent

if len(argv) >= 2 and argv[1] == "domains":
    # "domains" mode: add every link domain found in the mails currently
    # present to the blocklist and persist it.
    # Work on a real set: the blocklist may have been loaded from JSON as a
    # list, and `|=` between a list and a set raises TypeError.
    known_domains = set(domains_blocklist)
    old_blocklist_size = len(known_domains)
    for f in mails_dir.glob('*.txt'):
        known_domains |= domains_of_email(f.read_text())
    domains_blocklist = known_domains
    added_count = len(domains_blocklist) - old_blocklist_size
    # Sorted so the file content is stable between runs instead of following
    # arbitrary set iteration order.
    domains_blocklist_path.write_text(json.dumps(sorted(domains_blocklist)))
    if added_count != 0:
        print(f"Added {added_count} domains to blocklist")
else:
    # Default mode: move every spam mail into the ".trash" sub-directory.
    trash_dir = mails_dir / ".trash"
    trashed_count = 0
    # Materialise the listing first so that moving files does not interfere
    # with the directory scan still in progress.
    for mailfile in list(mails_dir.glob("*.txt")):
        if is_spam(mailfile.read_text()):
            # rename() fails if the target directory is missing, so create
            # it on demand.
            trash_dir.mkdir(exist_ok=True)
            mailfile.rename(trash_dir / mailfile.name)
            print(f"Trashed {mailfile.name}")
            trashed_count += 1
    print(f"\nTrashed {trashed_count} mails")
    print(f"Use `{Path(__file__).name} domains` to update spam domains blocklist (by adding all link domains from mails you currently have)")
36
+
37
+