this repo has no description

✨ Add Python script to clean spam mails

Changed files
+40
mails
+3
.gitignore
··· 47 47 database.json 48 48 .env 49 49 deadlinks.log 50 + 51 + mails/*.txt 52 + mails/*.json
+37
mails/clean-spam.py
"""Trash spam mails, or grow the spam-domain blocklist.

Usage (mails are the ``*.txt`` files that sit next to this script):

* no argument      -- move every mail that is blank or links to a blocklisted
                      domain into the ``.trash`` sub-directory.
* ``domains``      -- add every link domain found in the current mails to
                      ``spamdomains.json``.
"""

from pathlib import Path
import re
from sys import argv
import json


domains_blocklist_path = Path(__file__).parent / "spamdomains.json"

# Host part of every http(s) link; compiled once instead of per mail.
_LINK_DOMAIN = re.compile(r'https?://([\w.-]+)')


def _load_blocklist(path: Path) -> set:
    """Return the blocklist stored at *path* as a set (empty if missing)."""
    try:
        raw = path.read_text()
    except FileNotFoundError:
        # First run: the blocklist file has not been created yet.
        return set()
    # The file holds a JSON array, which json.loads turns into a list. The
    # script intersects and merges the blocklist with sets, so convert it.
    return set(json.loads(raw))


domains_blocklist = _load_blocklist(domains_blocklist_path)


def domains_of_email(email: str) -> set:
    """Return the set of domains that *email* links to via http(s) URLs."""
    return {match.group(1) for match in _LINK_DOMAIN.finditer(email)}


def is_spam(email: str) -> bool:
    """Return True if *email* is blank or links to a blocklisted domain."""
    if not email.strip():
        return True
    return not domains_blocklist.isdisjoint(domains_of_email(email))


def _read_mail(mailfile: Path) -> str:
    """Return the text of *mailfile* without failing on undecodable bytes."""
    # One badly encoded mail should not abort the whole run.
    return mailfile.read_text(errors="replace")


def main() -> None:
    """Run the mode selected by the first command-line argument."""
    mail_dir = Path(__file__).parent

    if len(argv) >= 2 and argv[1] == "domains":
        old_blocklist_size = len(domains_blocklist)
        for mailfile in mail_dir.glob('*.txt'):
            # In-place update keeps the module-level set that is_spam() reads.
            domains_blocklist.update(domains_of_email(_read_mail(mailfile)))
        added_count = len(domains_blocklist) - old_blocklist_size
        # Sets are not JSON-serialisable; store a sorted array.
        domains_blocklist_path.write_text(json.dumps(sorted(domains_blocklist)))
        if added_count != 0:
            print(f"Added {added_count} domains to blocklist")
    else:
        trash_dir = mail_dir / ".trash"
        # rename() fails if the destination directory does not exist.
        trash_dir.mkdir(exist_ok=True)
        trashed_count = 0
        # Snapshot the listing so files are not moved while it is scanned.
        for mailfile in sorted(mail_dir.glob("*.txt")):
            if is_spam(_read_mail(mailfile)):
                mailfile.rename(trash_dir / mailfile.name)
                print(f"Trashed {mailfile.name}")
                trashed_count += 1
        print(f"\nTrashed {trashed_count} mails")
        print(f"Use `{Path(__file__).name} domains` to update spam domains blocklist (by adding all link domains from mails you currently have)")


if __name__ == "__main__":
    main()