this repo has no description
1from collections import UserString
2import logging
3from typing import Dict, Optional, Set
4
5import click
6
7from config import CONFIG
8from indexer import FollowIndexer
9import indexer
10
11
# Module-level logger, named after this module so records can be traced
# back to their origin.
logger = logging.getLogger(__name__)

# Configure the root logger once at import time: INFO and above, with a
# timestamp, the emitting logger's name and the severity on every record.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
17
18
def _build_proximity_map(
    graph: Dict[str, Set[str]],
) -> Dict[str, Dict[str, Set[str]]]:
    """Derive one- and two-hop follow neighbourhoods for every key of *graph*.

    Args:
        graph: Maps a DID to the set of subjects that DID follows.

    Returns:
        A dict keyed by the same DIDs as *graph*. Each value has two entries:
        ``"hop1"`` is the DID's own follow set (the same set object that is
        stored in *graph*, not a copy) and ``"hop2"`` is everything followed
        by those subjects, minus the hop-1 set and minus the DID itself.
    """
    prox_map: Dict[str, Dict[str, Set[str]]] = {}

    for did, first in graph.items():
        second: Set[str] = set()
        for subject in first:
            # Subjects that never appear as a key contribute nothing; the
            # empty-tuple default avoids allocating a throwaway set per edge.
            second.update(graph.get(subject, ()))

        # Trim in place rather than building two intermediate sets.
        second -= first
        second.discard(did)

        prox_map[did] = {"hop1": first, "hop2": second}

    return prox_map


@click.command()
@click.option("--ch-host")
@click.option("--ch-port", type=int)
@click.option("--ch-user")
@click.option("--ch-pass")
def main(
    ch_host: Optional[str],
    ch_port: Optional[int],
    ch_user: Optional[str],
    ch_pass: Optional[str],
) -> None:
    """Build a follow proximity map and save it to prox_map.pkl.

    ClickHouse connection options that are omitted fall back to the
    values from the project configuration.
    """
    # Function-scope imports keep this command self-contained without
    # touching the module's import block.
    import pickle
    from collections import defaultdict

    logger.info("Building follow graph...")

    # Named `follow_indexer` so the imported `indexer` module is not shadowed.
    # NOTE: `or` means any falsy CLI value (e.g. an empty string or port 0)
    # also falls back to the CONFIG value.
    follow_indexer = FollowIndexer(
        clickhouse_host=ch_host or CONFIG.clickhouse_host,
        clickhouse_port=ch_port or CONFIG.clickhouse_port,
        clickhouse_user=ch_user or CONFIG.clickhouse_user,
        clickhouse_pass=ch_pass or CONFIG.clickhouse_pass,
        batch_size=1000,
    )

    # did -> set of subjects that did follows.
    graph: Dict[str, Set[str]] = defaultdict(set)

    def record_follow(did: str, subject: str) -> None:
        """Callback handed to stream_follows: record one follow edge."""
        graph[did].add(subject)

    follow_indexer.stream_follows(record_follow)

    prox_map = _build_proximity_map(graph)

    with open("prox_map.pkl", "wb") as f:
        pickle.dump(prox_map, f)

    logger.info(
        f"Finished building proximity map, saved to prox_map.pkl. {len(prox_map):,} users in map."
    )
82
83if __name__ == "__main__":
84 main()