this repo has no description
at main 1.7 kB view raw
from collections import UserString
import logging
import pickle
from typing import Dict, Optional, Set

import click

from config import CONFIG
from indexer import FollowIndexer
import indexer


logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)

logger = logging.getLogger(__name__)


def _build_proximity_map(
    graph: Dict[str, Set[str]],
) -> Dict[str, Dict[str, Set[str]]]:
    """Compute the 1-hop and 2-hop neighbourhoods for every key in *graph*.

    Args:
        graph: Adjacency mapping from a DID to the set of subjects it follows.

    Returns:
        A mapping from each DID in *graph* to a dict with two entries:
        ``"hop1"`` is the DID's own follow set (the same set object as in
        *graph*, not a copy) and ``"hop2"`` is everything followed by those
        subjects, minus the hop-1 set and minus the DID itself.
    """
    prox_map: Dict[str, Dict[str, Set[str]]] = {}

    for did, first in graph.items():
        second: Set[str] = set()
        for subject in first:
            # Subjects that never appear as a follower have no outgoing
            # edges in the graph, so they contribute nothing.
            second.update(graph.get(subject, ()))

        prox_map[did] = {
            "hop1": first,
            "hop2": second - first - {did},
        }

    return prox_map


# Called with parentheses so the decorator also works on click releases
# older than 8.1, which do not accept the bare ``@click.command`` form.
@click.command()
@click.option(
    "--ch-host",
)
@click.option(
    "--ch-port",
    type=int,
)
@click.option(
    "--ch-user",
)
@click.option(
    "--ch-pass",
)
def main(
    ch_host: Optional[str],
    ch_port: Optional[int],
    ch_user: Optional[str],
    ch_pass: Optional[str],
):
    """Build a follow-proximity map and pickle it to prox_map.pkl.

    Any connection option that is omitted (or falsy) falls back to the
    corresponding value in CONFIG.
    """
    logger.info("Building follow graph...")

    # Named follow_indexer so the local does not shadow the imported
    # ``indexer`` module.
    follow_indexer = FollowIndexer(
        clickhouse_host=ch_host or CONFIG.clickhouse_host,
        clickhouse_port=ch_port or CONFIG.clickhouse_port,
        clickhouse_user=ch_user or CONFIG.clickhouse_user,
        clickhouse_pass=ch_pass or CONFIG.clickhouse_pass,
        batch_size=1000,
    )

    graph: Dict[str, Set[str]] = {}

    def build_graph(did: str, subject: str):
        # Record one follow edge did -> subject.
        graph.setdefault(did, set()).add(subject)

    # NOTE(review): stream_follows is presumably invoking the callback once
    # per follow record as (did, subject) -- confirm against FollowIndexer.
    follow_indexer.stream_follows(build_graph)

    prox_map = _build_proximity_map(graph)

    # NOTE: pickle files must only be loaded from trusted sources; consumers
    # of prox_map.pkl should treat it accordingly.
    with open("prox_map.pkl", "wb") as f:
        pickle.dump(prox_map, f)

    logger.info(
        f"Finished building proximity map, saved to prox_map.pkl. {len(prox_map):,} users in map."
    )


if __name__ == "__main__":
    main()