-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess.py
74 lines (59 loc) · 2.65 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from datetime import datetime
from pathlib import Path
from time import time

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from tqdm import tqdm

from utils.constants import ATTRIBUTION_TEXT
from utils.db import get_mutuals
from utils.timing import timer
# Upper bound applied to max_fid in the __main__ block, which keeps the graph
# small (and the run short) while debugging.
MAX_FID_WHILE_DEBUGGING = 50
@timer
def generate_symmetric_graph(max_fid, fid_start_range, fid_end_range):
    """Build an undirected graph of mutual relationships between fids.

    Every fid from 1 to ``max_fid`` (inclusive) is added as a node first.
    Then, for each fid in ``fid_start_range``..``fid_end_range`` (inclusive),
    an edge is added between that fid and every fid returned by
    ``get_mutuals``.

    Note: adding an edge implicitly creates any endpoint that is not yet a
    node, so a mutual outside 1..``max_fid`` still ends up in the graph.

    Args:
        max_fid: Highest fid that is pre-registered as a node.
        fid_start_range: First fid whose mutuals are turned into edges.
        fid_end_range: Last fid (inclusive) whose mutuals are turned into
            edges.

    Returns:
        The populated ``nx.Graph``. The function is wrapped by ``@timer``;
        the ``__main__`` block unpacks the decorated call as
        ``(graph, seconds)`` -- presumably the decorator appends the elapsed
        time; confirm against ``utils.timing``.
    """
    graph = nx.Graph()

    # Register every fid up front so isolated fids still appear as nodes.
    node_ids = tqdm(range(1, max_fid + 1),
                    desc="adding nodes from 1 to max_fid",
                    leave=False)
    graph.add_nodes_from(node_ids)

    edge_desc = 'adding edges for fids {} to {}'.format(
        fid_start_range, fid_end_range)
    for source in tqdm(range(fid_start_range, fid_end_range + 1),
                       desc=edge_desc, leave=False):
        mutuals = get_mutuals(source)  # symmetric relationships only
        graph.add_edges_from(
            (source, other)
            for other in tqdm(mutuals, desc='edges for mutuals', leave=False)
        )

    return graph
if __name__ == '__main__':
    # simple networkx analysis adapted from https://www.datacamp.com/tutorial/social-network-analysis-python

    # The largest mutual of fid 3 is used as the upper fid bound, capped by
    # MAX_FID_WHILE_DEBUGGING.
    max_fid = min(max(get_mutuals(3)), MAX_FID_WHILE_DEBUGGING)
    print(f'generating graph for {max_fid} fids')

    # generate graph (the decorated call yields the graph and its run time)
    G, exec_time = generate_symmetric_graph(max_fid, 1, max_fid)
    print(f"graph has {G.number_of_nodes()} nodes "
          f"and {G.number_of_edges()} edges")
    print(f"[graph generation took : {exec_time:.1f}s]")

    # calculate clustering coefficient
    cc_t1 = time()
    cc = nx.average_clustering(G)
    cc_t2 = time()
    print(f"clustering coefficient: {cc:.2f}")
    print(f"[cluster coefficient calculation took {cc_t2 - cc_t1:.1f}s]")

    # draw graph in networkx
    print("starting to draw graph (this may take a long time, e.g., 1 minute for 100 fids)...")
    d_t1 = time()
    pos = nx.spring_layout(G)
    betCent = nx.betweenness_centrality(G, normalized=True, endpoints=True)
    # Scale node area with betweenness centrality so central fids stand out.
    node_size = [v * 10000 for v in betCent.values()]
    nx.draw_networkx(G, pos=pos, node_size=node_size, with_labels=False)
    plt.axis('off')
    plt.title(f"symmetric networkx graph of {max_fid} Farcaster fids")
    # NOTE(review): no transform is passed, so (0.5, 0.5) is interpreted in
    # data coordinates rather than axes fractions -- confirm this watermark
    # placement is intended.
    plt.text(0.5, 0.5, ATTRIBUTION_TEXT,
             fontsize=16, color='gray', alpha=0.5,
             ha='center', va='bottom')
    # Stop the clock before saving, so the reported drawing time excludes
    # writing the PNG.
    d_t2 = time()

    # Rank fids by betweenness centrality, most central first.
    most_central_fids = sorted(betCent, key=betCent.get, reverse=True)
    fid_centrality = pd.DataFrame(
        most_central_fids, columns=['fid, by most central first'])

    # Make sure the output directory exists; otherwise savefig raises
    # FileNotFoundError after all the expensive work above is already done.
    Path('out').mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    plt.savefig(f'out/plot-{ts}.png')

    print(f"[drawing graph for {max_fid} fids took {d_t2 - d_t1:.1f}s]")
    print(fid_centrality.head())