-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
128 lines (104 loc) · 3.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import math
from graph import *
class MIMIC:
"""
mutual information maximizing input clustering
"""
initial_population: int = 50
death_rate: float = 0.5
birth_rate: float = 1
k: int
G: Graph # len(V) = n
Pt: list[ColouredGraph] # len = N
"""population at current generation t"""
t: int = 0 # generation #
def run(self) -> ColouredGraph:
while C(self.Pt[0]) > 0:
self.Pt += self.beta_mu(self.alpha_MIMIC(self.S(self.Pt)))
self.t += 1
return self.Pt[0]
def beta_mu(self, model: DependencyTree) -> list[ColouredGraph]:
"""
β_μ = sampling operator ∈ O(Nn)
Generate more samples from the distribution p^{θ_t}.
"""
sampling_ratio = self.birth_rate
cnt = round(len(self.Pt) * sampling_ratio)
# model ≈ p^{θ_t}
return [model.sample(self.G) for _ in range(cnt)]
def S(self, population: list[ColouredGraph]) -> list[ColouredGraph]:
"""
S = selection operator ∈ O(N lg N)
Set θ_{t + 1} equal to the Nth percentile of the data.
Retain only the points less than θ_{t + 1}.
"""
survival_rate = 1 - self.death_rate # survival_rate ∝ N
cnt = round(len(population) * survival_rate) # Nth percentile
population.sort(key=C)
# θ_{t + 1} = population[cnt]
return population[:cnt]
def alpha_MIMIC(self, sample: list[ColouredGraph]) -> DependencyTree:
"""
α_MIMIC = model-building operator ∈ O(N n^2)
Update the parameters of density estimator of p^{θ_t} from a sample.
"""
self.Pt = sample
gmi = MutualInformationGraph(len(self.G.V), self.I)
T = gmi.MST()
return DependencyTree(T, self.eP, self.ecP) # ≈ p^{θ_t}
def eP(self, vid: int) -> list[float]:
"""
empirical (unconditional) probability
"""
eP = [0 for _ in range(self.k)]
for G in self.Pt:
eP[G.V[vid].colour] += 1
t = len(self.Pt)
return [cnt / t for cnt in eP]
def ecP(self, parent_id: int, child_id: int) -> list[list[float]]:
"""
empirical conditional probability
"""
P = [[0 for _ in range(self.k)] for _ in range(self.k)]
for G in self.Pt:
P[G.V[parent_id].colour][G.V[child_id].colour] += 1
count_sum = [max(1, sum(P[colour])) for colour in range(self.k)]
return [[P[i][j] / count_sum[i] for j in range(self.k)] for i in range(self.k)]
def I(self, vid: int, uid: int) -> float:
"""
mutual information
https://en.wikipedia.org/wiki/Mutual_information#In_terms_of_PMFs_for_discrete_distributions
"""
Pvu = [[0 for _ in range(self.k)] for _ in range(self.k)]
Pv = [0 for _ in range(self.k)]
Pu = [0 for _ in range(self.k)]
for G in self.Pt:
Pvu[G.V[vid].colour][G.V[uid].colour] += 1
Pv[G.V[vid].colour] += 1
Pu[G.V[uid].colour] += 1
den = len(self.Pt)
ans = 0
for cv in range(self.k):
for cu in range(self.k):
ans += Pvu[cv][cu] / den * math.log(max(1, Pvu[cv][cu]) * den / max(1, Pv[cv] * Pu[cu]))
return ans
def __init__(self, k: int, G: Graph):
self.k = k
self.G = G
def uP(_):
return [1 / k for _ in range(k)]
def ucP(_, __):
return [[1 / k for _ in range(k)] for _ in range(k)]
def I(vid, uid):
return len(G.N[G.V[vid]]) + len(G.N[G.V[uid]])
gmi = MutualInformationGraph(len(G.V), I)
T = gmi.MST()
model = DependencyTree(T, uP, ucP)
self.Pt = [model.sample(self.G) for _ in range(self.initial_population)]
self.t = 0
def solve(k: int, G: Graph) -> ColouredGraph:
return MIMIC(k, G).run()
solution = solve(3, Petersen)
print("vertices' colours are:", end=' ')
for x in solution.V:
print(x.colour, end=' ')