Skip to content

Commit ac71ef7

Browse files
authored
Gamma hyperparam (#60)
* Add gamma parameter * Use gamma to compute noisy edge threshold * Prevent negative noise threshold, improve comments * Rename avg_wts to noise_thresholds * Bump version 2.0.2
1 parent abf77fb commit ac71ef7

File tree

5 files changed

+75
-49
lines changed

5 files changed

+75
-49
lines changed

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Configuring setup()
55
[metadata]
66
name = pecanpy
7-
version = 2.0.2-dev
7+
version = 2.0.2
88
description = A parallelized, efficient, and accelerated node2vec
99
long_description = file: README.md
1010
long_description_content_type = text/markdown

src/pecanpy/cli.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,13 @@ def parse_args():
144144
help="Use node2vec+ extension",
145145
)
146146

147+
parser.add_argument(
148+
"--gamma",
149+
type=float,
150+
default=0,
151+
help="Noisy edge threshold parameter.",
152+
)
153+
147154
return parser.parse_args()
148155

149156

@@ -234,6 +241,7 @@ def read_graph(args):
234241
weighted = args.weighted
235242
directed = args.directed
236243
extend = args.extend
244+
gamma = args.gamma
237245
mode = args.mode
238246
task = args.task
239247

@@ -250,7 +258,7 @@ def read_graph(args):
250258
exit()
251259

252260
pecanpy_mode = getattr(pecanpy, mode, None)
253-
g = pecanpy_mode(p, q, workers, verbose, extend)
261+
g = pecanpy_mode(p, q, workers, verbose, extend, gamma)
254262

255263
read_func = g.read_npz if fp.endswith(".npz") else g.read_edg
256264
read_func(fp, weighted, directed)

src/pecanpy/pecanpy.py

+27-22
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class Base:
4141
4242
"""
4343

44-
def __init__(self, p, q, workers, verbose=False, extend=False):
44+
def __init__(self, p, q, workers, verbose=False, extend=False, gamma=0):
4545
"""Initialize node2vec base class.
4646
4747
Args:
@@ -53,7 +53,11 @@ def __init__(self, p, q, workers, verbose=False, extend=False):
5353
workers (int): number of threads to be spawned for running node2vec
5454
including walk generation and word2vec embedding.
5555
verbose (bool): show progress bar for walk generation.
56-
extend (bool): ``True`` if use node2vec+ extension, default is ``False``
56+
extend (bool): use node2vec+ extension if set to :obj:`True`
57+
(default: :obj:`False`).
58+
gamma (float): Multiplication factor for the std term of edge
59+
weights added to the average edge weights as the noisy edge
60+
threshold, only used by node2vec+ (default: 0)
5761
5862
"""
5963
super().__init__()
@@ -62,6 +66,7 @@ def __init__(self, p, q, workers, verbose=False, extend=False):
6266
self.workers = workers
6367
self.verbose = verbose
6468
self.extend = extend
69+
self.gamma = gamma
6570

6671
def _map_walk(self, walk_idx_ary):
6772
"""Map walk from node index to node ID.
@@ -148,16 +153,16 @@ def setup_get_normalized_probs(self):
148153
probability computation function ``get_extended_normalized_probs``,
149154
if node2vec+ is used. Otherwise, return the normal transition function
150155
``get_normalized_probs`` with a trivial placeholder for average edge
151-
weights array ``avg_wts``.
156+
weights array ``noise_thresholds``.
152157
153158
"""
154159
if self.extend: # use n2v+
155160
get_normalized_probs = self.get_extended_normalized_probs
156-
avg_wts = self.get_average_weights()
161+
noise_thresholds = self.get_noise_thresholds()
157162
else: # use normal n2v
158163
get_normalized_probs = self.get_normalized_probs
159-
avg_wts = None
160-
return get_normalized_probs, avg_wts
164+
noise_thresholds = None
165+
return get_normalized_probs, noise_thresholds
161166

162167
def preprocess_transition_probs(self):
163168
"""Null default preprocess method."""
@@ -221,9 +226,9 @@ def embed(
221226
class FirstOrderUnweighted(Base, SparseRWGraph):
222227
"""Directly sample edges for first order random walks."""
223228

224-
def __init__(self, p, q, workers, verbose=False, extend=False):
229+
def __init__(self, *args, **kwargs):
225230
"""Initialize FirstOrderUnweighted mode."""
226-
Base.__init__(self, p, q, workers, verbose, extend)
231+
Base.__init__(self, *args, **kwargs)
227232

228233
def get_move_forward(self):
229234
"""Wrap ``move_forward``."""
@@ -241,9 +246,9 @@ def move_forward(cur_idx, prev_idx=None):
241246
class PreCompFirstOrder(Base, SparseRWGraph):
242247
"""Precompute transition probabilities for first order random walks."""
243248

244-
def __init__(self, p, q, workers, verbose=False, extend=False):
249+
def __init__(self, *args, **kwargs):
245250
"""Initialize PreCompFirstOrder mode."""
246-
Base.__init__(self, p, q, workers, verbose, extend)
251+
Base.__init__(self, *args, **kwargs)
247252
self.alias_j = self.alias_q = None
248253

249254
def get_move_forward(self):
@@ -304,9 +309,9 @@ class PreComp(Base, SparseRWGraph):
304309
305310
"""
306311

307-
def __init__(self, p, q, workers, verbose=False, extend=False):
312+
def __init__(self, *args, **kwargs):
308313
"""Initialize PreComp mode node2vec."""
309-
Base.__init__(self, p, q, workers, verbose, extend)
314+
Base.__init__(self, *args, **kwargs)
310315
self.alias_j = self.alias_q = self.alias_indptr = self.alias_dim = None
311316

312317
def get_move_forward(self):
@@ -390,7 +395,7 @@ def preprocess_transition_probs(self):
390395
q = self.q
391396

392397
# Retrieve transition probability computation callback function
393-
get_normalized_probs, avg_wts = self.setup_get_normalized_probs()
398+
get_normalized_probs, noise_thresholds = self.setup_get_normalized_probs()
394399

395400
# Determine the dimensionality of the 2nd order transition probs
396401
n_nodes = self.indptr.size - 1 # number of nodes
@@ -423,7 +428,7 @@ def compute_all_transition_probs():
423428
q,
424429
idx,
425430
nbr,
426-
avg_wts,
431+
noise_thresholds,
427432
)
428433

429434
start = offset + dim * nbr_idx
@@ -444,9 +449,9 @@ class SparseOTF(Base, SparseRWGraph):
444449
445450
"""
446451

447-
def __init__(self, p, q, workers, verbose=False, extend=False):
452+
def __init__(self, *args, **kwargs):
448453
"""Initialize SparseOTF mode node2vec."""
449-
Base.__init__(self, p, q, workers, verbose, extend)
454+
Base.__init__(self, *args, **kwargs)
450455

451456
def get_move_forward(self):
452457
"""Wrap ``move_forward``.
@@ -467,7 +472,7 @@ def get_move_forward(self):
467472
p = self.p
468473
q = self.q
469474

470-
get_normalized_probs, avg_wts = self.setup_get_normalized_probs()
475+
get_normalized_probs, noise_thresholds = self.setup_get_normalized_probs()
471476

472477
@njit(nogil=True)
473478
def move_forward(cur_idx, prev_idx=None):
@@ -480,7 +485,7 @@ def move_forward(cur_idx, prev_idx=None):
480485
q,
481486
cur_idx,
482487
prev_idx,
483-
avg_wts,
488+
noise_thresholds,
484489
)
485490
cdf = np.cumsum(normalized_probs)
486491
choice = np.searchsorted(cdf, np.random.random())
@@ -499,9 +504,9 @@ class DenseOTF(Base, DenseRWGraph):
499504
500505
"""
501506

502-
def __init__(self, p, q, workers, verbose=False, extend=False):
507+
def __init__(self, *args, **kwargs):
503508
"""Initialize DenseOTF mode node2vec."""
504-
Base.__init__(self, p, q, workers, verbose, extend)
509+
Base.__init__(self, *args, **kwargs)
505510

506511
def get_move_forward(self):
507512
"""Wrap ``move_forward``.
@@ -521,7 +526,7 @@ def get_move_forward(self):
521526
p = self.p
522527
q = self.q
523528

524-
get_normalized_probs, avg_wts = self.setup_get_normalized_probs()
529+
get_normalized_probs, noise_thresholds = self.setup_get_normalized_probs()
525530

526531
@njit(nogil=True)
527532
def move_forward(cur_idx, prev_idx=None):
@@ -533,7 +538,7 @@ def move_forward(cur_idx, prev_idx=None):
533538
q,
534539
cur_idx,
535540
prev_idx,
536-
avg_wts,
541+
noise_thresholds,
537542
)
538543
cdf = np.cumsum(normalized_probs)
539544
choice = np.searchsorted(cdf, np.random.random())

src/pecanpy/rw/dense_rw.py

+19-12
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,16 @@
77
class DenseRWGraph(DenseGraph):
88
"""Dense Graph object equipped with random walk computation."""
99

10-
def get_average_weights(self):
10+
def get_noise_thresholds(self):
1111
"""Compute noisy edge thresholds: mean + gamma * std of each node's edge weights, floored at 0."""
12-
deg_ary = self.data.sum(axis=1)
13-
n_nbrs_ary = self.nonzero.sum(axis=1)
14-
return deg_ary / n_nbrs_ary
12+
num_nodes = len(self.IDlst)
13+
average_weight_ary = np.zeros(num_nodes, dtype=np.float32)
14+
for i in range(num_nodes):
15+
weights = self.data[i, self.nonzero[i]]
16+
average_weight_ary[i] = weights.mean() + self.gamma * weights.std()
17+
average_weight_ary = np.maximum(average_weight_ary, 0)
18+
19+
return average_weight_ary
1520

1621
def get_has_nbrs(self):
1722
"""Wrap ``has_nbrs``."""
@@ -87,14 +92,16 @@ def get_extended_normalized_probs(
8792
if prev_idx is not None: # 2nd order biased walks
8893
prev_nbrs_weight = data[prev_idx].copy()
8994

90-
inout_ind = cur_nbrs_ind & (prev_nbrs_weight < average_weight_ary)
91-
inout_ind[prev_idx] = False # exclude previous state from out biases
95+
# Note: we assume here the network is undirected, hence the edge
96+
# weight connecting the next to prev is the same as the reverse.
97+
out_ind = cur_nbrs_ind & (prev_nbrs_weight < average_weight_ary)
98+
out_ind[prev_idx] = False # exclude previous state from out biases
9299

93100
# print("CURRENT: ", cur_idx)
94-
# print("INOUT: ", np.where(inout_ind)[0])
95-
# print("NUM INOUT: ", inout_ind.sum(), "\n")
101+
# print("INOUT: ", np.where(out_ind)[0])
102+
# print("NUM INOUT: ", out_ind.sum(), "\n")
96103

97-
t = prev_nbrs_weight[inout_ind] / average_weight_ary[inout_ind]
104+
t = prev_nbrs_weight[out_ind] / average_weight_ary[out_ind]
98105
# optional nonlinear parameterization
99106
# b = 1; t = b * t / (1 - (b - 1) * t)
100107

@@ -103,10 +110,10 @@ def get_extended_normalized_probs(
103110

104111
# suppress noisy edges
105112
alpha[
106-
unnormalized_probs[inout_ind] < average_weight_ary[cur_idx]
113+
unnormalized_probs[out_ind] < average_weight_ary[cur_idx]
107114
] = np.minimum(1, 1 / q)
108-
unnormalized_probs[inout_ind] *= alpha # apply out biases
109-
unnormalized_probs[prev_idx] /= p # apply the return bias
115+
unnormalized_probs[out_ind] *= alpha # apply out biases
116+
unnormalized_probs[prev_idx] /= p # apply the return bias
110117

111118
unnormalized_probs = unnormalized_probs[cur_nbrs_ind]
112119
normalized_probs = unnormalized_probs / unnormalized_probs.sum()

src/pecanpy/rw/sparse_rw.py

+19-13
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,19 @@ def has_nbrs(idx):
1717

1818
return has_nbrs
1919

20-
def get_average_weights(self):
20+
def get_noise_thresholds(self):
2121
"""Compute noisy edge thresholds: mean + gamma * std of each node's edge weights, floored at 0."""
2222
data = self.data
2323
indptr = self.indptr
2424

2525
num_nodes = len(self.IDlst)
2626
average_weight_ary = np.zeros(num_nodes, dtype=np.float32)
27-
for idx in range(num_nodes):
28-
average_weight_ary[idx] = data[indptr[idx] : indptr[idx + 1]].mean()
27+
for i in range(num_nodes):
28+
average_weight_ary[i] = (
29+
data[indptr[i] : indptr[i + 1]].mean()
30+
+ self.gamma * data[indptr[i] : indptr[i + 1]].std()
31+
)
32+
average_weight_ary = np.maximum(average_weight_ary, 0)
2933

3034
return average_weight_ary
3135

@@ -226,7 +230,7 @@ def isnotin(ptr_ary1, ptr_ary2):
226230

227231

228232
@njit(nogil=True)
229-
def isnotin_extended(ptr_ary1, ptr_ary2, wts_ary2, avg_wts):
233+
def isnotin_extended(ptr_ary1, ptr_ary2, wts_ary2, noise_thresholds):
230234
"""Find node2vec+ out edges.
231235
232236
The node2vec+ out edges is determined by considering the edge weights
@@ -242,8 +246,9 @@ def isnotin_extended(ptr_ary1, ptr_ary2, wts_ary2, avg_wts):
242246
the neighbors of the previous state
243247
wts_ary2 (:obj: `numpy.ndarray` of :obj:`float32`): array of edge
244248
weights of the previous state
245-
avg_wts (:obj: `numpy.ndarray` of :obj:`float32`): array of average
246-
edge weights of each node
249+
noise_thresholds (:obj: `numpy.ndarray` of :obj:`float32`): array of
250+
noisy edge threshold computed based on the average and the std of
251+
the edge weights of each node
247252
248253
Return:
249254
Indicator of whether a neighbor of the current state is considered as
@@ -255,7 +260,7 @@ def isnotin_extended(ptr_ary1, ptr_ary2, wts_ary2, avg_wts):
255260
t = np.zeros(ptr_ary1.size, dtype=np.float32)
256261
idx2 = 0
257262
for idx1 in range(ptr_ary1.size):
258-
if idx2 == ptr_ary2.size: # end of ary2
263+
if idx2 >= ptr_ary2.size: # end of ary2
259264
break
260265

261266
ptr1 = ptr_ary1[idx1]
@@ -265,21 +270,22 @@ def isnotin_extended(ptr_ary1, ptr_ary2, wts_ary2, avg_wts):
265270
continue
266271

267272
elif ptr1 == ptr2: # found a matching value
268-
if wts_ary2[idx2] >= avg_wts[ptr2]: # check if loose
273+
# If connection is not loose, identify as an in-edge
274+
if wts_ary2[idx2] >= noise_thresholds[ptr2]:
269275
indicator[idx1] = False
270276
else:
271-
t[idx1] = wts_ary2[idx2] / avg_wts[ptr2]
277+
t[idx1] = wts_ary2[idx2] / noise_thresholds[ptr2]
272278
idx2 += 1
273279

274280
elif ptr1 > ptr2:
275-
# sweep through ptr_ary2 until ptr2 catch up on ptr1
276-
for j in range(idx2, ptr_ary2.size):
281+
# Sweep through ptr_ary2 until ptr2 catches up with ptr1
282+
for j in range(idx2 + 1, ptr_ary2.size):
277283
ptr2 = ptr_ary2[j]
278284
if ptr2 == ptr1:
279-
if wts_ary2[j] >= avg_wts[ptr2]:
285+
if wts_ary2[j] >= noise_thresholds[ptr2]:
280286
indicator[idx1] = False
281287
else:
282-
t[idx1] = wts_ary2[j] / avg_wts[ptr2]
288+
t[idx1] = wts_ary2[j] / noise_thresholds[ptr2]
283289
idx2 = j + 1
284290
break
285291

0 commit comments

Comments
 (0)