Skip to content

Commit 1adeecd

Browse files
authored
Refactor typing (#82)
* Type NDArray * Fix doc annotations * Refactor typing * Ignore type alias error * Use alias type * Use type alias * Use type alias
1 parent d11571f commit 1adeecd

File tree

4 files changed

+107
-57
lines changed

4 files changed

+107
-57
lines changed

src/pecanpy/graph.py

+35-30
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
"""Lite graph objects used by pecanpy."""
2-
from typing import Dict
3-
from typing import Iterator
4-
from typing import List
5-
from typing import Optional
6-
from typing import Tuple
7-
82
import numpy as np
93

4+
from .typing import AdjMat
5+
from .typing import AdjNonZeroMat
6+
from .typing import CSR
7+
from .typing import Dict
8+
from .typing import Float32Array
9+
from .typing import Iterator
10+
from .typing import List
11+
from .typing import Optional
12+
from .typing import Tuple
13+
from .typing import Uint32Array
14+
1015

1116
class BaseGraph:
1217
"""Base Graph object.
@@ -78,8 +83,8 @@ class AdjlstGraph(BaseGraph):
7883
Python data structures like list and dict.
7984
8085
Examples:
81-
Read ``.edg`` file and create ``SparseGraph`` object using ``.read_edg``
82-
method.
86+
Read ``.edg`` file and create ``AdjlstGraph`` object using
87+
``.read_edg`` method.
8388
8489
>>> from pecanpy.graph import AdjlstGraph
8590
>>>
@@ -277,7 +282,7 @@ def save(self, path: str, unweighted: bool = False, delimiter: str = "\t"):
277282
terms = (h_id, t_id) if unweighted else (h_id, t_id, str(w))
278283
f.write(f"{delimiter.join(terms)}\n")
279284

280-
def to_csr(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
285+
def to_csr(self) -> CSR:
281286
"""Construct compressed sparse row matrix."""
282287
indptr = np.zeros(len(self.nodes) + 1, dtype=np.uint32)
283288
for i, row_data in enumerate(self._data):
@@ -297,16 +302,16 @@ def to_csr(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
297302

298303
return indptr, indices, data
299304

300-
def to_dense(self) -> np.ndarray:
305+
def to_dense(self) -> AdjMat:
301306
"""Construct dense adjacency matrix.
302307
303308
Note:
304309
This method does not return DenseGraph object, but instead returns
305-
dense adjacency matrix as ``numpy.ndarray``, the index is the same
310+
dense adjacency matrix as NDArray, the index is the same
306311
as that of ``nodes``.
307312
308313
Return:
309-
numpy.ndarray: Full adjacency matrix as 2d numpy array.
314+
NDArray: Full adjacency matrix as 2d numpy array.
310315
311316
"""
312317
n_nodes = len(self.nodes)
@@ -319,11 +324,11 @@ def to_dense(self) -> np.ndarray:
319324
return mat
320325

321326
@classmethod
322-
def from_mat(cls, adj_mat: np.ndarray, node_ids: List[str], **kwargs):
327+
def from_mat(cls, adj_mat: AdjMat, node_ids: List[str], **kwargs):
323328
"""Construct graph using adjacency matrix and node IDs.
324329
325330
Args:
326-
adj_mat(:obj:`numpy.ndarray`): 2D numpy array of adjacency matrix
331+
adj_mat(NDArray): 2D numpy array of adjacency matrix
327332
node_ids(:obj:`list` of str): node ID list
328333
329334
Return:
@@ -347,8 +352,8 @@ class SparseGraph(BaseGraph):
347352
"""Sparse Graph object that stores graph as adjacency list.
348353
349354
Examples:
350-
Read ``.edg`` file and create ``SparseGraph`` object using ``.read_edg``
351-
method.
355+
Read ``.edg`` file and create ``SparseGraph`` object using
356+
``.read_edg`` method.
352357
353358
>>> from pecanpy.graph import SparseGraph
354359
>>>
@@ -366,9 +371,9 @@ class SparseGraph(BaseGraph):
366371
def __init__(self):
367372
"""Initialize SparseGraph object."""
368373
super().__init__()
369-
self.data: Optional[np.ndarray] = None
370-
self.indptr: Optional[np.ndarray] = None
371-
self.indices: Optional[np.ndarray] = None
374+
self.data: Optional[Float32Array] = None
375+
self.indptr: Optional[Uint32Array] = None
376+
self.indices: Optional[Uint32Array] = None
372377

373378
@property
374379
def num_edges(self) -> int:
@@ -457,14 +462,14 @@ def from_adjlst_graph(cls, adjlst_graph, **kwargs):
457462
return g
458463

459464
@classmethod
460-
def from_mat(cls, adj_mat: np.ndarray, node_ids: List[str], **kwargs):
465+
def from_mat(cls, adj_mat: AdjMat, node_ids: List[str], **kwargs):
461466
"""Construct csr graph using adjacency matrix and node IDs.
462467
463468
Note:
464469
Only consider positive valued edges.
465470
466471
Args:
467-
adj_mat(:obj:`numpy.ndarray`): 2D numpy array of adjacency matrix
472+
adj_mat(NDArray): 2D numpy array of adjacency matrix
468473
node_ids(:obj:`list` of str): node ID list
469474
470475
"""
@@ -479,15 +484,15 @@ class DenseGraph(BaseGraph):
479484
"""Dense Graph object that stores graph as array.
480485
481486
Examples:
482-
Read ``.npz`` files and create ``DenseGraph`` object using ``read_npz``.
487+
Read ``.npz`` files and create ``DenseGraph`` object using ``read_npz``
483488
484489
>>> from pecanpy.graph import DenseGraph
485490
>>>
486491
>>> g = DenseGraph() # initialize DenseGraph object
487492
>>>
488493
>>> g.read_npz(path_to_npz_file, weighted=True, directed=False)
489494
490-
Read ``.edg`` files and create ``DenseGraph`` object using ``read_edg``.
495+
Read ``.edg`` files and create ``DenseGraph`` object using ``read_edg``
491496
492497
>>> from pecanpy.graph import DenseGraph
493498
>>>
@@ -505,8 +510,8 @@ class DenseGraph(BaseGraph):
505510
def __init__(self):
506511
"""Initialize DenseGraph object."""
507512
super().__init__()
508-
self._data: Optional[np.ndarray] = None
509-
self._nonzero: Optional[np.ndarray] = None
513+
self._data: Optional[AdjMat] = None
514+
self._nonzero: Optional[AdjNonZeroMat] = None
510515

511516
@property
512517
def num_edges(self) -> int:
@@ -517,18 +522,18 @@ def num_edges(self) -> int:
517522
raise ValueError("Empty graph.")
518523

519524
@property
520-
def data(self) -> Optional[np.ndarray]:
525+
def data(self) -> Optional[AdjMat]:
521526
"""Return the adjacency matrix."""
522527
return self._data
523528

524529
@data.setter
525-
def data(self, data: np.ndarray):
530+
def data(self, data: AdjMat):
526531
"""Set adjacency matrix and the corresponding nonzero matrix."""
527532
self._data = data.astype(float)
528533
self._nonzero = np.array(self._data != 0, dtype=bool)
529534

530535
@property
531-
def nonzero(self) -> Optional[np.ndarray]:
536+
def nonzero(self) -> Optional[AdjNonZeroMat]:
532537
"""Return the nonzero mask for the adjacency matrix."""
533538
return self._nonzero
534539

@@ -580,11 +585,11 @@ def from_adjlst_graph(cls, adjlst_graph, **kwargs):
580585
return g
581586

582587
@classmethod
583-
def from_mat(cls, adj_mat: np.ndarray, node_ids: List[str], **kwargs):
588+
def from_mat(cls, adj_mat: AdjMat, node_ids: List[str], **kwargs):
584589
"""Construct dense graph using adjacency matrix and node IDs.
585590
586591
Args:
587-
adj_mat(:obj:`numpy.ndarray`): 2D numpy array of adjacency matrix
592+
adj_mat(NDArray): 2D numpy array of adjacency matrix
588593
node_ids(:obj:`list` of str): node ID list
589594
590595
"""

src/pecanpy/pecanpy.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
"""Different strategies for generating node2vec walks."""
2-
from typing import Any
3-
from typing import Callable
4-
from typing import List
5-
from typing import Optional
6-
72
import numpy as np
83
from gensim.models import Word2Vec
9-
from nptyping import NDArray
104
from numba import njit
115
from numba import prange
126
from numba.np.ufunc.parallel import _get_thread_id
@@ -15,11 +9,16 @@
159
from .graph import BaseGraph
1610
from .rw import DenseRWGraph
1711
from .rw import SparseRWGraph
12+
from .typing import Embeddings
13+
from .typing import Float32Array
14+
from .typing import HasNbrs
15+
from .typing import List
16+
from .typing import MoveForward
17+
from .typing import Optional
18+
from .typing import Uint32Array
19+
from .typing import Uint64Array
1820
from .wrappers import Timer
1921

20-
HasNbrs = Callable[[np.uint32], bool]
21-
MoveForward = Callable[..., np.uint32]
22-
2322

2423
class Base(BaseGraph):
2524
"""Base node2vec object.
@@ -99,7 +98,7 @@ def __init__(
9998
self.random_state = random_state
10099
self._preprocessed: bool = False
101100

102-
def _map_walk(self, walk_idx_ary: np.ndarray) -> List[str]:
101+
def _map_walk(self, walk_idx_ary: Uint32Array) -> List[str]:
103102
"""Map walk from node index to node ID.
104103
105104
Note:
@@ -166,11 +165,11 @@ def _random_walks(
166165
tot_num_jobs: int,
167166
walk_length: int,
168167
random_state: Optional[int],
169-
start_node_idx_ary: NDArray[(Any,), np.uint32],
168+
start_node_idx_ary: Uint32Array,
170169
has_nbrs: HasNbrs,
171170
move_forward: MoveForward,
172171
progress_proxy: ProgressBar,
173-
):
172+
) -> Uint32Array:
174173
"""Simulate a random walk starting from start node."""
175174
# Seed the random number generator
176175
if random_state is not None:
@@ -241,7 +240,7 @@ def embed(
241240
window_size: int = 10,
242241
epochs: int = 1,
243242
verbose: bool = False,
244-
) -> np.ndarray:
243+
) -> Embeddings:
245244
"""Generate embeddings.
246245
247246
This is a shortcut function that combines ``simulate_walks`` with
@@ -264,7 +263,7 @@ def embed(
264263
skip-gram training if set to True
265264
266265
Return:
267-
numpy.ndarray: The embedding matrix, each row is a node embedding
266+
Embeddings: The embedding matrix, each row is a node embedding
268267
vector. The index is the same as that for the graph.
269268
270269
"""
@@ -375,7 +374,10 @@ class PreComp(Base, SparseRWGraph):
375374
def __init__(self, *args, **kwargs):
376375
"""Initialize PreComp mode node2vec."""
377376
Base.__init__(self, *args, **kwargs)
378-
self.alias_j = self.alias_q = self.alias_indptr = self.alias_dim = None
377+
self.alias_dim: Optional[Uint32Array] = None
378+
self.alias_j: Optional[Uint32Array] = None
379+
self.alias_q: Optional[Float32Array] = None
380+
self.alias_indptr: Optional[Uint64Array] = None
379381

380382
def get_move_forward(self):
381383
"""Wrap ``move_forward``.
@@ -624,7 +626,7 @@ def alias_setup(probs):
624626
625627
Args:
626628
probs (list(float32)): normalized transition probabilities array, could
627-
be in either list or numpy.ndarray, of float32 values.
629+
be in either list or NDArray, of float32 values.
628630
629631
"""
630632
k = probs.size

src/pecanpy/rw/sparse_rw.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ def isnotin(ptr_ary1, ptr_ary2):
154154
returned to the ``get_normalized_probs``.
155155
156156
Args:
157-
ptr_ary1 (:obj:`numpy.ndarray` of :obj:`uint32`): array of pointers to
157+
ptr_ary1 (Uint32Array): array of pointers to
158158
the neighbors of the current state
159-
ptr_ary2 (:obj:`numpy.ndarray` of :obj:`uint32`): array of pointers to
159+
ptr_ary2 (Uint32Array): array of pointers to
160160
the neighbors of the previous state
161161
162162
Returns:
@@ -241,15 +241,13 @@ def isnotin_extended(ptr_ary1, ptr_ary2, wts_ary2, noise_thresholds):
241241
the previous state as out edges if the edge weight is below average.
242242
243243
Args:
244-
ptr_ary1 (:obj:`numpy.ndarray` of :obj:`uint32`): array of pointers to
245-
the neighbors of the current state
246-
ptr_ary2 (:obj:`numpy.ndarray` of :obj:`uint32`): array of pointers to
247-
the neighbors of the previous state
248-
wts_ary2 (:obj: `numpy.ndarray` of :obj:`float32`): array of edge
249-
weights of the previous state
250-
noise_thresholds (:obj: `numpy.ndarray` of :obj:`float32`): array of
251-
noisy edge threshold computed based on the average and the std of
252-
the edge weights of each node
244+
ptr_ary1 (Uint32Array): array of pointers to the neighbors of the
245+
current state
246+
ptr_ary2 (Uint32Array): array of pointers to the neighbors of the
247+
previous state
248+
wts_ary2 (Float32Array): array of edge weights of the previous state
249+
noise_thresholds (Float32Array): array of noisy edge threshold computed
250+
based on the average and the std of the edge weights of each node
253251
254252
Return:
255253
Indicator of whether a neighbor of the current state is considered as

src/pecanpy/typing.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""Type annotations."""
2+
from typing import Any
3+
from typing import Callable
4+
from typing import Dict
5+
from typing import Iterator
6+
from typing import List
7+
from typing import Optional
8+
from typing import Tuple
9+
10+
import numpy as np
11+
from nptyping import NDArray
12+
from typing_extensions import TypeAlias
13+
14+
# Callbacks ###################################################################
15+
HasNbrs = Callable[[np.uint32], bool]
16+
MoveForward = Callable[..., np.uint32]
17+
18+
# Numpy array types ###########################################################
19+
# issue with type alias (https://stackoverflow.com/questions/62073473)
20+
Embeddings: TypeAlias = NDArray[(Any, Any), np.float32] # type: ignore
21+
AdjMat: TypeAlias = NDArray[(Any, Any), Any] # type: ignore
22+
AdjNonZeroMat: TypeAlias = NDArray[(Any, Any), bool] # type: ignore
23+
Uint32Array: TypeAlias = NDArray[(Any,), np.uint32] # type: ignore
24+
Uint64Array: TypeAlias = NDArray[(Any,), np.uint64] # type: ignore
25+
Float32Array: TypeAlias = NDArray[(Any,), np.float32] # type: ignore
26+
CSR = Tuple[Uint32Array, Uint32Array, Float32Array]
27+
28+
__all__ = [
29+
"Any",
30+
"Callable",
31+
"Dict",
32+
"Iterator",
33+
"List",
34+
"Tuple",
35+
"Optional",
36+
"NDArray",
37+
"HasNbrs",
38+
"MoveForward",
39+
"Embeddings",
40+
"AdjMat",
41+
"AdjNonZeroMat",
42+
"Uint32Array",
43+
"Float32Array",
44+
"CSR",
45+
]

0 commit comments

Comments
 (0)