Skip to content

Commit

Permalink
Merge pull request #95 from barahona-research-group/add_feat_info2
Browse files Browse the repository at this point in the history
Add feat info
  • Loading branch information
arnaudon authored Jan 28, 2021
2 parents 0613a21 + 2494c5c commit 2fac212
Show file tree
Hide file tree
Showing 42 changed files with 665 additions and 58 deletions.
6 changes: 3 additions & 3 deletions hcga/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,18 +653,18 @@ def classify_pairwise( # pylint: disable=too-many-locals
The top features for each pair with high enough accuracies are collected in a list,
for later analysis.
Args:
features (dataframe): extracted features
features_info (dataframe): features information
model (str): model to perform analysis
graph_removal (float): remove samples with more than graph_removal % bad values
n_top_features (int): number of top features to save
reduce_set (bool): if True, the classification will be rerun
on a reduced set of top features (from Shapley analysis)
on a reduced set of top features (from Shapley analysis)
reduce_set_size (int): number of features to keep for reduced set
reduced_set_max_correlation (float): to discard highly correlated top features
in reduced set of features
in reduced set of features
n_repeats (int): number of k-fold repeats
n_splits (int): number of splits for k-fold, None=automatic estimation
analysis_type (str): 'classification' or 'regression'
Expand Down
11 changes: 11 additions & 0 deletions hcga/features/assortativity.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,19 @@ def weigted_average_neighbor_degree(graph):
class Assortativity(FeatureClass):
"""Assortativity class.
Features derived from different measures of assortativity of the graph.
Uses networkx, see `https://networkx.github.io/documentation/networkx-2.4/reference/\
algorithms/assortativity.html`
References
----------
.. [1] M. E. J. Newman, Mixing patterns in networks,
Physical Review E, 67 026126, 2003
.. [2] Foster, J.G., Foster, D.V., Grassberger, P. & Paczuski, M.
Edge direction and the structure of networks, PNAS 107, 10815-20 (2010).
"""

modes = ["fast", "medium", "slow"]
Expand Down
12 changes: 11 additions & 1 deletion hcga/features/basal_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,17 @@ def exp_attracting_edge(graph):


class BasalNodes(FeatureClass):
"""Basal nodes class."""
"""Basal nodes class.
Basal nodes are nodes which have in-degree equal to zero. Attracting nodes are
nodes which have out-degree equal to zero.
References
----------
.. [1] Johnson, Samuel, and Nick S. Jones. "Looplessness in networks is linked to trophic\
coherence.",
Proceedings of the National Academy of Sciences 114.22 (2017): 5618-5623.
"""

modes = ["fast", "medium", "slow"]
shortname = "BN"
Expand Down
12 changes: 11 additions & 1 deletion hcga/features/basic_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,17 @@ def radius(graph):


class BasicStats(FeatureClass):
"""Basic stats class."""
"""Basic stats class.
Here we compute basic statistical measures of the graphs, e.g. number of nodes.
References
----------
.. [1] Mark E. J. Newman.
*Networks: An Introduction.*
Oxford University Press, USA, 2010, pp. 169.
"""

modes = ["fast", "medium", "slow"]
shortname = "BS"
Expand Down
42 changes: 42 additions & 0 deletions hcga/features/centralities_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,48 @@ class CentralitiesBasic(FeatureClass):
Uses networkx, see `https://networkx.github.io/documentation/stable/reference/\
algorithms/centrality.html`
Here we implement:
Degree Centrality
Eigenvector Centrality [1]_ [2]_
Closeness Centrality [3]_ [4]_
Betweenness Centrality [5]_ [6]_ [7]_ [8]_
Harmonic Centrality [9]_
References
----------
.. [1] Phillip Bonacich.
"Power and Centrality: A Family of Measures."
*American Journal of Sociology* 92(5):1170–1182, 1986
<http://www.leonidzhukov.net/hse/2014/socialnetworks/papers/Bonacich-Centrality.pdf>
.. [2] Mark E. J. Newman.
*Networks: An Introduction.*
Oxford University Press, USA, 2010, pp. 169.
.. [3] Linton C. Freeman: Centrality in networks: I.
Conceptual clarification. Social Networks 1:215-239, 1979.
http://leonidzhukov.ru/hse/2013/socialnetworks/papers/freeman79-centrality.pdf
.. [4] pg. 201 of Wasserman, S. and Faust, K.,
Social Network Analysis: Methods and Applications, 1994,
Cambridge University Press.
.. [5] Ulrik Brandes:
A Faster Algorithm for Betweenness Centrality.
Journal of Mathematical Sociology 25(2):163-177, 2001.
http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
.. [6] Ulrik Brandes:
On Variants of Shortest-Path Betweenness
Centrality and their Generic Computation.
Social Networks 30(2):136-145, 2008.
http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
.. [7] Ulrik Brandes and Christian Pich:
Centrality Estimation in Large Networks.
International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf
.. [8] Linton C. Freeman:
A set of measures of centrality based on betweenness.
Sociometry 40: 35–41, 1977
http://moreno.ss.uci.edu/23.pdf
.. [9] Boldi, Paolo, and Sebastiano Vigna. "Axioms for centrality."
Internet Mathematics 10.3-4 (2014): 222-262.
"""

modes = ["fast", "medium", "slow"]
Expand Down
20 changes: 19 additions & 1 deletion hcga/features/chemical_theory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,25 @@


class ChemicalTheory(FeatureClass):
"""Chemical theory class."""
"""Chemical theory class.
Here we implement the Wiener and Estrada indices.
The *Wiener index* of a graph is the sum of the shortest-path
distances between each pair of reachable nodes [1]_. For pairs of nodes
in undirected graphs, only one orientation of the pair is counted.
The Estrada Index is a topological index of folding or 3D “compactness” [2]_.
References
----------
.. [1] Rouvray, Dennis H.
"The rich legacy of half a century of the Wiener index.",
Topology in Chemistry. Woodhead Publishing, 2002. 16-37.
.. [2] E. Estrada, Characterization of 3D molecular structure,
Chem. Phys. Lett. 319, 713 (2000).
"""

modes = ["fast", "medium", "slow"]
shortname = "CT"
Expand Down
25 changes: 24 additions & 1 deletion hcga/features/cliques.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,30 @@ def maximal_clique_sizes(graph):


class Cliques(FeatureClass):
"""Cliques class."""
"""Cliques class.
Here we construct features based on cliques
(subsets of vertices, all adjacent to each other, also called complete subgraphs).
References
----------
.. [1] Bron, C. and Kerbosch, J.
"Algorithm 457: finding all cliques of an undirected graph".
*Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
<http://portal.acm.org/citation.cfm?doid=362342.362367>
.. [2] F. Cazals, C. Karande,
"A note on the problem of reporting maximal cliques",
*Theoretical Computer Science*,
Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
<https://doi.org/10.1016/j.tcs.2008.05.010>
.. [3] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J.,
Langston, M.A., Samatova, N.F.,
"Genome-Scale Computational Approaches to Memory-Intensive
Applications in Systems Biology".
*Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005
Conference, pp. 12, 12--18 Nov. 2005.
<https://doi.org/10.1109/SC.2005.29>.
"""

modes = ["fast", "medium", "slow"]
shortname = "Cli"
Expand Down
29 changes: 28 additions & 1 deletion hcga/features/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,34 @@ def square_clustering_dist(graph):


class Clustering(FeatureClass):
"""Clustering class."""
"""Clustering class.
Here we construct features based on the number of triangles in a graph.
Uses networkx, see `https://networkx.github.io/documentation/stable/reference/\
algorithms/clustering.html`
We compute:
The number of triangles
Transitivity [1]_
Clustering [2]_ [3]_ [4]_
References
----------
.. [1] Biggs, Norman (1993).
Algebraic Graph Theory (2nd ed.).
Cambridge: Cambridge University Press. p. 118.
.. [2] Generalizations of the clustering coefficient to weighted
complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
http://jponnela.com/web_documents/a9.pdf
.. [3] Intensity and coherence of motifs in weighted complex
networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
Physical Review E, 71(6), 065103 (2005).
.. [4] Clustering in complex directed networks by G. Fagiolo,
Physical Review E, 76(2), 026107 (2007).
"""

modes = ["medium", "slow"]
shortname = "Clu"
Expand Down
15 changes: 14 additions & 1 deletion hcga/features/communities_asyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,20 @@ def partitions(graph, num_comms):


class CommunitiesAsyn(FeatureClass):
"""Communities Asyn class."""
"""Communities Asyn class.
The asynchronous fluid communities algorithm is described in
[1]_. The algorithm is based on the simple idea of fluids interacting
in an environment, expanding and pushing each other. Its initialization is
random, so found communities may vary on different executions.
References
----------
.. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
Competitive and Highly Scalable Community Detection Algorithm".
[https://arxiv.org/pdf/1703.09307.pdf].
"""

modes = ["slow"]
shortname = "CA"
Expand Down
17 changes: 16 additions & 1 deletion hcga/features/communities_bisection.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,22 @@ def largest_commsize(graph):


class CommunitiesBisection(FeatureClass):
"""Communities Bisection class."""
"""Communities Bisection class.
This algorithm partitions a network into two sets by iteratively
swapping pairs of nodes to reduce the edge cut between the two sets. The
pairs are chosen according to a modified form of Kernighan-Lin, which
moves nodes individually, alternating between sides to keep the bisection
balanced.
References
----------
.. [1] Kernighan, B. W.; Lin, Shen (1970).
"An efficient heuristic procedure for partitioning graphs."
*Bell System Technical Journal* 49: 291--307.
Oxford University Press 2011.
"""

modes = ["medium", "slow"]
shortname = "CBI"
Expand Down
19 changes: 18 additions & 1 deletion hcga/features/communities_labelprop.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,24 @@ def ratio_commsize_maxmin(graph):


class CommunitiesLabelPropagation(FeatureClass):
"""Communities Label propagation class."""
"""Communities Label propagation class.
Features based on the community detection using label propagation.
Uses networkx, see `https://networkx.org/documentation/stable/reference/algorithms/\
community.html`
Finds communities in `G` using a semi-synchronous label propagation
method [1]_. This method combines the advantages of both the synchronous
and asynchronous models.
References
----------
.. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection
via semi-synchronous label propagation algorithms. In Business
Applications of Social Network Analysis (BASNA), 2010 IEEE International
Workshop on (pp. 1-8). IEEE.
"""

modes = ["medium", "slow"]
shortname = "CLP"
Expand Down
23 changes: 22 additions & 1 deletion hcga/features/communities_modularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,28 @@ def ratio_commsize_maxmin_weighted(graph):


class CommunitiesModularity(FeatureClass):
"""Communities Modularity propagation class."""
"""Communities Modularity propagation class.
Features based on the community detection using modularity.
Uses networkx, see `https://networkx.org/documentation/stable/reference/algorithms/\
community.html`
Find communities in graph using Clauset-Newman-Moore greedy modularity maximization.
This method currently supports the Graph class and does not consider edge weights.
Greedy modularity maximization begins with each node in its own community and joins
the pair of communities that most increases modularity until no such pair exists.
References
----------
.. [1] M. E. J. Newman 'Networks: An Introduction', page 224
Oxford University Press 2011.
.. [2] Clauset, A., Newman, M. E., & Moore, C.
"Finding community structure in very large networks."
Physical Review E 70(6), 2004.
"""

modes = ["fast", "medium", "slow"]
shortname = "CM"
Expand Down
17 changes: 16 additions & 1 deletion hcga/features/connectance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,22 @@


class Connectance(FeatureClass):
"""Connectance class."""
"""Connectance class.
Features based on the connectivity of the graph.
For now we compute only the density, which for undirected graphs is
.. math::
d = \\frac{2m}{n(n-1)},
and for directed graphs is
.. math::
d = \\frac{m}{n(n-1)},
where n is the number of nodes and m is the number of edges.
"""

modes = ["fast", "medium", "slow"]
shortname = "Cns"
Expand Down
17 changes: 16 additions & 1 deletion hcga/features/core_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,22 @@ def core_number(graph):


class CoreNumber(FeatureClass):
"""Core number class."""
"""Core number class.
Features based on a k-core analysis.
A k-core is a maximal subgraph that contains nodes of degree k or more.
The core number of a node is the largest value k of a k-core containing that node.
Uses networkx, see `https://networkx.org/documentation/stable/reference/algorithms/core.html`
References
----------
.. [1] An O(m) Algorithm for Cores Decomposition of Networks
Vladimir Batagelj and Matjaz Zaversnik, 2003.
https://arxiv.org/abs/cs.DS/0310049
"""

modes = ["fast", "medium", "slow"]
shortname = "CoN"
Expand Down
8 changes: 7 additions & 1 deletion hcga/features/covering.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ def min_edge_cover(graph):


class Covering(FeatureClass):
"""Covering class."""
"""Covering class.
Features based on the minimum edge cover of a graph.
Uses networkx, see `https://networkx.org/documentation/stable/reference/algorithms/covering.html`
"""

modes = ["fast", "medium", "slow"]
shortname = "CV"
Expand Down
Loading

0 comments on commit 2fac212

Please sign in to comment.