Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH; WIP] Implementation of FCI algorithm that leverages the base classes used in PC #32

Closed
wants to merge 14 commits into from
23 changes: 23 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,26 @@ build-docs:
make -C doc/ clean
make -C doc/ html-noplot
cd doc/ && make view


# Housekeeping targets. None of them produce a file named after the target,
# so declare them phony to keep a stray file called e.g. "clean" from
# silently disabling the rule.
.PHONY: clean clean-pyc clean-so clean-build clean-ctags clean-cache clean-test

# Remove compiled Python bytecode files.
# `-exec ... {} +` is used instead of `| xargs` so that paths containing
# whitespace are passed to rm intact.
clean-pyc:
	find . -name "*.pyc" -exec rm -f {} +

# Remove compiled extension modules (POSIX .so and Windows .pyd).
clean-so:
	find . -name "*.so" -exec rm -f {} +
	find . -name "*.pyd" -exec rm -f {} +

# Remove build and packaging output.
clean-build:
	rm -rf _build build dist dodiscover.egg-info

# Remove the ctags index.
clean-ctags:
	rm -f tags

# Remove Python bytecode cache directories.
# `-prune` stops find from descending into a directory that is being deleted.
clean-cache:
	find . -name "__pycache__" -prune -exec rm -rf {} +

# Remove test, type-checker and notebook caches plus the JUnit report.
# `rm -f` so a missing junit-results.xml does not fail the target (and with
# it the aggregate `clean` target).
clean-test:
	rm -rf .pytest_cache .mypy_cache .ipynb_checkpoints
	rm -f junit-results.xml

# Run every clean-* target.
clean: clean-build clean-pyc clean-so clean-ctags clean-cache clean-test
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ Constraint-based structure learning
:toctree: generated/

LearnSkeleton
LearnSemiMarkovianSkeleton
SkeletonMethods
PC
FCI

Comparing causal discovery algorithms
=====================================
Expand Down
1 change: 1 addition & 0 deletions doc/whats_new/v0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Changelog
- |Feature| Implement confusion matrix method for comparing networkx-like graphs, :func:`dodiscover.metrics.confusion_matrix_networks`, by `Adam Li`_ (:pr:`48`)
- |Feature| Implement classification-based CI test (CCIT), :class:`dodiscover.ci.ClassifierCITest` under the ``dodiscover.ci`` submodule, by `Adam Li`_ (:pr:`28`)
- |Feature| Implement PC algorithm, :class:`dodiscover.constraint.PC` for learning causal structure from observational data under the ``dodiscover.constraint`` submodule, by `Adam Li`_ (:pr:`30`)
- |Feature| Implement FCI algorithm, :class:`dodiscover.constraint.FCI` for learning causal structure from observational data with latent confounders under the ``dodiscover.constraint`` submodule, by `Adam Li`_ (:pr:`32`)

Code and Documentation Contributors
-----------------------------------
Expand Down
2 changes: 1 addition & 1 deletion dodiscover/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
from . import metrics # noqa: F401
from ._protocol import EquivalenceClass, Graph
from ._version import __version__ # noqa: F401
from .constraint import PC
from .constraint import FCI, PC
from .context import Context
14 changes: 14 additions & 0 deletions dodiscover/_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ def remove_node(self, u) -> None:
"""Remove a node from the graph."""
pass

def remove_edge(self, u, v, edge_type) -> None:
    """Remove the edge of type ``edge_type`` between ``u`` and ``v`` from the graph.

    Parameters
    ----------
    u : node
        First endpoint of the edge.
    v : node
        Second endpoint of the edge.
    edge_type : str
        Which kind of edge to remove. NOTE(review): presumably one of the
        edge-type names exposed by the graph (e.g. ``undirected_edge_name``,
        ``bidirected_edge_name``, ``circle_edge_name``) -- confirm against
        the concrete graph implementations.
    """
    pass

def remove_edges_from(self, edges) -> None:
"""Remove a set of edges from the graph."""
pass
Expand Down Expand Up @@ -62,6 +66,16 @@ def undirected_edge_name(self) -> str:
"""Name of the undirected edges."""
pass

@property
def bidirected_edge_name(self) -> str:
"""Name of the bidirected edges."""
pass

@property
def circle_edge_name(self) -> str:
    """Name of the circle edges."""
    pass

def orient_uncertain_edge(self, u, v) -> None:
"""Orients an uncertain edge in the equivalence class to directed ``'u'*->'v'``."""
pass
Expand Down
6 changes: 5 additions & 1 deletion dodiscover/ci/kernel_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,12 @@ def test(
pvalue : float
The p-value of the test.
"""
if z_covariates is None:
z_covariates = set()
self._check_test_input(df, x_vars, y_vars, z_covariates)
if z_covariates is None or len(z_covariates) == 0:

z_columns = list(z_covariates)
if len(z_columns) == 0:
Z = None
else:
Z = df[z_covariates].to_numpy().reshape((-1, len(z_covariates)))
Expand Down
4 changes: 1 addition & 3 deletions dodiscover/ci/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ def test(
if isinstance(self.graph, nx.DiGraph):
is_sep = nx.d_separated(self.graph, x_vars, y_vars, z_covariates)
else:
from graphs import m_separated

is_sep = m_separated(self.graph, x_vars, y_vars, z_covariates)
is_sep = nx.m_separated(self.graph, x_vars, y_vars, z_covariates)

if is_sep:
pvalue = 1
Expand Down
4 changes: 3 additions & 1 deletion dodiscover/constraint/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .config import SkeletonMethods
from .fcialg import FCI
from .pcalg import PC
from .skeleton import LearnSkeleton, SkeletonMethods
from .skeleton import LearnSemiMarkovianSkeleton, LearnSkeleton
28 changes: 12 additions & 16 deletions dodiscover/constraint/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ class BaseConstraintDiscovery:
skeleton_method : SkeletonMethods
The method to use for testing conditional independence. Must be one of
('neighbors', 'complete', 'neighbors_path'). See Notes for more details.
max_path_length : int
The maximum length of a path to consider when looking for possibly d-separating
sets among two nodes. Only used if ``skeleton_method=pds``. Default is infinite.
apply_orientations : bool
Whether or not to apply orientation rules given the learned skeleton graph
and separating set per pair of variables. If ``True`` (default), will
Expand All @@ -62,17 +59,16 @@ class BaseConstraintDiscovery:
"""

graph_: Optional[EquivalenceClass]
separating_sets_: Optional[SeparatingSet]
separating_sets_: SeparatingSet

def __init__(
self,
ci_estimator: BaseConditionalIndependenceTest,
alpha: float = 0.05,
min_cond_set_size: int = None,
max_cond_set_size: int = None,
max_combinations: int = None,
min_cond_set_size: Optional[int] = None,
max_cond_set_size: Optional[int] = None,
max_combinations: Optional[int] = None,
skeleton_method: SkeletonMethods = SkeletonMethods.NBRS,
max_path_length: int = np.inf,
apply_orientations: bool = True,
**ci_estimator_kwargs,
):
Expand All @@ -93,13 +89,13 @@ def __init__(
max_combinations = np.inf
self.max_combinations = max_combinations

# special attributes for learning skeleton with semi-Markovian models
self.max_path_length = max_path_length

# initialize the result properties we want to fit
self.separating_sets_ = None
self.separating_sets_ = defaultdict(lambda: defaultdict(list))
self.graph_ = None

# debugging mode
self.n_ci_tests = 0

def _initialize_sep_sets(self, init_graph: nx.Graph) -> SeparatingSet:
# keep track of separating sets
sep_set: SeparatingSet = defaultdict(lambda: defaultdict(list))
Expand Down Expand Up @@ -162,10 +158,10 @@ def fit(self, context: Context) -> None:
self.X_ = self.context_.data

# initialize graph object to apply learning
sep_set = self._initialize_sep_sets(self.init_graph_)
self.separating_sets_ = self._initialize_sep_sets(self.init_graph_)

# learn skeleton graph and the separating sets per variable
graph, sep_set = self.learn_skeleton(self.context_, sep_set)
graph, self.separating_sets_ = self.learn_skeleton(self.context_, self.separating_sets_)

# convert networkx.Graph to relevant causal graph object
graph = self.convert_skeleton_graph(graph)
Expand All @@ -174,11 +170,10 @@ def fit(self, context: Context) -> None:
if self.apply_orientations:
# for all pairs of non-adjacent variables with a common neighbor
# check if we can orient the edge as a collider
self.orient_unshielded_triples(graph, sep_set)
self.orient_unshielded_triples(graph, self.separating_sets_)
self.orient_edges(graph)

# store resulting data structures
self.separating_sets_ = sep_set
self.graph_ = graph

def evaluate_edge(
Expand Down Expand Up @@ -263,5 +258,6 @@ def learn_skeleton(

skel_graph = skel_alg.adj_graph_
sep_set = skel_alg.sep_set_
self.n_ci_tests += skel_alg.n_ci_tests

return skel_graph, sep_set
20 changes: 20 additions & 0 deletions dodiscover/constraint/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from enum import Enum, EnumMeta


class MetaEnum(EnumMeta):
    """Enum metaclass that lets ``item in EnumClass`` accept raw values.

    Membership is decided by attempting the normal enum lookup
    ``EnumClass(item)``; the lookup succeeding means ``item`` is contained.
    """

    def __contains__(cls, item):
        """Return ``True`` if ``cls(item)`` resolves to a member, else ``False``."""
        try:
            cls(item)
        except ValueError:
            # The enum lookup rejected ``item``: it is not a member or value.
            found = False
        else:
            found = True
        return found


class SkeletonMethods(Enum, metaclass=MetaEnum):
    """String identifiers for the supported skeleton-learning methods.

    Uses ``MetaEnum`` as its metaclass, so membership tests accept the raw
    string value as well as the member itself (e.g.
    ``"neighbors" in SkeletonMethods``).
    """

    # Member order is significant for iteration; keep it stable.
    COMPLETE = "complete"
    NBRS = "neighbors"
    NBRS_PATH = "neighbors_path"
    # NOTE(review): "pds" presumably abbreviates "possibly d-separating set"
    # -- confirm against the skeleton-learning implementation.
    PDS = "pds"
    PDS_PATH = "pds_path"
Loading