diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88b2edb..b954e14 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: name: Format docstrings - repo: https://github.com/asottile/pyupgrade - rev: v3.18.0 + rev: v3.19.0 hooks: - id: pyupgrade args: [--py38-plus] @@ -42,8 +42,9 @@ repos: name: Sort imports - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.7.0 + rev: v0.7.2 hooks: - id: ruff args: [--exit-non-zero-on-fix, --fix, --line-length=180] + exclude: "\\.ipynb$" name: Lint code diff --git a/examples/mad_example.py b/examples/mad_example.py index 4169178..3f3bcf4 100644 --- a/examples/mad_example.py +++ b/examples/mad_example.py @@ -35,7 +35,7 @@ clf_name = 'KNN' clf = KNN() clf.fit(X_train) - thres = MAD(factor=1) + thres = MAD() # get the prediction labels and outlier scores of the training data y_train_scores = clf.decision_scores_ # raw outlier scores diff --git a/examples/zscore_example.py b/examples/zscore_example.py index 67099da..55dde0d 100644 --- a/examples/zscore_example.py +++ b/examples/zscore_example.py @@ -35,7 +35,7 @@ clf_name = 'KNN' clf = KNN() clf.fit(X_train) - thres = ZSCORE(factor=1) + thres = ZSCORE() # get the prediction labels and outlier scores of the training data y_train_scores = clf.decision_scores_ # raw outlier scores diff --git a/pythresh/test/test_mad.py b/pythresh/test/test_mad.py index 04ea341..1a0afd7 100644 --- a/pythresh/test/test_mad.py +++ b/pythresh/test/test_mad.py @@ -1,5 +1,6 @@ import sys import unittest +from itertools import product from os.path import dirname as up # noinspection PyProtectedMember @@ -41,11 +42,15 @@ def setUp(self): self.all_scores = [scores, multiple_scores] - self.thres = MAD() + self.factors = [0.5, 1, 2] def test_prediction_labels(self): - for scores in self.all_scores: + params = product(self.all_scores, self.factors) + + for scores, factor in params: + + self.thres = MAD(factor=factor) pred_labels = self.thres.eval(scores) assert (self.thres.thresh_ is not None) @@ -58,42 +63,3 @@ def test_prediction_labels(self): assert (pred_labels.min() == 0) assert (pred_labels.max() == 1) - - def test_factor_adjustment(self): - """Test the effect of the factor on MAD thresholding.""" - for scores in self.all_scores: - # Test with default factor (1) - thres_default = MAD(factor=1) - pred_labels_default = thres_default.eval(scores) - default_thresh = thres_default.thresh_ - - # Test with a higher factor - thres_high = MAD(factor=2) - pred_labels_high = thres_high.eval(scores) - high_thresh = thres_high.thresh_ - - # Test with a lower factor - thres_low = MAD(factor=0.5) - pred_labels_low = thres_low.eval(scores) - low_thresh = thres_low.thresh_ - - # Assertions on thresholds - self.assertLessEqual(default_thresh, high_thresh, - 'Higher factor should increase the threshold.') - self.assertGreaterEqual(default_thresh, low_thresh, - 'Lower factor should decrease the threshold.') - - # Assertions on prediction labels - for pred_labels in [pred_labels_default, pred_labels_high, pred_labels_low]: - self.assertTrue(np.array_equal(np.unique(pred_labels), [0, 1]), - 'Predictions should only contain 0 and 1.') - - # Verify that the number of outliers changes with the factor - default_outliers = np.sum(pred_labels_default) - high_outliers = np.sum(pred_labels_high) - low_outliers = np.sum(pred_labels_low) - - self.assertLessEqual(high_outliers, default_outliers, - 'Higher factor should reduce or maintain the number of outliers.') - self.assertGreaterEqual(low_outliers, default_outliers, - 'Lower factor should increase or maintain the number of outliers.') diff --git a/pythresh/test/test_zscore.py b/pythresh/test/test_zscore.py index 66b36c9..c8be587 100644 --- a/pythresh/test/test_zscore.py +++ b/pythresh/test/test_zscore.py @@ -1,5 +1,6 @@ import sys import unittest +from itertools import product from os.path import dirname as up # noinspection PyProtectedMember @@ -41,11 +42,15 @@ def setUp(self): self.all_scores = [scores, multiple_scores] - self.thres = ZSCORE() + self.factors = [0.5, 1, 2] def test_prediction_labels(self): - for scores in self.all_scores: + params = product(self.all_scores, self.factors) + + for scores, factor in params: + + self.thres = ZSCORE(factor=factor) pred_labels = self.thres.eval(scores) assert (self.thres.thresh_ is not None) @@ -58,32 +63,3 @@ def test_prediction_labels(self): assert (pred_labels.min() == 0) assert (pred_labels.max() == 1) - - def test_factor_adjustment(self): - """Test the effect of the factor on ZSCORE thresholding.""" - for scores in self.all_scores: - # Test with default factor (1) - thres_default = ZSCORE(factor=1) - pred_labels_default = thres_default.eval(scores) - default_outliers = np.sum(pred_labels_default) - - # Test with a higher factor - thres_high = ZSCORE(factor=2) - pred_labels_high = thres_high.eval(scores) - high_outliers = np.sum(pred_labels_high) - - # Test with a lower factor - thres_low = ZSCORE(factor=0.5) - pred_labels_low = thres_low.eval(scores) - low_outliers = np.sum(pred_labels_low) - - # Assertions on the number of outliers - self.assertLessEqual(high_outliers, default_outliers, - 'Higher factor should reduce the number of outliers.') - self.assertGreaterEqual(low_outliers, default_outliers, - 'Lower factor should increase the number of outliers.') - - # Assertions on predictions being binary - for pred_labels in [pred_labels_default, pred_labels_high, pred_labels_low]: - self.assertTrue(np.array_equal(np.unique(pred_labels), [0, 1]), - 'Predictions should only contain 0 and 1.')