Aligned examples and tests
Co-authored-by: Daniel Kulik <74867610+KulikDM@users.noreply.github.com>
KulikDM committed Nov 21, 2024
1 parent af4c5b6 commit 0389df2
Showing 5 changed files with 19 additions and 76 deletions.
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
         name: Format docstrings
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.18.0
+    rev: v3.19.0
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
@@ -42,8 +42,9 @@ repos:
         name: Sort imports
 
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.2
     hooks:
       - id: ruff
         args: [--exit-non-zero-on-fix, --fix, --line-length=180]
+        exclude: "\\.ipynb$"
         name: Lint code
2 changes: 1 addition & 1 deletion examples/mad_example.py
@@ -35,7 +35,7 @@
 clf_name = 'KNN'
 clf = KNN()
 clf.fit(X_train)
-thres = MAD(factor=1)
+thres = MAD()
 
 # get the prediction labels and outlier scores of the training data
 y_train_scores = clf.decision_scores_  # raw outlier scores
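With the explicit factor=1 dropped, the example now relies on MAD's default factor. For context, a runnable sketch of the full flow, assuming pyod's KNN detector and its generate_data helper; the sample size, contamination, and seed here are illustrative, not necessarily what the shipped example uses:

    from pyod.models.knn import KNN
    from pyod.utils.data import generate_data

    from pythresh.thresholds.mad import MAD

    # illustrative 2-D training data with 10% outliers (assumption, not the example's setup)
    X_train, y_train = generate_data(n_train=200, train_only=True,
                                     contamination=0.1, random_state=42)

    clf = KNN()
    clf.fit(X_train)

    y_train_scores = clf.decision_scores_  # raw outlier scores from the detector
    thres = MAD()                          # default factor, as in the updated example
    y_train_labels = thres.eval(y_train_scores)  # binary labels: 0 inlier, 1 outlier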
2 changes: 1 addition & 1 deletion examples/zscore_example.py
@@ -35,7 +35,7 @@
 clf_name = 'KNN'
 clf = KNN()
 clf.fit(X_train)
-thres = ZSCORE(factor=1)
+thres = ZSCORE()
 
 # get the prediction labels and outlier scores of the training data
 y_train_scores = clf.decision_scores_  # raw outlier scores
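The zscore example receives the identical one-line change. A minimal sketch under the same assumptions as the MAD sketch above:

    from pyod.models.knn import KNN
    from pyod.utils.data import generate_data

    from pythresh.thresholds.zscore import ZSCORE

    X_train, y_train = generate_data(n_train=200, train_only=True,
                                     contamination=0.1, random_state=42)

    clf = KNN()
    clf.fit(X_train)

    thres = ZSCORE()  # default factor replaces the explicit factor=1
    y_train_labels = thres.eval(clf.decision_scores_)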
48 changes: 7 additions & 41 deletions pythresh/test/test_mad.py
@@ -1,5 +1,6 @@
 import sys
 import unittest
+from itertools import product
 from os.path import dirname as up
 
 # noinspection PyProtectedMember
@@ -41,11 +42,15 @@ def setUp(self):

self.all_scores = [scores, multiple_scores]

self.thres = MAD()
self.factors = [0.5, 1, 2]

def test_prediction_labels(self):

for scores in self.all_scores:
params = product(self.all_scores, self.factors)

for scores, factor in params:

self.thres = MAD(factor=factor)

pred_labels = self.thres.eval(scores)
assert (self.thres.thresh_ is not None)
@@ -58,42 +63,3 @@ def test_prediction_labels(self):

             assert (pred_labels.min() == 0)
             assert (pred_labels.max() == 1)
-
-    def test_factor_adjustment(self):
-        """Test the effect of the factor on MAD thresholding."""
-        for scores in self.all_scores:
-            # Test with default factor (1)
-            thres_default = MAD(factor=1)
-            pred_labels_default = thres_default.eval(scores)
-            default_thresh = thres_default.thresh_
-
-            # Test with a higher factor
-            thres_high = MAD(factor=2)
-            pred_labels_high = thres_high.eval(scores)
-            high_thresh = thres_high.thresh_
-
-            # Test with a lower factor
-            thres_low = MAD(factor=0.5)
-            pred_labels_low = thres_low.eval(scores)
-            low_thresh = thres_low.thresh_
-
-            # Assertions on thresholds
-            self.assertLessEqual(default_thresh, high_thresh,
-                                 'Higher factor should increase the threshold.')
-            self.assertGreaterEqual(default_thresh, low_thresh,
-                                    'Lower factor should decrease the threshold.')
-
-            # Assertions on prediction labels
-            for pred_labels in [pred_labels_default, pred_labels_high, pred_labels_low]:
-                self.assertTrue(np.array_equal(np.unique(pred_labels), [0, 1]),
-                                'Predictions should only contain 0 and 1.')
-
-            # Verify that the number of outliers changes with the factor
-            default_outliers = np.sum(pred_labels_default)
-            high_outliers = np.sum(pred_labels_high)
-            low_outliers = np.sum(pred_labels_low)
-
-            self.assertLessEqual(high_outliers, default_outliers,
-                                 'Higher factor should reduce or maintain the number of outliers.')
-            self.assertGreaterEqual(low_outliers, default_outliers,
-                                    'Lower factor should increase or maintain the number of outliers.')
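The deleted test_factor_adjustment swept the factors by hand in three near-identical blocks; the refactor folds that sweep into the existing label checks via itertools.product. A standalone sketch of the pattern, assuming pythresh's MAD API; the synthetic scores here stand in for the suite's real score fixtures, and test_zscore.py below applies the identical change with ZSCORE:

    import unittest
    from itertools import product

    import numpy as np

    from pythresh.thresholds.mad import MAD


    class TestMAD(unittest.TestCase):

        def setUp(self):
            rng = np.random.default_rng(42)
            # synthetic stand-in for raw outlier scores from a detector
            self.all_scores = [rng.exponential(size=200)]
            self.factors = [0.5, 1, 2]

        def test_prediction_labels(self):
            # every (scores, factor) pair runs through the same checks
            for scores, factor in product(self.all_scores, self.factors):
                thres = MAD(factor=factor)
                pred_labels = thres.eval(scores)

                self.assertIsNotNone(thres.thresh_)
                self.assertEqual(pred_labels.min(), 0)
                self.assertEqual(pred_labels.max(), 1)


    if __name__ == '__main__':
        unittest.main()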
38 changes: 7 additions & 31 deletions pythresh/test/test_zscore.py
@@ -1,5 +1,6 @@
 import sys
 import unittest
+from itertools import product
 from os.path import dirname as up
 
 # noinspection PyProtectedMember
@@ -41,11 +42,15 @@ def setUp(self):

self.all_scores = [scores, multiple_scores]

self.thres = ZSCORE()
self.factors = [0.5, 1, 2]

def test_prediction_labels(self):

for scores in self.all_scores:
params = product(self.all_scores, self.factors)

for scores, factor in params:

self.thres = ZSCORE(factor=factor)

pred_labels = self.thres.eval(scores)
assert (self.thres.thresh_ is not None)
@@ -58,32 +63,3 @@ def test_prediction_labels(self):

             assert (pred_labels.min() == 0)
             assert (pred_labels.max() == 1)
-
-    def test_factor_adjustment(self):
-        """Test the effect of the factor on ZSCORE thresholding."""
-        for scores in self.all_scores:
-            # Test with default factor (1)
-            thres_default = ZSCORE(factor=1)
-            pred_labels_default = thres_default.eval(scores)
-            default_outliers = np.sum(pred_labels_default)
-
-            # Test with a higher factor
-            thres_high = ZSCORE(factor=2)
-            pred_labels_high = thres_high.eval(scores)
-            high_outliers = np.sum(pred_labels_high)
-
-            # Test with a lower factor
-            thres_low = ZSCORE(factor=0.5)
-            pred_labels_low = thres_low.eval(scores)
-            low_outliers = np.sum(pred_labels_low)
-
-            # Assertions on the number of outliers
-            self.assertLessEqual(high_outliers, default_outliers,
-                                 'Higher factor should reduce the number of outliers.')
-            self.assertGreaterEqual(low_outliers, default_outliers,
-                                    'Lower factor should increase the number of outliers.')
-
-            # Assertions on predictions being binary
-            for pred_labels in [pred_labels_default, pred_labels_high, pred_labels_low]:
-                self.assertTrue(np.array_equal(np.unique(pred_labels), [0, 1]),
-                                'Predictions should only contain 0 and 1.')