Aligned examples and tests
Co-authored-by: Daniel Kulik <74867610+KulikDM@users.noreply.github.com>
KulikDM committed Nov 21, 2024
1 parent af4c5b6 commit 0389df2
Showing 5 changed files with 19 additions and 76 deletions.
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
         name: Format docstrings
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.18.0
+    rev: v3.19.0
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
@@ -42,8 +42,9 @@ repos:
         name: Sort imports
 
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.2
     hooks:
       - id: ruff
         args: [--exit-non-zero-on-fix, --fix, --line-length=180]
+        exclude: "\\.ipynb$"
         name: Lint code
2 changes: 1 addition & 1 deletion examples/mad_example.py
@@ -35,7 +35,7 @@
 clf_name = 'KNN'
 clf = KNN()
 clf.fit(X_train)
-thres = MAD(factor=1)
+thres = MAD()
 
 # get the prediction labels and outlier scores of the training data
 y_train_scores = clf.decision_scores_  # raw outlier scores
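With the explicit factor=1 dropped, the example now relies on MAD's default factor. For context, a runnable sketch of the full flow, assuming pyod's KNN detector and its generate_data helper; the sample size, contamination, and seed here are illustrative, not necessarily what the shipped example uses:

    from pyod.models.knn import KNN
    from pyod.utils.data import generate_data

    from pythresh.thresholds.mad import MAD

    # illustrative 2-D training data with 10% outliers (assumption, not the example's setup)
    X_train, y_train = generate_data(n_train=200, train_only=True,
                                     contamination=0.1, random_state=42)

    clf = KNN()
    clf.fit(X_train)

    y_train_scores = clf.decision_scores_  # raw outlier scores from the detector
    thres = MAD()                          # default factor, as in the updated example
    y_train_labels = thres.eval(y_train_scores)  # binary labels: 0 inlier, 1 outlier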
2 changes: 1 addition & 1 deletion examples/zscore_example.py
@@ -35,7 +35,7 @@
 clf_name = 'KNN'
 clf = KNN()
 clf.fit(X_train)
-thres = ZSCORE(factor=1)
+thres = ZSCORE()
 
 # get the prediction labels and outlier scores of the training data
 y_train_scores = clf.decision_scores_  # raw outlier scores
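The zscore example receives the identical one-line change. A minimal sketch under the same assumptions as the MAD sketch above:

    from pyod.models.knn import KNN
    from pyod.utils.data import generate_data

    from pythresh.thresholds.zscore import ZSCORE

    X_train, y_train = generate_data(n_train=200, train_only=True,
                                     contamination=0.1, random_state=42)

    clf = KNN()
    clf.fit(X_train)

    thres = ZSCORE()  # default factor replaces the explicit factor=1
    y_train_labels = thres.eval(clf.decision_scores_)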
48 changes: 7 additions & 41 deletions pythresh/test/test_mad.py
@@ -1,5 +1,6 @@
 import sys
 import unittest
+from itertools import product
 from os.path import dirname as up
 
 # noinspection PyProtectedMember
@@ -41,11 +42,15 @@ def setUp(self):

self.all_scores = [scores, multiple_scores]

self.thres = MAD()
self.factors = [0.5, 1, 2]

def test_prediction_labels(self):

for scores in self.all_scores:
params = product(self.all_scores, self.factors)

for scores, factor in params:

self.thres = MAD(factor=factor)

pred_labels = self.thres.eval(scores)
assert (self.thres.thresh_ is not None)
@@ -58,42 +63,3 @@ def test_prediction_labels(self):

             assert (pred_labels.min() == 0)
             assert (pred_labels.max() == 1)
-
-    def test_factor_adjustment(self):
-        """Test the effect of the factor on MAD thresholding."""
-        for scores in self.all_scores:
-            # Test with default factor (1)
-            thres_default = MAD(factor=1)
-            pred_labels_default = thres_default.eval(scores)
-            default_thresh = thres_default.thresh_
-
-            # Test with a higher factor
-            thres_high = MAD(factor=2)
-            pred_labels_high = thres_high.eval(scores)
-            high_thresh = thres_high.thresh_
-
-            # Test with a lower factor
-            thres_low = MAD(factor=0.5)
-            pred_labels_low = thres_low.eval(scores)
-            low_thresh = thres_low.thresh_
-
-            # Assertions on thresholds
-            self.assertLessEqual(default_thresh, high_thresh,
-                                 'Higher factor should increase the threshold.')
-            self.assertGreaterEqual(default_thresh, low_thresh,
-                                    'Lower factor should decrease the threshold.')
-
-            # Assertions on prediction labels
-            for pred_labels in [pred_labels_default, pred_labels_high, pred_labels_low]:
-                self.assertTrue(np.array_equal(np.unique(pred_labels), [0, 1]),
-                                'Predictions should only contain 0 and 1.')
-
-            # Verify that the number of outliers changes with the factor
-            default_outliers = np.sum(pred_labels_default)
-            high_outliers = np.sum(pred_labels_high)
-            low_outliers = np.sum(pred_labels_low)
-
-            self.assertLessEqual(high_outliers, default_outliers,
-                                 'Higher factor should reduce or maintain the number of outliers.')
-            self.assertGreaterEqual(low_outliers, default_outliers,
-                                    'Lower factor should increase or maintain the number of outliers.')
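The deleted test_factor_adjustment swept the factors by hand in three near-identical blocks; the refactor folds that sweep into the existing label checks via itertools.product. A standalone sketch of the pattern, assuming pythresh's MAD API; the synthetic scores here stand in for the suite's real score fixtures, and test_zscore.py below applies the identical change with ZSCORE:

    import unittest
    from itertools import product

    import numpy as np

    from pythresh.thresholds.mad import MAD


    class TestMAD(unittest.TestCase):

        def setUp(self):
            rng = np.random.default_rng(42)
            # synthetic stand-in for raw outlier scores from a detector
            self.all_scores = [rng.exponential(size=200)]
            self.factors = [0.5, 1, 2]

        def test_prediction_labels(self):
            # every (scores, factor) pair runs through the same checks
            for scores, factor in product(self.all_scores, self.factors):
                thres = MAD(factor=factor)
                pred_labels = thres.eval(scores)

                self.assertIsNotNone(thres.thresh_)
                self.assertEqual(pred_labels.min(), 0)
                self.assertEqual(pred_labels.max(), 1)


    if __name__ == '__main__':
        unittest.main()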
38 changes: 7 additions & 31 deletions pythresh/test/test_zscore.py
@@ -1,5 +1,6 @@
 import sys
 import unittest
+from itertools import product
 from os.path import dirname as up
 
 # noinspection PyProtectedMember
@@ -41,11 +42,15 @@ def setUp(self):

self.all_scores = [scores, multiple_scores]

self.thres = ZSCORE()
self.factors = [0.5, 1, 2]

def test_prediction_labels(self):

for scores in self.all_scores:
params = product(self.all_scores, self.factors)

for scores, factor in params:

self.thres = ZSCORE(factor=factor)

pred_labels = self.thres.eval(scores)
assert (self.thres.thresh_ is not None)
@@ -58,32 +63,3 @@ def test_prediction_labels(self):

             assert (pred_labels.min() == 0)
             assert (pred_labels.max() == 1)
-
-    def test_factor_adjustment(self):
-        """Test the effect of the factor on ZSCORE thresholding."""
-        for scores in self.all_scores:
-            # Test with default factor (1)
-            thres_default = ZSCORE(factor=1)
-            pred_labels_default = thres_default.eval(scores)
-            default_outliers = np.sum(pred_labels_default)
-
-            # Test with a higher factor
-            thres_high = ZSCORE(factor=2)
-            pred_labels_high = thres_high.eval(scores)
-            high_outliers = np.sum(pred_labels_high)
-
-            # Test with a lower factor
-            thres_low = ZSCORE(factor=0.5)
-            pred_labels_low = thres_low.eval(scores)
-            low_outliers = np.sum(pred_labels_low)
-
-            # Assertions on the number of outliers
-            self.assertLessEqual(high_outliers, default_outliers,
-                                 'Higher factor should reduce the number of outliers.')
-            self.assertGreaterEqual(low_outliers, default_outliers,
-                                    'Lower factor should increase the number of outliers.')
-
-            # Assertions on predictions being binary
-            for pred_labels in [pred_labels_default, pred_labels_high, pred_labels_low]:
-                self.assertTrue(np.array_equal(np.unique(pred_labels), [0, 1]),
-                                'Predictions should only contain 0 and 1.')