From 66ea716407a41c1afa8c41a64b1b29eada0ef8ea Mon Sep 17 00:00:00 2001
From: Ken Brewer <kenibrewer@users.noreply.github.com>
Date: Sat, 30 Mar 2024 01:20:59 +0000
Subject: [PATCH] refactor: apply bugbear linting checks and fixes

---
 pycytominer/aggregate.py                      |  2 +-
 pycytominer/annotate.py                       |  6 +++--
 pycytominer/consensus.py                      |  8 +++++--
 .../cyto_utils/DeepProfiler_processing.py     |  6 +++--
 pycytominer/cyto_utils/cell_locations.py      |  2 +-
 pycytominer/cyto_utils/cells.py               | 10 ++++----
 pycytominer/cyto_utils/collate.py             |  6 ++---
 pycytominer/cyto_utils/features.py            |  2 +-
 pycytominer/cyto_utils/load.py                |  4 ++--
 pycytominer/cyto_utils/output.py              |  7 +++++-
 pycytominer/cyto_utils/util.py                | 24 ++++++++-----------
 pycytominer/cyto_utils/write_gct.py           |  2 +-
 pyproject.toml                                |  2 +-
 tests/test_cyto_utils/test_cells.py           |  7 ++++--
 14 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
index 3ee945a8..ed86a12f 100644
--- a/pycytominer/aggregate.py
+++ b/pycytominer/aggregate.py
@@ -13,7 +13,7 @@
 
 def aggregate(
     population_df,
-    strata=["Metadata_Plate", "Metadata_Well"],
+    strata=("Metadata_Plate", "Metadata_Well"),
     features="infer",
     operation="median",
     output_file=None,
diff --git a/pycytominer/annotate.py b/pycytominer/annotate.py
index 2f616dc1..1a097266 100644
--- a/pycytominer/annotate.py
+++ b/pycytominer/annotate.py
@@ -17,7 +17,7 @@
 def annotate(
     profiles,
     platemap,
-    join_on=["Metadata_well_position", "Metadata_Well"],
+    join_on=("Metadata_well_position", "Metadata_Well"),
     output_file=None,
     output_type="csv",
     add_metadata_id_to_platemap=True,
@@ -28,7 +28,7 @@ def annotate(
     external_join_right=None,
     compression_options=None,
     float_format=None,
-    cmap_args={},
+    cmap_args=None,
     **kwargs,
 ):
     """Add metadata to aggregated profiles.
@@ -76,6 +76,8 @@ def annotate(
         data.
     """
 
+    cmap_args = cmap_args if cmap_args is not None else {}
+
     # Load Data
     profiles = load_profiles(profiles)
     platemap = load_platemap(platemap, add_metadata_id_to_platemap)
diff --git a/pycytominer/consensus.py b/pycytominer/consensus.py
index b1326abe..8cb6df57 100644
--- a/pycytominer/consensus.py
+++ b/pycytominer/consensus.py
@@ -13,14 +13,14 @@
 
 def consensus(
     profiles,
-    replicate_columns=["Metadata_Plate", "Metadata_Well"],
+    replicate_columns=("Metadata_Plate", "Metadata_Well"),
     operation="median",
     features="infer",
     output_file=None,
     output_type="csv",
     compression_options=None,
     float_format=None,
-    modz_args={"method": "spearman"},
+    modz_args=None,
 ):
     """Form level 5 consensus profile data.
 
@@ -95,6 +95,10 @@ def consensus(
         output_file=None,
     )
     """
+    # Set default modz_args
+    if modz_args is None:
+        modz_args = {"method": "spearman"}
+
     # Confirm that the operation is supported
     check_consensus_operation(operation)
 
diff --git a/pycytominer/cyto_utils/DeepProfiler_processing.py b/pycytominer/cyto_utils/DeepProfiler_processing.py
index 5991063e..b4bfc5d2 100644
--- a/pycytominer/cyto_utils/DeepProfiler_processing.py
+++ b/pycytominer/cyto_utils/DeepProfiler_processing.py
@@ -248,7 +248,8 @@ def aggregate_deep(self):
             # if no files were found there is a miss-match between the index and the output files
             if not len(arr):
                 warnings.warn(
-                    f"No files for the key {metadata_level} could be found.\nThis program will continue, but be aware that this might induce errors!"
+                    f"No files for the key {metadata_level} could be found.\nThis program will continue, but be aware that this might induce errors!",
+                    stacklevel=2,
                 )
                 continue
             df = pd.concat(arr)
@@ -384,7 +385,8 @@ def get_single_cells(
             # skip a file if there are no features
             if len(features.index) == 0:
                 warnings.warn(
-                    f"No features could be found at {features_path}.\nThis program will continue, but be aware that this might induce errors!"
+                    f"No features could be found at {features_path}.\nThis program will continue, but be aware that this might induce errors!",
+                    stacklevel=2,
                 )
                 continue
             locations = load_npz_locations(
diff --git a/pycytominer/cyto_utils/cell_locations.py b/pycytominer/cyto_utils/cell_locations.py
index 612ae873..ccf40ab6 100644
--- a/pycytominer/cyto_utils/cell_locations.py
+++ b/pycytominer/cyto_utils/cell_locations.py
@@ -86,7 +86,7 @@ def __init__(
         image_column: str = "ImageNumber",
         object_column: str = "ObjectNumber",
         table_column: str = "TableNumber",
-        image_key: list = ["Metadata_Plate", "Metadata_Well", "Metadata_Site"],
+        image_key: list = ("Metadata_Plate", "Metadata_Well", "Metadata_Site"),
         cell_x_loc: str = "Nuclei_Location_Center_X",
         cell_y_loc: str = "Nuclei_Location_Center_Y",
     ):
diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
index b9cb351b..e57a3645 100644
--- a/pycytominer/cyto_utils/cells.py
+++ b/pycytominer/cyto_utils/cells.py
@@ -95,13 +95,13 @@ class SingleCells:
     def __init__(
         self,
         sql_file,
-        strata=["Metadata_Plate", "Metadata_Well"],
+        strata=("Metadata_Plate", "Metadata_Well"),
         aggregation_operation="median",
         output_file=None,
         compartments=default_compartments,
         compartment_linking_cols=default_linking_cols,
-        merge_cols=["TableNumber", "ImageNumber"],
-        image_cols=["TableNumber", "ImageNumber", "Metadata_Site"],
+        merge_cols=("TableNumber", "ImageNumber"),
+        image_cols=("TableNumber", "ImageNumber", "Metadata_Site"),
         add_image_features=False,
         image_feature_categories=None,
         features="infer",
@@ -241,8 +241,8 @@ def set_subsample_n(self, subsample_n):
 
         try:
             self.subsample_n = int(subsample_n)
-        except ValueError:
-            raise ValueError("subsample n must be an integer or coercable")
+        except ValueError as e:
+            raise ValueError("subsample n must be an integer or coercable") from e
         self._check_subsampling()
 
     def set_subsample_random_state(self, random_state):
diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py
index f9e26e2d..469fafaa 100644
--- a/pycytominer/cyto_utils/collate.py
+++ b/pycytominer/cyto_utils/collate.py
@@ -31,7 +31,7 @@ def collate(
     tmp_dir="/tmp",
     overwrite=False,
     add_image_features=True,
-    image_feature_categories=["Granularity", "Texture", "ImageQuality", "Threshold"],
+    image_feature_categories=("Granularity", "Texture", "ImageQuality", "Threshold"),
     printtoscreen=True,
 ):
     """Collate the CellProfiler-created CSVs into a single SQLite file by calling cytominer-database
@@ -74,12 +74,12 @@ def collate(
     try:
         import cytominer_database.ingest
         import cytominer_database.munge
-    except ImportError:
+    except ImportError as e:
         raise ImportError(
             """Optional dependency cytominer-database is not installed.
             Please install the `collate` optional dependency group: e.g. `pip install pycytominer[collate]`
             """
-        )
+        ) from e
 
     # Set up directories (these need to be abspaths to keep from confusing makedirs later)
     input_dir = pathlib.Path(f"{base_directory}/analysis/{batch}/{plate}/{csv_dir}")
diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py
index b88aba34..62e6e18a 100644
--- a/pycytominer/cyto_utils/features.py
+++ b/pycytominer/cyto_utils/features.py
@@ -75,7 +75,7 @@ def label_compartment(cp_features, compartment, metadata_cols):
 
 def infer_cp_features(
     population_df,
-    compartments=["Cells", "Nuclei", "Cytoplasm"],
+    compartments=("Cells", "Nuclei", "Cytoplasm"),
     metadata=False,
     image_features=False,
 ):
diff --git a/pycytominer/cyto_utils/load.py b/pycytominer/cyto_utils/load.py
index 7abdd06d..78ccee99 100644
--- a/pycytominer/cyto_utils/load.py
+++ b/pycytominer/cyto_utils/load.py
@@ -121,8 +121,8 @@ def load_platemap(platemap, add_metadata_id=True):
         try:
             delim = infer_delim(platemap)
             platemap = pd.read_csv(platemap, sep=delim)
-        except FileNotFoundError:
-            raise FileNotFoundError(f"{platemap} platemap file not found")
+        except FileNotFoundError as e:
+            raise FileNotFoundError(f"{platemap} platemap file not found") from e
     else:
         # Setting platemap to a copy to prevent column name changes from back-propagating
         platemap = platemap.copy()
diff --git a/pycytominer/cyto_utils/output.py b/pycytominer/cyto_utils/output.py
index 5018df86..d650fb53 100644
--- a/pycytominer/cyto_utils/output.py
+++ b/pycytominer/cyto_utils/output.py
@@ -15,7 +15,7 @@ def output(
     output_type: str = "csv",
     sep: str = ",",
     float_format: Optional[str] = None,
-    compression_options: Union[str, Dict] = {"method": "gzip", "mtime": 1},
+    compression_options: Union[str, Dict, None] = None,
     **kwargs,
 ):
     """Given an output file and compression options, write file to disk
@@ -79,6 +79,11 @@ def output(
     )
     """
 
+    # Set default compression options (done outside of function signature to avoid mutable default arguments)
+    compression_options = (
+        compression_options if compression_options else {"method": "gzip", "mtime": 1}
+    )
+
     if output_type == "csv":
         compression_options = set_compression_method(compression=compression_options)
 
diff --git a/pycytominer/cyto_utils/util.py b/pycytominer/cyto_utils/util.py
index 948a17ef..a31aadf3 100644
--- a/pycytominer/cyto_utils/util.py
+++ b/pycytominer/cyto_utils/util.py
@@ -56,7 +56,7 @@ def check_compartments(compartments):
         warn_str = "Non-canonical compartment detected: {x}".format(
             x=", ".join(non_canonical_compartments)
         )
-        warnings.warn(warn_str)
+        warnings.warn(warn_str, stacklevel=2)
 
 
 def load_known_metadata_dictionary(metadata_file=default_metadata_file):
@@ -186,10 +186,10 @@ def check_fields_of_view_format(fields_of_view):
             else:
                 try:
                     return list(map(int, fields_of_view))
-                except ValueError:
+                except ValueError as e:
                     raise TypeError(
                         "Variables of type int expected, however some of the input fields of view are not integers."
-                    )
+                    ) from e
         else:
             raise TypeError(
                 f"Variable of type list expected, however type {type(fields_of_view)} was passed."
@@ -215,11 +215,9 @@ def check_fields_of_view(data_fields_of_view, input_fields_of_view):
 
     """
 
-    try:
-        assert len(
-            list(np.intersect1d(data_fields_of_view, input_fields_of_view))
-        ) == len(input_fields_of_view)
-    except AssertionError:
+    if len(np.intersect1d(data_fields_of_view, input_fields_of_view)) != len(
+        input_fields_of_view
+    ):
         raise ValueError(
             "Some of the input fields of view are not present in the image table."
         )
@@ -248,12 +246,10 @@ def check_image_features(image_features, image_columns):
     else:
         level = 0
 
-    try:
-        assert all(
-            feature in list({img_col.split("_")[level] for img_col in image_columns})
-            for feature in image_features
-        )
-    except AssertionError:
+    # Build the set of categories once instead of rebuilding a list from it for
+    # every feature that is checked.
+    image_categories = {img_col.split("_")[level] for img_col in image_columns}
+    if not all(feature in image_categories for feature in image_features):
         raise ValueError(
             "Some of the input image features are not present in the image table."
         )
diff --git a/pycytominer/cyto_utils/write_gct.py b/pycytominer/cyto_utils/write_gct.py
index a058da3d..fc9ea3aa 100644
--- a/pycytominer/cyto_utils/write_gct.py
+++ b/pycytominer/cyto_utils/write_gct.py
@@ -101,5 +101,5 @@ def write_gct(
         gctwriter = csv.writer(gctfile, delimiter="\t")
         gctwriter.writerow([version])
         gctwriter.writerow(data_dimensions)
-        for feature, row in full_df.iterrows():
+        for _, row in full_df.iterrows():
             gctwriter.writerow(row)
diff --git a/pyproject.toml b/pyproject.toml
index be34d95a..94f0a440 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -144,7 +144,7 @@ select = [
     # flake8-bandit
     # "S",
     # flake8-bugbear
-    # "B",
+    "B",
     # isort
     # "I",
     # mccabe
diff --git a/tests/test_cyto_utils/test_cells.py b/tests/test_cyto_utils/test_cells.py
index eb686023..36891fc5 100644
--- a/tests/test_cyto_utils/test_cells.py
+++ b/tests/test_cyto_utils/test_cells.py
@@ -21,9 +21,13 @@
 
 def build_random_data(
     compartment="cells",
-    ImageNumber=sorted(["x", "y"] * 50),
-    TableNumber=sorted(["x_hash", "y_hash"] * 50),
+    ImageNumber=None,
+    TableNumber=None,
 ):
+    if ImageNumber is None:
+        ImageNumber = sorted(["x", "y"] * 50)
+    if TableNumber is None:
+        TableNumber = sorted(["x_hash", "y_hash"] * 50)
     a_feature = random.sample(range(1, 1000), 100)
     b_feature = random.sample(range(1, 1000), 100)
     c_feature = random.sample(range(1, 1000), 100)