From 66ea716407a41c1afa8c41a64b1b29eada0ef8ea Mon Sep 17 00:00:00 2001 From: Ken Brewer Date: Sat, 30 Mar 2024 01:20:59 +0000 Subject: [PATCH] refactor: apply bugbear linting checks and fixes --- pycytominer/aggregate.py | 2 +- pycytominer/annotate.py | 6 +++-- pycytominer/consensus.py | 8 +++++-- .../cyto_utils/DeepProfiler_processing.py | 6 +++-- pycytominer/cyto_utils/cell_locations.py | 2 +- pycytominer/cyto_utils/cells.py | 10 ++++---- pycytominer/cyto_utils/collate.py | 6 ++--- pycytominer/cyto_utils/features.py | 2 +- pycytominer/cyto_utils/load.py | 4 ++-- pycytominer/cyto_utils/output.py | 7 +++++- pycytominer/cyto_utils/util.py | 24 ++++++++----------- pycytominer/cyto_utils/write_gct.py | 2 +- pyproject.toml | 2 +- tests/test_cyto_utils/test_cells.py | 7 ++++-- 14 files changed, 50 insertions(+), 38 deletions(-) diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py index 3ee945a8..ed86a12f 100644 --- a/pycytominer/aggregate.py +++ b/pycytominer/aggregate.py @@ -13,7 +13,7 @@ def aggregate( population_df, - strata=["Metadata_Plate", "Metadata_Well"], + strata=("Metadata_Plate", "Metadata_Well"), features="infer", operation="median", output_file=None, diff --git a/pycytominer/annotate.py b/pycytominer/annotate.py index 2f616dc1..1a097266 100644 --- a/pycytominer/annotate.py +++ b/pycytominer/annotate.py @@ -17,7 +17,7 @@ def annotate( profiles, platemap, - join_on=["Metadata_well_position", "Metadata_Well"], + join_on=("Metadata_well_position", "Metadata_Well"), output_file=None, output_type="csv", add_metadata_id_to_platemap=True, @@ -28,7 +28,7 @@ def annotate( external_join_right=None, compression_options=None, float_format=None, - cmap_args={}, + cmap_args=None, **kwargs, ): """Add metadata to aggregated profiles. @@ -76,6 +76,8 @@ def annotate( data. """ + cmap_args = cmap_args if cmap_args is not None else {} + # Load Data profiles = load_profiles(profiles) platemap = load_platemap(platemap, add_metadata_id_to_platemap) diff --git a/pycytominer/consensus.py b/pycytominer/consensus.py index b1326abe..8cb6df57 100644 --- a/pycytominer/consensus.py +++ b/pycytominer/consensus.py @@ -13,14 +13,14 @@ def consensus( profiles, - replicate_columns=["Metadata_Plate", "Metadata_Well"], + replicate_columns=("Metadata_Plate", "Metadata_Well"), operation="median", features="infer", output_file=None, output_type="csv", compression_options=None, float_format=None, - modz_args={"method": "spearman"}, + modz_args=None, ): """Form level 5 consensus profile data. @@ -95,6 +95,10 @@ def consensus( output_file=None, ) """ + # Set default modz_args + if modz_args is None: + modz_args = {"method": "spearman"} + # Confirm that the operation is supported check_consensus_operation(operation) diff --git a/pycytominer/cyto_utils/DeepProfiler_processing.py b/pycytominer/cyto_utils/DeepProfiler_processing.py index 5991063e..b4bfc5d2 100644 --- a/pycytominer/cyto_utils/DeepProfiler_processing.py +++ b/pycytominer/cyto_utils/DeepProfiler_processing.py @@ -248,7 +248,8 @@ def aggregate_deep(self): # if no files were found there is a miss-match between the index and the output files if not len(arr): warnings.warn( - f"No files for the key {metadata_level} could be found.\nThis program will continue, but be aware that this might induce errors!" + f"No files for the key {metadata_level} could be found.\nThis program will continue, but be aware that this might induce errors!", + stacklevel=1, ) continue df = pd.concat(arr) @@ -384,7 +385,8 @@ def get_single_cells( # skip a file if there are no features if len(features.index) == 0: warnings.warn( - f"No features could be found at {features_path}.\nThis program will continue, but be aware that this might induce errors!" + f"No features could be found at {features_path}.\nThis program will continue, but be aware that this might induce errors!", + stacklevel=1, ) continue locations = load_npz_locations( diff --git a/pycytominer/cyto_utils/cell_locations.py b/pycytominer/cyto_utils/cell_locations.py index 612ae873..ccf40ab6 100644 --- a/pycytominer/cyto_utils/cell_locations.py +++ b/pycytominer/cyto_utils/cell_locations.py @@ -86,7 +86,7 @@ def __init__( image_column: str = "ImageNumber", object_column: str = "ObjectNumber", table_column: str = "TableNumber", - image_key: list = ["Metadata_Plate", "Metadata_Well", "Metadata_Site"], + image_key: list = ("Metadata_Plate", "Metadata_Well", "Metadata_Site"), cell_x_loc: str = "Nuclei_Location_Center_X", cell_y_loc: str = "Nuclei_Location_Center_Y", ): diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py index b9cb351b..e57a3645 100644 --- a/pycytominer/cyto_utils/cells.py +++ b/pycytominer/cyto_utils/cells.py @@ -95,13 +95,13 @@ class SingleCells: def __init__( self, sql_file, - strata=["Metadata_Plate", "Metadata_Well"], + strata=("Metadata_Plate", "Metadata_Well"), aggregation_operation="median", output_file=None, compartments=default_compartments, compartment_linking_cols=default_linking_cols, - merge_cols=["TableNumber", "ImageNumber"], - image_cols=["TableNumber", "ImageNumber", "Metadata_Site"], + merge_cols=("TableNumber", "ImageNumber"), + image_cols=("TableNumber", "ImageNumber", "Metadata_Site"), add_image_features=False, image_feature_categories=None, features="infer", @@ -241,8 +241,8 @@ def set_subsample_n(self, subsample_n): try: self.subsample_n = int(subsample_n) - except ValueError: - raise ValueError("subsample n must be an integer or coercable") + except ValueError as e: + raise ValueError("subsample n must be an integer or coercable") from e self._check_subsampling() def set_subsample_random_state(self, random_state): diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py index f9e26e2d..469fafaa 100644 --- a/pycytominer/cyto_utils/collate.py +++ b/pycytominer/cyto_utils/collate.py @@ -31,7 +31,7 @@ def collate( tmp_dir="/tmp", overwrite=False, add_image_features=True, - image_feature_categories=["Granularity", "Texture", "ImageQuality", "Threshold"], + image_feature_categories=("Granularity", "Texture", "ImageQuality", "Threshold"), printtoscreen=True, ): """Collate the CellProfiler-created CSVs into a single SQLite file by calling cytominer-database @@ -74,12 +74,12 @@ def collate( try: import cytominer_database.ingest import cytominer_database.munge - except ImportError: + except ImportError as e: raise ImportError( """Optional dependency cytominer-database is not installed. Please install the `collate` optional dependency group: e.g. `pip install pycytominer[collate]` """ - ) + ) from e # Set up directories (these need to be abspaths to keep from confusing makedirs later) input_dir = pathlib.Path(f"{base_directory}/analysis/{batch}/{plate}/{csv_dir}") diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py index b88aba34..62e6e18a 100644 --- a/pycytominer/cyto_utils/features.py +++ b/pycytominer/cyto_utils/features.py @@ -75,7 +75,7 @@ def label_compartment(cp_features, compartment, metadata_cols): def infer_cp_features( population_df, - compartments=["Cells", "Nuclei", "Cytoplasm"], + compartments=("Cells", "Nuclei", "Cytoplasm"), metadata=False, image_features=False, ): diff --git a/pycytominer/cyto_utils/load.py b/pycytominer/cyto_utils/load.py index 7abdd06d..78ccee99 100644 --- a/pycytominer/cyto_utils/load.py +++ b/pycytominer/cyto_utils/load.py @@ -121,8 +121,8 @@ def load_platemap(platemap, add_metadata_id=True): try: delim = infer_delim(platemap) platemap = pd.read_csv(platemap, sep=delim) - except FileNotFoundError: - raise FileNotFoundError(f"{platemap} platemap file not found") + except FileNotFoundError as e: + raise FileNotFoundError(f"{platemap} platemap file not found") from e else: # Setting platemap to a copy to prevent column name changes from back-propagating platemap = platemap.copy() diff --git a/pycytominer/cyto_utils/output.py b/pycytominer/cyto_utils/output.py index 5018df86..d650fb53 100644 --- a/pycytominer/cyto_utils/output.py +++ b/pycytominer/cyto_utils/output.py @@ -15,7 +15,7 @@ def output( output_type: str = "csv", sep: str = ",", float_format: Optional[str] = None, - compression_options: Union[str, Dict] = {"method": "gzip", "mtime": 1}, + compression_options: Union[str, Dict, None] = None, **kwargs, ): """Given an output file and compression options, write file to disk @@ -79,6 +79,11 @@ def output( ) """ + # Set default compression options (done outside of function signature to avoid mutable default arguments) + compression_options = ( + compression_options if compression_options else {"method": "gzip", "mtime": 1} + ) + if output_type == "csv": compression_options = set_compression_method(compression=compression_options) diff --git a/pycytominer/cyto_utils/util.py b/pycytominer/cyto_utils/util.py index 948a17ef..a31aadf3 100644 --- a/pycytominer/cyto_utils/util.py +++ b/pycytominer/cyto_utils/util.py @@ -56,7 +56,7 @@ def check_compartments(compartments): warn_str = "Non-canonical compartment detected: {x}".format( x=", ".join(non_canonical_compartments) ) - warnings.warn(warn_str) + warnings.warn(warn_str, stacklevel=1) def load_known_metadata_dictionary(metadata_file=default_metadata_file): @@ -186,10 +186,10 @@ def check_fields_of_view_format(fields_of_view): else: try: return list(map(int, fields_of_view)) - except ValueError: + except ValueError as e: raise TypeError( "Variables of type int expected, however some of the input fields of view are not integers." - ) + ) from e else: raise TypeError( f"Variable of type list expected, however type {type(fields_of_view)} was passed." @@ -215,11 +215,9 @@ def check_fields_of_view(data_fields_of_view, input_fields_of_view): """ - try: - assert len( - list(np.intersect1d(data_fields_of_view, input_fields_of_view)) - ) == len(input_fields_of_view) - except AssertionError: + if not len(list(np.intersect1d(data_fields_of_view, input_fields_of_view))) == len( + input_fields_of_view + ): raise ValueError( "Some of the input fields of view are not present in the image table." ) @@ -248,12 +246,10 @@ def check_image_features(image_features, image_columns): else: level = 0 - try: - assert all( - feature in list({img_col.split("_")[level] for img_col in image_columns}) - for feature in image_features - ) - except AssertionError: + if not all( + feature in list({img_col.split("_")[level] for img_col in image_columns}) + for feature in image_features + ): raise ValueError( "Some of the input image features are not present in the image table." ) diff --git a/pycytominer/cyto_utils/write_gct.py b/pycytominer/cyto_utils/write_gct.py index a058da3d..fc9ea3aa 100644 --- a/pycytominer/cyto_utils/write_gct.py +++ b/pycytominer/cyto_utils/write_gct.py @@ -101,5 +101,5 @@ def write_gct( gctwriter = csv.writer(gctfile, delimiter="\t") gctwriter.writerow([version]) gctwriter.writerow(data_dimensions) - for feature, row in full_df.iterrows(): + for _, row in full_df.iterrows(): gctwriter.writerow(row) diff --git a/pyproject.toml b/pyproject.toml index be34d95a..94f0a440 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,7 +144,7 @@ select = [ # flake8-bandit # "S", # flake8-bugbear - # "B", + "B", # isort # "I", # mccabe diff --git a/tests/test_cyto_utils/test_cells.py b/tests/test_cyto_utils/test_cells.py index eb686023..36891fc5 100644 --- a/tests/test_cyto_utils/test_cells.py +++ b/tests/test_cyto_utils/test_cells.py @@ -21,9 +21,12 @@ def build_random_data( compartment="cells", - ImageNumber=sorted(["x", "y"] * 50), - TableNumber=sorted(["x_hash", "y_hash"] * 50), + ImageNumber=None, + TableNumber=None, ): + # Set mutable default values outside of function signature + ImageNumber = ImageNumber if ImageNumber else sorted(["x", "y"] * 50) + TableNumber = TableNumber if TableNumber else sorted(["x_hash", "y_hash"] * 50) a_feature = random.sample(range(1, 1000), 100) b_feature = random.sample(range(1, 1000), 100) c_feature = random.sample(range(1, 1000), 100)