Reduce package dependencies (#57)

jkanche · web-flow · commit 1f3758c95ca3 · 2025-03-13T16:37:45.000-07:00
- All dependencies are now listed under optional, except for numpy and biocutils.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Version 0.7.0
+
+- All dependencies are now listed under optional, except for numpy and biocutils.
+
 ## Version 0.6.1
 
 - Fix name of the attribute that contains names of dimensions in matrices.
diff --git a/README.md b/README.md
@@ -4,10 +4,12 @@
 
 # rds2py
 
-Parse and construct Python representations for datasets stored in RDS files. `rds2py` supports various base classes from R, and Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` S4 classes. ***For more details, check out [rds2cpp library](https://github.com/LTLA/rds2cpp).***
+Parse and construct Python representations for datasets stored in RDS files. `rds2py` supports various base classes from R, and Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` S4 classes. **_For more details, check out [rds2cpp library](https://github.com/LTLA/rds2cpp)._**
 
 ---
+
 **Version 0.5.0** brings major changes to the package,
+
 - Complete overhaul of the codebase using pybind11
 - Streamlined readers for R data types
 - Updated API for all classes and methods
@@ -18,7 +20,7 @@ Please refer to the [documentation](https://biocpy.github.io/rds2py/) for the la
 
 The package provides:
 
-- Efficient parsing of RDS files with *minimal* memory overhead
+- Efficient parsing of RDS files with _minimal_ memory overhead
 - Support for R's basic data types and complex S4 objects
   - Vectors (numeric, character, logical)
   - Factors
@@ -48,51 +50,61 @@ pip install rds2py
 pip install rds2py[optional]
 ```
 
+By default, the package does not install the packages needed to convert Python representations into BiocPy classes. To enable these conversions, install the optional dependencies with `pip install rds2py[optional]`.
+
 ## Usage
 
 If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases).
 
-### Basic Usage
-
 ```python
 from rds2py import read_rds
 r_obj = read_rds("path/to/file.rds")
 ```
 
 The returned `r_obj` either returns an appropriate Python class if a parser is already implemented or returns the dictionary containing the data from the RDS file.
 
-## Write-your-own-reader
+In addition, the package provides the dictionary representation of the RDS file.
+
+```python
+from rds2py import parse_rds
+
+robject_dict = parse_rds("path/to/file.rds")
+print(robject_dict)
+```
+
+### Write-your-own-reader
 
-In addition, the package provides the dictionary representation of the RDS file, allowing users to write their own custom readers into appropriate Python representations.
+Reading RDS files as dictionary representations allows users to write their own custom readers that convert the parsed data into appropriate Python representations.
 
 ```python
 from rds2py import parse_rds
 
-data = parse_rds("path/to/file.rds")
-print(data)
+robject = parse_rds("path/to/file.rds")
+print(robject)
 ```
 
 if you know this RDS file contains an `GenomicRanges` object, you can use the built-in reader or write your own reader to convert this dictionary.
 
 ```python
 from rds2py.read_granges import read_genomic_ranges
 
-gr = read_genomic_ranges(data)
+gr = read_genomic_ranges(robject)
+print(gr)
 ```
 
 ## Type Conversion Reference
 
-| R Type | Python/NumPy Type |
-|--------|------------------|
-| numeric | numpy.ndarray (float64) |
-| integer | numpy.ndarray (int32) |
-| character | list of str |
-| logical | numpy.ndarray (bool) |
-| factor | list |
-| data.frame | BiocFrame |
-| matrix | numpy.ndarray or scipy.sparse matrix |
-| dgCMatrix | scipy.sparse.csc_matrix |
-| dgRMatrix | scipy.sparse.csr_matrix |
+| R Type     | Python/NumPy Type                    |
+| ---------- | ------------------------------------ |
+| numeric    | numpy.ndarray (float64)              |
+| integer    | numpy.ndarray (int32)                |
+| character  | list of str                          |
+| logical    | numpy.ndarray (bool)                 |
+| factor     | list                                 |
+| data.frame | BiocFrame                            |
+| matrix     | numpy.ndarray or scipy.sparse matrix |
+| dgCMatrix  | scipy.sparse.csc_matrix              |
+| dgRMatrix  | scipy.sparse.csr_matrix              |
 
 ## Developer Notes
 
diff --git a/setup.cfg b/setup.cfg
@@ -50,13 +50,7 @@ python_requires = >=3.9
 install_requires =
     importlib-metadata; python_version<"3.8"
     numpy
-    scipy
-    biocframe
     biocutils>=0.1.5
-    genomicranges>=0.4.9
-    summarizedexperiment>=0.4.1
-    singlecellexperiment>=0.4.1
-    multiassayexperiment
 
 [options.packages.find]
 where = src
@@ -70,14 +64,19 @@ exclude =
 optional =
     pandas
     hdf5array
+    scipy
+    biocframe
+    genomicranges>=0.4.9
+    summarizedexperiment>=0.4.1
+    singlecellexperiment>=0.4.1
+    multiassayexperiment
 
 # Add here test requirements (semicolon/line-separated)
 testing =
     setuptools
     pytest
     pytest-cov
-    pandas
-    hdf5array
+    %(optional)s
 
 [options.entry_points]
 # Add here console scripts like:
diff --git a/src/rds2py/read_delayed_matrix.py b/src/rds2py/read_delayed_matrix.py
@@ -1,7 +1,5 @@
 """Functions and classes for parsing R delayed matrix objects from HDF5Array."""
 
-from hdf5array import Hdf5CompressedSparseMatrix
-
 from .generics import _dispatcher
 from .rdsutils import get_class
 
@@ -10,7 +8,7 @@
 __license__ = "MIT"
 
 
-def read_hdf5_sparse(robject: dict, **kwargs) -> Hdf5CompressedSparseMatrix:
+def read_hdf5_sparse(robject: dict, **kwargs):
     """Convert an R delayed sparse array (H5-backed).
 
     Args:
@@ -38,4 +36,6 @@ def read_hdf5_sparse(robject: dict, **kwargs) -> Hdf5CompressedSparseMatrix:
     fpath = list(_dispatcher(_seed_obj["attributes"]["filepath"], **kwargs))[0]
     group_name = list(_dispatcher(_seed_obj["attributes"]["group"], **kwargs))[0]
 
+    from hdf5array import Hdf5CompressedSparseMatrix
+
     return Hdf5CompressedSparseMatrix(path=fpath, group_name=group_name, shape=shape, by_column=by_column)
diff --git a/src/rds2py/read_granges.py b/src/rds2py/read_granges.py
@@ -4,9 +4,6 @@
 equivalents, preserving all genomic coordinates and associated metadata.
 """
 
-from genomicranges import GenomicRanges, GenomicRangesList, SeqInfo
-from iranges import IRanges
-
 from .generics import _dispatcher
 from .rdsutils import get_class
 
@@ -15,7 +12,7 @@
 __license__ = "MIT"
 
 
-def read_genomic_ranges(robject: dict, **kwargs) -> GenomicRanges:
+def read_genomic_ranges(robject: dict, **kwargs):
     """Convert an R `GenomicRanges` object to a Python :py:class:`~genomicranges.GenomicRanges` object.
 
     Args:
@@ -29,6 +26,10 @@ def read_genomic_ranges(robject: dict, **kwargs) -> GenomicRanges:
         A Python `GenomicRanges` object containing genomic intervals
         with associated annotations.
     """
+
+    from genomicranges import GenomicRanges, SeqInfo
+    from iranges import IRanges
+
     _cls = get_class(robject)
 
     if _cls not in ["GenomicRanges", "GRanges"]:
@@ -74,7 +75,7 @@ def read_genomic_ranges(robject: dict, **kwargs) -> GenomicRanges:
     )
 
 
-def read_granges_list(robject: dict, **kwargs) -> GenomicRangesList:
+def read_granges_list(robject: dict, **kwargs):
     """Convert an R `GenomicRangesList` object to a Python :py:class:`~genomicranges.GenomicRangesList`.
 
     Args:
@@ -89,6 +90,8 @@ def read_granges_list(robject: dict, **kwargs) -> GenomicRangesList:
         `GenomicRanges` objects.
     """
 
+    from genomicranges import GenomicRangesList
+
     _cls = get_class(robject)
 
     if _cls not in ["CompressedGRangesList", "GRangesList"]:
diff --git a/src/rds2py/read_mae.py b/src/rds2py/read_mae.py
@@ -4,8 +4,6 @@
 preserving the complex relationships between multiple experimental assays and sample metadata.
 """
 
-from multiassayexperiment import MultiAssayExperiment
-
 from .generics import _dispatcher
 from .rdsutils import get_class
 from .read_matrix import MatrixWrapper
@@ -43,7 +41,7 @@ def _sanitize_expts(expts, **kwargs):
     return res
 
 
-def read_multi_assay_experiment(robject: dict, **kwargs) -> MultiAssayExperiment:
+def read_multi_assay_experiment(robject: dict, **kwargs):
     """Convert an R `MultiAssayExperiment` to a Python :py:class:`~multiassayexperiment.MultiAssayExperiment` object.
 
     Args:
@@ -73,6 +71,8 @@ def read_multi_assay_experiment(robject: dict, **kwargs) -> MultiAssayExperiment
     # parse coldata
     robj_coldata = _dispatcher(robject["attributes"]["colData"], **kwargs)
 
+    from multiassayexperiment import MultiAssayExperiment
+
     return MultiAssayExperiment(
         experiments=_sanitize_expts(robj_expts),
         sample_map=robj_samplemap,
diff --git a/src/rds2py/read_matrix.py b/src/rds2py/read_matrix.py
@@ -8,7 +8,6 @@
 from typing import Literal
 
 from numpy import ndarray
-from scipy.sparse import csc_matrix, csr_matrix, spmatrix
 
 from .generics import _dispatcher
 from .rdsutils import get_class
@@ -37,7 +36,7 @@ def __init__(self, matrix, dimnames=None) -> None:
         self.dimnames = dimnames
 
 
-def _as_sparse_matrix(robject: dict, **kwargs) -> spmatrix:
+def _as_sparse_matrix(robject: dict, **kwargs):
     """Convert an R sparse matrix to a SciPy sparse matrix.
 
     Notes:
@@ -57,6 +56,8 @@ def _as_sparse_matrix(robject: dict, **kwargs) -> spmatrix:
         A SciPy sparse matrix or wrapped matrix if dimension names exist.
     """
 
+    from scipy.sparse import csc_matrix, csr_matrix
+
     _cls = get_class(robject)
 
     if _cls not in ["dgCMatrix", "dgRMatrix", "dgTMatrix"]:
@@ -145,7 +146,7 @@ def _as_dense_matrix(robject, order: Literal["C", "F"] = "F", **kwargs) -> ndarr
     return mat
 
 
-def read_dgcmatrix(robject: dict, **kwargs) -> spmatrix:
+def read_dgcmatrix(robject: dict, **kwargs):
     """Parse an R dgCMatrix (sparse column matrix).
 
     Args:
@@ -161,7 +162,7 @@ def read_dgcmatrix(robject: dict, **kwargs) -> spmatrix:
     return _as_sparse_matrix(robject, **kwargs)
 
 
-def read_dgrmatrix(robject: dict, **kwargs) -> spmatrix:
+def read_dgrmatrix(robject: dict, **kwargs):
     """Parse an R dgRMatrix (sparse row matrix).
 
     Args:
@@ -177,7 +178,7 @@ def read_dgrmatrix(robject: dict, **kwargs) -> spmatrix:
     return _as_sparse_matrix(robject, **kwargs)
 
 
-def read_dgtmatrix(robject: dict, **kwargs) -> spmatrix:
+def read_dgtmatrix(robject: dict, **kwargs):
     """Parse an R dgTMatrix (sparse triplet matrix)..
 
     Args:
diff --git a/src/rds2py/read_sce.py b/src/rds2py/read_sce.py
@@ -5,8 +5,6 @@
 data including multiple assays, reduced dimensions, and alternative experiments.
 """
 
-from singlecellexperiment import SingleCellExperiment
-
 from .generics import _dispatcher
 from .rdsutils import get_class
 
@@ -30,7 +28,7 @@ def read_alts_summarized_experiment_by_column(robject: dict, **kwargs):
     return objs
 
 
-def read_single_cell_experiment(robject: dict, **kwargs) -> SingleCellExperiment:
+def read_single_cell_experiment(robject: dict, **kwargs):
     """Convert an R SingleCellExperiment to Python SingleCellExperiment.
 
     Args:
@@ -76,6 +74,8 @@ def read_single_cell_experiment(robject: dict, **kwargs) -> SingleCellExperiment
         # ignore colpairs for now, does anyone even use this ?
         # if col == "colPairs":
 
+    from singlecellexperiment import SingleCellExperiment
+
     return SingleCellExperiment(
         assays=_rse.assays,
         row_data=_rse.row_data,
diff --git a/src/rds2py/read_se.py b/src/rds2py/read_se.py
@@ -1,4 +1,8 @@
-from summarizedexperiment import RangedSummarizedExperiment, SummarizedExperiment
+"""Functions for parsing Bioconductor `SummarizedExperiment` objects.
+
+This module provides parsers for converting Bioconductor's `SummarizedExperiment`
+and `RangedSummarizedExperiment` objects into their Python equivalents.
+"""
 
 from .generics import _dispatcher
 from .rdsutils import get_class
@@ -27,7 +31,7 @@ def _sanitize_assays(assays):
     return res
 
 
-def read_summarized_experiment(robject: dict, **kwargs) -> SummarizedExperiment:
+def read_summarized_experiment(robject: dict, **kwargs):
     """Convert an R SummarizedExperiment to Python
     :py:class:`~summarizedexperiment.SummarizedExperiment.SummarizedExperiment`.
 
@@ -68,14 +72,16 @@ def read_summarized_experiment(robject: dict, **kwargs) -> SummarizedExperiment:
     # parse rowdata
     robj_rowdata = _sanitize_empty_frame(_dispatcher(robject["attributes"]["elementMetadata"], **kwargs), assay_dims[0])
 
+    from summarizedexperiment import SummarizedExperiment
+
     return SummarizedExperiment(
         assays=_sanitize_assays(robj_asys),
         row_data=robj_rowdata,
         column_data=robj_coldata,
     )
 
 
-def read_ranged_summarized_experiment(robject: dict, **kwargs) -> RangedSummarizedExperiment:
+def read_ranged_summarized_experiment(robject: dict, **kwargs):
     """Convert an R RangedSummarizedExperiment to its Python equivalent.
 
     Args:
@@ -102,6 +108,8 @@ def read_ranged_summarized_experiment(robject: dict, **kwargs) -> RangedSummariz
     if "rowRanges" in robject["attributes"]:
         row_ranges_data = _dispatcher(robject["attributes"]["rowRanges"], **kwargs)
 
+    from summarizedexperiment import RangedSummarizedExperiment
+
     return RangedSummarizedExperiment(
         assays=_se.assays,
         row_data=_se.row_data,