Bug fixes resolVI. (#3308)

canergen · pre-commit-ci[bot] · web-flow · commit b3648e082800 · 2025-05-08T15:01:24.000+02:00
Several bug fixes for reported issues. #3283 #3208 #3289 #3267 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,10 +23,13 @@ to [Semantic Versioning]. Full commit history is available in the
 - Add consideration for missing monitor set during early stopping. {pr}`3226`.
 - Fix bug in SysVI get_normalized_expression function. {pr}`3255`.
 - Add support for IntegratedGradients for multimodal models. {pr}`3264`.
+- Fix bug in resolVI get_normalized_expression function. {pr}`3308`.
+- Fix bug in resolVI gene-batch dispersion. {pr}`3308`.
 
 #### Changed
 
 - Updated Scvi-Tools AWS hub to Weizmann instead of Berkeley. {pr}`3246`.
+- Updated resolVI to use rapids-singlecell for neighbor computation when it is installed, falling back to scanpy otherwise. {pr}`3308`.
 
 #### Removed
 
diff --git a/src/scvi/external/resolvi/_model.py b/src/scvi/external/resolvi/_model.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import importlib.util
 import logging
 from functools import partial
 from typing import TYPE_CHECKING
@@ -343,7 +342,11 @@ def setup_anndata(
         cls.register_manager(adata_manager)
 
     @staticmethod
-    def _prepare_data(adata, n_neighbors=10, spatial_rep="X_spatial", batch_key=None, **kwargs):
+    def _prepare_data(
+        adata, n_neighbors=10, spatial_rep="X_spatial", batch_key=None, slice_key=None, **kwargs
+    ):
+        if slice_key is not None:
+            batch_key = slice_key
         try:
             import scanpy
             from sklearn.neighbors._base import _kneighbors_from_graph
@@ -365,13 +368,15 @@ def _prepare_data(adata, n_neighbors=10, spatial_rep="X_spatial", batch_key=None
 
         for index in indices:
             sub_data = adata[index].copy()
-            if importlib.util.find_spec("cuml") is not None:
-                method = "rapids"
-            else:
-                method = "umap"
-            scanpy.pp.neighbors(
-                sub_data, n_neighbors=n_neighbors + 5, use_rep=spatial_rep, method=method
-            )
+            try:
+                import rapids_singlecell
+
+                print("RAPIDS SingleCell is installed and can be imported")
+                rapids_singlecell.pp.neighbors(
+                    sub_data, n_neighbors=n_neighbors + 5, use_rep=spatial_rep
+                )
+            except ImportError:
+                scanpy.pp.neighbors(sub_data, n_neighbors=n_neighbors + 5, use_rep=spatial_rep)
             distances = sub_data.obsp["distances"] ** 2
 
             distance_neighbor[index, :], index_neighbor_batch = _kneighbors_from_graph(
diff --git a/src/scvi/external/resolvi/_module.py b/src/scvi/external/resolvi/_module.py
@@ -163,8 +163,7 @@ def __init__(
             init_px_r = torch.full([n_input, n_batch], 0.01)
         else:
             raise ValueError(
-                "dispersion must be one of ['gene', 'gene-batch', 'gene-label'], but input was "
-                "{}.format(self.dispersion)"
+                f"dispersion must be one of ['gene', 'gene-batch'], but input was {dispersion}."
             )
         self.register_buffer("px_r", init_px_r)
 
@@ -751,8 +750,7 @@ def __init__(
             init_px_r = torch.full([n_input, n_batch], 0.01)
         else:
             raise ValueError(
-                "dispersion must be one of ['gene', 'gene-batch', 'gene-label'], but input was "
-                "{}.format(dispersion)"
+                f"dispersion must be one of ['gene', 'gene-batch'], but input was {dispersion}."
             )
         self.register_buffer("px_r", init_px_r)
         self.register_buffer("per_neighbor_diffusion_init", torch.zeros([n_obs, n_neighbors]))
@@ -868,7 +866,10 @@ def forward(  # not used arguments to have same set of arguments in model and gu
 
             if self.dispersion == "gene-batch":
                 px_r_inv = F.linear(
-                    torch.nn.functional.one_hot(batch_index.flatten(), self.n_batch), px_r_mle
+                    torch.nn.functional.one_hot(batch_index.flatten(), self.n_batch).to(
+                        px_r_mle.dtype
+                    ),
+                    px_r_mle,
                 )
             elif self.dispersion == "gene":
                 px_r_inv = px_r_mle
diff --git a/src/scvi/external/resolvi/_utils.py b/src/scvi/external/resolvi/_utils.py
@@ -229,7 +229,7 @@ def get_normalized_expression(
         library_size
             Scale the expression frequencies to a common library size.
             This allows gene expression levels to be interpreted on a common scale of relevant
-            magnitude. If set to `"latent"`, use the latent library size.
+            magnitude.
         n_samples
             Number of posterior samples to use for estimation.
         n_samples_overall
@@ -301,32 +301,28 @@ def get_normalized_expression(
                     kwargs["batch_index"],
                     *categorical_input,
                 )
-                z = torch.distributions.Normal(qz_m, qz_v.sqrt()).sample(
-                    [
-                        n_samples,
-                    ]
-                )
+                z = torch.distributions.Normal(qz_m, qz_v.sqrt()).sample([n_samples])
 
                 if kwargs["cat_covs"] is not None:
                     categorical_input = list(torch.split(kwargs["cat_covs"], 1, dim=1))
                 else:
                     categorical_input = ()
                 if batch is not None:
-                    batch = torch.full_like(kwargs["batch"], batch)
+                    batch = torch.full_like(kwargs["batch_index"], batch)
                 else:
                     batch = kwargs["batch_index"]
 
                 px_scale, _, px_rate, _ = self.module.model.decoder(
                     self.module.model.dispersion, z, kwargs["library"], batch, *categorical_input
                 )
                 if library_size is not None:
-                    exp_ = library_size * px_scale.reshape(-1, px_scale.shape[-1])
+                    exp_ = library_size * px_scale
                 else:
-                    exp_ = px_rate.reshape(-1, px_scale.shape[-1])
+                    exp_ = px_rate
 
                 exp_ = exp_[..., gene_mask]
                 per_batch_exprs.append(exp_[None].cpu())
-            per_batch_exprs = torch.cat(per_batch_exprs, dim=0).numpy()
+            per_batch_exprs = torch.cat(per_batch_exprs, dim=0).mean(0).numpy()
             exprs.append(per_batch_exprs)
 
         exprs = np.concatenate(exprs, axis=1)
diff --git a/tests/external/resolvi/test_resolvi.py b/tests/external/resolvi/test_resolvi.py
@@ -23,6 +23,10 @@ def test_resolvi_train(adata):
     model.train(
         max_epochs=2,
     )
+    model = RESOLVI(adata, dispersion="gene-batch")
+    model.train(
+        max_epochs=2,
+    )
 
 
 def test_resolvi_save_load(adata):
@@ -52,8 +56,21 @@ def test_resolvi_downstream(adata):
     )
     latent = model.get_latent_representation()
     assert latent.shape == (adata.n_obs, model.module.n_latent)
+    counts = model.get_normalized_expression(n_samples=31, library_size=10000)
+    counts = model.get_normalized_expression_importance(n_samples=30, library_size=10000)
+    print("FFFFFF", counts.shape)
     model.differential_expression(groupby="labels")
     model.differential_expression(groupby="labels", weights="importance")
+    model.sample_posterior(
+        model=model.module.model_residuals,
+        num_samples=30,
+        return_samples=False,
+        return_sites=None,
+        batch_size=1000,
+    )
+    model.sample_posterior(
+        model=model.module.model_residuals, num_samples=30, return_samples=False, batch_size=1000
+    )
     model_query = model.load_query_data(reference_model=model, adata=adata)
     model_query = model.load_query_data(reference_model="test_resolvi", adata=adata)
     model_query.train(