Skip to content

Commit 775b90d

Browse files
eroell and Zethson authored
Clean up Round - cut down anndata extension functionality (#880)
* remove delete_from_obs * move get to proper get module, minimal test * remove generate_anndata, never used * delete unused private _detect_binary_columns * move anndata ext functions mostly relevant for testing to test * remove set_numeric_vars, reduce use get_column_indices * remove _assert_encoded completely idk why it would be needed even * remove not_encoded_error * remove get_numeric_vars * updates * fixes * underscore _assert_numeric_vars * update submodule * remove get_... * remove doc of delete_from_obs * fix typo * fix typo * to_dense and add get to docs Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> * polish Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> * submodule Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> * fix docs Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> * fix docs Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> * fix docs Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> --------- Signed-off-by: Lukas Heumos <lukas.heumos@posteo.net> Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>
1 parent 294aa6e commit 775b90d

File tree

14 files changed

+231
-614
lines changed

14 files changed

+231
-614
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,4 @@ notebooks/*-requirements.txt
158158

159159
*.DS_Store
160160
test.ipynb
161+
test2.ipynb

docs/api/anndata_index.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,15 @@
1616
anndata.df_to_anndata
1717
anndata.anndata_to_df
1818
anndata.move_to_obs
19-
anndata.delete_from_obs
2019
anndata.move_to_x
21-
anndata.get_obs_df
22-
anndata.get_var_df
23-
anndata.get_rank_features_df
20+
```
21+
22+
```{eval-rst}
23+
.. autosummary::
24+
:toctree: get
25+
:nosignatures:
26+
27+
get.obs_df
28+
get.var_df
29+
get.rank_features_groups_df
2430
```

ehrapy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
from ehrapy import anndata as ad
2121
from ehrapy import data as dt
22-
from ehrapy import io
22+
from ehrapy import get, io
2323
from ehrapy import plot as pl
2424
from ehrapy import preprocessing as pp
2525
from ehrapy import tools as tl

ehrapy/anndata/__init__.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,9 @@
66
)
77
from ehrapy.anndata.anndata_ext import (
88
anndata_to_df,
9-
delete_from_obs,
109
df_to_anndata,
11-
generate_anndata,
12-
get_obs_df,
13-
get_rank_features_df,
14-
get_var_df,
1510
move_to_obs,
1611
move_to_x,
17-
rank_genes_groups_df,
1812
)
1913

2014
__all__ = [
@@ -23,13 +17,7 @@
2317
"feature_type_overview",
2418
"infer_feature_types",
2519
"anndata_to_df",
26-
"delete_from_obs",
2720
"df_to_anndata",
28-
"generate_anndata",
29-
"get_obs_df",
30-
"get_rank_features_df",
31-
"get_var_df",
3221
"move_to_obs",
3322
"move_to_x",
34-
"rank_genes_groups_df",
3523
]

ehrapy/anndata/_feature_specifications.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,27 @@ def infer_feature_types(
130130
raise ValueError(f"Output format {output} not recognized. Choose between 'tree', 'dataframe', or None.")
131131

132132

133+
# TODO: this function is a different flavor of inferring feature types. We should decide on a single implementation in the future.
134+
def _infer_numerical_column_indices(
135+
adata: AnnData, layer: str | None = None, column_indices: Iterable[int] | None = None
136+
) -> list[int]:
137+
mtx = adata.X if layer is None else adata[layer]
138+
indices = (
139+
list(range(mtx.shape[1])) if column_indices is None else [i for i in column_indices if i < mtx.shape[1] - 1]
140+
)
141+
non_numerical_indices = []
142+
for i in indices:
143+
# The astype("float64") call will throw only if the feature’s data type cannot be cast to float64, meaning in
144+
# practice it contains non-numeric values. Consequently, it won’t throw if the values are numeric but stored
145+
# as an "object" dtype, as astype("float64") can successfully convert them to floats.
146+
try:
147+
mtx[::, i].astype("float64")
148+
except ValueError:
149+
non_numerical_indices.append(i)
150+
151+
return [idx for idx in indices if idx not in non_numerical_indices]
152+
153+
133154
def check_feature_types(func):
134155
@wraps(func)
135156
def wrapper(adata, *args, **kwargs):

0 commit comments

Comments
 (0)