Skip to content

Commit

Permalink
Merge pull request #110 from compomics/fix/psm_list_dtypes
Browse files Browse the repository at this point in the history
Fix dtypes in numpy arrays from PSMList accession
  • Loading branch information
RalfG authored Jan 17, 2025
2 parents 0ba376d + 3961624 commit 24f2a00
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 3 deletions.
24 changes: 23 additions & 1 deletion psm_utils/psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Any, Dict, List, Optional, Union

from pydantic import ConfigDict, BaseModel
from pydantic import BaseModel, ConfigDict

from psm_utils.peptidoform import Peptidoform

Expand Down Expand Up @@ -135,3 +135,25 @@ def get_usi(self, as_url=False) -> str:
if as_url:
usi = "http://proteomecentral.proteomexchange.org/usi/?usi=" + usi
return usi


# Preferred NumPy dtype per PSM field, keyed by field name.
#
# ``PSMList.__getitem__`` looks up the requested field here and hands the
# result to ``np.fromiter`` as ``dtype``; when NumPy raises ``TypeError`` for
# that typed conversion, it retries with ``dtype=object``.
NUMPY_DTYPES = dict(
    # Peptidoform objects and identifier-like fields
    peptidoform=Peptidoform,
    spectrum_id=object,
    run=object,
    collection=object,
    spectrum=object,
    # Scalar fields with a native NumPy representation
    is_decoy=bool,
    score=float,
    qvalue=float,
    pep=float,
    precursor_mz=float,
    retention_time=float,
    ion_mobility=float,
    protein_list=object,
    rank=int,
    # Remaining fields are kept as generic Python objects
    source=object,
    provenance_data=object,
    metadata=object,
    rescoring_features=object,
)
11 changes: 9 additions & 2 deletions psm_utils/psm_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyteomics import auxiliary, proforma
from rich.pretty import pretty_repr

from psm_utils.psm import PSM
from psm_utils.psm import NUMPY_DTYPES, PSM


class PSMList(BaseModel):
Expand Down Expand Up @@ -98,7 +98,14 @@ def __getitem__(self, item) -> PSM | list[PSM]:
return PSMList(psm_list=self.psm_list[item])
elif isinstance(item, str):
# Return PSM property as array across full PSMList
return np.fromiter([psm[item] for psm in self.psm_list], dtype=object, count=len(self))
try:
return np.fromiter(
(psm[item] for psm in self.psm_list), dtype=NUMPY_DTYPES[item], count=len(self)
)
except TypeError:
return np.fromiter(
(psm[item] for psm in self.psm_list), dtype=object, count=len(self)
)
elif _is_iterable_of_bools(item):
# Return new PSMList with items that were True
return PSMList(psm_list=[self.psm_list[i] for i in np.flatnonzero(item)])
Expand Down
3 changes: 3 additions & 0 deletions tests/test_psm_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def test___get_item__(self):

# PSM property as array
np.testing.assert_equal(psm_list["spectrum_id"], np.array(["1", "2", "3"]))
np.testing.assert_equal(psm_list["score"], np.array([140.2, 132.9, 55.7]))
np.testing.assert_equal(psm_list["rank"], np.array([None, None, None]))
np.testing.assert_equal(psm_list["qvalue"], np.array([np.nan, np.nan, np.nan]))

# Multiple PSM properties as 2D array
np.testing.assert_equal(
Expand Down

0 comments on commit 24f2a00

Please sign in to comment.