Skip to content

Commit

Permalink
feature: cached loaded peptides
Browse files Browse the repository at this point in the history
This is quite convenient in a Jupyter notebook setting when using the
plotChromatograms() function. In this case, the chromatograms do not
have to be loaded from disk again because they are already present in the cache.
  • Loading branch information
jcharkow committed Oct 30, 2024
1 parent bffe291 commit f886203
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 4 deletions.
4 changes: 4 additions & 0 deletions massdash/loaders/MzMLDataLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .access import MzMLDataAccess, OSWDataAccess
from .SpectralLibraryLoader import SpectralLibraryLoader
from .GenericSpectrumLoader import GenericSpectrumLoader
from .ResultsLoader import ResultsLoader
# Structs
from ..structs import TransitionGroup, FeatureMap, TargetedDIAConfig, FeatureMapCollection, TopTransitionGroupFeatureCollection, TransitionGroupCollection
# Utils
Expand Down Expand Up @@ -47,6 +48,7 @@ def __init__(self, libraryFile, **kwargs):
if self.libraryAccess is None:
raise ValueError("If .osw file is not supplied, library file is required for MzMLDataLoader to perform targeted extraction")

@ResultsLoader.cache_results
def loadTransitionGroups(self, pep_id: str, charge: int, config: TargetedDIAConfig, runNames: Union[None, str, List[str]]=None) -> Dict[str, TransitionGroup]:
'''
Loads the transition group for a given peptide ID and charge across all files
Expand All @@ -63,6 +65,7 @@ def loadTransitionGroups(self, pep_id: str, charge: int, config: TargetedDIAConf

return TransitionGroupCollection({ run: data.to_chromatograms() for run, data in out_feature_map.items() })

@ResultsLoader.cache_results
def loadTransitionGroupsDf(self, pep_id: str, charge: int, config: TargetedDIAConfig) -> Dict[str, pd.DataFrame]:
'''
Loads the transition group for a given peptide ID and charge across all files into a pandas DataFrame
Expand All @@ -86,6 +89,7 @@ def loadTransitionGroupsDf(self, pep_id: str, charge: int, config: TargetedDIACo
out_df = out_df.loc[:,~out_df.columns.duplicated()]
return out_df

@ResultsLoader.cache_results
def loadFeatureMaps(self, pep_id: str, charge: int, config=TargetedDIAConfig, runNames: Union[None, str, List[str]] = None) -> FeatureMapCollection:
'''
Loads a dictionary of FeatureMaps (where the keys are the filenames) from the results file
Expand Down
28 changes: 25 additions & 3 deletions massdash/loaders/ResultsLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,19 @@
import pandas as pd
from pathlib import Path
import numpy as np
from functools import wraps

# Plotting
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange, Whisker, Legend, HoverTool
from bokeh.palettes import Category10
from bokeh.layouts import gridplot
from itertools import cycle
import plotly.express as px

# Loaders
from .SpectralLibraryLoader import SpectralLibraryLoader
from .access import OSWDataAccess, ResultsTSVDataAccess
# Structs
from ..structs import TransitionGroup, TransitionGroupFeatureCollection, TopTransitionGroupFeatureCollection
from ..structs import TransitionGroupFeatureCollection
# Utils
from ..util import LOGGER

Expand Down Expand Up @@ -51,6 +50,7 @@ def __init__(self,
self.software = None
self._oswAccess = None
self._oswAccessChecked = False
self.cache = {} # holds the cache of previously loaded peptides so do not have to load again

if isinstance(rsltsFile, str):
self.rsltsFile = [rsltsFile]
Expand Down Expand Up @@ -94,6 +94,24 @@ def _loadSoftware(self):
'''
return [i.getSoftware() for i in self.rsltsAccess]

def cache_results(func):
    '''
    Decorator that caches the result of a loader method on the instance.

    Results are stored in ``self.cache``, a dictionary keyed first by ``(pep_id, charge)`` and then by
    the decorated method's name together with any additional arguments of the call. Including the
    additional arguments in the key means that a call with different extra arguments (for example a
    different ``runNames`` selection) is computed separately instead of returning the result of an
    earlier call made with other arguments.

    Lists, tuples, sets and dictionaries among the additional arguments are converted to hashable
    equivalents before being used in the key. If an additional argument still cannot be hashed, the
    call is forwarded to ``func`` without caching.

    NOTE(review): arguments that hash by identity are keyed by identity, so mutating such an object in
    place between calls would not be detected here -- confirm how config objects are used by callers.

    Args:
        func (Callable): method invoked as ``func(self, pep_id, charge, *args, **kwargs)``; the instance
            must expose a dictionary attribute ``cache``

    Returns:
        Callable: wrapper around ``func`` that returns the cached result when one is available
    '''
    def _freeze(value):
        # Recursively turn the common unhashable containers into hashable equivalents.
        if isinstance(value, dict):
            return frozenset((key, _freeze(item)) for key, item in value.items())
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(_freeze(item) for item in value)
        return value

    @wraps(func)
    def wrapper(self, pep_id, charge, *args, **kwargs):
        cache_key = (pep_id, charge)
        try:
            result_key = (func.__name__, _freeze(args), _freeze(kwargs))
            hash(result_key)  # surfaces unhashable arguments before the cache is touched
        except TypeError:
            # An argument cannot be used as a dictionary key, so this call cannot be cached.
            return func(self, pep_id, charge, *args, **kwargs)

        peptide_cache = self.cache.setdefault(cache_key, {})
        if result_key not in peptide_cache:
            print("cache miss")
            peptide_cache[result_key] = func(self, pep_id, charge, *args, **kwargs)
        else:
            print("cache hit")
        return peptide_cache[result_key]
    return wrapper

@abstractmethod
def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFrame:
'''
Expand All @@ -108,6 +126,7 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFram
'''
pass

@cache_results
def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Union[str, None, List[str]] = None) -> pd.DataFrame:
'''
Loads a TransitionGroupFeature object from the results file to a pandas dataframe
Expand Down Expand Up @@ -140,6 +159,7 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Unio

return pd.concat(out).reset_index(drop=True).drop_duplicates()

@cache_results
def loadTransitionGroupFeatures(self, pep_id: str, charge: int, runNames: Union[str, List[str], None] = None) -> TransitionGroupFeatureCollection:
"""
Load TransitionGroupFeature objects from the results file for the given peptide precursor
Expand Down Expand Up @@ -176,6 +196,7 @@ def _loadTransitionGroupFeature(runName):
raise ValueError("runName must be none, a string or list of strings")
return out

@cache_results
def loadTopTransitionGroupFeatureDf(self, pep_id: str, charge: int) -> pd.DataFrame:
'''
Loads a pandas dataframe of TransitionGroupFeatures across all runs
Expand All @@ -193,6 +214,7 @@ def loadTopTransitionGroupFeatureDf(self, pep_id: str, charge: int) -> pd.DataFr

return pd.concat(out).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runName'))

@cache_results
def loadTopTransitionGroupFeature(self, pep_id: str, charge: int) -> TransitionGroupFeatureCollection:
'''
Loads a PeakFeature object from the results file
Expand Down
4 changes: 3 additions & 1 deletion massdash/loaders/SqMassLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# Loaders
from .GenericChromatogramLoader import GenericChromatogramLoader
from .ResultsLoader import ResultsLoader
from .access import SqMassDataAccess
# Structs
from ..structs import TransitionGroup, TransitionGroupCollection
Expand All @@ -33,7 +34,7 @@ def __init__(self, **kwargs):
self.oswAccess = self.getOSWAccessPtr()
if self.oswAccess is None:
raise ValueError("No OSW file found in SqMassLoader, OSW file required for parsing sqMass files")
@ResultsLoader.cache_results
def loadTransitionGroupsDf(self, pep_id: str, charge: int) -> pd.DataFrame:
transitionMetaInfo = self.oswAccess.getTransitionIDAnnotationFromSequence(pep_id, charge)
precursor_id = self.oswAccess.getPrecursorIDFromPeptideAndCharge(pep_id, charge)
Expand Down Expand Up @@ -62,6 +63,7 @@ def loadTransitionGroupsDf(self, pep_id: str, charge: int) -> pd.DataFrame:

return pd.concat(out).reset_index().drop('level_1', axis=1).rename(columns=dict(level_0='run'))

@ResultsLoader.cache_results
def loadTransitionGroups(self, pep_id: str, charge: int, runNames: Union[None, str, List[str]] =None) -> Dict[str, TransitionGroupCollection]:
'''
Loads the transition group for a given peptide ID and charge across all files
Expand Down

0 comments on commit f886203

Please sign in to comment.