diff --git a/massdash/loaders/MzMLDataLoader.py b/massdash/loaders/MzMLDataLoader.py index 96e0177..1e73a1f 100644 --- a/massdash/loaders/MzMLDataLoader.py +++ b/massdash/loaders/MzMLDataLoader.py @@ -12,6 +12,7 @@ from .access import MzMLDataAccess, OSWDataAccess from .SpectralLibraryLoader import SpectralLibraryLoader from .GenericSpectrumLoader import GenericSpectrumLoader +from .ResultsLoader import ResultsLoader # Structs from ..structs import TransitionGroup, FeatureMap, TargetedDIAConfig, FeatureMapCollection, TopTransitionGroupFeatureCollection, TransitionGroupCollection # Utils @@ -47,6 +48,7 @@ def __init__(self, libraryFile, **kwargs): if self.libraryAccess is None: raise ValueError("If .osw file is not supplied, library file is required for MzMLDataLoader to perform targeted extraction") + @ResultsLoader.cache_results def loadTransitionGroups(self, pep_id: str, charge: int, config: TargetedDIAConfig, runNames: Union[None, str, List[str]]=None) -> Dict[str, TransitionGroup]: ''' Loads the transition group for a given peptide ID and charge across all files @@ -63,6 +65,7 @@ def loadTransitionGroups(self, pep_id: str, charge: int, config: TargetedDIAConf return TransitionGroupCollection({ run: data.to_chromatograms() for run, data in out_feature_map.items() }) + @ResultsLoader.cache_results def loadTransitionGroupsDf(self, pep_id: str, charge: int, config: TargetedDIAConfig) -> Dict[str, pd.DataFrame]: ''' Loads the transition group for a given peptide ID and charge across all files into a pandas DataFrame @@ -86,6 +89,7 @@ def loadTransitionGroupsDf(self, pep_id: str, charge: int, config: TargetedDIACo out_df = out_df.loc[:,~out_df.columns.duplicated()] return out_df + @ResultsLoader.cache_results def loadFeatureMaps(self, pep_id: str, charge: int, config=TargetedDIAConfig, runNames: Union[None, str, List[str]] = None) -> FeatureMapCollection: ''' Loads a dictionary of FeatureMaps (where the keys are the filenames) from the results file diff --git 
def cache_results(func):
    '''
    Decorator that memoises a loader method on the instance it is called on.

    Results are stored in ``self.cache`` as a nested dictionary:
    ``self.cache[(pep_id, charge)][func.__name__][extra_args_key]``.
    ``extra_args_key`` is built from every argument after ``pep_id`` and ``charge``,
    so calls that differ in those arguments (e.g. a different ``runNames``) are
    cached separately instead of returning the result of the first call.

    Notes:
        - The cached object itself is returned on a hit (no copy is made), so
          in-place modification by a caller is visible to later callers.
        - The cache is unbounded; it lives as long as the instance does.
        - Extra arguments that are not containers are keyed by their own
          ``__hash__``/``__eq__``. If any argument is unhashable the call is
          executed without caching rather than raising.

    Args:
        func (Callable): Method with signature ``func(self, pep_id, charge, *args, **kwargs)``

    Returns:
        Callable: Wrapped method with the same signature as ``func``
    '''
    # Function-scope import so the decorator does not rely on a module-level logger.
    import logging

    log = logging.getLogger(__name__)
    result_type = func.__name__

    def _freeze(value):
        '''Return a hashable stand-in for *value*; built-in containers are converted recursively.'''
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(_freeze(item) for item in value)
        if isinstance(value, dict):
            return frozenset((key, _freeze(item)) for key, item in value.items())
        return value

    @wraps(func)
    def wrapper(self, pep_id, charge, *args, **kwargs):
        precursor_key = (pep_id, charge)
        try:
            extra_args_key = (_freeze(args), _freeze(kwargs))
            # Force hashing now so an unhashable argument is detected before touching the cache
            hash((precursor_key, extra_args_key))
        except TypeError:
            log.debug("cache bypass for %s: arguments are not hashable", result_type)
            return func(self, pep_id, charge, *args, **kwargs)

        # Instances whose __init__ never created the cache get one lazily
        cache = getattr(self, "cache", None)
        if cache is None:
            cache = self.cache = {}

        per_method = cache.setdefault(precursor_key, {}).setdefault(result_type, {})
        if extra_args_key in per_method:
            log.debug("cache hit for %s %s", result_type, precursor_key)
        else:
            log.debug("cache miss for %s %s", result_type, precursor_key)
            per_method[extra_args_key] = func(self, pep_id, charge, *args, **kwargs)
        return per_method[extra_args_key]

    return wrapper
loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFram ''' pass + @cache_results def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Union[str, None, List[str]] = None) -> pd.DataFrame: ''' Loads a TransitionGroupFeature object from the results file to a pandas dataframe @@ -140,6 +159,7 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Unio return pd.concat(out).reset_index(drop=True).drop_duplicates() + @cache_results def loadTransitionGroupFeatures(self, pep_id: str, charge: int, runNames: Union[str, List[str], None] = None) -> TransitionGroupFeatureCollection: """ Load TransitionGroupFeature objects from the results file for the given peptide precursor @@ -176,6 +196,7 @@ def _loadTransitionGroupFeature(runName): raise ValueError("runName must be none, a string or list of strings") return out + @cache_results def loadTopTransitionGroupFeatureDf(self, pep_id: str, charge: int) -> pd.DataFrame: ''' Loads a pandas dataframe of TransitionGroupFeatures across all runs @@ -193,6 +214,7 @@ def loadTopTransitionGroupFeatureDf(self, pep_id: str, charge: int) -> pd.DataFr return pd.concat(out).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runName')) + @cache_results def loadTopTransitionGroupFeature(self, pep_id: str, charge: int) -> TransitionGroupFeatureCollection: ''' Loads a PeakFeature object from the results file diff --git a/massdash/loaders/SqMassLoader.py b/massdash/loaders/SqMassLoader.py index 65bf9a1..e8a1089 100644 --- a/massdash/loaders/SqMassLoader.py +++ b/massdash/loaders/SqMassLoader.py @@ -11,6 +11,7 @@ # Loaders from .GenericChromatogramLoader import GenericChromatogramLoader +from .ResultsLoader import ResultsLoader from .access import SqMassDataAccess # Structs from ..structs import TransitionGroup, TransitionGroupCollection @@ -33,7 +34,7 @@ def __init__(self, **kwargs): self.oswAccess = self.getOSWAccessPtr() if self.oswAccess is None: raise 
ValueError("No OSW file found in SqMassLoader, OSW file required for parsing sqMass files") - + @ResultsLoader.cache_results def loadTransitionGroupsDf(self, pep_id: str, charge: int) -> pd.DataFrame: transitionMetaInfo = self.oswAccess.getTransitionIDAnnotationFromSequence(pep_id, charge) precursor_id = self.oswAccess.getPrecursorIDFromPeptideAndCharge(pep_id, charge) @@ -62,6 +63,7 @@ def loadTransitionGroupsDf(self, pep_id: str, charge: int) -> pd.DataFrame: return pd.concat(out).reset_index().drop('level_1', axis=1).rename(columns=dict(level_0='run')) + @ResultsLoader.cache_results def loadTransitionGroups(self, pep_id: str, charge: int, runNames: Union[None, str, List[str]] =None) -> Dict[str, TransitionGroupCollection]: ''' Loads the transition group for a given peptide ID and charge across all files