feature: cached loaded peptides

This is quite convenient in a Jupyter notebook setting when using the plotChromatograms() function: the chromatograms do not have to be loaded from disk again because they are already held in the loader's cache.
Roestlab · Oct 30, 2024 · f886203 · f886203
1 parent bffe291
commit f886203
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 4 deletions.
diff --git a/massdash/loaders/MzMLDataLoader.py b/massdash/loaders/MzMLDataLoader.py
@@ -12,6 +12,7 @@
 from .access import MzMLDataAccess, OSWDataAccess
 from .SpectralLibraryLoader import SpectralLibraryLoader
 from .GenericSpectrumLoader import GenericSpectrumLoader
+from .ResultsLoader import ResultsLoader
 # Structs
 from ..structs import TransitionGroup, FeatureMap, TargetedDIAConfig, FeatureMapCollection, TopTransitionGroupFeatureCollection, TransitionGroupCollection
 # Utils
@@ -47,6 +48,7 @@ def __init__(self, libraryFile, **kwargs):
         if self.libraryAccess is None:
             raise ValueError("If .osw file is not supplied, library file is required for MzMLDataLoader to perform targeted extraction")
 
+    @ResultsLoader.cache_results
     def loadTransitionGroups(self, pep_id: str, charge: int, config: TargetedDIAConfig, runNames: Union[None, str, List[str]]=None) -> Dict[str, TransitionGroup]:
         '''
         Loads the transition group for a given peptide ID and charge across all files
@@ -63,6 +65,7 @@ def loadTransitionGroups(self, pep_id: str, charge: int, config: TargetedDIAConf
 
         return TransitionGroupCollection({ run: data.to_chromatograms() for run, data in out_feature_map.items() })
 
+    @ResultsLoader.cache_results
     def loadTransitionGroupsDf(self, pep_id: str, charge: int, config: TargetedDIAConfig) -> Dict[str, pd.DataFrame]:
         '''
         Loads the transition group for a given peptide ID and charge across all files into a pandas DataFrame
@@ -86,6 +89,7 @@ def loadTransitionGroupsDf(self, pep_id: str, charge: int, config: TargetedDIACo
         out_df = out_df.loc[:,~out_df.columns.duplicated()]
         return out_df
 
+    @ResultsLoader.cache_results
     def loadFeatureMaps(self, pep_id: str, charge: int, config=TargetedDIAConfig, runNames: Union[None, str, List[str]] = None) -> FeatureMapCollection:
         '''
         Loads a dictionary of FeatureMaps (where the keys are the filenames) from the results file

diff --git a/massdash/loaders/ResultsLoader.py b/massdash/loaders/ResultsLoader.py
@@ -9,20 +9,19 @@
 import pandas as pd
 from pathlib import Path 
 import numpy as np
+from functools import wraps
 
 # Plotting
 from bokeh.plotting import figure
 from bokeh.models import ColumnDataSource, FactorRange, Whisker, Legend, HoverTool
 from bokeh.palettes import Category10
-from bokeh.layouts import gridplot
 from itertools import cycle
 import plotly.express as px
 
 # Loaders
-from .SpectralLibraryLoader import SpectralLibraryLoader
 from .access import OSWDataAccess, ResultsTSVDataAccess
 # Structs
-from ..structs import TransitionGroup, TransitionGroupFeatureCollection, TopTransitionGroupFeatureCollection
+from ..structs import TransitionGroupFeatureCollection 
 # Utils
 from ..util import LOGGER
 
@@ -51,6 +50,7 @@ def __init__(self,
         self.software = None
         self._oswAccess = None
         self._oswAccessChecked = False
+        self.cache = {} # holds the cache of previously loaded peptides so do not have to load again
 
         if isinstance(rsltsFile, str):
             self.rsltsFile = [rsltsFile]
@@ -94,6 +94,24 @@ def _loadSoftware(self):
         '''
         return [i.getSoftware() for i in self.rsltsAccess]
 
+    def cache_results(func):
+        '''
+        Decorator that memoizes a loader method on the instance it is called on
+
+        Results are stored in ``self.cache[(pep_id, charge)]`` under a key built from the wrapped
+        method's name and every additional argument of the call. Including the additional arguments
+        means that the same peptide requested with e.g. different ``runNames`` is never answered
+        with the result of an earlier, different request.
+
+        The cached object itself is returned on a hit (not a copy).
+
+        Args:
+            func (callable): method called as ``func(self, pep_id, charge, *args, **kwargs)``
+
+        Returns:
+            callable: wrapper with the same call signature as ``func``
+        '''
+        def freeze(value):
+            # lists and sets are unhashable, turn them into hashable equivalents so that
+            # e.g. a list of run names can be part of the cache key
+            if isinstance(value, (list, tuple)):
+                return tuple(freeze(item) for item in value)
+            if isinstance(value, (set, frozenset)):
+                return frozenset(freeze(item) for item in value)
+            return value
+
+        @wraps(func)
+        def wrapper(self, pep_id, charge, *args, **kwargs):
+            try:
+                # keyword names are unique, so sorting never has to compare the values
+                result_key = (func.__name__, freeze(args), freeze(sorted(kwargs.items())))
+                hash(result_key)
+                peptide_cache = self.cache.setdefault((pep_id, charge), {})
+            except TypeError:
+                # an argument that cannot be hashed cannot be part of a key: load without caching
+                return func(self, pep_id, charge, *args, **kwargs)
+
+            if result_key not in peptide_cache:
+                peptide_cache[result_key] = func(self, pep_id, charge, *args, **kwargs)
+            return peptide_cache[result_key]
+        return wrapper
+
     @abstractmethod
     def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFrame:
         '''
@@ -108,6 +126,7 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFram
         '''
         pass
 
+    @cache_results
     def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Union[str, None, List[str]] = None) -> pd.DataFrame:
         '''
         Loads a TransitionGroupFeature object from the results file to a pandas dataframe
@@ -140,6 +159,7 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Unio
 
             return pd.concat(out).reset_index(drop=True).drop_duplicates()
 
+    @cache_results
     def loadTransitionGroupFeatures(self, pep_id: str, charge: int, runNames: Union[str, List[str], None] = None) -> TransitionGroupFeatureCollection:
         """
         Load TransitionGroupFeature objects from the results file for the given peptide precursor
@@ -176,6 +196,7 @@ def _loadTransitionGroupFeature(runName):
             raise ValueError("runName must be none, a string or list of strings")
         return out
 
+    @cache_results
     def loadTopTransitionGroupFeatureDf(self, pep_id: str, charge: int) -> pd.DataFrame:
         '''
         Loads a pandas dataframe of TransitionGroupFeatures across all runs 
@@ -193,6 +214,7 @@ def loadTopTransitionGroupFeatureDf(self, pep_id: str, charge: int) -> pd.DataFr
 
         return pd.concat(out).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runName'))
 
+    @cache_results
     def loadTopTransitionGroupFeature(self, pep_id: str, charge: int) -> TransitionGroupFeatureCollection:
         '''
         Loads a PeakFeature object from the results file

diff --git a/massdash/loaders/SqMassLoader.py b/massdash/loaders/SqMassLoader.py
@@ -11,6 +11,7 @@
 
 # Loaders
 from .GenericChromatogramLoader import GenericChromatogramLoader
+from .ResultsLoader import ResultsLoader
 from .access import SqMassDataAccess
 # Structs
 from ..structs import TransitionGroup, TransitionGroupCollection
@@ -33,7 +34,7 @@ def __init__(self, **kwargs):
         self.oswAccess = self.getOSWAccessPtr()
         if self.oswAccess is None:
             raise ValueError("No OSW file found in SqMassLoader, OSW file required for parsing sqMass files")
-                
+    @ResultsLoader.cache_results
     def loadTransitionGroupsDf(self, pep_id: str, charge: int) -> pd.DataFrame:
         transitionMetaInfo = self.oswAccess.getTransitionIDAnnotationFromSequence(pep_id, charge)
         precursor_id = self.oswAccess.getPrecursorIDFromPeptideAndCharge(pep_id, charge)
@@ -62,6 +63,7 @@ def loadTransitionGroupsDf(self, pep_id: str, charge: int) -> pd.DataFrame:
 
         return pd.concat(out).reset_index().drop('level_1', axis=1).rename(columns=dict(level_0='run'))
 
+    @ResultsLoader.cache_results
     def loadTransitionGroups(self, pep_id: str, charge: int, runNames: Union[None, str, List[str]] =None) -> Dict[str, TransitionGroupCollection]:
         '''
         Loads the transition group for a given peptide ID and charge across all files