Feature: use pyopenms-viz plotting

This uses pyopenms-viz plotting only in the high-level plotting interface; integration with Streamlit will follow in the future.
Roestlab · Oct 21, 2024 · ae5e2ac · ae5e2ac
2 parents 336c5df + bbd3b04
commit ae5e2ac
Show file tree

Hide file tree

Showing 117 changed files with 1,527 additions and 621 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -8,8 +8,8 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
-        # Latest pyOpenMS supports Python 3.9, 3.10, and 3.11
-        python-version: ["3.9", "3.10", "3.11"]
+        # Latest pyOpenMS supports Python 3.10 and 3.11
+        python-version: ["3.10", "3.11"]
     steps:
     - uses: actions/checkout@v4
 

diff --git a/docs/python_docs/Plotting1D.ipynb b/docs/python_docs/Plotting1D.ipynb
diff --git a/massdash/gui.py b/massdash/gui.py
@@ -23,6 +23,23 @@
 # Utils
 from massdash.util import LOGGER, get_download_folder, download_file, reset_app, open_page, close_app
 
+
+# The current Streamlit version only supports Bokeh 2.4.3.
+# See this workaround: https://github.com/streamlit/streamlit/issues/5858#issuecomment-1482042533
+# Support for Bokeh is currently on Streamlit's roadmap: https://roadmap.streamlit.app/
+def use_file_for_bokeh(chart, chart_height=1200):
+    import streamlit.components.v1 as components
+
+    from bokeh.plotting import save
+    from bokeh.io import output_file
+    output_file('bokeh_graph.html')
+    save(chart)
+    with open("bokeh_graph.html", 'r', encoding='utf-8') as f:
+        html = f.read()
+    components.html(html, height=chart_height, scrolling=True)
+# Update the bokeh_chart method to use the file workaround
+st.bokeh_chart = use_file_for_bokeh
+
 @click.command()
 # @click.argument('args', default='args', type=str)
 @click.option('--verbose', '-v', is_flag=True, help="Enables verbose mode.")

diff --git a/massdash/loaders/GenericChromatogramLoader.py b/massdash/loaders/GenericChromatogramLoader.py
@@ -61,7 +61,7 @@ def plotChromatogram(self,
                         smooth: bool = True, 
                         sgolay_polynomial_order: int = 3, 
                         sgolay_frame_length: int = 11, 
-                        scale_intensity: bool = False) -> 'bokeh.plotting.figure.Figure':
+                        **kwargs) -> 'bokeh.plotting.figure.Figure':
         '''
         Plots a chromatogram for a given peptide sequence and charge state for a given run
 
@@ -86,8 +86,8 @@ def plotChromatogram(self,
         # load the transitionGroup for plotting
         transitionGroup = list(self.loadTransitionGroups(seq, charge, runNames=runName).values())[0]
         if includeBoundaries:
-            transitionGroupFeatures = list(self.loadTransitionGroupFeatures(seq, charge, runNames=runName).values())[0]
+            transitionGroupFeatures = self.loadTransitionGroupFeaturesDf(seq, charge)
         else:
-            transitionGroupFeatures = []
+            transitionGroupFeatures = None
 
-        return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, scale_intensity=scale_intensity)
+        return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, **kwargs)
diff --git a/massdash/loaders/GenericRawDataLoader.py b/massdash/loaders/GenericRawDataLoader.py
@@ -15,6 +15,10 @@
 from .SpectralLibraryLoader import SpectralLibraryLoader
 from ..util import LOGGER, in_notebook
 
+from scipy.signal import savgol_filter, convolve
+from scipy.signal.windows import gaussian
+import pandas as pd
+
 class GenericRawDataLoader(ResultsLoader, metaclass=ABCMeta):
     ''' 
     Abstract class for loading Chromatograms and peak features
@@ -57,10 +61,13 @@ def plotChromatogram(self,
                         transitionGroup: TransitionGroup,
                         transitionGroupFeatures: Optional[List[TransitionGroupFeature]],
                         include_ms1: bool = False, 
-                        smooth: bool = True, 
+                        smooth: Literal[str] = True, # can be 'savgol' 'gauss' or 'none'
                         sgolay_polynomial_order: int = 3, 
                         sgolay_frame_length: int = 11, 
-                        scale_intensity: bool = False) -> 'bokeh.plotting.figure.Figure':
+                        gaussian_window: int = 11,
+                        gaussian_sigma: float = 2,
+                        width=800,
+                        **kwargs) -> 'bokeh.plotting.figure.Figure':
         '''
         Plots a chromatogram for a transitionGroup and transitionGroupFeatures given peptide sequence and charge state for a given run
 
@@ -78,39 +85,41 @@ def plotChromatogram(self,
         '''
 
         from bokeh.plotting import output_notebook, show
-        from ..plotting import InteractivePlotter, PlotConfig
-
-        # Initiate Plotting in Jupyter Notebook (if in notebook)
-        if in_notebook():
-            output_notebook()
-
-        # Create an instance of the InteractivePlotter class and set appropriate config
-        pc = PlotConfig()
-        pc.include_ms1 = include_ms1
-        if smooth:
-            pc.smoothing_dict = {'type': 'sgolay', 'sgolay_polynomial_order': sgolay_polynomial_order, 'sgolay_frame_length': sgolay_frame_length}
+        output_notebook()
+
+        precursorChroms, transitionChroms = transitionGroup.toPandasDf(separate=True)
+
+        if include_ms1:
+            to_plot = pd.concat([precursorChroms, transitionChroms])
         else:
-            pc.smoothing_dict = {'type': 'none'}
-        pc.scale_intensity = scale_intensity
+            to_plot = transitionChroms
 
-        plotter = InteractivePlotter(pc)
+        # format transitionGroupFeatures for plotting with pyopenms_viz
+        if transitionGroupFeatures is not None:
+            transitionGroupFeatures.rename(columns={'leftBoundary':'leftWidth', 'rightBoundary':'rightWidth', 'consensusApexIntensity':'apexIntensity'}, inplace=True)
 
-        # Plot the chromatogram data
-        if len(transitionGroupFeatures) > 0:
+            # Determine the labels for the legend, this is dependent on software tool
             # if multiple software tools used, label by software
-            labelBySoftware = not all([f.software == transitionGroupFeatures[0].software for f in transitionGroupFeatures])
-            if transitionGroupFeatures[0].software is not None and labelBySoftware:
-                feature_legend_labels = [ f.software for f in transitionGroupFeatures if f.software is not None]
+            labelBySoftware = transitionGroupFeatures['software'].nunique() > 1
+            if transitionGroupFeatures.software is not None and labelBySoftware:
+                transitionGroupFeatures.rename(columns={'software':'name'}, inplace=True)
+
+        def apply_smoothing(group):
+            if smooth == 'savgol':
+                group['intensity'] = savgol_filter(group['intensity'], window_length=sgolay_frame_length, polyorder=sgolay_polynomial_order)
+            elif smooth == 'gauss':
+                window = gaussian(gaussian_window, std=gaussian_sigma)
+                group['intensity'] = convolve(group['intensity'], window, mode='same') / window.sum()
             else:
-                feature_legend_labels = [ f"Feature {i+1}" for i in  range(len(transitionGroupFeatures)) ]
-        else:
-            feature_legend_labels = []
+                pass
+
+            return group
 
-        fig = plotter.plot(transitionGroup, transitionGroupFeatures, feature_legend_labels=feature_legend_labels)
 
-        show(fig)
+        to_plot = to_plot.groupby('annotation').apply(apply_smoothing).reset_index(drop=True)
 
-        return fig
+        fig = to_plot.plot(x='rt', y='intensity', kind='chromatogram', by='annotation', backend='ms_bokeh', annotation_data=transitionGroupFeatures, width=width, show_plot=False, **kwargs) 
+        show(fig) # for the documentation figures to render correctly need to call show() here 
 
     def __repr__(self):
         tmp =  super().__repr__()

diff --git a/massdash/loaders/GenericSpectrumLoader.py b/massdash/loaders/GenericSpectrumLoader.py
@@ -80,10 +80,10 @@ def plotChromatogram(self,
                         smooth: bool = True, 
                         sgolay_polynomial_order: int = 3, 
                         sgolay_frame_length: int = 11, 
-                        scale_intensity: bool = False,
                         mz_tol: float = 20,
                         rt_window: float = 50,
-                        im_window: Optional[float] = None) -> 'bokeh.plotting.figure.Figure':
+                        im_window: Optional[float] = None,
+                        **kwargs) -> 'bokeh.plotting.figure.Figure':
         '''
         Plots a chromatogram for a given peptide sequence and charge state for a given run
 
@@ -96,7 +96,6 @@ def plotChromatogram(self,
             smooth (bool, optional): Whether to smooth the chromatogram. Defaults to True.
             sgolay_polynomial_order (int, optional): Order of the polynomial to use for smoothing. Defaults to 3.
             sgolay_frame_length (int, optional): Frame length to use for smoothing. Defaults to 11.
-            scale_intensity (bool, optional): Whether to scale the intensity of the chromatogram such that all chromatograms are individually normalized to 1. Defaults to False.
             mz_tol (float, optional): m/z tolerance for extraction (in ppm). Defaults to 20.
             rt_tol (float, optional): RT tolerance for extraction (in seconds). Defaults to 50.
             im_tol (float, optional): IM tolerance for extraction (in 1/k0). Defaults to None.
@@ -118,8 +117,8 @@ def plotChromatogram(self,
         # load the transitionGroup for plotting
         transitionGroup = list(self.loadTransitionGroups(seq, charge, extraction_parameters, runNames=runName).values())[0]
         if includeBoundaries:
-            transitionGroupFeatures = list(self.loadTransitionGroupFeatures(seq, charge, runNames=runName).values())[0]
+            transitionGroupFeatures = self.loadTransitionGroupFeaturesDf(seq, charge, runNames=runName)
         else:
-            transitionGroupFeatures = []
-
-        return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, scale_intensity=scale_intensity)
+            transitionGroupFeatures = None
+        
+        return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, **kwargs)
diff --git a/massdash/loaders/ResultsLoader.py b/massdash/loaders/ResultsLoader.py
@@ -115,22 +115,37 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFram
         '''
         pass
 
-    def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFrame:
+    def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Union[str, None, List[str]] = None) -> pd.DataFrame:
         '''
         Loads a TransitionGroupFeature object from the results file to a pandas dataframe
 
         Args:
             pep_id (str): Peptide ID
             charge (int): Charge
+            runNames (None | str | List[str]): Name of the run to extract the transition group from. If None, all runs are extracted. If str, only the specified run is extracted. If List[str], only the specified runs are extracted.
         
         Returns:
             DataFrame: DataFrame containing TransitionGroupObject information across all runs
         '''
-        out = {}
-        for d in self.runNames:
-            out[d] = pd.concat([ r.getTransitionGroupFeaturesDf(d, pep_id, charge) for r in self.rsltsAccess ])
-
-        return pd.concat(out).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runname'))
+        if runNames is None:
+            out = {}
+            for d in self.runNames:
+                out[d] = pd.concat([ r.getTransitionGroupFeaturesDf(d, pep_id, charge) for r in self.rsltsAccess ])
+            return pd.concat(out).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runname')).drop_duplicates()
+        elif isinstance(runNames, str): # get features across all software for single run
+            out = [ r.getTransitionGroupFeaturesDf(runNames, pep_id, charge) for r in self.rsltsAccess ]
+            out = pd.concat(out).reset_index(drop=True).drop_duplicates()
+            out['runname'] = runNames
+            return out
+        elif isinstance(runNames, list): # get features across all software for multiple specified runs
+            out = []
+            for d in runNames: # NOTE: iterate through user specified runs not all run names
+                print(d)
+                tmp = {}
+                tmp[d] = pd.concat([ r.getTransitionGroupFeaturesDf(d, pep_id, charge) for r in self.rsltsAccess ])
+                out.append(pd.concat(tmp).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runname')).drop_duplicates())
+
+            return pd.concat(out).reset_index(drop=True).drop_duplicates()
 
     def loadTransitionGroupFeatures(self, pep_id: str, charge: int, runNames: Union[str, List[str], None] = None) -> TransitionGroupFeatureCollection:
         """

diff --git a/massdash/plotting/InteractivePlotter.py b/massdash/plotting/InteractivePlotter.py
@@ -186,6 +186,7 @@ def __add_peak_boundaries(self,
         # Add peak boundaries
         i = 0
         legend_items = []
+        hover_renderers = []
         for idx, feature in enumerate(features):
             # skip features if outside of plot range
             if feature.leftBoundary > transitionGroup.transitionData[0].data.max() or feature.rightBoundary < transitionGroup.transitionData[0].data.min():
@@ -216,11 +217,13 @@ def __add_peak_boundaries(self,
 
                 # Add a point to the left border to attached the hover tool to
                 leftWidth_apex_point = p.circle(source=source, x='leftWidth', y='Intensity', name='leftWidth_apex_point', alpha=0) 
+                hover_renderers.append(leftWidth_apex_point)
+
 
                 i += 1
 
         # Create a HoverTool
-        hover = HoverTool(names=['leftWidth_apex_point'],
+        hover = HoverTool(renderers=hover_renderers,
             tooltips=[
                 ("Intensity", "@Intensity"),
                 ("Left Width", "@leftWidth{0.00}"),

diff --git a/pyproject.toml b/pyproject.toml
@@ -20,9 +20,9 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Topic :: Scientific/Engineering :: Chemistry",
 ]
-requires-python = ">=3.9, <=3.12"
+requires-python = ">=3.10, <=3.12"
 dependencies = [
-    "bokeh==2.4.3",
+    "bokeh>3.0",
     "click>=8.1",
     "joblib",
     "matplotlib",
@@ -34,7 +34,8 @@ dependencies = [
     "scipy>=1.12.0",
     "tqdm",
     "upsetplot",
-    "requests"
+    "requests",
+    "pyopenms_viz"
 ]
 
 [project.optional-dependencies]

diff --git a/...oader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-AGAANIVPNSTGAAK-2-run1].hdf b/...oader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-AGAANIVPNSTGAAK-2-run1].hdf
diff --git a/...test_loadTransitionGroupFeaturesDf_runSpecific[combined-AGAANIVPNSTGAAK-2-test_raw_1].hdf b/...test_loadTransitionGroupFeaturesDf_runSpecific[combined-AGAANIVPNSTGAAK-2-test_raw_1].hdf
diff --git a/...test_loadTransitionGroupFeaturesDf_runSpecific[combined-AGAANIVPNSTGAAK-2-test_raw_3].hdf b/...test_loadTransitionGroupFeaturesDf_runSpecific[combined-AGAANIVPNSTGAAK-2-test_raw_3].hdf
diff --git a/...ResultsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-INVALID-2-run1].hdf b/...ResultsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-INVALID-2-run1].hdf
diff --git a/...sLoader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-INVALID-2-test_raw_1].hdf b/...sLoader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-INVALID-2-test_raw_1].hdf
diff --git a/...sLoader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-INVALID-2-test_raw_3].hdf b/...sLoader/test_loadTransitionGroupFeaturesDf_runSpecific[combined-INVALID-2-test_raw_3].hdf
diff --git a/...sLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-AGAANIVPNSTGAAK-2-run1].hdf b/...sLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-AGAANIVPNSTGAAK-2-run1].hdf
diff --git a/...r/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-AGAANIVPNSTGAAK-2-test_raw_1].hdf b/...r/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-AGAANIVPNSTGAAK-2-test_raw_1].hdf
diff --git a/...r/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-AGAANIVPNSTGAAK-2-test_raw_3].hdf b/...r/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-AGAANIVPNSTGAAK-2-test_raw_3].hdf
diff --git a/...t_ResultsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-INVALID-2-run1].hdf b/...t_ResultsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-INVALID-2-run1].hdf
diff --git a/...ltsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-INVALID-2-test_raw_1].hdf b/...ltsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-INVALID-2-test_raw_1].hdf
diff --git a/...ltsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-INVALID-2-test_raw_3].hdf b/...ltsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[diann1-INVALID-2-test_raw_3].hdf
diff --git a/...ader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-AGAANIVPNSTGAAK-2-run1].hdf b/...ader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-AGAANIVPNSTGAAK-2-run1].hdf
diff --git a/...est_loadTransitionGroupFeaturesDf_runSpecific[openswath-AGAANIVPNSTGAAK-2-test_raw_1].hdf b/...est_loadTransitionGroupFeaturesDf_runSpecific[openswath-AGAANIVPNSTGAAK-2-test_raw_1].hdf
diff --git a/...est_loadTransitionGroupFeaturesDf_runSpecific[openswath-AGAANIVPNSTGAAK-2-test_raw_3].hdf b/...est_loadTransitionGroupFeaturesDf_runSpecific[openswath-AGAANIVPNSTGAAK-2-test_raw_3].hdf
diff --git a/...esultsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-INVALID-2-run1].hdf b/...esultsLoader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-INVALID-2-run1].hdf
diff --git a/...Loader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-INVALID-2-test_raw_1].hdf b/...Loader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-INVALID-2-test_raw_1].hdf
diff --git a/...Loader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-INVALID-2-test_raw_3].hdf b/...Loader/test_loadTransitionGroupFeaturesDf_runSpecific[openswath-INVALID-2-test_raw_3].hdf
diff --git a/test/loaders/test_ResultsLoader.py b/test/loaders/test_ResultsLoader.py
@@ -63,6 +63,13 @@ def test_loadTransitionGroupFeaturesDf(resultsLoader, precursor, charge, snapsho
     feature = feature.sort_values(by=['consensusApex'], ascending=True).reset_index(drop=True)
     assert snapshot_pandas == feature
 
+@pytest.mark.parametrize('run', ['test_raw_1', ['test_raw_1', 'test_raw_2'], 'test_raw_3']) #note that 'test_raw_3' is not in the data
+def test_loadTransitionGroupFeaturesDf_runSpecific(resultsLoader, precursor, charge, run, snapshot_pandas):
+    feature = resultsLoader.loadTransitionGroupFeaturesDf(precursor, charge, runNames=run)
+    print(feature)
+    feature = feature.sort_values(by=['consensusApex'], ascending=True).reset_index(drop=True)
+    assert snapshot_pandas == feature
+
 def test_loadSoftware(resultsLoader, snapshot):
     assert snapshot == resultsLoader._loadSoftware()