Skip to content

Commit

Permalink
Feature: use pyopenms-viz plotting
Browse files Browse the repository at this point in the history
This only uses pyopenms-viz plotting in the high-level plotting interface; integration with Streamlit will follow in the future.
  • Loading branch information
jcharkow authored Oct 21, 2024
2 parents 336c5df + bbd3b04 commit ae5e2ac
Show file tree
Hide file tree
Showing 117 changed files with 1,527 additions and 621 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
# Latest pyOpenMS supports Python 3.9, 3.10, and 3.11
python-version: ["3.9", "3.10", "3.11"]
# Latest pyOpenMS supports Python 3.10 and 3.11
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v4

Expand Down
128 changes: 91 additions & 37 deletions docs/python_docs/Plotting1D.ipynb

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions massdash/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,23 @@
# Utils
from massdash.util import LOGGER, get_download_folder, download_file, reset_app, open_page, close_app


# Current streamlit version only supports bokeh 2.4.3
# See workaround: https://github.com/streamlit/streamlit/issues/5858#issuecomment-1482042533
# Support for Bokeh is currently on Streamlit's roadmap: https://roadmap.streamlit.app/
def use_file_for_bokeh(chart, chart_height=1200):
    '''
    Render a Bokeh chart in Streamlit as a standalone HTML component.

    The chart is serialized to a standalone HTML page in memory and handed to
    ``streamlit.components.v1.html``. No fixed-name file is written to the
    working directory, so concurrent sessions cannot overwrite or read each
    other's chart, and Bokeh's global ``output_file`` state is left untouched.

    Args:
        chart: Bokeh object (figure or layout) to render.
        chart_height (int, optional): Height in pixels of the embedding component. Defaults to 1200.
    '''
    import streamlit.components.v1 as components

    from bokeh.embed import file_html
    from bokeh.resources import CDN

    # CDN resources and this title match what output_file()/save() produce by default
    html = file_html(chart, resources=CDN, title="Bokeh Plot")
    components.html(html, height=chart_height, scrolling=True)
# Update the bokeh_chart method to use the file workaround
st.bokeh_chart = use_file_for_bokeh

@click.command()
# @click.argument('args', default='args', type=str)
@click.option('--verbose', '-v', is_flag=True, help="Enables verbose mode.")
Expand Down
8 changes: 4 additions & 4 deletions massdash/loaders/GenericChromatogramLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def plotChromatogram(self,
smooth: bool = True,
sgolay_polynomial_order: int = 3,
sgolay_frame_length: int = 11,
scale_intensity: bool = False) -> 'bokeh.plotting.figure.Figure':
**kwargs) -> 'bokeh.plotting.figure.Figure':
'''
Plots a chromatogram for a given peptide sequence and charge state for a given run
Expand All @@ -86,8 +86,8 @@ def plotChromatogram(self,
# load the transitionGroup for plotting
transitionGroup = list(self.loadTransitionGroups(seq, charge, runNames=runName).values())[0]
if includeBoundaries:
transitionGroupFeatures = list(self.loadTransitionGroupFeatures(seq, charge, runNames=runName).values())[0]
transitionGroupFeatures = self.loadTransitionGroupFeaturesDf(seq, charge)
else:
transitionGroupFeatures = []
transitionGroupFeatures = None

return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, scale_intensity=scale_intensity)
return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, **kwargs)
63 changes: 36 additions & 27 deletions massdash/loaders/GenericRawDataLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
from .SpectralLibraryLoader import SpectralLibraryLoader
from ..util import LOGGER, in_notebook

from scipy.signal import savgol_filter, convolve
from scipy.signal.windows import gaussian
import pandas as pd

class GenericRawDataLoader(ResultsLoader, metaclass=ABCMeta):
'''
Abstract class for loading Chromatograms and peak features
Expand Down Expand Up @@ -57,10 +61,13 @@ def plotChromatogram(self,
transitionGroup: TransitionGroup,
transitionGroupFeatures: Optional[List[TransitionGroupFeature]],
include_ms1: bool = False,
smooth: bool = True,
smooth: Literal[str] = True, # can be 'savgol' 'gauss' or 'none'
sgolay_polynomial_order: int = 3,
sgolay_frame_length: int = 11,
scale_intensity: bool = False) -> 'bokeh.plotting.figure.Figure':
gaussian_window: int = 11,
gaussian_sigma: float = 2,
width=800,
**kwargs) -> 'bokeh.plotting.figure.Figure':
'''
Plots a chromatogram for a transitionGroup and transitionGroupFeatures given peptide sequence and charge state for a given run
Expand All @@ -78,39 +85,41 @@ def plotChromatogram(self,
'''

from bokeh.plotting import output_notebook, show
from ..plotting import InteractivePlotter, PlotConfig

# Initiate Plotting in Jupyter Notebook (if in notebook)
if in_notebook():
output_notebook()

# Create an instance of the InteractivePlotter class and set appropriate config
pc = PlotConfig()
pc.include_ms1 = include_ms1
if smooth:
pc.smoothing_dict = {'type': 'sgolay', 'sgolay_polynomial_order': sgolay_polynomial_order, 'sgolay_frame_length': sgolay_frame_length}
output_notebook()

precursorChroms, transitionChroms = transitionGroup.toPandasDf(separate=True)

if include_ms1:
to_plot = pd.concat([precursorChroms, transitionChroms])
else:
pc.smoothing_dict = {'type': 'none'}
pc.scale_intensity = scale_intensity
to_plot = transitionChroms

plotter = InteractivePlotter(pc)
# format transitionGroupFeatures for plotting with pyopenms_viz
if transitionGroupFeatures is not None:
transitionGroupFeatures.rename(columns={'leftBoundary':'leftWidth', 'rightBoundary':'rightWidth', 'consensusApexIntensity':'apexIntensity'}, inplace=True)

# Plot the chromatogram data
if len(transitionGroupFeatures) > 0:
# Determine the labels for the legend, this is dependent on software tool
# if multiple software tools used, label by software
labelBySoftware = not all([f.software == transitionGroupFeatures[0].software for f in transitionGroupFeatures])
if transitionGroupFeatures[0].software is not None and labelBySoftware:
feature_legend_labels = [ f.software for f in transitionGroupFeatures if f.software is not None]
labelBySoftware = transitionGroupFeatures['software'].nunique() > 1
if transitionGroupFeatures.software is not None and labelBySoftware:
transitionGroupFeatures.rename(columns={'software':'name'}, inplace=True)

def apply_smoothing(group):
if smooth == 'savgol':
group['intensity'] = savgol_filter(group['intensity'], window_length=sgolay_frame_length, polyorder=sgolay_polynomial_order)
elif smooth == 'gauss':
window = gaussian(gaussian_window, std=gaussian_sigma)
group['intensity'] = convolve(group['intensity'], window, mode='same') / window.sum()
else:
feature_legend_labels = [ f"Feature {i+1}" for i in range(len(transitionGroupFeatures)) ]
else:
feature_legend_labels = []
pass

return group

fig = plotter.plot(transitionGroup, transitionGroupFeatures, feature_legend_labels=feature_legend_labels)

show(fig)
to_plot = to_plot.groupby('annotation').apply(apply_smoothing).reset_index(drop=True)

return fig
fig = to_plot.plot(x='rt', y='intensity', kind='chromatogram', by='annotation', backend='ms_bokeh', annotation_data=transitionGroupFeatures, width=width, show_plot=False, **kwargs)
show(fig) # for the documentation figures to render correctly need to call show() here

def __repr__(self):
tmp = super().__repr__()
Expand Down
13 changes: 6 additions & 7 deletions massdash/loaders/GenericSpectrumLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ def plotChromatogram(self,
smooth: bool = True,
sgolay_polynomial_order: int = 3,
sgolay_frame_length: int = 11,
scale_intensity: bool = False,
mz_tol: float = 20,
rt_window: float = 50,
im_window: Optional[float] = None) -> 'bokeh.plotting.figure.Figure':
im_window: Optional[float] = None,
**kwargs) -> 'bokeh.plotting.figure.Figure':
'''
Plots a chromatogram for a given peptide sequence and charge state for a given run
Expand All @@ -96,7 +96,6 @@ def plotChromatogram(self,
smooth (bool, optional): Whether to smooth the chromatogram. Defaults to True.
sgolay_polynomial_order (int, optional): Order of the polynomial to use for smoothing. Defaults to 3.
sgolay_frame_length (int, optional): Frame length to use for smoothing. Defaults to 11.
scale_intensity (bool, optional): Whether to scale the intensity of the chromatogram such that all chromatograms are individually normalized to 1. Defaults to False.
mz_tol (float, optional): m/z tolerance for extraction (in ppm). Defaults to 20.
rt_tol (float, optional): RT tolerance for extraction (in seconds). Defaults to 50.
im_tol (float, optional): IM tolerance for extraction (in 1/k0). Defaults to None.
Expand All @@ -118,8 +117,8 @@ def plotChromatogram(self,
# load the transitionGroup for plotting
transitionGroup = list(self.loadTransitionGroups(seq, charge, extraction_parameters, runNames=runName).values())[0]
if includeBoundaries:
transitionGroupFeatures = list(self.loadTransitionGroupFeatures(seq, charge, runNames=runName).values())[0]
transitionGroupFeatures = self.loadTransitionGroupFeaturesDf(seq, charge, runNames=runName)
else:
transitionGroupFeatures = []

return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, scale_intensity=scale_intensity)
transitionGroupFeatures = None
return super().plotChromatogram(transitionGroup, transitionGroupFeatures, include_ms1=include_ms1, smooth=smooth, sgolay_polynomial_order=sgolay_polynomial_order, sgolay_frame_length=sgolay_frame_length, **kwargs)
27 changes: 21 additions & 6 deletions massdash/loaders/ResultsLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,22 +115,37 @@ def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFram
'''
pass

def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int, runNames: Union[str, None, List[str]] = None) -> pd.DataFrame:
    '''
    Loads TransitionGroupFeature information from the results file(s) into a pandas dataframe

    Args:
        pep_id (str): Peptide ID
        charge (int): Charge
        runNames (None | str | List[str]): Name of the run(s) to extract the features from. If None, all runs are extracted. If str, only the specified run is extracted. If List[str], only the specified runs are extracted.

    Returns:
        DataFrame: DataFrame containing TransitionGroupFeature information for the requested runs, with a 'runname' column identifying the run of each row

    Raises:
        TypeError: If runNames is not None, a str, or a list
    '''
    def _featuresForRun(run):
        # Stack the features reported by every results accessor (i.e. every software tool) for one run
        return pd.concat([ r.getTransitionGroupFeaturesDf(run, pep_id, charge) for r in self.rsltsAccess ])

    def _labelByRun(perRun):
        # The dict keys become the outer index level; promote it to a 'runname' column
        return pd.concat(perRun).reset_index().drop(columns='level_1').rename(columns=dict(level_0='runname')).drop_duplicates()

    if runNames is None: # get features across all software for all runs
        return _labelByRun({ d: _featuresForRun(d) for d in self.runNames })
    elif isinstance(runNames, str): # get features across all software for single run
        out = _featuresForRun(runNames).reset_index(drop=True).drop_duplicates()
        out['runname'] = runNames
        return out
    elif isinstance(runNames, list): # get features across all software for multiple specified runs
        # NOTE: iterate through user specified runs not all run names
        out = [ _labelByRun({ d: _featuresForRun(d) }) for d in runNames ]
        return pd.concat(out).reset_index(drop=True).drop_duplicates()

    # Previously fell through and returned None silently for unsupported types
    raise TypeError(f"runNames must be None, a str or a list of str, got {type(runNames).__name__}")

def loadTransitionGroupFeatures(self, pep_id: str, charge: int, runNames: Union[str, List[str], None] = None) -> TransitionGroupFeatureCollection:
"""
Expand Down
5 changes: 4 additions & 1 deletion massdash/plotting/InteractivePlotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def __add_peak_boundaries(self,
# Add peak boundaries
i = 0
legend_items = []
hover_renderers = []
for idx, feature in enumerate(features):
# skip features if outside of plot range
if feature.leftBoundary > transitionGroup.transitionData[0].data.max() or feature.rightBoundary < transitionGroup.transitionData[0].data.min():
Expand Down Expand Up @@ -216,11 +217,13 @@ def __add_peak_boundaries(self,

# Add a point to the left border to attach the hover tool to
leftWidth_apex_point = p.circle(source=source, x='leftWidth', y='Intensity', name='leftWidth_apex_point', alpha=0)
hover_renderers.append(leftWidth_apex_point)


i += 1

# Create a HoverTool
hover = HoverTool(names=['leftWidth_apex_point'],
hover = HoverTool(renderers=hover_renderers,
tooltips=[
("Intensity", "@Intensity"),
("Left Width", "@leftWidth{0.00}"),
Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ classifiers = [
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Topic :: Scientific/Engineering :: Chemistry",
]
requires-python = ">=3.9, <=3.12"
requires-python = ">=3.10, <=3.12"
dependencies = [
"bokeh==2.4.3",
"bokeh>3.0",
"click>=8.1",
"joblib",
"matplotlib",
Expand All @@ -34,7 +34,8 @@ dependencies = [
"scipy>=1.12.0",
"tqdm",
"upsetplot",
"requests"
"requests",
"pyopenms_viz"
]

[project.optional-dependencies]
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 7 additions & 0 deletions test/loaders/test_ResultsLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ def test_loadTransitionGroupFeaturesDf(resultsLoader, precursor, charge, snapsho
feature = feature.sort_values(by=['consensusApex'], ascending=True).reset_index(drop=True)
assert snapshot_pandas == feature

# Covers a single run, a list of runs, and a run name that is absent from the data ('test_raw_3')
@pytest.mark.parametrize('run', ['test_raw_1', ['test_raw_1', 'test_raw_2'], 'test_raw_3']) #note that 'test_raw_3' is not in the data
def test_loadTransitionGroupFeaturesDf_runSpecific(resultsLoader, precursor, charge, run, snapshot_pandas):
    feature = resultsLoader.loadTransitionGroupFeaturesDf(precursor, charge, runNames=run)
    # Sort and reset the index so the snapshot comparison does not depend on load order
    feature = feature.sort_values(by=['consensusApex'], ascending=True).reset_index(drop=True)
    assert snapshot_pandas == feature

def test_loadSoftware(resultsLoader, snapshot):
assert snapshot == resultsLoader._loadSoftware()

Expand Down
Loading

0 comments on commit ae5e2ac

Please sign in to comment.