From bbdb6cb4f8435d0b57982c6a076f745bdaba5710 Mon Sep 17 00:00:00 2001 From: prafols Date: Thu, 19 Mar 2020 09:26:19 +0100 Subject: [PATCH] improved mass resampling to handle FTICR and Orbitrap data --- DESCRIPTION | 2 +- NAMESPACE | 3 ++- R/RcppExports.R | 20 +++++----------- R/imzMLreader.R | 13 +++++++++-- R/zzz.R | 11 +++++++++ man/rMSI.Rd | 12 ++++++++++ src/mergeTwoMassAxes.cpp | 49 +++++++++++++++++++++++++++------------ src/plotDataReduction.cpp | 4 ---- 8 files changed, 77 insertions(+), 37 deletions(-) create mode 100644 R/zzz.R create mode 100644 man/rMSI.Rd diff --git a/DESCRIPTION b/DESCRIPTION index cec3de7..64805c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Version: 0.8 Date: 2015-11-30 Author: Pere Rafols Soler Maintainer: Pere Rafols Soler -Description: More about what it does (maybe more than one line) +Description: MSI data handling and visualization in R License: GPL3 LazyData: TRUE Imports: diff --git a/NAMESPACE b/NAMESPACE index e43ff4c..d5ff1f5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,5 +59,6 @@ export(saveImageSliceAtMass) export(saveImgChunkAtCoords) export(saveImgChunkAtIds) export(saveImgChunkToCube) +import(Rcpp) importFrom(Rcpp,evalCpp) -useDynLib(rMSI, .registration = TRUE) +useDynLib(rMSI) diff --git a/R/RcppExports.R b/R/RcppExports.R index 27901e7..65e0c88 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -12,21 +12,17 @@ #' @return ROI pixel coordinates arranged in a named list. #' CparseBrukerXML <- function(xml_path) { - .Call(`_rMSI_CparseBrukerXML`, xml_path) + .Call('_rMSI_CparseBrukerXML', PACKAGE = 'rMSI', xml_path) } CimzMLParse <- function(xml_path) { - .Call(`_rMSI_CimzMLParse`, xml_path) + .Call('_rMSI_CimzMLParse', PACKAGE = 'rMSI', xml_path) } CimzMLStore <- function(fname, imgInfo) { - .Call(`_rMSI_CimzMLStore`, fname, imgInfo) + .Call('_rMSI_CimzMLStore', PACKAGE = 'rMSI', fname, imgInfo) } -#' @importFrom Rcpp evalCpp -#' @useDynLib rMSI, .registration = TRUE -NULL - #' CalcMassAxisBinSize. #' #' Calc the bin size of a mass axis at each mass channels using simple peak-picking information. @@ -38,7 +34,7 @@ NULL #' @export #' CalcMassAxisBinSize <- function(mass, intensity) { - .Call(`_rMSI_CalcMassAxisBinSize`, mass, intensity) + .Call('_rMSI_CalcMassAxisBinSize', PACKAGE = 'rMSI', mass, intensity) } #' MergeMassAxis. @@ -57,14 +53,10 @@ CalcMassAxisBinSize <- function(mass, intensity) { #' @export #' MergeMassAxis <- function(mz1, bins1, mz2, bins2) { - .Call(`_rMSI_MergeMassAxis`, mz1, bins1, mz2, bins2) + .Call('_rMSI_MergeMassAxis', PACKAGE = 'rMSI', mz1, bins1, mz2, bins2) } -#' @importFrom Rcpp evalCpp -#' @useDynLib rMSI, .registration = TRUE -NULL - ReduceDataPointsC <- function(mass, intensity, massMin, massMax, npoints) { - .Call(`_rMSI_ReduceDataPointsC`, mass, intensity, massMin, massMax, npoints) + .Call('_rMSI_ReduceDataPointsC', PACKAGE = 'rMSI', mass, intensity, massMin, massMax, npoints) } diff --git a/R/imzMLreader.R b/R/imzMLreader.R index 64daf57..2995325 100644 --- a/R/imzMLreader.R +++ b/R/imzMLreader.R @@ -244,7 +244,7 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM #Fix duplicates and zero drops CurrSpectrumFixed <- fixImzMLDuplicates(mzdd, dd) - #Get Bin size at peaks + #Get Bin size at peaks LoadMass <- CurrSpectrumFixed$mass LoadBins <- rMSI::CalcMassAxisBinSize( LoadMass, CurrSpectrumFixed$intensity) @@ -279,7 +279,15 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM if(MergedSpc[[1]]$level == MergedSpc[[2]]$level || icurr > nrow(xmlRes$run_data)) { #Merge! - mam <- rMSI::MergeMassAxis(MergedSpc[[1]]$mass, MergedSpc[[1]]$bins, MergedSpc[[2]]$mass, MergedSpc[[2]]$bins ) + if(identical(MergedSpc[[1]]$mass, MergedSpc[[2]]$mass)) + { + #Both mass axes are identical so there is no need to merge them, this is the case for Bruker FTICR data + mam <- list(mass = MergedSpc[[1]]$mass, bins = MergedSpc[[1]]$bins ) + } + else + { + mam <- rMSI::MergeMassAxis(MergedSpc[[1]]$mass, MergedSpc[[1]]$bins, MergedSpc[[2]]$mass, MergedSpc[[2]]$bins ) + } MergedSpc[[1]] <- list( level = MergedSpc[[1]]$level + 1, mass = mam$mass, bins = mam$bins ) #Shift register @@ -392,6 +400,7 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM #Apply re-sampling (only if needed...) if( ! identical(mzdd, mzAxis)) { + cat("DBG: resampling intensity!\n") #TODO delete me! if(length(mzdd) == length(dd)) { dd <- (approx( x = mzdd, y = dd, xout = mzAxis, ties = "ordered", yleft = 0, yright = 0))$y diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..3f08b03 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,11 @@ +#' rMSI +#' +#' MSI data handling and visualization in R +#' +#' @docType package +#' @author Pere Rafols +#' @import Rcpp +#' @importFrom Rcpp evalCpp +#' @useDynLib rMSI +#' @name rMSI +NULL diff --git a/man/rMSI.Rd b/man/rMSI.Rd new file mode 100644 index 0000000..f2066e0 --- /dev/null +++ b/man/rMSI.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\docType{package} +\name{rMSI} +\alias{rMSI} +\title{rMSI} +\description{ +MSI data handling and visualization in R +} +\author{ +Pere Rafols +} diff --git a/src/mergeTwoMassAxes.cpp b/src/mergeTwoMassAxes.cpp index 6bb55ee..2d40c7d 100644 --- a/src/mergeTwoMassAxes.cpp +++ b/src/mergeTwoMassAxes.cpp @@ -22,10 +22,6 @@ using namespace Rcpp; -//' @importFrom Rcpp evalCpp -//' @useDynLib rMSI, .registration = TRUE - - //Simple peak detection with bin size calculation List simplePeakDetect(NumericVector mz, NumericVector intensity) { @@ -54,19 +50,21 @@ List simplePeakDetect(NumericVector mz, NumericVector intensity) //Calculate the mass bin size for each peak for( int i = 0; i < pkIndex.length(); i++) { - double localBinSize = 0; + double minBinSize = 10e4; + double localBinSize; int count = 0; //Left region if(pkIndex[i] > 0) { for( int j = pkIndex[i]; j >= 0; j--) { - if( (intensity[j] <= intensity[j-1]) ) + if( (intensity[j] < intensity[j-1]) ) { pkIndexLeft.insert(pkIndexLeft.end(), j ); //inserting C index break; //End of peak } - localBinSize += (mz[j] - mz[j-1]); + localBinSize = (mz[j] - mz[j-1]); + minBinSize = localBinSize < minBinSize ? localBinSize : minBinSize; count++; } } @@ -76,19 +74,20 @@ List simplePeakDetect(NumericVector mz, NumericVector intensity) { for( int j = pkIndex[i]; j < (mz.length()-1); j++) { - if( (intensity[j] <= intensity[j+1]) ) + if( (intensity[j] < intensity[j+1]) ) { pkIndexRight.insert(pkIndexRight.end(), j ); //inserting C index break; //End of peak } - localBinSize += (mz[j+1] - mz[j]); + localBinSize = (mz[j+1] - mz[j]); + minBinSize = localBinSize < minBinSize ? localBinSize : minBinSize; count++; } } if(count >0 ) { - localBinSize /= (double)count; - pkBinSize.insert(pkBinSize.end(), localBinSize); + pkBinSize.insert(pkBinSize.end(), minBinSize); + minBinSize = 10e4; //reset min bin size } } @@ -198,7 +197,12 @@ List MergeMassAxis(NumericVector mz1, NumericVector bins1, NumericVector mz2, Nu else if(i1 >= mz1.length()) { //We run out of elements in mz1 - if( (mz2[i2] - newMz[inew - 1]) >= bins2[i2] ) + if(inew == 0) + { + newMz[inew] = mz2[i2]; + inew++; + } + else if( (mz2[i2] - newMz[inew - 1]) >= bins2[i2] ) { newMz[inew] = mz2[i2]; newBins[inew] = bins2[i2]; @@ -209,7 +213,12 @@ List MergeMassAxis(NumericVector mz1, NumericVector bins1, NumericVector mz2, Nu else if(i2 >= mz2.length()) { //We run out of elements in mz2 - if( (mz1[i1] - newMz[inew - 1]) >= bins1[i1] ) + if(inew == 0) + { + newMz[inew] = mz1[i1]; + inew++; + } + else if( (mz1[i1] - newMz[inew - 1]) >= bins1[i1] ) { newMz[inew] = mz1[i1]; newBins[inew] = bins1[i1]; @@ -222,7 +231,12 @@ List MergeMassAxis(NumericVector mz1, NumericVector bins1, NumericVector mz2, Nu //There are remaining elements in both mass axes if(mz1[i1] < mz2[i2]) { - if( (mz1[i1] - newMz[inew - 1]) >= bins1[i1] ) + if(inew == 0) + { + newMz[inew] = mz1[i1]; + inew++; + } + else if( (mz1[i1] - newMz[inew - 1]) >= bins1[i1] ) { newMz[inew] = mz1[i1]; newBins[inew] = bins1[i1]; @@ -232,7 +246,12 @@ List MergeMassAxis(NumericVector mz1, NumericVector bins1, NumericVector mz2, Nu } else { - if( (mz2[i2] - newMz[inew - 1]) >= bins2[i2] ) + if(inew == 0) + { + newMz[inew] = mz2[i2]; + inew++; + } + else if( (mz2[i2] - newMz[inew - 1]) >= bins2[i2] ) { newMz[inew] = mz2[i2]; newBins[inew] = bins2[i2]; diff --git a/src/plotDataReduction.cpp b/src/plotDataReduction.cpp index 4297a30..2407a12 100644 --- a/src/plotDataReduction.cpp +++ b/src/plotDataReduction.cpp @@ -20,10 +20,6 @@ using namespace Rcpp; -//' @importFrom Rcpp evalCpp -//' @useDynLib rMSI, .registration = TRUE - - // Function to reduce the number of data points in part of spectrum to plot it faster. // This is implemented in C to provid an efficient implementation. // [[Rcpp::export]]