Skip to content

Commit

Permalink
- improved resampling performance on fticr data
Browse files Browse the repository at this point in the history
- rollback to previous bin size calculation which performs better
  • Loading branch information
prafols committed Mar 19, 2020
1 parent bbdb6cb commit 840fc88
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 19 deletions.
35 changes: 26 additions & 9 deletions R/imzMLreader.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM
bytes2ReadMz <- sizeInBytesFromDataType(xmlRes$mz_dataType)

bCreateRamdisk <- F #Start assuming data in processed and peak list
bNoNeed2Resample <- T #Start assuming there is no need to resample the data
if(xmlRes$continuous_mode)
{
mzAxis <- readBin(bincon, readDataTypeMz, xmlRes$run_data[1, "mzLength"], size = bytes2ReadMz, signed = T)
Expand All @@ -229,6 +230,11 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM
icurr <- 1
bLoad <- TRUE
MergedSpc <- list()

#Read only the first mass axis to compare if others are identical, this is the case for Bruker FTICR
seek(bincon, rw = "read", where = xmlRes$run_data[1, "mzOffset"] )
firstMassAxis <- unique(readBin(bincon, readDataTypeMz, xmlRes$run_data[1, "mzLength"], size = bytes2ReadMz, signed = T))

while(TRUE)
{
if(bLoad)
Expand All @@ -246,7 +252,17 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM

#Get Bin size at peaks
LoadMass <- CurrSpectrumFixed$mass
LoadBins <- rMSI::CalcMassAxisBinSize( LoadMass, CurrSpectrumFixed$intensity)
bMassMerge <- T
if(identical(firstMassAxis, LoadMass))
{
bMassMerge <- F
LoadBins <- NULL
}
else
{
LoadBins <- rMSI::CalcMassAxisBinSize( LoadMass, CurrSpectrumFixed$intensity)
}
bNoNeed2Resample <- bNoNeed2Resample & (!bMassMerge)

#Shift register
if(length(MergedSpc) > 0)
Expand All @@ -257,7 +273,7 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM
}
}

MergedSpc[[1]] <- list( level = 0, mass = LoadMass, bins = LoadBins )
MergedSpc[[1]] <- list( level = 0, mass = LoadMass, bins = LoadBins, merge = bMassMerge )
icurr <- icurr + 1
bLoad <- FALSE

Expand All @@ -279,16 +295,16 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM
if(MergedSpc[[1]]$level == MergedSpc[[2]]$level || icurr > nrow(xmlRes$run_data))
{
#Merge!
if(identical(MergedSpc[[1]]$mass, MergedSpc[[2]]$mass))
if( MergedSpc[[2]]$merge)
{
#Both mass axes are identical so there is no need to merge them, this is the case for Bruker FTICR data
mam <- list(mass = MergedSpc[[1]]$mass, bins = MergedSpc[[1]]$bins )
mam <- rMSI::MergeMassAxis(MergedSpc[[1]]$mass, MergedSpc[[1]]$bins, MergedSpc[[2]]$mass, MergedSpc[[2]]$bins )
}
else
{
mam <- rMSI::MergeMassAxis(MergedSpc[[1]]$mass, MergedSpc[[1]]$bins, MergedSpc[[2]]$mass, MergedSpc[[2]]$bins )
#Both mass axes are identical so there is no need to merge them, this is the case for Bruker FTICR data
mam <- list(mass = MergedSpc[[1]]$mass, bins = MergedSpc[[1]]$bins )
}
MergedSpc[[1]] <- list( level = MergedSpc[[1]]$level + 1, mass = mam$mass, bins = mam$bins )
MergedSpc[[1]] <- list( level = MergedSpc[[1]]$level + 1, mass = mam$mass, bins = mam$bins, merge = MergedSpc[[2]]$merge )

#Shift register
if(length(MergedSpc) > 2)
Expand Down Expand Up @@ -396,11 +412,12 @@ import_imzML <- function(imzML_File, ibd_File = paste(sub("\\.[^.]*$", "", imzM
{
#Fix duplicates and zero drops
CurrSpectrumFixed <- fixImzMLDuplicates(mzdd, dd)
mzdd <- CurrSpectrumFixed$mass
dd <- CurrSpectrumFixed$intensity

#Apply re-sampling (only if needed...)
if( ! identical(mzdd, mzAxis))
if( !bNoNeed2Resample )
{
cat("DBG: resampling intensity!\n") #TODO delete me!
if(length(mzdd) == length(dd))
{
dd <- (approx( x = mzdd, y = dd, xout = mzAxis, ties = "ordered", yleft = 0, yright = 0))$y
Expand Down
17 changes: 7 additions & 10 deletions src/mergeTwoMassAxes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,19 @@ List simplePeakDetect(NumericVector mz, NumericVector intensity)
//Calculate the mass bin size for each peak
for( int i = 0; i < pkIndex.length(); i++)
{
double minBinSize = 10e4;
double localBinSize;
double localBinSize = 0;
int count = 0;
//Left region
if(pkIndex[i] > 0)
{
for( int j = pkIndex[i]; j >= 0; j--)
{
if( (intensity[j] < intensity[j-1]) )
if( (intensity[j] <= intensity[j-1]) )
{
pkIndexLeft.insert(pkIndexLeft.end(), j ); //inserting C index
break; //End of peak
}
localBinSize = (mz[j] - mz[j-1]);
minBinSize = localBinSize < minBinSize ? localBinSize : minBinSize;
localBinSize += (mz[j] - mz[j-1]);
count++;
}
}
Expand All @@ -74,20 +72,19 @@ List simplePeakDetect(NumericVector mz, NumericVector intensity)
{
for( int j = pkIndex[i]; j < (mz.length()-1); j++)
{
if( (intensity[j] < intensity[j+1]) )
if( (intensity[j] <= intensity[j+1]) )
{
pkIndexRight.insert(pkIndexRight.end(), j ); //inserting C index
break; //End of peak
}
localBinSize = (mz[j+1] - mz[j]);
minBinSize = localBinSize < minBinSize ? localBinSize : minBinSize;
localBinSize += (mz[j+1] - mz[j]);
count++;
}
}
if(count >0 )
{
pkBinSize.insert(pkBinSize.end(), minBinSize);
minBinSize = 10e4; //reset min bin size
localBinSize /= (double)count;
pkBinSize.insert(pkBinSize.end(), localBinSize);
}
}

Expand Down

0 comments on commit 840fc88

Please sign in to comment.