Skip to content

Commit

Permalink
Switch bwa-mem to run using basic pipe
Browse files (browse the repository at this point in the history)
  • Loading branch information
bbimber committed Mar 8, 2024
1 parent 670a3fd commit 9ec4249
Showing 1 changed file with 31 additions and 84 deletions.
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
package org.labkey.sequenceanalysis.run.alignment;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.json.JSONObject;
Expand All @@ -14,17 +13,12 @@
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.ProcessUtils;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.StringUtilsLabKey;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand All @@ -43,7 +37,7 @@ public BWAMemWrapper(@Nullable Logger logger)

public static class BWAMemAlignmentStep extends BWAAlignmentStep<BWAMemWrapper>
{
public BWAMemAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx)
public BWAMemAlignmentStep(AlignmentStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new BWAMemWrapper(ctx.getLogger()));
}
Expand Down Expand Up @@ -102,93 +96,46 @@ public void performMemAlignment(PipelineJob job, AlignmentOutputImpl output, Fil
{
setOutputDir(outputDirectory);

getLogger().info("Running BWA-Mem (Piped)");
getLogger().info("Running BWA-Mem");
getLogger().debug("will write BAM to: " + outputDirectory);

List<String> args = new ArrayList<>();
args.add(getExe().getPath());
args.add("mem");
args.add("-v");
args.add("1");
List<String> bwaArgs = new ArrayList<>();
bwaArgs.add(getExe().getPath());
bwaArgs.add("mem");
bwaArgs.add("-v");
bwaArgs.add("1");
if (additionalArgs != null)
args.addAll(additionalArgs);
appendThreads(job, args);
bwaArgs.addAll(additionalArgs);
appendThreads(job, bwaArgs);

args.add(new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath());
args.add(inputFastq1.getPath());
bwaArgs.add(new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath());
bwaArgs.add(inputFastq1.getPath());

if (inputFastq2 != null)
{
args.add(inputFastq2.getPath());
bwaArgs.add(inputFastq2.getPath());
}

try
{
//run BWA and pipe directly to samtools to make BAM
File bam = new File(outputDirectory, basename + ".bam");

output.addCommandExecuted(StringUtils.join(args, " "));
ProcessBuilder bwaProcessBuilder = getProcessBuilder(args);
getLogger().info(StringUtils.join(args, " "));

SamtoolsRunner sr = new SamtoolsRunner(getLogger());
List<String> samtoolsArgs = Arrays.asList(sr.getSamtoolsPath().getPath(), "view", "-b", "-h", "-S", "-T", referenceGenome.getWorkingFastaFile().getPath(), "-o", bam.getPath(), "-");
output.addCommandExecuted(StringUtils.join(samtoolsArgs, " "));
ProcessBuilder samtoolsProcessBuilder = sr.getProcessBuilder(samtoolsArgs);
samtoolsProcessBuilder.redirectErrorStream(true);
getLogger().info(StringUtils.join(samtoolsArgs, " "));

Process bwaProcess = null;
Process samtoolsProcess = null;
try
{
samtoolsProcess = samtoolsProcessBuilder.start();
bwaProcess = bwaProcessBuilder.start();
new ProcessUtils.ProcessReader(getLogger(), true, true).readProcess(bwaProcess); //read STDERR in separate thread
new ProcessUtils.StreamRedirector(getLogger()).redirectStreams(bwaProcess, samtoolsProcess);

try (BufferedReader procReader = new BufferedReader(new InputStreamReader(samtoolsProcess.getInputStream(), StringUtilsLabKey.DEFAULT_CHARSET)))
{
String line;
while ((line = procReader.readLine()) != null)
{
getLogger().log(Level.DEBUG, "\t" + line);
}
}

int lastReturnCode = samtoolsProcess.waitFor();
if (lastReturnCode != 0)
{
throw new PipelineJobException("process exited with non-zero value: " + lastReturnCode);
}
}
catch (InterruptedException e)
{
throw new PipelineJobException(e);
}
finally
{
if (bwaProcess != null)
{
bwaProcess.destroy();
}

if (samtoolsProcess != null)
{
samtoolsProcess.destroy();
}
}

if (!bam.exists())
{
throw new PipelineJobException("Unable to find output file: " + bam.getPath());
}

output.addOutput(bam, AlignmentOutputImpl.BAM_ROLE);
}
catch (IOException e)
//run BWA and pipe directly to samtools to make BAM
File bam = new File(outputDirectory, basename + ".bam");
output.addCommandExecuted(StringUtils.join(bwaArgs, " "));

SamtoolsRunner sr = new SamtoolsRunner(getLogger());
List<String> samtoolsArgs = Arrays.asList(sr.getSamtoolsPath().getPath(), "view", "-b", "-h", "-S", "-T", referenceGenome.getWorkingFastaFile().getPath(), "-o", bam.getPath(), "-");
output.addCommandExecuted(StringUtils.join(samtoolsArgs, " "));

List<String> bashArgs = new ArrayList<>();
bashArgs.add("/bin/bash");
bashArgs.add("-c");
bashArgs.add(StringUtils.join(bwaArgs, " ") + " | " + StringUtils.join(samtoolsArgs, " "));

execute(bashArgs);

if (!bam.exists())
{
throw new PipelineJobException(e);
throw new PipelineJobException("Unable to find output file: " + bam.getPath());
}

output.addOutput(bam, AlignmentOutputImpl.BAM_ROLE);
}
}

0 comments on commit 9ec4249

Please sign in to comment.