Skip to content

Commit

Permalink
Merge pull request #285 from BimberLab/24.3_fb_merge
Browse files Browse the repository at this point in the history
Merge discvr-23.11 to discvr-24.3
  • Loading branch information
bbimber authored Jul 6, 2024
2 parents 4cf4f57 + 5a3b5e3 commit aabe6ff
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package org.labkey.api.sequenceanalysis.run;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.writer.PrintWriters;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;

/**
 * Command wrapper that runs a command line inside a docker container by generating two bash scripts
 * in the working directory:
 * <ul>
 *     <li><code>docker.sh</code>: executed on the host; pulls the image and invokes <code>docker run</code></li>
 *     <li><code>dockerRun.sh</code>: executed inside the container; holds the actual command</li>
 * </ul>
 * The host working directory is mounted in the container at <code>/work</code>, so any file the command needs
 * must live directly in that directory (see {@link #ensureLocalCopy}) and be referenced by its
 * <code>/work/...</code> path.
 */
public class DockerWrapper extends AbstractCommandWrapper
{
    private final String _containerName;
    private File _tmpDir = null;

    /**
     * @param containerName The docker image to pull and run, written verbatim into the generated script
     * @param log The logger passed to the superclass
     */
    public DockerWrapper(String containerName, Logger log)
    {
        super(log);
        _containerName = containerName;
    }

    /**
     * @param tmpDir A host directory to mount in the container at <code>/tmp</code>. If null (the default), no
     *               additional mount is added.
     */
    public void setTmpDir(File tmpDir)
    {
        _tmpDir = tmpDir;
    }

    /**
     * Writes the host and container scripts into workDir, registers both as intermediate files, and runs the
     * host script with <code>/bin/bash</code>.
     *
     * @param containerArgs The command to run inside the container. The elements are joined with single spaces
     *                      and written as one line of the container script without any quoting or escaping, so
     *                      bash will interpret whitespace and shell metacharacters within them.
     * @param workDir The host directory where the scripts are written; mounted in the container at <code>/work</code>
     * @param tracker Receives the two generated scripts as intermediate files
     * @throws PipelineJobException if the scripts cannot be written, or as thrown by execute()
     */
    public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
    {
        File localBashScript = new File(workDir, "docker.sh");
        File dockerBashScript = new File(workDir, "dockerRun.sh");
        tracker.addIntermediateFile(localBashScript);
        tracker.addIntermediateFile(dockerBashScript);

        setWorkingDir(workDir);
        try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript))
        {
            writeHostScript(writer, dockerBashScript.getName());
            writeContainerScript(dockerWriter, containerArgs);
        }
        catch (IOException e)
        {
            throw new PipelineJobException(e);
        }

        execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
    }

    // Writes the host-side script, which pulls the image and runs the container script via 'docker run'
    private void writeHostScript(PrintWriter writer, String containerScriptName)
    {
        writer.println("#!/bin/bash");
        writer.println("set -x");
        writer.println("WD=`pwd`");
        writer.println("HOME=`echo ~/`");
        writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
        writer.println("sudo $DOCKER pull " + _containerName);
        writer.println("sudo $DOCKER run --rm=true \\");
        writer.println("\t-v \"${WD}:/work\" \\");
        writer.println("\t-v \"${HOME}:/homeDir\" \\");
        if (_tmpDir != null)
        {
            writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
        }
        // Override the image's entrypoint so the generated script is what runs:
        writer.println("\t--entrypoint /bin/bash \\");
        writer.println("\t-w /work \\");
        Integer maxRam = SequencePipelineService.get().getMaxRam();
        if (maxRam != null)
        {
            // The same value is exposed to the tool as an environment variable and used as the container memory limit (suffix 'g'):
            writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\");
            writer.println("\t--memory='" + maxRam + "g' \\");
        }
        writer.println("\t" + _containerName + " \\");
        writer.println("\t/work/" + containerScriptName);
        // Capture the status immediately, since the echo below would otherwise overwrite $?:
        writer.println("EXIT_CODE=$?");
        writer.println("echo 'Docker run exit code: '$EXIT_CODE");
        writer.println("exit $EXIT_CODE");
    }

    // Writes the script executed inside the container: the command itself, followed by reporting/propagating its exit code
    private void writeContainerScript(PrintWriter dockerWriter, List<String> containerArgs)
    {
        dockerWriter.println("#!/bin/bash");
        dockerWriter.println("set -x");
        dockerWriter.println(StringUtils.join(containerArgs, " "));
        dockerWriter.println("EXIT_CODE=$?");
        // Echo the saved value. After the assignment above, $? holds the status of the assignment (always 0), not of the command:
        dockerWriter.println("echo 'Exit code: '$EXIT_CODE");
        dockerWriter.println("exit $EXIT_CODE");
    }

    /**
     * Ensures a file with the same name as input exists directly within workingDirectory, copying it if needed.
     * If input is already located in workingDirectory it is returned unchanged and is NOT registered as an
     * intermediate file. Otherwise the local file is registered as an intermediate file, whether it was copied
     * by this call or already existed.
     *
     * @param input The file that needs to be available in the working directory
     * @param workingDirectory The target directory
     * @param output Receives the local copy as an intermediate file
     * @return The file within workingDirectory
     * @throws PipelineJobException if the copy fails
     */
    public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
    {
        try
        {
            if (workingDirectory.equals(input.getParentFile()))
            {
                return input;
            }

            File local = new File(workingDirectory, input.getName());
            if (!local.exists())
            {
                getLogger().debug("Copying file locally: " + input.getPath());
                FileUtils.copyFile(input, local);
            }

            output.addIntermediateFile(local);

            return local;
        }
        catch (IOException e)
        {
            throw new PipelineJobException(e);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
import org.labkey.api.sequenceanalysis.run.DockerWrapper;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.api.util.FileUtil;
import org.labkey.api.writer.PrintWriters;
Expand Down Expand Up @@ -127,6 +127,7 @@ else if (!svVcf.exists())
// id path depth read length
// TNPRC-IB18 ../IB18.cram 29.77 150
File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt");
String rgId = null;
try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile()))
{
SAMFileHeader header = reader.getFileHeader();
Expand All @@ -139,52 +140,58 @@ else if (header.getReadGroups().size() > 1)
throw new PipelineJobException("More than one read group found in BAM");
}

String rgId = header.getReadGroups().get(0).getSample();
rgId = header.getReadGroups().get(0).getSample();

JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
writer.println("id\tpath\tdepth\tread length");
double depth = json.getJSONObject("autosome").getDouble("depth");
double readLength = json.getInt("read_length");
writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength);
writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
}
catch (IOException e)
{
throw new PipelineJobException(e);
}
ctx.getFileManager().addIntermediateFile(coverageFile);

DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger());
List<String> paragraphArgs = new ArrayList<>();
paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath());
paragraphArgs.add("--verbose");
paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py");

File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt");
dockerWrapper.ensureLocalCopy(so.getFile(), ctx.getWorkingDirectory(), ctx.getFileManager());
dockerWrapper.ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), ctx.getWorkingDirectory(), ctx.getFileManager());

File paragraphOutDir = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()));
paragraphArgs.add("-o");
paragraphArgs.add(paragraphOut.getPath());
paragraphArgs.add("/work/" + paragraphOutDir.getName());

paragraphArgs.add("-i");
paragraphArgs.add(svVcf.getPath());
dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager());
dockerWrapper.ensureLocalCopy(new File(svVcf.getPath() + ".tbi"), ctx.getWorkingDirectory(), ctx.getFileManager());
paragraphArgs.add("/work/" + svVcf.getName());

paragraphArgs.add("-m");
paragraphArgs.add(coverageFile.getPath());
paragraphArgs.add("/work/" + coverageFile.getName());

paragraphArgs.add("-r");
paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile();
dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager());
dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager());
paragraphArgs.add("/work/" + genomeFasta.getName());

paragraphArgs.add("--scratch-dir");
paragraphArgs.add(SequencePipelineService.get().getJavaTempDir());
paragraphArgs.add("/tmp");
dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir()));

if (threads != null)
{
paragraphArgs.add("--threads");
paragraphArgs.add(threads.toString());
}

paragraphArgs.add("--logfile");
paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath());

new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs);
dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager());

File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz");
File genotypes = new File(paragraphOutDir, "genotypes.vcf.gz");
if (!genotypes.exists())
{
throw new PipelineJobException("Missing file: " + genotypes.getPath());
Expand All @@ -200,6 +207,11 @@ else if (header.getReadGroups().size() > 1)
}

ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")");

ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "variants.json.gz"));
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "variants.vcf.gz"));
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "genotypes.json.gz"));
ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "grmpy.log"));
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
pointsConfigMap.put(field, doublePointsConfig);
}
case Integer -> {
numericQueryParserFields.put(field, SortField.Type.INT);
numericQueryParserFields.put(field, SortField.Type.LONG);
pointsConfigMap.put(field, intPointsConfig);
}
}
Expand Down

0 comments on commit aabe6ff

Please sign in to comment.