diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java index 91a6d3937..0767a7471 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/BcftoolsRunner.java @@ -18,7 +18,7 @@ public BcftoolsRunner(@Nullable Logger logger) super(logger); } - public File getBcfToolsPath() + public static File getBcfToolsPath() { return SequencePipelineService.get().getExeForPackage("BCFTOOLSPATH", "bcftools"); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 1d095b04a..950cae97b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -45,6 +45,8 @@ import org.labkey.sequenceanalysis.analysis.BamHaplotypeHandler; import org.labkey.sequenceanalysis.analysis.CombineStarGeneCountsHandler; import org.labkey.sequenceanalysis.analysis.CombineSubreadGeneCountsHandler; +import org.labkey.sequenceanalysis.analysis.DeepVariantHandler; +import org.labkey.sequenceanalysis.analysis.GLNexusHandler; import org.labkey.sequenceanalysis.analysis.GenotypeGVCFHandler; import org.labkey.sequenceanalysis.analysis.HaplotypeCallerHandler; import org.labkey.sequenceanalysis.analysis.LiftoverHandler; @@ -330,6 +332,8 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler()); SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler()); SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler()); + SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); + SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler()); @@ -382,8 +386,8 @@ public void doStartupAfterSpringConfig(ModuleContext moduleContext) LDKService.get().registerQueryButton(new DownloadSraButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); LDKService.get().registerQueryButton(new ArchiveReadsetsButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); - LDKService.get().registerQueryButton(new ChangeReadsetStatusForAnalysesButton(), "sequenceanalysis", "sequence_analyses"); - LDKService.get().registerQueryButton(new ChangeReadsetStatusButton(), "sequenceanalysis", "sequence_readsets"); + LDKService.get().registerQueryButton(new ChangeReadsetStatusForAnalysesButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_ANALYSES); + LDKService.get().registerQueryButton(new ChangeReadsetStatusButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_READSETS); ExperimentService.get().registerExperimentRunTypeSource(new ExperimentRunTypeSource() { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java new file mode 100644 index 000000000..6b3c3d6b8 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java @@ -0,0 +1,162 @@ +package org.labkey.sequenceanalysis.analysis; + +import org.apache.commons.lang3.StringUtils; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.util.FileType; +import org.labkey.api.util.FileUtil; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +/** + * Created by bimber on 2/3/2016. + */ +public class DeepVariantHandler extends AbstractParameterizedOutputHandler +{ + private final FileType _bamOrCramFileType = new FileType(Arrays.asList("bam", "cram"), "bam"); + + public DeepVariantHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Run DeepVariant", "This will run DeepVariant on the selected BAMs to generate gVCF files.", null, DeepVariantAnalysis.getToolDescriptors()); + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && _bamOrCramFileType.isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + public class Processor implements SequenceOutputProcessor + { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + String modelType = ctx.getParams().optString("modelType"); + DeepVariantAnalysis.inferModelType(modelType, ctx); + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + PipelineJob job = ctx.getJob(); + if (inputFiles.size() != 1) + { + throw new PipelineJobException("Expected a single input file"); + } + + SequenceOutputFile so = inputFiles.get(0); + + RecordedAction action = new RecordedAction(getName()); + action.setStartTime(new Date()); + + action.addInput(so.getFile(), "Input BAM File"); + + File outputFile = new File(ctx.getOutputDir(), FileUtil.getBaseName(so.getFile()) + ".g.vcf.gz"); + + DeepVariantAnalysis.DeepVariantWrapper wrapper = new DeepVariantAnalysis.DeepVariantWrapper(job.getLogger()); + wrapper.setOutputDir(ctx.getOutputDir()); + + ReferenceGenome referenceGenome = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); + if (referenceGenome == null) + { + throw new PipelineJobException("No reference genome found for output: " + so.getRowid()); + } + + String inferredModelType = ctx.getSequenceSupport().getCachedObject("modelType", String.class); + String modelType = inferredModelType == null ? ctx.getParams().optString("modelType") : inferredModelType; + if (modelType == null) + { + throw new PipelineJobException("Missing model type"); + } + + List args = new ArrayList<>(getClientCommandArgs(ctx.getParams())); + args.add("--model_type=" + modelType); + + String binVersion = ctx.getParams().optString("binVersion"); + if (binVersion == null) + { + throw new PipelineJobException("Missing binVersion"); + } + + wrapper.execute(so.getFile(), referenceGenome.getWorkingFastaFile(), outputFile, ctx.getFileManager(), binVersion, args); + + action.addOutput(outputFile, "gVCF File", false); + + SequenceOutputFile o = new SequenceOutputFile(); + o.setName(outputFile.getName()); + o.setFile(outputFile); + o.setLibrary_id(so.getLibrary_id()); + o.setCategory("DeepVariant gVCF File"); + o.setReadset(so.getReadset()); + o.setDescription("DeepVariant Version: " + binVersion); + + ctx.addSequenceOutput(o); + + ctx.addActions(action); + } + + private List getClientCommandArgs(JSONObject params) + { + List ret = new ArrayList<>(); + + for (ToolParameterDescriptor desc : getParameters()) + { + if (desc.getCommandLineParam() != null) + { + String val = params.optString(desc.getName(), null); + if (StringUtils.trimToNull(val) != null) + { + ret.addAll(desc.getCommandLineParam().getArguments(" ", val)); + } + } + } + + return ret; + } + + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java new file mode 100644 index 000000000..afe54151f --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -0,0 +1,283 @@ +package org.labkey.sequenceanalysis.analysis; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.util.FileType; +import org.labkey.api.writer.PrintWriters; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.run.util.BgzipRunner; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler.VCF_CATEGORY; + +/** + * Created by bimber on 2/3/2016. + */ +public class GLNexusHandler extends AbstractParameterizedOutputHandler +{ + protected FileType _gvcfFileType = new FileType(List.of(".g.vcf"), ".g.vcf", false, FileType.gzSupportLevel.SUPPORT_GZ); + + public GLNexusHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Run DeepVariant", "This will run GLNexus on the selected gVCFs.", null, Arrays.asList( + ToolParameterDescriptor.create("binVersion", "GLNexus Version", "The version of GLNexus to run, which is passed to their docker container", "textfield", new JSONObject(){{ + put("allowBlank", false); + }}, "v1.2.7"), + ToolParameterDescriptor.create("fileBaseName", "Filename", "This is the basename that will be used for the output gzipped VCF", "textfield", new JSONObject(){{ + put("allowBlank", false); + }}, "CombinedGenotypes") + )); + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + + return o.getFile() != null && _gvcfFileType.isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + @Override + public boolean doSplitJobs() + { + return false; + } + + public class Processor implements SequenceOutputProcessor + { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + Set genomeIds = new HashSet<>(); + for (SequenceOutputFile so : inputFiles) + { + genomeIds.add(so.getLibrary_id()); + } + + if (genomeIds.size() > 1) + { + throw new PipelineJobException("The selected files use more than one genome"); + } + else if (genomeIds.isEmpty()) + { + throw new PipelineJobException("No genome ID found for inputs"); + } + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + RecordedAction action = new RecordedAction(getName()); + action.setStartTime(new Date()); + + Set genomeIds = new HashSet<>(); + List inputVcfs = new ArrayList<>(); + for (SequenceOutputFile so : inputFiles) + { + genomeIds.add(so.getLibrary_id()); + inputVcfs.add(so.getFile()); + action.addInput(so.getFile(), "Input gVCF File"); + } + + if (genomeIds.size() > 1) + { + throw new PipelineJobException("The selected files use more than one genome"); + } + else if (genomeIds.isEmpty()) + { + throw new PipelineJobException("No genome ID found for inputs"); + } + + int genomeId = genomeIds.iterator().next(); + + String basename = StringUtils.trimToNull(ctx.getParams().optString("fileBaseName")); + if (basename == null) + { + throw new PipelineJobException("Basename not supplied for output VCF"); + } + + String binVersion = ctx.getParams().optString("binVersion"); + if (binVersion == null) + { + throw new PipelineJobException("Missing binVersion"); + } + + File outputVcf = new File(ctx.getOutputDir(), basename + ".vcf.gz"); + + new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, outputVcf, ctx.getFileManager(), binVersion); + + ctx.getLogger().debug("adding sequence output: " + outputVcf.getPath()); + SequenceOutputFile so1 = new SequenceOutputFile(); + so1.setName(outputVcf.getName()); + so1.setDescription("GLNexus output. Version: " + binVersion + ". Total samples: " + inputFiles.size()); + so1.setFile(outputVcf); + so1.setLibrary_id(genomeId); + so1.setCategory(VCF_CATEGORY); + so1.setContainer(ctx.getJob().getContainerId()); + so1.setCreated(new Date()); + so1.setModified(new Date()); + + ctx.getFileManager().addSequenceOutput(so1); + } + + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + } + + public static class GLNexusWrapper extends AbstractCommandWrapper + { + public GLNexusWrapper(Logger logger) + { + super(logger); + } + + private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + { + try + { + if (workingDirectory.equals(input.getParentFile())) + { + return input; + } + + File local = new File(workingDirectory, input.getName()); + if (!local.exists()) + { + getLogger().debug("Copying file locally: " + input.getPath()); + FileUtils.copyFile(input, local); + } + + output.addIntermediateFile(local); + + return local; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion) throws PipelineJobException + { + File workDir = outputVcf.getParentFile(); + tracker.addIntermediateFile(outputVcf); + tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); + + List gvcfsLocal = new ArrayList<>(); + for (File f : inputGvcfs) + { + gvcfsLocal.add(ensureLocalCopy(f, workDir, tracker)); + ensureLocalCopy(SequenceUtil.getExpectedIndex(f), workDir, tracker); + } + + File localBashScript = new File(workDir, "docker.sh"); + tracker.addIntermediateFile(localBashScript); + + try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) + { + writer.println("#!/bin/bash"); + writer.println("set -x"); + writer.println("WD=`pwd`"); + writer.println("HOME=`echo ~/`"); + writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); + writer.println("sudo $DOCKER pull quay.io/mlin/glnexus:" + binVersion); + writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("\t-v \"${WD}:/work\" \\"); + writer.println("\t-v \"${HOME}:/homeDir\" \\"); + if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) + { + writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); + } + writer.println("\t-u $UID \\"); + writer.println("\t-e USERID=$UID \\"); + writer.println("\t--entrypoint /bin/bash \\"); + writer.println("\t-w /work \\"); + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) + { + writer.println("\t--memory='" + maxRam + "g' \\"); + } + writer.println("\tquay.io/mlin/glnexus:" + binVersion + " \\"); + + writer.println("\tglnexus_cli" + " \\"); + writer.println("\t--config DeepVariant" + " \\"); + + gvcfsLocal.forEach(f -> { + writer.println("\t-i gvcf=/work/" + f.getPath() + " \\"); + }); + + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + if (maxThreads != null) + { + writer.println("\t--threads " + maxThreads + " \\"); + } + + File bcftools = BcftoolsRunner.getBcfToolsPath(); + File bgzip = BgzipRunner.getExe(); + writer.println("\t| " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + execute(Arrays.asList(localBashScript.getPath())); + + if (!outputVcf.exists()) + { + throw new PipelineJobException("File not found: " + outputVcf.getPath()); + } + + File idxFile = new File(outputVcf.getPath() + ".tbi"); + if (!idxFile.exists()) + { + throw new PipelineJobException("Missing index: " + idxFile.getPath()); + } + } + } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 08efce26f..06121ab4f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -83,15 +83,20 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException throw new PipelineJobException("Missing model type"); } + inferModelType(modelType, getPipelineCtx()); + } + + public static void inferModelType(String modelType, PipelineContext ctx) throws PipelineJobException + { if ("AUTO".equals(modelType)) { - getPipelineCtx().getLogger().info("Inferring model type by readset type:"); - if (support.getCachedReadsets().size() != 1) + ctx.getLogger().info("Inferring model type by readset type:"); + if (ctx.getSequenceSupport().getCachedReadsets().size() != 1) { - throw new PipelineJobException("Expected a single cached readset, found: " + support.getCachedReadsets().size()); + throw new PipelineJobException("Expected a single cached readset, found: " + ctx.getSequenceSupport().getCachedReadsets().size()); } - Readset rs = support.getCachedReadsets().get(0); + Readset rs = ctx.getSequenceSupport().getCachedReadsets().get(0); if ("ILLUMINA".equals(rs.getPlatform())) { switch (rs.getApplication()) @@ -119,7 +124,7 @@ else if ("PACBIO".equals(rs.getPlatform())) throw new PipelineJobException("Unable to infer modelType for: " + rs.getName()); } - support.cacheObject("modelType", modelType); + ctx.getSequenceSupport().cacheObject("modelType", modelType); } } @@ -285,6 +290,12 @@ public void execute(File inputBam, File refFasta, File outputGvcf, PipelineOutpu { throw new PipelineJobException("File not found: " + outputGvcf.getPath()); } + + File idxFile = new File(outputGvcf.getPath() + ".tbi"); + if (!idxFile.exists()) + { + throw new PipelineJobException("Missing index: " + idxFile.getPath()); + } } } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java index a0867ae5e..136dba7a3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/BgzipRunner.java @@ -95,7 +95,7 @@ private List getParams(File input, boolean preserveInput) return params; } - public File getExe() + public static File getExe() { return SequencePipelineService.get().getExeForPackage("BGZIPPATH", "bgzip"); } diff --git a/cluster/src/org/labkey/cluster/ClusterModule.java b/cluster/src/org/labkey/cluster/ClusterModule.java index 4cf4399ec..e7638ddcc 100644 --- a/cluster/src/org/labkey/cluster/ClusterModule.java +++ b/cluster/src/org/labkey/cluster/ClusterModule.java @@ -45,7 +45,6 @@ import org.labkey.cluster.query.ViewClusterSubmissionsButton; import org.labkey.cluster.query.ViewJavaLogButton; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet;