diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java index e18b9799e..2e3e84e2c 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java @@ -289,7 +289,7 @@ protected boolean isThrowNonZeroExits() return _throwNonZeroExits; } - protected static File resolveFileInPath(String exe, @Nullable String packageName, boolean throwIfNotFound) + public static File resolveFileInPath(String exe, @Nullable String packageName, boolean throwIfNotFound) { File fn; String path; diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java index 334fcf88b..91959921b 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java @@ -90,6 +90,7 @@ public List getBaseArgs(@Nullable String toolName) List args = new ArrayList<>(); args.add(SequencePipelineService.get().getJavaFilepath()); args.addAll(SequencePipelineService.get().getJavaOpts(_maxRamOverride)); + args.add("-DGATK_STACKTRACE_ON_USER_EXCEPTION=true"); args.add("-jar"); args.add(getJAR().getPath()); @@ -98,6 +99,8 @@ public List getBaseArgs(@Nullable String toolName) args.add(toolName); } + + return args; } diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index 558d131c7..9333860c7 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -486,10 +486,10 @@ then rm -Rf bcftools* rm -Rf $LKTOOLS_DIR/bcftools - wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.18/bcftools-1.18.tar.bz2 - tar xjvf bcftools-1.18.tar.bz2 - chmod 755 bcftools-1.18 - cd bcftools-1.18 + wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2 + tar xjvf bcftools-1.20.tar.bz2 + chmod 755 bcftools-1.20 + cd bcftools-1.20 rm -f plugins/liftover.c wget $WGET_OPTS -P plugins https://raw.githubusercontent.com/freeseek/score/master/liftover.c diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml new file mode 100644 index 000000000..6ad9c3493 --- /dev/null +++ b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml new file mode 100644 index 000000000..d7d962cf2 --- /dev/null +++ b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js index fba9f476e..15f6a5a6c 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js @@ -117,6 +117,11 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', { itemId: 'useBcfTools', checked: false, fieldLabel: 'Use bcftools' + },{ + xtype: 'checkbox', + itemId: 'doNotRetainUnmapped', + checked: false, + fieldLabel: 'Do Not Retain Unmapped' }].concat(SequenceAnalysis.window.OutputHandlerWindow.getCfgForToolParameters(this.toolParameters)), buttons: [{ text: 'Submit', @@ -157,6 +162,14 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', { params.dropGenotypes = this.down('#dropGenotypes').getValue(); } + if (this.down('#useBcfTools').getValue()){ + params.useBcfTools = this.down('#useBcfTools').getValue(); + } + + if (this.down('#doNotRetainUnmapped').getValue()){ + params.doNotRetainUnmapped = this.down('#doNotRetainUnmapped').getValue(); + } + Ext4.Msg.wait('Saving...'); LABKEY.Ajax.request({ url: LABKEY.ActionURL.buildURL('sequenceanalysis', 'runSequenceHandler', this.containerPath), diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 1830fbf8b..777ccc87a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -59,6 +59,7 @@ import org.labkey.sequenceanalysis.analysis.RnaSeqcHandler; import org.labkey.sequenceanalysis.analysis.SbtGeneCountHandler; import org.labkey.sequenceanalysis.analysis.UnmappedSequenceBasedGenotypeHandler; +import org.labkey.sequenceanalysis.analysis.UpdateReadsetFilesHandler; import org.labkey.sequenceanalysis.button.AddSraRunButton; import org.labkey.sequenceanalysis.button.ArchiveReadsetsButton; import org.labkey.sequenceanalysis.button.ChangeReadsetStatusButton; @@ -338,6 +339,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); + SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java index 600780cb7..dd63cc081 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java @@ -28,6 +28,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; import org.labkey.api.util.FileType; @@ -176,6 +177,7 @@ public void processFilesRemote(List inputFiles, JobContext c boolean dropGenotypes = params.optBoolean("dropGenotypes", false); boolean useBcfTools = params.optBoolean("useBcfTools", false); + boolean doNotRetainUnmapped = params.optBoolean("doNotRetainUnmapped", false); int chainFileId = params.getInt("chainFileId"); File chainFile = ctx.getSequenceSupport().getCachedData(chainFileId); @@ -214,7 +216,7 @@ else if (_vcfFileType.isType(f.getFile())) } File lifted = new File(outDir, baseName + ".lifted-" + targetGenomeId + ext); - File unmappedOutput = new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext); + File unmappedOutput = doNotRetainUnmapped ? null : new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext); try { @@ -260,7 +262,11 @@ else if (_vcfFileType.isType(f.getFile())) ctx.addSequenceOutput(so1); } - if (!unmappedOutput.exists()) + if (unmappedOutput == null) + { + // skip + } + else if (!unmappedOutput.exists()) { job.getLogger().info("no unmapped intervals"); } @@ -328,6 +334,8 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG { LiftoverBcfToolsWrapper wrapper = new LiftoverBcfToolsWrapper(job.getLogger()); wrapper.doLiftover(currentVCF, chain, sourceGenome.getWorkingFastaFile(), targetGenome.getWorkingFastaFile(), unmappedOutput, output); + + SequencePipelineService.get().sortVcf(output, null, targetGenome.getSequenceDictionary(), ctx.getLogger()); } else { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java new file mode 100644 index 000000000..9fc12cc93 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -0,0 +1,315 @@ +package org.labkey.sequenceanalysis.analysis; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMReadGroupRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.util.FileExtensions; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFReader; +import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.Logger; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.run.PicardWrapper; +import org.labkey.api.util.FileType; +import org.labkey.api.writer.PrintWriters; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler +{ + public UpdateReadsetFilesHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Update Sample/Header Information", "This will re-header any BAM or gVCF files using the sample name from the source readset. All inputs must be single-sample and have a readset attached to the record", null, List.of( + + )); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public boolean canProcess(SequenceOutputFile f) + { + return f.getFile() != null && ( + SequenceUtil.FILETYPE.gvcf.getFileType().isType(f.getFile()) || + SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(f.getFile()) + ); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + if (inputFiles.size() > 1) + { + throw new PipelineJobException("This job expects a single input file!"); + } + + SequenceOutputFile so = inputFiles.get(0); + if (so.getReadset() == null) + { + throw new PipelineJobException("All inputs must have a readset, missing: " + so.getRowid()); + } + + Readset rs = SequenceAnalysisService.get().getReadset(so.getReadset(), ctx.getJob().getUser()); + String newRsName = SequenceUtil.getLegalReadGroupName(rs.getName()); + + if (SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(so.getFile())) + { + getAndValidateHeaderForBam(so, newRsName); + } + else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | SequenceUtil.FILETYPE.vcf.getFileType().isType(so.getFile())) + { + getAndValidateHeaderForVcf(so, newRsName); + } + + ctx.getSequenceSupport().cacheObject("readsetId", newRsName); + } + + private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String newRsName) throws PipelineJobException + { + SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault(); + try (SamReader reader = samReaderFactory.open(so.getFile())) + { + SAMFileHeader header = reader.getFileHeader().clone(); + int nSamples = reader.getFileHeader().getReadGroups().size(); + if (nSamples != 1) + { + throw new PipelineJobException("File has more than one read group, found: " + nSamples); + } + + List rgs = header.getReadGroups(); + String existingSample = rgs.get(0).getSample(); + if (existingSample.equals(newRsName)) + { + throw new PipelineJobException("Sample names match, aborting"); + } + + return header; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private VCFHeader getAndValidateHeaderForVcf(SequenceOutputFile so, String newRsName) throws PipelineJobException + { + try (VCFReader reader = new VCFFileReader(so.getFile())) + { + VCFHeader header = reader.getHeader(); + int nSamples = header.getGenotypeSamples().size(); + if (nSamples != 1) + { + throw new PipelineJobException("File has more than one sample, found: " + nSamples); + } + + String existingSample = header.getGenotypeSamples().get(0); + if (existingSample.equals(newRsName)) + { + throw new PipelineJobException("Sample names match, aborting"); + } + + return header; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + String newRsName = ctx.getSequenceSupport().getCachedObject("readsetId", String.class); + if (newRsName == null) + { + throw new PipelineJobException("Missing cached readsetId"); + } + + SequenceOutputFile so = inputFiles.get(0); + if (SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(so.getFile())) + { + reheaderBamOrCram(so, ctx, newRsName); + } + else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | SequenceUtil.FILETYPE.vcf.getFileType().isType(so.getFile())) + { + reheaderVcf(so, ctx, newRsName); + } + } + + private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName) throws PipelineJobException + { + VCFHeader header = getAndValidateHeaderForVcf(so, newRsName); + String existingSample = header.getGenotypeSamples().get(0); + + File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt"); + try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND)) + { + writer.println(newRsName); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + ctx.getFileManager().addIntermediateFile(sampleNamesFile); + + File outputVcf = new File(ctx.getWorkingDirectory(), so.getFile().getName()); + + BcftoolsRunner wrapper = new BcftoolsRunner(ctx.getLogger()); + wrapper.execute(Arrays.asList(BcftoolsRunner.getBcfToolsPath().getPath(), "reheader", "-s", sampleNamesFile.getPath(), "-o", outputVcf.getPath(), so.getFile().getPath())); + + try + { + File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false); + FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING); + + FileType gz = new FileType(".gz"); + File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX); + FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING); + + addTracker(so, existingSample, newRsName); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private void addTracker(SequenceOutputFile so, String existingSample, String newRsName) throws IOException + { + File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt"); + boolean preExisting = tracker.exists(); + try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND)) + { + if (!preExisting) + { + writer.println("OriginalSample\tNewSample"); + } + + writer.println(existingSample + "\t" + newRsName); + } + } + + private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String newRsName) throws PipelineJobException + { + try + { + SAMFileHeader header = getAndValidateHeaderForBam(so, newRsName); + + List rgs = header.getReadGroups(); + String existingSample = rgs.get(0).getSample(); + rgs.get(0).setSample(newRsName); + + File headerBam = new File(ctx.getWorkingDirectory(), "header.bam"); + try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam)) + { + + } + ctx.getFileManager().addIntermediateFile(headerBam); + ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam)); + + File output = new File(ctx.getWorkingDirectory(), so.getFile().getName()); + new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam); + if (!output.exists()) + { + throw new PipelineJobException("Missing file: " + output.getPath()); + } + + File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false); + + FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING); + FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING); + + addTracker(so, existingSample, newRsName); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private static class ReplaceSamHeaderWrapper extends PicardWrapper + { + public ReplaceSamHeaderWrapper(Logger log) + { + super(log); + } + + @Override + protected String getToolName() + { + return "ReplaceSamHeader"; + } + + public void execute(File input, File output, File headerBam) throws PipelineJobException + { + List params = new ArrayList<>(getBaseArgs()); + + params.add("--INPUT"); + params.add(input.getPath()); + + params.add("--OUTPUT"); + params.add(output.getPath()); + + params.add("--HEADER"); + params.add(headerBam.getPath()); + + execute(params); + } + } + } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java index 05a702793..883b5b6fb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java @@ -17,6 +17,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.util.FileUtil; +import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.util.ArrayList; @@ -61,7 +62,7 @@ protected void doPerformAlignment(AlignmentOutputImpl output, File inputFastq1, rg.add("LB:" + rs.getReadsetId().toString()); rg.add("PL:" + (rs.getPlatform() == null ? "ILLUMINA" : rs.getPlatform())); rg.add("PU:" + (platformUnit == null ? rs.getReadsetId().toString() : platformUnit)); - rg.add("SM:" + rs.getName().replaceAll(" ", "_")); + rg.add("SM:" + SequenceUtil.getLegalReadGroupName(rs)); extraArgs.add("'" + StringUtils.join(rg, "\\t") + "'"); getWrapper().performMemAlignment(getPipelineCtx().getJob(), output, inputFastq1, inputFastq2, outputDirectory, referenceGenome, basename, extraArgs); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 76780d215..20819c231 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -1,5 +1,9 @@ package org.labkey.sequenceanalysis.run.alignment; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; @@ -12,14 +16,17 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; +import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; -import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -57,7 +64,7 @@ public boolean doRunLocal() @Override public SequenceOutputProcessor getProcessor() { - return new DepthOfCoverageHandler.Processor(); + return new Processor(); } public static class Processor implements SequenceOutputProcessor @@ -71,61 +78,89 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport @Override public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException { - File inputVCF = ctx.getSequenceSupport().getCachedData(ctx.getParams().getInt("svVCF")); - if (!inputVCF.exists()) + int svVcfId = ctx.getParams().optInt("svVCF", 0); + if (svVcfId == 0) { - throw new PipelineJobException("Unable to find file: " + inputVCF.getPath()); + throw new PipelineJobException("svVCF param was null"); } + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); + } + + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); for (SequenceOutputFile so : inputFiles) { List depthArgs = new ArrayList<>(); depthArgs.add("idxdepth"); - depthArgs.add("-d"); + depthArgs.add("-b"); depthArgs.add(so.getFile().getPath()); - File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); + File coverageJson = new File(ctx.getWorkingDirectory(), "coverage.json"); depthArgs.add("-o"); - depthArgs.add(coverageFile.getPath()); + depthArgs.add(coverageJson.getPath()); depthArgs.add("-r"); depthArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath()); + if (threads != null) + { + depthArgs.add("--threads"); + depthArgs.add(threads.toString()); + } + new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); - if (!coverageFile.exists()) + if (!coverageJson.exists()) { - throw new PipelineJobException("Missing file: " + coverageFile.getPath()); + throw new PipelineJobException("Missing file: " + coverageJson.getPath()); } + ctx.getFileManager().addIntermediateFile(coverageJson); // Should produce a simple text file: // id path depth read length // TNPRC-IB18 ../IB18.cram 29.77 150 + File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); + try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile())) + { + SAMFileHeader header = reader.getFileHeader(); + if (header.getReadGroups().size() == 0) + { + throw new PipelineJobException("No read groups found in input BAM"); + } + else if (header.getReadGroups().size() > 1) + { + throw new PipelineJobException("More than one read group found in BAM"); + } + + String rgId = header.getReadGroups().get(0).getSample(); + + JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset())); + writer.println("id\tpath\tdepth\tread length"); + double depth = json.getJSONObject("autosome").getDouble("depth"); + double readLength = json.getInt("read_length"); + writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + ctx.getFileManager().addIntermediateFile(coverageFile); List paragraphArgs = new ArrayList<>(); - paragraphArgs.add("multigrmpy.py"); + paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath()); paragraphArgs.add("--verbose"); File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt"); paragraphArgs.add("-o"); paragraphArgs.add(paragraphOut.getPath()); - int svVcfId = ctx.getParams().optInt("svVCF"); - if (svVcfId == 0) - { - throw new PipelineJobException("Missing svVCF ID"); - } - - File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); - if (svVcf == null) - { - throw new PipelineJobException("File not found for ID: " + svVcfId); - } - else if (!svVcf.exists()) - { - throw new PipelineJobException("Missing file: " + svVcf.getPath()); - } - paragraphArgs.add("-i"); paragraphArgs.add(svVcf.getPath()); @@ -138,7 +173,6 @@ else if (!svVcf.exists()) paragraphArgs.add("--scratch-dir"); paragraphArgs.add(SequencePipelineService.get().getJavaTempDir()); - Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); if (threads != null) { paragraphArgs.add("--threads"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java index 776a42182..4b845742d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java @@ -11,6 +11,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.sequenceanalysis.run.util.AddOrReplaceReadGroupsWrapper; +import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; @@ -48,7 +49,7 @@ public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGen File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".readgroups.bam"); output.addIntermediateFile(outputBam); - output.setBAM(getWrapper().executeCommand(inputBam, outputBam, rs.getReadsetId().toString(), rs.getPlatform(), rs.getReadsetId().toString(), rs.getName().replaceAll(" ", "_"))); + output.setBAM(getWrapper().executeCommand(inputBam, outputBam, rs.getReadsetId().toString(), rs.getPlatform(), rs.getReadsetId().toString(), SequenceUtil.getLegalReadGroupName(rs))); return output; } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java index 46ffae773..6af568325 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java @@ -27,6 +27,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -110,7 +111,7 @@ else if (enforceChangeAll) List queryIntervals = intervals; if (queryIntervals == null || queryIntervals.isEmpty()) { - queryIntervals.add(null); + queryIntervals = Collections.singletonList(null); } int i = 0; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java index 90978fcf0..9055c0cbe 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java @@ -420,7 +420,8 @@ else if (refName.endsWith("_alt")) refName = StringUtils.join(Arrays.copyOfRange(tokens, 1, tokens.length), "_"); } - if (refName.equals("chrM")) + // NOTE: hg19 and GRCh37 have different MT contigs. chrM is the legacy version. + if (refName.equals("chrMT")) { return "MT"; } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java index f270b7e8d..6808024d9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java @@ -31,6 +31,7 @@ import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.model.Readset; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.run.CommandWrapper; @@ -650,4 +651,14 @@ else if (FILETYPE.cram.getFileType().isType(bamOrCram)) return null; } + + public static String getLegalReadGroupName(Readset rs) + { + return getLegalReadGroupName(rs.getName()); + } + + public static String getLegalReadGroupName(String rsName) + { + return rsName.replaceAll(" ", "_"); + } } \ No newline at end of file