From 4cc44ec18ffe03945a0d74c5b0c8381991fde667 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 12 Jun 2024 21:37:03 -0700 Subject: [PATCH 01/14] Add step to replace the sample in BAM/VCFs based on the current DB info --- .../run/AbstractGatk4Wrapper.java | 3 + .../SequenceAnalysisModule.java | 2 + .../analysis/UpdateReadsetFilesHandler.java | 315 ++++++++++++++++++ .../run/alignment/BWAMemWrapper.java | 3 +- .../AddOrReplaceReadGroupsStep.java | 3 +- .../sequenceanalysis/util/SequenceUtil.java | 11 + 6 files changed, 335 insertions(+), 2 deletions(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java index 334fcf88b..91959921b 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java @@ -90,6 +90,7 @@ public List getBaseArgs(@Nullable String toolName) List args = new ArrayList<>(); args.add(SequencePipelineService.get().getJavaFilepath()); args.addAll(SequencePipelineService.get().getJavaOpts(_maxRamOverride)); + args.add("-DGATK_STACKTRACE_ON_USER_EXCEPTION=true"); args.add("-jar"); args.add(getJAR().getPath()); @@ -98,6 +99,8 @@ public List getBaseArgs(@Nullable String toolName) args.add(toolName); } + + return args; } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 1830fbf8b..777ccc87a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -59,6 +59,7 @@ import org.labkey.sequenceanalysis.analysis.RnaSeqcHandler; import org.labkey.sequenceanalysis.analysis.SbtGeneCountHandler; import org.labkey.sequenceanalysis.analysis.UnmappedSequenceBasedGenotypeHandler; +import org.labkey.sequenceanalysis.analysis.UpdateReadsetFilesHandler; import org.labkey.sequenceanalysis.button.AddSraRunButton; import org.labkey.sequenceanalysis.button.ArchiveReadsetsButton; import org.labkey.sequenceanalysis.button.ChangeReadsetStatusButton; @@ -338,6 +339,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); + SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java new file mode 100644 index 000000000..9fc12cc93 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -0,0 +1,315 @@ +package org.labkey.sequenceanalysis.analysis; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMReadGroupRecord; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.util.FileExtensions; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFReader; +import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.Logger; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.run.PicardWrapper; +import org.labkey.api.util.FileType; +import org.labkey.api.writer.PrintWriters; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler +{ + public UpdateReadsetFilesHandler() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Update Sample/Header Information", "This will re-header any BAM or gVCF files using the sample name from the source readset. All inputs must be single-sample and have a readset attached to the record", null, List.of( + + )); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public boolean canProcess(SequenceOutputFile f) + { + return f.getFile() != null && ( + SequenceUtil.FILETYPE.gvcf.getFileType().isType(f.getFile()) || + SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(f.getFile()) + ); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + if (inputFiles.size() > 1) + { + throw new PipelineJobException("This job expects a single input file!"); + } + + SequenceOutputFile so = inputFiles.get(0); + if (so.getReadset() == null) + { + throw new PipelineJobException("All inputs must have a readset, missing: " + so.getRowid()); + } + + Readset rs = SequenceAnalysisService.get().getReadset(so.getReadset(), ctx.getJob().getUser()); + String newRsName = SequenceUtil.getLegalReadGroupName(rs.getName()); + + if (SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(so.getFile())) + { + getAndValidateHeaderForBam(so, newRsName); + } + else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | SequenceUtil.FILETYPE.vcf.getFileType().isType(so.getFile())) + { + getAndValidateHeaderForVcf(so, newRsName); + } + + ctx.getSequenceSupport().cacheObject("readsetId", newRsName); + } + + private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String newRsName) throws PipelineJobException + { + SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault(); + try (SamReader reader = samReaderFactory.open(so.getFile())) + { + SAMFileHeader header = reader.getFileHeader().clone(); + int nSamples = reader.getFileHeader().getReadGroups().size(); + if (nSamples != 1) + { + throw new PipelineJobException("File has more than one read group, found: " + nSamples); + } + + List rgs = header.getReadGroups(); + String existingSample = rgs.get(0).getSample(); + if (existingSample.equals(newRsName)) + { + throw new PipelineJobException("Sample names match, aborting"); + } + + return header; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private VCFHeader getAndValidateHeaderForVcf(SequenceOutputFile so, String newRsName) throws PipelineJobException + { + try (VCFReader reader = new VCFFileReader(so.getFile())) + { + VCFHeader header = reader.getHeader(); + int nSamples = header.getGenotypeSamples().size(); + if (nSamples != 1) + { + throw new PipelineJobException("File has more than one sample, found: " + nSamples); + } + + String existingSample = header.getGenotypeSamples().get(0); + if (existingSample.equals(newRsName)) + { + throw new PipelineJobException("Sample names match, aborting"); + } + + return header; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + String newRsName = ctx.getSequenceSupport().getCachedObject("readsetId", String.class); + if (newRsName == null) + { + throw new PipelineJobException("Missing cached readsetId"); + } + + SequenceOutputFile so = inputFiles.get(0); + if (SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(so.getFile())) + { + reheaderBamOrCram(so, ctx, newRsName); + } + else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | SequenceUtil.FILETYPE.vcf.getFileType().isType(so.getFile())) + { + reheaderVcf(so, ctx, newRsName); + } + } + + private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName) throws PipelineJobException + { + VCFHeader header = getAndValidateHeaderForVcf(so, newRsName); + String existingSample = header.getGenotypeSamples().get(0); + + File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt"); + try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND)) + { + writer.println(newRsName); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + ctx.getFileManager().addIntermediateFile(sampleNamesFile); + + File outputVcf = new File(ctx.getWorkingDirectory(), so.getFile().getName()); + + BcftoolsRunner wrapper = new BcftoolsRunner(ctx.getLogger()); + wrapper.execute(Arrays.asList(BcftoolsRunner.getBcfToolsPath().getPath(), "reheader", "-s", sampleNamesFile.getPath(), "-o", outputVcf.getPath(), so.getFile().getPath())); + + try + { + File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false); + FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING); + + FileType gz = new FileType(".gz"); + File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX); + FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING); + + addTracker(so, existingSample, newRsName); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private void addTracker(SequenceOutputFile so, String existingSample, String newRsName) throws IOException + { + File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt"); + boolean preExisting = tracker.exists(); + try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND)) + { + if (!preExisting) + { + writer.println("OriginalSample\tNewSample"); + } + + writer.println(existingSample + "\t" + newRsName); + } + } + + private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String newRsName) throws PipelineJobException + { + try + { + SAMFileHeader header = getAndValidateHeaderForBam(so, newRsName); + + List rgs = header.getReadGroups(); + String existingSample = rgs.get(0).getSample(); + rgs.get(0).setSample(newRsName); + + File headerBam = new File(ctx.getWorkingDirectory(), "header.bam"); + try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam)) + { + + } + ctx.getFileManager().addIntermediateFile(headerBam); + ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam)); + + File output = new File(ctx.getWorkingDirectory(), so.getFile().getName()); + new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam); + if (!output.exists()) + { + throw new PipelineJobException("Missing file: " + output.getPath()); + } + + File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false); + + FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING); + FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING); + + addTracker(so, existingSample, newRsName); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private static class ReplaceSamHeaderWrapper extends PicardWrapper + { + public ReplaceSamHeaderWrapper(Logger log) + { + super(log); + } + + @Override + protected String getToolName() + { + return "ReplaceSamHeader"; + } + + public void execute(File input, File output, File headerBam) throws PipelineJobException + { + List params = new ArrayList<>(getBaseArgs()); + + params.add("--INPUT"); + params.add(input.getPath()); + + params.add("--OUTPUT"); + params.add(output.getPath()); + + params.add("--HEADER"); + params.add(headerBam.getPath()); + + execute(params); + } + } + } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java index 05a702793..883b5b6fb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java @@ -17,6 +17,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.util.FileUtil; +import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.util.ArrayList; @@ -61,7 +62,7 @@ protected void doPerformAlignment(AlignmentOutputImpl output, File inputFastq1, rg.add("LB:" + rs.getReadsetId().toString()); rg.add("PL:" + (rs.getPlatform() == null ? "ILLUMINA" : rs.getPlatform())); rg.add("PU:" + (platformUnit == null ? rs.getReadsetId().toString() : platformUnit)); - rg.add("SM:" + rs.getName().replaceAll(" ", "_")); + rg.add("SM:" + SequenceUtil.getLegalReadGroupName(rs)); extraArgs.add("'" + StringUtils.join(rg, "\\t") + "'"); getWrapper().performMemAlignment(getPipelineCtx().getJob(), output, inputFastq1, inputFastq2, outputDirectory, referenceGenome, basename, extraArgs); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java index 776a42182..4b845742d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java @@ -11,6 +11,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.sequenceanalysis.run.util.AddOrReplaceReadGroupsWrapper; +import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; @@ -48,7 +49,7 @@ public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGen File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".readgroups.bam"); output.addIntermediateFile(outputBam); - output.setBAM(getWrapper().executeCommand(inputBam, outputBam, rs.getReadsetId().toString(), rs.getPlatform(), rs.getReadsetId().toString(), rs.getName().replaceAll(" ", "_"))); + output.setBAM(getWrapper().executeCommand(inputBam, outputBam, rs.getReadsetId().toString(), rs.getPlatform(), rs.getReadsetId().toString(), SequenceUtil.getLegalReadGroupName(rs))); return output; } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java index f270b7e8d..6808024d9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java @@ -31,6 +31,7 @@ import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.model.Readset; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.run.CommandWrapper; @@ -650,4 +651,14 @@ else if (FILETYPE.cram.getFileType().isType(bamOrCram)) return null; } + + public static String getLegalReadGroupName(Readset rs) + { + return getLegalReadGroupName(rs.getName()); + } + + public static String getLegalReadGroupName(String rsName) + { + return rsName.replaceAll(" ", "_"); + } } \ No newline at end of file From 6ae10e430636c14df4cf88132b71c3fb5998e67d Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 15 Jun 2024 05:25:50 -0700 Subject: [PATCH 02/14] Backport Dimplot update --- singlecell/resources/chunks/DimPlots.R | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/singlecell/resources/chunks/DimPlots.R b/singlecell/resources/chunks/DimPlots.R index 17aa129b3..d0ec59d97 100644 --- a/singlecell/resources/chunks/DimPlots.R +++ b/singlecell/resources/chunks/DimPlots.R @@ -13,17 +13,20 @@ for (datasetId in names(seuratObjects)) { next } - P1 <- Seurat::DimPlot(seuratObj, group.by = field, reduction = 'tsne') - P2 <- Seurat::DimPlot(seuratObj, group.by = field, reduction = 'umap') + plotList <- list() + if ('tsne' %in% names(seuratObj@reductions)) { + plotList[['tsne']] <- Seurat::DimPlot(seuratObj, group.by = field, reduction = 'tsne') + } + + if ('umap' %in% names(seuratObj@reductions)) { + plotList[['umap']] <- Seurat::DimPlot(seuratObj, group.by = field, reduction = 'umap') + } if ('wnn.umap' %in% names(seuratObj@reductions)) { - P3 <- Seurat::DimPlot(seuratObj, group.by = field, reduction = 'wnn.umap') - P1 <- P1 | P2 | P3 - } else { - P1 <- P1 | P2 + plotList[['wnn.umap']] <- Seurat::DimPlot(seuratObj, group.by = field, reduction = 'wnn.umap') } - P1 <- P1 + patchwork::plot_annotation(title = field) + patchwork::plot_layout(guides = "collect") + P1 <- patchwork::wrap_plots(plotList) + patchwork::plot_layout(ncol = length(plotList)) + patchwork::plot_annotation(title = field) + patchwork::plot_layout(guides = "collect") print(P1) } From 0a4c838d140486a9180fa9f36721aa086f03f18a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 16 Jun 2024 18:44:49 -0700 Subject: [PATCH 03/14] Support additional liftover params --- .../web/SequenceAnalysis/window/LiftoverWindow.js | 8 ++++++++ .../sequenceanalysis/analysis/LiftoverHandler.java | 9 +++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js index fba9f476e..9a41148d5 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js @@ -157,6 +157,14 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', { params.dropGenotypes = this.down('#dropGenotypes').getValue(); } + if (this.down('#useBcfTools').getValue()){ + params.useBcfTools = this.down('#useBcfTools').getValue(); + } + + if (this.down('#doNotRetainUnmapped').getValue()){ + params.doNotRetainUnmapped = this.down('#doNotRetainUnmapped').getValue(); + } + Ext4.Msg.wait('Saving...'); LABKEY.Ajax.request({ url: LABKEY.ActionURL.buildURL('sequenceanalysis', 'runSequenceHandler', this.containerPath), diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java index 600780cb7..fbe6d8e04 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java @@ -176,6 +176,7 @@ public void processFilesRemote(List inputFiles, JobContext c boolean dropGenotypes = params.optBoolean("dropGenotypes", false); boolean useBcfTools = params.optBoolean("useBcfTools", false); + boolean doNotRetainUnmapped = params.optBoolean("doNotRetainUnmapped", false); int chainFileId = params.getInt("chainFileId"); File chainFile = ctx.getSequenceSupport().getCachedData(chainFileId); @@ -214,7 +215,7 @@ else if (_vcfFileType.isType(f.getFile())) } File lifted = new File(outDir, baseName + ".lifted-" + targetGenomeId + ext); - File unmappedOutput = new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext); + File unmappedOutput = doNotRetainUnmapped ? new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext) : null; try { @@ -260,7 +261,11 @@ else if (_vcfFileType.isType(f.getFile())) ctx.addSequenceOutput(so1); } - if (!unmappedOutput.exists()) + if (unmappedOutput == null) + { + // skip + } + else if (!unmappedOutput.exists()) { job.getLogger().info("no unmapped intervals"); } From 99cabf0242b2ca4528ab89b2095f9af7d3eac149 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 16 Jun 2024 21:56:40 -0700 Subject: [PATCH 04/14] Support additional liftover params --- .../resources/web/SequenceAnalysis/window/LiftoverWindow.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js index 9a41148d5..15f6a5a6c 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js @@ -117,6 +117,11 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', { itemId: 'useBcfTools', checked: false, fieldLabel: 'Use bcftools' + },{ + xtype: 'checkbox', + itemId: 'doNotRetainUnmapped', + checked: false, + fieldLabel: 'Do Not Retain Unmapped' }].concat(SequenceAnalysis.window.OutputHandlerWindow.getCfgForToolParameters(this.toolParameters)), buttons: [{ text: 'Submit', From 9f5f325c4425a3bd431dd3e8d4418b205ea570c6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 17 Jun 2024 16:53:47 -0700 Subject: [PATCH 05/14] Account for difference between hg19 and GRCh37 MT contigs --- SequenceAnalysis/pipeline_code/sequence_tools_install.sh | 8 ++++---- .../labkey/sequenceanalysis/analysis/LiftoverHandler.java | 2 +- .../labkey/sequenceanalysis/util/ChainFileValidator.java | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index 558d131c7..9333860c7 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -486,10 +486,10 @@ then rm -Rf bcftools* rm -Rf $LKTOOLS_DIR/bcftools - wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.18/bcftools-1.18.tar.bz2 - tar xjvf bcftools-1.18.tar.bz2 - chmod 755 bcftools-1.18 - cd bcftools-1.18 + wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2 + tar xjvf bcftools-1.20.tar.bz2 + chmod 755 bcftools-1.20 + cd bcftools-1.20 rm -f plugins/liftover.c wget $WGET_OPTS -P plugins https://raw.githubusercontent.com/freeseek/score/master/liftover.c diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java index fbe6d8e04..e986726b4 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java @@ -215,7 +215,7 @@ else if (_vcfFileType.isType(f.getFile())) } File lifted = new File(outDir, baseName + ".lifted-" + targetGenomeId + ext); - File unmappedOutput = doNotRetainUnmapped ? new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext) : null; + File unmappedOutput = doNotRetainUnmapped ? null : new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext); try { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java index 90978fcf0..9055c0cbe 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ChainFileValidator.java @@ -420,7 +420,8 @@ else if (refName.endsWith("_alt")) refName = StringUtils.join(Arrays.copyOfRange(tokens, 1, tokens.length), "_"); } - if (refName.equals("chrM")) + // NOTE: hg19 and GRCh37 have different MT contigs. chrM is the legacy version. + if (refName.equals("chrMT")) { return "MT"; } From c1c85c83ff709d408846dafc561a390ee7467414 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 17 Jun 2024 17:58:16 -0700 Subject: [PATCH 06/14] Add chain_file default views --- .../sequenceanalysis/chain_files/.qview.xml | 16 ++++++++++++++++ .../chain_files/With Filepath.qview.xml | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml create mode 100644 SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml new file mode 100644 index 000000000..6ad9c3493 --- /dev/null +++ b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/.qview.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml new file mode 100644 index 000000000..d7d962cf2 --- /dev/null +++ b/SequenceAnalysis/resources/queries/sequenceanalysis/chain_files/With Filepath.qview.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file From f99aba9e5ffbb2167fc2944720613fc78f7cacd7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 17 Jun 2024 20:59:21 -0700 Subject: [PATCH 07/14] Fix paraGRAPH typo --- .../labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 76780d215..68cd280c3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -15,7 +15,6 @@ import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; import org.labkey.sequenceanalysis.SequenceAnalysisModule; -import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; @@ -57,7 +56,7 @@ public boolean doRunLocal() @Override public SequenceOutputProcessor getProcessor() { - return new DepthOfCoverageHandler.Processor(); + return new Processor(); } public static class Processor implements SequenceOutputProcessor From 8bfdcebf016d960fcf9848714c3977f3807eeb5f Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 18 Jun 2024 06:19:20 -0700 Subject: [PATCH 08/14] Fix paraGRAPH / idxdepth args --- .../sequenceanalysis/run/alignment/ParagraphStep.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 68cd280c3..b4d955df9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -76,11 +76,12 @@ public void processFilesRemote(List inputFiles, JobContext c throw new PipelineJobException("Unable to find file: " + inputVCF.getPath()); } + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); for (SequenceOutputFile so : inputFiles) { List depthArgs = new ArrayList<>(); depthArgs.add("idxdepth"); - depthArgs.add("-d"); + depthArgs.add("-b"); depthArgs.add(so.getFile().getPath()); File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); @@ -90,6 +91,12 @@ public void processFilesRemote(List inputFiles, JobContext c depthArgs.add("-r"); depthArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath()); + if (threads != null) + { + depthArgs.add("--threads"); + depthArgs.add(threads.toString()); + } + new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); if (!coverageFile.exists()) @@ -137,7 +144,6 @@ else if (!svVcf.exists()) paragraphArgs.add("--scratch-dir"); paragraphArgs.add(SequencePipelineService.get().getJavaTempDir()); - Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); if (threads != null) { paragraphArgs.add("--threads"); From 53eadbc4679728f3aca4a4f092ccb5d7b2e4e44f Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 18 Jun 2024 14:33:23 -0700 Subject: [PATCH 09/14] Ensure bcftools liftover sorts VCF --- .../org/labkey/sequenceanalysis/analysis/LiftoverHandler.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java index e986726b4..dd63cc081 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java @@ -28,6 +28,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; import org.labkey.api.util.FileType; @@ -333,6 +334,8 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG { LiftoverBcfToolsWrapper wrapper = new LiftoverBcfToolsWrapper(job.getLogger()); wrapper.doLiftover(currentVCF, chain, sourceGenome.getWorkingFastaFile(), targetGenome.getWorkingFastaFile(), unmappedOutput, output); + + SequencePipelineService.get().sortVcf(output, null, targetGenome.getSequenceDictionary(), ctx.getLogger()); } else { From a6a8d3dbe87bebae23a13ea47b464a1b176529b3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 18 Jun 2024 15:40:07 -0700 Subject: [PATCH 10/14] Resolve paragraph executables within PATH --- .../api/sequenceanalysis/run/AbstractCommandWrapper.java | 2 +- .../labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java index e18b9799e..2e3e84e2c 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java @@ -289,7 +289,7 @@ protected boolean isThrowNonZeroExits() return _throwNonZeroExits; } - protected static File resolveFileInPath(String exe, @Nullable String packageName, boolean throwIfNotFound) + public static File resolveFileInPath(String exe, @Nullable String packageName, boolean throwIfNotFound) { File fn; String path; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index b4d955df9..8800bf6e2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -12,6 +12,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; import org.labkey.sequenceanalysis.SequenceAnalysisModule; @@ -109,7 +110,7 @@ public void processFilesRemote(List inputFiles, JobContext c // TNPRC-IB18 ../IB18.cram 29.77 150 List paragraphArgs = new ArrayList<>(); - paragraphArgs.add("multigrmpy.py"); + paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath()); paragraphArgs.add("--verbose"); File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt"); From 2fa209773d63d34a87df45f181e0e2e1ef92fd1a Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 18 Jun 2024 17:28:53 -0700 Subject: [PATCH 11/14] Clean up paraGRAPH VCF code --- .../run/alignment/ParagraphStep.java | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 8800bf6e2..06780cdc6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -71,10 +71,20 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport @Override public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException { - File inputVCF = ctx.getSequenceSupport().getCachedData(ctx.getParams().getInt("svVCF")); - if (!inputVCF.exists()) + int svVcfId = ctx.getParams().optInt("svVCF", 0); + if (svVcfId == 0) { - throw new PipelineJobException("Unable to find file: " + inputVCF.getPath()); + throw new PipelineJobException("svVCF param was null"); + } + + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); } Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); @@ -117,22 +127,6 @@ public void processFilesRemote(List inputFiles, JobContext c paragraphArgs.add("-o"); paragraphArgs.add(paragraphOut.getPath()); - int svVcfId = ctx.getParams().optInt("svVCF"); - if (svVcfId == 0) - { - throw new PipelineJobException("Missing svVCF ID"); - } - - File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); - if (svVcf == null) - { - throw new PipelineJobException("File not found for ID: " + svVcfId); - } - else if (!svVcf.exists()) - { - throw new PipelineJobException("Missing file: " + svVcf.getPath()); - } - paragraphArgs.add("-i"); paragraphArgs.add(svVcf.getPath()); From 65bdfe54cfeb6a1d68760f71194524f0353c991b Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 19 Jun 2024 05:21:08 -0700 Subject: [PATCH 12/14] Fix handling of idxdepth/paraGRAPH coverage --- .../run/alignment/ParagraphStep.java | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 06780cdc6..cb92bd051 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -1,5 +1,9 @@ package org.labkey.sequenceanalysis.run.alignment; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; @@ -15,11 +19,14 @@ import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; +import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -95,9 +102,9 @@ else if (!svVcf.exists()) depthArgs.add("-b"); depthArgs.add(so.getFile().getPath()); - File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); + File coverageJson = new File(ctx.getWorkingDirectory(), "coverage.json"); depthArgs.add("-o"); - depthArgs.add(coverageFile.getPath()); + depthArgs.add(coverageJson.getPath()); depthArgs.add("-r"); depthArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath()); @@ -110,14 +117,41 @@ else if (!svVcf.exists()) new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); - if (!coverageFile.exists()) + if (!coverageJson.exists()) { - throw new PipelineJobException("Missing file: " + coverageFile.getPath()); + throw new PipelineJobException("Missing file: " + coverageJson.getPath()); } + ctx.getFileManager().addIntermediateFile(coverageJson); // Should produce a simple text file: // id path depth read length // TNPRC-IB18 ../IB18.cram 29.77 150 + File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); + try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile())) + { + SAMFileHeader header = reader.getFileHeader(); + if (header.getReadGroups().size() == 0) + { + throw new PipelineJobException("No read groups found in input BAM"); + } + else if (header.getReadGroups().size() > 1) + { + throw new PipelineJobException("More than one read group found in BAM"); + } + + String rgId = header.getReadGroups().get(0).getSample(); + + JSONObject json = new JSONObject(FileUtils.readFileToString(coverageFile, Charset.defaultCharset())); + writer.println("id\tpath\tdepth\tread length"); + double depth = json.getJSONObject("autosome").getDouble("depth"); + double readLength = json.getInt("read_length"); + writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + ctx.getFileManager().addIntermediateFile(coverageFile); List paragraphArgs = new ArrayList<>(); paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath()); From 15f6fc24570c1072ea76afb93d3fc69d3e15b8cd Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 20 Jun 2024 20:19:41 -0700 Subject: [PATCH 13/14] Bugfix for SampleRenameStep without intervals --- .../labkey/sequenceanalysis/run/variant/SampleRenameStep.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java index 46ffae773..6af568325 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SampleRenameStep.java @@ -27,6 +27,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -110,7 +111,7 @@ else if (enforceChangeAll) List queryIntervals = intervals; if (queryIntervals == null || queryIntervals.isEmpty()) { - queryIntervals.add(null); + queryIntervals = Collections.singletonList(null); } int i = 0; From 82ecaa85446f302f34ede27e2a1c4e9dcf4bffb2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 20 Jun 2024 20:22:28 -0700 Subject: [PATCH 14/14] Bugfix for ParagraphStep --- .../labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index cb92bd051..20819c231 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -141,7 +141,7 @@ else if (header.getReadGroups().size() > 1) String rgId = header.getReadGroups().get(0).getSample(); - JSONObject json = new JSONObject(FileUtils.readFileToString(coverageFile, Charset.defaultCharset())); + JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset())); writer.println("id\tpath\tdepth\tread length"); double depth = json.getJSONObject("autosome").getDouble("depth"); double readLength = json.getInt("read_length");