Skip to content

Commit

Permalink
Merge pull request #256 from LabKey/fb_merge_23.11_to_develop
Browse files Browse the repository at this point in the history
Merge discvr-23.11 to develop
  • Loading branch information
bbimber authored Dec 21, 2023
2 parents da147a9 + 3db76df commit c95ba94
Show file tree
Hide file tree
Showing 15 changed files with 466 additions and 220 deletions.
1 change: 1 addition & 0 deletions QueryExtensions/module.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
ModuleClass: org.labkey.queryextensions.QueryExtensionsModule
ManageVersion: false
Label: Query Extensions
Description: This module contains low-level extensions to the LabKey Query layer
License: Apache 2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ Ext4.define('SequenceAnalysis.panel.VariantProcessingPanel', {
description: 'A BED or similar file with intervals to skip',
defaultValue: null
},{
fieldXtype: 'ldk-expdatafield',
fieldXtype: 'checkbox',
name: 'skipExcessHetAndInbreeding',
label: 'Skip Excess Het And Inbreeding',
description: 'If checked, the ExcessHet and InbreedingCoeff annotations will be skipped, which can be important when using force-output-intervals',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,7 @@ else if (!d.getFile().exists())
{
if (d != null && d.getFile() != null && d.getFile().exists())
{
// NOTE: ultimately remove this:
log.info("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
log.error("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
import org.labkey.sequenceanalysis.run.analysis.BamIterator;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
import org.labkey.sequenceanalysis.run.analysis.ExportOverlappingReadsAnalysis;
import org.labkey.sequenceanalysis.run.analysis.GenrichStep;
import org.labkey.sequenceanalysis.run.analysis.HaplotypeCallerAnalysis;
Expand Down Expand Up @@ -312,6 +313,7 @@ public static void registerPipelineSteps()
SequencePipelineService.get().registerPipelineStep(new KingInferenceStep.Provider());
SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());

//handlers
SequenceAnalysisService.get().registerFileHandler(new LiftoverHandler());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package org.labkey.sequenceanalysis.run.analysis;

import htsjdk.samtools.util.Interval;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONObject;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Variant processing step that runs the bcftools "fill-tags" plugin to (re)compute
 * per-site INFO annotations on a VCF. The UI exposes two tags: HWE and ExcHet.
 * Output is a bgzipped VCF (compression level 9) plus a tabix index.
 */
public class BcftoolsFillTagsStep extends AbstractCommandPipelineStep<BcftoolsRunner> implements VariantProcessingStep
{
    public BcftoolsFillTagsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
    {
        super(provider, ctx, new BcftoolsRunner(ctx.getLogger()));
    }

    public static class Provider extends AbstractVariantProcessingStepProvider<BcftoolsFillTagsStep> implements VariantProcessingStep.RequiresPedigree, VariantProcessingStep.SupportsScatterGather
    {
        public Provider()
        {
            super("BcftoolsFillTagsStep", "Bcftools Fill-tags", "bcftools", "Annotate variants using bcftools fill-tags", Arrays.asList(
                    ToolParameterDescriptor.create("hwe", "HWE", "If selected, HWE will be annotated", "checkbox", new JSONObject(){{
                        put("checked", true);
                    }}, true),
                    ToolParameterDescriptor.create("exchet", "Excess Het", "If selected, ExcHet will be annotated.", "checkbox", new JSONObject(){{
                        put("checked", true);
                    }}, true)
            ), null, "");
        }

        @Override
        public BcftoolsFillTagsStep create(PipelineContext ctx)
        {
            return new BcftoolsFillTagsStep(this, ctx);
        }
    }

    /**
     * Runs "bcftools +fill-tags" over the input VCF, restricted to the supplied intervals
     * when scatter/gather is in use, and returns the annotated, bgzipped output.
     *
     * @param inputVCF the VCF to annotate
     * @param outputDirectory directory in which the .ft.vcf.gz output is created
     * @param genome the reference genome (unused by fill-tags itself, but part of the step contract)
     * @param intervals optional genomic intervals; translated to a --regions argument when present
     * @throws PipelineJobException if no annotations were selected, the tool fails, or indexing fails
     */
    @Override
    public VariantProcessingStep.Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
    {
        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();

        List<String> options = new ArrayList<>();
        options.add(getWrapper().getBcfToolsPath().getPath());
        options.add("+fill-tags");

        options.add(inputVCF.getPath());

        if (intervals != null)
        {
            options.add("--regions");
            options.add(intervals.stream().map(interval -> interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()).collect(Collectors.joining(",")));
        }

        // bgzipped output at maximum compression
        options.add("-O");
        options.add("z9");

        Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
        if (threads != null)
        {
            options.add("--threads");
            options.add(threads.toString());
        }

        File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".ft.vcf.gz");
        options.add("-o");
        options.add(outputVcf.getPath());

        List<String> annotations = new ArrayList<>();
        if (getProvider().getParameterByName("hwe").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false))
        {
            annotations.add("hwe");
        }

        if (getProvider().getParameterByName("exchet").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false))
        {
            annotations.add("exchet");
        }

        if (annotations.isEmpty())
        {
            throw new PipelineJobException("No annotations were selected");
        }

        // bcftools requires plugin-specific options to appear after a "--" separator.
        // Without it, "-t" is consumed by the top-level parser as --targets and the
        // command fails with a region-parse error instead of selecting fill-tags tags.
        options.add("--");
        options.add("-t");
        // NOTE(review): fill-tags documents these tags as HWE/ExcHet — confirm the
        // lowercase names are accepted by the installed bcftools version.
        options.add(StringUtils.join(annotations, ","));

        BcftoolsRunner wrapper = getWrapper();

        // Propagate BCFTOOLS_PLUGINS so bcftools can locate the fill-tags plugin on remote nodes
        String bcfPluginDir = StringUtils.trimToNull(System.getenv("BCFTOOLS_PLUGINS"));
        if (bcfPluginDir != null)
        {
            getPipelineCtx().getLogger().debug("Setting BCFTOOLS_PLUGINS environment variable: " + bcfPluginDir);
            wrapper.addToEnvironment("BCFTOOLS_PLUGINS", bcfPluginDir);
        }

        wrapper.execute(options);
        if (!outputVcf.exists())
        {
            throw new PipelineJobException("output not found: " + outputVcf);
        }

        try
        {
            SequenceAnalysisService.get().ensureVcfIndex(outputVcf, getWrapper().getLogger());
        }
        catch (IOException e)
        {
            throw new PipelineJobException(e);
        }

        output.setVcf(outputVcf);

        return output;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,22 @@ public Provider()
put("width", 400);
put("allowBlank", true);
}}, null),
ToolParameterDescriptor.create("generateConsensus", "Generate Consensus", "If selected, a FASTA with the simple majority consensus will be generated.", "checkbox", new JSONObject()
{{

}}, false),
ToolParameterDescriptor.create("minCoverage", "Min Coverage For Consensus", "If provided, a consensus will only be called over regions with at least this depth", "ldk-integerfield", new JSONObject(){{
put("minValue", 0);
}}, 25),
ToolParameterDescriptor.create("generateTable", "Generate Variant Table", "If selected, a TSV listing variants above the given threshold will be generated.", "checkbox", new JSONObject()
{{

}}, false),
ToolParameterDescriptor.create("minFractionForTable", "Min Fraction for Table", "If the option to generate a table output is used, only variants with frequency of this threshold will be included", "ldk-numberfield", new JSONObject(){{
put("minValue", 0);
put("maxValue", 1);
put("decimalPrecision", 3);
}}, 0.01),
ToolParameterDescriptor.createExpDataParam("primerBedFile", "Primer Sites (BED File)", "This is a BED file specifying the primer binding sites, which will be used to flag variants. Strandedness is ignored.", "ldk-expdatafield", new JSONObject()
{{
put("allowBlank", true);
Expand Down Expand Up @@ -470,6 +483,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
}

double minFractionForConsensus = getProvider().getParameterByName("minFractionForConsensus").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);
boolean generateConsensus = getProvider().getParameterByName("generateConsensus").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);

boolean generateTable = getProvider().getParameterByName("generateTable").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
double minFractionForTable = getProvider().getParameterByName("minFractionForTable").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);

Integer primerDataId = getProvider().getParameterByName("primerBedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
List<Interval> primerIntervals = new ArrayList<>();
Expand Down Expand Up @@ -517,11 +534,12 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc

File loFreqConsensusVcf = getConsensusVcf(outputDir, inputBam);
File loFreqAllVcf = getAllVcf(outputDir, inputBam);
File tableFile = getTableFile(outputDir, inputBam);
Double strandBiasRecoveryAF = getProvider().getParameterByName("strandBiasRecoveryAF").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 1.0);
SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath());
VariantContextWriterBuilder writerBuilderConsensus = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict);
VariantContextWriterBuilder writerBuilderAll = new VariantContextWriterBuilder().setOutputFile(loFreqAllVcf).setReferenceDictionary(dict);
try (VCFFileReader reader = new VCFFileReader(activeVCF);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build())
try (VCFFileReader reader = new VCFFileReader(activeVCF);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build();CSVWriter variantTableWriter = generateTable ? new CSVWriter(PrintWriters.getPrintWriter(tableFile), '\t', CSVWriter.NO_QUOTE_CHARACTER) : null)
{
VCFHeader header = reader.getFileHeader();

Expand All @@ -532,6 +550,11 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
writerConsensus.writeHeader(header);
writerAll.writeHeader(header);

if (generateTable)
{
variantTableWriter.writeNext(new String[]{"Contig", "Start", "End", "Reference", "Alt", "Frequency", "AlleleDepth", "TotalDepth"});
}

SortingCollection<VariantContext> allVariants = getVariantSorter(header);
SortingCollection<VariantContext> consensusVariants = getVariantSorter(header);
while (it.hasNext())
Expand Down Expand Up @@ -571,6 +594,16 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
}
}

if (generateTable)
{
if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > minFractionForTable)
{
List<Integer> depths = vc.getAttributeAsIntList("DP4", 0);
int alleleDepth = depths.get(2) + depths.get(3);
variantTableWriter.writeNext(new String[]{vc.getContig(), String.valueOf(vc.getStart()), String.valueOf(vc.getEnd()), vc.getReference().getBaseString(), vc.getAlternateAllele(0).getBaseString(), String.valueOf(vc.getAttributeAsDouble("AF", 0.0)), String.valueOf(alleleDepth), String.valueOf(vc.getAttributeAsInt("DP", 0))});
}
}

totalVariants++;
if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > 0.01)
{
Expand Down Expand Up @@ -672,6 +705,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
}
consensusVariants.cleanup();
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

NumberFormat fmt = NumberFormat.getPercentInstance();
fmt.setMaximumFractionDigits(2);
Expand Down Expand Up @@ -720,12 +757,24 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
}

output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
output.addSequenceOutput(consensusFastaLoFreq, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
if (generateConsensus)
{
output.addSequenceOutput(consensusFastaLoFreq, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
}

boolean runPangolinAndNextClade = getProvider().getParameterByName("runPangolin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);

output.addSequenceOutput(loFreqAllVcf, "LoFreq: " + rs.getName(), CATEGORY, rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);

if (generateTable)
{
if (!tableFile.exists())
{

}
output.addSequenceOutput(tableFile, "LoFreq: " + rs.getName(), "LoFreq Variant Table", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
}

Map<String, String> pangolinData = null;
if (runPangolinAndNextClade)
{
Expand Down Expand Up @@ -781,6 +830,11 @@ private File getAllVcf(File outputDir, File inputBam)
return new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.all.vcf.gz");
}

private File getTableFile(File outputDir, File inputBam)
{
return new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.txt");
}

private Set<String> runBcftools(File inputBam, ReferenceGenome referenceGenome, File mask, int minCoverage) throws PipelineJobException
{
Set<String> variantsBcftools = new HashSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
*/
public class VariantAnnotatorStep extends AbstractCommandPipelineStep<VariantAnnotatorWrapper> implements VariantProcessingStep
{
public VariantAnnotatorStep(PipelineStepProvider provider, PipelineContext ctx)
public VariantAnnotatorStep(PipelineStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new VariantAnnotatorWrapper(ctx.getLogger()));
}
Expand Down
1 change: 1 addition & 0 deletions Studies/module.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ Label: Studies
Description: Extensions to the study framework, designed to more flexibly manage multiple study designs from one source of data
License: Apache 2.0
LicenseURL: http://www.apache.org/licenses/LICENSE-2.0
ManageVersion: false
Loading

0 comments on commit c95ba94

Please sign in to comment.