Skip to content

Commit

Permalink
Merge branch 'discvr-23.11' into 23.11_fb_serverSort
Browse files (browse the repository at this point in the history)
  • Loading branch information
bbimber authored Mar 28, 2024
2 parents 7535a3c + 68bd133 commit 12a9ea9
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 37 deletions.
32 changes: 16 additions & 16 deletions jbrowse/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
Expand Down Expand Up @@ -36,13 +37,13 @@ public class VireoHandler extends AbstractParameterizedOutputHandler<SequenceOu

public VireoHandler()
{
super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Run Vireo", "This will run cellsnp-lite and vireo to infer cell-to-sample based on genotype.", new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js")), Arrays.asList(
super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Run CellSnp-Lite/Vireo", "This will run cellsnp-lite and vireo to infer cell-to-sample based on genotype.", new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js")), Arrays.asList(
ToolParameterDescriptor.create("nDonors", "# Donors", "The number of donors to demultiplex", "ldk-integerfield", new JSONObject(){{
put("allowBlank", false);
}}, null),
ToolParameterDescriptor.create("maxDepth", "Max Depth", "At a position, read maximally INT reads per input file, to avoid excessive memory usage", "ldk-integerfield", new JSONObject(){{
put("minValue", 0);
}}, 50000),
}}, null),
ToolParameterDescriptor.create("contigs", "Allowable Contigs", "A comma-separated list of contig names to use", "textfield", new JSONObject(){{

}}, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20"),
Expand Down Expand Up @@ -247,6 +248,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
vireo.add(ctx.getWorkingDirectory().getPath());

int nDonors = ctx.getParams().optInt("nDonors", 0);
boolean storeCellSnpVcf = ctx.getParams().optBoolean("storeCellSnpVcf", false);
if (nDonors == 0)
{
throw new PipelineJobException("Must provide nDonors");
Expand All @@ -255,39 +257,73 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
vireo.add("-N");
vireo.add(String.valueOf(nDonors));

new SimpleScriptWrapper(ctx.getLogger()).execute(vireo);
if (nDonors == 1)
{
storeCellSnpVcf = true;
ctx.getLogger().info("nDonor was 1, skipping vireo");
}
else
{
new SimpleScriptWrapper(ctx.getLogger()).execute(vireo);

File[] outFiles = ctx.getWorkingDirectory().listFiles(f -> f.getName().endsWith("donor_ids.tsv"));
if (outFiles == null || outFiles.length == 0)
File[] outFiles = ctx.getWorkingDirectory().listFiles(f -> f.getName().endsWith("donor_ids.tsv"));
if (outFiles == null || outFiles.length == 0)
{
throw new PipelineJobException("Missing vireo output file");
}
else if (outFiles.length > 1)
{
throw new PipelineJobException("More than one possible vireo output file found");
}

SequenceOutputFile so = new SequenceOutputFile();
so.setReadset(inputFiles.get(0).getReadset());
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
so.setFile(outFiles[0]);
if (so.getReadset() != null)
{
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Vireo Demultiplexing");
}
else
{
so.setName(inputFiles.get(0).getName() + ": Vireo Demultiplexing");
}
so.setCategory("Vireo Demultiplexing");
ctx.addSequenceOutput(so);
}

File cellSnpBaseVcf = new File(cellsnpDir, "cellSNP.base.vcf.gz");
if (!cellSnpBaseVcf.exists())
{
throw new PipelineJobException("Missing vireo output file");
throw new PipelineJobException("Unable to find cellsnp base VCF");
}
else if (outFiles.length > 1)


File cellSnpCellsVcf = new File(cellsnpDir, "cellSNP.cells.vcf.gz");
if (!cellSnpCellsVcf.exists())
{
throw new PipelineJobException("More than one possible vireo output file found");
throw new PipelineJobException("Unable to find cellsnp calls VCF");
}

SequenceOutputFile so = new SequenceOutputFile();
so.setReadset(inputFiles.get(0).getReadset());
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
so.setFile(outFiles[0]);
if (so.getReadset() != null)
try
{
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Vireo Demultiplexing");
SequencePipelineService.get().sortVcf(cellSnpBaseVcf, null, genome.getSequenceDictionary(), ctx.getLogger());
SequenceAnalysisService.get().ensureVcfIndex(cellSnpBaseVcf, ctx.getLogger());

SequencePipelineService.get().sortVcf(cellSnpCellsVcf, null, genome.getSequenceDictionary(), ctx.getLogger());
SequenceAnalysisService.get().ensureVcfIndex(cellSnpCellsVcf, ctx.getLogger());
}
else
catch (IOException e)
{
so.setName(inputFiles.get(0).getName() + ": Vireo Demultiplexing");
throw new PipelineJobException(e);
}
so.setCategory("Vireo Demultiplexing");
ctx.addSequenceOutput(so);

if (ctx.getParams().optBoolean("storeCellSnpVcf", false))
if (storeCellSnpVcf)
{
so = new SequenceOutputFile();
SequenceOutputFile so = new SequenceOutputFile();
so.setReadset(inputFiles.get(0).getReadset());
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
so.setFile(outFiles[0]);
so.setFile(cellSnpCellsVcf);
if (so.getReadset() != null)
{
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Cellsnp-lite VCF");
Expand Down

0 comments on commit 12a9ea9

Please sign in to comment.