Merge pull request #254 from LabKey/fb_merge_23.11_to_develop
Merge discvr-23.11 to develop
bbimber authored Dec 14, 2023
2 parents 9558a1d + f718b82 commit 26fc0a5
Showing 11 changed files with 334 additions and 67 deletions.
SequenceAnalysis/pipeline_code/sequence_tools_install.sh (3 changes: 1 addition & 2 deletions)
@@ -512,13 +512,12 @@ then

 wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.18/bcftools-1.18.tar.bz2
 tar xjvf bcftools-1.18.tar.bz2
-bzip2 bcftools-1.18.tar
 chmod 755 bcftools-1.18
 cd bcftools-1.18
 rm -f plugins/liftover.c
 wget $WGET_OPTS -P plugins https://raw.githubusercontent.com/freeseek/score/master/liftover.c

-./configure CFLAGS="-g -Wall -O2 -std=c99"
+./configure
 make

 install ./bcftools $LKTOOLS_DIR
@@ -54,6 +54,17 @@ public static void possiblyCacheSupportFiles(SequenceOutputHandler.JobContext ct
 try
 {
     FileUtils.copyFile(inputFile, localCopy);
+    if (inputFile.getPath().toLowerCase().endsWith("vcf.gz"))
+    {
+        File inputFileIdx = new File(inputFile.getPath() + ".tbi");
+        File localCopyIdx = new File(ScatterGatherUtils.getLocalCopyDir(ctx, true), inputFile.getName() + ".tbi");
+        if (!inputFileIdx.exists())
+        {
+            throw new PipelineJobException("Unable to find file: " + inputFileIdx.getPath());
+        }
+
+        FileUtils.copyFile(inputFileIdx, localCopyIdx);
+    }
     FileUtils.touch(doneFile);
 }
 catch (IOException e)
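For context: tabix indexes for bgzipped VCFs sit beside the data file, named by appending ".tbi" to the full file name (x.vcf.gz -> x.vcf.gz.tbi), which is why the hunk above copies two files and fails fast when the index is missing. A minimal standalone sketch of the same pattern, with hypothetical class and method names, assuming commons-io:

    import java.io.File;
    import java.io.IOException;

    import org.apache.commons.io.FileUtils;

    public class VcfCacheUtil
    {
        // Copies a bgzipped VCF plus its companion .tbi index into cacheDir.
        public static void cacheVcfWithIndex(File vcf, File cacheDir) throws IOException
        {
            FileUtils.copyFile(vcf, new File(cacheDir, vcf.getName()));

            // Tabix writes the index next to the data file, named <file>.tbi:
            File idx = new File(vcf.getPath() + ".tbi");
            if (!idx.exists())
            {
                throw new IOException("Missing tabix index: " + idx.getPath());
            }

            FileUtils.copyFile(idx, new File(cacheDir, idx.getName()));
        }
    }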
@@ -172,7 +172,8 @@ else if (!d.getFile().exists())
 {
     if (d != null && d.getFile() != null && d.getFile().exists())
     {
-        log.error("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
+        // NOTE: ultimately remove this:
+        log.info("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
     }
 }
 }
@@ -307,15 +307,18 @@ public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutpu
 boolean hasMetrics = new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS), PageFlowUtil.set("RowId"), filter, null).exists();
 if (!hasMetrics)
 {
+    job.getLogger().debug("No existing metrics found for: " + rd.getFileId1());
     List<Integer> toAdd = new ArrayList<>(rd.getFileId1());
     if (rd.getFileId2() != null)
     {
         toAdd.add(rd.getFileId2());
     }

+    job.getLogger().debug("adding metrics for " + toAdd.size() + " total fastq files");
     for (int dataId : toAdd)
     {
         //then delete/add:
+        job.getLogger().debug("adding metrics for: " + dataId);
         ReadsetCreationTask.addQualityMetricsForReadset(rs, dataId, job, true);
     }
 }
@@ -332,15 +335,22 @@ public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutpu
     }
 }

-Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
-TableInfo ti = QueryService.get().getUserSchema(job.getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READ_DATA);
-try
+if (!rows.isEmpty())
 {
-    ti.getUpdateService().updateRows(job.getUser(), target, rows, rows, null, null);
+    Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
+    TableInfo ti = QueryService.get().getUserSchema(job.getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READ_DATA);
+    try
+    {
+        ti.getUpdateService().updateRows(job.getUser(), target, rows, rows, null, null);
+    }
+    catch (InvalidKeyException | BatchValidationException | QueryUpdateServiceException | SQLException e)
+    {
+        throw new PipelineJobException(e);
+    }
 }
-catch (InvalidKeyException | BatchValidationException | QueryUpdateServiceException | SQLException e)
+else
 {
-    throw new PipelineJobException(e);
+    job.getLogger().debug("There were no readdata rows to update");
 }
 }

@@ -94,7 +94,7 @@ public class LofreqAnalysis extends AbstractCommandPipelineStep<LofreqAnalysis.L
 {
     public static final String CATEGORY = "Lowfreq VCF";

-    public LofreqAnalysis(PipelineStepProvider provider, PipelineContext ctx)
+    public LofreqAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
     {
         super(provider, ctx, new LofreqWrapper(ctx.getLogger()));
     }
@@ -108,7 +108,7 @@ public Provider()
 {{
     put("extensions", Arrays.asList("gtf", "gff", "gbk"));
     put("width", 400);
-    put("allowBlank", false);
+    put("allowBlank", true);
 }}, null),
 ToolParameterDescriptor.create("minCoverage", "Min Coverage For Consensus", "If provided, a consensus will only be called over regions with at least this depth", "ldk-integerfield", new JSONObject(){{
     put("minValue", 0);
@@ -212,7 +212,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc

 File outputVcfRaw = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.vcf.gz");
 File outputVcfFiltered = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.filtered.vcf.gz");
-File outputVcfSnpEff = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.snpeff.vcf.gz");

 //LoFreq call
 getWrapper().execute(inputBam, outputVcfRaw, referenceGenome.getWorkingFastaFile(), SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()));
@@ -222,6 +221,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 output.addIntermediateFile(outputVcfRaw);
 output.addIntermediateFile(new File(outputVcfRaw.getPath() + ".tbi"));

+output.addIntermediateFile(outputVcfFiltered);
+output.addIntermediateFile(new File(outputVcfFiltered.getPath() + ".tbi"));
+
 //Add depth for downstream use:
 File coverageOut = new File(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(outputVcfRaw.getName()) + ".coverage");
 runDepthOfCoverage(getPipelineCtx(), output, outputDir, referenceGenome, inputBam, coverageOut);
@@ -438,24 +440,35 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 }

 //SnpEff:
+File activeVCF = outputVcfFiltered;
 Integer geneFileId = getProvider().getParameterByName(SNPEffStep.GENE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
-File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx().getSequenceSupport(), getPipelineCtx().getLogger(), referenceGenome, geneFileId);
+if (geneFileId != null)
+{
+    File outputVcfSnpEff = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.snpeff.vcf.gz");

-SnpEffWrapper snpEffWrapper = new SnpEffWrapper(getPipelineCtx().getLogger());
-snpEffWrapper.runSnpEff(referenceGenome.getGenomeId(), geneFileId, snpEffBaseDir, outputVcfFiltered, outputVcfSnpEff, null);
+    File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx().getSequenceSupport(), getPipelineCtx().getLogger(), referenceGenome, geneFileId);
+    SnpEffWrapper snpEffWrapper = new SnpEffWrapper(getPipelineCtx().getLogger());
+    snpEffWrapper.runSnpEff(referenceGenome.getGenomeId(), geneFileId, snpEffBaseDir, outputVcfFiltered, outputVcfSnpEff, null);

-try
-{
-    SequenceAnalysisService.get().ensureVcfIndex(outputVcfSnpEff, getPipelineCtx().getLogger());
+    try
+    {
+        SequenceAnalysisService.get().ensureVcfIndex(outputVcfSnpEff, getPipelineCtx().getLogger());
+    }
+    catch (IOException e)
+    {
+        throw new PipelineJobException(e);
+    }
+
+    output.addIntermediateFile(outputVcfSnpEff);
+    output.addIntermediateFile(new File(outputVcfSnpEff.getPath() + ".tbi"));
+
+    activeVCF = outputVcfSnpEff;
 }
-catch (IOException e)
+else
 {
-    throw new PipelineJobException(e);
+    getPipelineCtx().getLogger().info("No GTF provided, skipping SnpEff");
 }

-output.addIntermediateFile(outputVcfFiltered);
-output.addIntermediateFile(new File(outputVcfFiltered.getPath() + ".tbi"));
-
 double minFractionForConsensus = getProvider().getParameterByName("minFractionForConsensus").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);

 Integer primerDataId = getProvider().getParameterByName("primerBedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
@@ -508,7 +521,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath());
 VariantContextWriterBuilder writerBuilderConsensus = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict);
 VariantContextWriterBuilder writerBuilderAll = new VariantContextWriterBuilder().setOutputFile(loFreqAllVcf).setReferenceDictionary(dict);
-try (VCFFileReader reader = new VCFFileReader(outputVcfSnpEff);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build())
+try (VCFFileReader reader = new VCFFileReader(activeVCF);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build())
 {
     VCFHeader header = reader.getFileHeader();

@@ -706,8 +719,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
     getPipelineCtx().getLogger().warn("Consensus ambiguities from bcftools and lofreq did not match: " + bcfToolsConsensusNs + " / " + lofreqConsensusNs);
 }

-output.addIntermediateFile(outputVcfSnpEff);
-output.addIntermediateFile(new File(outputVcfSnpEff.getPath() + ".tbi"));
 output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
 output.addSequenceOutput(consensusFastaLoFreq, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);

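The activeVCF variable introduced above lets the downstream reader consume either the filtered VCF or the SnpEff-annotated one, depending on whether a gene file was supplied. For readers unfamiliar with the htsjdk classes in that try-with-resources block, here is a minimal self-contained sketch of the same read/filter/write pattern. The file names are placeholders, the PASS-only filter is an arbitrary example rather than logic from this commit, and it assumes the input is indexed and its header carries a contig dictionary:

    import htsjdk.samtools.SAMSequenceDictionary;
    import htsjdk.samtools.util.CloseableIterator;
    import htsjdk.variant.variantcontext.VariantContext;
    import htsjdk.variant.variantcontext.writer.VariantContextWriter;
    import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
    import htsjdk.variant.vcf.VCFFileReader;
    import htsjdk.variant.vcf.VCFHeader;

    import java.io.File;

    public class VcfCopyDemo
    {
        public static void main(String[] args)
        {
            File in = new File("input.vcf.gz");      // placeholder; a .tbi index must sit beside it
            File out = new File("pass-only.vcf.gz"); // placeholder output

            try (VCFFileReader reader = new VCFFileReader(in))
            {
                VCFHeader header = reader.getFileHeader();
                SAMSequenceDictionary dict = header.getSequenceDictionary();

                VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                        .setOutputFile(out)
                        .setReferenceDictionary(dict);

                try (VariantContextWriter writer = builder.build();
                     CloseableIterator<VariantContext> it = reader.iterator())
                {
                    writer.writeHeader(header);
                    while (it.hasNext())
                    {
                        VariantContext vc = it.next();
                        if (!vc.isFiltered()) // arbitrary example: keep PASS/unfiltered records
                        {
                            writer.add(vc);
                        }
                    }
                }
            }
        }
    }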
@@ -265,6 +265,7 @@ const FilterForm = (props: FilterFormProps ) => {
 <InputLabel id="value-select-label">Value</InputLabel>
 <Select
     labelId="value-select-label"
+    id={`value-select-${index}`}
     value={filter.value}
     onChange={(event) =>
         handleFilterChange(index, "value", event.target.value)
@@ -305,6 +306,7 @@ const FilterForm = (props: FilterFormProps ) => {
 <InputLabel id="value-select-label">Value</InputLabel>
 <Select
     labelId="value-select-label"
+    id={`value-select-${index}`}
     value={filter.value}
     disabled={filter.operator === "is empty" || filter.operator === "is not empty"}
     onChange={(event) =>
@@ -321,6 +323,7 @@ const FilterForm = (props: FilterFormProps ) => {
 ) : (
     <TextFieldMinWidth
         label="Value"
+        id={`value-select-${index}`}
         sx={ highlightedInputs[index]?.value ? highlightedSx : null }
         value={filter.value}
         disabled={filter.operator === "is empty" || filter.operator === "is not empty"}
jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java (34 changes: 19 additions & 15 deletions)
@@ -60,6 +60,7 @@ public class JBrowseLuceneSearch
 private final JBrowseSession _session;
 private final JsonFile _jsonFile;
 private final User _user;
+private final String[] specialStartPatterns = {"*:* -", "+", "-"};

 private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u)
 {
@@ -105,7 +106,7 @@ private String tryUrlDecode(String input) {
 //special case for urls containing +; this isn't necessary for strings sent from the client-side, but URLs
 //sent via unit tests autodecode, and strings containing + rather than the URL-encoded symbol are unsafe
 //to pass through URLDecoded.decode
-if(input.contains("+")) {
+if (input.contains("+")) {
     return input;
 }

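This special case exists because '+' is the form-encoding of a space: passing a string that already contains a literal '+' through URLDecoder.decode silently turns it into a space. A small standalone demonstration (not from this commit):

    import java.net.URLDecoder;
    import java.nio.charset.StandardCharsets;

    public class PlusDecodeDemo
    {
        public static void main(String[] args)
        {
            // A literal '+' is mangled into a space by decoding:
            System.out.println(URLDecoder.decode("AF:+0.5", StandardCharsets.UTF_8));   // prints "AF: 0.5"

            // The percent-encoded form of '+' survives decoding intact:
            System.out.println(URLDecoder.decode("AF:%2B0.5", StandardCharsets.UTF_8)); // prints "AF:+0.5"
        }
    }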
@@ -115,6 +116,19 @@ private String tryUrlDecode(String input) {
 }
 }

+public String extractFieldName(String queryString) {
+    // Check if the query starts with any of the start patterns
+    for (String pattern : specialStartPatterns) {
+        if (queryString.startsWith(pattern)) {
+            queryString = queryString.substring(pattern.length()).trim();
+            break;
+        }
+    }
+
+    // Split the remaining string by ':' and return the first part (field name)
+    String[] parts = queryString.split(":", 2);
+    return parts.length > 0 ? parts[0].trim() : null;
+}

public JSONObject doSearch(User u, String searchString, final int pageSize, final int offset) throws IOException, ParseException
{
@@ -180,18 +194,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
 String queryString = tokenizer.nextToken();
 Query query = null;

-// Type is defined by the first field in the lucene query
-// "First" field is defined by getting the first consecutive string of ASCII characters or underscores terminated by a colon
-// we might just want to return the field(s) in the form instead
-Pattern pattern = Pattern.compile("[\\p{ASCII}&&[^\\s:*+-]][\\p{ASCII}&&[^:\\p{Punct}*]]*:");
-
-Matcher matcher = pattern.matcher(queryString);
-
-String fieldName = null;
-if (matcher.find())
-{
-    fieldName = matcher.group().substring(0, matcher.group().length() - 1);
-}
+String fieldName = extractFieldName(queryString);

 if (VARIABLE_SAMPLES.equals(fieldName))
 {
Expand All @@ -202,7 +205,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
 {
     query = queryParser.parse(queryString);
 }
-else if(numericQueryParserFields.contains(fieldName))
+else if (numericQueryParserFields.contains(fieldName))
 {
     try
     {
@@ -212,7 +215,8 @@ else if(numericQueryParserFields.contains(fieldName))
 {
     e.printStackTrace();
 }
-} else
+}
+else
 {
     throw new IllegalArgumentException("No such field(s), or malformed query.");
 }
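The numericQueryParserFields branch is needed because Lucene's default query parsing treats every term as text, while fields indexed as numeric points require a parser that knows each field's type; otherwise range queries like AF:[0.01 TO 0.5] will not match. One common way to wire this up is the flexible StandardQueryParser with a per-field PointsConfig map. A sketch under stated assumptions: the field names, types, and default field are illustrative, not taken from this commit:

    import java.text.NumberFormat;
    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;

    import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
    import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
    import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
    import org.apache.lucene.search.Query;

    public class NumericQueryDemo
    {
        public static Query parseNumeric(String queryString) throws QueryNodeException
        {
            StandardQueryParser parser = new StandardQueryParser();

            // Declare which fields hold numeric points, so a range like
            // AF:[0.01 TO 0.5] parses as a numeric query, not a text term:
            Map<String, PointsConfig> pointsConfig = new HashMap<>();
            pointsConfig.put("AF", new PointsConfig(NumberFormat.getNumberInstance(Locale.ROOT), Double.class));   // hypothetical double field
            pointsConfig.put("DP", new PointsConfig(NumberFormat.getIntegerInstance(Locale.ROOT), Integer.class)); // hypothetical int field
            parser.setPointsConfigMap(pointsConfig);

            return parser.parse(queryString, "AF"); // second arg = default field
        }
    }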
jbrowse/src/org/labkey/jbrowse/model/JsonFile.java (24 changes: 23 additions & 1 deletion)
@@ -964,10 +964,32 @@ public File prepareResource(User u, Logger log, boolean throwIfNotPrepared, bool
 }
 else if (existingLuceneDir != null && existingLuceneDir.exists())
 {
+    // Note: this could exist, but be an empty folder:
+    if (luceneDir.exists())
+    {
+        log.info("Deleting existing lucene index dir: " + luceneDir.getPath());
+        try
+        {
+            FileUtils.deleteDirectory(luceneDir);
+        }
+        catch (IOException e)
+        {
+            throw new PipelineJobException(e);
+        }
+    }
+
+    log.debug("Creating symlink to existing index: " + existingLuceneDir.getPath());
+    log.debug("Symlink target: " + luceneDir.getPath());

     try
     {
-        Files.createSymbolicLink(existingLuceneDir.toPath(), existingLuceneDir.toPath());
+        if (!luceneDir.getParentFile().exists())
+        {
+            log.debug("Creating parent directories: " + luceneDir.getParentFile().getPath());
+            FileUtil.mkdirs(luceneDir.getParentFile());
+        }
+
+        Files.createSymbolicLink(luceneDir.toPath(), existingLuceneDir.toPath());
     }
     catch (IOException e)
     {
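The one deleted line is the substantive fix: it passed existingLuceneDir as both arguments, attempting to symlink the directory to itself. Files.createSymbolicLink takes the link to create as its first argument and the target as its second. A quick standalone illustration with hypothetical paths:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class SymlinkDemo
    {
        public static void main(String[] args) throws IOException
        {
            Path target = Path.of("/data/indexes/existing-lucene-index"); // hypothetical existing index
            Path link = Path.of("/pipeline/work/lucene");                 // hypothetical link to create

            // The link's parent must exist first (mirrors the mkdirs call above):
            Files.createDirectories(link.getParent());

            // Signature: createSymbolicLink(Path link, Path target) -- the new
            // symlink comes FIRST; passing the same path twice, as the removed
            // line did, attempts to link a directory to itself and fails.
            Files.createSymbolicLink(link, target);
        }
    }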
(8 of 11 changed files shown; the remaining 3 did not load.)