Merge pull request #254 from LabKey/fb_merge_23.11_to_develop
Merge discvr-23.11 to develop
bbimber authored Dec 14, 2023
2 parents 9558a1d + f718b82 commit 26fc0a5
Showing 11 changed files with 334 additions and 67 deletions.
SequenceAnalysis/pipeline_code/sequence_tools_install.sh (3 changes: 1 addition & 2 deletions)
@@ -512,13 +512,12 @@ then

 wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.18/bcftools-1.18.tar.bz2
 tar xjvf bcftools-1.18.tar.bz2
-bzip2 bcftools-1.18.tar
 chmod 755 bcftools-1.18
 cd bcftools-1.18
 rm -f plugins/liftover.c
 wget $WGET_OPTS -P plugins https://raw.githubusercontent.com/freeseek/score/master/liftover.c

-./configure CFLAGS="-g -Wall -O2 -std=c99"
+./configure
 make

 install ./bcftools $LKTOOLS_DIR
@@ -54,6 +54,17 @@ public static void possiblyCacheSupportFiles(SequenceOutputHandler.JobContext ct
 try
 {
     FileUtils.copyFile(inputFile, localCopy);
+    if (inputFile.getPath().toLowerCase().endsWith("vcf.gz"))
+    {
+        File inputFileIdx = new File(inputFile.getPath() + ".tbi");
+        File localCopyIdx = new File(ScatterGatherUtils.getLocalCopyDir(ctx, true), inputFile.getName() + ".tbi");
+        if (!inputFileIdx.exists())
+        {
+            throw new PipelineJobException("Unable to find file: " + inputFileIdx.getPath());
+        }
+
+        FileUtils.copyFile(inputFileIdx, localCopyIdx);
+    }
     FileUtils.touch(doneFile);
 }
 catch (IOException e)
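For context: tabix indexes for bgzipped VCFs sit beside the data file, named by appending ".tbi" to the full file name (x.vcf.gz -> x.vcf.gz.tbi), which is why the hunk above copies two files and fails fast when the index is missing. A minimal standalone sketch of the same pattern, with hypothetical class and method names, assuming commons-io:

    import java.io.File;
    import java.io.IOException;

    import org.apache.commons.io.FileUtils;

    public class VcfCacheUtil
    {
        // Copies a bgzipped VCF plus its companion .tbi index into cacheDir.
        public static void cacheVcfWithIndex(File vcf, File cacheDir) throws IOException
        {
            FileUtils.copyFile(vcf, new File(cacheDir, vcf.getName()));

            // Tabix writes the index next to the data file, named <file>.tbi:
            File idx = new File(vcf.getPath() + ".tbi");
            if (!idx.exists())
            {
                throw new IOException("Missing tabix index: " + idx.getPath());
            }

            FileUtils.copyFile(idx, new File(cacheDir, idx.getName()));
        }
    }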
@@ -172,7 +172,8 @@ else if (!d.getFile().exists())
 {
     if (d != null && d.getFile() != null && d.getFile().exists())
     {
-        log.error("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
+        // NOTE: ultimately remove this:
+        log.info("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
     }
 }
 }
@@ -307,15 +307,18 @@ public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutpu
 boolean hasMetrics = new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS), PageFlowUtil.set("RowId"), filter, null).exists();
 if (!hasMetrics)
 {
+    job.getLogger().debug("No existing metrics found for: " + rd.getFileId1());
     List<Integer> toAdd = new ArrayList<>(rd.getFileId1());
     if (rd.getFileId2() != null)
     {
         toAdd.add(rd.getFileId2());
     }

+    job.getLogger().debug("adding metrics for " + toAdd.size() + " total fastq files");
     for (int dataId : toAdd)
     {
         //then delete/add:
+        job.getLogger().debug("adding metrics for: " + dataId);
         ReadsetCreationTask.addQualityMetricsForReadset(rs, dataId, job, true);
     }
 }
@@ -332,15 +335,22 @@ public void complete(PipelineJob job, List<Readset> readsets, List<SequenceOutpu
     }
 }

-Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
-TableInfo ti = QueryService.get().getUserSchema(job.getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READ_DATA);
-try
+if (!rows.isEmpty())
 {
-    ti.getUpdateService().updateRows(job.getUser(), target, rows, rows, null, null);
+    Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
+    TableInfo ti = QueryService.get().getUserSchema(job.getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READ_DATA);
+    try
+    {
+        ti.getUpdateService().updateRows(job.getUser(), target, rows, rows, null, null);
+    }
+    catch (InvalidKeyException | BatchValidationException | QueryUpdateServiceException | SQLException e)
+    {
+        throw new PipelineJobException(e);
+    }
 }
-catch (InvalidKeyException | BatchValidationException | QueryUpdateServiceException | SQLException e)
+else
 {
-    throw new PipelineJobException(e);
+    job.getLogger().debug("There were no readdata rows to update");
 }
 }

@@ -94,7 +94,7 @@ public class LofreqAnalysis extends AbstractCommandPipelineStep<LofreqAnalysis.L
 {
     public static final String CATEGORY = "Lowfreq VCF";

-    public LofreqAnalysis(PipelineStepProvider provider, PipelineContext ctx)
+    public LofreqAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
     {
         super(provider, ctx, new LofreqWrapper(ctx.getLogger()));
     }
@@ -108,7 +108,7 @@ public Provider()
 {{
     put("extensions", Arrays.asList("gtf", "gff", "gbk"));
     put("width", 400);
-    put("allowBlank", false);
+    put("allowBlank", true);
 }}, null),
 ToolParameterDescriptor.create("minCoverage", "Min Coverage For Consensus", "If provided, a consensus will only be called over regions with at least this depth", "ldk-integerfield", new JSONObject(){{
     put("minValue", 0);
@@ -212,7 +212,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc

 File outputVcfRaw = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.vcf.gz");
 File outputVcfFiltered = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.filtered.vcf.gz");
-File outputVcfSnpEff = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.snpeff.vcf.gz");

 //LoFreq call
 getWrapper().execute(inputBam, outputVcfRaw, referenceGenome.getWorkingFastaFile(), SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()));
@@ -222,6 +221,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 output.addIntermediateFile(outputVcfRaw);
 output.addIntermediateFile(new File(outputVcfRaw.getPath() + ".tbi"));

+output.addIntermediateFile(outputVcfFiltered);
+output.addIntermediateFile(new File(outputVcfFiltered.getPath() + ".tbi"));
+
 //Add depth for downstream use:
 File coverageOut = new File(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(outputVcfRaw.getName()) + ".coverage");
 runDepthOfCoverage(getPipelineCtx(), output, outputDir, referenceGenome, inputBam, coverageOut);
@@ -438,24 +440,35 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 }

 //SnpEff:
+File activeVCF = outputVcfFiltered;
 Integer geneFileId = getProvider().getParameterByName(SNPEffStep.GENE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
-File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx().getSequenceSupport(), getPipelineCtx().getLogger(), referenceGenome, geneFileId);
+if (geneFileId != null)
+{
+    File outputVcfSnpEff = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.snpeff.vcf.gz");

-SnpEffWrapper snpEffWrapper = new SnpEffWrapper(getPipelineCtx().getLogger());
-snpEffWrapper.runSnpEff(referenceGenome.getGenomeId(), geneFileId, snpEffBaseDir, outputVcfFiltered, outputVcfSnpEff, null);
+    File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx().getSequenceSupport(), getPipelineCtx().getLogger(), referenceGenome, geneFileId);
+    SnpEffWrapper snpEffWrapper = new SnpEffWrapper(getPipelineCtx().getLogger());
+    snpEffWrapper.runSnpEff(referenceGenome.getGenomeId(), geneFileId, snpEffBaseDir, outputVcfFiltered, outputVcfSnpEff, null);

-try
-{
-    SequenceAnalysisService.get().ensureVcfIndex(outputVcfSnpEff, getPipelineCtx().getLogger());
+    try
+    {
+        SequenceAnalysisService.get().ensureVcfIndex(outputVcfSnpEff, getPipelineCtx().getLogger());
+    }
+    catch (IOException e)
+    {
+        throw new PipelineJobException(e);
+    }
+
+    output.addIntermediateFile(outputVcfSnpEff);
+    output.addIntermediateFile(new File(outputVcfSnpEff.getPath() + ".tbi"));
+
+    activeVCF = outputVcfSnpEff;
 }
-catch (IOException e)
+else
 {
-    throw new PipelineJobException(e);
+    getPipelineCtx().getLogger().info("No GTF provided, skipping SnpEff");
 }

-output.addIntermediateFile(outputVcfFiltered);
-output.addIntermediateFile(new File(outputVcfFiltered.getPath() + ".tbi"));
-
 double minFractionForConsensus = getProvider().getParameterByName("minFractionForConsensus").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);

 Integer primerDataId = getProvider().getParameterByName("primerBedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
@@ -508,7 +521,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath());
 VariantContextWriterBuilder writerBuilderConsensus = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict);
 VariantContextWriterBuilder writerBuilderAll = new VariantContextWriterBuilder().setOutputFile(loFreqAllVcf).setReferenceDictionary(dict);
-try (VCFFileReader reader = new VCFFileReader(outputVcfSnpEff);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build())
+try (VCFFileReader reader = new VCFFileReader(activeVCF);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build())
 {
     VCFHeader header = reader.getFileHeader();

@@ -706,8 +719,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
     getPipelineCtx().getLogger().warn("Consensus ambiguities from bcftools and lofreq did not match: " + bcfToolsConsensusNs + " / " + lofreqConsensusNs);
 }

-output.addIntermediateFile(outputVcfSnpEff);
-output.addIntermediateFile(new File(outputVcfSnpEff.getPath() + ".tbi"));
 output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
 output.addSequenceOutput(consensusFastaLoFreq, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);

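The activeVCF variable introduced above lets the downstream reader consume either the filtered VCF or the SnpEff-annotated one, depending on whether a gene file was supplied. For readers unfamiliar with the htsjdk classes in that try-with-resources block, here is a minimal self-contained sketch of the same read/filter/write pattern. The file names are placeholders, the PASS-only filter is an arbitrary example rather than logic from this commit, and it assumes the input is indexed and its header carries a contig dictionary:

    import htsjdk.samtools.SAMSequenceDictionary;
    import htsjdk.samtools.util.CloseableIterator;
    import htsjdk.variant.variantcontext.VariantContext;
    import htsjdk.variant.variantcontext.writer.VariantContextWriter;
    import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
    import htsjdk.variant.vcf.VCFFileReader;
    import htsjdk.variant.vcf.VCFHeader;

    import java.io.File;

    public class VcfCopyDemo
    {
        public static void main(String[] args)
        {
            File in = new File("input.vcf.gz");      // placeholder; a .tbi index must sit beside it
            File out = new File("pass-only.vcf.gz"); // placeholder output

            try (VCFFileReader reader = new VCFFileReader(in))
            {
                VCFHeader header = reader.getFileHeader();
                SAMSequenceDictionary dict = header.getSequenceDictionary();

                VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                        .setOutputFile(out)
                        .setReferenceDictionary(dict);

                try (VariantContextWriter writer = builder.build();
                     CloseableIterator<VariantContext> it = reader.iterator())
                {
                    writer.writeHeader(header);
                    while (it.hasNext())
                    {
                        VariantContext vc = it.next();
                        if (!vc.isFiltered()) // arbitrary example: keep PASS/unfiltered records
                        {
                            writer.add(vc);
                        }
                    }
                }
            }
        }
    }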
@@ -265,6 +265,7 @@ const FilterForm = (props: FilterFormProps ) => {
 <InputLabel id="value-select-label">Value</InputLabel>
 <Select
     labelId="value-select-label"
+    id={`value-select-${index}`}
     value={filter.value}
     onChange={(event) =>
         handleFilterChange(index, "value", event.target.value)
@@ -305,6 +306,7 @@ const FilterForm = (props: FilterFormProps ) => {
 <InputLabel id="value-select-label">Value</InputLabel>
 <Select
     labelId="value-select-label"
+    id={`value-select-${index}`}
     value={filter.value}
     disabled={filter.operator === "is empty" || filter.operator === "is not empty"}
     onChange={(event) =>
@@ -321,6 +323,7 @@ const FilterForm = (props: FilterFormProps ) => {
 ) : (
     <TextFieldMinWidth
         label="Value"
+        id={`value-select-${index}`}
         sx={ highlightedInputs[index]?.value ? highlightedSx : null }
         value={filter.value}
         disabled={filter.operator === "is empty" || filter.operator === "is not empty"}
jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java (34 changes: 19 additions & 15 deletions)
@@ -60,6 +60,7 @@ public class JBrowseLuceneSearch
 private final JBrowseSession _session;
 private final JsonFile _jsonFile;
 private final User _user;
+private final String[] specialStartPatterns = {"*:* -", "+", "-"};

 private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u)
 {
@@ -105,7 +106,7 @@ private String tryUrlDecode(String input) {
 //special case for urls containing +; this isn't necessary for strings sent from the client-side, but URLs
 //sent via unit tests autodecode, and strings containing + rather than the URL-encoded symbol are unsafe
 //to pass through URLDecoded.decode
-if(input.contains("+")) {
+if (input.contains("+")) {
     return input;
 }

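This special case exists because '+' is the form-encoding of a space: passing a string that already contains a literal '+' through URLDecoder.decode silently turns it into a space. A small standalone demonstration (not from this commit):

    import java.net.URLDecoder;
    import java.nio.charset.StandardCharsets;

    public class PlusDecodeDemo
    {
        public static void main(String[] args)
        {
            // A literal '+' is mangled into a space by decoding:
            System.out.println(URLDecoder.decode("AF:+0.5", StandardCharsets.UTF_8));   // prints "AF: 0.5"

            // The percent-encoded form of '+' survives decoding intact:
            System.out.println(URLDecoder.decode("AF:%2B0.5", StandardCharsets.UTF_8)); // prints "AF:+0.5"
        }
    }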
@@ -115,6 +116,19 @@ private String tryUrlDecode(String input) {
 }
 }

+public String extractFieldName(String queryString) {
+    // Check if the query starts with any of the start patterns
+    for (String pattern : specialStartPatterns) {
+        if (queryString.startsWith(pattern)) {
+            queryString = queryString.substring(pattern.length()).trim();
+            break;
+        }
+    }
+
+    // Split the remaining string by ':' and return the first part (field name)
+    String[] parts = queryString.split(":", 2);
+    return parts.length > 0 ? parts[0].trim() : null;
+}

public JSONObject doSearch(User u, String searchString, final int pageSize, final int offset) throws IOException, ParseException
{
@@ -180,18 +194,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
 String queryString = tokenizer.nextToken();
 Query query = null;

-// Type is defined by the first field in the lucene query
-// "First" field is defined by getting the first consecutive string of ASCII characters or underscores terminated by a colon
-// we might just want to return the field(s) in the form instead
-Pattern pattern = Pattern.compile("[\\p{ASCII}&&[^\\s:*+-]][\\p{ASCII}&&[^:\\p{Punct}*]]*:");
-
-Matcher matcher = pattern.matcher(queryString);
-
-String fieldName = null;
-if (matcher.find())
-{
-    fieldName = matcher.group().substring(0, matcher.group().length() - 1);
-}
+String fieldName = extractFieldName(queryString);

 if (VARIABLE_SAMPLES.equals(fieldName))
 {
Expand All @@ -202,7 +205,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
 {
     query = queryParser.parse(queryString);
 }
-else if(numericQueryParserFields.contains(fieldName))
+else if (numericQueryParserFields.contains(fieldName))
 {
     try
     {
@@ -212,7 +215,8 @@ else if(numericQueryParserFields.contains(fieldName))
 {
     e.printStackTrace();
 }
-} else
+}
+else
 {
     throw new IllegalArgumentException("No such field(s), or malformed query.");
 }
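The numericQueryParserFields branch is needed because Lucene's default query parsing treats every term as text, while fields indexed as numeric points require a parser that knows each field's type; otherwise range queries like AF:[0.01 TO 0.5] will not match. One common way to wire this up is the flexible StandardQueryParser with a per-field PointsConfig map. A sketch under stated assumptions: the field names, types, and default field are illustrative, not taken from this commit:

    import java.text.NumberFormat;
    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;

    import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
    import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
    import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
    import org.apache.lucene.search.Query;

    public class NumericQueryDemo
    {
        public static Query parseNumeric(String queryString) throws QueryNodeException
        {
            StandardQueryParser parser = new StandardQueryParser();

            // Declare which fields hold numeric points, so a range like
            // AF:[0.01 TO 0.5] parses as a numeric query, not a text term:
            Map<String, PointsConfig> pointsConfig = new HashMap<>();
            pointsConfig.put("AF", new PointsConfig(NumberFormat.getNumberInstance(Locale.ROOT), Double.class));   // hypothetical double field
            pointsConfig.put("DP", new PointsConfig(NumberFormat.getIntegerInstance(Locale.ROOT), Integer.class)); // hypothetical int field
            parser.setPointsConfigMap(pointsConfig);

            return parser.parse(queryString, "AF"); // second arg = default field
        }
    }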
jbrowse/src/org/labkey/jbrowse/model/JsonFile.java (24 changes: 23 additions & 1 deletion)
@@ -964,10 +964,32 @@ public File prepareResource(User u, Logger log, boolean throwIfNotPrepared, bool
 }
 else if (existingLuceneDir != null && existingLuceneDir.exists())
 {
+    // Note: this could exist, but be an empty folder:
+    if (luceneDir.exists())
+    {
+        log.info("Deleting existing lucene index dir: " + luceneDir.getPath());
+        try
+        {
+            FileUtils.deleteDirectory(luceneDir);
+        }
+        catch (IOException e)
+        {
+            throw new PipelineJobException(e);
+        }
+    }
+
+    log.debug("Creating symlink to existing index: " + existingLuceneDir.getPath());
+    log.debug("Symlink target: " + luceneDir.getPath());

     try
     {
-        Files.createSymbolicLink(existingLuceneDir.toPath(), existingLuceneDir.toPath());
+        if (!luceneDir.getParentFile().exists())
+        {
+            log.debug("Creating parent directories: " + luceneDir.getParentFile().getPath());
+            FileUtil.mkdirs(luceneDir.getParentFile());
+        }
+
+        Files.createSymbolicLink(luceneDir.toPath(), existingLuceneDir.toPath());
     }
     catch (IOException e)
     {
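The one deleted line is the substantive fix: it passed existingLuceneDir as both arguments, attempting to symlink the directory to itself. Files.createSymbolicLink takes the link to create as its first argument and the target as its second. A quick standalone illustration with hypothetical paths:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class SymlinkDemo
    {
        public static void main(String[] args) throws IOException
        {
            Path target = Path.of("/data/indexes/existing-lucene-index"); // hypothetical existing index
            Path link = Path.of("/pipeline/work/lucene");                 // hypothetical link to create

            // The link's parent must exist first (mirrors the mkdirs call above):
            Files.createDirectories(link.getParent());

            // Signature: createSymbolicLink(Path link, Path target) -- the new
            // symlink comes FIRST; passing the same path twice, as the removed
            // line did, attempts to link a directory to itself and fails.
            Files.createSymbolicLink(link, target);
        }
    }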
(8 of 11 changed files shown; the remaining 3 did not load.)