Skip to content

Commit

Permalink
Merge pull request #292 from LabKey/fb_merge_24.3_to_develop
Browse files Browse the repository at this point in the history
Merge discvr-24.3 to develop
  • Loading branch information
bbimber authored May 11, 2024
2 parents f74bd78 + 795e2bb commit 95d4c03
Show file tree
Hide file tree
Showing 20 changed files with 346 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import org.labkey.api.data.TableInfo;
import org.labkey.api.data.TableSelector;
import org.labkey.api.exp.api.ExpData;
import org.labkey.api.exp.api.ExpRun;
import org.labkey.api.exp.api.ExperimentService;
import org.labkey.api.ldk.LDKService;
import org.labkey.api.pipeline.PipeRoot;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.PipelineService;
import org.labkey.api.pipeline.PipelineStatusFile;
import org.labkey.api.query.FieldKey;
import org.labkey.api.security.User;
import org.labkey.api.sequenceanalysis.RefNtSequenceModel;
Expand All @@ -34,6 +36,8 @@

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -44,6 +48,7 @@
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Created by bimber on 9/15/2014.
Expand Down Expand Up @@ -229,6 +234,60 @@ else if (!d.getFile().exists())
log.error("Unable to find file associated with analysis: " + m.getAnalysisId() + ", " + m.getAlignmentFile() + ", " + d.getFile().getPath() + " for container: " + (c == null ? m.getContainer() : c.getPath()));
}
}

inspectForCoreFiles(m.getRunId(), log);
}
}

/**
 * Inspects the pipeline job root associated with the given run for core dump files
 * (files whose names start with "core."), logging an error for each one found so
 * crashed native processes are surfaced.
 *
 * @param runId rowId of the ExpRun whose pipeline root should be inspected; null is a no-op
 * @param log logger used to report findings and problems
 */
private void inspectForCoreFiles(Integer runId, Logger log)
{
    if (runId == null)
    {
        return;
    }

    ExpRun run = ExperimentService.get().getExpRun(runId);
    if (run == null)
    {
        log.info("No ExpRun found for runId: " + runId);
        return;
    }
    else if (run.getJobId() == null)
    {
        log.info("ExpRun lacks jobId: " + runId);
        return;
    }

    PipelineStatusFile sf = PipelineService.get().getStatusFile(run.getJobId());
    if (sf == null)
    {
        log.error("Unknown statusFile: " + run.getJobId() + ", for run: " + runId);
        return;
    }
    else if (sf.getFilePath() == null)
    {
        log.error("StatusFile filepath is null: " + run.getJobId() + ", for run: " + runId);
        return;
    }

    // NOTE(review): PipelineStatusFile.getFilePath() typically points to the job's log file;
    // if so, walking its parent directory may be the intent here -- confirm against usage.
    File root = new File(sf.getFilePath());
    if (!root.exists())
    {
        log.error("Run fileroot does not exist: " + runId + " / " + root.getPath());
        return;
    }

    try (Stream<Path> stream = Files.walk(root.toPath()))
    {
        // BUGFIX: Path.startsWith() compares whole path components, not string prefixes,
        // so a file named "core.12345" would never match. Convert the filename to a
        // String before testing the "core." prefix. Also guard against a null filename
        // (e.g. a filesystem root).
        List<Path> files = stream
                .filter(x -> x.getFileName() != null && x.getFileName().toString().startsWith("core."))
                .toList();

        files.forEach(x -> log.error("Found core file: " + x.toFile().getPath()));
    }
    catch (IOException e)
    {
        // Use the same path we actually walked in the error message, for consistency.
        log.error("Error walking file root: " + root.getPath(), e);
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import htsjdk.samtools.util.Interval;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
Expand Down Expand Up @@ -41,7 +42,8 @@ public Provider()
{
    // Registers the GATK SelectVariants step with UI parameters for sample inclusion/exclusion.
    super("SelectSamples", "Select Specific Samples", "GATK SelectVariants", "A VCF will be generated containing only the samples specified below.", Arrays.asList(
            ToolParameterDescriptor.create(SAMPLE_INCLUDE, "Select Sample(s) Include", "Only variants of the selected type(s) will be included", "sequenceanalysis-trimmingtextarea", null, null),
            // FIX: label typo "Samples(s)" -> "Sample(s)"
            ToolParameterDescriptor.create(SAMPLE_EXCLUDE, "Select Sample(s) To Exclude", "Variants of the selected type(s) will be excluded", "sequenceanalysis-trimmingtextarea", null, null),
            // NOTE(review): the param name "allowNnonoverlappingSamples" contains a doubled 'N';
            // left unchanged because it may already be referenced by saved job configurations -- confirm before renaming.
            // FIX: description typo "fail is samples" -> "fail if samples"
            ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--allow-nonoverlapping-command-line-samples"), "allowNnonoverlappingSamples", "Allow non-overlapping Samples", "Normally the job will fail if samples are selected that do not exist in the VCF. If checked, this will be allowed.", "checkbox", null, null)
    ), PageFlowUtil.set("/sequenceanalysis/field/TrimmingTextArea.js"), "https://software.broadinstitute.org/gatk/");
}

Expand Down Expand Up @@ -72,6 +74,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
});
}

options.addAll(getClientCommandArgs());

File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".selectSamples.vcf.gz");
getWrapper().execute(genome.getWorkingFastaFile(), inputVCF, outputVcf, options);
if (!outputVcf.exists())
Expand Down
2 changes: 2 additions & 0 deletions jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ static public void setInstance(JBrowseService instance)

abstract public void registerLuceneIndexDetector(LuceneIndexDetector detector);

abstract public void cacheDefaultQuery(User u, String sessionId, String trackId);

public interface LuceneIndexDetector
{
SequenceOutputFile findMatchingLuceneIndex(SequenceOutputFile vcfFile, List<String> infoFieldsToIndex, User u, @Nullable Logger log) throws PipelineJobException;
Expand Down
10 changes: 10 additions & 0 deletions jbrowse/src/org/labkey/jbrowse/JBrowseController.java
Original file line number Diff line number Diff line change
Expand Up @@ -1035,5 +1035,15 @@ public void setIncludeDefaultFields(boolean includeDefaultFields)
this.includeDefaultFields = includeDefaultFields;
}
}

@RequiresPermission(ReadPermission.class)
public static class GetLuceneCacheInfoAction extends ReadOnlyApiAction<Object>
{
    // Read-only endpoint exposing per-track Lucene query cache statistics to clients.
    @Override
    public ApiResponse execute(Object form, BindException errors)
    {
        ApiSimpleResponse response = new ApiSimpleResponse();
        response.put("cacheInfo", JBrowseLuceneSearch.reportCacheInfo());

        return response;
    }
}
}

2 changes: 2 additions & 0 deletions jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,14 @@ public static Map<String, JBrowseFieldDescriptor> getGenotypeDependentFields(@Nu
else
{
ret.put(VARIABLE_SAMPLES, new JBrowseFieldDescriptor(VARIABLE_SAMPLES, "All samples with this variant", true, true, VCFHeaderLineType.Character, 7).multiValued(true).label("Samples With Variant"));
ret.put(HOMOZYGOUS_VAR, new JBrowseFieldDescriptor(HOMOZYGOUS_VAR, "Samples that are homozygous for the variant allele", false, true, VCFHeaderLineType.Character, 8).multiValued(true).label("Samples Homozygous for Variant"));
ret.put(N_HET, new JBrowseFieldDescriptor(N_HET, "The number of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Heterozygotes"));
ret.put(N_HOMVAR, new JBrowseFieldDescriptor(N_HOMVAR, "The number of samples with this allele that are homozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Homozygous Variant"));
ret.put(N_CALLED, new JBrowseFieldDescriptor(N_CALLED, "The number of samples with called genotypes at this position", false, true, VCFHeaderLineType.Integer, 9).label("# Genotypes Called"));
ret.put(FRACTION_HET, new JBrowseFieldDescriptor(FRACTION_HET, "The fraction of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Float, 9).label("Fraction Heterozygotes"));

ret.get(VARIABLE_SAMPLES).allowableValues(header.getSampleNamesInOrder());
ret.get(HOMOZYGOUS_VAR).allowableValues(header.getSampleNamesInOrder());
}
}
}
Expand Down
99 changes: 95 additions & 4 deletions jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.labkey.jbrowse;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
Expand All @@ -15,16 +16,22 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LRUQueryCache;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryCache;
import org.apache.lucene.search.QueryCachingPolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.NumericUtils;
import org.jetbrains.annotations.Nullable;
import org.json.JSONArray;
import org.json.JSONObject;
import org.labkey.api.cache.Cache;
import org.labkey.api.cache.CacheManager;
import org.labkey.api.data.Container;
import org.labkey.api.data.ContainerManager;
import org.labkey.api.jbrowse.AbstractJBrowseFieldCustomizer;
Expand All @@ -33,6 +40,7 @@
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.security.User;
import org.labkey.api.settings.AppProps;
import org.labkey.api.util.logging.LogHelper;
import org.labkey.jbrowse.model.JBrowseSession;
import org.labkey.jbrowse.model.JsonFile;

Expand All @@ -51,20 +59,24 @@
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.labkey.jbrowse.JBrowseFieldUtils.VARIABLE_SAMPLES;
import static org.labkey.jbrowse.JBrowseFieldUtils.getSession;
import static org.labkey.jbrowse.JBrowseFieldUtils.getTrack;

public class JBrowseLuceneSearch
{
private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries");
private final JBrowseSession _session;
private final JsonFile _jsonFile;
private final User _user;
private final String[] specialStartPatterns = {"*:* -", "+", "-"};
private static final String ALL_DOCS = "all";
private static final String GENOMIC_POSITION = "genomicPosition";
private static final int maxCachedQueries = 1000;
private static final long maxRamBytesUsed = 250 * 1024 * 1024L;

private static final Cache<String, LRUQueryCache> _cache = CacheManager.getStringKeyCache(1000, CacheManager.UNLIMITED, "JBrowseLuceneSearchCache");

private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u)
{
Expand All @@ -85,6 +97,17 @@ public static JBrowseLuceneSearch create(String sessionId, String trackId, User
return new JBrowseLuceneSearch(session, getTrack(session, trackId, u), u);
}

/**
 * Returns the Lucene query cache registered for the given track, lazily creating one
 * on first access. Synchronized so concurrent callers cannot create duplicate caches
 * for the same track.
 */
private static synchronized QueryCache getCacheForSession(String trackObjectId) {
    LRUQueryCache existing = _cache.get(trackObjectId);
    if (existing != null)
    {
        return existing;
    }

    LRUQueryCache created = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
    _cache.put(trackObjectId, created);

    return created;
}

private String templateReplace(final String searchString) {
String result = searchString;
Pattern pattern = Pattern.compile("~(.*?)~");
Expand Down Expand Up @@ -148,6 +171,8 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
)
{
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
indexSearcher.setQueryCache(getCacheForSession(_jsonFile.getObjectId()));
indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy());

List<String> stringQueryParserFields = new ArrayList<>();
Map<String, SortField.Type> numericQueryParserFields = new HashMap<>();
Expand Down Expand Up @@ -245,7 +270,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
throw new IllegalArgumentException("Could not find type for sort field: " + sortField);
}

sort = new Sort(new SortField(sortField, fieldType, sortReverse));
sort = new Sort(new SortField(sortField + "_sort", fieldType, sortReverse));
}

// Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more.
Expand All @@ -263,7 +288,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
for (int i = pageSize * offset; i < Math.min(pageSize * (offset + 1), topDocs.scoreDocs.length); i++)
{
JSONObject elem = new JSONObject();
Document doc = indexSearcher.doc(topDocs.scoreDocs[i].doc);
Document doc = indexSearcher.storedFields().document(topDocs.scoreDocs[i].doc);

for (IndexableField field : doc.getFields()) {
String fieldName = field.name();
Expand Down Expand Up @@ -345,4 +370,70 @@ public boolean isAvailable(Container c, User u)
return true;
}
}

/**
 * Caching policy that always caches boolean queries containing a MatchAllDocsQuery
 * clause (the default "show everything" JBrowse query), and defers to Lucene's
 * usage-tracking policy for everything else.
 */
public static class ForceMatchAllDocsCachingPolicy implements QueryCachingPolicy {
    private final UsageTrackingQueryCachingPolicy fallbackPolicy = new UsageTrackingQueryCachingPolicy();

    // True when at least one clause of the boolean query is a MatchAllDocsQuery.
    private static boolean hasMatchAllClause(BooleanQuery booleanQuery) {
        for (BooleanClause clause : booleanQuery) {
            if (clause.getQuery() instanceof MatchAllDocsQuery) {
                return true;
            }
        }

        return false;
    }

    @Override
    public boolean shouldCache(Query query) throws IOException {
        if (query instanceof BooleanQuery booleanQuery && hasMatchAllClause(booleanQuery)) {
            return true;
        }

        return fallbackPolicy.shouldCache(query);
    }

    @Override
    public void onUse(Query query) {
        fallbackPolicy.onUse(query);
    }
}

/**
 * Summarizes hit/miss/eviction statistics for each registered per-track Lucene query cache.
 *
 * @return a JSONArray containing one JSONObject of statistics per cached track
 */
public static JSONArray reportCacheInfo()
{
    JSONArray cacheInfo = new JSONArray();
    // NOTE: keys in _cache are track objectIds (see getCacheForSession/clearCache).
    for (String trackObjectId : _cache.getKeys())
    {
        LRUQueryCache qc = _cache.get(trackObjectId);
        if (qc == null)
        {
            // BUGFIX: the entry can be evicted/removed between getKeys() and get(),
            // which previously produced an NPE below. Skip stale keys.
            continue;
        }

        JSONObject info = new JSONObject();
        info.put("cacheSize", qc.getCacheSize());
        info.put("cacheCount", qc.getCacheCount());
        info.put("hitCount", qc.getHitCount());
        info.put("missCount", qc.getMissCount());
        info.put("evictionCount", qc.getEvictionCount());
        info.put("totalCount", qc.getTotalCount());
        cacheInfo.put(info);
    }

    return cacheInfo;
}

/**
 * Pre-warms the Lucene query cache for this track: drops any stale cached entries for the
 * track, then executes the default unfiltered search (first page, sorted by genomic position).
 * Failures are logged rather than propagated, since warming is best-effort.
 */
public void cacheDefaultQuery()
{
    String trackObjectId = _jsonFile.getObjectId();
    try
    {
        clearCache(trackObjectId);
        doSearch(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false);
    }
    catch (ParseException | IOException e)
    {
        _log.error("Unable to cache default query for: " + trackObjectId, e);
    }
}

/**
 * Drops the cached Lucene query cache for the given track, or every cached entry
 * when the track id is null.
 */
public static void clearCache(@Nullable String jbrowseTrackId)
{
    if (jbrowseTrackId != null)
    {
        _cache.remove(jbrowseTrackId);
        return;
    }

    _cache.clear();
}
}
7 changes: 7 additions & 0 deletions jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,13 @@ public void registerLuceneIndexDetector(LuceneIndexDetector detector)
_detectors.add(detector);
}

// Resolves the session/track pair for this user and pre-warms the default Lucene
// query cache for the resulting index. Delegates entirely to JBrowseLuceneSearch.
@Override
public void cacheDefaultQuery(User u, String sessionId, String trackId)
{
    JBrowseLuceneSearch.create(sessionId, trackId, u).cacheDefaultQuery();
}

public static final class DefaultLuceneIndexDetector implements LuceneIndexDetector
{
@Override
Expand Down
7 changes: 5 additions & 2 deletions jbrowse/src/org/labkey/jbrowse/model/JsonFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@
import org.labkey.api.util.PageFlowUtil;
import org.labkey.api.util.Path;
import org.labkey.api.view.UnauthorizedException;
import org.labkey.jbrowse.JBrowseLuceneSearch;
import org.labkey.jbrowse.JBrowseManager;
import org.labkey.jbrowse.JBrowseSchema;
import org.labkey.jbrowse.pipeline.IndexVariantsStep;
import org.labkey.jbrowse.pipeline.JBrowseLucenePipelineJob;
import org.labkey.sequenceanalysis.run.util.TabixRunner;

Expand Down Expand Up @@ -964,6 +964,8 @@ public File prepareResource(User u, Logger log, boolean throwIfNotPrepared, bool
}
else if (existingLuceneDir != null && existingLuceneDir.exists())
{
JBrowseLuceneSearch.clearCache(getObjectId());

// Note: this could exist, but be an empty folder:
if (luceneDir.exists())
{
Expand Down Expand Up @@ -1004,7 +1006,7 @@ else if (sizeInGb > 50)
try
{
PipeRoot root = PipelineService.get().getPipelineRootSetting(getContainerObj());
PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, getObjectId(), vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
}
catch (PipelineValidationException e)
{
Expand All @@ -1030,6 +1032,7 @@ else if (sizeInGb > 50)
if (forceReprocess || !doesLuceneIndexExist())
{
JBrowseLucenePipelineJob.prepareLuceneIndex(targetFile, luceneDir, log, getInfoFieldsToIndex(), allowLenientLuceneProcessing());
JBrowseLuceneSearch.clearCache(getObjectId());
}
else
{
Expand Down
Loading

0 comments on commit 95d4c03

Please sign in to comment.