diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java
index 637752206..e6b593698 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java
@@ -12,11 +12,13 @@
 import org.labkey.api.data.TableInfo;
 import org.labkey.api.data.TableSelector;
 import org.labkey.api.exp.api.ExpData;
+import org.labkey.api.exp.api.ExpRun;
 import org.labkey.api.exp.api.ExperimentService;
 import org.labkey.api.ldk.LDKService;
 import org.labkey.api.pipeline.PipeRoot;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.pipeline.PipelineService;
+import org.labkey.api.pipeline.PipelineStatusFile;
 import org.labkey.api.query.FieldKey;
 import org.labkey.api.security.User;
 import org.labkey.api.sequenceanalysis.RefNtSequenceModel;
@@ -34,6 +36,8 @@
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -44,6 +48,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Created by bimber on 9/15/2014.
@@ -229,6 +234,60 @@ else if (!d.getFile().exists())
                     log.error("Unable to find file associated with analysis: " + m.getAnalysisId() + ", " + m.getAlignmentFile() + ", " + d.getFile().getPath() + " for container: " + (c == null ? m.getContainer() : c.getPath()));
                 }
             }
+
+            inspectForCoreFiles(m.getRunId(), log);
         }
     }
+
+    private void inspectForCoreFiles(Integer runId, Logger log)
+    {
+        if (runId == null)
+        {
+            return;
+        }
+
+        ExpRun run = ExperimentService.get().getExpRun(runId);
+        if (run == null)
+        {
+            log.info("No ExpRun found for runId: " + runId);
+            return;
+        }
+        else if (run.getJobId() == null)
+        {
+            log.info("ExpRun lacks jobId: " + runId);
+            return;
+        }
+
+        PipelineStatusFile sf = PipelineService.get().getStatusFile(run.getJobId());
+        if (sf == null)
+        {
+            log.error("Unknown statusFile: " + run.getJobId() + ", for run: " + runId);
+            return;
+        }
+        else if (sf.getFilePath() == null)
+        {
+            log.error("StatusFile filepath is null: " + run.getJobId() + ", for run: " + runId);
+            return;
+        }
+
+        File root = new File(sf.getFilePath());
+        if (!root.exists())
+        {
+            log.error("Run fileroot does not exist: " + runId + " / " + root.getPath());
+            return;
+        }
+
+        try (Stream<Path> stream = Files.walk(root.toPath()))
+        {
+            List<Path> files = stream.filter(x -> x.getFileName().toString().startsWith("core.")).toList();
+            if (!files.isEmpty())
+            {
+                files.forEach(x -> log.error("Found core file: " + x.toFile().getPath()));
+            }
+        }
+        catch (IOException e)
+        {
+            log.error("Error walking file root: " + root.getPath(), e);
+        }
     }
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSamplesStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSamplesStep.java
index 8338aeeea..ee6b584c1 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSamplesStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSamplesStep.java
@@ -3,6 +3,7 @@
 import htsjdk.samtools.util.Interval;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
@@ -41,7 +42,8 @@ public Provider()
         {
             super("SelectSamples", "Select Specific Samples", "GATK SelectVariants", "A VCF will be generated containing only the samples specified below.", Arrays.asList(
                     ToolParameterDescriptor.create(SAMPLE_INCLUDE, "Select Sample(s) Include", "Only variants of the selected type(s) will be included", "sequenceanalysis-trimmingtextarea", null, null),
-                    ToolParameterDescriptor.create(SAMPLE_EXCLUDE, "Select Samples(s) To Exclude", "Variants of the selected type(s) will be excluded", "sequenceanalysis-trimmingtextarea", null, null)
+                    ToolParameterDescriptor.create(SAMPLE_EXCLUDE, "Select Sample(s) To Exclude", "Variants of the selected type(s) will be excluded", "sequenceanalysis-trimmingtextarea", null, null),
+                    ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--allow-nonoverlapping-command-line-samples"), "allowNonoverlappingSamples", "Allow Non-overlapping Samples", "Normally the job will fail if samples are selected that do not exist in the VCF. If checked, this will be allowed.", "checkbox", null, null)
             ), PageFlowUtil.set("/sequenceanalysis/field/TrimmingTextArea.js"), "https://software.broadinstitute.org/gatk/");
         }
@@ -72,6 +74,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
             });
         }
 
+        options.addAll(getClientCommandArgs());
+
         File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".selectSamples.vcf.gz");
         getWrapper().execute(genome.getWorkingFastaFile(), inputVCF, outputVcf, options);
         if (!outputVcf.exists())
diff --git a/jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java b/jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java
index 76e774a15..513adb923 100644
--- a/jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java
+++ b/jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java
@@ -48,6 +48,8 @@ static public void setInstance(JBrowseService instance)
 
     abstract public void registerLuceneIndexDetector(LuceneIndexDetector detector);
 
+    abstract public void cacheDefaultQuery(User u, String sessionId, String trackId);
+
     public interface LuceneIndexDetector
     {
         SequenceOutputFile findMatchingLuceneIndex(SequenceOutputFile vcfFile, List<String> infoFieldsToIndex, User u, @Nullable Logger log) throws PipelineJobException;
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
index 11a01c283..caaf68229 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseController.java
@@ -1035,5 +1035,15 @@ public void setIncludeDefaultFields(boolean includeDefaultFields)
             this.includeDefaultFields = includeDefaultFields;
         }
     }
+
+    @RequiresPermission(ReadPermission.class)
+    public static class GetLuceneCacheInfoAction extends ReadOnlyApiAction<Object>
+    {
+        @Override
+        public ApiResponse execute(Object form, BindException errors)
+        {
+            return new ApiSimpleResponse("cacheInfo", JBrowseLuceneSearch.reportCacheInfo());
+        }
+    }
 }
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java b/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java
index 22dee6a04..6e5e2df27 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java
@@ -132,12 +132,14 @@ public static Map<String, JBrowseFieldDescriptor> getGenotypeDependentFields(@Nu
         else
         {
             ret.put(VARIABLE_SAMPLES, new JBrowseFieldDescriptor(VARIABLE_SAMPLES, "All samples with this variant", true, true, VCFHeaderLineType.Character, 7).multiValued(true).label("Samples With Variant"));
+            ret.put(HOMOZYGOUS_VAR, new JBrowseFieldDescriptor(HOMOZYGOUS_VAR, "Samples that are homozygous for the variant allele", false, true, VCFHeaderLineType.Character, 8).multiValued(true).label("Samples Homozygous for Variant"));
             ret.put(N_HET, new JBrowseFieldDescriptor(N_HET, "The number of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Heterozygotes"));
             ret.put(N_HOMVAR, new JBrowseFieldDescriptor(N_HOMVAR, "The number of samples with this allele that are homozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Homozygous Variant"));
             ret.put(N_CALLED, new JBrowseFieldDescriptor(N_CALLED, "The number of samples with called genotypes at this position", false, true, VCFHeaderLineType.Integer, 9).label("# Genotypes Called"));
             ret.put(FRACTION_HET, new JBrowseFieldDescriptor(FRACTION_HET, "The fraction of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Float, 9).label("Fraction Heterozygotes"));
 
             ret.get(VARIABLE_SAMPLES).allowableValues(header.getSampleNamesInOrder());
+            ret.get(HOMOZYGOUS_VAR).allowableValues(header.getSampleNamesInOrder());
         }
     }
 }
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
index 14d0aa031..1b2780f01 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java
@@ -1,6 +1,7 @@
 package org.labkey.jbrowse;
 
 import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -15,16 +16,22 @@
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryCache;
+import org.apache.lucene.search.QueryCachingPolicy;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.NumericUtils;
 import org.jetbrains.annotations.Nullable;
+import org.json.JSONArray;
 import org.json.JSONObject;
+import org.labkey.api.cache.Cache;
+import org.labkey.api.cache.CacheManager;
 import org.labkey.api.data.Container;
 import org.labkey.api.data.ContainerManager;
 import org.labkey.api.jbrowse.AbstractJBrowseFieldCustomizer;
@@ -33,6 +40,7 @@
 import org.labkey.api.module.ModuleLoader;
 import org.labkey.api.security.User;
 import org.labkey.api.settings.AppProps;
+import org.labkey.api.util.logging.LogHelper;
 import org.labkey.jbrowse.model.JBrowseSession;
 import org.labkey.jbrowse.model.JsonFile;
@@ -51,7 +59,6 @@
 import java.util.StringTokenizer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
 
 import static org.labkey.jbrowse.JBrowseFieldUtils.VARIABLE_SAMPLES;
 import static org.labkey.jbrowse.JBrowseFieldUtils.getSession;
@@ -59,12 +66,17 @@
 
 public class JBrowseLuceneSearch
 {
+    private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries");
     private final JBrowseSession _session;
     private final JsonFile _jsonFile;
     private final User _user;
     private final String[] specialStartPatterns = {"*:* -", "+", "-"};
     private static final String ALL_DOCS = "all";
     private static final String GENOMIC_POSITION = "genomicPosition";
+    private static final int maxCachedQueries = 1000;
+    private static final long maxRamBytesUsed = 250 * 1024 * 1024L;
+
+    private static final Cache<String, LRUQueryCache> _cache = CacheManager.getStringKeyCache(1000, CacheManager.UNLIMITED, "JBrowseLuceneSearchCache");
 
     private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u)
     {
@@ -85,6 +97,17 @@ public static JBrowseLuceneSearch create(String sessionId, String trackId, User
         return new JBrowseLuceneSearch(session, getTrack(session, trackId, u), u);
     }
 
+    private static synchronized QueryCache getCacheForSession(String trackObjectId)
+    {
+        LRUQueryCache qc = _cache.get(trackObjectId);
+        if (qc == null)
+        {
+            qc = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
+            _cache.put(trackObjectId, qc);
+        }
+
+        return qc;
+    }
+
     private String templateReplace(final String searchString)
     {
         String result = searchString;
         Pattern pattern = Pattern.compile("~(.*?)~");
@@ -148,6 +171,8 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
         )
         {
             IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+            indexSearcher.setQueryCache(getCacheForSession(_jsonFile.getObjectId()));
+            indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy());
 
             List<String> stringQueryParserFields = new ArrayList<>();
             Map numericQueryParserFields = new HashMap<>();
@@ -245,7 +270,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
                     throw new IllegalArgumentException("Could not find type for sort field: " + sortField);
                 }
 
-                sort = new Sort(new SortField(sortField, fieldType, sortReverse));
+                sort = new Sort(new SortField(sortField + "_sort", fieldType, sortReverse));
             }
 
             // Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more.
@@ -263,7 +288,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
             for (int i = pageSize * offset; i < Math.min(pageSize * (offset + 1), topDocs.scoreDocs.length); i++)
             {
                 JSONObject elem = new JSONObject();
-                Document doc = indexSearcher.doc(topDocs.scoreDocs[i].doc);
+                Document doc = indexSearcher.storedFields().document(topDocs.scoreDocs[i].doc);
                 for (IndexableField field : doc.getFields())
                 {
                     String fieldName = field.name();
@@ -345,4 +370,70 @@ public boolean isAvailable(Container c, User u)
             return true;
         }
     }
+
+    public static class ForceMatchAllDocsCachingPolicy implements QueryCachingPolicy
+    {
+        private final UsageTrackingQueryCachingPolicy defaultPolicy = new UsageTrackingQueryCachingPolicy();
+
+        @Override
+        public boolean shouldCache(Query query) throws IOException
+        {
+            if (query instanceof BooleanQuery bq)
+            {
+                for (BooleanClause clause : bq)
+                {
+                    if (clause.getQuery() instanceof MatchAllDocsQuery)
+                    {
+                        return true;
+                    }
+                }
+            }
+
+            return defaultPolicy.shouldCache(query);
+        }
+
+        @Override
+        public void onUse(Query query)
+        {
+            defaultPolicy.onUse(query);
+        }
+    }
+
+    public static JSONArray reportCacheInfo()
+    {
+        JSONArray cacheInfo = new JSONArray();
+        for (String sessionId : _cache.getKeys())
+        {
+            LRUQueryCache qc = _cache.get(sessionId);
+            JSONObject info = new JSONObject();
+            info.put("cacheSize", qc.getCacheSize());
+            info.put("cacheCount", qc.getCacheCount());
+            info.put("hitCount", qc.getHitCount());
+            info.put("missCount", qc.getMissCount());
+            info.put("evictionCount", qc.getEvictionCount());
+            info.put("totalCount", qc.getTotalCount());
+            cacheInfo.put(info);
+        }
+
+        return cacheInfo;
+    }
+
+    public void cacheDefaultQuery()
+    {
+        try
+        {
+            JBrowseLuceneSearch.clearCache(_jsonFile.getObjectId());
+            doSearch(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false);
+        }
+        catch (ParseException | IOException e)
+        {
+            _log.error("Unable to cache default query for: " + _jsonFile.getObjectId(), e);
+        }
+    }
+
+    public static void clearCache(@Nullable String jbrowseTrackId)
+    {
+        if (jbrowseTrackId == null)
+        {
+            _cache.clear();
+        }
+        else
+        {
+            _cache.remove(jbrowseTrackId);
+        }
+    }
 }
diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java
index 54dec578d..daeb91419 100644
--- a/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java
+++ b/jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java
@@ -312,6 +312,13 @@ public void registerLuceneIndexDetector(LuceneIndexDetector detector)
         _detectors.add(detector);
     }
 
+    @Override
+    public void cacheDefaultQuery(User u, String sessionId, String trackId)
+    {
+        JBrowseLuceneSearch luceneSearch = JBrowseLuceneSearch.create(sessionId, trackId, u);
+        luceneSearch.cacheDefaultQuery();
+    }
+
     public static final class DefaultLuceneIndexDetector implements LuceneIndexDetector
     {
         @Override
diff --git a/jbrowse/src/org/labkey/jbrowse/model/JsonFile.java b/jbrowse/src/org/labkey/jbrowse/model/JsonFile.java
index 86546b93d..aa63b1e50 100644
--- a/jbrowse/src/org/labkey/jbrowse/model/JsonFile.java
+++ b/jbrowse/src/org/labkey/jbrowse/model/JsonFile.java
@@ -51,9 +51,9 @@
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.util.Path;
 import org.labkey.api.view.UnauthorizedException;
+import org.labkey.jbrowse.JBrowseLuceneSearch;
 import org.labkey.jbrowse.JBrowseManager;
 import org.labkey.jbrowse.JBrowseSchema;
-import org.labkey.jbrowse.pipeline.IndexVariantsStep;
 import org.labkey.jbrowse.pipeline.JBrowseLucenePipelineJob;
 import org.labkey.sequenceanalysis.run.util.TabixRunner;
@@ -964,6 +964,8 @@ public File prepareResource(User u, Logger log, boolean throwIfNotPrepared, bool
         }
         else if (existingLuceneDir != null && existingLuceneDir.exists())
         {
+            JBrowseLuceneSearch.clearCache(getObjectId());
+
             // Note: this could exist, but be an empty folder:
             if (luceneDir.exists())
             {
@@ -1004,7 +1006,7 @@ else if (sizeInGb > 50)
                 try
                 {
                     PipeRoot root = PipelineService.get().getPipelineRootSetting(getContainerObj());
-                    PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
+                    PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, getObjectId(), vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
                 }
                 catch (PipelineValidationException e)
                 {
@@ -1030,6 +1032,7 @@ else if (sizeInGb > 50)
             if (forceReprocess || !doesLuceneIndexExist())
             {
                 JBrowseLucenePipelineJob.prepareLuceneIndex(targetFile, luceneDir, log, getInfoFieldsToIndex(), allowLenientLuceneProcessing());
+                JBrowseLuceneSearch.clearCache(getObjectId());
             }
             else
             {
diff --git a/jbrowse/src/org/labkey/jbrowse/pipeline/IndexVariantsStep.java b/jbrowse/src/org/labkey/jbrowse/pipeline/IndexVariantsStep.java
index 407136745..b8fce84ea 100644
--- a/jbrowse/src/org/labkey/jbrowse/pipeline/IndexVariantsStep.java
+++ b/jbrowse/src/org/labkey/jbrowse/pipeline/IndexVariantsStep.java
@@ -67,7 +67,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
             throw new PipelineJobException("Unable to find file: " + idx.getPath());
         }
 
-        output.addSequenceOutput(idx, "Lucene index: " + inputVCF.getName(), CATEGORY, null, null, genome.getGenomeId(), "Fields indexed: " + infoFieldsRaw);
+        output.addSequenceOutput(idx, "Lucene index: " + inputVCF.getName(), CATEGORY, null, null, genome.getGenomeId(), "Fields indexed: " + infoFields.size());
 
         return output;
     }
diff --git a/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLuceneFinalTask.java b/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLuceneFinalTask.java
new file mode 100644
index 000000000..b25f9b05d
--- /dev/null
+++ b/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLuceneFinalTask.java
@@ -0,0 +1,84 @@
+package org.labkey.jbrowse.pipeline;
+
+import org.jetbrains.annotations.NotNull;
+import org.labkey.api.pipeline.AbstractTaskFactory;
+import org.labkey.api.pipeline.AbstractTaskFactorySettings;
+import org.labkey.api.pipeline.PipelineJob;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.pipeline.PipelineJobService;
+import org.labkey.api.pipeline.RecordedAction;
+import org.labkey.api.pipeline.RecordedActionSet;
+import org.labkey.api.util.FileType;
+import org.labkey.jbrowse.JBrowseLuceneSearch;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * User: bbimber
+ * Date: 8/6/12
+ * Time: 12:57 PM
+ */
+public class JBrowseLuceneFinalTask extends PipelineJob.Task<JBrowseLuceneFinalTask.Factory>
+{
+    protected JBrowseLuceneFinalTask(Factory factory, PipelineJob job)
+    {
+        super(factory, job);
+    }
+
+    public static class Factory extends AbstractTaskFactory<AbstractTaskFactorySettings, Factory>
+    {
+        public Factory()
+        {
+            super(JBrowseLuceneFinalTask.class);
+        }
+
+        @Override
+        public List<FileType> getInputTypes()
+        {
+            return Collections.emptyList();
+        }
+
+        @Override
+        public String getStatusName()
+        {
+            return PipelineJob.TaskStatus.running.toString();
+        }
+
+        @Override
+        public List<String> getProtocolActionNames()
+        {
+            return List.of("JBrowse-Lucene-Finalize");
+        }
+
+        @Override
+        public PipelineJob.Task<?> createTask(PipelineJob job)
+        {
+            return new JBrowseLuceneFinalTask(this, job);
+        }
+
+        @Override
+        public boolean isJobComplete(PipelineJob job)
+        {
+            return false;
+        }
+    }
+
+    @Override
+    @NotNull
+    public RecordedActionSet run() throws PipelineJobException
+    {
+        if (PipelineJobService.get().getLocationType() != PipelineJobService.LocationType.WebServer)
+        {
+            throw new PipelineJobException("This task must run on the webserver!");
+        }
+
+        JBrowseLuceneSearch.clearCache(getPipelineJob().getJbrowseTrackId());
+
+        return new RecordedActionSet(Collections.singleton(new RecordedAction("JBrowse-Lucene")));
+    }
+
+    private JBrowseLucenePipelineJob getPipelineJob()
+    {
+        return (JBrowseLucenePipelineJob)getJob();
+    }
+}
diff --git a/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLucenePipelineJob.java b/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLucenePipelineJob.java
index a3960ff9e..4c4f6ad92 100644
--- a/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLucenePipelineJob.java
+++ b/jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLucenePipelineJob.java
@@ -39,6 +39,7 @@ public class JBrowseLucenePipelineJob extends PipelineJob
 {
     private List<String> _infoFields;
+    private String _jbrowseTrackId;
     private File _vcf;
     private File _targetDir;
     private boolean _allowLenientLuceneProcessing = false;
@@ -48,9 +49,10 @@ protected JBrowseLucenePipelineJob()
     {
     }
 
-    public JBrowseLucenePipelineJob(Container c, User user, PipeRoot pipeRoot, File vcf, File targetDir, List<String> infoFields, boolean allowLenientLuceneProcessing)
+    public JBrowseLucenePipelineJob(Container c, User user, PipeRoot pipeRoot, String jbrowseTrackId, File vcf, File targetDir, List<String> infoFields, boolean allowLenientLuceneProcessing)
     {
         super(JBrowseLucenePipelineProvider.NAME, new ViewBackgroundInfo(c, user, null), pipeRoot);
+        _jbrowseTrackId = jbrowseTrackId;
         _vcf = vcf;
         _targetDir = targetDir;
         _infoFields = infoFields;
@@ -90,7 +92,7 @@ public ActionURL getStatusHref()
     }
 
     @Override
-    public TaskPipeline getTaskPipeline()
+    public TaskPipeline getTaskPipeline()
     {
         return PipelineJobService.get().getTaskPipeline(new TaskId(JBrowseLucenePipelineJob.class));
     }
@@ -105,6 +107,16 @@ public void setInfoFields(List<String> infoFields)
         _infoFields = infoFields;
     }
 
+    public String getJbrowseTrackId()
+    {
+        return _jbrowseTrackId;
+    }
+
+    public void setJbrowseTrackId(String jbrowseTrackId)
+    {
+        _jbrowseTrackId = jbrowseTrackId;
+    }
+
     public File getVcf()
     {
         return _vcf;
diff --git a/jbrowse/webapp/WEB-INF/jbrowse/jbrowseContext.xml b/jbrowse/webapp/WEB-INF/jbrowse/jbrowseContext.xml
index 8ffdbc8b5..b477169cb 100644
--- a/jbrowse/webapp/WEB-INF/jbrowse/jbrowseContext.xml
+++ b/jbrowse/webapp/WEB-INF/jbrowse/jbrowseContext.xml
@@ -8,6 +8,7 @@
+
@@ -25,6 +26,7 @@
                 org.labkey.jbrowse.pipeline.JBrowseLuceneTask
+                org.labkey.jbrowse.pipeline.JBrowseLuceneFinalTask
diff --git a/singlecell/resources/chunks/AppendCiteSeq.R b/singlecell/resources/chunks/AppendCiteSeq.R
index a9688bd7e..b070f3fb0 100644
--- a/singlecell/resources/chunks/AppendCiteSeq.R
+++ b/singlecell/resources/chunks/AppendCiteSeq.R
@@ -24,7 +24,7 @@ for (datasetId in names(seuratObjects)) {
     }
   }
 
-  seuratObj <- CellMembrane::AppendCiteSeq(seuratObj, unfilteredMatrixDir = matrixDir, normalizeMethod = normalizeMethod, datasetId = datasetId, featureMetadata = featureMetadata, adtWhitelist = adtWhitelist, runCellBender = runCellBender, aggregateBarcodeFile = aggregateBarcodeFile)
+  seuratObj <- CellMembrane::AppendCiteSeq(seuratObj, unfilteredMatrixDir = matrixDir, normalizeMethod = normalizeMethod, datasetId = datasetId, featureMetadata = featureMetadata, adtWhitelist = adtWhitelist, aggregateBarcodeFile = aggregateBarcodeFile)
 } else {
   print('matrixDir was NULL, skipping CITE-seq')
 }
diff --git a/singlecell/resources/chunks/CheckExpectations.R b/singlecell/resources/chunks/CheckExpectations.R
index a82c2ac31..803c3f423 100644
--- a/singlecell/resources/chunks/CheckExpectations.R
+++ b/singlecell/resources/chunks/CheckExpectations.R
@@ -51,7 +51,23 @@ for (datasetId in names(seuratObjects)) {
   }
 
   if (requireRiraImmune) {
-    CheckField(seuratObj, datasetId, 'RIRA_Immune_v2.cellclass')
+    # Note: if a given dataset has fewer than 200 cells, it will return NAs for RIRA:
+    if (!'RIRA_Immune_v2.cellclass' %in% names(seuratObj@meta.data)) {
+      addErrorMessage(paste0('Missing RIRA_Immune_v2.cellclass for dataset: ', datasetId))
+    }
+
+    if ('DatasetId' %in% names(seuratObj@meta.data)) {
+      for (ds in sort(unique(seuratObj@meta.data$DatasetId))) {
+        dat <- seuratObj@meta.data$RIRA_Immune_v2.cellclass[seuratObj@meta.data$DatasetId == ds]
+        if (any(is.na(dat))) {
+          if (length(dat) > 200) {
+            addErrorMessage(paste0('NA values found for RIRA_Immune_v2.cellclass for DatasetId: ', ds, ', for dataset: ', datasetId))
+          } else {
+            print(paste0('NA values found for RIRA_Immune_v2.cellclass for DatasetId: ', ds, ', for dataset: ', datasetId, '. Ignoring because there are fewer than 200 total cells'))
+          }
+        }
+      }
+    }
   }
 
   if (length(errorMessages) > 0) {
diff --git a/singlecell/resources/chunks/ClassifyTNKByExpression.R b/singlecell/resources/chunks/ClassifyTNKByExpression.R
index 3eb990f58..822ab94d8 100644
--- a/singlecell/resources/chunks/ClassifyTNKByExpression.R
+++ b/singlecell/resources/chunks/ClassifyTNKByExpression.R
@@ -1,12 +1,24 @@
+if (!file.exists('/homeDir/.netrc')) {
+  print(list.files('/homeDir'))
+  stop('Unable to find file: /homeDir/.netrc')
+}
+
+invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc'))
+Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder)
+
 for (datasetId in names(seuratObjects)) {
-    printName(datasetId)
-    seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
+  printName(datasetId)
+  seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
+
+  if (!'HasCDR3Data' %in% names(seuratObj@meta.data)) {
+    seuratObj <- Rdiscvr::DownloadAndAppendTcrClonotypes(seuratObj)
+  }
 
-    seuratObj <- Rdiscvr::ClassifyTNKByExpression(seuratObj)
+  seuratObj <- Rdiscvr::ClassifyTNKByExpression(seuratObj)
 
-    saveData(seuratObj, datasetId)
+  saveData(seuratObj, datasetId)
 
-    # Cleanup
-    rm(seuratObj)
-    gc()
+  # Cleanup
+  rm(seuratObj)
+  gc()
 }
\ No newline at end of file
diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js
index 88ff78ea4..b2f56edfc 100644
--- a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js
+++ b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js
@@ -94,7 +94,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', {
         transform: 'hto'
     },{
         name: 'cells',
-        labels: ['Cells', 'Cell #', 'Sort', 'Sort Cell Count'],
+        labels: ['Cells', 'Cell #', 'Sort', 'Sort Cell Count', 'Cells/sample'],
        allowRowSpan: false,
         allowBlank: false,
         transform: 'cells'
diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
index 6febfc8bb..898c5ccdc 100644
--- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
+++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
@@ -127,6 +127,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
                 FieldKey.fromString("citeseqReadsetId/totalFiles"),
                 FieldKey.fromString("citeseqPanel"),
                 FieldKey.fromString("status"),
+                FieldKey.fromString("plateId"),
                 FieldKey.fromString("readsetId"))
         );
 
@@ -151,6 +152,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
             writer.writeNext(new String[]{"ReadsetId", "CDNA_ID", "SubjectId", "Stim", "Population", "HashingReadsetId", "HasHashingReads", "HTO_Name", "HTO_Seq", "CiteSeqReadsetId", "HasCiteSeqReads", "CiteSeqPanel"});
             Set<String> distinctHTOs = new HashSet<>();
             Set<Boolean> hashingStatus = new HashSet<>();
+            Map<String, Set<String>> plateToHto = new HashMap<>();
             AtomicInteger totalWritten = new AtomicInteger(0);
             for (Readset rs : cachedReadsets)
             {
@@ -186,10 +188,17 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
                         uniqueGex.add(results.getInt(FieldKey.fromString("readsetId")));
                     }
 
-                    boolean useCellHashing = results.getObject(FieldKey.fromString("sortId/hto")) != null;
-                    hashingStatus.add(useCellHashing);
-                    if (useCellHashing)
+                    boolean hasHTO = results.getObject(FieldKey.fromString("sortId/hto")) != null;
+
+                    if (!plateToHto.containsKey(results.getString(FieldKey.fromString("plateId"))))
                     {
+                        plateToHto.put(results.getString(FieldKey.fromString("plateId")), new HashSet<>());
+                    }
+
+                    if (hasHTO)
+                    {
+                        plateToHto.get(results.getString(FieldKey.fromString("plateId"))).add(results.getString(FieldKey.fromString("sortId/hto")));
+
                         if (results.getObject(FieldKey.fromString("hashingReadsetId")) == null)
                         {
                             // NOTE: there can be lanes with single HTOs/lane. This will fail down the line
@@ -244,12 +253,17 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
                 throw new PipelineJobException("There is a problem with either cell hashing or CITE-seq. See the file: " + output.getName());
             }
 
+            // If there is one HTO per plate, hashing is not actually needed:
+            for (String plateId : plateToHto.keySet())
+            {
+                Set<String> htos = plateToHto.get(plateId);
+                hashingStatus.add(htos.size() > 1);
+            }
+
             if (hashingStatus.size() > 1)
             {
                 job.getLogger().info("The selected readsets/cDNA records use a mixture of cell hashing and non-hashing.");
             }
-
-            //NOTE: hashingStatus.isEmpty() indicates there are no cDNA records associated with the data
         }
 
         if (doH5Caching)
diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java
index d2b95a86c..c8ab0caf6 100644
--- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java
+++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java
@@ -1107,11 +1107,6 @@ else if ("NotUsed".equals(val))
             descriptions.add("Cite-seq Normalization: " + citeNormalize);
         }
 
-        if (ctx.getParams().optBoolean("singleCell.AppendCiteSeq.runCellBender", false))
-        {
-            descriptions.add("Cite-seq/CellBender: true");
-        }
-
         return StringUtils.join(descriptions, "\n");
     }
 }
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java
index b8d73ef16..8f5a2286e 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java
@@ -54,10 +54,6 @@ private static List<SeuratToolParameter> getParams()
             put("initialValues", "clr");
         }}, "clr"));
 
-        ret.add(SeuratToolParameter.create("runCellBender", "Run CellBender", "If checked, cellbender will be run on the raw count matrix to remove background/ambient RNA signal", "checkbox", new JSONObject(){{
-
-        }}, false));
-
         ret.add(SeuratToolParameter.create("dropAggregateBarcodes", "Drop Aggregate Barcodes", "If checked, any barcodes marked as protein aggregates by cellranger will be dropped.", "checkbox", new JSONObject(){{
             put("checked", true);
         }}, true));
diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
index 6b49790c2..f7bbfe34a 100644
--- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
+++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
@@ -343,7 +343,7 @@ private void updateNimbleConfigFile(File configFile, NimbleGenome genome) throws
             config.put("num_mismatches", 5);
             config.put("intersect_level", 0);
             config.put("score_threshold", 45);
-            config.put("score_percent", 0.75);
+            config.put("score_percent", 0.45);
             config.put("score_filter", 25);
             //discard_multiple_matches: false
             //discard_multi_hits: ?
@@ -353,7 +353,8 @@ else if ("strict".equals(alignTemplate))
         {
             config.put("num_mismatches", 0);
             config.put("intersect_level", 0);
-            config.put("score_percent", 0.99);
+            // NOTE: this allows a small amount of mismatched ends:
+            config.put("score_percent", 0.90);
             config.put("score_threshold", 45);
             config.put("score_filter", 25);
         }