diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js
index 8522f184c..d724dd672 100644
--- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js
+++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/AnalysisSectionPanel.js
@@ -203,6 +203,8 @@ Ext4.define('SequenceAnalysis.panel.AnalysisSectionPanel', {
             title: 'Add Steps',
             border: false,
             width: 800,
+            autoScroll: true,
+            maxHeight: '90%',
             items: items,
             buttons: [{
                 text: 'Done',
diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js
index dfaa1e07d..a36c4fe1f 100644
--- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js
+++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceAnalysisPanel.js
@@ -142,7 +142,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
             containerPath: this.queryContainer,
             schemaName: 'sequenceanalysis',
             queryName: 'readdata',
-            columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists',
+            columns: 'rowid,readset,readset/name,container,container/displayName,container/path,fileid1,fileid1/name,fileid1/fileexists,fileid2,fileid2/name,fileid2/fileexists,sra_accession',
             metadata: {
                 queryContainerPath: {
                     createIfDoesNotExist: true,
@@ -160,11 +160,17 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
                 load: function (store) {
                     var errors = [];
                     var errorNames = [];
+                    var archived = [];
                     store.each(function(rec){
                         if (rec.get('fileid1')){
                             if (!rec.get('fileid1/fileexists')){
-                                errors.push(rec);
-                                errorNames.push(rec.get('readset/name'));
+                                if (!rec.get('sra_accession')) {
+                                    errors.push(rec);
+                                    errorNames.push(rec.get('readset/name'));
+                                }
+                                else {
+                                    archived.push(rec.get('readset/name'))
+                                }
                             }
                             else {
                                 this.fileIds.push(rec.get('fileid1'));
@@ -178,8 +184,13 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {

                         if (rec.get('fileid2')){
                             if (!rec.get('fileid2/fileexists')){
-                                errors.push(rec);
-                                errorNames.push(rec.get('name'))
+                                if (!rec.get('sra_accession')) {
+                                    errors.push(rec);
+                                    errorNames.push(rec.get('name'))
+                                }
+                                else {
+                                    archived.push(rec.get('name'));
+                                }
                             }
                             else {
                                 this.fileIds.push(rec.get('fileid2'));
@@ -188,7 +199,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
                         }
                     }, this);

-                    this.onStoreLoad(errorNames);
+                    this.onStoreLoad(errorNames, archived);

                    var target = this.down('#readsetCount');
                    if (target) {
@@ -201,13 +212,18 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
    storesLoaded: 0,
    errorNames: [],
+   archivedNames: [],

-   onStoreLoad: function(errorNames){
+   onStoreLoad: function(errorNames, archivedNames){
        this.storesLoaded++;
        if (errorNames){
            this.errorNames = this.errorNames.concat(errorNames);
            this.errorNames = Ext4.unique(this.errorNames);
        }
+
+       if (archivedNames) {
+           this.archivedNames = Ext4.unique(this.archivedNames.concat(archivedNames));
+       }
        if (this.storesLoaded === 2){
            this.afterStoreLoad();
        }
@@ -225,7 +241,10 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
        dv.refresh();

        if (this.errorNames.length){
-           alert('The follow readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
+           alert('The following readsets lack an input file and will be skipped: ' + this.errorNames.join(', '));
+       }
+       else if (this.archivedNames.length) {
+           Ext4.Msg.alert('Warning', 'One or more readsets contain SRA archived data. Please choose the option to auto-download these data');
        }
    },
@@ -326,6 +345,14 @@ Ext4.define('SequenceAnalysis.panel.SequenceAnalysisPanel', {
            uncheckedValue: false,
            checked: false,
            xtype: 'checkbox'
+       },{
+           fieldLabel: 'Restore SRA Data If Needed',
+           helpPopup: 'If selected, any archived sequence data that contains an SRA accession will be re-downloaded to a temp location',
+           name: 'doSraDownloadIfNeeded',
+           inputValue: true,
+           uncheckedValue: false,
+           checked: true,
+           xtype: 'checkbox'
        }, this.getSaveTemplateCfg(),{
            fieldLabel: 'Submit Jobs To Same Folder/Workbook As Readset?',
            helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
index 70f711efa..9616213a2 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
@@ -1,5 +1,6 @@
 package org.labkey.sequenceanalysis.analysis;

+import com.google.common.io.Files;
 import htsjdk.samtools.SAMFileHeader;
 import htsjdk.samtools.SAMFileWriter;
 import htsjdk.samtools.SAMFileWriterFactory;
@@ -22,6 +23,7 @@ import org.labkey.api.sequenceanalysis.model.Readset;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
 import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
@@ -111,6 +113,10 @@ else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | Sequenc
        {
            getAndValidateHeaderForVcf(so, newRsName);
        }
+       else
+       {
+           throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath());
+       }

        ctx.getSequenceSupport().cacheObject("readsetId", newRsName);
    }
@@ -207,6 +213,18 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName
        String existingSample = header.getGenotypeSamples().get(0);

        File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt");
+       if (!sampleNamesFile.exists())
+       {
+           try
+           {
+               Files.touch(sampleNamesFile);
+           }
+           catch (IOException e)
+           {
+               throw new PipelineJobException(e);
+           }
+       }
+
        try (PrintWriter writer = PrintWriters.getPrintWriter(sampleNamesFile, StandardOpenOption.APPEND))
        {
            writer.println(newRsName);
@@ -225,11 +243,19 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName
        try
        {
            File outputIdx = SequenceAnalysisService.get().ensureVcfIndex(outputVcf, ctx.getLogger(), false);
-           FileUtils.moveFile(outputVcf, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
+           if (so.getFile().exists())
+           {
+               so.getFile().delete();
+           }
+           FileUtils.moveFile(outputVcf, so.getFile());

            FileType gz = new FileType(".gz");
            File inputIndex = gz.isType(so.getFile()) ? new File(so.getFile().getPath() + ".tbi") : new File(so.getFile().getPath() + FileExtensions.TRIBBLE_INDEX);
-           FileUtils.moveFile(outputIdx, inputIndex, StandardCopyOption.REPLACE_EXISTING);
+           if (inputIndex.exists())
+           {
+               inputIndex.delete();
+           }
+           FileUtils.moveFile(outputIdx, inputIndex);

            addTracker(so, existingSample, newRsName);
        }
@@ -243,6 +269,11 @@ private void addTracker(SequenceOutputFile so, String existingSample, String new
    {
        File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt");
        boolean preExisting = tracker.exists();
+       if (!preExisting)
+       {
+           Files.touch(tracker);
+       }
+
        try (PrintWriter writer = PrintWriters.getPrintWriter(tracker, StandardOpenOption.APPEND))
        {
            if (!preExisting)
@@ -279,11 +310,17 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
            throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
        }

+       ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id());
+       if (rg == null)
+       {
+           throw new PipelineJobException("Unable to find genome: " + so.getLibrary_id());
+       }
+
        ctx.getFileManager().addIntermediateFile(headerBam);
        ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));

        File output = new File(ctx.getWorkingDirectory(), so.getFile().getName());
-       new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam);
+       new ReplaceSamHeaderWrapper(ctx.getLogger()).execute(so.getFile(), output, headerBam, rg);

        if (!output.exists())
        {
            throw new PipelineJobException("Missing file: " + output.getPath());
@@ -291,8 +328,18 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new

        File outputIdx = SequencePipelineService.get().ensureBamIndex(output, ctx.getLogger(), false);
-       FileUtils.moveFile(output, so.getFile(), StandardCopyOption.REPLACE_EXISTING);
-       FileUtils.moveFile(outputIdx, SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), StandardCopyOption.REPLACE_EXISTING);
+       if (so.getFile().exists())
+       {
+           so.getFile().delete();
+       }
+       FileUtils.moveFile(output, so.getFile());
+
+       File inputIndex = SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile());
+       if (inputIndex.exists())
+       {
+           inputIndex.delete();
+       }
+       FileUtils.moveFile(outputIdx, inputIndex);

        addTracker(so, existingSample, newRsName);
    }
@@ -315,7 +362,7 @@ protected String getToolName()
        return "ReplaceSamHeader";
    }

-   public void execute(File input, File output, File headerBam) throws PipelineJobException
+   public void execute(File input, File output, File headerBam, ReferenceGenome genome) throws PipelineJobException
    {
        List<String> params = new ArrayList<>(getBaseArgs());
@@ -328,6 +375,9 @@ public void execute(File input, File output, File headerBam) throws PipelineJobE
        params.add("--HEADER");
        params.add(headerBam.getPath());

+       params.add("-R");
+       params.add(genome.getWorkingFastaFile().getPath());
+
        execute(params);
    }
}
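Note: this handler repeatedly replaces FileUtils.moveFile(src, dest, StandardCopyOption.REPLACE_EXISTING) with an explicit delete-then-move, which suggests the commons-io version in use lacks the CopyOption overload. A minimal sketch of that pattern as a reusable helper (the class and method names are hypothetical, not part of this patch):

    import java.io.File;
    import java.io.IOException;
    import org.apache.commons.io.FileUtils;

    public class FileMoveUtil
    {
        // Sketch: emulate moveFile(src, dest, REPLACE_EXISTING) on commons-io
        // versions whose moveFile(File, File) throws when dest already exists.
        public static void moveReplacing(File src, File dest) throws IOException
        {
            if (dest.exists() && !dest.delete())
            {
                throw new IOException("Unable to delete: " + dest.getPath());
            }

            FileUtils.moveFile(src, dest);
        }
    }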
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java
index 203df59db..4636cc2e6 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java
@@ -5,6 +5,7 @@
 import org.labkey.api.pipeline.RecordedAction;
 import org.labkey.api.pipeline.RecordedActionSet;
 import org.labkey.api.pipeline.WorkDirectoryTask;
+import org.labkey.api.sequenceanalysis.model.ReadData;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractSequenceTaskFactory;
 import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
 import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
@@ -106,7 +107,15 @@ public RecordedActionSet run() throws PipelineJobException

        if (getPipelineJob().getReadset().hasArchivedData())
        {
-           throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
+           if (!getPipelineJob().shouldAllowArchivedReadsets())
+           {
+               throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
+           }
+
+           if (getPipelineJob().getReadset().getReadData().stream().filter(ReadData::isArchived).filter(rd -> rd.getSra_accession() == null).count() > 0)
+           {
+               throw new PipelineJobException("The input readset has archived read data that lacks an SRA accession");
+           }
        }

        getHelper().cacheExpDatasForParams();
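The new guard rejects a readset only when restoration is impossible: archived data is tolerated if the job opted in and every archived record carries an SRA accession. A sketch of that decision logic in isolation (ReadData and PipelineJobException are the LabKey types used above; the class and method names here are illustrative):

    import java.util.List;
    import org.labkey.api.pipeline.PipelineJobException;
    import org.labkey.api.sequenceanalysis.model.ReadData;

    final class ArchivedReadDataGate
    {
        // Sketch: archived read data is acceptable only if the job opted in AND
        // every archived record has an SRA accession we can re-download from.
        static void validate(List<ReadData> readData, boolean allowArchived) throws PipelineJobException
        {
            for (ReadData rd : readData)
            {
                if (!rd.isArchived())
                {
                    continue;
                }

                if (!allowArchived)
                {
                    throw new PipelineJobException("The input readset has archived read data and cannot be used for new alignments");
                }

                if (rd.getSra_accession() == null)
                {
                    throw new PipelineJobException("Archived read data lacks an SRA accession: " + rd.getRowid());
                }
            }
        }
    }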
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java
index bc6a3ea03..ab54beb8e 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java
@@ -18,6 +18,7 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
 import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import com.google.common.io.Files;
 import htsjdk.samtools.SAMFileHeader;
 import htsjdk.samtools.ValidationStringency;
 import org.apache.commons.io.FileUtils;
@@ -68,6 +69,7 @@
 import org.labkey.api.util.Pair;
 import org.labkey.sequenceanalysis.ReadDataImpl;
 import org.labkey.sequenceanalysis.SequenceReadsetImpl;
+import org.labkey.sequenceanalysis.run.RestoreSraDataHandler;
 import org.labkey.sequenceanalysis.run.bampostprocessing.SortSamStep;
 import org.labkey.sequenceanalysis.run.preprocessing.TrimmomaticWrapper;
 import org.labkey.sequenceanalysis.run.util.AddOrReplaceReadGroupsWrapper;
@@ -238,6 +240,7 @@ public RecordedActionSet run() throws PipelineJobException
        try
        {
            SequenceReadsetImpl rs = getPipelineJob().getReadset();
+           restoreArchivedReadDataIfNeeded(rs);
            copyReferenceResources();

            //then we preprocess FASTQ files, if needed
@@ -291,6 +294,8 @@ private void logEnvironment() throws PipelineJobException

    private Map<ReadData, Pair<File, File>> performFastqPreprocessing(SequenceReadsetImpl rs) throws IOException, PipelineJobException
    {
+       getJob().setStatus(PipelineJob.TaskStatus.running, "Fastq Preprocessing");
+
        if (_resumer.isFastqPreprocessingDone())
        {
            getJob().getLogger().info("resuming FASTQ preprocessing from saved state");
@@ -670,6 +675,9 @@ else if (pair.first.equals(pair.second))

    private void alignSet(Readset rs, String basename, Map<ReadData, Pair<File, File>> files, ReferenceGenome referenceGenome) throws IOException, PipelineJobException
    {
+       AlignmentStep alignmentStep = getHelper().getSingleStep(AlignmentStep.class).create(getHelper());
+       boolean discardBam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false);
+
        File bam;
        if (_resumer.isInitialAlignmentDone())
        {
@@ -687,11 +695,28 @@ private void alignSet(Readset rs, String basename, Map
-               List<File> toRetain = Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam));
+               List<File> toRetain = bam == null ? Collections.emptyList() : Arrays.asList(bam, SequenceUtil.getExpectedIndex(bam));
                getTaskFileManagerImpl().deleteIntermediateFiles(toRetain);
            }

-           _resumer.setInitialAlignmentDone(bam, alignActions);
+           _resumer.setInitialAlignmentDone(bam, alignActions, discardBam);
+       }
+
+       // This is a special case where the alignment does not actually generate a permanent BAM
+       if (bam == null && discardBam)
+       {
+           if (!SequencePipelineService.get().getSteps(getJob(), BamProcessingStep.class).isEmpty())
+           {
+               throw new PipelineJobException("No BAM was created, but post-processing steps were selected!");
+           }
+
+           if (!SequencePipelineService.get().getSteps(getJob(), AnalysisStep.class).isEmpty())
+           {
+               throw new PipelineJobException("No BAM was created, but analysis steps were selected!");
+           }
+
+           getJob().getLogger().info("No BAM was created, but discard BAM was selected, so skipping all downstream steps");
+           return;
        }

        //post-processing
@@ -780,7 +805,6 @@ else if (step.expectToCreateNewBam())

        //always end with coordinate sorted
        getJob().setStatus(PipelineJob.TaskStatus.running, "SORTING BAM");
-       AlignmentStep alignmentStep = getHelper().getSingleStep(AlignmentStep.class).create(getHelper());
        if (_resumer.isBamSortDone())
        {
            getJob().getLogger().info("BAM sort already performed, resuming");
@@ -1378,57 +1402,66 @@ public File doAlignmentForSet(List<Pair<File, File>> inputFiles, ReferenceGenome
            AlignmentStep.AlignmentOutput alignmentOutput = alignmentStep.performAlignment(rs, forwardFastqs, reverseFastqs, outputDirectory, referenceGenome, SequenceTaskHelper.getUnzippedBaseName(inputFiles.get(0).first.getName()) + "." + alignmentStep.getProvider().getName().toLowerCase(), String.valueOf(lowestReadDataId), platformUnit);
            getHelper().getFileManager().addStepOutputs(alignmentAction, alignmentOutput);

-           if (alignmentOutput.getBAM() == null || !alignmentOutput.getBAM().exists())
-           {
-               throw new PipelineJobException("Unable to find BAM file after alignment: " + alignmentOutput.getBAM());
-           }
            Date end = new Date();
            alignmentAction.setEndTime(end);
            getJob().getLogger().info(alignmentStep.getProvider().getLabel() + " Duration: " + DurationFormatUtils.formatDurationWords(end.getTime() - start.getTime(), true, true));
            actions.add(alignmentAction);

-           SequenceUtil.logFastqBamDifferences(getJob().getLogger(), alignmentOutput.getBAM());
-
-           ToolParameterDescriptor mergeParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.SUPPORT_MERGED_UNALIGNED);
-           boolean doMergeUnaligned = alignmentStep.getProvider().supportsMergeUnaligned() && mergeParam != null && mergeParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false);
-           if (doMergeUnaligned)
+           boolean discardBam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false);
+           if (discardBam && alignmentOutput.getBAM() == null)
            {
-               getJob().setStatus(PipelineJob.TaskStatus.running, "MERGING UNALIGNED READS INTO BAM" + msgSuffix);
-               getJob().getLogger().info("merging unaligned reads into BAM");
-               File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(alignmentOutput.getBAM());
-               if (idx.exists())
-               {
-                   getJob().getLogger().debug("deleting index: " + idx.getPath());
-                   idx.delete();
-               }
-
-               //merge unaligned reads and clean file
-               MergeBamAlignmentWrapper wrapper = new MergeBamAlignmentWrapper(getJob().getLogger());
-               wrapper.executeCommand(referenceGenome.getWorkingFastaFile(), alignmentOutput.getBAM(), inputFiles, null);
-               getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction);
+               getJob().getLogger().info("The alignment did not produce a BAM, but discardBam was selected. BAM stats steps will be skipped.");
            }
            else
            {
-               getJob().getLogger().info("skipping merge of unaligned reads on BAM");
-           }
+               if (alignmentOutput.getBAM() == null || !alignmentOutput.getBAM().exists())
+               {
+                   throw new PipelineJobException("Unable to find BAM file after alignment: " + alignmentOutput.getBAM());
+               }

-           if (alignmentStep.doAddReadGroups())
-           {
-               getJob().setStatus(PipelineJob.TaskStatus.running, "ADDING READ GROUPS" + msgSuffix);
-               AddOrReplaceReadGroupsWrapper wrapper = new AddOrReplaceReadGroupsWrapper(getJob().getLogger());
-               wrapper.executeCommand(alignmentOutput.getBAM(), null, rs.getReadsetId().toString(), rs.getPlatform(), (platformUnit == null ? rs.getReadsetId().toString() : platformUnit), rs.getName().replaceAll(" ", "_"));
-               getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction);
-           }
-           else
-           {
-               getJob().getLogger().info("skipping read group assignment");
-           }
+               SequenceUtil.logFastqBamDifferences(getJob().getLogger(), alignmentOutput.getBAM());

-           //generate stats
-           getJob().setStatus(PipelineJob.TaskStatus.running, "Generating BAM Stats");
-           FlagStatRunner runner = new FlagStatRunner(getJob().getLogger());
-           runner.execute(alignmentOutput.getBAM());
-           getHelper().getFileManager().addCommandsToAction(runner.getCommandsExecuted(), alignmentAction);
+               ToolParameterDescriptor mergeParam = alignmentStep.getProvider().getParameterByName(AbstractAlignmentStepProvider.SUPPORT_MERGED_UNALIGNED);
+               boolean doMergeUnaligned = alignmentStep.getProvider().supportsMergeUnaligned() && mergeParam != null && mergeParam.extractValue(getJob(), alignmentStep.getProvider(), alignmentStep.getStepIdx(), Boolean.class, false);
+               if (doMergeUnaligned)
+               {
+                   getJob().setStatus(PipelineJob.TaskStatus.running, "MERGING UNALIGNED READS INTO BAM" + msgSuffix);
+                   getJob().getLogger().info("merging unaligned reads into BAM");
+                   File idx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(alignmentOutput.getBAM());
+                   if (idx.exists())
+                   {
+                       getJob().getLogger().debug("deleting index: " + idx.getPath());
+                       idx.delete();
+                   }
+
+                   //merge unaligned reads and clean file
+                   MergeBamAlignmentWrapper wrapper = new MergeBamAlignmentWrapper(getJob().getLogger());
+                   wrapper.executeCommand(referenceGenome.getWorkingFastaFile(), alignmentOutput.getBAM(), inputFiles, null);
+                   getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction);
+               }
+               else
+               {
+                   getJob().getLogger().info("skipping merge of unaligned reads on BAM");
+               }
+
+               if (alignmentStep.doAddReadGroups())
+               {
+                   getJob().setStatus(PipelineJob.TaskStatus.running, "ADDING READ GROUPS" + msgSuffix);
+                   AddOrReplaceReadGroupsWrapper wrapper = new AddOrReplaceReadGroupsWrapper(getJob().getLogger());
+                   wrapper.executeCommand(alignmentOutput.getBAM(), null, rs.getReadsetId().toString(), rs.getPlatform(), (platformUnit == null ? rs.getReadsetId().toString() : platformUnit), rs.getName().replaceAll(" ", "_"));
+                   getHelper().getFileManager().addCommandsToAction(wrapper.getCommandsExecuted(), alignmentAction);
+               }
+               else
+               {
+                   getJob().getLogger().info("skipping read group assignment");
+               }
+
+               //generate stats
+               getJob().setStatus(PipelineJob.TaskStatus.running, "Generating BAM Stats");
+               FlagStatRunner runner = new FlagStatRunner(getJob().getLogger());
+               runner.execute(alignmentOutput.getBAM());
+               getHelper().getFileManager().addCommandsToAction(runner.getCommandsExecuted(), alignmentAction);
+           }

            _resumer.setReadDataAlignmentDone(lowestReadDataId, actions, alignmentOutput.getBAM());
@@ -1579,9 +1612,9 @@ public boolean isInitialAlignmentDone()
            return _mergedBamFile != null;
        }

-       public void setInitialAlignmentDone(File mergedBamFile, List<RecordedAction> actions) throws PipelineJobException
+       public void setInitialAlignmentDone(File mergedBamFile, List<RecordedAction> actions, boolean allowNullBam) throws PipelineJobException
        {
-           if (mergedBamFile == null)
+           if (!allowNullBam && mergedBamFile == null)
            {
                throw new PipelineJobException("BAM is null");
            }
@@ -1858,5 +1891,86 @@ public void serializeTest() throws Exception
            file.delete();
        }
    }
+
+   private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobException
+   {
+       Set<String> sraIDs = new HashSet<>();
+       for (ReadData rd : rs.getReadData())
+       {
+           if (!(rd instanceof ReadDataImpl rdi))
+           {
+               throw new PipelineJobException("Expected readdata to be a ReadDataImpl");
+           }
+
+           if (rd.isArchived())
+           {
+               getJob().setStatus(PipelineJob.TaskStatus.running, "Downloading from SRA");
+               getPipelineJob().getLogger().info("Restoring files for readdata: " + rd.getRowid() + " / " + rd.getSra_accession());
+               if (!getPipelineJob().shouldAllowArchivedReadsets())
+               {
+                   throw new PipelineJobException("This job is not configured to allow archived readsets!");
+               }
+
+               if (rd.getSra_accession() == null)
+               {
+                   throw new PipelineJobException("Missing SRA accession: " + rd.getRowid());
+               }
+               else if (sraIDs.contains(rd.getSra_accession()))
+               {
+                   getJob().getLogger().debug("Already encountered accession, skipping: " + rd.getSra_accession());
+                   if (rs instanceof SequenceReadsetImpl rsi)
+                   {
+                       // Remove the duplicate
+                       List<ReadDataImpl> rdl = new ArrayList<>(rsi.getReadDataImpl());
+                       rdl.remove(rd);
+                       rsi.setReadData(rdl);
+                   }
+                   else
+                   {
+                       throw new PipelineJobException("Expected readset to be SequenceReadsetImpl");
+                   }
+
+                   continue;
+               }
+
+               File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData");
+               getTaskFileManagerImpl().addDeferredIntermediateFile(outDir);
+
+               File doneFile = new File(outDir, rd.getSra_accession() + ".done");
+               sraIDs.add(rd.getSra_accession());
+               RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger());
+               if (doneFile.exists())
+               {
+                   rdi.setFile(new File(outDir, rd.getSra_accession() + "_1.fastq.gz"), 1);
+                   if (rd.getFileId2() != null)
+                   {
+                       rdi.setFile(new File(outDir, rd.getSra_accession() + "_2.fastq.gz"), 2);
+                   }
+               }
+               else
+               {
+                   if (!outDir.exists())
+                   {
+                       outDir.mkdirs();
+                   }
+
+                   Pair<File, File> downloaded = sra.downloadSra(rd.getSra_accession(), outDir);
+                   rdi.setFile(downloaded.first, 1);
+                   rdi.setFile(downloaded.second, 2);
+
+                   try
+                   {
+                       Files.touch(doneFile);
+                   }
+                   catch (IOException e)
+                   {
+                       throw new PipelineJobException(e);
+                   }
+               }
+
+               rdi.setArchived(false);
+           }
+       }
+   }
 }
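restoreArchivedReadDataIfNeeded() keys resumability off a per-accession ".done" marker rather than the fastq itself, since a partially written fastq would otherwise look complete on a restarted job. The same idiom in isolation (plain JDK plus Guava; the class and method names are illustrative):

    import java.io.File;
    import java.io.IOException;
    import com.google.common.io.Files;

    final class DoneMarker
    {
        // Sketch: run an expensive step at most once per key; a zero-byte marker,
        // touched only after the step fully succeeds, lets re-runs skip it safely.
        static void runOnce(File dir, String key, Runnable step) throws IOException
        {
            File done = new File(dir, key + ".done");
            if (done.exists())
            {
                return; // already completed on a prior attempt
            }

            step.run();
            Files.touch(done); // mark success last, so failures re-run the step
        }
    }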
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java
index 146bf959c..8881a7d96 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAnalysisTask.java
@@ -365,21 +365,7 @@ private void processAnalyses(AnalysisModelImpl analysisModel, int runId, List
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java
@@ ... @@ downloadSra(String dataset, File outDir) throws Pipeline
 {
        getLogger().info("Deleting extra files: ");
        files.forEach(f -> {
-           getLogger().info(f.getName());
-           f.delete();
+           if (f.exists())
+           {
+               getLogger().info(f.getName());
+               f.delete();
+           }
        });
    }
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/SortSamStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/SortSamStep.java
index 2216a23bb..9356068b1 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/SortSamStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/SortSamStep.java
@@ -25,7 +25,7 @@
  */
 public class SortSamStep extends AbstractCommandPipelineStep implements BamProcessingStep
 {
-    public SortSamStep(PipelineStepProvider provider, PipelineContext ctx)
+    public SortSamStep(PipelineStepProvider provider, PipelineContext ctx)
     {
         super(provider, ctx, new SortSamWrapper(ctx.getLogger()));
     }
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java
index c254047b1..171bf5d07 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/MergeBamAlignmentWrapper.java
@@ -94,7 +94,7 @@ else if (header.getReadGroups().size() > 1)
            FastqToSamWrapper fq = new FastqToSamWrapper(getLogger());
            fq.setOutputDir(alignedBam.getParentFile());
            fq.setStringency(ValidationStringency.SILENT);
-           File unmappedReadsBam = fq.execute(pair.getKey(), pair.getValue(), SAMFileHeader.SortOrder.queryname, rg);
+           File unmappedReadsBam = fq.execute(pair.getKey(), pair.getValue(), SAMFileHeader.SortOrder.unsorted, rg);
            if (!unmappedReadsBam.exists())
            {
                throw new PipelineJobException("BAM file not created, expected: " + unmappedReadsBam.getPath());
@@ -121,8 +121,6 @@ else if (header.getReadGroups().size() > 1)
        params.add("--UNMAPPED_BAM");
        params.add(finalUnmappedReadsBam.getPath());

-       //TODO: bisulfiteSequence
-
        params.add("--REFERENCE_SEQUENCE");
        params.add(refFasta.getPath());
diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java
index c1721f054..97b12c45f 100644
--- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java
+++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java
@@ -103,6 +103,7 @@ public static class CellHashingParameters
        public boolean skipNormalizationQc = false;
        public Integer minCountPerCell = 5;
        public Double majorityConsensusThreshold = null;
+       public Double minAllowableDoubletRateFilter = null;
        public Double callerDisagreementThreshold = null;
        public List<CALLING_METHOD> methods = CALLING_METHOD.getDefaultConsensusMethods(); //Default to just executing the set used for default consensus calls, rather than additional ones
        public List<CALLING_METHOD> consensusMethods = null;
@@ -110,7 +111,7 @@ public static class CellHashingParameters
        public Integer cells = 0;
        public boolean keepMarkdown = false;
        public File h5File = null;
-       public boolean doTSNE = true;
+       public boolean doTSNE = false;

        private CellHashingParameters()
        {
@@ -124,8 +125,9 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu
        ret.skipNormalizationQc = step.getProvider().getParameterByName("skipNormalizationQc").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false);
        ret.minCountPerCell = step.getProvider().getParameterByName("minCountPerCell").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Integer.class, 3);
        ret.majorityConsensusThreshold = step.getProvider().getParameterByName("majorityConsensusThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null);
+       ret.minAllowableDoubletRateFilter = step.getProvider().getParameterByName("minAllowableDoubletRateFilter").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null);
        ret.callerDisagreementThreshold = step.getProvider().getParameterByName("callerDisagreementThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null);
-       ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, null);
+       ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false);
        ret.retainRawCountFile = step.getProvider().getParameterByName("retainRawCountFile").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, true);
        ret.failIfUnexpectedHtosFound = step.getProvider().getParameterByName("failIfUnexpectedHtosFound").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, true);
        ret.htoReadset = htoReadset;
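The doTSNE change above fixes a latent unboxing problem, not just the default: extractValue(..., Boolean.class, null) yields a Boolean that is null when the parameter is absent, and assigning it to the primitive boolean field throws a NullPointerException. A minimal illustration (the extract method is a hypothetical stand-in for ToolParameterDescriptor.extractValue):

    final class UnboxingDemo
    {
        // Stand-in for extractValue(..., Boolean.class, defaultValue): returns
        // the stored value, or the supplied default when nothing was stored.
        static Boolean extract(Boolean stored, Boolean defaultValue)
        {
            return stored != null ? stored : defaultValue;
        }

        public static void main(String[] args)
        {
            boolean ok = extract(null, false);  // false: safe default
            System.out.println(ok);

            boolean boom = extract(null, null); // NullPointerException on unboxing
            System.out.println(boom);
        }
    }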
@@ -166,8 +168,9 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse
        ret.skipNormalizationQc = params.optBoolean("skipNormalizationQc", false);
        ret.minCountPerCell = params.optInt("minCountPerCell", 3);
        ret.majorityConsensusThreshold = params.get("majorityConsensusThreshold") == null ? null : params.getDouble("majorityConsensusThreshold");
+       ret.minAllowableDoubletRateFilter = params.get("minAllowableDoubletRateFilter") == null ? null : params.getDouble("minAllowableDoubletRateFilter");
        ret.callerDisagreementThreshold = params.get("callerDisagreementThreshold") == null ? null : params.getDouble("callerDisagreementThreshold");
-       ret.doTSNE = params.get("doTSNE") == null || params.getBoolean("doTSNE");
+       ret.doTSNE = params.optBoolean("doTSNE", false);
        ret.retainRawCountFile = params.optBoolean("retainRawCountFile", true);
        ret.failIfUnexpectedHtosFound = params.optBoolean("failIfUnexpectedHtosFound", true);
        ret.htoReadset = htoReadset;
diff --git a/singlecell/resources/chunks/AppendCiteSeq.R b/singlecell/resources/chunks/AppendCiteSeq.R
index b070f3fb0..b22411466 100644
--- a/singlecell/resources/chunks/AppendCiteSeq.R
+++ b/singlecell/resources/chunks/AppendCiteSeq.R
@@ -20,7 +20,8 @@ for (datasetId in names(seuratObjects)) {
   if (dropAggregateBarcodes) {
     aggregateBarcodeFile <- paste0(matrixDir, ".aggregateCounts.csv")
     if (!file.exists(aggregateBarcodeFile)) {
-      stop(paste0('Unable to find file: ', aggregateBarcodeFile))
+      print(paste0('Unable to find file, skipping: ', aggregateBarcodeFile))
+      aggregateBarcodeFile <- NULL
     }
   }
diff --git a/singlecell/resources/chunks/FeaturePlots.R b/singlecell/resources/chunks/FeaturePlots.R
index 1cbc3d20a..31a492ce2 100644
--- a/singlecell/resources/chunks/FeaturePlots.R
+++ b/singlecell/resources/chunks/FeaturePlots.R
@@ -8,19 +8,19 @@ for (datasetId in names(seuratObjects)) {
   }

   tryCatch({
-    P1 <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = 'tsne', min.cutoff = 'q05', max.cutoff = 'q95')
-    P2 <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = 'umap', min.cutoff = 'q05', max.cutoff = 'q95')
-
-    if ('wnn.umap' %in% names(seuratObj@reductions)) {
-      P3 <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = 'wnn.umap', min.cutoff = 'q05', max.cutoff = 'q95')
-      P1 <- P1 | P2 | P3
-    } else {
-      P1 <- P1 | P2
+    reductions <- intersect(c('tsne', 'umap', 'wnn.umap'), names(seuratObj@reductions))
+    if (length(reductions) == 0) {
+      stop('No reductions found to plot')
     }

-    P1 <- P1 + patchwork::plot_annotation(title = field) + patchwork::plot_layout(guides = "collect")
+    plotList <- list()
+    i <- 0
+    for (reduction in reductions) {
+      i <- i + 1
+      plotList[[i]] <- Seurat::FeaturePlot(seuratObj, features = c(field), reduction = reduction, min.cutoff = 'q05', max.cutoff = 'q95')
+    }

-    print(P1)
+    print(patchwork::wrap_plots(plotList, ncol = length(reductions)) + patchwork::plot_annotation(title = field) + patchwork::plot_layout(guides = "collect"))
   }, error = function(e){
     print(paste0('Error running FeaturePlots for: ', datasetId))
     print(conditionMessage(e))
diff --git a/singlecell/resources/chunks/RunRiraClassification.R b/singlecell/resources/chunks/RunRiraClassification.R
index 986208e64..1e7d5932a 100644
--- a/singlecell/resources/chunks/RunRiraClassification.R
+++ b/singlecell/resources/chunks/RunRiraClassification.R
@@ -4,7 +4,6 @@ for (datasetId in names(seuratObjects)) {

   seuratObj <- RIRA::Classify_ImmuneCells(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix)
   seuratObj <- RIRA::Classify_TNK(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix)
-  seuratObj$RIRA_TNK_v2.predicted_labels[seuratObj$RIRA_Immune_v2.majority_voting != 'T_NK'] <- 'Other'
   seuratObj <- RIRA::Classify_Myeloid(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix)
diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
index 898c5ccdc..4e2a03321 100644
--- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
+++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
@@ -964,8 +964,13 @@ public List getHashingCallingParams(boolean allowMethod
                    put("maxValue", 1);
                    put("decimalPrecision", 2);
                }}, 0.2),
+               ToolParameterDescriptor.create("minAllowableDoubletRateFilter", "Min Allowable Doublet Rate Filter", "This applies to filtering algorithms based on their doublet rate. Typically, any algorithm with a high doublet rate (based on the theoretical rate) is discarded. This sets a minimum to the threshold used for triaging an algorithm", "ldk-numberfield", new JSONObject(){{
+                   put("minValue", 0);
+                   put("maxValue", 1);
+                   put("decimalPrecision", 2);
+               }}, 0.2),
                ToolParameterDescriptor.create("skipNormalizationQc", "Skip Normalization QC", null, "checkbox", null, true),
-               ToolParameterDescriptor.create("doTSNE", "Do tSNE", "If true, tSNE will be performed as part of QC", "checkbox", null, true),
+               ToolParameterDescriptor.create("doTSNE", "Do tSNE", "If true, tSNE will be performed as part of QC", "checkbox", null, false),
                ToolParameterDescriptor.create("retainRawCountFile", "Retain Raw Counts File", null, "checkbox", null, false),
                ToolParameterDescriptor.create("failIfUnexpectedHtosFound", "Fail If Unexpected HTOs Found", "If checked and if there are any HTOs (testing all known HTOs) with counts above the HTOs expected in this experiment, then an error will be thrown", "checkbox", new JSONObject(){{
                    put("checked", true);
@@ -1263,7 +1268,24 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
            String doTSNE = parameters.doTSNE ? "TRUE" : "FALSE";
            String h5String = h5 == null ? "" : ", rawFeatureMatrixH5 = '/work/" + h5.getName() + "'";
            String consensusMethodString = consensusMethodNames.isEmpty() ? "" : ", methodsForConsensus = c('" + StringUtils.join(consensusMethodNames, "','") + "')";
-           writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + ", molInfoFile = '/work/" + molInfo.getName() + "', reportFile = '/work/" + htmlFile.getName() + "', callFile = '/work/" + callsFile.getName() + "', metricsFile = '/work/" + metricsFile.getName() + "', rawCountsExport = '/work/" + countFile.getName() + "', cellbarcodeWhitelist = " + cellbarcodeWhitelist + ", barcodeWhitelist = " + allowableBarcodeParam + ", title = '" + parameters.getReportTitle() + "', skipNormalizationQc = " + skipNormalizationQcString + ", methods = c('" + StringUtils.join(methodNames, "','") + "')" + consensusMethodString + ", keepMarkdown = " + keepMarkdown + ", minCountPerCell = " + (parameters.minCountPerCell == null ? "NULL" : parameters.minCountPerCell) + ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) + ", doTSNE = " + doTSNE + ")");
+           writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String +
+                   ", molInfoFile = '/work/" + molInfo.getName() + "'" +
+                   ", reportFile = '/work/" + htmlFile.getName() + "'" +
+                   ", callFile = '/work/" + callsFile.getName() + "'" +
+                   ", metricsFile = '/work/" + metricsFile.getName() + "'" +
+                   ", rawCountsExport = '/work/" + countFile.getName() + "'" +
+                   ", cellbarcodeWhitelist = " + cellbarcodeWhitelist +
+                   ", barcodeWhitelist = " + allowableBarcodeParam +
+                   ", title = '" + parameters.getReportTitle() + "'" +
+                   ", skipNormalizationQc = " + skipNormalizationQcString +
+                   ", methods = c('" + StringUtils.join(methodNames, "','") + "')" +
+                   consensusMethodString +
+                   ", keepMarkdown = " + keepMarkdown +
+                   ", minCountPerCell = " + (parameters.minCountPerCell == null ? "NULL" : parameters.minCountPerCell) +
+                   ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) +
+                   ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) +
+                   (parameters.minAllowableDoubletRateFilter == null ? "" : ", minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) +
+                   ", doTSNE = " + doTSNE + ")");
            writer.println("print('Rmarkdown complete')");
        }
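Several arguments in the rebuilt CallAndGenerateReport call follow the same "render a null Java value as the R literal NULL" convention. A small helper capturing that convention (hypothetical; the patch inlines the ternaries instead):

    final class RArgs
    {
        // Sketch: format a nullable Java value as an R argument value.
        static String rValue(Object value)
        {
            return value == null ? "NULL" : String.valueOf(value);
        }

        public static void main(String[] args)
        {
            // e.g. ", minCountPerCell = 5" vs ", minCountPerCell = NULL"
            System.out.println(", minCountPerCell = " + rValue(5));
            System.out.println(", minCountPerCell = " + rValue(null));
        }
    }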
"NULL" : parameters.callerDisagreementThreshold) + ", doTSNE = " + doTSNE + ")"); + writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + + ", molInfoFile = '/work/" + molInfo.getName() + "'" + + ", reportFile = '/work/" + htmlFile.getName() + "'" + + ", callFile = '/work/" + callsFile.getName() + "'" + + ", metricsFile = '/work/" + metricsFile.getName() + "'" + + ", rawCountsExport = '/work/" + countFile.getName() + "'" + + ", cellbarcodeWhitelist = " + cellbarcodeWhitelist + + ", barcodeWhitelist = " + allowableBarcodeParam + + ", title = '" + parameters.getReportTitle() + "'" + + ", skipNormalizationQc = " + skipNormalizationQcString + + ", methods = c('" + StringUtils.join(methodNames, "','") + "')" + + consensusMethodString + + ", keepMarkdown = " + keepMarkdown + + ", minCountPerCell = " + (parameters.minCountPerCell == null ? "NULL" : parameters.minCountPerCell) + + ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) + + (parameters.minAllowableDoubletRateFilter == null ? "" : ", minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) + + ", doTSNE = " + doTSNE + ")"); writer.println("print('Rmarkdown complete')"); } diff --git a/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java index 0ef890877..5aefab2c8 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/ProcessSingleCellHandler.java @@ -23,7 +23,10 @@ public String getName() @Override public boolean canProcess(SequenceOutputFile f) { - return f.getFile() != null && LOUPE_TYPE.isType(f.getFile()); + return f.getFile() != null && ( + LOUPE_TYPE.isType(f.getFile()) || + ("10x Run Summary".equals(f.getCategory()) && f.getName().contains("10x Count Summary")) + ); } @Override diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java index 8f5a2286e..d9d58fa37 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendCiteSeq.java @@ -132,26 +132,36 @@ protected Map prepareCountData(SingleCellOutput output, SequenceO if (dropAggregateBarcodes) { + // NOTE: this is the location in CellRanger <= 6.x File aggregateCountFile = new File(existingCountMatrixUmiDir.getParentFile(), "antibody_analysis/aggregate_barcodes.csv"); if (!aggregateCountFile.exists()) { - throw new PipelineJobException("Unable to find aggregate count file: " + aggregateCountFile.getPath()); + // This is the location for >= 7.x + aggregateCountFile = new File(existingCountMatrixUmiDir.getParentFile(), "aggregate_barcodes.csv"); } - localAggregateCountFile = new File(ctx.getOutputDir(), localCopyUmiCountDir.getName() + ".aggregateCounts.csv"); - try - { - if (localAggregateCountFile.exists()) - { - localAggregateCountFile.delete(); - } - FileUtils.copyFile(aggregateCountFile, localAggregateCountFile); + if (!aggregateCountFile.exists()) + { + ctx.getLogger().info("Unable to find aggregate count file, assuming there are none: " + aggregateCountFile.getPath()); } - 
-           catch (IOException e)
+           else
            {
-               throw new PipelineJobException(e);
+               localAggregateCountFile = new File(ctx.getOutputDir(), localCopyUmiCountDir.getName() + ".aggregateCounts.csv");
+               try
+               {
+                   if (localAggregateCountFile.exists())
+                   {
+                       localAggregateCountFile.delete();
+                   }
+
+                   FileUtils.copyFile(aggregateCountFile, localAggregateCountFile);
+               }
+               catch (IOException e)
+               {
+                   throw new PipelineJobException(e);
+               }
+               ctx.getFileManager().addIntermediateFile(localAggregateCountFile);
            }
-           ctx.getFileManager().addIntermediateFile(localAggregateCountFile);
        }

        File validAdt = CellHashingServiceImpl.get().getValidCiteSeqBarcodeMetadataFile(ctx.getSourceDirectory(), parentReadset.getReadsetId());
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java
index 322cd00dd..0314d69d1 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendSaturation.java
@@ -65,6 +65,11 @@ public void init(SequenceOutputHandler.JobContext ctx, List
        {
            if (!LOUPE_TYPE.isType(so.getFile()))
            {
+               if (!so.getFile().getName().endsWith("seurat.rds"))
+               {
+                   throw new PipelineJobException("Unexpected file type: " + so.getFile().getPath());
+               }
+
                File meta = new File(so.getFile().getPath().replaceAll(".seurat.rds", ".cellBarcodes.csv"));
                if (!meta.exists())
                {
diff --git a/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java b/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java
index 4e8f025d0..654fb1238 100644
--- a/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java
+++ b/singlecell/src/org/labkey/singlecell/run/AbstractCellRangerDependentStep.java
@@ -18,7 +18,7 @@
 public class AbstractCellRangerDependentStep extends CellRangerGexCountStep
 {
-    public AbstractCellRangerDependentStep(AlignmentStepProvider provider, PipelineContext ctx, CellRangerWrapper wrapper)
+    public AbstractCellRangerDependentStep(AlignmentStepProvider provider, PipelineContext ctx, CellRangerWrapper wrapper)
     {
         super(provider, ctx, wrapper);
     }
@@ -36,7 +36,6 @@ protected File runCellRanger(AlignmentOutputImpl output, Readset rs, List
        File localBam = new File(outputDirectory, basename + ".cellranger.bam");
        File localBamIdx = SequenceAnalysisService.get().getExpectedBamOrCramIndex(localBam);

-       String idParam = StringUtils.trimToNull(getProvider().getParameterByName("id").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class));
        File cellrangerOutdir = new File(outputDirectory, CellRangerWrapper.getId(idParam, rs));
"HTO and GEX Counts" : null; + String description = "Version: " + wrapper.getVersionString(); + if (ctx.getParams().optBoolean("useGEX", false)) + { + description = description + "\n" + "HTO and GEX Counts"; + }; output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x " + rs.getApplication() + " Summary", "10x Run Summary", rs.getRowId(), null, null, description); File rawCounts = new File(outsdir, "raw_feature_bc_matrix/matrix.mtx.gz"); @@ -366,7 +370,7 @@ private File makeDummyIndex(JobContext ctx) throws PipelineJobException CellRangerWrapper wrapper = new CellRangerWrapper(ctx.getLogger()); List args = new ArrayList<>(); - args.add(wrapper.getExe(true).getPath()); + args.add(wrapper.getExe().getPath()); args.add("mkref"); args.add("--fasta=" + fasta.getPath()); args.add("--genes=" + gtf.getPath()); diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 96da22169..9b73487cf 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -124,8 +124,9 @@ public static List getCellRangerGexParams(@Nullable Lis ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--chemistry"), "chemistry", "Chemistry", "This is usually left blank, in which case cellranger will auto-detect. Example values are: SC3Pv1, SC3Pv2, SC3Pv3, SC5P-PE, SC5P-R2, or SC5P-R1", "textfield", new JSONObject(){{ }}, null), - ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--include-introns"), "includeIntrons", "Include Introns", "If checked, reads from introns will be included in the counts", "checkbox", new JSONObject(){{ - + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--include-introns"), "includeIntrons", "Include Introns", "If selected, reads from introns will be included in the counts", "ldk-simplecombo", new JSONObject(){{ + put("storeValues", "true;false"); + put("value", "false"); }}, null) ); @@ -147,10 +148,10 @@ public boolean supportsGzipFastqs() @Override public String getAlignmentDescription() { - return getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), true); + return getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), true, getWrapper().getVersionString()); } - protected static String getAlignDescription(PipelineStepProvider provider, PipelineContext ctx, int stepIdx, boolean addAligner) + protected static String getAlignDescription(PipelineStepProvider provider, PipelineContext ctx, int stepIdx, boolean addAligner, String cellrangerVersion) { Integer gtfId = provider.getParameterByName("gtfFile").extractValue(ctx.getJob(), provider, stepIdx, Integer.class); File gtfFile = ctx.getSequenceSupport().getCachedData(gtfId); @@ -164,6 +165,10 @@ protected static String getAlignDescription(PipelineStepProvider provider, Pipel } List lines = new ArrayList<>(); + + String includeIntrons = provider.getParameterByName("includeIntrons").extractValue(ctx.getJob(), provider, stepIdx, String.class, "false"); + lines.add("Include Introns: " + includeIntrons); + if (addAligner) { lines.add("Aligner: " + provider.getName()); @@ -174,7 +179,9 @@ protected static String getAlignDescription(PipelineStepProvider provider, Pipel lines.add("GTF: " + gtfFile.getName()); } - return lines.isEmpty() ? 
@@ -271,7 +278,7 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir)
        }

        List<String> args = new ArrayList<>();
-       args.add(getWrapper().getExe(true).getPath());
+       args.add(getWrapper().getExe().getPath());
        args.add("mkref");
        args.add("--fasta=" + referenceGenome.getWorkingFastaFile().getPath());
        args.add("--genes=" + gtfFile.getPath());
@@ -312,6 +319,18 @@ public boolean canAlignMultiplePairsAtOnce()
        return true;
    }

+   private boolean shouldDiscardBam()
+   {
+       return !_alwaysRetainBam && getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+   }
+
+   private boolean _alwaysRetainBam = false;
+
+   public void setAlwaysRetainBam(boolean alwaysRetainBam)
+   {
+       _alwaysRetainBam = alwaysRetainBam;
+   }
+
    @Override
    public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException
    {
@@ -352,7 +371,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
        File indexDir = AlignerIndexUtil.getIndexDir(referenceGenome, getIndexCachedDirName(getPipelineCtx().getJob()));
        args.add("--transcriptome=" + indexDir.getPath());

-       args.add("--create-bam=true");
+       args.add("--create-bam=" + !shouldDiscardBam());

        getWrapper().setWorkingDir(outputDirectory);
@@ -369,12 +388,15 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
        File outdir = new File(outputDirectory, id);
        outdir = new File(outdir, "outs");

-       File bam = new File(outdir, "possorted_genome_bam.bam");
-       if (!bam.exists())
+       if (!shouldDiscardBam())
        {
-           throw new PipelineJobException("Unable to find file: " + bam.getPath());
+           File bam = new File(outdir, "possorted_genome_bam.bam");
+           if (!bam.exists())
+           {
+               throw new PipelineJobException("Unable to find file: " + bam.getPath());
+           }
+           output.setBAM(bam);
        }
-       output.setBAM(bam);

        getWrapper().deleteSymlinks(getWrapper().getLocalFastqDir(outputDirectory));
@@ -393,7 +415,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
            outputHtmlRename.delete();
        }
        FileUtils.moveFile(outputHtml, outputHtmlRename);
-       String description = getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), false);
+       String description = getAlignDescription(getProvider(), getPipelineCtx(), getStepIdx(), false, getWrapper().getVersionString());
        output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x Count Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), description);

        File loupe = new File(outdir, "cloupe.cloupe");
@@ -453,7 +475,25 @@ public boolean alwaysCopyIndexToWorkingDir()

    @Override
    public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Collection<SequenceOutputFile> outputFilesCreated) throws PipelineJobException
    {
-       File metrics = new File(model.getAlignmentFileObject().getParentFile(), "metrics_summary.csv");
+       SequenceOutputFile outputForData = outputFilesCreated.stream().filter(x -> LOUPE_CATEGORY.equals(x.getCategory())).findFirst().orElse(null);
+       if (outputForData == null)
+       {
+           outputForData = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow();
+       }
+
+       File outsDir = outputForData.getFile().getParentFile();
+       Integer dataId = outputForData.getDataId();
+       if (dataId == null)
+       {
+           throw new PipelineJobException("Unable to find dataId for output file");
+       }
+
+       if (model.getRowId() == null)
+       {
+           throw new PipelineJobException("Unable to find rowId for analysis");
+       }
+
+       File metrics = new File(outsDir, "metrics_summary.csv");
        if (metrics.exists())
        {
            getPipelineCtx().getLogger().debug("adding 10x metrics");
@@ -479,17 +519,12 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
                    i++;
                }

-               if (model.getAlignmentFile() == null)
-               {
-                   throw new PipelineJobException("model.getAlignmentFile() was null");
-               }
-
                TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");

                //NOTE: if this job errored and restarted, we may have duplicate records:
                SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
                filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
-               filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
+               filter.addCondition(FieldKey.fromString("dataid"), dataId, CompareType.EQUAL);
                filter.addCondition(FieldKey.fromString("category"), "Cell Ranger", CompareType.EQUAL);
                filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
                TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
@@ -509,7 +544,7 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
                    toInsert.put("created", new Date());
                    toInsert.put("readset", model.getReadset());
                    toInsert.put("analysis_id", model.getRowId());
-                   toInsert.put("dataid", model.getAlignmentFile());
+                   toInsert.put("dataid", dataId);

                    toInsert.put("category", "Cell Ranger");
                    toInsert.put("metricname", header[j]);
@@ -571,97 +606,4 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
            }
        }
    }
-
-   private void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException
-   {
-       getPipelineCtx().getLogger().debug("adding 10x metrics");
-
-       File metrics = new File(outDir, "metrics_summary.csv");
-       if (!metrics.exists())
-       {
-           throw new PipelineJobException("Unable to find file: " + metrics.getPath());
-       }
-
-       if (model.getAlignmentFile() == null)
-       {
-           throw new PipelineJobException("model.getAlignmentFile() was null");
-       }
-
-       try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
-       {
-           String[] line;
-           List<String[]> metricValues = new ArrayList<>();
-
-           int i = 0;
-           while ((line = reader.readNext()) != null)
-           {
-               i++;
-               if (i == 1)
-               {
-                   continue;
-               }
-
-               metricValues.add(line);
-           }
-
-           int totalAdded = 0;
-           TableInfo ti = DbSchema.get("sequenceanalysis", DbSchemaType.Module).getTable("quality_metrics");
-
-           //NOTE: if this job errored and restarted, we may have duplicate records:
-           SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
-           filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
-           filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
-           filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
-           filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
-           TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
-           if (ts.exists())
-           {
-               getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)");
-               ts.getArrayList(Integer.class).forEach(rowid -> {
-                   Table.delete(ti, rowid);
-               });
-           }
-
-           for (String[] row : metricValues)
-           {
-               //TODO
-               if ("Fastq ID".equals(row[2]) || "Physical library ID".equals(row[2]))
-               {
-                   continue;
-               }
-
-               Map<String, Object> toInsert = new CaseInsensitiveHashMap<>();
-               toInsert.put("container", getPipelineCtx().getJob().getContainer().getId());
-               toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId());
-               toInsert.put("created", new Date());
-               toInsert.put("readset", model.getReadset());
-               toInsert.put("analysis_id", model.getRowId());
-               toInsert.put("dataid", model.getAlignmentFile());
-
-               toInsert.put("category", "Cell Ranger");
-               toInsert.put("metricname", row[4]);
-
-               row[5] = row[5].replaceAll(",", ""); //remove commas
-               Object val = row[5];
-               if (row[5].contains("%"))
-               {
-                   row[5] = row[5].replaceAll("%", "");
-                   Double d = ConvertHelper.convert(row[5], Double.class);
-                   d = d / 100.0;
-                   val = d;
-               }
-
-               toInsert.put("metricvalue", val);
-
-               Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert);
-               totalAdded++;
-           }
-
-           getPipelineCtx().getLogger().info("total metrics added: " + totalAdded);
-       }
-       catch (IOException e)
-       {
-           throw new PipelineJobException(e);
-       }
-   }
 }
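complete() now derives the quality_metrics dataid from the created outputs instead of model.getAlignmentFile(), which is null when the BAM is discarded. The selection logic in isolation (SequenceOutputFile is the LabKey type used above; adjust the import to your LabKey version, and the category strings follow the patch):

    import java.util.Collection;
    import org.labkey.api.sequenceanalysis.model.SequenceOutputFile; // import path assumed

    final class MetricsTarget
    {
        // Sketch: prefer the Loupe output; fall back to the run summary, which
        // is always registered, and fail loudly if neither exists.
        static SequenceOutputFile resolve(Collection<SequenceOutputFile> outputs, String loupeCategory)
        {
            return outputs.stream()
                    .filter(x -> loupeCategory.equals(x.getCategory()))
                    .findFirst()
                    .orElseGet(() -> outputs.stream()
                            .filter(x -> "10x Run Summary".equals(x.getCategory()))
                            .findFirst()
                            .orElseThrow());
        }
    }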
getPipelineCtx().getLogger().info("Deleting existing QC metrics (probably from prior restarted job)"); - ts.getArrayList(Integer.class).forEach(rowid -> { - Table.delete(ti, rowid); - }); - } - - for (String[] row : metricValues) - { - //TODO - if ("Fastq ID".equals(row[2]) || "Physical library ID".equals(row[2])) - { - continue; - } - - Map toInsert = new CaseInsensitiveHashMap<>(); - toInsert.put("container", getPipelineCtx().getJob().getContainer().getId()); - toInsert.put("createdby", getPipelineCtx().getJob().getUser().getUserId()); - toInsert.put("created", new Date()); - toInsert.put("readset", model.getReadset()); - toInsert.put("analysis_id", model.getRowId()); - toInsert.put("dataid", model.getAlignmentFile()); - - toInsert.put("category", "Cell Ranger"); - toInsert.put("metricname", row[4]); - - row[5] = row[5].replaceAll(",", ""); //remove commas - Object val = row[5]; - if (row[5].contains("%")) - { - row[5] = row[5].replaceAll("%", ""); - Double d = ConvertHelper.convert(row[5], Double.class); - d = d / 100.0; - val = d; - } - - toInsert.put("metricvalue", val); - - Table.insert(getPipelineCtx().getJob().getUser(), ti, toInsert); - totalAdded++; - } - - getPipelineCtx().getLogger().info("total metrics added: " + totalAdded); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } } diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 633214b51..12fa0555a 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -53,6 +53,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -69,6 +70,7 @@ public CellRangerVDJWrapper(@Nullable Logger logger) } public static final String INNER_ENRICHMENT_PRIMERS = "innerEnrichmentPrimers"; + public static final String VLOUPE_CATEGORY = "10x VLoupe"; public static class VDJProvider extends AbstractAlignmentStepProvider { @@ -81,6 +83,7 @@ public VDJProvider() ToolParameterDescriptor.create(INNER_ENRICHMENT_PRIMERS, "Inner Enrichment Primers", "An option comma-separated list of the inner primers used for TCR enrichment. 
@@ -159,6 +162,7 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
                    // Special-case TRAVxx/DVxx lineages:
                    if (lineage.startsWith("TRA") && lineage.contains("DV") && !lineage.contains("/DV"))
                    {
+                       getPipelineCtx().getLogger().debug("Updating -DV to /DV in: " + lineage);
                        if (lineage.contains("-DV"))
                        {
                            lineage = lineage.replace("-DV", "/DV");
@@ -169,6 +173,15 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
                        }
                    }

+                   // NOTE: cellranger expects TRDV segments to start with TRDV, so re-order
+                   if ("TRD".equals(locus) && lineage.startsWith("TRA"))
+                   {
+                       getPipelineCtx().getLogger().debug("Editing TRA/DV lineage: " + lineage);
+                       String[] tokens = lineage.split("/");
+                       lineage = "TR" + tokens[1] + "/" + tokens[0].replaceAll("^TR", "");
+                       getPipelineCtx().getLogger().debug("to: " + lineage);
+                   }
+
                    StringBuilder header = new StringBuilder();
                    header.append(">").append(i.get()).append("|").append(name).append(" ").append(lineage).append("|").append(lineage).append("|");
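The re-ordering above turns a dual-named TRA/DV segment into a TRD-first name (processCSV later applies the inverse transform). A worked example of the string manipulation, mirroring the two lines in init() (plain JDK; the lineage value is illustrative):

    final class TrdvRename
    {
        public static void main(String[] args)
        {
            // e.g. cellranger wants the TRD form "TRDV8/AV38-2" rather than "TRAV38-2/DV8"
            String lineage = "TRAV38-2/DV8";
            String[] tokens = lineage.split("/");
            lineage = "TR" + tokens[1] + "/" + tokens[0].replaceAll("^TR", "");
            System.out.println(lineage); // prints TRDV8/AV38-2
        }
    }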
Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), null); + String versionString = "Version: " + getWrapper().getVersionString(); + output.addSequenceOutput(outputHtmlRename, rs.getName() + " 10x VDJ Summary", "10x Run Summary", rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); } catch (IOException e) { @@ -436,7 +455,6 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome referenceGenome, File outdir, AlignmentOutputImpl output, String subdirName) throws PipelineJobException { boolean isPrimaryDir = "vdj_t".equals(subdirName); - String chainType = "vdj_t".equals(subdirName) ? "Alpha/Beta" : "Gamma/Delta"; File multiDir = new File(outdir, "multi/" + subdirName); if (!multiDir.exists()) @@ -455,7 +473,13 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome { try { - FileUtils.moveFile(f, new File(sampleDir, f.getName())); + File dest = new File(sampleDir, f.getName()); + if (dest.exists()) + { + dest.delete(); + } + + FileUtils.moveFile(f, dest); } catch (IOException e) { @@ -499,9 +523,23 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome // NOTE: only tag the vloupe file for a/b: else if (isPrimaryDir) { - output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), null); + String versionString = "Version: " + getWrapper().getVersionString(); + output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", VLOUPE_CATEGORY, rs.getRowId(), null, referenceGenome.getGenomeId(), versionString); } + output.addIntermediateFile(new File(sampleDir, "airr_rearrangement.tsv")); + output.addIntermediateFile(new File(sampleDir, "all_contig_annotations.bed")); + output.addIntermediateFile(new File(sampleDir, "all_contig_annotations.json")); + output.addIntermediateFile(new File(sampleDir, "cell_barcodes.json")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.bam")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.bam.bai")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.fasta")); + output.addIntermediateFile(new File(sampleDir, "concat_ref.fasta.fai")); + output.addIntermediateFile(new File(sampleDir, "consensus.bam")); + output.addIntermediateFile(new File(sampleDir, "consensus.bam.bai")); + output.addIntermediateFile(new File(sampleDir, "donor_regions.fa")); + output.addIntermediateFile(new File(sampleDir, "vdj_contig_info.pb")); + return csv; } @@ -637,7 +675,7 @@ public void deleteSymlinks(File localFqDir) throws PipelineJobException } } - public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException + public void addMetrics(File outDir, AnalysisModel model, int dataId) throws PipelineJobException { getPipelineCtx().getLogger().debug("adding 10x metrics"); @@ -647,11 +685,6 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep throw new PipelineJobException("Unable to find file: " + metrics.getPath()); } - if (model.getAlignmentFile() == null) - { - throw new PipelineJobException("model.getAlignmentFile() was null"); - } - try (CSVReader reader = new CSVReader(Readers.getReader(metrics))) { String[] line; @@ -675,7 +708,7 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep //NOTE: if this job errored and restarted, we may have duplicate records: SimpleFilter filter = new 
@@ -436,7 +455,6 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List<File> inp
     private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome referenceGenome, File outdir, AlignmentOutputImpl output, String subdirName) throws PipelineJobException
     {
         boolean isPrimaryDir = "vdj_t".equals(subdirName);
-        String chainType = "vdj_t".equals(subdirName) ? "Alpha/Beta" : "Gamma/Delta";
 
         File multiDir = new File(outdir, "multi/" + subdirName);
         if (!multiDir.exists())
@@ -455,7 +473,13 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome
             {
                 try
                 {
-                    FileUtils.moveFile(f, new File(sampleDir, f.getName()));
+                    File dest = new File(sampleDir, f.getName());
+                    if (dest.exists())
+                    {
+                        dest.delete();
+                    }
+
+                    FileUtils.moveFile(f, dest);
                 }
                 catch (IOException e)
                 {
@@ -499,9 +523,23 @@ private File processOutputsForType(String sampleId, Readset rs, ReferenceGenome
             // NOTE: only tag the vloupe file for a/b:
             else if (isPrimaryDir)
            {
-                output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", "10x VLoupe", rs.getRowId(), null, referenceGenome.getGenomeId(), null);
+                String versionString = "Version: " + getWrapper().getVersionString();
+                output.addSequenceOutput(outputVloupe, rs.getName() + " 10x VLoupe", VLOUPE_CATEGORY, rs.getRowId(), null, referenceGenome.getGenomeId(), versionString);
             }
 
+            output.addIntermediateFile(new File(sampleDir, "airr_rearrangement.tsv"));
+            output.addIntermediateFile(new File(sampleDir, "all_contig_annotations.bed"));
+            output.addIntermediateFile(new File(sampleDir, "all_contig_annotations.json"));
+            output.addIntermediateFile(new File(sampleDir, "cell_barcodes.json"));
+            output.addIntermediateFile(new File(sampleDir, "concat_ref.bam"));
+            output.addIntermediateFile(new File(sampleDir, "concat_ref.bam.bai"));
+            output.addIntermediateFile(new File(sampleDir, "concat_ref.fasta"));
+            output.addIntermediateFile(new File(sampleDir, "concat_ref.fasta.fai"));
+            output.addIntermediateFile(new File(sampleDir, "consensus.bam"));
+            output.addIntermediateFile(new File(sampleDir, "consensus.bam.bai"));
+            output.addIntermediateFile(new File(sampleDir, "donor_regions.fa"));
+            output.addIntermediateFile(new File(sampleDir, "vdj_contig_info.pb"));
+
             return csv;
         }
@@ -637,7 +675,7 @@ public void deleteSymlinks(File localFqDir) throws PipelineJobException
         }
     }
 
-    public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobException
+    public void addMetrics(File outDir, AnalysisModel model, int dataId) throws PipelineJobException
     {
         getPipelineCtx().getLogger().debug("adding 10x metrics");
 
@@ -647,11 +685,6 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep
             throw new PipelineJobException("Unable to find file: " + metrics.getPath());
         }
 
-        if (model.getAlignmentFile() == null)
-        {
-            throw new PipelineJobException("model.getAlignmentFile() was null");
-        }
-
         try (CSVReader reader = new CSVReader(Readers.getReader(metrics)))
         {
             String[] line;
@@ -675,7 +708,7 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep
             //NOTE: if this job errored and restarted, we may have duplicate records:
             SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), model.getReadset());
             filter.addCondition(FieldKey.fromString("analysis_id"), model.getRowId(), CompareType.EQUAL);
-            filter.addCondition(FieldKey.fromString("dataid"), model.getAlignmentFile(), CompareType.EQUAL);
+            filter.addCondition(FieldKey.fromString("dataid"), dataId, CompareType.EQUAL);
             filter.addCondition(FieldKey.fromString("category"), "Cell Ranger VDJ", CompareType.EQUAL);
             filter.addCondition(FieldKey.fromString("container"), getPipelineCtx().getJob().getContainer().getId(), CompareType.EQUAL);
             TableSelector ts = new TableSelector(ti, PageFlowUtil.set("rowid"), filter, null);
@@ -700,7 +733,7 @@ public void addMetrics(File outDir, AnalysisModel model) throws PipelineJobExcep
                 toInsert.put("created", new Date());
                 toInsert.put("readset", model.getReadset());
                 toInsert.put("analysis_id", model.getRowId());
-                toInsert.put("dataid", model.getAlignmentFile());
+                toInsert.put("dataid", dataId);
 
                 toInsert.put("category", "Cell Ranger VDJ");
 
@@ -743,15 +776,20 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
             throw new PipelineJobException("Expected sequence outputs to be created");
         }
 
-        File html = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow().getFile();
-
-        addMetrics(html.getParentFile(), model);
+        SequenceOutputFile outputForData = outputFilesCreated.stream().filter(x -> VLOUPE_CATEGORY.equals(x.getCategory())).findFirst().orElse(null);
+        if (outputForData == null)
+        {
+            outputForData = outputFilesCreated.stream().filter(x -> "10x Run Summary".equals(x.getCategory())).findFirst().orElseThrow();
+        }
 
-        File bam = model.getAlignmentData().getFile();
-        if (!bam.exists())
+        File outsDir = outputForData.getFile().getParentFile();
+        Integer dataId = outputForData.getDataId();
+        if (dataId == null)
         {
-            getPipelineCtx().getLogger().warn("BAM not found, expected: " + bam.getPath());
+            throw new PipelineJobException("Unable to find dataId for output file");
         }
+
+        addMetrics(outsDir, model, dataId);
     }
 
     private static final Pattern FILE_PATTERN = Pattern.compile("^(.+?)(_S[0-9]+){0,1}_L(.+?)_(R){0,1}([0-9])(_[0-9]+){0,1}(.*?)(\\.f(ast){0,1}q)(\\.gz)?$");
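The complete() changes above decouple the metrics rows from the alignment BAM: they are now keyed on the dataId of whichever sequence output is available, preferring the VLoupe file and falling back to the run summary. A standalone sketch of that category-based fallback (the Output record is a stand-in for SequenceOutputFile; only the category matters here):

    import java.util.List;

    public class OutputSelectionDemo
    {
        record Output(String category) {}

        static Output pickOutputForData(List<Output> outputs)
        {
            // Prefer the VLoupe output; fall back to the run summary:
            return outputs.stream()
                    .filter(x -> "10x VLoupe".equals(x.category())).findFirst()
                    .orElseGet(() -> outputs.stream()
                            .filter(x -> "10x Run Summary".equals(x.category())).findFirst()
                            .orElseThrow());
        }

        public static void main(String[] args)
        {
            List<Output> outputs = List.of(new Output("10x Run Summary"), new Output("10x VLoupe"));
            System.out.println(pickOutputForData(outputs).category()); // 10x VLoupe
        }
    }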
@@ -812,7 +850,8 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
         try (BufferedReader reader = Readers.getReader(inputCsv))
         {
             String line;
-            int chimericCallsRecovered = 0;
+            Map<String, Integer> chimericCallsRecovered = new HashMap<>();
+            int restoredTRDVAV = 0;
             int lineIdx = 0;
 
             while ((line = reader.readLine()) != null)
@@ -830,6 +869,15 @@
 
                 //Infer correct chain from the V, J and C genes
                 String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings
+
+                // Restore original value for TRD/TRA
+                if (tokens[6].contains("TRDV") && tokens[6].contains("/") && tokens[6].contains("AV"))
+                {
+                    restoredTRDVAV++;
+                    String[] split = tokens[6].split("/");
+                    tokens[6] = "TR" + split[1] + "/" + split[0].replaceAll("TR", "");
+                }
+
                 List<String> chains = new ArrayList<>();
                 String vGeneChain = null;
                 String jGeneChain = null;
@@ -863,24 +911,28 @@ else if (idx == 9)
                 // Recover TRDV/TRAJ/TRAC:
                 if (uniqueChains.size() > 1)
                 {
+                    // Defer to the constant region, if known:
                     if (cGeneChain != null)
                     {
                         uniqueChains.clear();
                         uniqueChains.add(cGeneChain);
-                        chimericCallsRecovered++;
+                        String key = originalChain + "->" + cGeneChain + " (based on C-GENE)";
+                        chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1);
                     }
                     else if (uniqueChains.size() == 2)
                     {
                         if ("TRD".equals(vGeneChain) && "TRA".equals(jGeneChain))
                         {
                             uniqueChains.clear();
-                            chimericCallsRecovered++;
+                            String key = originalChain + "->" + vGeneChain + " (based on V-GENE)";
+                            chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1);
                             uniqueChains.add(vGeneChain);
                         }
 
                         if ("TRA".equals(vGeneChain) && "TRD".equals(jGeneChain))
                         {
                             uniqueChains.clear();
-                            chimericCallsRecovered++;
+                            String key = originalChain + "->" + vGeneChain + " (based on V-GENE)";
+                            chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1);
                             uniqueChains.add(vGeneChain);
                         }
                     }
@@ -893,7 +945,7 @@ else if (uniqueChains.size() == 2)
                 }
                 else
                 {
-                    log.error("Multiple chains detected [" + StringUtils.join(chains, ",") + "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]);
+                    log.info("Multiple chains detected [" + StringUtils.join(chains, ",") + "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]);
                 }
 
                 if (acceptableChains.contains(tokens[5]))
@@ -902,7 +954,32 @@
                 }
             }
 
-            log.info("\tChimeric calls recovered: " + chimericCallsRecovered);
+            if (!chimericCallsRecovered.isEmpty())
+            {
+                log.info("\tChimeric calls recovered: ");
+                for (String key : chimericCallsRecovered.keySet())
+                {
+                    log.info("\t" + key + ": " + chimericCallsRecovered.get(key));
+                }
+            }
+
+            log.info("\tTRDV/AV calls restored: " + restoredTRDVAV);
+        }
+    }
+
+    public @Nullable String getVersionString()
+    {
+        try
+        {
+            String ret = executeWithOutput(Arrays.asList(getExe().getPath(), "--version"));
+
+            return ret.replaceAll("^cellranger ", "");
+        }
+        catch (PipelineJobException e)
+        {
+            getLogger().error("Unable to find cellRanger version");
+
+            return "Unknown";
+        }
     }
 }
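The counter change in processCSV() above replaces a single int with a per-category tally, so the log reports how many calls were recovered for each original->corrected pairing. A standalone sketch of that idiom (the sample keys are invented):

    import java.util.HashMap;
    import java.util.Map;

    public class ChimeraTallyDemo
    {
        public static void main(String[] args)
        {
            Map<String, Integer> recovered = new HashMap<>();
            String[] events = {
                    "Multi->TRA (based on C-GENE)",
                    "Multi->TRA (based on C-GENE)",
                    "Multi->TRD (based on V-GENE)"
            };

            // Same pattern as the diff: increment a counter keyed by the transition
            for (String key : events)
            {
                recovered.put(key, recovered.getOrDefault(key, 0) + 1);
            }

            recovered.forEach((key, count) -> System.out.println("\t" + key + ": " + count));
        }
    }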
"-31" : "")); + return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger"); } public static Set getRawDataDirs(File outputDir, boolean filteredOnly, boolean includeAnalysis) @@ -75,7 +76,7 @@ public File getLocalFastqDir(File outputDirectory) public List prepareCountArgs(AlignmentOutputImpl output, String id, File outputDirectory, Readset rs, List> inputFastqPairs, List extraArgs, boolean writeFastqArgs) throws PipelineJobException { List args = new ArrayList<>(); - args.add(getExe(false).getPath()); + args.add(getExe().getPath()); args.add("count"); args.add("--id=" + id); @@ -259,4 +260,20 @@ public static String getId(String idParam, Readset rs) return id; } + + public @Nullable String getVersionString() + { + try + { + String ret = executeWithOutput(Arrays.asList(getExe().getPath(), "--version")); + + return ret.replaceAll("^cellranger ", ""); + } + catch (PipelineJobException e) + { + getLogger().error("Unable to find cellRanger version"); + + return "Unknown"; + } + } } diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index ba5044721..04d65c79b 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -67,6 +67,10 @@ public static List getToolParameters() public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException { AlignmentOutputImpl output = new AlignmentOutputImpl(); + + // We need to ensure we keep the BAM for post-processing: + setAlwaysRetainBam(true); + File localBam = runCellRanger(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit); File crDir = new File(localBam.getPath().replace(".nimble.cellranger.bam", "")); diff --git a/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java index 06bcb3c4d..689e92307 100644 --- a/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/VelocytoAlignmentStep.java @@ -67,6 +67,9 @@ public AlignmentStep create(PipelineContext context) public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nullable List inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException { AlignmentOutputImpl output = new AlignmentOutputImpl(); + + setAlwaysRetainBam(true); + File localBam = runCellRanger(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit); File gtf = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName("gtfFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));