Merge pull request #315 from LabKey/fb_merge_24.7_to_develop

Merge 24.7 to develop

bbimber authored Sep 25, 2024
2 parents 4352276 + bcc1703 commit 20eae1c

Showing 42 changed files with 4,244 additions and 2,827 deletions.

@@ -198,7 +198,7 @@ default void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Reco

 void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException;

-default void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated, SequenceAnalysisJobSupport support) throws PipelineJobException
+default void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
 {

 }
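
For downstream handlers, the migration is mechanical: the job and the SequenceAnalysisJobSupport now come off the JobContext instead of arriving as separate arguments. A minimal sketch of an updated override (hypothetical handler; only the accessor calls are taken from this diff):

```java
// Hypothetical handler migrated to the new signature. JobContext supplies
// the PipelineJob (ctx.getJob()) and the support object
// (ctx.getSequenceSupport()) that were previously passed in directly.
@Override
public void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
{
    PipelineJob job = ctx.getJob();
    SequenceAnalysisJobSupport support = ctx.getSequenceSupport();
    job.getLogger().info("complete() received " + outputsCreated.size() + " output(s)");
}
```
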
22 changes: 11 additions & 11 deletions SequenceAnalysis/pipeline_code/sequence_tools_install.sh
@@ -457,12 +457,12 @@ then
 rm -Rf $LKTOOLS_DIR/samtools
 rm -Rf $LKTOOLS_DIR/bcftools

-wget $WGET_OPTS https://github.com/samtools/samtools/releases/download/1.16.1/samtools-1.16.1.tar.bz2
-bunzip2 samtools-1.16.1.tar.bz2
-tar -xf samtools-1.16.1.tar
+wget $WGET_OPTS https://github.com/samtools/samtools/releases/download/1.20/samtools-1.20.tar.bz2
+bunzip2 samtools-1.20.tar.bz2
+tar -xf samtools-1.20.tar
 echo "Compressing TAR"
-bzip2 samtools-1.16.1.tar
-cd samtools-1.16.1
+bzip2 samtools-1.20.tar
+cd samtools-1.20
 ./configure
 make
 install ./samtools ${LKTOOLS_DIR}/samtools
@@ -519,13 +519,13 @@ then
 rm -Rf $LKTOOLS_DIR/tabix
 rm -Rf $LKTOOLS_DIR/bgzip

-wget $WGET_OPTS https://github.com/samtools/htslib/releases/download/1.16/htslib-1.16.tar.bz2
-bunzip2 htslib-1.16.tar.bz2
-tar -xf htslib-1.16.tar
+wget $WGET_OPTS https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2
+bunzip2 htslib-1.20.tar.bz2
+tar -xf htslib-1.20.tar
 echo "Compressing TAR"
-bzip2 htslib-1.16.tar
-chmod 755 htslib-1.16
-cd htslib-1.16
+bzip2 htslib-1.20.tar
+chmod 755 htslib-1.20
+cd htslib-1.20
 ./configure
 make

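
A quick sanity check after rerunning the install script (a sketch; it assumes samtools and tabix were installed into the same LKTOOLS_DIR the script uses):

```bash
# Each should report 1.20 after the upgrade.
${LKTOOLS_DIR}/samtools --version | head -n 1
${LKTOOLS_DIR}/tabix --version | head -n 1
```
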
@@ -7,6 +7,8 @@
 <pkColumnName>rowids</pkColumnName>
 <columns>
 <column columnName="rowids">
+<columnTitle>RowIds</columnTitle>
+<isHidden>true</isHidden>
 <isKeyField>true</isKeyField>
 </column>
 <column columnName="alleles">
@@ -50,10 +52,6 @@
 </conditionalFormat>
 </conditionalFormats>
 </column>
-<column columnName="rowids">
-<columnTitle>RowIds</columnTitle>
-<isHidden>true</isHidden>
-</column>
 <column columnName="nAlignments">
 <columnTitle># Alignments</columnTitle>
 <isHidden>true</isHidden>
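
This consolidates the two <column columnName="rowids"> entries into one: the key column now carries its title and hidden flag alongside isKeyField, and the duplicate definition later in the file is dropped.
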
@@ -9,19 +9,19 @@ FROM sequenceanalysis.outputfiles

 UNION ALL

-SELECT 'VCF File'
+SELECT 'VCF File' as category

 UNION ALL

-SELECT 'BAM File'
+SELECT 'BAM File' as category

 UNION ALL

-SELECT 'BED File'
+SELECT 'BED File' as category

 UNION ALL

-SELECT 'gVCF File'
+SELECT 'gVCF File' as category

 ) t

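
The aliases give every UNION arm the same explicit column name; an unaliased string constant may otherwise surface as an unnamed expression. The shape of the pattern, sketched (simplified from the query above):

```sql
-- Every arm yields a column named "category", so the outer query can
-- reference t.category instead of relying on positional naming.
SELECT DISTINCT t.category FROM (
  SELECT category FROM sequenceanalysis.outputfiles
  UNION ALL
  SELECT 'VCF File' as category
) t
```
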
25 changes: 19 additions & 6 deletions SequenceAnalysis/src/org/labkey/sequenceanalysis/ReadDataImpl.java
@@ -201,18 +201,28 @@ public void setModifiedBy(Integer modifiedBy)
 _modifiedBy = modifiedBy;
 }

+public File getFile1(boolean allowArchived)
+{
+return getFile(1, _fileId1, allowArchived);
+}
+
 @Override
 @Transient
 public File getFile1()
 {
-return getFile(1, _fileId1);
+return getFile1(false);
 }

 @Override
 @Transient
 public File getFile2()
 {
-return getFile(2, _fileId2);
+return getFile2(false);
 }

+public File getFile2(boolean allowArchived)
+{
+return getFile(2, _fileId2, allowArchived);
+}
+
 public void setFile(File f, int fileIdx)
@@ -248,9 +258,9 @@ public Integer getTotalReads()
 }

 @Transient
-private File getFile(int fileIdx, Integer fileId)
+private File getFile(int fileIdx, Integer fileId, boolean allowArchived)
 {
-if (isArchived())
+if (isArchived() && !allowArchived)
 {
 return null;
 }
@@ -274,13 +284,16 @@ private File getFile(int fileIdx, Integer fileId)
 ret = d.getFile();
 }

-if (ret != null && !ret.exists())
+if (!isArchived() && ret != null && !ret.exists())
 {
 throw new IllegalArgumentException("File does not exist: " + ret.getPath());
 }
 }

-_cachedFiles.put(fileIdx, ret);
+if (!isArchived())
+{
+_cachedFiles.put(fileIdx, ret);
+}

 return ret;
 }
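
The net behavior, sketched for a hypothetical ReadDataImpl rd backed by archived data:

```java
// Assumes rd.isArchived() == true.
File f1 = rd.getFile1();     // null: archived files stay hidden by default
File f2 = rd.getFile1(true); // the recorded path is returned, even though
                             // the file may no longer exist on disk; the
                             // exists() check and the _cachedFiles cache
                             // are both skipped for archived data
```
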
@@ -126,9 +126,9 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
 }

 @Override
-public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated, SequenceAnalysisJobSupport support) throws PipelineJobException
+public void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
 {
-if (!(job instanceof SequenceOutputHandlerJob shj))
+if (!(ctx.getJob() instanceof SequenceOutputHandlerJob shj))
 {
 throw new IllegalStateException("Expected job to be a SequenceOutputHandlerJob");
 }
@@ -144,10 +144,10 @@ public void complete(PipelineJob job, List<Sequ
 Integer analysisId = o.getAnalysis_id();
 if (analysisId == null)
 {
-job.getLogger().warn("no analysis Id for file, attempting to find this job: " + o.getName());
-PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobGUID());
+ctx.getJob().getLogger().warn("no analysis Id for file, attempting to find this job: " + o.getName());
+PipelineStatusFile sf = PipelineService.get().getStatusFile(ctx.getJob().getJobGUID());

-TableSelector ts = new TableSelector(QueryService.get().getUserSchema(job.getUser(), job.getContainer(), SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_ANALYSES), PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("runid/JobId"), sf.getRowId()), null);
+TableSelector ts = new TableSelector(QueryService.get().getUserSchema(ctx.getJob().getUser(), ctx.getJob().getContainer(), SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_ANALYSES), PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("runid/JobId"), sf.getRowId()), null);
 if (ts.exists())
 {
 analysisId = ts.getObject(Integer.class);
@@ -160,15 +160,15 @@ public void complete(PipelineJob job, List<Sequ

 if (o.getLibrary_id() == null)
 {
-job.getLogger().warn("no genome associated with file: " + o.getName());
+ctx.getJob().getLogger().warn("no genome associated with file: " + o.getName());
 continue;
 }

-AnalysisModel m = AnalysisModelImpl.getFromDb(analysisId, job.getUser());
+AnalysisModel m = AnalysisModelImpl.getFromDb(analysisId, ctx.getJob().getUser());
 if (m != null)
 {
-job.getLogger().warn("processing analysis: " + m.getRowId());
-File outputDir = ((SequenceOutputHandlerJob)job).getWebserverDir(false);
+ctx.getJob().getLogger().warn("processing analysis: " + m.getRowId());
+File outputDir = ((SequenceOutputHandlerJob)ctx.getJob()).getWebserverDir(false);
 List<File> metricsFiles = new ArrayList<>();

 File mf = new File(outputDir, FileUtil.getBaseName(o.getFile()) + ".summary.metrics");
@@ -191,7 +191,7 @@ else if (collectInsertSize)
 // This output is only created for paired data:
 if (o.getReadset() != null)
 {
-Readset rs = SequenceAnalysisService.get().getReadset(o.getReadset(), job.getUser());
+Readset rs = SequenceAnalysisService.get().getReadset(o.getReadset(), ctx.getJob().getUser());
 if (rs.getReadData().stream().filter(rd -> rd.getFileId2() != null).count() > 0)
 {
 throw new PipelineJobException("Missing file: " + mf2.getPath());
@@ -219,7 +219,7 @@ else if (collectWgsNonZero)
 throw new PipelineJobException("Missing file: " + mf4.getPath());
 }

-File mf5 = new MarkDuplicatesWrapper(job.getLogger()).getMetricsFile(o.getFile());
+File mf5 = new MarkDuplicatesWrapper(ctx.getJob().getLogger()).getMetricsFile(o.getFile());
 if (mf5.exists())
 {
 metricsFiles.add(mf5);
@@ -232,23 +232,23 @@ else if (runMarkDuplicates)
 TableInfo ti = SequenceAnalysisManager.get().getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS);
 for (File f : metricsFiles)
 {
-List<Map<String, Object>> lines = PicardMetricsUtil.processFile(f, job.getLogger());
+List<Map<String, Object>> lines = PicardMetricsUtil.processFile(f, ctx.getJob().getLogger());
 for (Map<String, Object> row : lines)
 {
 row.put("container", o.getContainer());
-row.put("createdby", job.getUser().getUserId());
+row.put("createdby", ctx.getJob().getUser().getUserId());
 row.put("created", new Date());
 row.put("readset", m.getReadset());
 row.put("analysis_id", m.getRowId());
 row.put("dataid", m.getAlignmentFile());

-Table.insert(job.getUser(), ti, row);
+Table.insert(ctx.getJob().getUser(), ti, row);
 }
 }
 }
 else
 {
-job.getLogger().warn("Analysis Id " + o.getAnalysis_id() + " not found for file: " + o.getName());
+ctx.getJob().getLogger().warn("Analysis Id " + o.getAnalysis_id() + " not found for file: " + o.getName());
 }
 }
 }

@@ -161,7 +161,7 @@ private void checkCramAndIndex(SequenceOutputFile so) throws PipelineJobExceptio
 }

 @Override
-public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated, SequenceAnalysisJobSupport support) throws PipelineJobException
+public void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
 {
 List<Map<String, Object>> toUpdate = new ArrayList<>();
 List<Map<String, Object>> oldKeys = inputs.stream().map(x -> {
@@ -175,11 +175,11 @@ public void complete(PipelineJob job, List<Sequ
 File cram = new File(so.getFile().getParentFile(), FileUtil.getBaseName(so.getFile()) + ".cram");
 checkCramAndIndex(so);

-job.getLogger().info("Updating ExpData record with new filepath: " + cram.getPath());
+ctx.getJob().getLogger().info("Updating ExpData record with new filepath: " + cram.getPath());
 ExpData d = so.getExpData();
 d.setDataFileURI(cram.toURI());
 d.setName(cram.getName());
-d.save(job.getUser());
+d.save(ctx.getJob().getUser());

 if (so.getName().contains(".bam"))
 {
@@ -194,8 +194,8 @@ public void complete(PipelineJob job, List<Sequ

 try
 {
-Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
-QueryService.get().getUserSchema(job.getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(job.getUser(), target, toUpdate, oldKeys, null, null);
+Container target = ctx.getJob().getContainer().isWorkbook() ? ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer();
+QueryService.get().getUserSchema(ctx.getJob().getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(ctx.getJob().getUser(), target, toUpdate, oldKeys, null, null);
 }
 catch (QueryUpdateServiceException | InvalidKeyException | BatchValidationException | SQLException e)
 {
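
The same rename bookkeeping recurs in the gVCF-reblocking handler further down: alongside the ExpData update, each renamed output contributes a toUpdate row plus a matching oldKeys row so QueryUpdateService can locate the record. A sketch of the shape (hypothetical; the diff truncates the code that populates these lists, and rowid is assumed to be the key column):

```java
// One pair of entries per renamed output, keyed by rowid (assumed key).
Map<String, Object> row = new HashMap<>();
row.put("rowid", so.getRowid());
row.put("name", so.getName().replace(".bam", ".cram"));
toUpdate.add(row);

oldKeys.add(Map.of("rowid", so.getRowid()));
```
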
@@ -722,14 +722,14 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
 }

 @Override
-public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated, SequenceAnalysisJobSupport support) throws PipelineJobException
+public void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
 {
-SequenceTaskHelper taskHelper = new SequenceTaskHelper(getPipelineJob(job), getPipelineJob(job).getDataDirectory());
-List<PipelineStepCtx<VariantProcessingStep>> providers = SequencePipelineService.get().getSteps(job, VariantProcessingStep.class);
+SequenceTaskHelper taskHelper = new SequenceTaskHelper(getPipelineJob(ctx.getJob()), getPipelineJob(ctx.getJob()).getDataDirectory());
+List<PipelineStepCtx<VariantProcessingStep>> providers = SequencePipelineService.get().getSteps(ctx.getJob(), VariantProcessingStep.class);
 for (PipelineStepCtx<VariantProcessingStep> stepCtx : providers)
 {
 VariantProcessingStep step = stepCtx.getProvider().create(taskHelper);
-step.complete(job, inputs, outputsCreated, support);
+step.complete(ctx.getJob(), inputs, outputsCreated, ctx.getSequenceSupport());
 }
 }
 }

@@ -218,7 +218,9 @@ public RecordedActionSet run() throws PipelineJobException
 for (SequenceReadsetImpl rs : readsets)
 {
 getJob().getLogger().debug("caching readset: " + rs.getName() + " with " + rs.getReadData().size() + " files");
-getPipelineJob().getSequenceSupport().cacheReadset(rs);
+
+// NOTE: allow archived data
+getPipelineJob().getSequenceSupport().cacheReadset(rs, true);
 }
 }
 catch (IOException e)
@@ -505,10 +507,10 @@ private void checkForDuplicateFileNames(List<SequenceReadsetImpl> readsets, List
 {
 Map<String, File> existingFileNames = new HashMap<>();
 preexistingReadData.forEach(rd -> {
-existingFileNames.put(rd.getFile1().getName(), rd.getFile1());
+existingFileNames.put(rd.getFile1(true).getName(), rd.getFile1(true));
 if (rd.getFile2() != null)
 {
-existingFileNames.put(rd.getFile2().getName(), rd.getFile2());
+existingFileNames.put(rd.getFile2(true).getName(), rd.getFile2(true));
 }
 });

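
Because the duplicate-name check now requests files with allowArchived=true, filenames from archived read data still participate in collision detection even though their underlying files may no longer be on disk.
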
@@ -129,7 +129,7 @@ private File getReblockedName(File gvcf)
 }

 @Override
-public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated, SequenceAnalysisJobSupport support) throws PipelineJobException
+public void complete(JobContext ctx, List<SequenceOutputFile> inputs, List<SequenceOutputFile> outputsCreated) throws PipelineJobException
 {
 List<Map<String, Object>> toUpdate = new ArrayList<>();
 List<Map<String, Object>> oldKeys = inputs.stream().map(x -> {
@@ -146,11 +146,11 @@ public void complete(PipelineJob job, List<Sequ
 throw new PipelineJobException("Unable to find file: " + reblocked.getPath());
 }

-job.getLogger().info("Updating ExpData record with new filepath: " + reblocked.getPath());
+ctx.getJob().getLogger().info("Updating ExpData record with new filepath: " + reblocked.getPath());
 ExpData d = so.getExpData();
 d.setDataFileURI(reblocked.toURI());
 d.setName(reblocked.getName());
-d.save(job.getUser());
+d.save(ctx.getJob().getUser());

 if (so.getName().contains(".g.vcf.gz"))
 {
@@ -165,8 +165,8 @@ public void complete(PipelineJob job, List<Sequ

 try
 {
-Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
-QueryService.get().getUserSchema(job.getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(job.getUser(), target, toUpdate, oldKeys, null, null);
+Container target = ctx.getJob().getContainer().isWorkbook() ? ctx.getJob().getContainer().getParent() : ctx.getJob().getContainer();
+QueryService.get().getUserSchema(ctx.getJob().getUser(), target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES).getUpdateService().updateRows(ctx.getJob().getUser(), target, toUpdate, oldKeys, null, null);
 }
 catch (QueryUpdateServiceException | InvalidKeyException | BatchValidationException | SQLException e)
 {

@@ -626,7 +626,7 @@ else if (lane.isEmpty())

 if (rd.isEmpty())
 {
-getJob().getLogger().warn("no file groups found ofr rs: " + rs.getName() + " with fileGroupId: " + rs.getFileSetName());
+getJob().getLogger().warn("no file groups found for readset: " + rs.getName() + " with fileGroupId: " + rs.getFileSetName());
 }

 rs.setReadData(rd);

@@ -150,7 +150,9 @@ public RecordedActionSet run() throws PipelineJobException
 }

 //run final handler
-getPipelineJob().getHandler().getProcessor().complete(getPipelineJob(), getPipelineJob().getFiles(), outputsCreated, getPipelineJob().getSequenceSupport());
+TaskFileManagerImpl manager = new TaskFileManagerImpl(getPipelineJob(), getPipelineJob().getAnalysisDirectory(), null);
+JobContextImpl ctx = new JobContextImpl(getPipelineJob(), getPipelineJob().getSequenceSupport(), getPipelineJob().getParameterJson(), getPipelineJob().getAnalysisDirectory(), manager, null);
+getPipelineJob().getHandler().getProcessor().complete(ctx, getPipelineJob().getFiles(), outputsCreated);

 File xml = getPipelineJob().getSerializedOutputFilesFile();
 if (xml.exists())

@@ -118,6 +118,13 @@ else if (SequenceAnalysisSchema.TABLE_BARCODES.equalsIgnoreCase(name))

 return ret;
 }
+else if (SequenceAnalysisSchema.TABLE_INSTRUMENT_RUNS.equalsIgnoreCase(name))
+{
+TableInfo ret = super.createWrappedTable(name, sourceTable, cf);
+LDKService.get().applyNaturalSort((AbstractTableInfo)ret, "name");
+
+return ret;
+}
 else if (SequenceAnalysisSchema.TABLE_ANALYSES.equalsIgnoreCase(name))
 {
 return createAnalysesTable(sourceTable, cf);
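
With the natural sort applied to the name column, instrument runs that embed numbers order numerically rather than lexically, so "Run-9" sorts before "Run-10" instead of after it.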