
Commit 1fde264

Merge discvr-20.3 to develop (#41)
* Mark intermediate BAMs for deletion in pipeline
* Migrate DepthOfCoverage to GATK4
* Report depth by amplicon
* Pass -L to DepthOfCoverage in all cases
* Retain TSV output for DepthOfCoverage
* Delete output BAM if it exists for lofreq indelqual
* Use annotated VCF as output
* Don't use thousands separator in table output
* Also report median depth over amplicons
* After failure, ensure we delete the working copy of the GenomicsDB workspace and remake.
1 parent c58258b commit 1fde264

File tree

10 files changed: +305 -133 lines


SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractGatk4Wrapper.java

Lines changed: 23 additions & 2 deletions
@@ -1,15 +1,18 @@
 package org.labkey.api.sequenceanalysis.run;

+import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.math3.exception.ConvergenceException;
 import org.apache.log4j.Logger;
-import org.labkey.api.data.ConvertHelper;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.pipeline.PipelineJobService;
 import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
+import org.labkey.api.writer.PrintWriters;

 import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.List;

@@ -103,4 +106,22 @@ protected List<String> getBaseArgs()

         return args;
     }
+
+    public static List<String> generateIntervalArgsForFullGenome(ReferenceGenome rg, File intervalFile) throws PipelineJobException
+    {
+        try (PrintWriter writer = PrintWriters.getPrintWriter(intervalFile))
+        {
+            SAMSequenceDictionaryExtractor.extractDictionary(rg.getSequenceDictionary().toPath()).getSequences().forEach(x -> writer.println(x.getSequenceName()));
+        }
+        catch (IOException e)
+        {
+            throw new PipelineJobException(e);
+        }
+
+        List<String> ret = new ArrayList<>();
+        ret.add("-L");
+        ret.add(intervalFile.getPath());
+
+        return ret;
+    }
 }
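
The helper added above writes one interval line per contig in the genome's sequence dictionary and returns the matching "-L" argument pair; this is what lets DepthOfCoverage receive -L in all cases. A minimal usage sketch, assuming a ReferenceGenome and an output directory are already in hand (the class and method names below are illustrative, not part of the module):

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.run.AbstractGatk4Wrapper;

public class IntervalArgsSketch
{
    // Illustrative only: build a GATK4 argument list that covers the whole genome via -L.
    public static List<String> buildArgs(ReferenceGenome genome, File outputDir) throws PipelineJobException
    {
        // The helper writes one sequence name per line to this file, then returns {"-L", "<path>"}.
        File intervalFile = new File(outputDir, "fullGenome.intervals");

        List<String> args = new ArrayList<>();
        args.add("DepthOfCoverage");
        args.addAll(AbstractGatk4Wrapper.generateIntervalArgsForFullGenome(genome, intervalFile));
        return args;
    }
}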

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java

Lines changed: 7 additions & 0 deletions
@@ -708,6 +708,13 @@ private void alignSet(Readset rs, String basename, Map<ReadData, Pair<File, File

             if (output.getBAM() != null)
             {
+                //If we have changed the BAM, mark the previous one for deletion
+                if (!bam.equals(output.getBAM()))
+                {
+                    getHelper().getFileManager().addIntermediateFile(bam);
+                    getHelper().getFileManager().addIntermediateFile(new File(bam.getPath() + ".bai"));
+                }
+
                 bam = output.getBAM();

                 //can take a long time to execute
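
The same cleanup idea applies to any step that replaces the working BAM: when a new file is produced, the superseded BAM and its .bai index are registered as intermediates so the pipeline can delete them later. A generic sketch of that pattern (the Consumer-based signature below is a stand-in for getHelper().getFileManager(), not the module's actual API):

import java.io.File;
import java.util.function.Consumer;

public class IntermediateBamSketch
{
    // Hypothetical helper mirroring the change above: when a step swaps in a new BAM,
    // hand the old BAM and its index to the pipeline's intermediate-file tracker.
    public static void markPreviousBamIntermediate(Consumer<File> addIntermediateFile, File previousBam, File newBam)
    {
        if (!previousBam.equals(newBam))
        {
            addIntermediateFile.accept(previousBam);
            addIntermediateFile.accept(new File(previousBam.getPath() + ".bai"));
        }
    }
}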

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 162 additions & 84 deletions
Large diffs are not rendered by default.

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/IndelRealignerStep.java

Lines changed: 4 additions & 0 deletions
@@ -38,6 +38,10 @@ public Provider()
                 {{
                     put("checked", false);
                 }}, false),
+                //TODO: consider supporting:
+                //--maxReadsForRealignment
+                //--maxReadsForConsensuses
+
                 ToolParameterDescriptor.create("minRamPerQueueJob", "Min RAM Per Queue Job", "This only applies if queue is checked. If provided, the scatter count (number of jobs) for queue will be adjusted to ensure at least this amount of RAM, in GB, is available for each job", "ldk-integerfield", null, null)
         ), null, "http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_IndelRealigner.html");
     }

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/LofreqIndelQualStep.java

Lines changed: 11 additions & 0 deletions
@@ -45,6 +45,17 @@ public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGen

         File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".lofreqindel.bam");
         output.addIntermediateFile(outputBam);
+        if (outputBam.exists())
+        {
+            outputBam.delete();
+        }
+
+        File bamIdx = new File(outputBam.getPath() + ".bai");
+        if (bamIdx.exists())
+        {
+            bamIdx.delete();
+        }
+
         output.setBAM(getWrapper().addIndelQuals(inputBam, outputBam, referenceGenome.getWorkingFastaFile()));

         SequencePipelineService.get().ensureBamIndex(outputBam, getPipelineCtx().getLogger(), false);

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java

Lines changed: 19 additions & 10 deletions
@@ -371,6 +371,7 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
         {
             File workspace = getSourceWorkspace(params, support);
             uniqueSamples.addAll(getSamplesForWorkspace(workspace));
+            job.getLogger().info("Samples in the existing workspace: " + uniqueSamples.size());
         }

         for (SequenceOutputFile so : inputFiles)
@@ -453,10 +454,14 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c

         Set<File> toDelete = new HashSet<>();
         File doneFile = new File(destinationWorkspaceFolder, "genomicsdb.done");
-        boolean isResume = doneFile.exists();
+        File startedFile = new File(destinationWorkspaceFolder, "genomicsdb.started");
+        boolean genomicsDbCompleted = doneFile.exists();
+        boolean genomicsDbStarted = startedFile.exists();
+        ctx.getFileManager().addIntermediateFile(doneFile);
+        ctx.getFileManager().addIntermediateFile(startedFile);
         if (_append)
         {
-            if (isResume)
+            if (genomicsDbCompleted)
             {
                 ctx.getLogger().debug("GenomicsDB previously completed, resuming");
             }
@@ -477,14 +482,16 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
                 File destContigFolder = new File(destinationWorkspaceFolder, sourceFolder.getName());
                 File copyDone = new File(destContigFolder.getPath() + ".copy.done");
                 toDelete.add(copyDone);
-                if (copyDone.exists())
+
+                //NOTE: if GenomicsDB has started, but dies mid-job, the resulting workspace probably cannot be resumed
+                if (!genomicsDbStarted && copyDone.exists())
                 {
                     ctx.getLogger().info("has been copied, skipping");
                     continue;
                 }

                 //Allow the above to complete so we track the .done files
-                if (isResume)
+                if (genomicsDbCompleted)
                 {
                     continue;
                 }
@@ -517,7 +524,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
         }
         else
         {
-            if (isResume)
+            if (genomicsDbCompleted)
             {
                 ctx.getLogger().debug("GenomicsDB previously completed, resuming");
             }
@@ -542,7 +549,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
         if (doCopyGVcfLocal)
         {
             ctx.getLogger().info("making local copies of gVCFs");
-            vcfsToProcess.addAll(GenotypeGVCFsWrapper.copyVcfsLocally(inputVcfs, toDelete, null, ctx.getLogger(), isResume));
+            vcfsToProcess.addAll(GenotypeGVCFsWrapper.copyVcfsLocally(inputVcfs, toDelete, null, ctx.getLogger(), genomicsDbCompleted));
         }
         else
         {
@@ -558,13 +565,15 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
             wrapper.addToEnvironment("TILEDB_DISABLE_FILE_LOCKING", "1");
         }

-        if (!isResume)
+        if (!genomicsDbCompleted)
         {
-            List<Interval> intervals = getIntervals(ctx);
-            wrapper.execute(genome, vcfsToProcess, destinationWorkspaceFolder, intervals, options, _append);
-
             try
             {
+                FileUtils.touch(startedFile);
+
+                List<Interval> intervals = getIntervals(ctx);
+                wrapper.execute(genome, vcfsToProcess, destinationWorkspaceFolder, intervals, options, _append);
+
                 FileUtils.touch(doneFile);
                 ctx.getLogger().debug("GenomicsDB complete, touching file: " + doneFile.getPath());
             }
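
The resume logic now distinguishes an attempt that merely started from one that completed: genomicsdb.started is touched just before GenomicsDBImport runs and genomicsdb.done only after it succeeds, so a workspace left behind by a job that died mid-import is not trusted as resumable. A standalone sketch of that marker-file pattern, assuming a runImport callback in place of the real wrapper.execute(...) call:

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;

public class GenomicsDbMarkerSketch
{
    // Illustrative only: the started/done flag pattern used by the handler above.
    public static void runWithMarkers(File workspaceFolder, Runnable runImport) throws IOException
    {
        File startedFile = new File(workspaceFolder, "genomicsdb.started");
        File doneFile = new File(workspaceFolder, "genomicsdb.done");

        if (doneFile.exists())
        {
            // A prior attempt finished successfully, so the import can be skipped on resume.
            return;
        }

        if (startedFile.exists())
        {
            // A started file without a done file means a prior attempt died mid-import; per the
            // commit message, the working copy of the workspace should be discarded and remade
            // rather than trusting any per-contig .copy.done markers.
            System.out.println("Prior import did not finish; the workspace copy will be remade");
        }

        FileUtils.touch(startedFile);   // mark the attempt before any work happens
        runImport.run();                // the actual GenomicsDBImport invocation
        FileUtils.touch(doneFile);      // mark completion only after success
    }
}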

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/DepthOfCoverageWrapper.java

Lines changed: 6 additions & 4 deletions
@@ -3,7 +3,7 @@
 import org.apache.log4j.Logger;
 import org.jetbrains.annotations.Nullable;
 import org.labkey.api.pipeline.PipelineJobException;
-import org.labkey.api.sequenceanalysis.run.AbstractGatkWrapper;
+import org.labkey.api.sequenceanalysis.run.AbstractGatk4Wrapper;

 import java.io.File;
 import java.util.ArrayList;
@@ -13,7 +13,7 @@
 /**
  * Created by bimber on 4/24/2017.
  */
-public class DepthOfCoverageWrapper extends AbstractGatkWrapper
+public class DepthOfCoverageWrapper extends AbstractGatk4Wrapper
 {
     public DepthOfCoverageWrapper(Logger log)
     {
@@ -28,7 +28,6 @@ public void run(List<File> inputBams, String outputBaseName, File referenceFasta
     public void run(List<File> inputBams, String outputBaseName, File referenceFasta, @Nullable List<String> options, boolean deleteExtraFiles) throws PipelineJobException
     {
         List<String> args = new ArrayList<>(getBaseArgs());
-        args.add("-T");
         args.add("DepthOfCoverage");
         args.add("-R");
         args.add(referenceFasta.getPath());
@@ -37,7 +36,10 @@ public void run(List<File> inputBams, String outputBaseName, File referenceFasta
             args.add("-I");
             args.add(f.getPath());
         }
-        args.add("-o");
+        args.add("--output-format");
+        args.add("TABLE");
+
+        args.add("-O");
         args.add(outputBaseName);
         if (options != null)
         {
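
With these changes the wrapper emits a GATK4-style DepthOfCoverage invocation: the GATK3 "-T" tool selector and "-o" output flag are gone, and "--output-format TABLE" keeps the tab-delimited output the commit message mentions. A condensed sketch of the argument list being assembled (the class below is illustrative and omits the wrapper's base java/GATK arguments):

import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class DepthOfCoverageArgsSketch
{
    // Illustrative only: mirrors the argument construction in the diff above.
    public static List<String> buildArgs(List<File> inputBams, File referenceFasta, String outputBaseName)
    {
        List<String> args = new ArrayList<>();
        args.add("DepthOfCoverage");      // GATK4 takes the tool name directly, without "-T"
        args.add("-R");
        args.add(referenceFasta.getPath());
        for (File bam : inputBams)
        {
            args.add("-I");
            args.add(bam.getPath());
        }
        args.add("--output-format");
        args.add("TABLE");                // retain table/TSV output
        args.add("-O");                   // GATK4 renames the output flag from "-o"
        args.add(outputBaseName);
        return args;
    }
}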

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/DepthOfCoverageHandler.java

Lines changed: 29 additions & 30 deletions
@@ -2,7 +2,7 @@

 import au.com.bytecode.opencsv.CSVReader;
 import au.com.bytecode.opencsv.CSVWriter;
-import htsjdk.samtools.util.Interval;
+import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
 import org.apache.commons.lang3.StringUtils;
 import org.json.JSONObject;
 import org.labkey.api.module.ModuleLoader;
@@ -15,7 +15,6 @@
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
-import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
@@ -25,9 +24,9 @@

 import java.io.File;
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -107,6 +106,20 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
            throw new PipelineJobException("No basename was provided");
        }

+       List<File> inputBams = new ArrayList<>();
+       Set<Integer> libraryIds = new HashSet<>();
+       for (SequenceOutputFile so : inputFiles)
+       {
+           inputBams.add(so.getFile());
+           libraryIds.add(so.getLibrary_id());
+       }
+
+       if (libraryIds.size() != 1)
+       {
+           throw new PipelineJobException("Not all files use the same reference library");
+       }
+       ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(libraryIds.iterator().next());
+
        String intervalString = StringUtils.trimToNull(ctx.getParams().optString("intervals"));
        if (intervalString != null)
        {
@@ -118,46 +131,32 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
               extraArgs.add(i);
           }
       }
+      else
+      {
+          //GATK4 now requires intervals:
+          File intervalList = new File(ctx.getOutputDir(), "depthOfCoverageIntervals.intervals");
+          ctx.getFileManager().addIntermediateFile(intervalList);
+          extraArgs.addAll(DepthOfCoverageWrapper.generateIntervalArgsForFullGenome(rg, intervalList));
+      }

       Integer mmq = ctx.getParams().optInt("mmq");
       if (mmq > 0)
       {
-          extraArgs.add("-mmq");
+          extraArgs.add("--read-filter");
+          extraArgs.add("MappingQualityReadFilter");
+          extraArgs.add("--minimum-mapping-quality");
          extraArgs.add(mmq.toString());
      }

      Integer mbq = ctx.getParams().optInt("mbq");
      if (mbq > 0)
      {
-         extraArgs.add("-mbq");
+         extraArgs.add("--min-base-quality");
         extraArgs.add(mbq.toString());
     }

-    extraArgs.add("-omitLocusTable");
-    extraArgs.add("-omitIntervals");
-
-    if (SequencePipelineService.get().getMaxThreads(ctx.getLogger()) != null)
-    {
-        extraArgs.add("-nt");
-        extraArgs.add(SequencePipelineService.get().getMaxThreads(ctx.getLogger()).toString());
-    }
-
-    extraArgs.add("-U");
-    extraArgs.add("ALLOW_N_CIGAR_READS");
-
-    List<File> inputBams = new ArrayList<>();
-    Set<Integer> libraryIds = new HashSet<>();
-    for (SequenceOutputFile so : inputFiles)
-    {
-        inputBams.add(so.getFile());
-        libraryIds.add(so.getLibrary_id());
-    }
-
-    if (libraryIds.size() != 1)
-    {
-        throw new PipelineJobException("Not all files use the same reference library");
-    }
-    ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(libraryIds.iterator().next());
+    extraArgs.add("--omit-locus-table");
+    extraArgs.add("--omit-interval-statistics");

     File outputFile = new File(ctx.getOutputDir(), basename + ".coverage");
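
The handler's options translate from GATK3 to GATK4 as follows: -mmq becomes an explicit MappingQualityReadFilter with --minimum-mapping-quality, -mbq becomes --min-base-quality, and -omitLocusTable/-omitIntervals become --omit-locus-table/--omit-interval-statistics, while the GATK3-only -nt thread count and -U ALLOW_N_CIGAR_READS flags are dropped. A small sketch of just that translation (class and method names are illustrative; a zero value means the parameter was not supplied, matching optInt's default):

import java.util.ArrayList;
import java.util.List;

public class DepthOfCoverageFilterArgsSketch
{
    // Builds the GATK4-style quality filter arguments shown in the diff above.
    public static List<String> buildQualityArgs(int minMappingQuality, int minBaseQuality)
    {
        List<String> extraArgs = new ArrayList<>();

        if (minMappingQuality > 0)
        {
            // GATK3's -mmq becomes an explicit read filter plus its threshold argument.
            extraArgs.add("--read-filter");
            extraArgs.add("MappingQualityReadFilter");
            extraArgs.add("--minimum-mapping-quality");
            extraArgs.add(String.valueOf(minMappingQuality));
        }

        if (minBaseQuality > 0)
        {
            // GATK3's -mbq becomes --min-base-quality.
            extraArgs.add("--min-base-quality");
            extraArgs.add(String.valueOf(minBaseQuality));
        }

        // Locus and per-interval summary tables are still suppressed, under the renamed flags.
        extraArgs.add("--omit-locus-table");
        extraArgs.add("--omit-interval-statistics");

        return extraArgs;
    }
}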

jbrowse/resources/views/databaseDetails.html

Lines changed: 37 additions & 2 deletions
@@ -15,6 +15,8 @@
                 '<br>' +
                 '<div id="members_'+webpart.wrapperDivId+'"></div>' +
                 '<br>' +
+                '<div id="jsonfiles2_'+webpart.wrapperDivId+'"></div>' +
+                '<br>' +
                 '<div id="jsonfiles_'+webpart.wrapperDivId+'"></div>'
         );

@@ -30,7 +32,7 @@
             }
         });

-        LDK.Utils.getReadOnlyQWP({
+        LDK.Utils.getBasicQWP({
             title: 'Resources Displayed In This Session',
             schemaName: 'jbrowse',
             queryName: 'database_members',
@@ -51,7 +53,7 @@
                     var genomeId = results.rows[0].libraryId;
                     var container = results.rows[0].container;

-                    LDK.Utils.getReadOnlyQWP({
+                    LDK.Utils.getBasicQWP({
                         title: 'Additional Tracks Provided By The Base Genome',
                         schemaName: 'jbrowse',
                         queryName: 'jsonfiles',
@@ -61,6 +63,39 @@
                 }
             }
         });
+
+        LABKEY.Query.selectRows({
+            containerPath: Laboratory.Utils.getQueryContainerPath(),
+            schemaName: 'jbrowse',
+            queryName: 'database_members',
+            columns: 'jsonfile,container',
+            filterArray: [LABKEY.Filter.create('database', objectid, LABKEY.Filter.Types.EQUAL)],
+            scope: this,
+            error: LDK.Utils.getErrorCallback(),
+            success: function (results) {
+                if (results.rows && results.rows.length) {
+                    var jsonFiles = [];
+                    var containers = [];
+                    Ext4.Array.forEach(results.rows, function(r){
+                        jsonFiles.push(r.jsonfile);
+                        containers.push(r.container);
+                    }, this);
+
+                    jsonFiles = Ext4.unique(jsonFiles);
+                    containers = Ext4.unique(containers);
+                    var container = containers[0];
+
+                    LDK.Utils.getReadOnlyQWP({
+                        title: 'Tracks Provided By This Session',
+                        schemaName: 'jbrowse',
+                        queryName: 'jsonfiles',
+                        containerPath: container,
+                        filterArray: [LABKEY.Filter.create('objectid', jsonFiles.join(';'), LABKEY.Filter.Types.IN)]
+                    }).render('jsonfiles2_' + webpart.wrapperDivId);
+                }
+            }
+        });
+
     });

 </script>

jbrowse/resources/web/jbrowse/window/ModifyJsonConfigWindow.js

Lines changed: 7 additions & 1 deletion
@@ -63,7 +63,13 @@ Ext4.define('JBrowse.window.ModifyJsonConfigWindow', {
                 text: 'Chunk Size Limit',
                 scope: this,
                 handler: function (gridBtn) {
-                    this.addAttribute('chunkSizeLimit ', null, 'INT');
+                    this.addAttribute('chunkSizeLimit', null, 'INT');
+                }
+            },{
+                text: 'Max Track Height',
+                scope: this,
+                handler: function (gridBtn) {
+                    this.addAttribute('maxHeight', 1000, 'INT');
                 }
             },{
                 text: 'XY Plot',

0 commit comments
