Skip to content

Commit 71f223e

Browse files
committed
Allow variant jobs to run on specific interval list
1 parent 9d9fe88 commit 71f223e

File tree

8 files changed

+86
-25
lines changed

8 files changed

+86
-25
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
package org.labkey.api.sequenceanalysis.pipeline;
1717

18+
import htsjdk.samtools.SAMSequenceDictionary;
1819
import org.jetbrains.annotations.NotNull;
1920

2021
import java.io.File;
@@ -87,6 +88,8 @@ public interface ReferenceGenome extends Serializable
8788
*/
8889
File getSequenceDictionary();
8990

91+
/**
 * Loads this genome's sequence dictionary as an htsjdk {@link SAMSequenceDictionary}.
 *
 * @return The parsed sequence dictionary. NOTE(review): the implementation in ReferenceGenomeImpl returns null
 * when getSequenceDictionary() returns null, so callers should presumably null-check the result.
 */
SAMSequenceDictionary extractDictionary();
92+
9093
/**
9194
* @return True if this is a genome not defined in the main database, such as a job using an ad hoc FASTA file or genome based on querying the NT records
9295
*/

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ enum ScatterGatherMethod
5151
none(false),
5252
contig(false),
5353
chunked(true),
54-
fixedJobs(false);
54+
fixedJobs(false),
55+
specificInternals(false);
5556

5657
private final boolean _mayRequireSort;
5758

SequenceAnalysis/resources/web/SequenceAnalysis/panel/VariantScatterGatherPanel.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,17 @@ Ext4.define('SequenceAnalysis.panel.VariantScatterGatherPanel', {
8787
value: 10
8888
});
8989
}
90+
else if (val === 'specificIntervals') {
91+
toAdd.push({
92+
xtype: 'sequenceanalysis-intervalfield',
93+
labelWidth: this.labelWidth,
94+
name: 'scatterGather.specificIntervals',
95+
label: 'Intervals to Process',
96+
helpPopup: 'The intervals to process. They should be in the form: chr01:102-20394',
97+
allowBlank: false,
98+
defaultValue: null
99+
});
100+
}
90101

91102
if (toAdd.length) {
92103
panel.add(toAdd);

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GenotypeGVCFHandler.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import org.labkey.api.writer.PrintWriters;
3737
import org.labkey.sequenceanalysis.ScatterGatherUtils;
3838
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
39-
import org.labkey.sequenceanalysis.pipeline.JobContextImpl;
4039
import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler;
4140
import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob;
4241
import org.labkey.sequenceanalysis.run.util.AbstractGenomicsDBImportHandler;
@@ -50,7 +49,6 @@
5049
import java.io.IOException;
5150
import java.io.PrintWriter;
5251
import java.util.ArrayList;
53-
import java.util.Arrays;
5452
import java.util.Collection;
5553
import java.util.Date;
5654
import java.util.HashSet;

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReferenceGenomeImpl.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package org.labkey.sequenceanalysis.pipeline;
22

33
import com.fasterxml.jackson.annotation.JsonIgnore;
4+
import htsjdk.samtools.SAMSequenceDictionary;
5+
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
46
import org.apache.logging.log4j.Logger;
57
import org.jetbrains.annotations.NotNull;
68
import org.jetbrains.annotations.Nullable;
@@ -110,6 +112,13 @@ public File getSequenceDictionary()
110112
return getWorkingFastaFile() == null ? null : new File(FileUtil.getBaseName(getWorkingFastaFile().getPath()) + ".dict");
111113
}
112114

115+
@JsonIgnore
116+
@Override
117+
public SAMSequenceDictionary extractDictionary()
118+
{
119+
return getSequenceDictionary() == null ? null : SAMSequenceDictionaryExtractor.extractDictionary(getSequenceDictionary().toPath());
120+
}
121+
113122
@Override
114123
public String getName()
115124
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingJob.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
3838
import org.labkey.api.writer.PrintWriters;
3939
import org.labkey.sequenceanalysis.util.ScatterGatherUtils;
40+
import org.labkey.sequenceanalysis.util.SequenceUtil;
4041

4142
import java.io.File;
4243
import java.io.IOException;
@@ -164,6 +165,22 @@ else if (_scatterGatherMethod == VariantProcessingStep.ScatterGatherMethod.fixed
164165
getLogger().info("Creating " + numJobs + " jobs with approximate size: " + jobSize + " bp.");
165166
ret = ScatterGatherUtils.divideGenome(dict, jobSize, true, -1, false);
166167
}
168+
else if (_scatterGatherMethod == VariantProcessingStep.ScatterGatherMethod.specificInternals)
169+
{
170+
try
171+
{
172+
String intervalsRaw = StringUtils.trimToNull(getParameterJson().getString("scatterGather.specificIntervals"));
173+
String[] intervals = intervalsRaw.split(";");
174+
List<Interval> values = SequenceUtil.validateAndParseIntervals(intervals, dict);
175+
176+
ret = new LinkedHashMap<>();
177+
ret.put("Job1", values);
178+
}
179+
catch (PipelineJobException e)
180+
{
181+
throw new IllegalArgumentException(e);
182+
}
183+
}
167184
else
168185
{
169186
throw new IllegalArgumentException("Unknown scatter type: " + _scatterGatherMethod.name());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/DepthOfCoverageHandler.java

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
116116
if (intervalString != null)
117117
{
118118
String[] intervals = intervalString.split(";");
119-
validateIntervals(intervals);
119+
SequenceUtil.validateAndParseIntervals(intervals, rg.extractDictionary());
120120
for (String i : intervals)
121121
{
122122
extraArgs.add("-L");
@@ -225,25 +225,4 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
225225
ctx.getFileManager().addSequenceOutput(so);
226226
}
227227
}
228-
229-
public static void validateIntervals(String[] intervals) throws PipelineJobException
230-
{
231-
for (String i : intervals)
232-
{
233-
//NOTE: the contig name can contain hyphen..
234-
String[] tokens = i.split(":");
235-
if (tokens.length > 2)
236-
{
237-
throw new PipelineJobException("Invalid interval: " + i);
238-
}
239-
else if (tokens.length == 2)
240-
{
241-
String[] coords = tokens[1].split("-");
242-
if (coords.length != 2)
243-
{
244-
throw new PipelineJobException("Invalid interval: " + i);
245-
}
246-
}
247-
}
248-
}
249228
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import htsjdk.samtools.SAMReadGroupRecord;
66
import htsjdk.samtools.SAMRecord;
77
import htsjdk.samtools.SAMRecordIterator;
8+
import htsjdk.samtools.SAMSequenceDictionary;
9+
import htsjdk.samtools.SAMSequenceRecord;
810
import htsjdk.samtools.SamReader;
911
import htsjdk.samtools.SamReaderFactory;
1012
import htsjdk.samtools.ValidationStringency;
@@ -661,4 +663,45 @@ public static String getLegalReadGroupName(String rsName)
661663
{
662664
return rsName.replaceAll(" ", "_");
663665
}
666+
667+
public static List<Interval> validateAndParseIntervals(String[] intervals, SAMSequenceDictionary dict) throws PipelineJobException
668+
{
669+
List<Interval> ret = new ArrayList<>();
670+
for (String i : intervals)
671+
{
672+
String contig;
673+
//NOTE: the contig name can contain hyphen..
674+
String[] tokens = i.split(":");
675+
if (tokens.length > 2)
676+
{
677+
throw new PipelineJobException("Invalid interval: " + i);
678+
}
679+
680+
if (tokens.length == 2)
681+
{
682+
String[] coords = tokens[1].split("-");
683+
if (coords.length != 2)
684+
{
685+
throw new PipelineJobException("Invalid interval: " + i);
686+
}
687+
688+
int start = Integer.parseInt(coords[0]);
689+
int end = Integer.parseInt(coords[1]);
690+
691+
ret.add(new Interval(tokens[0], start, end));
692+
}
693+
else
694+
{
695+
SAMSequenceRecord rec = dict.getSequence(tokens[0]);
696+
if (rec == null)
697+
{
698+
throw new PipelineJobException("Unable to find sequence: " + tokens[0]);
699+
}
700+
701+
ret.add(new Interval(tokens[0], 1, rec.getSequenceLength()));
702+
}
703+
}
704+
705+
return ret;
706+
}
664707
}

0 commit comments

Comments
 (0)