Skip to content

Commit 0a65541

Browse files
committed
Update Scatter/Gather logic to sort on contig size
1 parent 0c73722 commit 0a65541

File tree

2 files changed

+22
-13
lines changed

2 files changed

+22
-13
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -151,14 +151,17 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
151151
List<File> outputs = new ArrayList<>();
152152
if (getVariantPipelineJob(ctx.getJob()) != null && getVariantPipelineJob(ctx.getJob()).isScatterJob())
153153
{
154-
for (Interval i : getVariantPipelineJob(ctx.getJob()).getIntervalsForTask())
154+
int idx = 0;
155+
List<Interval> intervals = getVariantPipelineJob(ctx.getJob()).getIntervalsForTask();
156+
for (Interval i : intervals)
155157
{
158+
idx++;
156159
if (i.getStart() != 1)
157160
{
158161
throw new PipelineJobException("Expected all intervals to start on the first base: " + i);
159162
}
160163

161-
File o = runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig(), jobCompleted);
164+
File o = runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig(), (" (" + idx + " of " + intervals.size() + ")"), jobCompleted);
162165
if (o != null)
163166
{
164167
outputs.add(o);
@@ -167,7 +170,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
167170
}
168171
else
169172
{
170-
outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName, null, jobCompleted));
173+
outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName, null, null, jobCompleted));
171174
}
172175

173176
try
@@ -228,11 +231,11 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
228231
}
229232
}
230233

231-
private File runPbsvCall(JobContext ctx, List<File> inputs, ReferenceGenome genome, String outputBaseName, @Nullable String contig, boolean jobCompleted) throws PipelineJobException
234+
private File runPbsvCall(JobContext ctx, List<File> inputs, ReferenceGenome genome, String outputBaseName, @Nullable String contig, @Nullable String statusSuffix, boolean jobCompleted) throws PipelineJobException
232235
{
233236
if (contig != null)
234237
{
235-
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Processing: " + contig);
238+
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Processing: " + contig + (statusSuffix == null ? "" : statusSuffix));
236239
}
237240

238241
if (inputs.isEmpty())

SequenceAnalysis/src/org/labkey/sequenceanalysis/util/ScatterGatherUtils.java

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
import org.junit.Test;
88

99
import java.util.ArrayList;
10+
import java.util.Collections;
11+
import java.util.Comparator;
1012
import java.util.HashSet;
1113
import java.util.LinkedHashMap;
1214
import java.util.List;
@@ -114,7 +116,11 @@ private void addInterval(String refName, int start, int end)
114116
public static LinkedHashMap<String, List<Interval>> divideGenome(SAMSequenceDictionary dict, int optimalBasesPerJob, boolean allowSplitChromosomes, int maxContigsPerJob)
115117
{
116118
ActiveIntervalSet ais = new ActiveIntervalSet(optimalBasesPerJob, allowSplitChromosomes, maxContigsPerJob);
117-
for (SAMSequenceRecord rec : dict.getSequences())
119+
120+
// Sort the sequences in descending length, rather than alphabetic on name:
121+
List<SAMSequenceRecord> sortedSeqs = new ArrayList<>(dict.getSequences());
122+
sortedSeqs.sort(Comparator.comparingInt(SAMSequenceRecord::getSequenceLength).reversed());
123+
for (SAMSequenceRecord rec : sortedSeqs)
118124
{
119125
ais.add(rec);
120126
}
@@ -148,8 +154,8 @@ public void testScatter()
148154
SAMSequenceDictionary dict = getDict();
149155
Map<String, List<Interval>> ret = divideGenome(dict, 1000, true, -1);
150156
assertEquals("Incorrect number of jobs", 8, ret.size());
151-
assertEquals("Incorrect interval end", 2000, ret.get("Job3").get(0).getEnd());
152-
assertEquals("Incorrect start", 1001, ret.get("Job3").get(0).getStart());
157+
assertEquals("Incorrect interval end", 1000, ret.get("Job3").get(0).getEnd());
158+
assertEquals("Incorrect start", 1, ret.get("Job3").get(0).getStart());
153159
assertEquals("Incorrect interval end", 4, ret.get("Job8").size());
154160

155161
Map<String, List<Interval>> ret2 = divideGenome(dict, 3000, false, -1);
@@ -183,12 +189,12 @@ public void testScatter()
183189
}
184190

185191
Map<String, List<Interval>> ret5 = divideGenome(dict, 750, true, -1);
186-
assertEquals("Incorrect number of jobs", 10, ret5.size());
187-
assertEquals("Incorrect interval end", 1000, ret5.get("Job1").get(0).getEnd());
188-
assertEquals("Incorrect interval end", 4, ret5.get("Job10").size());
192+
assertEquals("Incorrect number of jobs", 9, ret5.size());
193+
assertEquals("Incorrect interval end", 750, ret5.get("Job1").get(0).getEnd());
194+
assertEquals("Incorrect interval end", 4, ret5.get("Job9").size());
189195

190-
assertEquals("Incorrect interval start", 751, ret5.get("Job3").get(0).getStart());
191-
assertEquals("Incorrect interval start", 1501, ret5.get("Job8").get(0).getStart());
196+
assertEquals("Incorrect interval start", 1501, ret5.get("Job3").get(0).getStart());
197+
assertEquals("Incorrect interval start", 1, ret5.get("Job8").get(0).getStart());
192198

193199
Map<String, List<Interval>> ret6 = divideGenome(dict, 5000, false, 2);
194200
assertEquals("Incorrect number of jobs", 5, ret6.size());

0 commit comments

Comments
 (0)