Skip to content

Commit 217b078

Browse files
committed
Add sorting to PBSV merge, since translocations can produce an out-of-order VCF
1 parent 111bcc3 commit 217b078

File tree

6 files changed: 34 additions and 10 deletions

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/SequenceAnalysisService.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,8 @@ static public void setInstance(SequenceAnalysisService instance)
9494

9595
abstract public File combineVcfs(List<File> files, File outputGz, ReferenceGenome genome, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel) throws PipelineJobException;
9696

97+
abstract public File combineVcfs(List<File> files, File outputGz, ReferenceGenome genome, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel, boolean sortAfterMerge) throws PipelineJobException;
98+
9799
abstract public String getScriptPath(String moduleName, String path) throws PipelineJobException;
98100

99101
abstract public void sortGxf(Logger log, File input, @Nullable File output) throws PipelineJobException;

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,11 @@ default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, P
6767
{
6868

6969
}
70+
71+
default boolean doSortAfterMerge()
72+
{
73+
return false;
74+
}
7075
}
7176

7277
public static interface MayRequirePrepareTask

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisServiceImpl.java

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,13 @@
11
package org.labkey.sequenceanalysis;
22

33
import htsjdk.samtools.util.FileExtensions;
4-
import htsjdk.tribble.Tribble;
54
import htsjdk.tribble.index.Index;
65
import htsjdk.tribble.index.IndexFactory;
76
import htsjdk.variant.vcf.VCFCodec;
87
import org.apache.commons.lang3.StringUtils;
98
import org.apache.commons.lang3.SystemUtils;
109
import org.apache.logging.log4j.LogManager;
1110
import org.apache.logging.log4j.Logger;
12-
import org.jetbrains.annotations.NotNull;
1311
import org.jetbrains.annotations.Nullable;
1412
import org.labkey.api.data.CompareType;
1513
import org.labkey.api.data.Container;
@@ -46,7 +44,6 @@
4644
import org.labkey.sequenceanalysis.pipeline.ReferenceGenomeImpl;
4745
import org.labkey.sequenceanalysis.pipeline.ReferenceLibraryPipelineJob;
4846
import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper;
49-
import org.labkey.sequenceanalysis.run.util.BgzipRunner;
5047
import org.labkey.sequenceanalysis.run.util.FastaIndexer;
5148
import org.labkey.sequenceanalysis.run.util.GxfSorter;
5249
import org.labkey.sequenceanalysis.run.util.IndexFeatureFileWrapper;
@@ -60,7 +57,6 @@
6057
import java.sql.ResultSet;
6158
import java.sql.SQLException;
6259
import java.util.ArrayList;
63-
import java.util.Arrays;
6460
import java.util.Collection;
6561
import java.util.Collections;
6662
import java.util.HashMap;
@@ -69,7 +65,6 @@
6965
import java.util.Map;
7066
import java.util.Set;
7167
import java.util.function.Function;
72-
import java.util.function.Predicate;
7368

7469
/**
7570
* User: bimber
@@ -476,7 +471,13 @@ public String createReferenceLibrary(List<Integer> sequenceIds, Container c, Use
476471
@Override
477472
public File combineVcfs(List<File> files, File outputGz, ReferenceGenome genome, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel) throws PipelineJobException
478473
{
479-
return SequenceUtil.combineVcfs(files, genome, outputGz, log, multiThreaded, compressionLevel);
474+
return combineVcfs(files, outputGz, genome, log, multiThreaded, compressionLevel, false);
475+
}
476+
477+
@Override
478+
public File combineVcfs(List<File> files, File outputGz, ReferenceGenome genome, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel, boolean sortAfterMerge) throws PipelineJobException
479+
{
480+
return SequenceUtil.combineVcfs(files, genome, outputGz, log, multiThreaded, compressionLevel, sortAfterMerge);
480481
}
481482

482483
@Override

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,8 @@ private File runDefaultVariantMerge(JobContextImpl ctx, TaskFileManagerImpl mana
230230
throw new PipelineJobException("Missing one of more VCFs: " + missing.stream().map(File::getPath).collect(Collectors.joining(",")));
231231
}
232232

233-
combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, getJob().getLogger(), true, null);
233+
boolean sortAfterMerge = handler instanceof VariantProcessingStep.SupportsScatterGather && ((VariantProcessingStep.SupportsScatterGather)handler).doSortAfterMerge();
234+
combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, getJob().getLogger(), true, null, sortAfterMerge);
234235
}
235236
manager.addOutput(action, "Merged VCF", combined);
236237

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,9 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
179179
ctx.getFileManager().addIntermediateFile(SequenceAnalysisService.get().ensureVcfIndex(f, ctx.getLogger(), false));
180180
}
181181
vcfOutGz = SequenceUtil.combineVcfs(outputs, genome, new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz"), ctx.getLogger(), true, null, false);
182+
183+
// NOTE: the resulting file can be out of order due to translocations
184+
SequenceUtil.sortROD(vcfOutGz, ctx.getLogger(), 2);
182185
}
183186

184187
SequenceAnalysisService.get().ensureVcfIndex(vcfOutGz, ctx.getLogger(), true);
@@ -437,4 +440,10 @@ public void verifyAndAddMissingSamples(JobContext ctx, File input, List<File> in
437440
throw new PipelineJobException(e);
438441
}
439442
}
443+
444+
@Override
445+
public boolean doSortAfterMerge()
446+
{
447+
return true;
448+
}
440449
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -436,12 +436,12 @@ public static void sortROD(File input, Logger log, Integer startColumnIdx) throw
436436
sorted.delete();
437437
}
438438

439-
public static File combineVcfs(List<File> files, ReferenceGenome genome, File outputGzip, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel) throws PipelineJobException
439+
public static File combineVcfs(List<File> files, ReferenceGenome genome, File outputGzip, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel, boolean sortAfterMerge) throws PipelineJobException
440440
{
441-
return combineVcfs(files, genome, outputGzip, log, multiThreaded, compressionLevel, true);
441+
return combineVcfs(files, genome, outputGzip, log, multiThreaded, compressionLevel, true, sortAfterMerge);
442442
}
443443

444-
public static File combineVcfs(List<File> files, ReferenceGenome genome, File outputGzip, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel, boolean showTotals) throws PipelineJobException
444+
public static File combineVcfs(List<File> files, ReferenceGenome genome, File outputGzip, Logger log, boolean multiThreaded, @Nullable Integer compressionLevel, boolean showTotals, boolean sortAfterMerge) throws PipelineJobException
445445
{
446446
log.info("combining VCFs: ");
447447

@@ -506,6 +506,12 @@ else if (!samples.equals(header.getGenotypeSamples()))
506506
SimpleScriptWrapper wrapper = new SimpleScriptWrapper(log);
507507
wrapper.execute(Arrays.asList("/bin/bash", bashTmp.getPath()));
508508

509+
if (sortAfterMerge)
510+
{
511+
log.debug("sorting VCF");
512+
sortROD(outputGzip, log, 2);
513+
}
514+
509515
SequenceAnalysisService.get().ensureVcfIndex(outputGzip, log);
510516

511517
bashTmp.delete();

0 commit comments

Comments
 (0)