Skip to content

Commit 85b85de

Browse files
committed
Add step to annotate a VCF using mGAP release
1 parent 88c6857 commit 85b85de

File tree

2 files changed

+146
-0
lines changed

2 files changed

+146
-0
lines changed

mGAP/src/org/labkey/mgap/mGAPModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import org.labkey.mgap.pipeline.RenameSamplesForMgapStep;
5454
import org.labkey.mgap.pipeline.SampleSpecificGenotypeFiltrationStep;
5555
import org.labkey.mgap.pipeline.VcfComparisonStep;
56+
import org.labkey.mgap.pipeline.mGapReleaseAlleleFreqStep;
5657
import org.labkey.mgap.pipeline.mGapReleaseAnnotateNovelSitesStep;
5758
import org.labkey.mgap.pipeline.mGapReleaseComparisonStep;
5859
import org.labkey.mgap.pipeline.mGapReleaseGenerator;
@@ -136,6 +137,7 @@ public PipelineStartup()
136137
SequencePipelineService.get().registerPipelineStep(new mGapReleaseAnnotateNovelSitesStep.Provider());
137138
SequencePipelineService.get().registerPipelineStep(new GenerateMgapTracksStep.Provider());
138139
SequencePipelineService.get().registerPipelineStep(new IndexVariantsForMgapStep.Provider());
140+
SequencePipelineService.get().registerPipelineStep(new mGapReleaseAlleleFreqStep.Provider());
139141

140142
_hasRegistered = true;
141143
}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package org.labkey.mgap.pipeline;
2+
3+
import htsjdk.samtools.util.Interval;
4+
import org.apache.logging.log4j.Logger;
5+
import org.json.JSONObject;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
8+
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
9+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
10+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
11+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
12+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
13+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
14+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
15+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
16+
import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper;
17+
import org.labkey.api.util.PageFlowUtil;
18+
19+
import javax.annotation.Nullable;
20+
import java.io.File;
21+
import java.util.ArrayList;
22+
import java.util.List;
23+
24+
/**
25+
* User: bimber
26+
* Date: 6/15/2014
27+
* Time: 12:39 PM
28+
*/
29+
public class mGapReleaseAlleleFreqStep extends AbstractCommandPipelineStep<mGapReleaseAlleleFreqStep.VariantAnnotatorWrapper> implements VariantProcessingStep, VariantProcessingStep.SupportsScatterGather
30+
{
31+
public static final String REF_VCF = "refVcf";
32+
33+
public mGapReleaseAlleleFreqStep(PipelineStepProvider<?> provider, PipelineContext ctx)
34+
{
35+
super(provider, ctx, new VariantAnnotatorWrapper(ctx.getLogger()));
36+
}
37+
38+
public static class Provider extends AbstractVariantProcessingStepProvider<mGapReleaseAlleleFreqStep> implements SupportsScatterGather
39+
{
40+
public Provider()
41+
{
42+
super("mGapReleaseAlleleFreq", "Compare VCF to mGap Release", "DiscvrVariantAnnotator", "Annotate a VCF using the AF field from an mGAP release.", List.of(
43+
ToolParameterDescriptor.createExpDataParam(REF_VCF, "mGAP Release", "The mGAP release VCF to use for annotation", "sequenceanalysis-sequenceoutputfileselectorfield", new JSONObject()
44+
{{
45+
put("allowBlank", false);
46+
put("category", "mGAP Release");
47+
put("performGenomeFilter", false);
48+
put("doNotIncludeInTemplates", true);
49+
}}, null)
50+
), PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js"), null);
51+
}
52+
53+
@Override
54+
public mGapReleaseAlleleFreqStep create(PipelineContext ctx)
55+
{
56+
return new mGapReleaseAlleleFreqStep(this, ctx);
57+
}
58+
}
59+
60+
@Override
61+
public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
62+
{
63+
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
64+
getPipelineCtx().getLogger().info("Annotating VCF using mGAP Release");
65+
66+
Integer refFileId = getProvider().getParameterByName(REF_VCF).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
67+
File refVcf = getPipelineCtx().getSequenceSupport().getCachedData(refFileId);
68+
if (refVcf == null || !refVcf.exists())
69+
{
70+
throw new PipelineJobException("Unable to find file: " + refFileId + ", path: " + refVcf);
71+
}
72+
73+
List<String> extraArgs = new ArrayList<>();
74+
if (intervals != null)
75+
{
76+
intervals.forEach(interval -> {
77+
extraArgs.add("-L");
78+
extraArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
79+
});
80+
}
81+
82+
extraArgs.add("-A");
83+
extraArgs.add("RefAlleleFrequency");
84+
85+
extraArgs.add("--target-info-field-key");
86+
extraArgs.add("mGAP.AF");
87+
88+
extraArgs.add("--af-source-vcf");
89+
extraArgs.add(refVcf.getPath());
90+
91+
File outputVcf = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".af.vcf.gz");
92+
93+
getWrapper().execute(genome.getWorkingFastaFile(), inputVCF, outputVcf, extraArgs);
94+
if (!outputVcf.exists())
95+
{
96+
throw new PipelineJobException("Unable to find output: " + outputVcf.getPath());
97+
}
98+
99+
output.addInput(inputVCF, "Input VCF");
100+
output.addInput(genome.getWorkingFastaFile(), "Reference Genome");
101+
102+
output.addOutput(outputVcf, "Annotated VCF");
103+
output.setVcf(outputVcf);
104+
105+
return output;
106+
}
107+
108+
public static class VariantAnnotatorWrapper extends AbstractDiscvrSeqWrapper
109+
{
110+
public VariantAnnotatorWrapper(Logger log)
111+
{
112+
super(log);
113+
}
114+
115+
public void execute(File referenceFasta, File inputVcf, File outputVcf, List<String> options) throws PipelineJobException
116+
{
117+
getLogger().info("Running DiscvrVariantAnnotator");
118+
119+
ensureDictionary(referenceFasta);
120+
121+
List<String> args = new ArrayList<>(getBaseArgs());
122+
args.add("DiscvrVariantAnnotator");
123+
args.add("-R");
124+
args.add(referenceFasta.getPath());
125+
126+
args.add("-V");
127+
args.add(inputVcf.getPath());
128+
129+
args.add("-O");
130+
args.add(outputVcf.getPath());
131+
132+
if (options != null)
133+
{
134+
args.addAll(options);
135+
}
136+
137+
execute(args);
138+
if (!outputVcf.exists())
139+
{
140+
throw new PipelineJobException("Expected output not found: " + outputVcf.getPath());
141+
}
142+
}
143+
}
144+
}

0 commit comments

Comments
 (0)