Skip to content

Commit ba19924

Browse files
committed
Restore original ClinVar annotation code
1 parent 8921ad2 commit ba19924

File tree

2 files changed

+48
-2
lines changed

2 files changed

+48
-2
lines changed

mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java

Lines changed: 44 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@
6060
public class AnnotationStep extends AbstractCommandPipelineStep<CassandraRunner> implements VariantProcessingStep
6161
{
6262
public static final String GRCH37 = "genome37";
63+
private static final String CLINVAR_VCF = "clinvar37";
6364
private static final String DBNSFP_FILE = "dbnsfpFile";
6465

6566
public static final String CHAIN_FILE = "CHAIN_FILE";
@@ -74,6 +75,10 @@ public static class Provider extends AbstractVariantProcessingStepProvider<Annot
7475
public Provider()
7576
{
7677
super("AnnotateVariants", "Annotate VCF for mGAP", "VCF Annotation", "This will annotate an input NHP VCF using human annotations including funcotator and SnpSift. This jobs will automatically look for chain files based on the source VCF genome and GRCh37/38 targets and will fail if these are not found.", Arrays.asList(
78+
ToolParameterDescriptor.createExpDataParam(CLINVAR_VCF, "Clinvar 2.0 VCF (GRCh37)", "This is the DataId of the VCF containing human Clinvar variants, which should use the GRCh37 genome. After liftover of the rhesus data, any matching variants are annotated.", "ldk-expdatafield", new JSONObject()
79+
{{
80+
put("allowBlank", false);
81+
}}, null),
7782
ToolParameterDescriptor.createExpDataParam(DBNSFP_FILE, "dbNSFP Database (GRCh37)", "This is the DataId of the dbNSFP database (txt.gz file) using the GRCh37 genome.", "ldk-expdatafield", new JSONObject()
7883
{{
7984
put("allowBlank", false);
@@ -205,6 +210,12 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
205210
{
206211
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
207212

213+
File clinvarVCF = getPipelineCtx().getSequenceSupport().getCachedData(getProvider().getParameterByName(CLINVAR_VCF).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
214+
if (!clinvarVCF.exists())
215+
{
216+
throw new PipelineJobException("Unable to find file: " + clinvarVCF.getPath());
217+
}
218+
208219
ReferenceGenome grch37Genome = getPipelineCtx().getSequenceSupport().getCachedGenome(getProvider().getParameterByName(GRCH37).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class));
209220
Integer chainFileId = getPipelineCtx().getSequenceSupport().getCachedObject(CHAIN_FILE, Integer.class);
210221
File chainFile = getPipelineCtx().getSequenceSupport().getCachedData(chainFileId);
@@ -370,6 +381,38 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
370381
output.addIntermediateFile(liftedToGRCh37);
371382
output.addIntermediateFile(new File(liftedToGRCh37.getPath() + ".tbi"));
372383

384+
//annotate with clinvar
385+
getPipelineCtx().getLogger().info("annotating with ClinVar 2.0");
386+
File clinvarAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cv.vcf.gz");
387+
if (forceRecreate || !indexExists(clinvarAnnotated))
388+
{
389+
ClinvarAnnotatorRunner cvRunner = new ClinvarAnnotatorRunner(getPipelineCtx().getLogger());
390+
cvRunner.execute(liftedToGRCh37, clinvarVCF, clinvarAnnotated);
391+
}
392+
else
393+
{
394+
getPipelineCtx().getLogger().info("resuming with existing file: " + clinvarAnnotated.getPath());
395+
}
396+
output.addOutput(clinvarAnnotated, "VCF Annotated With ClinVar2.0");
397+
output.addIntermediateFile(clinvarAnnotated);
398+
output.addIntermediateFile(new File(clinvarAnnotated.getPath() + ".tbi"));
399+
400+
//backport ClinVar
401+
getPipelineCtx().getLogger().info("backport ClinVar 2.0 to source genome");
402+
File clinvarAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(clinvarAnnotated.getName()) + ".bp.vcf.gz");
403+
if (forceRecreate || !indexExists(clinvarAnnotatedBackport ))
404+
{
405+
BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger());
406+
bpRunner.execute(clinvarAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), clinvarAnnotatedBackport);
407+
}
408+
else
409+
{
410+
getPipelineCtx().getLogger().info("resuming with existing file: " + clinvarAnnotatedBackport.getPath());
411+
}
412+
output.addOutput(clinvarAnnotatedBackport, "VCF Annotated With Clinvar, Backported");
413+
output.addIntermediateFile(clinvarAnnotatedBackport);
414+
output.addIntermediateFile(new File(clinvarAnnotatedBackport.getPath() + ".tbi"));
415+
373416
//annotate with SnpSift
374417
getPipelineCtx().getLogger().info("annotating with SnpSift/dbnsfp");
375418
File snpSiftAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".snpSift.vcf.gz");
@@ -543,7 +586,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
543586
addToolFieldNames("SnpSift", "-ssf", options, multiAnnotated.getParentFile(), output, liftFields, SOURCE_FIELDS);
544587
addToolFieldNames("SnpSift", "-rssf", options, multiAnnotated.getParentFile(), output, liftFields, TARGET_FIELDS);
545588

546-
maRunner.execute(inputVCF, cassandraAnnotatedBackport, liftoverRejects, funcotatorAnnotatedBackport, snpSiftAnnotatedBackport, multiAnnotated, options);
589+
maRunner.execute(inputVCF, cassandraAnnotatedBackport, clinvarAnnotatedBackport, liftoverRejects, funcotatorAnnotatedBackport, snpSiftAnnotatedBackport, multiAnnotated, options);
547590
}
548591
else
549592
{

mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public MultiSourceAnnotatorRunner(Logger log)
1515
super(log);
1616
}
1717

18-
public File execute(File inputVcf, @Nullable File cassandraVcf, File liftoverRejects, @Nullable File funcotator, @Nullable File snpSift, File outputVcf, @Nullable List<String> options) throws PipelineJobException
18+
public File execute(File inputVcf, @Nullable File cassandraVcf, File clinvarAnnotatedBackport, File liftoverRejects, @Nullable File funcotator, @Nullable File snpSift, File outputVcf, @Nullable List<String> options) throws PipelineJobException
1919
{
2020
List<String> args = getBaseArgs("MultiSourceAnnotator");
2121

@@ -31,6 +31,9 @@ public File execute(File inputVcf, @Nullable File cassandraVcf, File liftoverRej
3131
args.add("-lr");
3232
args.add(liftoverRejects.getPath());
3333

34+
args.add("-cv");
35+
args.add(clinvarAnnotatedBackport.getPath());
36+
3437
if (funcotator != null)
3538
{
3639
args.add("-f");

0 commit comments

Comments
 (0)