
Commit 3db76df

Merge discvr-23.11 to develop
2 parents: da147a9 + 6382513

File tree

15 files changed: +466, -220 lines

QueryExtensions/module.properties

Lines changed: 1 addition & 0 deletions

@@ -1,4 +1,5 @@
 ModuleClass: org.labkey.queryextensions.QueryExtensionsModule
+ManageVersion: false
 Label: Query Extensions
 Description: This module contains low-level extensions to the LabKey Query layer
 License: Apache 2.0

SequenceAnalysis/resources/web/SequenceAnalysis/panel/VariantProcessingPanel.js

Lines changed: 1 addition & 1 deletion

@@ -269,7 +269,7 @@ Ext4.define('SequenceAnalysis.panel.VariantProcessingPanel', {
     description: 'A BED or similar file with intervals to skip',
     defaultValue: null
 },{
-    fieldXtype: 'ldk-expdatafield',
+    fieldXtype: 'checkbox',
     name: 'skipExcessHetAndInbreeding',
     label: 'Skip Excess Het And Inbreeding',
     description: 'If checked, the ExcessHet and InbreedingCoeff annotations will be skipped, which can be important when using force-output-intervals',

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 1 addition & 2 deletions

@@ -172,8 +172,7 @@ else if (!d.getFile().exists())
 {
     if (d != null && d.getFile() != null && d.getFile().exists())
     {
-        // NOTE: ultimately remove this:
-        log.info("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
+        log.error("ReadData marked as archived, but file exists: " + rd.getRowid() + ", " + rd.getFileId1() + ", " + d.getFile().getPath() + " for container: " + (c == null ? rd.getContainer() : c.getPath()));
     }
 }
 }

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions

@@ -78,6 +78,7 @@
 import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
 import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
 import org.labkey.sequenceanalysis.run.analysis.BamIterator;
+import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
 import org.labkey.sequenceanalysis.run.analysis.ExportOverlappingReadsAnalysis;
 import org.labkey.sequenceanalysis.run.analysis.GenrichStep;
 import org.labkey.sequenceanalysis.run.analysis.HaplotypeCallerAnalysis;

@@ -312,6 +313,7 @@ public static void registerPipelineSteps()
 SequencePipelineService.get().registerPipelineStep(new KingInferenceStep.Provider());
 SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
 SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
+SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());

 //handlers
 SequenceAnalysisService.get().registerFileHandler(new LiftoverHandler());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillTagsStep.java

Lines changed: 134 additions & 0 deletions

@@ -0,0 +1,134 @@
+package org.labkey.sequenceanalysis.run.analysis;
+
+import htsjdk.samtools.util.Interval;
+import org.apache.commons.lang3.StringUtils;
+import org.json.JSONObject;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
+import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
+import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
+import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
+import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
+import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
+import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper;
+
+import javax.annotation.Nullable;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class BcftoolsFillTagsStep extends AbstractCommandPipelineStep<BcftoolsRunner> implements VariantProcessingStep
+{
+    public BcftoolsFillTagsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
+    {
+        super(provider, ctx, new BcftoolsRunner(ctx.getLogger()));
+    }
+
+    public static class Provider extends AbstractVariantProcessingStepProvider<BcftoolsFillTagsStep> implements VariantProcessingStep.RequiresPedigree, VariantProcessingStep.SupportsScatterGather
+    {
+        public Provider()
+        {
+            super("BcftoolsFillTagsStep", "Bcftools Fill-tags", "bcftools", "Annotate variants using bcftools fill-tags", Arrays.asList(
+                    ToolParameterDescriptor.create("hwe", "HWE", "If selected, HWE will be annotated", "checkbox", new JSONObject(){{
+                        put("checked", true);
+                    }}, true),
+                    ToolParameterDescriptor.create("exchet", "Excess Het", "If selected, ExcHet will be annotated.", "checkbox", new JSONObject(){{
+                        put("checked", true);
+                    }}, true)
+            ), null, "");
+        }
+
+        @Override
+        public BcftoolsFillTagsStep create(PipelineContext ctx)
+        {
+            return new BcftoolsFillTagsStep(this, ctx);
+        }
+    }
+
+    @Override
+    public VariantProcessingStep.Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
+    {
+        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
+
+        List<String> options = new ArrayList<>();
+        options.add(getWrapper().getBcfToolsPath().getPath());
+        options.add("+fill-tags");
+
+        options.add(inputVCF.getPath());
+
+        if (intervals != null)
+        {
+            options.add("--regions");
+            options.add(intervals.stream().map(interval -> interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()).collect(Collectors.joining(",")));
+        }
+
+        options.add("-O");
+        options.add("z9");
+
+        Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
+        if (threads != null)
+        {
+            options.add("--threads");
+            options.add(threads.toString());
+        }
+
+        File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".ft.vcf.gz");
+        options.add("-o");
+        options.add(outputVcf.getPath());
+
+        List<String> annotations = new ArrayList<>();
+        if (getProvider().getParameterByName("hwe").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false))
+        {
+            annotations.add("hwe");
+        }
+
+        if (getProvider().getParameterByName("exchet").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false))
+        {
+            annotations.add("exchet");
+        }
+
+        if (annotations.isEmpty())
+        {
+            throw new PipelineJobException("No annotations were selected");
+        }
+
+        options.add("-t");
+        options.add(StringUtils.join(annotations, ","));
+
+        BcftoolsRunner wrapper = getWrapper();
+
+        String bcfPluginDir = StringUtils.trimToNull(System.getenv("BCFTOOLS_PLUGINS"));
+        if (bcfPluginDir != null)
+        {
+            getPipelineCtx().getLogger().debug("Setting BCFTOOLS_PLUGINS environment variable: " + bcfPluginDir);
+            wrapper.addToEnvironment("BCFTOOLS_PLUGINS", bcfPluginDir);
+        }
+
+        wrapper.execute(options);
+        if (!outputVcf.exists())
+        {
+            throw new PipelineJobException("output not found: " + outputVcf);
+        }
+
+        try
+        {
+            SequenceAnalysisService.get().ensureVcfIndex(outputVcf, getWrapper().getLogger());
+        }
+        catch (IOException e)
+        {
+            throw new PipelineJobException(e);
+        }
+
+        output.setVcf(outputVcf);
+
+        return output;
+    }
+}
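
For orientation: processVariants() above assembles a bcftools argument list consisting of the +fill-tags plugin, the input VCF path, an optional --regions string (when scatter/gather intervals are supplied), -O z9 for compressed output, an optional --threads value, the -o output path (<input basename>.ft.vcf.gz), and -t with the selected tags (hwe and/or exchet). Below is a minimal, self-contained sketch of how the --regions value is serialized from htsjdk Interval objects; the class name and coordinates are illustrative only, not part of the commit.

// Sketch of the --regions serialization used in processVariants() above.
// Contig names and coordinates are made up for illustration.
import htsjdk.samtools.util.Interval;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class RegionsStringDemo
{
    public static void main(String[] args)
    {
        List<Interval> intervals = Arrays.asList(new Interval("chr1", 1, 1000000), new Interval("chr2", 500, 600));
        String regions = intervals.stream()
                .map(i -> i.getContig() + ":" + i.getStart() + "-" + i.getEnd())
                .collect(Collectors.joining(","));
        System.out.println(regions); // prints: chr1:1-1000000,chr2:500-600
    }
}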

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 56 additions & 2 deletions

@@ -110,9 +110,22 @@ public Provider()
     put("width", 400);
     put("allowBlank", true);
 }}, null),
+ToolParameterDescriptor.create("generateConsensus", "Generate Consensus", "If selected, a FASTA with the simple majority consensus will be generated.", "checkbox", new JSONObject()
+{{
+
+}}, false),
 ToolParameterDescriptor.create("minCoverage", "Min Coverage For Consensus", "If provided, a consensus will only be called over regions with at least this depth", "ldk-integerfield", new JSONObject(){{
     put("minValue", 0);
 }}, 25),
+ToolParameterDescriptor.create("generateTable", "Generate Variant Table", "If selected, a TSV listing variants above the given threshold will be generated.", "checkbox", new JSONObject()
+{{
+
+}}, false),
+ToolParameterDescriptor.create("minFractionForTable", "Min Fraction for Table", "If the option to generate a table output is used, only variants with frequency of this threshold will be included", "ldk-numberfield", new JSONObject(){{
+    put("minValue", 0);
+    put("maxValue", 1);
+    put("decimalPrecision", 3);
+}}, 0.01),
 ToolParameterDescriptor.createExpDataParam("primerBedFile", "Primer Sites (BED File)", "This is a BED file specifying the primer binding sites, which will be used to flag variants. Strandedness is ignored.", "ldk-expdatafield", new JSONObject()
 {{
     put("allowBlank", true);

@@ -470,6 +483,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 }

 double minFractionForConsensus = getProvider().getParameterByName("minFractionForConsensus").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);
+boolean generateConsensus = getProvider().getParameterByName("generateConsensus").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+
+boolean generateTable = getProvider().getParameterByName("generateTable").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+double minFractionForTable = getProvider().getParameterByName("minFractionForTable").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);

 Integer primerDataId = getProvider().getParameterByName("primerBedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
 List<Interval> primerIntervals = new ArrayList<>();

@@ -517,11 +534,12 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc

 File loFreqConsensusVcf = getConsensusVcf(outputDir, inputBam);
 File loFreqAllVcf = getAllVcf(outputDir, inputBam);
+File tableFile = getTableFile(outputDir, inputBam);
 Double strandBiasRecoveryAF = getProvider().getParameterByName("strandBiasRecoveryAF").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 1.0);
 SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath());
 VariantContextWriterBuilder writerBuilderConsensus = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict);
 VariantContextWriterBuilder writerBuilderAll = new VariantContextWriterBuilder().setOutputFile(loFreqAllVcf).setReferenceDictionary(dict);
-try (VCFFileReader reader = new VCFFileReader(activeVCF);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build())
+try (VCFFileReader reader = new VCFFileReader(activeVCF);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writerConsensus = writerBuilderConsensus.build();VariantContextWriter writerAll = writerBuilderAll.build();CSVWriter variantTableWriter = generateTable ? new CSVWriter(PrintWriters.getPrintWriter(tableFile), '\t', CSVWriter.NO_QUOTE_CHARACTER) : null)
 {
     VCFHeader header = reader.getFileHeader();

@@ -532,6 +550,11 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 writerConsensus.writeHeader(header);
 writerAll.writeHeader(header);

+if (generateTable)
+{
+    variantTableWriter.writeNext(new String[]{"Contig", "Start", "End", "Reference", "Alt", "Frequency", "AlleleDepth", "TotalDepth"});
+}
+
 SortingCollection<VariantContext> allVariants = getVariantSorter(header);
 SortingCollection<VariantContext> consensusVariants = getVariantSorter(header);
 while (it.hasNext())

@@ -571,6 +594,16 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
     }
 }

+if (generateTable)
+{
+    if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > minFractionForTable)
+    {
+        List<Integer> depths = vc.getAttributeAsIntList("DP4", 0);
+        int alleleDepth = depths.get(2) + depths.get(3);
+        variantTableWriter.writeNext(new String[]{vc.getContig(), String.valueOf(vc.getStart()), String.valueOf(vc.getEnd()), vc.getReference().getBaseString(), vc.getAlternateAllele(0).getBaseString(), String.valueOf(vc.getAttributeAsDouble("AF", 0.0)), String.valueOf(alleleDepth), String.valueOf(vc.getAttributeAsInt("DP", 0))});
+    }
+}
+
 totalVariants++;
 if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) > 0.01)
 {

@@ -672,6 +705,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
     }
     consensusVariants.cleanup();
 }
+catch (IOException e)
+{
+    throw new PipelineJobException(e);
+}

 NumberFormat fmt = NumberFormat.getPercentInstance();
 fmt.setMaximumFractionDigits(2);

@@ -720,12 +757,24 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
 }

 output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
-output.addSequenceOutput(consensusFastaLoFreq, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
+if (generateConsensus)
+{
+    output.addSequenceOutput(consensusFastaLoFreq, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
+}

 boolean runPangolinAndNextClade = getProvider().getParameterByName("runPangolin").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);

 output.addSequenceOutput(loFreqAllVcf, "LoFreq: " + rs.getName(), CATEGORY, rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);

+if (generateTable)
+{
+    if (!tableFile.exists())
+    {
+
+    }
+    output.addSequenceOutput(tableFile, "LoFreq: " + rs.getName(), "LoFreq Variant Table", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
+}
+
 Map<String, String> pangolinData = null;
 if (runPangolinAndNextClade)
 {

@@ -781,6 +830,11 @@ private File getAllVcf(File outputDir, File inputBam)
     return new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.all.vcf.gz");
 }

+private File getTableFile(File outputDir, File inputBam)
+{
+    return new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.txt");
+}
+
 private Set<String> runBcftools(File inputBam, ReferenceGenome referenceGenome, File mask, int minCoverage) throws PipelineJobException
 {
     Set<String> variantsBcftools = new HashSet<>();
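
One note on the variant-table hunk above: LoFreq's DP4 INFO field holds four counts in the samtools-style order (ref-forward, ref-reverse, alt-forward, alt-reverse), so depths.get(2) + depths.get(3) is the number of alt-supporting reads across both strands, while the TotalDepth column comes from the separate DP attribute. The snippet below is a minimal sketch of that arithmetic; the DP4 and DP values are made up, and the class name is illustrative only.

// Sketch of the DP4-based allele depth used when writing the variant table.
// Assumed DP4 ordering: [ref-forward, ref-reverse, alt-forward, alt-reverse]; values are hypothetical.
// Note: in the committed code, TotalDepth is taken from the DP attribute, not from summing DP4.
import java.util.Arrays;
import java.util.List;

public class Dp4AlleleDepthDemo
{
    public static void main(String[] args)
    {
        List<Integer> dp4 = Arrays.asList(120, 115, 30, 28);
        int alleleDepth = dp4.get(2) + dp4.get(3); // alt-supporting reads on both strands: 58
        int dp = 300;                              // hypothetical DP (total depth) attribute
        System.out.println("AlleleDepth=" + alleleDepth + ", approximate frequency=" + ((double) alleleDepth / dp));
    }
}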

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/VariantAnnotatorStep.java

Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@
 */
 public class VariantAnnotatorStep extends AbstractCommandPipelineStep<VariantAnnotatorWrapper> implements VariantProcessingStep
 {
-    public VariantAnnotatorStep(PipelineStepProvider provider, PipelineContext ctx)
+    public VariantAnnotatorStep(PipelineStepProvider<?> provider, PipelineContext ctx)
     {
         super(provider, ctx, new VariantAnnotatorWrapper(ctx.getLogger()));
     }

Studies/module.properties

Lines changed: 1 addition & 0 deletions

@@ -3,3 +3,4 @@ Label: Studies
 Description: Extensions to the study framework, designed to more flexibly manage multiple study designs from one source of data
 License: Apache 2.0
 LicenseURL: http://www.apache.org/licenses/LICENSE-2.0
+ManageVersion: false
