Skip to content

Commit bd92167

Browse files
committed
Add support for alignment clipping
1 parent 4cb1a9f commit bd92167

File tree

3 files changed

+165
-2
lines changed

3 files changed

+165
-2
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
import org.labkey.sequenceanalysis.run.bampostprocessing.BaseQualityScoreRecalibrator;
9090
import org.labkey.sequenceanalysis.run.bampostprocessing.CallMdTagsStep;
9191
import org.labkey.sequenceanalysis.run.bampostprocessing.CleanSamStep;
92+
import org.labkey.sequenceanalysis.run.bampostprocessing.ClipOverlappingAlignmentsWrapper;
9293
import org.labkey.sequenceanalysis.run.bampostprocessing.DiscardUnmappedReadsStep;
9394
import org.labkey.sequenceanalysis.run.bampostprocessing.FixMateInformationStep;
9495
import org.labkey.sequenceanalysis.run.bampostprocessing.IndelRealignerStep;
@@ -262,6 +263,7 @@ public static void registerPipelineSteps()
262263
SequencePipelineService.get().registerPipelineStep(new MarkDuplicatesWithMateCigarStep.Provider());
263264
SequencePipelineService.get().registerPipelineStep(new SortSamStep.Provider());
264265
SequencePipelineService.get().registerPipelineStep(new SplitNCigarReadsStep.Provider());
266+
SequencePipelineService.get().registerPipelineStep(new ClipOverlappingAlignmentsWrapper.ClipOverlappingAlignmentsStep.Provider());
265267

266268
//analysis
267269
SequencePipelineService.get().registerPipelineStep(new SequenceBasedTypingAnalysis.Provider());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package org.labkey.sequenceanalysis.run.analysis;
22

3+
import htsjdk.samtools.util.CloseableIterator;
4+
import htsjdk.variant.variantcontext.VariantContext;
5+
import htsjdk.variant.vcf.VCFFileReader;
36
import org.apache.log4j.Logger;
47
import org.jetbrains.annotations.Nullable;
58
import org.json.JSONObject;
@@ -17,7 +20,9 @@
1720
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
1821
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
1922
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
23+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
2024
import org.labkey.api.util.FileUtil;
25+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
2126
import org.labkey.sequenceanalysis.run.util.DepthOfCoverageWrapper;
2227
import org.labkey.sequenceanalysis.run.variant.SNPEffStep;
2328
import org.labkey.sequenceanalysis.run.variant.SnpEffWrapper;
@@ -101,8 +106,47 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
101106
output.addIntermediateFile(outputVcf);
102107
output.addIntermediateFile(new File(outputVcf.getPath() + ".tbi"));
103108

104-
output.addSequenceOutput(outputVcfSnpEff, "LoFreq: " + rs.getName(), CATEGORY, rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
105-
output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
109+
int totalVariants = 0;
110+
int totalGT1 = 0;
111+
int totalIndelGT1 = 0;
112+
try (VCFFileReader reader = new VCFFileReader(outputVcfSnpEff);CloseableIterator<VariantContext> it = reader.iterator())
113+
{
114+
while (it.hasNext())
115+
{
116+
VariantContext vc = it.next();
117+
totalVariants++;
118+
if (vc.hasAttribute("AF") && vc.getAttributeAsDouble("AF", 0.0) >= 0.01)
119+
{
120+
totalGT1++;
121+
if (vc.hasAttribute("INDEL"))
122+
{
123+
totalIndelGT1++;
124+
}
125+
}
126+
}
127+
}
128+
129+
//generate consensus
130+
// File script = new File(SequenceAnalysisService.get().getScriptPath(SequenceAnalysisModule.NAME, "external/viral_consensus.sh"));
131+
// if (!script.exists())
132+
// {
133+
// throw new PipelineJobException("Unable to find script: " + script.getPath());
134+
// }
135+
//
136+
// SimpleScriptWrapper consensusWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger());
137+
// consensusWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory());
138+
//
139+
// Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
140+
// if (maxThreads != null)
141+
// {
142+
// consensusWrapper.addToEnvironment("SEQUENCEANALYSIS_MAX_THREADS", maxThreads.toString());
143+
// }
144+
//
145+
// consensusWrapper.execute(Arrays.asList("/bin/bash", script.getName(), inputBam.getPath(), referenceGenome.getWorkingFastaFile().getPath()));
146+
147+
String description = String.format("Total Variants: %s\nTotal GT 1 PCT: %s\nTotal Indel GT 1 PCT: %s", totalVariants, totalGT1, totalIndelGT1);
148+
output.addSequenceOutput(outputVcfSnpEff, "LoFreq: " + rs.getName(), CATEGORY, rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
149+
output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description);
106150

107151
return output;
108152
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package org.labkey.sequenceanalysis.run.bampostprocessing;
2+
3+
import org.apache.log4j.Logger;
4+
import org.jetbrains.annotations.Nullable;
5+
import org.json.JSONObject;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.sequenceanalysis.model.Readset;
8+
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
9+
import org.labkey.api.sequenceanalysis.pipeline.BamProcessingOutputImpl;
10+
import org.labkey.api.sequenceanalysis.pipeline.BamProcessingStep;
11+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
12+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
13+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
14+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
15+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
16+
import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper;
17+
import org.labkey.api.util.FileUtil;
18+
19+
import java.io.File;
20+
import java.util.ArrayList;
21+
import java.util.Arrays;
22+
import java.util.LinkedHashSet;
23+
import java.util.List;
24+
25+
public class ClipOverlappingAlignmentsWrapper extends AbstractDiscvrSeqWrapper
26+
{
27+
public ClipOverlappingAlignmentsWrapper(Logger log)
28+
{
29+
super(log);
30+
}
31+
32+
public File execute(File inputBam, File fasta, File bed, File outputBam, @Nullable File reportFile) throws PipelineJobException {
33+
List<String> args = new ArrayList<>(getBaseArgs());
34+
args.add("ClipOverlappingAlignments");
35+
36+
args.add("-R");
37+
args.add(fasta.getPath());
38+
39+
args.add("-I");
40+
args.add(inputBam.getPath());
41+
42+
args.add("--clipIntervals");
43+
args.add(bed.getPath());
44+
45+
args.add("-O");
46+
args.add(outputBam.getPath());
47+
48+
if (reportFile != null)
49+
{
50+
args.add("--reportFile");
51+
args.add(reportFile.getPath());
52+
}
53+
54+
execute(args);
55+
56+
if (!outputBam.exists())
57+
{
58+
throw new PipelineJobException("Unable to find expected file: " + outputBam.getPath());
59+
}
60+
return outputBam;
61+
}
62+
63+
public static class ClipOverlappingAlignmentsStep extends AbstractCommandPipelineStep<ClipOverlappingAlignmentsWrapper> implements BamProcessingStep
64+
{
65+
public ClipOverlappingAlignmentsStep(PipelineStepProvider provider, PipelineContext ctx)
66+
{
67+
super(provider, ctx, new ClipOverlappingAlignmentsWrapper(ctx.getLogger()));
68+
}
69+
70+
public static class Provider extends AbstractPipelineStepProvider<ClipOverlappingAlignmentsWrapper.ClipOverlappingAlignmentsStep>
71+
{
72+
public Provider()
73+
{
74+
super("ClipOverlappingAlignments", "Clip Overlapping Alignments", "DISCVRseq", "The step runs DISCVRseq's ClipOverlappingAlignments, which will soft-clip and alignments that start/end in the intervals specified in the provided BED file. It was intended for applications such as clipping amplification primers.", Arrays.asList(
75+
ToolParameterDescriptor.createExpDataParam("bedFile", "BED File (Intervals)", "This is a BED file specifying the intervals to clip. Strandedness is ignored.", "ldk-expdatafield", new JSONObject()
76+
{{
77+
put("allowBlank", false);
78+
}}, null)
79+
), new LinkedHashSet<>(Arrays.asList("ldk/field/ExpDataField.js")), "https://bimberlab.github.io/DISCVRSeq/");
80+
}
81+
82+
@Override
83+
public ClipOverlappingAlignmentsStep create(PipelineContext ctx)
84+
{
85+
return new ClipOverlappingAlignmentsStep(this, ctx);
86+
}
87+
}
88+
89+
@Override
90+
public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDirectory) throws PipelineJobException
91+
{
92+
BamProcessingOutputImpl output = new BamProcessingOutputImpl();
93+
getWrapper().setOutputDir(outputDirectory);
94+
getWrapper().setWorkingDir(outputDirectory);
95+
96+
File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".clipped.bam");
97+
File reportFile = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".clipped.txt");
98+
99+
File bedFile = null;
100+
Integer bedFileId = getProvider().getParameterByName("bedFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, null);
101+
if (bedFileId != null)
102+
{
103+
bedFile = getPipelineCtx().getSequenceSupport().getCachedData(bedFileId);
104+
if (!bedFile.exists())
105+
{
106+
throw new PipelineJobException("Unable to find BED file: " + bedFile.getPath());
107+
}
108+
}
109+
110+
getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), bedFile, outputBam, reportFile);
111+
112+
output.addSequenceOutput(reportFile, rs.getName() + ": alignment clipping report", "Alignment Clipping Report", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
113+
114+
return output;
115+
}
116+
}
117+
}

0 commit comments

Comments
 (0)