Skip to content

Commit 3116dd5

Browse files
committed
Add filter step to discard reads matching a reference
1 parent 7651972 commit 3116dd5

File tree

4 files changed

+130
-2
lines changed

4 files changed

+130
-2
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
import org.labkey.sequenceanalysis.run.preprocessing.CutadaptWrapper;
115115
import org.labkey.sequenceanalysis.run.preprocessing.DownsampleFastqWrapper;
116116
import org.labkey.sequenceanalysis.run.preprocessing.FastqcProcessingStep;
117+
import org.labkey.sequenceanalysis.run.preprocessing.FilterReadsStep;
117118
import org.labkey.sequenceanalysis.run.preprocessing.FlashPipelineStep;
118119
import org.labkey.sequenceanalysis.run.preprocessing.PrintReadsContainingStep;
119120
import org.labkey.sequenceanalysis.run.preprocessing.TagPcrSummaryStep;
@@ -225,6 +226,7 @@ public static void registerPipelineSteps()
225226
SequencePipelineService.get().registerPipelineStep(new TrimmomaticWrapper.CropReadsProvider());
226227
SequencePipelineService.get().registerPipelineStep(new FlashPipelineStep.Provider());
227228
SequencePipelineService.get().registerPipelineStep(new PrintReadsContainingStep.Provider());
229+
SequencePipelineService.get().registerPipelineStep(new FilterReadsStep.Provider());
228230
SequencePipelineService.get().registerPipelineStep(new TrimmomaticWrapper.HeadCropReadsProvider());
229231
SequencePipelineService.get().registerPipelineStep(new TrimmomaticWrapper.MaxInfoTrimmingProvider());
230232
SequencePipelineService.get().registerPipelineStep(new TrimmomaticWrapper.AdapterTrimmingProvider());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ public BWAMemAlignmentStep create(PipelineContext context)
9898
}
9999
}
100100

101-
private void performMemAlignment(PipelineJob job, AlignmentOutputImpl output, File inputFastq1, @Nullable File inputFastq2, File outputDirectory, ReferenceGenome referenceGenome, String basename, List<String> additionalArgs) throws PipelineJobException
101+
public void performMemAlignment(PipelineJob job, AlignmentOutputImpl output, File inputFastq1, @Nullable File inputFastq2, File outputDirectory, ReferenceGenome referenceGenome, String basename, List<String> additionalArgs) throws PipelineJobException
102102
{
103103
setOutputDir(outputDirectory);
104104

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package org.labkey.sequenceanalysis.run.preprocessing;
2+
3+
import org.jetbrains.annotations.Nullable;
4+
import org.json.JSONObject;
5+
import org.labkey.api.data.ConvertHelper;
6+
import org.labkey.api.pipeline.PipelineJob;
7+
import org.labkey.api.pipeline.PipelineJobException;
8+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
9+
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStep;
10+
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
11+
import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl;
12+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
13+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
14+
import org.labkey.api.sequenceanalysis.pipeline.PreprocessingStep;
15+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
16+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
17+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
18+
import org.labkey.api.util.FileUtil;
19+
import org.labkey.api.util.Pair;
20+
import org.labkey.sequenceanalysis.run.alignment.BWAMemWrapper;
21+
import org.labkey.sequenceanalysis.run.analysis.UnmappedReadExportHandler;
22+
23+
import java.io.File;
24+
import java.util.ArrayList;
25+
import java.util.Arrays;
26+
import java.util.List;
27+
28+
public class FilterReadsStep extends AbstractPipelineStep implements PreprocessingStep
29+
{
30+
private static final String GENOME = "genomeId";
31+
private static final String MIN_QUAL = "minQual";
32+
33+
public FilterReadsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
34+
{
35+
super(provider, ctx);
36+
}
37+
38+
public static class Provider extends AbstractPipelineStepProvider<PreprocessingStep>
39+
{
40+
public Provider()
41+
{
42+
super("PrintReadsContaining", "Filter Reads Matching Reference", "FilterMatchingReads", "This step aligns input reads against a reference using BWA-mem and will only return read pairs without a passing hit in either read.", Arrays.asList(
43+
new GenomeParam(),
44+
ToolParameterDescriptor.create(MIN_QUAL, "Min MAPQ", "Only alignments with MAPQ greater than this value will be considered", "ldk-integerfield", new JSONObject(){{
45+
put("minValue", 0);
46+
}}, 50)
47+
), null, null);
48+
}
49+
50+
@Override
51+
public FilterReadsStep create(PipelineContext context)
52+
{
53+
return new FilterReadsStep(this, context);
54+
}
55+
}
56+
57+
@Override
58+
public Output processInputFile(File inputFile, @Nullable File inputFile2, File outputDir) throws PipelineJobException
59+
{
60+
PreprocessingOutputImpl output = new PreprocessingOutputImpl(inputFile, inputFile2);
61+
62+
BWAMemWrapper wrapper = new BWAMemWrapper(getPipelineCtx().getLogger());
63+
List<String> bwaArgs = new ArrayList<>();
64+
65+
int minMapq = getProvider().getParameterByName(MIN_QUAL).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
66+
if (minMapq > 0)
67+
{
68+
bwaArgs.add("-T");
69+
bwaArgs.add(String.valueOf(minMapq));
70+
}
71+
72+
int genomeId = getProvider().getParameterByName(GENOME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
73+
ReferenceGenome genome = getPipelineCtx().getSequenceSupport().getCachedObject(GenomeParam.getCacheKey(genomeId), ReferenceGenome.class);
74+
75+
AlignmentOutputImpl alignmentOutput = new AlignmentOutputImpl();
76+
String basename = SequenceAnalysisService.get().getUnzippedBaseName(inputFile.getName()) + ".filterAlign";
77+
wrapper.performMemAlignment(getPipelineCtx().getJob(), alignmentOutput, inputFile, inputFile2, outputDir, genome, basename, bwaArgs);
78+
79+
output.addIntermediateFile(alignmentOutput.getBAM());
80+
output.addIntermediateFile(new File(alignmentOutput.getBAM().getPath() + ".bai"));
81+
82+
File unmappedReadsF = new File(alignmentOutput.getBAM().getParentFile(), FileUtil.getBaseName(alignmentOutput.getBAM()) + "_unmapped_F.fastq");
83+
File unmappedReadsR = new File(alignmentOutput.getBAM().getParentFile(), FileUtil.getBaseName(alignmentOutput.getBAM()) + "_unmapped_R.fastq");
84+
File singletons = new File(alignmentOutput.getBAM().getParentFile(), FileUtil.getBaseName(alignmentOutput.getBAM()) + "_unmapped_singletons.fastq");
85+
86+
UnmappedReadExportHandler.Processor.writeUnmappedReadsAsFastq(alignmentOutput.getBAM(), unmappedReadsF, unmappedReadsR, singletons, getPipelineCtx().getLogger());
87+
output.addIntermediateFile(singletons);
88+
89+
output.setProcessedFastq(Pair.of(unmappedReadsF, unmappedReadsR));
90+
91+
return output;
92+
}
93+
94+
public static class GenomeParam extends ToolParameterDescriptor implements ToolParameterDescriptor.CachableParam
95+
{
96+
public GenomeParam()
97+
{
98+
super(null, GENOME, "Choose Genome", "Select a previously saved reference genome from the list.", "ldk-simplelabkeycombo", new JSONObject()
99+
{{
100+
put("width", 450);
101+
put("schemaName", "sequenceanalysis");
102+
put("queryName", "reference_libraries");
103+
put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()");
104+
put("filterArray", "js:[LABKEY.Filter.create('datedisabled', null, LABKEY.Filter.Types.ISBLANK)]");
105+
put("displayField", "name");
106+
put("valueField", "rowid");
107+
put("allowBlank", false);
108+
}}, null);
109+
}
110+
111+
@Override
112+
public void doCache(PipelineJob job, Object value, SequenceAnalysisJobSupport support) throws PipelineJobException
113+
{
114+
if (value != null)
115+
{
116+
int genomeId = ConvertHelper.convert(value, Integer.class);
117+
support.cacheObject(getCacheKey(genomeId), SequenceAnalysisService.get().getReferenceGenome(genomeId, job.getUser()));
118+
}
119+
}
120+
121+
public static String getCacheKey(int genomeId)
122+
{
123+
return "filterReadStep." + genomeId;
124+
}
125+
}
126+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/PrintReadsContainingStep.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
public class PrintReadsContainingStep extends AbstractCommandPipelineStep<PrintReadsContainingStep.Wrapper> implements PreprocessingStep
2828
{
29-
public PrintReadsContainingStep(PipelineStepProvider provider, PipelineContext ctx)
29+
public PrintReadsContainingStep(PipelineStepProvider<?> provider, PipelineContext ctx)
3030
{
3131
super(provider, ctx, new PrintReadsContainingStep.Wrapper(ctx.getLogger()));
3232
}

0 commit comments

Comments
 (0)