Skip to content

Commit 22adf27

Browse files
committed
Preliminary support for PrintReadBackedHaplotypes
1 parent bfa4255 commit 22adf27

File tree

4 files changed

+230
-28
lines changed

4 files changed

+230
-28
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.labkey.sequenceanalysis.analysis.MultiQCBamHandler;
5353
import org.labkey.sequenceanalysis.analysis.MultiQCHandler;
5454
import org.labkey.sequenceanalysis.analysis.PicardAlignmentMetricsHandler;
55+
import org.labkey.sequenceanalysis.analysis.PrintReadBackedHaplotypesHandler;
5556
import org.labkey.sequenceanalysis.analysis.RecalculateSequenceMetricsHandler;
5657
import org.labkey.sequenceanalysis.analysis.RnaSeqcHandler;
5758
import org.labkey.sequenceanalysis.analysis.SbtGeneCountHandler;
@@ -338,6 +339,7 @@ public static void registerPipelineSteps()
338339
SequenceAnalysisService.get().registerFileHandler(new PicardAlignmentMetricsHandler());
339340
SequenceAnalysisService.get().registerFileHandler(new BamHaplotypeHandler());
340341
SequenceAnalysisService.get().registerFileHandler(new BamCleanupHandler());
342+
SequenceAnalysisService.get().registerFileHandler(new PrintReadBackedHaplotypesHandler());
341343
SequenceAnalysisService.get().registerFileHandler(new HaplotypeCallerHandler());
342344
SequenceAnalysisService.get().registerFileHandler(new RnaSeqcHandler());
343345
SequenceAnalysisService.get().registerFileHandler(new CombineStarGeneCountsHandler());
SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/PrintReadBackedHaplotypesHandler.java

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
package org.labkey.sequenceanalysis.analysis;
2+
3+
import htsjdk.samtools.util.Interval;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.apache.logging.log4j.Logger;
6+
import org.json.JSONObject;
7+
import org.labkey.api.module.ModuleLoader;
8+
import org.labkey.api.pipeline.PipelineJob;
9+
import org.labkey.api.pipeline.PipelineJobException;
10+
import org.labkey.api.pipeline.RecordedAction;
11+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
12+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
15+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
16+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
17+
import org.labkey.api.sequenceanalysis.run.DISCVRSeqRunner;
18+
import org.labkey.api.util.FileType;
19+
import org.labkey.api.util.FileUtil;
20+
import org.labkey.api.util.PageFlowUtil;
21+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
22+
import org.labkey.sequenceanalysis.util.SequenceUtil;
23+
24+
import java.io.File;
25+
import java.util.ArrayList;
26+
import java.util.Arrays;
27+
import java.util.Date;
28+
import java.util.LinkedHashSet;
29+
import java.util.List;
30+
31+
/**
32+
* Created by bimber on 2/3/2016.
33+
*/
34+
public class PrintReadBackedHaplotypesHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
35+
{
36+
private FileType _bamFileType = new FileType("bam", false);
37+
38+
public PrintReadBackedHaplotypesHandler()
39+
{
40+
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Print Read-Backed Haplotypes", "This scans the alignments over the provided interval(s), and reports all unique haplotypes.", new LinkedHashSet<>(PageFlowUtil.set("/sequenceanalysis/field/IntervalField.js")), Arrays.asList(
41+
ToolParameterDescriptor.create("intervals", "Intervals", "The intervals over which to merge the data. They should be in the form: chr01:102-20394", "sequenceanalysis-intervalfield", new JSONObject(){{
42+
put("allowBlank", false);
43+
}}, null),
44+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-mq"), "minQual", "Min Base Quality", "Nucleotides with quality scores below this value will be converted to N", "ldk-integerfield", new JSONObject(){{
45+
put("minValue", 0);
46+
}}, 10),
47+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-rc"), "requiredCoverageFraction", "Required Coverage Fraction", "A haplotype must have coverage over this fraction of the interval to be reported", "ldk-numberfield", new JSONObject(){{
48+
put("minValue", 0);
49+
put("maxValue", 0);
50+
put("decimalPrecision", 2);
51+
}}, null)
52+
));
53+
}
54+
55+
@Override
56+
public boolean canProcess(SequenceOutputFile o)
57+
{
58+
return o.getFile() != null && _bamFileType.isType(o.getFile());
59+
}
60+
61+
@Override
62+
public boolean doRunRemote()
63+
{
64+
return true;
65+
}
66+
67+
@Override
68+
public boolean doRunLocal()
69+
{
70+
return false;
71+
}
72+
73+
@Override
74+
public SequenceOutputProcessor getProcessor()
75+
{
76+
return new Processor();
77+
}
78+
79+
@Override
80+
public boolean doSplitJobs()
81+
{
82+
return true;
83+
}
84+
85+
public class Processor implements SequenceOutputProcessor
86+
{
87+
@Override
88+
public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
89+
{
90+
for (SequenceOutputFile so : inputFiles)
91+
{
92+
if (so.getReadset() != null)
93+
{
94+
ctx.getSequenceSupport().cacheReadset(so.getReadset(), ctx.getJob().getUser());
95+
}
96+
else
97+
{
98+
ctx.getJob().getLogger().error("Output file lacks a readset and will be skipped: " + so.getRowid());
99+
}
100+
}
101+
}
102+
103+
@Override
104+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
105+
{
106+
PipelineJob job = ctx.getJob();
107+
if (inputFiles.isEmpty())
108+
{
109+
job.getLogger().warn("no input files");
110+
}
111+
112+
for (SequenceOutputFile so : inputFiles)
113+
{
114+
RecordedAction action = new RecordedAction(getName());
115+
action.setStartTime(new Date());
116+
action.addInput(so.getFile(), "Input BAM");
117+
118+
File input = so.getFile();
119+
120+
String intervalText = StringUtils.trimToNull(ctx.getParams().optString("intervals"));
121+
if (intervalText == null)
122+
{
123+
throw new PipelineJobException("Must provide a list of intervals");
124+
}
125+
126+
List<String> args = new ArrayList<>();
127+
List<Interval> il = SequenceUtil.parseAndSortIntervals(intervalText);
128+
if (il != null)
129+
{
130+
for (Interval i : il)
131+
{
132+
args.add("-L");
133+
args.add(i.getContig() + ":" + i.getStart() + "-" + i.getEnd());
134+
}
135+
}
136+
137+
List<String> extraArgs = getClientCommandArgs(ctx.getParams());
138+
if (extraArgs != null)
139+
{
140+
args.addAll(extraArgs);
141+
}
142+
143+
File output = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(input) + ".txt");
144+
Wrapper wrapper = new Wrapper(ctx.getLogger());
145+
wrapper.execute(input, ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile(), output, args);
146+
147+
action.addOutput(output, "Local Haplotypes", false);
148+
ctx.addActions(action);
149+
150+
SequenceOutputFile o = new SequenceOutputFile();
151+
o.setName(output.getName());
152+
o.setFile(output);
153+
o.setLibrary_id(so.getLibrary_id());
154+
o.setCategory("Local Haplotypes");
155+
o.setReadset(so.getReadset());
156+
ctx.addSequenceOutput(o);
157+
}
158+
}
159+
160+
@Override
161+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
162+
{
163+
164+
}
165+
}
166+
167+
public static class Wrapper extends DISCVRSeqRunner
168+
{
169+
public Wrapper(Logger log)
170+
{
171+
super(log);
172+
}
173+
174+
public File execute(File bam, File fasta, File output, List<String> extraArgs) throws PipelineJobException
175+
{
176+
List<String> args = getBaseArgs("PrintReadBackedHaplotypes");
177+
args.add("-I");
178+
args.add(bam.getPath());
179+
180+
args.add("-R");
181+
args.add(fasta.getPath());
182+
183+
args.add("-O");
184+
args.add(output.getPath());
185+
186+
args.addAll(extraArgs);
187+
188+
execute(args);
189+
190+
if (!output.exists())
191+
{
192+
throw new PipelineJobException("Unable to find file: " + output.getPath());
193+
}
194+
195+
return output;
196+
}
197+
}
198+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/VariantsToTableStep.java

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.labkey.api.util.PageFlowUtil;
2424
import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper;
2525
import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob;
26+
import org.labkey.sequenceanalysis.util.SequenceUtil;
2627

2728
import javax.annotation.Nullable;
2829
import java.io.File;
@@ -142,7 +143,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
142143
}
143144

144145
String intervalText = StringUtils.trimToNull(getProvider().getParameterByName("intervals").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class));
145-
List<Interval> il = parseAndSortIntervals(intervalText);
146+
List<Interval> il = SequenceUtil.parseAndSortIntervals(intervalText);
146147
if (il != null)
147148
{
148149
for (Interval i : il)
@@ -223,31 +224,4 @@ public void generateTable(File inputVcf, File outputFile, File referenceFasta, L
223224
}
224225
}
225226
}
226-
227-
private List<Interval> parseAndSortIntervals(String intervalString) throws PipelineJobException
228-
{
229-
intervalString = StringUtils.trimToNull(intervalString);
230-
if (intervalString == null)
231-
{
232-
return null;
233-
}
234-
235-
intervalString = intervalString.replaceAll("(\\n|\\r|;)+", ";");
236-
List<Interval> intervals = new ArrayList<>();
237-
for (String i : intervalString.split(";"))
238-
{
239-
String[] tokens = i.split(":|-");
240-
if (tokens.length != 3)
241-
{
242-
throw new PipelineJobException("Invalid interval: " + i);
243-
}
244-
245-
intervals.add(new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[2])));
246-
}
247-
248-
249-
Collections.sort(intervals);
250-
251-
return intervals;
252-
}
253227
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import htsjdk.variant.vcf.VCFHeader;
2424
import htsjdk.variant.vcf.VCFUtils;
2525
import org.apache.commons.io.FileUtils;
26+
import org.apache.commons.lang3.StringUtils;
2627
import org.apache.commons.lang3.SystemUtils;
2728
import org.apache.logging.log4j.Logger;
2829
import org.jetbrains.annotations.Nullable;
@@ -594,4 +595,31 @@ public static void deleteFolderWithRm(Logger log, File directory) throws Pipelin
594595
new SimpleScriptWrapper(log).execute(Arrays.asList("rm", "-Rf", directory.getPath()));
595596
}
596597
}
598+
599+
public static List<Interval> parseAndSortIntervals(String intervalString) throws PipelineJobException
600+
{
601+
intervalString = StringUtils.trimToNull(intervalString);
602+
if (intervalString == null)
603+
{
604+
return null;
605+
}
606+
607+
intervalString = intervalString.replaceAll("(\\n|\\r|;)+", ";");
608+
List<Interval> intervals = new ArrayList<>();
609+
for (String i : intervalString.split(";"))
610+
{
611+
String[] tokens = i.split(":|-");
612+
if (tokens.length != 3)
613+
{
614+
throw new PipelineJobException("Invalid interval: " + i);
615+
}
616+
617+
intervals.add(new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[2])));
618+
}
619+
620+
621+
Collections.sort(intervals);
622+
623+
return intervals;
624+
}
597625
}

0 commit comments

Comments
 (0)