package org.labkey.sequenceanalysis.analysis;

import htsjdk.samtools.util.Interval;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.DISCVRSeqRunner;
import org.labkey.api.util.FileType;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
import org.labkey.sequenceanalysis.util.SequenceUtil;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
30+
31+ /**
32+ * Created by bimber on 2/3/2016.
33+ */
34+ public class PrintReadBackedHaplotypesHandler extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >
35+ {
36+ private FileType _bamFileType = new FileType ("bam" , false );
37+
38+ public PrintReadBackedHaplotypesHandler ()
39+ {
40+ super (ModuleLoader .getInstance ().getModule (SequenceAnalysisModule .class ), "Print Read-Backed Haplotypes" , "This scans the alignments over the provided interval(s), and reports all unique haplotypes." , new LinkedHashSet <>(PageFlowUtil .set ("/sequenceanalysis/field/IntervalField.js" )), Arrays .asList (
41+ ToolParameterDescriptor .create ("intervals" , "Intervals" , "The intervals over which to merge the data. They should be in the form: chr01:102-20394" , "sequenceanalysis-intervalfield" , new JSONObject (){{
42+ put ("allowBlank" , false );
43+ }}, null ),
44+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-mq" ), "minQual" , "Min Base Quality" , "Nucleotides with quality scores below this value will be converted to N" , "ldk-integerfield" , new JSONObject (){{
45+ put ("minValue" , 0 );
46+ }}, 10 ),
47+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-rc" ), "requiredCoverageFraction" , "Required Coverage Fraction" , "A haplotype must have coverage over this fraction of the interval to be reported" , "ldk-numberfield" , new JSONObject (){{
48+ put ("minValue" , 0 );
49+ put ("maxValue" , 0 );
50+ put ("decimalPrecision" , 2 );
51+ }}, null )
52+ ));
53+ }
54+
55+ @ Override
56+ public boolean canProcess (SequenceOutputFile o )
57+ {
58+ return o .getFile () != null && _bamFileType .isType (o .getFile ());
59+ }
60+
61+ @ Override
62+ public boolean doRunRemote ()
63+ {
64+ return true ;
65+ }
66+
67+ @ Override
68+ public boolean doRunLocal ()
69+ {
70+ return false ;
71+ }
72+
73+ @ Override
74+ public SequenceOutputProcessor getProcessor ()
75+ {
76+ return new Processor ();
77+ }
78+
79+ @ Override
80+ public boolean doSplitJobs ()
81+ {
82+ return true ;
83+ }
84+
85+ public class Processor implements SequenceOutputProcessor
86+ {
87+ @ Override
88+ public void init (JobContext ctx , List <SequenceOutputFile > inputFiles , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
89+ {
90+ for (SequenceOutputFile so : inputFiles )
91+ {
92+ if (so .getReadset () != null )
93+ {
94+ ctx .getSequenceSupport ().cacheReadset (so .getReadset (), ctx .getJob ().getUser ());
95+ }
96+ else
97+ {
98+ ctx .getJob ().getLogger ().error ("Output file lacks a readset and will be skipped: " + so .getRowid ());
99+ }
100+ }
101+ }
102+
103+ @ Override
104+ public void processFilesRemote (List <SequenceOutputFile > inputFiles , JobContext ctx ) throws UnsupportedOperationException , PipelineJobException
105+ {
106+ PipelineJob job = ctx .getJob ();
107+ if (inputFiles .isEmpty ())
108+ {
109+ job .getLogger ().warn ("no input files" );
110+ }
111+
112+ for (SequenceOutputFile so : inputFiles )
113+ {
114+ RecordedAction action = new RecordedAction (getName ());
115+ action .setStartTime (new Date ());
116+ action .addInput (so .getFile (), "Input BAM" );
117+
118+ File input = so .getFile ();
119+
120+ String intervalText = StringUtils .trimToNull (ctx .getParams ().optString ("intervals" ));
121+ if (intervalText == null )
122+ {
123+ throw new PipelineJobException ("Must provide a list of intervals" );
124+ }
125+
126+ List <String > args = new ArrayList <>();
127+ List <Interval > il = SequenceUtil .parseAndSortIntervals (intervalText );
128+ if (il != null )
129+ {
130+ for (Interval i : il )
131+ {
132+ args .add ("-L" );
133+ args .add (i .getContig () + ":" + i .getStart () + "-" + i .getEnd ());
134+ }
135+ }
136+
137+ List <String > extraArgs = getClientCommandArgs (ctx .getParams ());
138+ if (extraArgs != null )
139+ {
140+ args .addAll (extraArgs );
141+ }
142+
143+ File output = new File (ctx .getWorkingDirectory (), FileUtil .getBaseName (input ) + ".txt" );
144+ Wrapper wrapper = new Wrapper (ctx .getLogger ());
145+ wrapper .execute (input , ctx .getSequenceSupport ().getCachedGenome (so .getLibrary_id ()).getWorkingFastaFile (), output , args );
146+
147+ action .addOutput (output , "Local Haplotypes" , false );
148+ ctx .addActions (action );
149+
150+ SequenceOutputFile o = new SequenceOutputFile ();
151+ o .setName (output .getName ());
152+ o .setFile (output );
153+ o .setLibrary_id (so .getLibrary_id ());
154+ o .setCategory ("Local Haplotypes" );
155+ o .setReadset (so .getReadset ());
156+ ctx .addSequenceOutput (o );
157+ }
158+ }
159+
160+ @ Override
161+ public void processFilesOnWebserver (PipelineJob job , SequenceAnalysisJobSupport support , List <SequenceOutputFile > inputFiles , JSONObject params , File outputDir , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
162+ {
163+
164+ }
165+ }
166+
167+ public static class Wrapper extends DISCVRSeqRunner
168+ {
169+ public Wrapper (Logger log )
170+ {
171+ super (log );
172+ }
173+
174+ public File execute (File bam , File fasta , File output , List <String > extraArgs ) throws PipelineJobException
175+ {
176+ List <String > args = getBaseArgs ("PrintReadBackedHaplotypes" );
177+ args .add ("-I" );
178+ args .add (bam .getPath ());
179+
180+ args .add ("-R" );
181+ args .add (fasta .getPath ());
182+
183+ args .add ("-O" );
184+ args .add (output .getPath ());
185+
186+ args .addAll (extraArgs );
187+
188+ execute (args );
189+
190+ if (!output .exists ())
191+ {
192+ throw new PipelineJobException ("Unable to find file: " + output .getPath ());
193+ }
194+
195+ return output ;
196+ }
197+ }
198+ }