1+ package org .labkey .sequenceanalysis .run .alignment ;
2+
3+ import org .json .JSONObject ;
4+ import org .labkey .api .module .ModuleLoader ;
5+ import org .labkey .api .pipeline .PipelineJob ;
6+ import org .labkey .api .pipeline .PipelineJobException ;
7+ import org .labkey .api .pipeline .RecordedAction ;
8+ import org .labkey .api .sequenceanalysis .SequenceAnalysisService ;
9+ import org .labkey .api .sequenceanalysis .SequenceOutputFile ;
10+ import org .labkey .api .sequenceanalysis .pipeline .AbstractParameterizedOutputHandler ;
11+ import org .labkey .api .sequenceanalysis .pipeline .SequenceAnalysisJobSupport ;
12+ import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
13+ import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
14+ import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
15+ import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
16+ import org .labkey .api .util .FileUtil ;
17+ import org .labkey .sequenceanalysis .SequenceAnalysisModule ;
18+ import org .labkey .sequenceanalysis .run .variant .DepthOfCoverageHandler ;
19+ import org .labkey .sequenceanalysis .util .SequenceUtil ;
20+
21+ import java .io .File ;
22+ import java .io .IOException ;
23+ import java .util .ArrayList ;
24+ import java .util .Arrays ;
25+ import java .util .List ;
26+
27+ public class ParagraphStep extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >
28+ {
29+ public ParagraphStep ()
30+ {
31+ super (ModuleLoader .getInstance ().getModule (SequenceAnalysisModule .class ), "Paragraph SV Genotyping" , "This will run paraGRAPH on one or more BAM files to genotype SVs" , null , Arrays .asList (
32+ ToolParameterDescriptor .createExpDataParam ("svVCF" , "Input VCF" , "This is the DataId of the VCF containing the SVs to genotype" , "ldk-expdatafield" , new JSONObject ()
33+ {{
34+ put ("allowBlank" , false );
35+ }}, null )
36+ ));
37+ }
38+
39+ @ Override
40+ public boolean canProcess (SequenceOutputFile o )
41+ {
42+ return o .getFile () != null && o .getFile ().exists () && SequenceUtil .FILETYPE .bamOrCram .getFileType ().isType (o .getFile ());
43+ }
44+
45+ @ Override
46+ public boolean doRunRemote ()
47+ {
48+ return true ;
49+ }
50+
51+ @ Override
52+ public boolean doRunLocal ()
53+ {
54+ return false ;
55+ }
56+
57+ @ Override
58+ public SequenceOutputProcessor getProcessor ()
59+ {
60+ return new DepthOfCoverageHandler .Processor ();
61+ }
62+
63+ public static class Processor implements SequenceOutputProcessor
64+ {
65+ @ Override
66+ public void processFilesOnWebserver (PipelineJob job , SequenceAnalysisJobSupport support , List <SequenceOutputFile > inputFiles , JSONObject params , File outputDir , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
67+ {
68+
69+ }
70+
71+ @ Override
72+ public void processFilesRemote (List <SequenceOutputFile > inputFiles , JobContext ctx ) throws UnsupportedOperationException , PipelineJobException
73+ {
74+ File inputVCF = ctx .getSequenceSupport ().getCachedData (ctx .getParams ().getInt ("svVCF" ));
75+ if (!inputVCF .exists ())
76+ {
77+ throw new PipelineJobException ("Unable to find file: " + inputVCF .getPath ());
78+ }
79+
80+ for (SequenceOutputFile so : inputFiles )
81+ {
82+ List <String > depthArgs = new ArrayList <>();
83+ depthArgs .add ("idxdepth" );
84+ depthArgs .add ("-d" );
85+ depthArgs .add (so .getFile ().getPath ());
86+
87+ File coverageFile = new File (ctx .getWorkingDirectory (), "coverage.txt" );
88+ depthArgs .add ("-o" );
89+ depthArgs .add (coverageFile .getPath ());
90+
91+ depthArgs .add ("-r" );
92+ depthArgs .add (ctx .getSequenceSupport ().getCachedGenome (so .getLibrary_id ()).getWorkingFastaFile ().getPath ());
93+
94+ new SimpleScriptWrapper (ctx .getLogger ()).execute (depthArgs );
95+
96+ if (!coverageFile .exists ())
97+ {
98+ throw new PipelineJobException ("Missing file: " + coverageFile .getPath ());
99+ }
100+
101+ // Should produce a simple text file:
102+ // id path depth read length
103+ // TNPRC-IB18 ../IB18.cram 29.77 150
104+
105+ List <String > paragraphArgs = new ArrayList <>();
106+ paragraphArgs .add ("multigrmpy.py" );
107+ paragraphArgs .add ("--verbose" );
108+
109+ File paragraphOut = new File (ctx .getWorkingDirectory (), FileUtil .getBaseName (so .getFile ()) + ".paragraph.txt" );
110+ paragraphArgs .add ("-o" );
111+ paragraphArgs .add (paragraphOut .getPath ());
112+
113+ int svVcfId = ctx .getParams ().optInt ("svVCF" );
114+ if (svVcfId == 0 )
115+ {
116+ throw new PipelineJobException ("Missing svVCF ID" );
117+ }
118+
119+ File svVcf = ctx .getSequenceSupport ().getCachedData (svVcfId );
120+ if (svVcf == null )
121+ {
122+ throw new PipelineJobException ("File not found for ID: " + svVcfId );
123+ }
124+ else if (!svVcf .exists ())
125+ {
126+ throw new PipelineJobException ("Missing file: " + svVcf .getPath ());
127+ }
128+
129+ paragraphArgs .add ("-i" );
130+ paragraphArgs .add (svVcf .getPath ());
131+
132+ paragraphArgs .add ("-m" );
133+ paragraphArgs .add (coverageFile .getPath ());
134+
135+ paragraphArgs .add ("-r" );
136+ paragraphArgs .add (ctx .getSequenceSupport ().getCachedGenome (so .getLibrary_id ()).getWorkingFastaFile ().getPath ());
137+
138+ paragraphArgs .add ("--scratch-dir" );
139+ paragraphArgs .add (SequencePipelineService .get ().getJavaTempDir ());
140+
141+ Integer threads = SequencePipelineService .get ().getMaxThreads (ctx .getLogger ());
142+ if (threads != null )
143+ {
144+ paragraphArgs .add ("--threads" );
145+ paragraphArgs .add (threads .toString ());
146+ }
147+
148+ paragraphArgs .add ("--logfile" );
149+ paragraphArgs .add (new File (ctx .getWorkingDirectory (), "paragraph.log" ).getPath ());
150+
151+ new SimpleScriptWrapper (ctx .getLogger ()).execute (paragraphArgs );
152+
153+ File genotypes = new File (ctx .getWorkingDirectory (), "genotypes.vcf.gz" );
154+ if (!genotypes .exists ())
155+ {
156+ throw new PipelineJobException ("Missing file: " + genotypes .getPath ());
157+ }
158+
159+ try
160+ {
161+ SequenceAnalysisService .get ().ensureVcfIndex (genotypes , ctx .getLogger ());
162+ }
163+ catch (IOException e )
164+ {
165+ throw new PipelineJobException (e );
166+ }
167+
168+ ctx .getFileManager ().addSequenceOutput (genotypes , "paraGRAPH Genotypes: " + so .getName (), "paraGRAPH Genoypes" , so .getReadset (), null , so .getLibrary_id (), "Input VCF: " + svVcf .getName () + " (" + svVcfId + ")" );
169+ }
170+ }
171+ }
172+ }
0 commit comments