1616import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
1717import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
1818import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
19- import org .labkey .api .sequenceanalysis .run .AbstractCommandWrapper ;
19+ import org .labkey .api .sequenceanalysis .run .DockerWrapper ;
2020import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
2121import org .labkey .api .util .FileUtil ;
2222import org .labkey .api .writer .PrintWriters ;
@@ -127,6 +127,7 @@ else if (!svVcf.exists())
127127 // id path depth read length
128128 // TNPRC-IB18 ../IB18.cram 29.77 150
129129 File coverageFile = new File (ctx .getWorkingDirectory (), "coverage.txt" );
130+ String rgId = null ;
130131 try (PrintWriter writer = PrintWriters .getPrintWriter (coverageFile ); SamReader reader = SamReaderFactory .makeDefault ().open (so .getFile ()))
131132 {
132133 SAMFileHeader header = reader .getFileHeader ();
@@ -139,52 +140,58 @@ else if (header.getReadGroups().size() > 1)
139140 throw new PipelineJobException ("More than one read group found in BAM" );
140141 }
141142
142- String rgId = header .getReadGroups ().get (0 ).getSample ();
143+ rgId = header .getReadGroups ().get (0 ).getSample ();
143144
144145 JSONObject json = new JSONObject (FileUtils .readFileToString (coverageJson , Charset .defaultCharset ()));
145146 writer .println ("id\t path\t depth\t read length" );
146147 double depth = json .getJSONObject ("autosome" ).getDouble ("depth" );
147148 double readLength = json .getInt ("read_length" );
148- writer .println (rgId + "\t " + so .getFile ().getPath () + "\t " + depth + "\t " + readLength );
149+ writer .println (rgId + "\t " + "/work/" + so .getFile ().getName () + "\t " + depth + "\t " + readLength );
149150 }
150151 catch (IOException e )
151152 {
152153 throw new PipelineJobException (e );
153154 }
154155 ctx .getFileManager ().addIntermediateFile (coverageFile );
155156
157+ DockerWrapper dockerWrapper = new DockerWrapper ("ghcr.io/bimberlabinternal/paragraph:latest" , ctx .getLogger ());
156158 List <String > paragraphArgs = new ArrayList <>();
157- paragraphArgs .add (AbstractCommandWrapper .resolveFileInPath ("multigrmpy.py" , null , true ).getPath ());
158- paragraphArgs .add ("--verbose" );
159+ paragraphArgs .add ("/opt/paragraph/bin/multigrmpy.py" );
159160
160- File paragraphOut = new File (ctx .getWorkingDirectory (), FileUtil .getBaseName (so .getFile ()) + ".paragraph.txt" );
161+ dockerWrapper .ensureLocalCopy (so .getFile (), ctx .getWorkingDirectory (), ctx .getFileManager ());
162+ dockerWrapper .ensureLocalCopy (SequenceAnalysisService .get ().getExpectedBamOrCramIndex (so .getFile ()), ctx .getWorkingDirectory (), ctx .getFileManager ());
163+
164+ File paragraphOutDir = new File (ctx .getWorkingDirectory (), FileUtil .getBaseName (so .getFile ()));
161165 paragraphArgs .add ("-o" );
162- paragraphArgs .add (paragraphOut . getPath ());
166+ paragraphArgs .add ("/work/" + paragraphOutDir . getName ());
163167
164168 paragraphArgs .add ("-i" );
165- paragraphArgs .add (svVcf .getPath ());
169+ dockerWrapper .ensureLocalCopy (svVcf , ctx .getWorkingDirectory (), ctx .getFileManager ());
170+ dockerWrapper .ensureLocalCopy (new File (svVcf .getPath () + ".tbi" ), ctx .getWorkingDirectory (), ctx .getFileManager ());
171+ paragraphArgs .add ("/work/" + svVcf .getName ());
166172
167173 paragraphArgs .add ("-m" );
168- paragraphArgs .add (coverageFile .getPath ());
174+ paragraphArgs .add ("/work/" + coverageFile .getName ());
169175
170176 paragraphArgs .add ("-r" );
171- paragraphArgs .add (ctx .getSequenceSupport ().getCachedGenome (so .getLibrary_id ()).getWorkingFastaFile ().getPath ());
177+ File genomeFasta = ctx .getSequenceSupport ().getCachedGenome (so .getLibrary_id ()).getWorkingFastaFile ();
178+ dockerWrapper .ensureLocalCopy (genomeFasta , ctx .getWorkingDirectory (), ctx .getFileManager ());
179+ dockerWrapper .ensureLocalCopy (new File (genomeFasta .getPath () + ".fai" ), ctx .getWorkingDirectory (), ctx .getFileManager ());
180+ paragraphArgs .add ("/work/" + genomeFasta .getName ());
172181
173182 paragraphArgs .add ("--scratch-dir" );
174- paragraphArgs .add (SequencePipelineService .get ().getJavaTempDir ());
183+ paragraphArgs .add ("/tmp" );
184+ dockerWrapper .setTmpDir (new File (SequencePipelineService .get ().getJavaTempDir ()));
175185
176186 if (threads != null )
177187 {
178188 paragraphArgs .add ("--threads" );
179189 paragraphArgs .add (threads .toString ());
180190 }
181191
182- paragraphArgs .add ("--logfile" );
183- paragraphArgs .add (new File (ctx .getWorkingDirectory (), "paragraph.log" ).getPath ());
184-
185- new SimpleScriptWrapper (ctx .getLogger ()).execute (paragraphArgs );
192+ dockerWrapper .executeWithDocker (paragraphArgs , ctx .getWorkingDirectory (), ctx .getFileManager ());
186193
187- File genotypes = new File (ctx . getWorkingDirectory () , "genotypes.vcf.gz" );
194+ File genotypes = new File (paragraphOutDir , "genotypes.vcf.gz" );
188195 if (!genotypes .exists ())
189196 {
190197 throw new PipelineJobException ("Missing file: " + genotypes .getPath ());
@@ -200,6 +207,11 @@ else if (header.getReadGroups().size() > 1)
200207 }
201208
202209 ctx .getFileManager ().addSequenceOutput (genotypes , "paraGRAPH Genotypes: " + so .getName (), "paraGRAPH Genoypes" , so .getReadset (), null , so .getLibrary_id (), "Input VCF: " + svVcf .getName () + " (" + svVcfId + ")" );
210+
211+ ctx .getFileManager ().addIntermediateFile (new File (paragraphOutDir , "variants.json.gz" ));
212+ ctx .getFileManager ().addIntermediateFile (new File (paragraphOutDir , "variants.vcf.gz" ));
213+ ctx .getFileManager ().addIntermediateFile (new File (paragraphOutDir , "genotypes.json.gz" ));
214+ ctx .getFileManager ().addIntermediateFile (new File (paragraphOutDir , "grmpy.log" ));
203215 }
204216 }
205217 }
0 commit comments