11package org .labkey .sequenceanalysis .run .variant ;
22
3+ import com .google .common .io .Files ;
34import htsjdk .samtools .SAMSequenceDictionary ;
45import htsjdk .samtools .SAMSequenceRecord ;
56import htsjdk .samtools .util .Interval ;
1011import org .apache .logging .log4j .Logger ;
1112import org .jetbrains .annotations .Nullable ;
1213import org .json .JSONObject ;
13- import org .labkey .api .collections .CaseInsensitiveHashMap ;
1414import org .labkey .api .pipeline .PipelineJobException ;
15- import org .labkey .api .reader .Readers ;
16- import org .labkey .api .sequenceanalysis .SequenceAnalysisService ;
1715import org .labkey .api .sequenceanalysis .pipeline .AbstractVariantProcessingStepProvider ;
1816import org .labkey .api .sequenceanalysis .pipeline .PedigreeToolParameterDescriptor ;
1917import org .labkey .api .sequenceanalysis .pipeline .PipelineContext ;
2624import org .labkey .api .sequenceanalysis .run .AbstractCommandPipelineStep ;
2725import org .labkey .api .sequenceanalysis .run .AbstractCommandWrapper ;
2826import org .labkey .api .util .Compress ;
29- import org .labkey .api .writer .PrintWriters ;
30- import org .labkey .sequenceanalysis .pipeline .ProcessVariantsHandler ;
3127
32- import java .io .BufferedReader ;
3328import java .io .File ;
3429import java .io .IOException ;
35- import java .io .PrintWriter ;
3630import java .util .ArrayList ;
37- import java .util .Arrays ;
38- import java .util .HashMap ;
3931import java .util .List ;
40- import java .util .Map ;
4132
4233public class KingInferenceStep extends AbstractCommandPipelineStep <KingInferenceStep .KingWrapper > implements VariantProcessingStep
4334{
@@ -50,7 +41,7 @@ public static class Provider extends AbstractVariantProcessingStepProvider<KingI
5041 {
5142 public Provider ()
5243 {
53- super ("KingInferenceStep" , "KING/Relatedness" , "" , "This will run KING to infer kinship from a VCF" , List .of (
44+ super ("KingInferenceStep" , "KING/Relatedness" , "" , "This will run KING (via plink2) to infer kinship from a VCF" , List .of (
5445 ToolParameterDescriptor .create ("limitToChromosomes" , "Limit to Chromosomes" , "If checked, the analysis will include only the primary chromosomes" , "checkbox" , new JSONObject ()
5546 {{
5647 put ("checked" , true );
@@ -148,7 +139,24 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
148139 plinkArgs1 .add ("--out" );
149140 plinkArgs1 .add (plinkOut .getPath ());
150141
151- plink .execute (plinkArgs1 );
142+ File doneFile = new File (plinkOut .getPath () + ".done" );
143+ output .addIntermediateFile (doneFile );
144+ if (doneFile .exists ())
145+ {
146+ getPipelineCtx ().getLogger ().debug ("plink has completed, will not repeat" );
147+ }
148+ else {
149+ plink .execute (plinkArgs1 );
150+
151+ try
152+ {
153+ Files .touch (doneFile );
154+ }
155+ catch (IOException e )
156+ {
157+ throw new PipelineJobException (e );
158+ }
159+ }
152160
153161 File plinkOutBed = new File (plinkOut .getPath () + ".bed" );
154162 if (!plinkOutBed .exists ())
@@ -163,7 +171,23 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
163171 plinkArgs2 .add ("--out" );
164172 plinkArgs2 .add (plinkOutKing .getPath ());
165173
166- plink .execute (plinkArgs2 );
174+ doneFile = new File (plinkOutKing .getPath () + ".done" );
175+ if (doneFile .exists ())
176+ {
177+ getPipelineCtx ().getLogger ().debug ("plink has completed, will not repeat" );
178+ }
179+ else {
180+ plink .execute (plinkArgs2 );
181+
182+ try
183+ {
184+ Files .touch (doneFile );
185+ }
186+ catch (IOException e )
187+ {
188+ throw new PipelineJobException (e );
189+ }
190+ }
167191
168192 File plinkOutKingFile = new File (plinkOutKing .getPath () + ".kin0" );
169193 if (!plinkOutKingFile .exists ())
@@ -188,131 +212,11 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
188212 throw new PipelineJobException (e );
189213 }
190214
191- output .addSequenceOutput (plinkOutKingFileTxt , "PLINK2 Relatedness: " + inputVCF .getName (), "PLINK2 Kinship" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
192-
193- // Also with KING:
194- KingWrapper wrapper = new KingWrapper (getPipelineCtx ().getLogger ());
195- wrapper .setWorkingDir (outputDirectory );
196-
197- List <String > kingArgs = new ArrayList <>();
198- kingArgs .add (wrapper .getExe ().getPath ());
199-
200- kingArgs .add ("-b" );
201- kingArgs .add (plinkOutBed .getPath ());
202-
203- kingArgs .add ("--prefix" );
204- kingArgs .add (SequenceAnalysisService .get ().getUnzippedBaseName (inputVCF .getName ()));
205-
206- // Update the pedigree / fam file:
207- String demographicsProviderName = getProvider ().getParameterByName (PedigreeToolParameterDescriptor .NAME ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx ());
208- if (demographicsProviderName != null )
209- {
210- File pedFile = ProcessVariantsHandler .getPedigreeFile (getPipelineCtx ().getSourceDirectory (true ), demographicsProviderName );
211- if (!pedFile .exists ())
212- {
213- throw new PipelineJobException ("Unable to find pedigree file: " + pedFile .getPath ());
214- }
215-
216- File kingFam = createFamFile (pedFile , new File (plinkOutBed .getParentFile (), "plink.fam" ));
217- kingArgs .add ("--fam" );
218- kingArgs .add (kingFam .getPath ());
219-
220- output .addIntermediateFile (kingFam );
221- }
222-
223- if (threads != null )
224- {
225- kingArgs .add ("--cpus" );
226- kingArgs .add (threads .toString ());
227- }
228-
229- kingArgs .add ("--kinship" );
230- kingArgs .add ("--rplot" );
231-
232- File kinshipOutput = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (inputVCF .getName ()) + ".kin" );
233- wrapper .execute (kingArgs );
234- if (!kinshipOutput .exists ())
235- {
236- throw new PipelineJobException ("Unable to find file: " + kinshipOutput .getPath ());
237- }
238-
239- File kinshipOutputTxt = new File (kinshipOutput .getPath () + ".txt.gz" );
240- if (kinshipOutputTxt .exists ())
241- {
242- kinshipOutputTxt .delete ();
243- }
244-
245- lineCount = SequencePipelineService .get ().getLineCount (kinshipOutput )-1 ;
246- try
247- {
248- Compress .compressGzip (kinshipOutput , kinshipOutputTxt );
249- FileUtils .delete (kinshipOutput );
250- }
251- catch (IOException e )
252- {
253- throw new PipelineJobException (e );
254- }
255-
256- output .addSequenceOutput (kinshipOutputTxt , "King Relatedness: " + inputVCF .getName (), "KING Relatedness" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
215+ output .addSequenceOutput (plinkOutKingFileTxt , "PLINK2/KING Relatedness: " + inputVCF .getName (), "PLINK2/KING Kinship" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
257216
258217 return output ;
259218 }
260219
261- private File createFamFile (File pedFile , File famFile ) throws PipelineJobException
262- {
263- File newFamFile = new File (famFile .getParentFile (), "king.fam" );
264-
265- Map <String , String > pedMap = new CaseInsensitiveHashMap <>();
266- try (BufferedReader reader = Readers .getReader (pedFile ))
267- {
268- String line ;
269- while ((line = reader .readLine ()) != null )
270- {
271- String [] tokens = line .split (" " );
272- if (tokens .length != 6 )
273- {
274- throw new PipelineJobException ("Improper ped line length: " + tokens .length );
275- }
276-
277- pedMap .put (tokens [1 ], StringUtils .join (Arrays .asList ("0" , tokens [1 ], tokens [2 ], tokens [3 ], tokens [4 ], "-9" ), "\t " ));
278- }
279- }
280- catch (IOException e )
281- {
282- throw new PipelineJobException (e );
283- }
284-
285- try (BufferedReader reader = Readers .getReader (famFile );PrintWriter writer = PrintWriters .getPrintWriter (newFamFile ))
286- {
287- String line ;
288- while ((line = reader .readLine ()) != null )
289- {
290- String [] tokens = line .split ("\t " );
291- if (tokens .length != 6 )
292- {
293- throw new PipelineJobException ("Improper ped line length: " + tokens .length );
294- }
295-
296- String newRow = pedMap .get (tokens [1 ]);
297- if (newRow == null )
298- {
299- getPipelineCtx ().getLogger ().warn ("Unable to find pedigree entry for: " + tokens [1 ] + ", reusing original" );
300- writer .println (line );
301- }
302- else
303- {
304- writer .println (newRow );
305- }
306- }
307- }
308- catch (IOException e )
309- {
310- throw new PipelineJobException (e );
311- }
312-
313- return newFamFile ;
314- }
315-
316220 public static class KingWrapper extends AbstractCommandWrapper
317221 {
318222 public KingWrapper (@ Nullable Logger logger )
0 commit comments