11package org .labkey .sequenceanalysis .run .variant ;
22
3+ import htsjdk .samtools .SAMSequenceDictionary ;
4+ import htsjdk .samtools .SAMSequenceRecord ;
35import htsjdk .samtools .util .Interval ;
6+ import htsjdk .variant .utils .SAMSequenceDictionaryExtractor ;
47import org .apache .commons .io .FileUtils ;
8+ import org .apache .commons .lang3 .StringUtils ;
9+ import org .apache .commons .lang3 .math .NumberUtils ;
510import org .apache .logging .log4j .Logger ;
11+ import org .json .JSONObject ;
612import org .labkey .api .pipeline .PipelineJobException ;
713import org .labkey .api .sequenceanalysis .SequenceAnalysisService ;
814import org .labkey .api .sequenceanalysis .pipeline .AbstractVariantProcessingStepProvider ;
915import org .labkey .api .sequenceanalysis .pipeline .PipelineContext ;
1016import org .labkey .api .sequenceanalysis .pipeline .PipelineStepProvider ;
1117import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
1218import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
19+ import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
1320import org .labkey .api .sequenceanalysis .pipeline .VariantProcessingStep ;
1421import org .labkey .api .sequenceanalysis .pipeline .VariantProcessingStepOutputImpl ;
1522import org .labkey .api .sequenceanalysis .run .AbstractCommandPipelineStep ;
@@ -34,6 +41,9 @@ public static class Provider extends AbstractVariantProcessingStepProvider<KingI
3441 public Provider ()
3542 {
3643 super ("KingInferenceStep" , "KING/Relatedness" , "" , "This will run KING to infer kinship from a VCF" , Arrays .asList (
44+ ToolParameterDescriptor .create ("limitToChromosomes" , "Limit to Chromosomes" , "If checked, the analysis will include only the primary chromosomes" , "checkbox" , new JSONObject (){{
45+ put ("checked" , true );
46+ }}, true )
3747 ), null , "https://www.kingrelatedness.com/manual.shtml" );
3848 }
3949
@@ -66,7 +76,27 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
6676 plinkArgs .add (inputVCF .getPath ());
6777
6878 plinkArgs .add ("--make-bed" );
69- plinkArgs .add ("--allow-extra-chr" );
79+
80+ boolean limitToChromosomes = getProvider ().getParameterByName ("limitToChromosomes" ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Boolean .class , true );
81+ if (limitToChromosomes )
82+ {
83+ SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor .extractDictionary (genome .getSequenceDictionary ().toPath ());
84+ List <String > toKeep = dict .getSequences ().stream ().filter (s -> {
85+ String name = StringUtils .replaceIgnoreCase (s .getSequenceName (), "^chr" , "" );
86+
87+ return NumberUtils .isCreatable (name ) || "X" .equalsIgnoreCase (name ) || "Y" .equalsIgnoreCase (name );
88+ }).map (SAMSequenceRecord ::getSequenceName ).toList ();
89+
90+ plinkArgs .add ("--chr" );
91+ plinkArgs .add (StringUtils .join (toKeep , "," ));
92+ }
93+ else
94+ {
95+ plinkArgs .add ("--allow-extra-chr" );
96+ }
97+
98+ plinkArgs .add ("--silent" );
99+
70100 plinkArgs .add ("--max-alleles" );
71101 plinkArgs .add ("2" );
72102
@@ -97,11 +127,17 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
97127 kingArgs .add (wrapper .getExe ().getPath ());
98128
99129 kingArgs .add ("-b" );
100- kingArgs .add (plinkOut .getPath ());
130+ kingArgs .add (plinkOutBed .getPath ());
101131
102132 kingArgs .add ("--prefix" );
103133 kingArgs .add (SequenceAnalysisService .get ().getUnzippedBaseName (inputVCF .getName ()));
104134
135+ if (threads != null )
136+ {
137+ kingArgs .add ("--cpus" );
138+ kingArgs .add (threads .toString ());
139+ }
140+
105141 kingArgs .add ("--kinship" );
106142
107143 File kinshipOutput = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (inputVCF .getName ()) + ".kin" );
0 commit comments