|
11 | 11 | import org.jetbrains.annotations.Nullable; |
12 | 12 | import org.json.JSONObject; |
13 | 13 | import org.labkey.api.pipeline.PipelineJobException; |
| 14 | +import org.labkey.api.reader.Readers; |
14 | 15 | import org.labkey.api.sequenceanalysis.SequenceAnalysisService; |
15 | 16 | import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; |
16 | 17 | import org.labkey.api.sequenceanalysis.pipeline.PedigreeToolParameterDescriptor; |
|
24 | 25 | import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; |
25 | 26 | import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; |
26 | 27 | import org.labkey.api.util.Compress; |
| 28 | +import org.labkey.api.writer.PrintWriters; |
27 | 29 | import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; |
28 | 30 |
|
| 31 | +import java.io.BufferedReader; |
29 | 32 | import java.io.File; |
30 | 33 | import java.io.IOException; |
| 34 | +import java.io.PrintWriter; |
31 | 35 | import java.util.ArrayList; |
| 36 | +import java.util.Arrays; |
| 37 | +import java.util.HashMap; |
32 | 38 | import java.util.List; |
| 39 | +import java.util.Map; |
33 | 40 |
|
34 | 41 | public class KingInferenceStep extends AbstractCommandPipelineStep<KingInferenceStep.KingWrapper> implements VariantProcessingStep |
35 | 42 | { |
@@ -121,19 +128,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno |
121 | 128 | plinkArgs.add("--max-alleles"); |
122 | 129 | plinkArgs.add("2"); |
123 | 130 |
|
124 | | - String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); |
125 | | - if (demographicsProviderName != null) |
126 | | - { |
127 | | - File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); |
128 | | - if (!pedFile.exists()) |
129 | | - { |
130 | | - throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); |
131 | | - } |
132 | | - |
133 | | - plinkArgs.add("--ped"); |
134 | | - plinkArgs.add(pedFile.getPath()); |
135 | | - } |
136 | | - |
137 | 131 | plinkArgs.add("--out"); |
138 | 132 | plinkArgs.add(plinkOut.getPath()); |
139 | 133 |
|
@@ -166,6 +160,23 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno |
166 | 160 | kingArgs.add("--prefix"); |
167 | 161 | kingArgs.add(SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName())); |
168 | 162 |
|
| 163 | + // Update the pedigree / fam file: |
| 164 | + String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); |
| 165 | + if (demographicsProviderName != null) |
| 166 | + { |
| 167 | + File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); |
| 168 | + if (!pedFile.exists()) |
| 169 | + { |
| 170 | + throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); |
| 171 | + } |
| 172 | + |
| 173 | + File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"), kingArgs); |
| 174 | + kingArgs.add("--ped"); |
| 175 | + kingArgs.add(kingFam.getPath()); |
| 176 | + |
| 177 | + output.addIntermediateFile(kingFam); |
| 178 | + } |
| 179 | + |
169 | 180 | if (threads != null) |
170 | 181 | { |
171 | 182 | kingArgs.add("--cpus"); |
@@ -202,6 +213,58 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno |
202 | 213 | return output; |
203 | 214 | } |
204 | 215 |
|
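| | + // Rewrite plink's generated .fam file using the user-supplied pedigree so KING receives accurate parentage and sex information.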
| 216 | + private File createFamFile(File pedFile, File famFile, List<String> kingArgs) throws PipelineJobException |
| 217 | + { |
| 218 | + File newFamFile = new File(famFile.getParentFile(), "king.fam"); |
| 219 | + |
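| | + // Map each subject ID to a .fam-formatted row (FID, IID, father, mother, sex, phenotype); FID is set to 0 and phenotype to -9 (missing).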
| 220 | + Map<String, String> pedMap = new HashMap<>(); |
| 221 | + try (BufferedReader reader = Readers.getReader(pedFile)) |
| 222 | + { |
| 223 | + String line; |
| 224 | + while ((line = reader.readLine()) != null) |
| 225 | + { |
| 226 | + String[] tokens = line.split(" "); |
| 227 | + if (tokens.length != 6) |
| 228 | + { |
| 229 | + throw new PipelineJobException("Improper ped line length: " + tokens.length); |
| 230 | + } |
| 231 | + |
| 232 | + pedMap.put(tokens[1], StringUtils.join(Arrays.asList("0", tokens[1], tokens[2], tokens[3], tokens[4], "-9"), "\t")); |
| 233 | + } |
| 234 | + } |
| 235 | + catch (IOException e) |
| 236 | + { |
| 237 | + throw new PipelineJobException(e); |
| 238 | + } |
| 239 | + |
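| | + // Walk plink's .fam file in order, replacing each row with the pedigree-derived row for that subject.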
| 240 | + try (BufferedReader reader = Readers.getReader(famFile); PrintWriter writer = PrintWriters.getPrintWriter(newFamFile))
| 241 | + { |
| 242 | + String line; |
| 243 | + while ((line = reader.readLine()) != null) |
| 244 | + { |
| 245 | + String[] tokens = line.split("\t"); |
| 246 | + if (tokens.length != 6) |
| 247 | + { |
| 248 | + throw new PipelineJobException("Improper fam line length: " + tokens.length);
| 249 | + } |
| 250 | + |
| 251 | + String newRow = pedMap.get(tokens[1]); |
| 252 | + if (newRow == null) |
| 253 | + { |
| 254 | + throw new PipelineJobException("Unable to find pedigree entry for: " + tokens[1]); |
| 255 | + } |
| 256 | + |
| 257 | + writer.println(newRow); |
| 258 | + } |
| 259 | + } |
| 260 | + catch (IOException e) |
| 261 | + { |
| 262 | + throw new PipelineJobException(e); |
| 263 | + } |
| 264 | + |
| 265 | + return newFamFile; |
| 266 | + } |
| 267 | + |
205 | 268 | public static class KingWrapper extends AbstractCommandWrapper |
206 | 269 | { |
207 | 270 | public KingWrapper(@Nullable Logger logger) |
|