Skip to content

Commit 25c9941

Browse files
committed
Support proper include/exclude list for plink
1 parent 77103ef commit 25c9941

File tree

1 file changed

+39
-3
lines changed

1 file changed

+39
-3
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java

Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
package org.labkey.sequenceanalysis.run.variant;
22

3+
import au.com.bytecode.opencsv.CSVWriter;
4+
import htsjdk.samtools.util.IOUtil;
35
import htsjdk.samtools.util.Interval;
6+
import org.apache.commons.lang3.StringUtils;
47
import org.apache.logging.log4j.Logger;
58
import org.json.JSONObject;
69
import org.labkey.api.pipeline.PipelineJobException;
@@ -18,6 +21,7 @@
1821

1922
import javax.annotation.Nullable;
2023
import java.io.File;
24+
import java.io.IOException;
2125
import java.util.ArrayList;
2226
import java.util.Arrays;
2327
import java.util.List;
@@ -36,8 +40,10 @@ public Provider()
3640
super("PlinkPcaStep", "Plink/PCA", "", "This will run plink to generate the data for MDS/PCA", Arrays.asList(
3741
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--not-chr"), "excludedContigs", "Excluded Contigs", "A comma separated list of contigs to exclude, such as X,Y,MT.", "textfield", new JSONObject(){{
3842

39-
}}, "X,Y,MT")
40-
), null, "https://zzz.bwh.harvard.edu/plink/");
43+
}}, "X,Y,MT"),
44+
ToolParameterDescriptor.create(SelectSamplesStep.SAMPLE_INCLUDE, "Sample(s) Include", "Only the following samples will be included in the analysis.", "sequenceanalysis-trimmingtextarea", null, null),
45+
ToolParameterDescriptor.create(SelectSamplesStep.SAMPLE_EXCLUDE, "Samples(s) To Exclude", "The following samples will be excluded from the analysis.", "sequenceanalysis-trimmingtextarea", null, null)
46+
), Arrays.asList("sequenceanalysis/field/TrimmingTextArea.js"), "https://zzz.bwh.harvard.edu/plink/");
4147
}
4248

4349
public PlinkPcaStep create(PipelineContext ctx)
@@ -46,6 +52,30 @@ public PlinkPcaStep create(PipelineContext ctx)
4652
}
4753
}
4854

55+
private void addSubjectSelectOptions(String text, List<String> args, String argName, File outputFile, VariantProcessingStepOutputImpl output) throws PipelineJobException
56+
{
57+
text = StringUtils.trimToNull(text);
58+
if (text != null)
59+
{
60+
String[] names = text.split(";");
61+
try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(outputFile), '\t', CSVWriter.NO_QUOTE_CHARACTER))
62+
{
63+
Arrays.stream(names).forEach(x -> {
64+
writer.writeNext(new String[]{x, x});
65+
});
66+
}
67+
catch (IOException e)
68+
{
69+
throw new PipelineJobException(e);
70+
}
71+
72+
args.add(argName);
73+
args.add(outputFile.getPath());
74+
75+
output.addIntermediateFile(outputFile);
76+
}
77+
}
78+
4979
@Override
5080
public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
5181
{
@@ -55,7 +85,13 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
5585
args.add(getWrapper().getExe().getPath());
5686
args.add("--pca");
5787
args.add("--allow-extra-chr");
58-
args.add("--keep WGS.names");
88+
89+
String samplesToInclude = getProvider().getParameterByName(SelectSamplesStep.SAMPLE_INCLUDE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
90+
addSubjectSelectOptions(samplesToInclude, args, "--keep", new File(outputDirectory, "samplesToKeep.txt"), output);
91+
92+
String samplesToExclude = getProvider().getParameterByName(SelectSamplesStep.SAMPLE_EXCLUDE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class);
93+
addSubjectSelectOptions(samplesToExclude, args, "--exclude", new File(outputDirectory, "samplesToExclude.txt"), output);
94+
5995
args.add("--vcf");
6096
args.add(inputVCF.getPath());
6197

0 commit comments

Comments
 (0)