
Commit 6729c1c

Bugfix to PCA/per-batch code
1 parent: 35bdcab

1 file changed: 39 additions, 25 deletions


SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java

@@ -8,11 +8,15 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.Logger;
 import org.json.JSONObject;
+import org.labkey.api.data.Container;
+import org.labkey.api.data.SimpleFilter;
+import org.labkey.api.data.TableSelector;
 import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.query.FieldKey;
+import org.labkey.api.query.QueryService;
 import org.labkey.api.reader.Readers;
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
-import org.labkey.api.sequenceanalysis.model.Readset;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
@@ -26,7 +30,9 @@
 import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
 import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
 import org.labkey.api.util.FileUtil;
+import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
+import org.labkey.sequenceanalysis.SequenceAnalysisSchema;

 import javax.annotation.Nullable;
 import java.io.BufferedReader;
@@ -36,8 +42,10 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;

 public class PlinkPcaStep extends AbstractCommandPipelineStep<PlinkPcaStep.PlinkWrapper> implements VariantProcessingStep
 {
@@ -185,37 +193,43 @@ private void runBatch(File inputVCF, File outputDirectory, VariantProcessingStep
     @Override
     public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles) throws PipelineJobException
     {
-        try (PrintWriter writer = PrintWriters.getPrintWriter(getSampleMapFile()))
+        boolean splitByApplication = getProvider().getParameterByName("splitByApplication").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
+        if (splitByApplication)
         {
-            getPipelineCtx().getLogger().info("Writing Sample Map");
-            for (SequenceOutputFile so : inputFiles)
+            try (PrintWriter writer = PrintWriters.getPrintWriter(getSampleMapFile()))
             {
-                if (so.getReadset() == null)
-                {
-                    throw new PipelineJobException("This step requires all inputs to have a readset");
-                }
-
-                Readset rs = support.getCachedReadset(so.getReadset());
-
-                try (VCFFileReader reader = new VCFFileReader(so.getFile()))
+                getPipelineCtx().getLogger().info("Writing Sample Map");
+                for (SequenceOutputFile so : inputFiles)
                 {
-                    VCFHeader header = reader.getFileHeader();
-                    if (header.getSampleNamesInOrder().isEmpty())
-                    {
-                        throw new PipelineJobException("Expected VCF to have samples: " + so.getFile().getPath());
-                    }
-                    else if (header.getSampleNamesInOrder().size() != 1)
+                    try (VCFFileReader reader = new VCFFileReader(so.getFile()))
                     {
-                        throw new PipelineJobException("Expected VCF to a single sample: " + so.getFile().getPath());
+                        VCFHeader header = reader.getFileHeader();
+                        if (header.getSampleNamesInOrder().isEmpty())
+                        {
+                            throw new PipelineJobException("Expected VCF to have samples: " + so.getFile().getPath());
+                        }
+
+                        for (String sample : header.getSampleNamesInOrder())
+                        {
+                            // Find readset:
+                            Container targetContainer = getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer();
+                            Set<String> applications = new HashSet<>(new TableSelector(QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS), PageFlowUtil.set("application"), new SimpleFilter(FieldKey.fromString("name"), sample), null).getArrayList(String.class));
+                            if (applications.size() == 1)
+                            {
+                                writer.println(sample + "\t" + applications.iterator().next());
+                            }
+                            else
+                            {
+                                throw new PipelineJobException("More than one readset found with name: " + sample);
+                            }
+                        }
                     }
-
-                    writer.println(header.getSampleNamesInOrder().get(0) + "\t" + rs.getApplication());
                 }
             }
-        }
-        catch (IOException e)
-        {
-            throw new PipelineJobException(e);
+            catch (IOException e)
+            {
+                throw new PipelineJobException(e);
+            }
         }
     }
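What changed, in brief: the old init() required every input VCF to carry exactly one sample and an attached readset, and took the application value from that cached readset. The new version gates the sample map behind the splitByApplication parameter, walks every sample in each VCF header, and resolves each sample's application by querying the sequenceanalysis readsets table by name, failing when the lookup is not unique. A minimal standalone sketch of that per-sample resolution, with a plain Map standing in for the LabKey TableSelector query (the class name, sample names, and application values below are hypothetical, for illustration only):

import java.util.List;
import java.util.Map;

public class SampleMapSketch
{
    // Hypothetical stand-in for the readsets table: readset name -> application value(s).
    // The real step fetches this per sample via a TableSelector filtered on "name".
    private static final Map<String, List<String>> READSET_APPLICATIONS = Map.of(
            "Sample1", List.of("Whole Genome: Deep Coverage"),
            "Sample2", List.of("RNA-seq"));

    private static String resolveApplication(String sampleName)
    {
        List<String> applications = READSET_APPLICATIONS.getOrDefault(sampleName, List.of());
        if (applications.size() != 1)
        {
            // The real code throws PipelineJobException here; note it reuses the
            // "More than one readset found" message even when zero readsets match.
            throw new IllegalStateException("More than one readset found with name: " + sampleName);
        }
        return applications.get(0);
    }

    public static void main(String[] args)
    {
        // One sample-map line per VCF sample: name, tab, application
        for (String sample : List.of("Sample1", "Sample2"))
        {
            System.out.println(sample + "\t" + resolveApplication(sample));
        }
    }
}

Because the uniqueness check now runs per sample name rather than per file, a multi-sample VCF can pass through this step as long as each of its sample names maps to exactly one readset.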
