8 | 8 | import org.apache.commons.lang3.StringUtils; |
9 | 9 | import org.apache.logging.log4j.Logger; |
10 | 10 | import org.json.JSONObject; |
| 11 | +import org.labkey.api.data.Container; |
| 12 | +import org.labkey.api.data.SimpleFilter; |
| 13 | +import org.labkey.api.data.TableSelector; |
11 | 14 | import org.labkey.api.pipeline.PipelineJob; |
12 | 15 | import org.labkey.api.pipeline.PipelineJobException; |
| 16 | +import org.labkey.api.query.FieldKey; |
| 17 | +import org.labkey.api.query.QueryService; |
13 | 18 | import org.labkey.api.reader.Readers; |
14 | 19 | import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
15 | | -import org.labkey.api.sequenceanalysis.model.Readset; |
16 | 20 | import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; |
17 | 21 | import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; |
18 | 22 | import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; |
|
26 | 30 | import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; |
27 | 31 | import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; |
28 | 32 | import org.labkey.api.util.FileUtil; |
| 33 | +import org.labkey.api.util.PageFlowUtil; |
29 | 34 | import org.labkey.api.writer.PrintWriters; |
| 35 | +import org.labkey.sequenceanalysis.SequenceAnalysisSchema; |
30 | 36 |
31 | 37 | import javax.annotation.Nullable; |
32 | 38 | import java.io.BufferedReader; |
|
36 | 42 | import java.util.ArrayList; |
37 | 43 | import java.util.Arrays; |
38 | 44 | import java.util.HashMap; |
| 45 | +import java.util.HashSet; |
39 | 46 | import java.util.List; |
40 | 47 | import java.util.Map; |
| 48 | +import java.util.Set; |
41 | 49 |
42 | 50 | public class PlinkPcaStep extends AbstractCommandPipelineStep<PlinkPcaStep.PlinkWrapper> implements VariantProcessingStep |
43 | 51 | { |
@@ -185,37 +193,43 @@ private void runBatch(File inputVCF, File outputDirectory, VariantProcessingStep |
185 | 193 | @Override |
186 | 194 | public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles) throws PipelineJobException |
187 | 195 | { |
188 | | - try (PrintWriter writer = PrintWriters.getPrintWriter(getSampleMapFile())) |
| 196 | + boolean splitByApplication = getProvider().getParameterByName("splitByApplication").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true); |
| 197 | + if (splitByApplication) |
189 | 198 | { |
190 | | - getPipelineCtx().getLogger().info("Writing Sample Map"); |
191 | | - for (SequenceOutputFile so : inputFiles) |
| 199 | + try (PrintWriter writer = PrintWriters.getPrintWriter(getSampleMapFile())) |
192 | 200 | { |
193 | | - if (so.getReadset() == null) |
194 | | - { |
195 | | - throw new PipelineJobException("This step requires all inputs to have a readset"); |
196 | | - } |
197 | | - |
198 | | - Readset rs = support.getCachedReadset(so.getReadset()); |
199 | | - |
200 | | - try (VCFFileReader reader = new VCFFileReader(so.getFile())) |
| 201 | + getPipelineCtx().getLogger().info("Writing Sample Map"); |
| 202 | + for (SequenceOutputFile so : inputFiles) |
201 | 203 | { |
202 | | - VCFHeader header = reader.getFileHeader(); |
203 | | - if (header.getSampleNamesInOrder().isEmpty()) |
204 | | - { |
205 | | - throw new PipelineJobException("Expected VCF to have samples: " + so.getFile().getPath()); |
206 | | - } |
207 | | - else if (header.getSampleNamesInOrder().size() != 1) |
| 204 | + try (VCFFileReader reader = new VCFFileReader(so.getFile())) |
208 | 205 | { |
209 | | - throw new PipelineJobException("Expected VCF to a single sample: " + so.getFile().getPath()); |
| 206 | + VCFHeader header = reader.getFileHeader(); |
| 207 | + if (header.getSampleNamesInOrder().isEmpty()) |
| 208 | + { |
| 209 | + throw new PipelineJobException("Expected VCF to have samples: " + so.getFile().getPath()); |
| 210 | + } |
| 211 | + |
| 212 | + for (String sample : header.getSampleNamesInOrder()) |
| 213 | + { |
| 214 | + // Look up the application of the readset matching this sample name: |
| 215 | + Container targetContainer = getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer(); |
| 216 | + Set<String> applications = new HashSet<>(new TableSelector(QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS), PageFlowUtil.set("application"), new SimpleFilter(FieldKey.fromString("name"), sample), null).getArrayList(String.class)); |
| 217 | + if (applications.size() == 1) |
| 218 | + { |
| 219 | + writer.println(sample + "\t" + applications.iterator().next()); |
| 220 | + } |
| 221 | + else |
| 222 | + { |
| 223 | + throw new PipelineJobException("Expected a single readset application for sample: " + sample + ", found: " + applications.size()); |
| 224 | + } |
| 225 | + } |
210 | 226 | } |
211 | | - |
212 | | - writer.println(header.getSampleNamesInOrder().get(0) + "\t" + rs.getApplication()); |
213 | 227 | } |
214 | 228 | } |
215 | | - } |
216 | | - catch (IOException e) |
217 | | - { |
218 | | - throw new PipelineJobException(e); |
| 229 | + catch (IOException e) |
| 230 | + { |
| 231 | + throw new PipelineJobException(e); |
| 232 | + } |
219 | 233 | } |
220 | 234 | } |
221 | 235 |
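For context on the new lookup: instead of reading the application from the cached `Readset` of each `SequenceOutputFile`, the sample map is now built by querying `sequenceanalysis.readsets` for readsets whose name matches each VCF sample. Below is a minimal standalone sketch of that query pattern; the class and method names (`ReadsetApplicationLookup`, `resolveApplication`) are illustrative only and not part of this commit:

```java
import org.labkey.api.data.Container;
import org.labkey.api.data.SimpleFilter;
import org.labkey.api.data.TableInfo;
import org.labkey.api.data.TableSelector;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.query.FieldKey;
import org.labkey.api.query.QueryService;
import org.labkey.api.security.User;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.sequenceanalysis.SequenceAnalysisSchema;

import java.util.HashSet;
import java.util.Set;

public class ReadsetApplicationLookup
{
    /**
     * Illustrative helper mirroring the lookup added in init(): resolves the application
     * of the readset(s) whose name matches a VCF sample, or throws unless exactly one
     * distinct application is found.
     */
    public static String resolveApplication(User user, Container container, String sampleName) throws PipelineJobException
    {
        // Readsets for workbook jobs live in the parent container:
        Container target = container.isWorkbook() ? container.getParent() : container;

        TableInfo readsets = QueryService.get().getUserSchema(user, target, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS);
        if (readsets == null)
        {
            throw new PipelineJobException("Unable to find table: " + SequenceAnalysisSchema.TABLE_READSETS);
        }

        // Select the application column for all readsets named after this sample; the
        // HashSet collapses duplicate readsets that share the same application.
        Set<String> applications = new HashSet<>(new TableSelector(readsets, PageFlowUtil.set("application"), new SimpleFilter(FieldKey.fromString("name"), sampleName), null).getArrayList(String.class));
        if (applications.size() != 1)
        {
            throw new PipelineJobException("Expected a single readset application for sample: " + sampleName + ", found: " + applications.size());
        }

        return applications.iterator().next();
    }
}
```

Since the workbook/container resolution does not depend on the sample name, the committed code could hoist that lookup out of the per-sample loop; the sketch above computes it once per call for the same reason.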