11package org.labkey.sequenceanalysis.run.analysis;
22
3+ import au.com.bytecode.opencsv.CSVWriter;
4+ import htsjdk.samtools.util.IOUtil;
35import org.apache.commons.io.FileUtils;
46import org.json.JSONObject;
7+ import org.labkey.api.exp.api.ExpData;
8+ import org.labkey.api.exp.api.ExpRun;
9+ import org.labkey.api.exp.api.ExperimentService;
510import org.labkey.api.module.ModuleLoader;
611import org.labkey.api.pipeline.PipelineJob;
712import org.labkey.api.pipeline.PipelineJobException;
813import org.labkey.api.pipeline.RecordedAction;
914import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1015import org.labkey.api.sequenceanalysis.SequenceOutputFile;
1116import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
17+ import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
1218import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
1319import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
1420import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
@@ -66,6 +72,38 @@ public SequenceOutputProcessor getProcessor()
6672
6773 public static class Processor implements SequenceOutputProcessor
6874 {
75+ @Override
76+ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
77+ {
78+ try (CSVWriter csv = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(getSampleCsvFile(ctx))))
79+ {
80+ for (SequenceOutputFile so : inputFiles)
81+ {
82+ if (so.getRunId() == null)
83+ {
84+ throw new PipelineJobException("Unable to find ExperimentRun for: " + so.getRowid());
85+ }
86+
87+ ExpRun run = ExperimentService.get().getExpRun(so.getRunId());
88+ List<? extends ExpData> inputs = run.getInputDatas("Input BAM File", null);
89+ if (inputs.isEmpty())
90+ {
91+ throw new PipelineJobException("Unable to find input BAMs for: " + so.getRowid());
92+ }
93+ else if (inputs.size() > 1)
94+ {
95+ throw new PipelineJobException("More than one input BAM found for ExperimentRun: " + so.getRunId());
96+ }
97+
98+ csv.writeNext(new String[]{so.getFile().getParentFile().getPath(), inputs.get(0).getFile().getPath()});
99+ }
100+ }
101+ catch (IOException e)
102+ {
103+ throw new PipelineJobException(e);
104+ }
105+ }
106+
69107 @Override
70108 public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
71109 {
@@ -89,8 +127,6 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
89127 outputBaseName = outputBaseName.replaceAll(".vcf$", "");
90128 }
91129
92- File expectedFinalOutput = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz");
93-
94130 File ouputVcf = runSawfishCall(ctx, filesToProcess, genome, outputBaseName);
95131
96132 SequenceOutputFile so = new SequenceOutputFile();
@@ -102,6 +138,11 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
102138 ctx.addSequenceOutput(so);
103139 }
104140
141+ private File getSampleCsvFile(PipelineContext ctx)
142+ {
143+ return new File(ctx.getSourceDirectory(), "sawfish.samples.csv");
144+ }
145+
105146 private File runSawfishCall(JobContext ctx, List<File> inputs, ReferenceGenome genome, String outputBaseName) throws PipelineJobException
106147 {
107148 if (inputs.isEmpty())
@@ -126,6 +167,9 @@ private File runSawfishCall(JobContext ctx, List<File> inputs, ReferenceGenome g
126167 args.add(sample.getParentFile().getPath());
127168 }
128169
170+ args.add("--sample-csv");
171+ args.add(getSampleCsvFile(ctx).getPath());
172+
129173 File outDir = new File(ctx.getOutputDir(), "sawfish");
130174 args.add("--output-dir");
131175 args.add(outDir.getPath());
0 commit comments