|
2 | 2 |
|
3 | 3 | import htsjdk.samtools.util.Interval; |
4 | 4 | import org.apache.commons.lang3.StringUtils; |
| 5 | +import org.apache.logging.log4j.Logger; |
5 | 6 | import org.jetbrains.annotations.NotNull; |
6 | 7 | import org.labkey.api.pipeline.AbstractTaskFactory; |
7 | 8 | import org.labkey.api.pipeline.AbstractTaskFactorySettings; |
|
14 | 15 | import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
15 | 16 | import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; |
16 | 17 | import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; |
| 18 | +import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper; |
17 | 19 | import org.labkey.api.util.FileType; |
| 20 | +import org.labkey.api.writer.PrintWriters; |
18 | 21 |
|
19 | 22 | import java.io.File; |
20 | 23 | import java.io.IOException; |
| 24 | +import java.io.PrintWriter; |
21 | 25 | import java.util.ArrayList; |
22 | 26 | import java.util.Collections; |
23 | 27 | import java.util.HashSet; |
@@ -170,7 +174,49 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction |
170 | 174 | missing.add(vcf); |
171 | 175 | } |
172 | 176 |
|
173 | | - toConcat.add(vcf); |
| 177 | + // NOTE: this was added to fix a one-time issue where -L was dropped from some upstream GenotypeGVCFs. |
| 178 | + // Under normal conditions this would never be necessary. |
| 179 | + boolean ensureOutputsWithinIntervals = getPipelineJob().getParameterJson().optBoolean("variantCalling.GenotypeGVCFs.ensureOutputsWithinIntervalsOnMerge", false); |
| 180 | + if (ensureOutputsWithinIntervals) |
| 181 | + { |
| 182 | + getJob().getLogger().debug("Ensuring ensure scatter outputs respect intervals"); |
| 183 | + List<Interval> expectedIntervals = jobToIntervalMap.get(name); |
| 184 | + |
| 185 | + File intervalFile = new File(vcf.getParentFile(), "scatterIntervals.list"); |
| 186 | + File subsetVcf = new File(vcf.getParentFile(), SequenceAnalysisService.get().getUnzippedBaseName(vcf.getName()) + ".subset.vcf.gz"); |
| 187 | + File subsetVcfIdx = new File(subsetVcf.getPath() + ".tbi"); |
| 188 | + manager.addIntermediateFile(intervalFile); |
| 189 | + manager.addIntermediateFile(subsetVcf); |
| 190 | + manager.addIntermediateFile(subsetVcfIdx); |
| 191 | + |
| 192 | + if (subsetVcfIdx.exists()) |
| 193 | + { |
| 194 | + getJob().getLogger().debug("Index exists, will not re-submit the VCF: " + subsetVcf.getName()); |
| 195 | + } |
| 196 | + else |
| 197 | + { |
| 198 | + try (PrintWriter writer = PrintWriters.getPrintWriter(intervalFile)) |
| 199 | + { |
| 200 | + expectedIntervals.forEach(interval -> { |
| 201 | + writer.println(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()); |
| 202 | + }); |
| 203 | + } |
| 204 | + catch (IOException e) |
| 205 | + { |
| 206 | + throw new PipelineJobException(e); |
| 207 | + } |
| 208 | + |
| 209 | + Wrapper wrapper = new Wrapper(getJob().getLogger()); |
| 210 | + wrapper.execute(vcf, subsetVcf, intervalFile); |
| 211 | + } |
| 212 | + |
| 213 | + toConcat.add(subsetVcf); |
| 214 | + } |
| 215 | + else |
| 216 | + { |
| 217 | + toConcat.add(vcf); |
| 218 | + } |
| 219 | + |
174 | 220 | manager.addInput(action, "Input VCF", vcf); |
175 | 221 | manager.addIntermediateFile(vcf); |
176 | 222 | manager.addIntermediateFile(new File(vcf.getPath() + ".tbi")); |
@@ -204,4 +250,33 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction |
204 | 250 |
|
205 | 251 | return combined; |
206 | 252 | } |
| 253 | + |
| 254 | + public static class Wrapper extends AbstractDiscvrSeqWrapper |
| 255 | + { |
| 256 | + public Wrapper(Logger log) |
| 257 | + { |
| 258 | + super(log); |
| 259 | + } |
| 260 | + |
| 261 | + public void execute(File inputVcf, File outputVcf, File intervalFile) throws PipelineJobException |
| 262 | + { |
| 263 | + List<String> args = new ArrayList<>(getBaseArgs()); |
| 264 | + args.add("OutputVariantsStartingInIntervals"); |
| 265 | + |
| 266 | + args.add("-V"); |
| 267 | + args.add(inputVcf.getPath()); |
| 268 | + |
| 269 | + args.add("-O"); |
| 270 | + args.add(outputVcf.getPath()); |
| 271 | + |
| 272 | + args.add("-L"); |
| 273 | + args.add(intervalFile.getPath()); |
| 274 | + |
| 275 | + execute(args); |
| 276 | + if (!outputVcf.exists()) |
| 277 | + { |
| 278 | + throw new PipelineJobException("Missing file: " + outputVcf.getPath()); |
| 279 | + } |
| 280 | + } |
| 281 | + } |
207 | 282 | } |
0 commit comments