|
2 | 2 |
|
3 | 3 | import au.com.bytecode.opencsv.CSVReader; |
4 | 4 | import au.com.bytecode.opencsv.CSVWriter; |
| 5 | +import htsjdk.samtools.SAMSequenceDictionary; |
5 | 6 | import htsjdk.samtools.util.CloseableIterator; |
6 | 7 | import htsjdk.samtools.util.IOUtil; |
7 | 8 | import htsjdk.samtools.util.Interval; |
|
11 | 12 | import htsjdk.variant.variantcontext.writer.VariantContextWriter; |
12 | 13 | import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; |
13 | 14 | import htsjdk.variant.vcf.VCFFileReader; |
| 15 | +import htsjdk.variant.vcf.VCFHeader; |
14 | 16 | import org.apache.commons.lang3.StringUtils; |
15 | 17 | import org.apache.log4j.Logger; |
16 | 18 | import org.biojava3.core.sequence.DNASequence; |
|
48 | 50 | import java.io.IOException; |
49 | 51 | import java.io.InputStream; |
50 | 52 | import java.io.PrintWriter; |
| 53 | +import java.text.NumberFormat; |
51 | 54 | import java.util.ArrayList; |
52 | 55 | import java.util.Arrays; |
53 | 56 | import java.util.Collections; |
@@ -152,10 +155,13 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc |
152 | 155 | int totalIndelGT2 = 0; |
153 | 156 |
|
154 | 157 | File loFreqConsensusVcf = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.consensus.vcf.gz"); |
155 | | - VariantContextWriterBuilder writerBuiler = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath())); |
| 158 | + SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath()); |
| 159 | + VariantContextWriterBuilder writerBuiler = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict); |
156 | 160 | try (VCFFileReader reader = new VCFFileReader(outputVcfSnpEff);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writer = writerBuiler.build()) |
157 | 161 | { |
158 | | - writer.writeHeader(reader.getFileHeader()); |
| 162 | + VCFHeader header = reader.getFileHeader(); |
| 163 | + header.setSequenceDictionary(dict); |
| 164 | + writer.writeHeader(header); |
159 | 165 |
|
160 | 166 | while (it.hasNext()) |
161 | 167 | { |
@@ -251,14 +257,25 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc |
251 | 257 | intervalOfCurrentGap = null; |
252 | 258 | } |
253 | 259 | } |
| 260 | + |
| 261 | + //Ensure we count final gap |
| 262 | + if (intervalOfCurrentGap != null) |
| 263 | + { |
| 264 | + writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())}); |
| 265 | + gapIntervals++; |
| 266 | + } |
254 | 267 | } |
255 | 268 | catch (IOException e) |
256 | 269 | { |
257 | 270 | throw new PipelineJobException(e); |
258 | 271 | } |
259 | 272 |
|
260 | | - getPipelineCtx().getLogger().info("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped); |
261 | | - getPipelineCtx().getLogger().info("Total intervals of these gaps: " + gapIntervals); |
| 273 | + NumberFormat fmt = NumberFormat.getPercentInstance(); |
| 274 | + fmt.setMaximumFractionDigits(2); |
| 275 | + |
| 276 | + double pctNoCover = positionsSkipped / (double)dict.getReferenceLength(); |
| 277 | + getPipelineCtx().getLogger().info("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped + "(" + fmt.format(pctNoCover) + ")"); |
| 278 | + getPipelineCtx().getLogger().info("Total # gap intervals: " + gapIntervals); |
262 | 279 |
|
263 | 280 | //generate bcftools consensus |
264 | 281 | File script = new File(SequenceAnalysisService.get().getScriptPath(SequenceAnalysisModule.NAME, "external/viral_consensus.sh")); |
@@ -331,6 +348,12 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc |
331 | 348 | int lofreqConsensusNs = replaceFastHeader(consensusFastaLoFreq, rs, "Lofreq|Variants:" + totalGTThreshold); |
332 | 349 | description += "\nConsensus Ns: " + lofreqConsensusNs; |
333 | 350 |
|
| 351 | + if (lofreqConsensusNs < positionsSkipped) |
| 352 | + { |
| 353 | + getPipelineCtx().getLogger().error("Problem with masking of the genome. Insufficient non-covered positions"); |
| 354 | + } |
| 355 | + |
| 356 | + |
334 | 357 | if (bcfToolsConsensusNs != lofreqConsensusNs) |
335 | 358 | { |
336 | 359 | getPipelineCtx().getLogger().warn("Consensus ambiguities from bcftools and lofreq did not match: " + bcfToolsConsensusNs + " / " + lofreqConsensusNs); |
|
0 commit comments