Skip to content

Commit 9f75f5e

Browse files
committed
More logging around viral consensus
1 parent 29ce967 commit 9f75f5e

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ProcessUtils.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ public void run()
8383
try (BufferedReader procReader = new BufferedReader(new InputStreamReader(_readStdErr ? tmpProcess.getErrorStream() : tmpProcess.getInputStream(), StringUtilsLabKey.DEFAULT_CHARSET)))
8484
{
8585
String line;
86-
while (tmpProcess.isAlive() && (line = procReader.readLine()) != null)
86+
while ((line = procReader.readLine()) != null)
8787
{
8888
if (_writeOutputToLog)
8989
_log.log(Level.DEBUG, "\t" + line);

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/StarWrapper.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -491,6 +491,5 @@ public void logVersionString() throws PipelineJobException
491491
args.add("--version");
492492

493493
getLogger().info("STAR version: " + executeWithOutput(args));
494-
getLogger().debug("last exit code: " + getLastReturnCode());
495494
}
496495
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import au.com.bytecode.opencsv.CSVReader;
44
import au.com.bytecode.opencsv.CSVWriter;
5+
import htsjdk.samtools.SAMSequenceDictionary;
56
import htsjdk.samtools.util.CloseableIterator;
67
import htsjdk.samtools.util.IOUtil;
78
import htsjdk.samtools.util.Interval;
@@ -11,6 +12,7 @@
1112
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
1213
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
1314
import htsjdk.variant.vcf.VCFFileReader;
15+
import htsjdk.variant.vcf.VCFHeader;
1416
import org.apache.commons.lang3.StringUtils;
1517
import org.apache.log4j.Logger;
1618
import org.biojava3.core.sequence.DNASequence;
@@ -48,6 +50,7 @@
4850
import java.io.IOException;
4951
import java.io.InputStream;
5052
import java.io.PrintWriter;
53+
import java.text.NumberFormat;
5154
import java.util.ArrayList;
5255
import java.util.Arrays;
5356
import java.util.Collections;
@@ -152,10 +155,13 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
152155
int totalIndelGT2 = 0;
153156

154157
File loFreqConsensusVcf = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.consensus.vcf.gz");
155-
VariantContextWriterBuilder writerBuiler = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath()));
158+
SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(referenceGenome.getSequenceDictionary().toPath());
159+
VariantContextWriterBuilder writerBuiler = new VariantContextWriterBuilder().setOutputFile(loFreqConsensusVcf).setReferenceDictionary(dict);
156160
try (VCFFileReader reader = new VCFFileReader(outputVcfSnpEff);CloseableIterator<VariantContext> it = reader.iterator();VariantContextWriter writer = writerBuiler.build())
157161
{
158-
writer.writeHeader(reader.getFileHeader());
162+
VCFHeader header = reader.getFileHeader();
163+
header.setSequenceDictionary(dict);
164+
writer.writeHeader(header);
159165

160166
while (it.hasNext())
161167
{
@@ -251,14 +257,25 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
251257
intervalOfCurrentGap = null;
252258
}
253259
}
260+
261+
//Ensure we count final gap
262+
if (intervalOfCurrentGap != null)
263+
{
264+
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
265+
gapIntervals++;
266+
}
254267
}
255268
catch (IOException e)
256269
{
257270
throw new PipelineJobException(e);
258271
}
259272

260-
getPipelineCtx().getLogger().info("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped);
261-
getPipelineCtx().getLogger().info("Total intervals of these gaps: " + gapIntervals);
273+
NumberFormat fmt = NumberFormat.getPercentInstance();
274+
fmt.setMaximumFractionDigits(2);
275+
276+
double pctNoCover = positionsSkipped / (double)dict.getReferenceLength();
277+
getPipelineCtx().getLogger().info("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped + "(" + fmt.format(pctNoCover) + ")");
278+
getPipelineCtx().getLogger().info("Total # gap intervals: " + gapIntervals);
262279

263280
//generate bcftools consensus
264281
File script = new File(SequenceAnalysisService.get().getScriptPath(SequenceAnalysisModule.NAME, "external/viral_consensus.sh"));
@@ -331,6 +348,12 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
331348
int lofreqConsensusNs = replaceFastHeader(consensusFastaLoFreq, rs, "Lofreq|Variants:" + totalGTThreshold);
332349
description += "\nConsensus Ns: " + lofreqConsensusNs;
333350

351+
if (lofreqConsensusNs < positionsSkipped)
352+
{
353+
getPipelineCtx().getLogger().error("Problem with masking of the genome. Insufficient non-covered positions");
354+
}
355+
356+
334357
if (bcfToolsConsensusNs != lofreqConsensusNs)
335358
{
336359
getPipelineCtx().getLogger().warn("Consensus ambiguities from bcftools and lofreq did not match: " + bcfToolsConsensusNs + " / " + lofreqConsensusNs);

0 commit comments

Comments
 (0)