|
9 | 9 | import htsjdk.variant.variantcontext.VariantContext; |
10 | 10 | import htsjdk.variant.vcf.VCFFileReader; |
11 | 11 | import org.apache.log4j.Logger; |
| 12 | +import org.biojava3.core.sequence.DNASequence; |
| 13 | +import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet; |
| 14 | +import org.biojava3.core.sequence.compound.NucleotideCompound; |
| 15 | +import org.biojava3.core.sequence.io.DNASequenceCreator; |
| 16 | +import org.biojava3.core.sequence.io.FastaReader; |
| 17 | +import org.biojava3.core.sequence.io.GenericFastaHeaderParser; |
12 | 18 | import org.jetbrains.annotations.Nullable; |
13 | 19 | import org.json.JSONObject; |
14 | 20 | import org.labkey.api.pipeline.PipelineJobException; |
|
35 | 41 |
|
36 | 42 | import java.io.File; |
37 | 43 | import java.io.IOException; |
| 44 | +import java.io.InputStream; |
38 | 45 | import java.util.ArrayList; |
39 | 46 | import java.util.Arrays; |
40 | 47 | import java.util.Collections; |
41 | 48 | import java.util.HashMap; |
42 | 49 | import java.util.HashSet; |
| 50 | +import java.util.LinkedHashMap; |
43 | 51 | import java.util.List; |
44 | 52 | import java.util.Map; |
45 | 53 | import java.util.Set; |
| 54 | +import java.util.concurrent.atomic.AtomicInteger; |
46 | 55 | import java.util.stream.Collectors; |
47 | 56 |
|
48 | 57 | public class LofreqAnalysis extends AbstractCommandPipelineStep<LofreqAnalysis.LofreqWrapper> implements AnalysisStep |
@@ -160,7 +169,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc |
160 | 169 | } |
161 | 170 |
|
162 | 171 | SimpleScriptWrapper consensusWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger()); |
163 | | - consensusWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory()); |
| 172 | + consensusWrapper.setWorkingDir(inputBam.getParentFile()); |
164 | 173 |
|
165 | 174 | Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); |
166 | 175 | if (maxThreads != null) |
@@ -278,11 +287,38 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc |
278 | 287 | description += "\n" + "WARNING: " + alleleToAF.size() + " variants detected in lofreq and not bcftools"; |
279 | 288 | } |
280 | 289 |
|
| 290 | + File consensusFasta = new File(inputBam.getParentFile(), FileUtil.getBaseName(inputBam.getName()) + ".consensus.fasta"); |
| 291 | + if (!consensusFasta.exists()) |
| 292 | + { |
| 293 | + throw new PipelineJobException("Expected file not found: " + consensusFasta.getPath()); |
| 294 | + } |
| 295 | + |
| 296 | + try (InputStream is = IOUtil.openFileForReading(consensusFasta)) |
| 297 | + { |
| 298 | + FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<>(is, new GenericFastaHeaderParser<>(), new DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet())); |
| 299 | + LinkedHashMap<String, DNASequence> fastaData = fastaReader.process(); |
| 300 | + |
| 301 | + for (String fastaHeader : fastaData.keySet()) |
| 302 | + { |
| 303 | + AtomicInteger totalN = new AtomicInteger(); |
| 304 | + DNASequence seq = fastaData.get(fastaHeader); |
| 305 | + seq.forEach(nt -> { |
| 306 | + if (nt.getUpperedBase().equals("N")) { |
| 307 | + totalN.getAndIncrement(); |
| 308 | + } |
| 309 | + }); |
| 310 | + |
| 311 | + description += "\nConsensus Ns: " + totalN.get(); |
| 312 | + } |
| 313 | + } |
| 314 | + catch (IOException e) |
| 315 | + { |
| 316 | + throw new PipelineJobException(e); |
| 317 | + } |
281 | 318 |
|
282 | 319 | output.addSequenceOutput(outputVcfSnpEff, "LoFreq: " + rs.getName(), CATEGORY, rs.getReadsetId(), null, referenceGenome.getGenomeId(), description); |
283 | 320 | output.addSequenceOutput(coverageOut, "Depth of Coverage: " + rs.getName(), "Depth of Coverage", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); |
284 | 321 |
|
285 | | - File consensusFasta = new File(inputBam.getParentFile(), FileUtil.getBaseName(inputBam.getName()) + ".consensus.fasta"); |
286 | 322 | output.addSequenceOutput(consensusFasta, "Consensus: " + rs.getName(), "Viral Consensus Sequence", rs.getReadsetId(), null, referenceGenome.getGenomeId(), description); |
287 | 323 |
|
288 | 324 | return output; |
|
0 commit comments