3434import org .labkey .api .sequenceanalysis .run .AbstractCommandWrapper ;
3535import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
3636import org .labkey .api .util .FileUtil ;
37+ import org .labkey .api .writer .PrintWriters ;
3738import org .labkey .sequenceanalysis .SequenceAnalysisModule ;
3839import org .labkey .sequenceanalysis .run .util .DepthOfCoverageWrapper ;
3940import org .labkey .sequenceanalysis .run .variant .SNPEffStep ;
4243import java .io .File ;
4344import java .io .IOException ;
4445import java .io .InputStream ;
46+ import java .io .PrintWriter ;
4547import java .util .ArrayList ;
4648import java .util .Arrays ;
4749import java .util .Collections ;
@@ -247,7 +249,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
247249 getPipelineCtx ().getLogger ().info ("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped );
248250 getPipelineCtx ().getLogger ().info ("Total intervals of these gaps: " + gapIntervals );
249251
250- consensusWrapper .execute (Arrays .asList ("/bin/bash" , script .getPath (), inputBam .getPath (), referenceGenome .getWorkingFastaFile ().getPath (), mask .getPath ()));
252+ consensusWrapper .execute (Arrays .asList ("/bin/bash" , script .getPath (), inputBam .getPath (), referenceGenome .getWorkingFastaFile ().getPath (), mask .getPath (), String . valueOf ( minCoverage ) ));
251253 File calls = new File (inputBam .getParentFile (), FileUtil .getBaseName (inputBam ) + ".calls.vcf.gz" );
252254
253255 Set <VariantContext > variantsBcftoolsOnly = new HashSet <>();
@@ -269,20 +271,20 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
269271 }
270272 }
271273
272- String description = String .format ("Total Variants: %s\n Total GT 1 PCT: %s\n Total GT 50 PCT: %s\n Total Indel GT 1 PCT: %s" , totalVariants , totalGT1 , totalGT50 , totalIndelGT1 );
274+ String description = String .format ("Total Variants: %s\n Total GT 1 PCT: %s\n Total GT 50 PCT: %s\n Total Indel GT 1 PCT: %s\t Positions Below Coverage: %s " , totalVariants , totalGT1 , totalGT50 , totalIndelGT1 , positionsSkipped );
273275
274276 if (!variantsBcftoolsOnly .isEmpty ())
275277 {
276278 getPipelineCtx ().getLogger ().error ("The following variants were in bcftools, but not GT50% in lofreq: " );
277- variantsBcftoolsOnly .forEach (vc -> getPipelineCtx ().getLogger ().error (getHashKey (vc )));
279+ variantsBcftoolsOnly .forEach (vc -> getPipelineCtx ().getLogger ().error (getHashKey (vc ) + ", DP=" + vc . getAttribute ( "DP" ) ));
278280
279281 description += "\n " + "WARNING: " + variantsBcftoolsOnly .size () + " variants detected in bcftools and not lofreq" ;
280282 }
281283
282284 if (!alleleToAF .isEmpty ())
283285 {
284286 getPipelineCtx ().getLogger ().error ("The following variants were GT50% in lofreq, but not in bcftools: " );
285- alleleToAF .keySet ().forEach (vc -> getPipelineCtx ().getLogger ().error (vc ));
287+ alleleToAF .keySet ().forEach (vc -> getPipelineCtx ().getLogger ().error (vc + ", AF=" + alleleToAF . get ( vc ) ));
286288
287289 description += "\n " + "WARNING: " + alleleToAF .size () + " variants detected in lofreq and not bcftools" ;
288290 }
@@ -293,23 +295,44 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
293295 throw new PipelineJobException ("Expected file not found: " + consensusFasta .getPath ());
294296 }
295297
298+ DNASequence seq ;
296299 try (InputStream is = IOUtil .openFileForReading (consensusFasta ))
297300 {
298301 FastaReader <DNASequence , NucleotideCompound > fastaReader = new FastaReader <>(is , new GenericFastaHeaderParser <>(), new DNASequenceCreator (AmbiguityDNACompoundSet .getDNACompoundSet ()));
299302 LinkedHashMap <String , DNASequence > fastaData = fastaReader .process ();
300303
301- for (String fastaHeader : fastaData .keySet ())
302- {
303- AtomicInteger totalN = new AtomicInteger ();
304- DNASequence seq = fastaData .get (fastaHeader );
305- seq .forEach (nt -> {
306- if (nt .getUpperedBase ().equals ("N" )) {
307- totalN .getAndIncrement ();
308- }
309- });
304+ AtomicInteger totalN = new AtomicInteger ();
305+ seq = fastaData .values ().iterator ().next ();
306+ seq .forEach (nt -> {
307+ if (nt .getUpperedBase ().equals ("N" )) {
308+ totalN .getAndIncrement ();
309+ }
310+ });
310311
311- description += "\n Consensus Ns: " + totalN .get ();
312+ description += "\n Consensus Ns: " + totalN .get ();
313+ }
314+ catch (IOException e )
315+ {
316+ throw new PipelineJobException (e );
317+ }
318+
319+ //Replace FASTA header:
320+ try (PrintWriter writer = PrintWriters .getPrintWriter (consensusFasta ))
321+ {
322+ StringBuilder header = new StringBuilder ();
323+ if (rs .getSubjectId () != null )
324+ {
325+ header .append (rs .getSubjectId ()).append ("|" );
312326 }
327+ else
328+ {
329+ header .append (rs .getName ()).append ("|" );
330+ }
331+
332+ header .append (rs .getLibraryType () == null ? rs .getApplication () : rs .getLibraryType ());
333+
334+ writer .println (">" + header );
335+ writer .println (seq .getSequenceAsString ());
313336 }
314337 catch (IOException e )
315338 {
0 commit comments