Skip to content

Commit 0576278

Browse files
committed
Add more indels to SnpEff VCF
1 parent 312eb47 commit 0576278

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/MergeLoFreqVcfHandler.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import au.com.bytecode.opencsv.CSVWriter;
55
import htsjdk.samtools.SAMSequenceDictionary;
66
import htsjdk.samtools.SAMSequenceRecord;
7+
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
8+
import htsjdk.samtools.reference.ReferenceSequence;
79
import htsjdk.samtools.util.CloseableIterator;
810
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
911
import htsjdk.variant.variantcontext.Allele;
@@ -340,6 +342,22 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
340342

341343
int length = Integer.parseInt(line[3]) - Integer.parseInt(line[2]) - 1; //NOTE: pindel reports one base upstream+downstream as part of the indel
342344
writer.writeNext(new String[]{ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName(), String.valueOf(so.getRowid()), String.valueOf(so.getReadset()), "Pindel", line[1], line[2], line[3], String.valueOf(length), "", line[0], line[4], "", line[5], line[6]});
345+
346+
if ("D".equalsIgnoreCase(line[0]))
347+
{
348+
ReferenceSequence rs = getReferenceSequence(ctx, genomeIds.iterator().next(), line[1]);
349+
int start0 = Integer.parseInt(line[2]) - 1;
350+
char alt = (char)rs.getBases()[start0];
351+
352+
int end1 = Integer.parseInt(line[3]) - 1; //pindel reports the end as one bp downstream of the event, so drop 1 bp even though we're keeping 1-based
353+
String ref = new String(Arrays.copyOfRange(rs.getBases(), start0, end1), StringUtilsLabKey.DEFAULT_CHARSET);
354+
if (ref.length() != length + 1)
355+
{
356+
throw new PipelineJobException("Improper pindel parsing: " + so.getRowid() + "/" + line[2] + "/" + line[3] + "/" + ref + "/" + alt);
357+
}
358+
359+
uniqueIndels.add(line[1] + "<>" + line[2] + "<>" + ref + "<>" + alt);
360+
}
343361
}
344362
}
345363
}
@@ -653,6 +671,22 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
653671
ctx.getFileManager().addSequenceOutput(output, "Merged LoFreq Variants: " + inputFiles.size() + " VCFs", "Merged LoFreq Variant Table", null, null, genome.getGenomeId(), null);
654672
}
655673

674+
private Map<String, ReferenceSequence> seqMap = new HashMap<>();
675+
676+
private ReferenceSequence getReferenceSequence(JobContext ctx, int genomeId, String name) throws IOException
677+
{
678+
if (!seqMap.containsKey(name))
679+
{
680+
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeId);
681+
try (IndexedFastaSequenceFile fastaSequenceFile = new IndexedFastaSequenceFile(genome.getWorkingFastaFile()))
682+
{
683+
seqMap.put(name, fastaSequenceFile.getSequence(name));
684+
}
685+
}
686+
687+
return seqMap.get(name);
688+
}
689+
656690
private static String getCacheKey(String contig, int start)
657691
{
658692
return contig + "<>" + start;

0 commit comments

Comments
 (0)