Skip to content

Commit aa4d50c

Browse files
committed
- Improve subset seurat code
- move closer to including pindel in variant VCF
1 parent 607779f commit aa4d50c

File tree

5 files changed

+132
-84
lines changed

5 files changed

+132
-84
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 92 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,51 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
227227
//Create a BED file with all regions of coverage below MIN_COVERAGE:
228228
int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
229229
int positionsSkipped = 0;
230+
int basesRecoveredFromDeletions = 0;
230231
int gapIntervals = 0;
231232
double avgDepth;
232233

233-
//TODO: rescue pindel?
234+
boolean runPindel = getProvider().getParameterByName("runPindel").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
235+
236+
List<VariantContext> pindelConsensusVariants = new ArrayList<>();
237+
int totalPindelConsensusVariants = 0;
238+
int totalPindelVariants = 0;
239+
if (runPindel)
240+
{
241+
Double minFraction = getProvider().getParameterByName("minFraction").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);
242+
int minDepth = getProvider().getParameterByName("minDepth").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
243+
int minInsertSize = getProvider().getParameterByName("minInsertSize").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
244+
245+
PindelAnalysis.PindelSettings settings = new PindelAnalysis.PindelSettings();
246+
247+
File pindelOutput = PindelAnalysis.runPindel(output, getPipelineCtx(), rs, outputDir, inputBam, referenceGenome.getWorkingFastaFile(), minFraction, minDepth, true, coverageOut, minInsertSize);
248+
File pindelVcf = PindelAnalysis.createVcf(pindelOutput, new File(pindelOutput.getParentFile(), FileUtil.getBaseName(pindelOutput) + ".all.vcf.gz"), referenceGenome, settings);
249+
250+
try (VCFFileReader reader = new VCFFileReader(pindelVcf);CloseableIterator<VariantContext> it = reader.iterator())
251+
{
252+
while (it.hasNext())
253+
{
254+
VariantContext vc = it.next();
255+
if (vc.hasAttribute("IN_CONSENSUS"))
256+
{
257+
pindelConsensusVariants.add(vc);
258+
totalPindelConsensusVariants++;
259+
}
260+
261+
totalPindelVariants++;
262+
}
263+
}
264+
265+
getPipelineCtx().getLogger().info("Total pindel variants: " + totalPindelVariants);
266+
getPipelineCtx().getLogger().info("Total consensus pindel variants: " + totalPindelConsensusVariants);
267+
if (totalPindelConsensusVariants == 0)
268+
{
269+
getPipelineCtx().getLogger().info("deleting empty pindel VCF: " + pindelVcf.getPath());
270+
pindelVcf.delete();
271+
new File(pindelVcf.getPath() + ".tbi").delete();
272+
}
273+
}
274+
234275
File mask = new File(outputDir, "mask.bed");
235276
Map<String, Integer> gatkDepth = new HashMap<>();
236277
try (CSVReader reader = new CSVReader(Readers.getReader(coverageOut), '\t');CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(mask), '\t', CSVWriter.NO_QUOTE_CHARACTER))
@@ -259,40 +300,63 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
259300

260301
if (depth < minCoverage)
261302
{
262-
positionsSkipped++;
303+
//Check if within pindel SNV calls:
304+
Interval currentPosition = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
305+
boolean withinDeletion = false;
306+
for (VariantContext vc : pindelConsensusVariants)
307+
{
308+
if (vc.isIndel())
309+
{
310+
// If we overlap the SNV (which should be just deletions (note: the first position of the variant is the non-indel REF base), allow low coverage:
311+
if (currentPosition.overlaps(vc) && currentPosition.getStart() != vc.getStart())
312+
{
313+
basesRecoveredFromDeletions++;
314+
315+
//TODO: enable this once sure:
316+
//withinDeletion = true;
317+
break;
318+
}
319+
}
320+
}
263321

264-
if (intervalOfCurrentGap != null)
322+
if (!withinDeletion)
265323
{
266-
if (intervalOfCurrentGap.getContig().equals(tokens[0]))
324+
positionsSkipped++;
325+
326+
if (intervalOfCurrentGap != null)
267327
{
268-
//extend
269-
intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1]));
328+
if (intervalOfCurrentGap.getContig().equals(tokens[0]))
329+
{
330+
//extend
331+
intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1]));
332+
}
333+
else
334+
{
335+
//switched contigs, write and make new:
336+
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart() - 1), String.valueOf(intervalOfCurrentGap.getEnd())});
337+
gapIntervals++;
338+
intervalOfCurrentGap = currentPosition;
339+
}
270340
}
271341
else
272342
{
273-
//switched contigs, write and make new:
274-
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
275-
gapIntervals++;
276-
intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
343+
//Not existing gap, just start one:
344+
intervalOfCurrentGap = currentPosition;
277345
}
278-
}
279-
else
280-
{
281-
//Not existing gap, just start one:
282-
intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1]));
346+
347+
continue;
283348
}
284349
}
285-
else
286-
{
287-
//We just existed a gap, so write:
288-
if (intervalOfCurrentGap != null)
289-
{
290-
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
291-
gapIntervals++;
292-
}
293350

294-
intervalOfCurrentGap = null;
351+
//Otherwise this is a valid position to report:
352+
//We just exited a gap, so write:
353+
if (intervalOfCurrentGap != null)
354+
{
355+
writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())});
356+
gapIntervals++;
295357
}
358+
359+
intervalOfCurrentGap = null;
296360
}
297361

298362
//Ensure we count final gap
@@ -422,43 +486,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
422486
Set<String> variantsBcftools = runBcftools(inputBam, referenceGenome, mask, minCoverage);
423487
int variantsBcftoolsTotal = variantsBcftools.size();
424488

425-
boolean runPindel = getProvider().getParameterByName("runPindel").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
426-
427-
List<VariantContext> pindelConsensusVariants = new ArrayList<>();
428-
int totalPindelConsensusVariants = 0;
429-
if (runPindel)
430-
{
431-
Double minFraction = getProvider().getParameterByName("minFraction").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);
432-
int minDepth = getProvider().getParameterByName("minDepth").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
433-
int minInsertSize = getProvider().getParameterByName("minInsertSize").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
434-
435-
PindelAnalysis.PindelSettings settings = new PindelAnalysis.PindelSettings();
436-
437-
File pindelOutput = PindelAnalysis.runPindel(output, getPipelineCtx(), rs, outputDir, inputBam, referenceGenome.getWorkingFastaFile(), minFraction, minDepth, true, coverageOut, minInsertSize);
438-
File pindelVcf = PindelAnalysis.createVcf(pindelOutput, new File(pindelOutput.getParentFile(), FileUtil.getBaseName(pindelOutput) + ".vcf.gz"), referenceGenome, settings);
439-
440-
try (VCFFileReader reader = new VCFFileReader(pindelVcf);CloseableIterator<VariantContext> it = reader.iterator())
441-
{
442-
while (it.hasNext())
443-
{
444-
VariantContext vc = it.next();
445-
if (vc.hasAttribute("IN_CONSENSUS"))
446-
{
447-
pindelConsensusVariants.add(vc);
448-
totalPindelConsensusVariants++;
449-
}
450-
}
451-
}
452-
453-
getPipelineCtx().getLogger().info("Total consensus pindel variants: " + totalPindelConsensusVariants);
454-
if (totalPindelConsensusVariants == 0)
455-
{
456-
getPipelineCtx().getLogger().info("deleting empty pindel VCF: " + pindelVcf.getPath());
457-
pindelVcf.delete();
458-
new File(pindelVcf.getPath() + ".tbi").delete();
459-
}
460-
}
461-
462489
//Create final VCF:
463490
int totalVariants = 0;
464491
int totalGT1 = 0;
@@ -602,10 +629,11 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
602629
}
603630
}
604631

605-
//TODO: add pindel
606632
if (!pindelConsensusVariants.isEmpty())
607633
{
608-
634+
//TODO: enable once sure:
635+
//pindelConsensusVariants.stream().forEach(consensusVariants::add);
636+
getPipelineCtx().getLogger().info("total consensus variants that would be added: " + pindelConsensusVariants.size());
609637
}
610638

611639
try (CloseableIterator<VariantContext> iterator = allVariants.iterator())
@@ -634,6 +662,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
634662
double pctNoCover = positionsSkipped / (double)dict.getReferenceLength();
635663
getPipelineCtx().getLogger().info("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped + "(" + fmt.format(pctNoCover) + ")");
636664
getPipelineCtx().getLogger().info("Total # gap intervals: " + gapIntervals);
665+
getPipelineCtx().getLogger().info("Total # of low coverage bases recovered within consensus deletions: " + basesRecoveredFromDeletions);
637666

638667
String description = String.format("Total Variants: %s\nTotal GT 1 PCT: %s\nTotal GT 50 PCT: %s\nTotal Indel GT 1 PCT: %s\nPositions Below Coverage: %s\nTotal In LoFreq Consensus: %s\nTotal Indel In LoFreq Consensus: %s\nTotal Consensus Variant in PBS: %s", totalVariants, totalGT1, totalGT50, totalIndelGT1, positionsSkipped, totalGTThreshold, totalIndelGTThreshold, totalConsensusInPBS);
639668
description += "\n" + "Strand Bias Recovered: " + filteredVariantsRecovered;

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PindelAnalysis.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,7 @@ public static File createVcf(File pindelOutput, File vcfOutput, ReferenceGenome
468468
String[] line;
469469
while ((line = reader.readNext()) != null)
470470
{
471+
int inConsensus = 1;
471472
if (!("D".equals(line[0]) || "I".equals(line[0]) || "S".equals(line[0])))
472473
{
473474
continue;
@@ -483,17 +484,17 @@ public static File createVcf(File pindelOutput, File vcfOutput, ReferenceGenome
483484
// Assume LoFreq calls these well enough:
484485
if (refLength < settings.MIN_LENGTH_TO_CONSIDER && altLength < settings.MIN_LENGTH_TO_CONSIDER)
485486
{
486-
continue;
487+
inConsensus = 0;
487488
}
488489

489490
if (("D".equals(line[0]) || "S".equals(line[0])) && refLength > settings.MAX_DELETION_LENGTH)
490491
{
491-
continue;
492+
inConsensus = 0;
492493
}
493494

494495
if (Double.parseDouble(line[6]) < settings.MIN_AF)
495496
{
496-
continue;
497+
inConsensus = 0;
497498
}
498499

499500
double eventCoverage = 0.0;
@@ -504,7 +505,7 @@ public static File createVcf(File pindelOutput, File vcfOutput, ReferenceGenome
504505

505506
if (("D".equals(line[0]) || "S".equals(line[0])) && eventCoverage > settings.MAX_DEL_EVENT_COVERAGE)
506507
{
507-
continue;
508+
inConsensus = 0;
508509
}
509510

510511
VariantContextBuilder vcb = new VariantContextBuilder();
@@ -514,7 +515,7 @@ public static File createVcf(File pindelOutput, File vcfOutput, ReferenceGenome
514515
vcb.alleles(Arrays.asList(Allele.create(refAllele, true), Allele.create(altAllele)));
515516
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, Double.parseDouble(line[6]));
516517

517-
vcb.attribute("IN_CONSENSUS", 1);
518+
vcb.attribute("IN_CONSENSUS", inConsensus);
518519
vcb.attribute("GATK_DP", (int)Double.parseDouble(line[7]));
519520

520521
int dp = "I".equals(line[0]) ? Integer.parseInt(line[4]) : (int) Double.parseDouble(line[10]);

singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ else if (NumberUtils.isCreatable(val))
323323
}
324324
else if ("sequenceanalysis-trimmingtextarea".equals(pd.getFieldXtype()))
325325
{
326-
val = val.replace("'", "\'");
326+
val = val.replace("'", "\\\'");
327327
String[] vals = val.split(",");
328328
return "c('" + StringUtils.join(vals, "','") + "')";
329329
}
Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
for (datasetId in names(seuratObjects)) {
2-
seuratObj <- seuratObjects[[datasetId]]
3-
seuratObjects[[datasetId]] <- NULL
2+
seuratObj <- seuratObjects[[datasetId]]
3+
seuratObjects[[datasetId]] <- NULL
44

5-
#TODO: this is a stopgap for a former bug in RunCellHashing. Retain until all existing seurat objects lacking this field are removed
6-
if (!'HTO.Classification' %in% names(seuratObj@meta.data) && 'consensuscall.global' %in% names(seuratObj@meta.data)) {
7-
seuratObj$HTO.Classification <- seuratObj$consensuscall.global
8-
}
5+
#TODO: this is a stopgap for a former bug in RunCellHashing. Retain until all existing seurat objects lacking this field are removed
6+
if (!'HTO.Classification' %in% names(seuratObj@meta.data) && 'consensuscall.global' %in% names(seuratObj@meta.data)) {
7+
seuratObj$HTO.Classification <- seuratObj$consensuscall.global
8+
}
99

10-
<SUBSETS>
10+
<SUBSET_CODE>
1111

12-
newSeuratObjects[[datasetId]] <- seuratObj
12+
# Cleanup
13+
rm(seuratObj)
14+
gc()
15+
}
1316

14-
# Cleanup
15-
rm(seuratObj)
16-
gc()
17+
if (length(newSeuratObjects) == 0) {
18+
stop('No cells remained in any seurat objects after subsetting')
1719
}

singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public Collection<String> getRLibraries()
5555
return ret;
5656
}
5757

58-
final static String EXPRESSION = "<SUBSETS>";
58+
final static String EXPRESSION = "<SUBSET_CODE>";
5959
final static String DELIM = "<>";
6060

6161
@Override
@@ -77,9 +77,25 @@ protected List<String> loadChunkFromFile() throws PipelineJobException
7777
{
7878
for (String subset : values)
7979
{
80-
String toSub = "seuratObj <- subset(seuratObj, subset = " + subset + ")";
81-
ret.add(line.replaceAll(EXPRESSION, toSub));
82-
ret.add(line.replaceAll(EXPRESSION, "print(paste0('Cells after subset: ', ncol(seuratObj)))"));
80+
String subsetEscaped = subset.replace("'", "\\\'");
81+
82+
ret.add("\tcells <- c()");
83+
ret.add("\ttryCatch({");
84+
ret.add("\t\tcells <- WhichCells(so, expression = " + subset + ")");
85+
ret.add("\t}, error = function(e){");
86+
ret.add("\t\tif (!is.null(e) && e$message == 'Cannot find cells provided') {");
87+
ret.add("\t\t\tprint(paste0('There were no cells remaining after the subset: ', '" + subsetEscaped + "'))");
88+
ret.add("\t\t}");
89+
ret.add("\t})");
90+
ret.add("");
91+
ret.add("\tif (length(cells) == 0) {");
92+
ret.add("\t\tprint(paste0('There were no cells after subsetting for dataset: ', datasetId, ', with subset: ', '" + subsetEscaped + "'))");
93+
ret.add("\t} else {");
94+
ret.add("\t\tseuratObj <- subset(seuratObj, cells = cells)");
95+
ret.add("\t\tprint(paste0('Cells after subset: ', ncol(seuratObj)))");
96+
ret.add("\t\tnewSeuratObjects[[datasetId]] <- seuratObj");
97+
ret.add("\t}");
98+
ret.add("");
8399
}
84100
}
85101
else

0 commit comments

Comments
 (0)