Skip to content

Commit df3ffcd

Browse files
committed
Improve MHC group collapsing code
1 parent 91dc85b commit df3ffcd

File tree

1 file changed

+29
-3
lines changed

1 file changed

+29
-3
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import java.sql.SQLException;
4949
import java.util.ArrayList;
5050
import java.util.Arrays;
51+
import java.util.Collection;
5152
import java.util.Collections;
5253
import java.util.Comparator;
5354
import java.util.HashMap;
@@ -57,6 +58,7 @@
5758
import java.util.Set;
5859
import java.util.TreeSet;
5960
import java.util.stream.Collectors;
61+
import java.util.stream.Stream;
6062

6163
/**
6264
* User: bimber
@@ -496,9 +498,10 @@ private boolean doCollapse(Logger log)
496498
AlignmentGroup g1 = it.next();
497499
while (it.hasNext())
498500
{
501+
int orig = g1.alleles.size();
499502
if (compareGroupToOthers(g1))
500503
{
501-
log.info("Collapsed: " + g1.lineages + ", with: " + g1.alleles.size());
504+
log.info("Collapsed: " + g1.lineages + ", from: " + orig + " to " + g1.alleles.size() + " alleles");
502505
return true; // abort and restart the process with a new list iterator
503506
}
504507

@@ -563,14 +566,36 @@ public boolean canCombine(AlignmentGroup g2)
563566
return false;
564567
}
565568

566-
return CollectionUtils.disjunction(this.alleles, g2.alleles).size() == 1;
569+
// Allow a greater level of collapse for highly ambiguous results:
570+
// Require similar sizes and mostly-overlapping allele sets, i.e. a small symmetric difference (e.g., A/B/D and A/C/D, but not A/B/C and A/D/E)
571+
int setDiffThreshold;
572+
int sizeDiffThreshold;
573+
if (this.alleles.size() >= 16)
574+
{
575+
setDiffThreshold = 6;
576+
sizeDiffThreshold = 3;
577+
}
578+
else if (this.alleles.size() >= 8)
579+
{
580+
setDiffThreshold = 4;
581+
sizeDiffThreshold = 2;
582+
}
583+
else
584+
{
585+
setDiffThreshold = 2;
586+
sizeDiffThreshold = 1;
587+
}
588+
589+
return Math.abs(this.alleles.size() - g2.alleles.size()) <= sizeDiffThreshold && CollectionUtils.disjunction(this.alleles, g2.alleles).size() <= setDiffThreshold;
567590
}
568591

569592
public AlignmentGroup combine(AlignmentGroup g2)
570593
{
571-
// Take the larger allele set:
594+
// Take the union of the allele sets:
595+
TreeSet<String> allAlleles = Stream.of(this.alleles, g2.alleles).flatMap(Collection::stream).collect(Collectors.toCollection(TreeSet::new));
572596
if (g2.alleles.size() > this.alleles.size())
573597
{
598+
g2.alleles = allAlleles;
574599
g2.rowIdsToDelete.addAll(this.rowIds);
575600
g2.rowIdsToDelete.addAll(this.rowIdsToDelete);
576601
g2.totalReads = g2.totalReads + totalReads;
@@ -582,6 +607,7 @@ public AlignmentGroup combine(AlignmentGroup g2)
582607
}
583608
else
584609
{
610+
this.alleles = allAlleles;
585611
this.rowIdsToDelete.addAll(g2.rowIds);
586612
this.rowIdsToDelete.addAll(g2.rowIdsToDelete);
587613
this.totalReads = g2.totalReads + totalReads;

0 commit comments

Comments
 (0)