Skip to content

Commit cb7d5e1

Browse files
committed
Update CellRangerVDJWrapper to support CR9
1 parent 60c109d commit cb7d5e1

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import au.com.bytecode.opencsv.CSVReader;
44
import org.apache.commons.io.FileUtils;
55
import org.apache.commons.lang3.StringUtils;
6+
import org.apache.commons.lang3.stream.IntStreams;
67
import org.apache.logging.log4j.Logger;
78
import org.jetbrains.annotations.Nullable;
89
import org.json.JSONObject;
@@ -61,6 +62,7 @@
6162
import java.util.concurrent.atomic.AtomicInteger;
6263
import java.util.regex.Matcher;
6364
import java.util.regex.Pattern;
65+
import java.util.stream.IntStream;
6466

6567
public class CellRangerVDJWrapper extends AbstractCommandWrapper
6668
{
@@ -862,6 +864,7 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
862864
Map<String, Integer> chimericCallsRecovered = new HashMap<>();
863865
int restoredTRDVAV = 0;
864866

867+
final Map<String, Integer> headerIdx = new HashMap<>();
865868
int lineIdx = 0;
866869
while ((line = reader.readLine()) != null)
867870
{
@@ -873,49 +876,56 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
873876
writer.println(line + ",chain_type");
874877
}
875878

879+
String[] header = line.split(",");
880+
IntStream.range(0, header.length).forEach(idx -> headerIdx.put(header[idx], idx));
876881
continue;
877882
}
878883

879884
//Infer correct chain from the V, J and C genes
880885
String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings
881886

882887
// Restore original value for TRD/TRA
883-
if (tokens[6].contains("TRDV") && tokens[6].contains("/") && tokens[6].contains("AV"))
888+
final int vGeneIdx = headerIdx.get("v_gene");
889+
final int jGeneIdx = headerIdx.get("j_gene");
890+
final int cGeneIdx = headerIdx.get("c_gene");
891+
final int chainIdx = headerIdx.get("chain");
892+
893+
if (tokens[vGeneIdx].contains("TRDV") && tokens[vGeneIdx].contains("/") && tokens[vGeneIdx].contains("AV"))
884894
{
885895
restoredTRDVAV++;
886-
String[] split = tokens[6].split("/");
887-
tokens[6] = "TR" + split[1] + "/" + split[0].replaceAll("TR", "");
896+
String[] split = tokens[vGeneIdx].split("/");
897+
tokens[vGeneIdx] = "TR" + split[1] + "/" + split[0].replaceAll("TR", "");
888898
}
889899

890900
List<String> chains = new ArrayList<>();
891901
String vGeneChain = null;
892902
String jGeneChain = null;
893903
String cGeneChain = null;
894-
for (int idx : new Integer[]{6,8,9})
904+
for (int idx : new Integer[]{vGeneIdx,jGeneIdx,cGeneIdx})
895905
{
896906
String val = StringUtils.trimToNull(tokens[idx]);
897907
if (val != null)
898908
{
899909
val = val.substring(0, 3);
900910

901911
chains.add(val);
902-
if (idx == 6)
912+
if (idx == vGeneIdx)
903913
{
904914
vGeneChain = val;
905915
}
906-
if (idx == 8)
916+
if (idx == jGeneIdx)
907917
{
908918
jGeneChain = val;
909919
}
910-
else if (idx == 9)
920+
else if (idx == cGeneIdx)
911921
{
912922
cGeneChain = val;
913923
}
914924
}
915925
}
916926

917927
Set<String> uniqueChains = new HashSet<>(chains);
918-
String originalChain = StringUtils.trimToNull(tokens[5]);
928+
String originalChain = StringUtils.trimToNull(tokens[chainIdx]);
919929

920930
// Recover TRDV/TRAJ/TRAC:
921931
if (uniqueChains.size() > 1)
@@ -925,7 +935,7 @@ else if (idx == 9)
925935
{
926936
uniqueChains.clear();
927937
uniqueChains.add(cGeneChain);
928-
String key = originalChain + "->" + cGeneChain + " (based on C-GENE)";
938+
String key = vGeneChain + ":" + jGeneChain + ":" + originalChain + "->" + cGeneChain + " (based on C-GENE)";
929939
chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1);
930940
}
931941
else if (uniqueChains.size() == 2)
@@ -950,14 +960,14 @@ else if (uniqueChains.size() == 2)
950960
if (uniqueChains.size() == 1)
951961
{
952962
String chain = uniqueChains.iterator().next();
953-
tokens[5] = chain;
963+
tokens[chainIdx] = chain;
954964
}
955965
else
956966
{
957-
log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]);
967+
log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[vGeneIdx] + "/" + tokens[jGeneIdx] + "/" + tokens[cGeneIdx]);
958968
}
959969

960-
if (acceptableChains.contains(tokens[5]))
970+
if (acceptableChains.contains(tokens[chainIdx]))
961971
{
962972
writer.println(StringUtils.join(tokens, ",") + "," + chainType);
963973
}

0 commit comments

Comments
 (0)