33import au .com .bytecode .opencsv .CSVReader ;
44import org .apache .commons .io .FileUtils ;
55import org .apache .commons .lang3 .StringUtils ;
6+ import org .apache .commons .lang3 .stream .IntStreams ;
67import org .apache .logging .log4j .Logger ;
78import org .jetbrains .annotations .Nullable ;
89import org .json .JSONObject ;
6162import java .util .concurrent .atomic .AtomicInteger ;
6263import java .util .regex .Matcher ;
6364import java .util .regex .Pattern ;
65+ import java .util .stream .IntStream ;
6466
6567public class CellRangerVDJWrapper extends AbstractCommandWrapper
6668{
@@ -862,6 +864,7 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
862864 Map <String , Integer > chimericCallsRecovered = new HashMap <>();
863865 int restoredTRDVAV = 0 ;
864866
867+ final Map <String , Integer > headerIdx = new HashMap <>();
865868 int lineIdx = 0 ;
866869 while ((line = reader .readLine ()) != null )
867870 {
@@ -873,49 +876,56 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
873876 writer .println (line + ",chain_type" );
874877 }
875878
879+ String [] header = line .split ("," );
880+ IntStream .range (0 , header .length ).forEach (idx -> headerIdx .put (header [idx ], idx ));
876881 continue ;
877882 }
878883
879884 //Infer correct chain from the V, J and C genes
880885 String [] tokens = line .split ("," , -1 ); // -1 used to preserve trailing empty strings
881886
882887 // Restore original value for TRD/TRA
883- if (tokens [6 ].contains ("TRDV" ) && tokens [6 ].contains ("/" ) && tokens [6 ].contains ("AV" ))
888+ final int vGeneIdx = headerIdx .get ("v_gene" );
889+ final int jGeneIdx = headerIdx .get ("j_gene" );
890+ final int cGeneIdx = headerIdx .get ("c_gene" );
891+ final int chainIdx = headerIdx .get ("chain" );
892+
893+ if (tokens [vGeneIdx ].contains ("TRDV" ) && tokens [vGeneIdx ].contains ("/" ) && tokens [vGeneIdx ].contains ("AV" ))
884894 {
885895 restoredTRDVAV ++;
886- String [] split = tokens [6 ].split ("/" );
887- tokens [6 ] = "TR" + split [1 ] + "/" + split [0 ].replaceAll ("TR" , "" );
896+ String [] split = tokens [vGeneIdx ].split ("/" );
897+ tokens [vGeneIdx ] = "TR" + split [1 ] + "/" + split [0 ].replaceAll ("TR" , "" );
888898 }
889899
890900 List <String > chains = new ArrayList <>();
891901 String vGeneChain = null ;
892902 String jGeneChain = null ;
893903 String cGeneChain = null ;
894- for (int idx : new Integer []{6 , 8 , 9 })
904+ for (int idx : new Integer []{vGeneIdx , jGeneIdx , cGeneIdx })
895905 {
896906 String val = StringUtils .trimToNull (tokens [idx ]);
897907 if (val != null )
898908 {
899909 val = val .substring (0 , 3 );
900910
901911 chains .add (val );
902- if (idx == 6 )
912+ if (idx == vGeneIdx )
903913 {
904914 vGeneChain = val ;
905915 }
906- if (idx == 8 )
916+ if (idx == jGeneIdx )
907917 {
908918 jGeneChain = val ;
909919 }
910- else if (idx == 9 )
920+ else if (idx == cGeneIdx )
911921 {
912922 cGeneChain = val ;
913923 }
914924 }
915925 }
916926
917927 Set <String > uniqueChains = new HashSet <>(chains );
918- String originalChain = StringUtils .trimToNull (tokens [5 ]);
928+ String originalChain = StringUtils .trimToNull (tokens [chainIdx ]);
919929
920930 // Recover TRDV/TRAJ/TRAC:
921931 if (uniqueChains .size () > 1 )
@@ -925,7 +935,7 @@ else if (idx == 9)
925935 {
926936 uniqueChains .clear ();
927937 uniqueChains .add (cGeneChain );
928- String key = originalChain + "->" + cGeneChain + " (based on C-GENE)" ;
938+ String key = vGeneChain + ":" + jGeneChain + ":" + originalChain + "->" + cGeneChain + " (based on C-GENE)" ;
929939 chimericCallsRecovered .put (key , chimericCallsRecovered .getOrDefault (key , 0 ) + 1 );
930940 }
931941 else if (uniqueChains .size () == 2 )
@@ -950,14 +960,14 @@ else if (uniqueChains.size() == 2)
950960 if (uniqueChains .size () == 1 )
951961 {
952962 String chain = uniqueChains .iterator ().next ();
953- tokens [5 ] = chain ;
963+ tokens [chainIdx ] = chain ;
954964 }
955965 else
956966 {
957- log .info ("Multiple chains detected [" + StringUtils .join (chains , "," )+ "], leaving original call alone: " + originalChain + ". " + tokens [6 ] + "/" + tokens [8 ] + "/" + tokens [9 ]);
967+ log .info ("Multiple chains detected [" + StringUtils .join (chains , "," )+ "], leaving original call alone: " + originalChain + ". " + tokens [vGeneIdx ] + "/" + tokens [jGeneIdx ] + "/" + tokens [cGeneIdx ]);
958968 }
959969
960- if (acceptableChains .contains (tokens [5 ]))
970+ if (acceptableChains .contains (tokens [chainIdx ]))
961971 {
962972 writer .println (StringUtils .join (tokens , "," ) + "," + chainType );
963973 }
0 commit comments