Bugfixes to hashing/VDJ calls

bbimber · bbimber · commit 01c6f24c172e · 2021-03-03T12:34:37.000-08:00
diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java
@@ -45,6 +45,8 @@ static public void setInstance(CellHashingService instance)
         _instance = instance;
     }
 
+    abstract public void prepareHashingIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashing) throws PipelineJobException;
+
     abstract public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, boolean failIfNoHashing, boolean failIfNoCiteSeq) throws PipelineJobException;
 
     abstract public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutputTracker output, SequenceOutputHandler.JobContext ctx, CellHashingParameters parameters, File rawCountMatrixDir) throws PipelineJobException;
diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
@@ -43,6 +43,7 @@
 import org.labkey.api.writer.PrintWriters;
 import org.labkey.singlecell.run.CellRangerFeatureBarcodeHandler;
 
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintWriter;
@@ -81,13 +82,19 @@ public static CellHashingServiceImpl get()
         return _instance;
     }
 
+    @Override
+    public void prepareHashingIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashing) throws PipelineJobException
+    {
+        prepareHashingAndCiteSeqFilesIfNeeded(sourceDir, job, support, filterField, failIfNoHashing, false, true, true, false);
+    }
+
     @Override
     public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashing, final boolean failIfNoCiteSeq) throws PipelineJobException
     {
-        prepareHashingAndCiteSeqFilesIfNeeded(sourceDir, job, support, filterField, failIfNoHashing, failIfNoCiteSeq, true);
+        prepareHashingAndCiteSeqFilesIfNeeded(sourceDir, job, support, filterField, failIfNoHashing, failIfNoCiteSeq, true, true, true);
     }
 
-    public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashing, final boolean failIfNoCiteSeq, final boolean cacheCountMatrixFiles) throws PipelineJobException
+    public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashing, final boolean failIfNoCiteSeq, final boolean cacheCountMatrixFiles, boolean requireValidHashingIfPresent, boolean requireValidCiteSeqIfPresent) throws PipelineJobException
     {
         Container target = job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer();
         UserSchema sequenceAnalysis = QueryService.get().getUserSchema(job.getUser(), target, SingleCellSchema.SEQUENCE_SCHEMA_NAME);
@@ -217,6 +224,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
             HashMap<Integer, File> readsetToCountMap = new HashMap<>();
             if (distinctHTOs.size() > 1)
             {
+                Set<Integer> hashingToRemove = new HashSet<>();
                 readsetToHashingMap.forEach((readsetId, hashingReadsetId) -> {
                     if (cacheCountMatrixFiles)
                     {
@@ -226,7 +234,15 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
                         TableSelector ts = new TableSelector(sequenceOutputs, filter, new org.labkey.api.data.Sort("-rowid"));
                         if (!ts.exists())
                         {
-                            throw new IllegalArgumentException("Unable to find existing count matrix for hashing readset: " + hashingReadsetId);
+                            if (requireValidHashingIfPresent)
+                            {
+                                throw new IllegalArgumentException("Unable to find existing count matrix for hashing readset: " + hashingReadsetId);
+                            }
+                            else
+                            {
+                                job.getLogger().warn("Unable to find existing count matrix for hashing readset: " + hashingReadsetId + ", skipping");
+                                hashingToRemove.add(readsetId);
+                            }
                         }
                         else
                         {
@@ -238,6 +254,8 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
                     support.cacheReadset(hashingReadsetId, job.getUser());
 
                 });
+
+                hashingToRemove.forEach(readsetToHashingMap::remove);
             }
             else if (distinctHTOs.size() == 1)
             {
@@ -259,6 +277,7 @@ else if (distinctHTOs.size() == 1)
                 job.getLogger().info("distinct HTOs: " + distinctHTOs.size());
             }
 
+            Set<Integer> citeToRemove = new HashSet<>();
             readsetToCiteSeqMap.forEach((readsetId, citeseqReadsetId) -> {
                 if (cacheCountMatrixFiles)
                 {
@@ -268,7 +287,15 @@ else if (distinctHTOs.size() == 1)
                     TableSelector ts = new TableSelector(sequenceOutputs, filter, new org.labkey.api.data.Sort("-rowid"));
                     if (!ts.exists())
                     {
-                        throw new IllegalArgumentException("Unable to find existing count matrix for CITE-seq readset: " + citeseqReadsetId);
+                        if (requireValidCiteSeqIfPresent)
+                        {
+                            throw new IllegalArgumentException("Unable to find existing count matrix for CITE-seq readset: " + citeseqReadsetId);
+                        }
+                        else
+                        {
+                            job.getLogger().warn("Unable to find existing count matrix for CITE-seq readset: " + citeseqReadsetId + ", skipping");
+                            citeToRemove.add(readsetId);
+                        }
                     }
                     else
                     {
@@ -280,6 +307,8 @@ else if (distinctHTOs.size() == 1)
                 support.cacheReadset(citeseqReadsetId, job.getUser());
             });
 
+            citeToRemove.forEach(readsetToCiteSeqMap::remove);
+
             support.cacheObject(READSET_TO_HASHING_MAP, readsetToHashingMap);
             support.cacheObject(READSET_TO_CITESEQ_MAP, readsetToCiteSeqMap);
             support.cacheObject(READSET_TO_COUNTS_MAP, readsetToCountMap);
@@ -940,7 +969,20 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
             throw new PipelineJobException("Unable to find HTML file: " + htmlFile.getPath());
         }
 
-        if (!callsFile.exists())
+        boolean callFileValid = callsFile.exists();
+        if (callFileValid)
+        {
+            try (BufferedReader reader = Readers.getReader(callsFile))
+            {
+                callFileValid = reader.readLine() != null;
+            }
+            catch (IOException e)
+            {
+                throw new PipelineJobException(e);
+            }
+        }
+
+        if (!callFileValid)
         {
             //copy HTML locally to make debugging easier:
             if (localPipelineDir != null)
diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java b/singlecell/src/org/labkey/singlecell/run/CellRangerFeatureBarcodeHandler.java
@@ -126,7 +126,7 @@ else if (rs.getApplication().equals("CITE-Seq"))
                 throw new PipelineJobException("Unexpected application: " + rs.getApplication());
             }
 
-            CellHashingServiceImpl.get().prepareHashingAndCiteSeqFilesIfNeeded(outputDir, job, support, field, failIfNoHashing, failIfNoCiteseq, false);
+            CellHashingServiceImpl.get().prepareHashingAndCiteSeqFilesIfNeeded(outputDir, job, support, field, failIfNoHashing, failIfNoCiteseq, false, failIfNoHashing, failIfNoCiteseq);
         }
 
         @Override

Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,8 @@ static public void setInstance(CellHashingService instance)`
`45`	`45`	`_instance = instance;`
`46`	`46`	`}`
`47`	`47`
	`48`	`+ abstract public void prepareHashingIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, final boolean failIfNoHashing) throws PipelineJobException;`
	`49`	`+`
`48`	`50`	`abstract public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob job, SequenceAnalysisJobSupport support, String filterField, boolean failIfNoHashing, boolean failIfNoCiteSeq) throws PipelineJobException;`
`49`	`51`
`50`	`52`	`abstract public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutputTracker output, SequenceOutputHandler.JobContext ctx, CellHashingParameters parameters, File rawCountMatrixDir) throws PipelineJobException;`
Original file line number	Diff line number	Diff line change
`@@ -126,7 +126,7 @@ else if (rs.getApplication().equals("CITE-Seq"))`
`126`	`126`	`throw new PipelineJobException("Unexpected application: " + rs.getApplication());`
`127`	`127`	`}`
`128`	`128`
`129`		`- CellHashingServiceImpl.get().prepareHashingAndCiteSeqFilesIfNeeded(outputDir, job, support, field, failIfNoHashing, failIfNoCiteseq, false);`
	`129`	`+ CellHashingServiceImpl.get().prepareHashingAndCiteSeqFilesIfNeeded(outputDir, job, support, field, failIfNoHashing, failIfNoCiteseq, false, failIfNoHashing, failIfNoCiteseq);`
`130`	`130`	`}`
`131`	`131`
`132`	`132`	`@Override`