4343import org .labkey .api .writer .PrintWriters ;
4444import org .labkey .singlecell .run .CellRangerFeatureBarcodeHandler ;
4545
46+ import java .io .BufferedReader ;
4647import java .io .File ;
4748import java .io .IOException ;
4849import java .io .PrintWriter ;
@@ -81,13 +82,19 @@ public static CellHashingServiceImpl get()
8182 return _instance ;
8283 }
8384
85+ @ Override
86+ public void prepareHashingIfNeeded (File sourceDir , PipelineJob job , SequenceAnalysisJobSupport support , String filterField , final boolean failIfNoHashing ) throws PipelineJobException
87+ {
88+ prepareHashingAndCiteSeqFilesIfNeeded (sourceDir , job , support , filterField , failIfNoHashing , false , true , true , false );
89+ }
90+
/**
 * Six-parameter overload: forwards every argument to the wider
 * prepareHashingAndCiteSeqFilesIfNeeded overload and supplies literal {@code true}
 * for each trailing flag.
 *
 * This span records a change: the line marked '-' is the previous call and the
 * line marked '+' is its replacement.
 *
 * @throws PipelineJobException declared by this method; the delegate declares it too
 */
8491 @ Override
8592 public void prepareHashingAndCiteSeqFilesIfNeeded (File sourceDir , PipelineJob job , SequenceAnalysisJobSupport support , String filterField , final boolean failIfNoHashing , final boolean failIfNoCiteSeq ) throws PipelineJobException
8693 {
// Previous call: seven arguments, the single trailing 'true' being cacheCountMatrixFiles.
87- prepareHashingAndCiteSeqFilesIfNeeded (sourceDir , job , support , filterField , failIfNoHashing , failIfNoCiteSeq , true );
// Replacement call: nine arguments. By the delegate's parameter order the three trailing
// 'true' values are cacheCountMatrixFiles, requireValidHashingIfPresent and
// requireValidCiteSeqIfPresent. With both require* flags on, a missing count matrix still
// raises IllegalArgumentException, as it did unconditionally before this change.
94+ prepareHashingAndCiteSeqFilesIfNeeded (sourceDir , job , support , filterField , failIfNoHashing , failIfNoCiteSeq , true , true , true );
8895 }
8996
90- public void prepareHashingAndCiteSeqFilesIfNeeded (File sourceDir , PipelineJob job , SequenceAnalysisJobSupport support , String filterField , final boolean failIfNoHashing , final boolean failIfNoCiteSeq , final boolean cacheCountMatrixFiles ) throws PipelineJobException
97+ public void prepareHashingAndCiteSeqFilesIfNeeded (File sourceDir , PipelineJob job , SequenceAnalysisJobSupport support , String filterField , final boolean failIfNoHashing , final boolean failIfNoCiteSeq , final boolean cacheCountMatrixFiles , boolean requireValidHashingIfPresent , boolean requireValidCiteSeqIfPresent ) throws PipelineJobException
9198 {
9299 Container target = job .getContainer ().isWorkbook () ? job .getContainer ().getParent () : job .getContainer ();
93100 UserSchema sequenceAnalysis = QueryService .get ().getUserSchema (job .getUser (), target , SingleCellSchema .SEQUENCE_SCHEMA_NAME );
@@ -217,6 +224,7 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
217224 HashMap <Integer , File > readsetToCountMap = new HashMap <>();
218225 if (distinctHTOs .size () > 1 )
219226 {
227+ Set <Integer > hashingToRemove = new HashSet <>();
220228 readsetToHashingMap .forEach ((readsetId , hashingReadsetId ) -> {
221229 if (cacheCountMatrixFiles )
222230 {
@@ -226,7 +234,15 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
226234 TableSelector ts = new TableSelector (sequenceOutputs , filter , new org .labkey .api .data .Sort ("-rowid" ));
227235 if (!ts .exists ())
228236 {
229- throw new IllegalArgumentException ("Unable to find existing count matrix for hashing readset: " + hashingReadsetId );
237+ if (requireValidHashingIfPresent )
238+ {
239+ throw new IllegalArgumentException ("Unable to find existing count matrix for hashing readset: " + hashingReadsetId );
240+ }
241+ else
242+ {
243+ job .getLogger ().warn ("Unable to find existing count matrix for hashing readset: " + hashingReadsetId + ", skipping" );
244+ hashingToRemove .add (readsetId );
245+ }
230246 }
231247 else
232248 {
@@ -238,6 +254,8 @@ public void prepareHashingAndCiteSeqFilesIfNeeded(File sourceDir, PipelineJob jo
238254 support .cacheReadset (hashingReadsetId , job .getUser ());
239255
240256 });
257+
258+ hashingToRemove .forEach (readsetToHashingMap ::remove );
241259 }
242260 else if (distinctHTOs .size () == 1 )
243261 {
@@ -259,6 +277,7 @@ else if (distinctHTOs.size() == 1)
259277 job .getLogger ().info ("distinct HTOs: " + distinctHTOs .size ());
260278 }
261279
280+ Set <Integer > citeToRemove = new HashSet <>();
262281 readsetToCiteSeqMap .forEach ((readsetId , citeseqReadsetId ) -> {
263282 if (cacheCountMatrixFiles )
264283 {
@@ -268,7 +287,15 @@ else if (distinctHTOs.size() == 1)
268287 TableSelector ts = new TableSelector (sequenceOutputs , filter , new org .labkey .api .data .Sort ("-rowid" ));
269288 if (!ts .exists ())
270289 {
271- throw new IllegalArgumentException ("Unable to find existing count matrix for CITE-seq readset: " + citeseqReadsetId );
290+ if (requireValidCiteSeqIfPresent )
291+ {
292+ throw new IllegalArgumentException ("Unable to find existing count matrix for CITE-seq readset: " + citeseqReadsetId );
293+ }
294+ else
295+ {
296+ job .getLogger ().warn ("Unable to find existing count matrix for CITE-seq readset: " + citeseqReadsetId + ", skipping" );
297+ citeToRemove .add (readsetId );
298+ }
272299 }
273300 else
274301 {
@@ -280,6 +307,8 @@ else if (distinctHTOs.size() == 1)
280307 support .cacheReadset (citeseqReadsetId , job .getUser ());
281308 });
282309
310+ citeToRemove .forEach (readsetToCiteSeqMap ::remove );
311+
283312 support .cacheObject (READSET_TO_HASHING_MAP , readsetToHashingMap );
284313 support .cacheObject (READSET_TO_CITESEQ_MAP , readsetToCiteSeqMap );
285314 support .cacheObject (READSET_TO_COUNTS_MAP , readsetToCountMap );
@@ -940,7 +969,20 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
940969 throw new PipelineJobException ("Unable to find HTML file: " + htmlFile .getPath ());
941970 }
942971
943- if (!callsFile .exists ())
972+ boolean callFileValid = callsFile .exists ();
973+ if (callFileValid )
974+ {
975+ try (BufferedReader reader = Readers .getReader (callsFile ))
976+ {
977+ callFileValid = reader .readLine () != null ;
978+ }
979+ catch (IOException e )
980+ {
981+ throw new PipelineJobException (e );
982+ }
983+ }
984+
985+ if (!callFileValid )
944986 {
945987 //copy HTML locally to make debugging easier:
946988 if (localPipelineDir != null )
0 commit comments