44import au .com .bytecode .opencsv .CSVWriter ;
55import org .apache .commons .io .FileUtils ;
66import org .apache .commons .lang3 .tuple .Pair ;
7+ import org .jetbrains .annotations .Nullable ;
78import org .json .JSONObject ;
89import org .labkey .api .collections .CaseInsensitiveHashMap ;
910import org .labkey .api .data .CompareType ;
@@ -114,43 +115,58 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Reads
114115 throw new PipelineJobException ("Readset missing application: " + rs .getRowId ());
115116 }
116117
117- String field ;
118+ List < String > fields = new ArrayList <>() ;
118119 boolean failIfNoHashingReadset = false ;
119120 boolean failIfNoCiteseqReadset = false ;
120- if (rs . getApplication (). equals ( "Cell Hashing" ))
121+ if (isHashing ( rs ))
121122 {
122- field = "hashingReadsetId" ;
123+ fields . add ( "hashingReadsetId" ) ;
123124 failIfNoHashingReadset = true ;
124125 }
125- else if (rs .getApplication ().equals ("CITE-Seq" ))
126+
127+ if (isCiteSeq (rs ))
126128 {
127- field = "citeseqReadsetId" ;
129+ fields . add ( "citeseqReadsetId" ) ;
128130 failIfNoCiteseqReadset = true ;
129131 }
130- else
132+
133+ if (fields .isEmpty ())
131134 {
132135 throw new PipelineJobException ("Unexpected application: " + rs .getApplication ());
133136 }
134137
135- CellHashingServiceImpl .get ().prepareHashingAndCiteSeqFilesForFeatureCountsIfNeeded (outputDir , job , support , field , failIfNoHashingReadset , failIfNoCiteseqReadset );
136-
137- boolean useGEX = params .optBoolean ("useGEX" , false );
138- if (useGEX )
138+ for (String field : fields )
139139 {
140- TableInfo cDNATable = QueryService .get ().getUserSchema (job .getUser (), job .getContainer (), SingleCellSchema .NAME ).getTable (SingleCellSchema .TABLE_CDNAS , null );
141- Set <Integer > gexReadsetIds = new HashSet <>(new TableSelector (cDNATable , PageFlowUtil .set ("readsetid" ), new SimpleFilter (FieldKey .fromString (field ), rs .getRowId ()), null ).getArrayList (Integer .class ));
142- if (gexReadsetIds .size () == 1 )
143- {
144- support .cacheReadset (gexReadsetIds .iterator ().next (), job .getUser ());
145- support .cacheObject (FEATURE_TO_GEX , gexReadsetIds .iterator ().next ());
146- }
147- else
140+ CellHashingServiceImpl .get ().prepareHashingAndCiteSeqFilesForFeatureCountsIfNeeded (outputDir , job , support , field , failIfNoHashingReadset , failIfNoCiteseqReadset );
141+
142+ boolean useGEX = params .optBoolean ("useGEX" , false );
143+ if (useGEX )
148144 {
149- job .getLogger ().warn ("Expected a single GEX readset for " + rs .getRowId () + ", found: " + gexReadsetIds .size ());
145+ TableInfo cDNATable = QueryService .get ().getUserSchema (job .getUser (), job .getContainer (), SingleCellSchema .NAME ).getTable (SingleCellSchema .TABLE_CDNAS , null );
146+ Set <Integer > gexReadsetIds = new HashSet <>(new TableSelector (cDNATable , PageFlowUtil .set ("readsetid" ), new SimpleFilter (FieldKey .fromString (field ), rs .getRowId ()), null ).getArrayList (Integer .class ));
147+ if (gexReadsetIds .size () == 1 )
148+ {
149+ support .cacheReadset (gexReadsetIds .iterator ().next (), job .getUser ());
150+ support .cacheObject (FEATURE_TO_GEX , gexReadsetIds .iterator ().next ());
151+ }
152+ else
153+ {
154+ job .getLogger ().warn ("Expected a single GEX readset for " + rs .getRowId () + ", found: " + gexReadsetIds .size ());
155+ }
150156 }
151157 }
152158 }
153159
160+ private boolean isHashing (Readset rs )
161+ {
162+ return rs .getApplication ().equals ("Cell Hashing" ) || rs .getApplication ().equals ("Cell Hashing/CITE-seq" );
163+ }
164+
165+ private boolean isCiteSeq (Readset rs )
166+ {
167+ return rs .getApplication ().equals ("CITE-Seq" ) || rs .getApplication ().equals ("Cell Hashing/CITE-seq" );
168+ }
169+
154170 @ Override
155171 public void processFilesOnWebserver (PipelineJob job , SequenceAnalysisJobSupport support , List <Readset > readsets , JSONObject params , File outputDir , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
156172 {
@@ -169,37 +185,9 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
169185 }
170186
171187 AlignmentOutputImpl output = new AlignmentOutputImpl ();
172- CellRangerWrapper wrapper = new CellRangerWrapper (ctx .getLogger ());
173-
174- Readset rs = readsets .get (0 );
175-
176- String category ;
177- File featureFile ;
178- if (rs .getApplication ().equals ("Cell Hashing" ))
179- {
180- category = HASHING_CATEGORY ;
181- featureFile = createFeatureRefForHashing (ctx .getOutputDir (), CellHashingServiceImpl .get ().getAllHashingBarcodesFile (ctx .getSourceDirectory ()));
182-
183- }
184- else if (rs .getApplication ().equals ("CITE-Seq" ))
185- {
186- category = CITESEQ_CATEGORY ;
187- featureFile = createFeatureRefForCiteSeq (ctx .getOutputDir (), CellHashingServiceImpl .get ().getValidCiteSeqBarcodeMetadataFile (ctx .getSourceDirectory (), rs .getReadsetId ()));
188- }
189- else
190- {
191- throw new IllegalStateException ("Unknown category. This should be caught upstream" );
192- }
193-
194- List <String > extraArgs = new ArrayList <>(getClientCommandArgs ("=" , ctx .getParams ()));
195- extraArgs .add ("--nosecondary" );
196-
197- extraArgs .add ("--feature-ref=" + featureFile .getPath ());
198-
199- String idParam = ctx .getParams ().optString ("id" , null );
200- String id = CellRangerWrapper .getId (idParam , rs );
201188
202189 List <Pair <File , File >> inputFastqs = new ArrayList <>();
190+ Readset rs = readsets .get (0 );
203191 rs .getReadData ().forEach (rd -> {
204192 inputFastqs .add (Pair .of (rd .getFile1 (), rd .getFile2 ()));
205193 action .addInputIfNotPresent (rd .getFile1 (), "Input FASTQ" );
@@ -218,14 +206,41 @@ else if (rs.getApplication().equals("CITE-Seq"))
218206 });
219207 }
220208
209+ if (isHashing (rs ))
210+ {
211+ processType (inputFastqs , output , ctx , HASHING_CATEGORY , createFeatureRefForHashing (ctx .getOutputDir (), CellHashingServiceImpl .get ().getAllHashingBarcodesFile (ctx .getSourceDirectory ())), rs , (isCiteSeq (rs ) ? "-Hashing" : null ));
212+
213+ }
214+
215+ if (isCiteSeq (rs ))
216+ {
217+ processType (inputFastqs , output , ctx , CITESEQ_CATEGORY , createFeatureRefForCiteSeq (ctx .getOutputDir (), CellHashingServiceImpl .get ().getValidCiteSeqBarcodeMetadataFile (ctx .getSourceDirectory (), rs .getReadsetId ())), rs , (isHashing (rs ) ? "-CITE" : null ));
218+ }
219+
220+ ctx .getFileManager ().addStepOutputs (action , output );
221+ ctx .addActions (action );
222+ }
223+
224+ private void processType (List <Pair <File , File >> inputFastqs , AlignmentOutputImpl output , JobContext ctx , String category , File featureFile , Readset rs , @ Nullable String idSuffix ) throws PipelineJobException
225+ {
226+ CellRangerWrapper wrapper = new CellRangerWrapper (ctx .getLogger ());
227+
228+ List <String > extraArgs = new ArrayList <>(getClientCommandArgs ("=" , ctx .getParams ()));
229+ extraArgs .add ("--nosecondary" );
230+
231+ extraArgs .add ("--feature-ref=" + featureFile .getPath ());
232+
233+ String idParam = ctx .getParams ().optString ("id" , null );
234+ String id = CellRangerWrapper .getId (idParam , rs );
235+
221236 List <String > args = wrapper .prepareCountArgs (output , id , ctx .getOutputDir (), rs , inputFastqs , extraArgs , false );
222237
223238 //https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis
224239 File libraryCsv = new File (ctx .getOutputDir (), "libraries.csv" );
225240 try (CSVWriter writer = new CSVWriter (PrintWriters .getPrintWriter (libraryCsv ), ',' , CSVWriter .NO_QUOTE_CHARACTER ))
226241 {
227242 writer .writeNext (new String []{"fastqs" , "sample" , "library_type" });
228- writer .writeNext (new String []{wrapper .getLocalFastqDir (ctx .getOutputDir ()).getPath (), wrapper .makeLegalSampleName (rs .getName ()), "Antibody Capture" });
243+ writer .writeNext (new String []{wrapper .getLocalFastqDir (ctx .getOutputDir ()).getPath (), CellRangerWrapper .makeLegalSampleName (rs .getName ()), "Antibody Capture" });
229244 }
230245 catch (IOException e )
231246 {
@@ -248,10 +263,26 @@ else if (rs.getApplication().equals("CITE-Seq"))
248263
249264 wrapper .execute (args );
250265
251- File outdir = new File (ctx .getOutputDir (), id );
252- outdir = new File (outdir , "outs" );
266+ File crDir = new File (ctx .getOutputDir (), id );
267+ if (idSuffix != null )
268+ {
269+ File toMove = new File (crDir .getPath () + idSuffix );
270+ ctx .getLogger ().debug ("Moving cellranger folder to: " + toMove .getPath ());
271+ try
272+ {
273+ FileUtils .moveDirectory (crDir , toMove );
274+ }
275+ catch (IOException e )
276+ {
277+ throw new PipelineJobException (e );
278+ }
279+
280+ crDir = toMove ;
281+ }
253282
254- File bam = new File (outdir , "possorted_genome_bam.bam" );
283+ File outsdir = new File (crDir , "outs" );
284+
285+ File bam = new File (outsdir , "possorted_genome_bam.bam" );
255286 if (!bam .exists ())
256287 {
257288 throw new PipelineJobException ("Unable to find file: " + bam .getPath ());
@@ -264,13 +295,13 @@ else if (rs.getApplication().equals("CITE-Seq"))
264295 try
265296 {
266297 String prefix = FileUtil .makeLegalName (rs .getName () + "_" );
267- File outputHtml = new File (outdir , "web_summary.html" );
298+ File outputHtml = new File (outsdir , "web_summary.html" );
268299 if (!outputHtml .exists ())
269300 {
270301 throw new PipelineJobException ("Unable to find file: " + outputHtml .getPath ());
271302 }
272303
273- File outputHtmlRename = new File (outdir , prefix + outputHtml .getName ());
304+ File outputHtmlRename = new File (outsdir , prefix + outputHtml .getName ());
274305 if (outputHtmlRename .exists ())
275306 {
276307 outputHtmlRename .delete ();
@@ -280,7 +311,7 @@ else if (rs.getApplication().equals("CITE-Seq"))
280311 String description = ctx .getParams ().optBoolean ("useGEX" , false ) ? "HTO and GEX Counts" : null ;
281312 output .addSequenceOutput (outputHtmlRename , rs .getName () + " 10x " + rs .getApplication () + " Summary" , "10x Run Summary" , rs .getRowId (), null , null , description );
282313
283- File rawCounts = new File (outdir , "raw_feature_bc_matrix/matrix.mtx.gz" );
314+ File rawCounts = new File (outsdir , "raw_feature_bc_matrix/matrix.mtx.gz" );
284315 if (rawCounts .exists ())
285316 {
286317 output .addSequenceOutput (rawCounts , rs .getName () + ": " + rs .getApplication () + " Raw Counts" , category , rs .getRowId (), null , null , description );
@@ -297,7 +328,7 @@ else if (rs.getApplication().equals("CITE-Seq"))
297328 }
298329
299330 //NOTE: this folder has many unnecessary files and symlinks that get corrupted when we rename the main outputs
300- File directory = new File (outdir .getParentFile (), "SC_RNA_COUNTER_CS" );
331+ File directory = new File (outsdir .getParentFile (), "SC_RNA_COUNTER_CS" );
301332 if (directory .exists ())
302333 {
303334 //NOTE: this will have lots of symlinks, including corrupted ones, which java handles badly
@@ -307,9 +338,6 @@ else if (rs.getApplication().equals("CITE-Seq"))
307338 {
308339 ctx .getLogger ().warn ("Unable to find folder: " + directory .getPath ());
309340 }
310-
311- ctx .getFileManager ().addStepOutputs (action , output );
312- ctx .addActions (action );
313341 }
314342
315343 private File makeDummyIndex (JobContext ctx ) throws PipelineJobException
0 commit comments