@@ -68,23 +68,41 @@ public class CellHashingHandler extends AbstractParameterizedOutputHandler<Seque
6868
/**
 * No-argument constructor. On the added ('+') side of this diff it delegates to the
 * (name, description, defaultParams) constructor with the name "Cell Hashing Calls",
 * a description covering both the CITE-Seq Count step and the R-based per-cell calls,
 * and the parameter list returned by {@link #getDefaultParams()}.
 * The removed ('-') line shows the previous behavior: a direct super(...) call using
 * the name "CITE-Seq Count".
 */
6969 public CellHashingHandler ()
7070 {
71- super (ModuleLoader .getInstance ().getModule (SequenceAnalysisModule .class ), "CITE-Seq Count" , "This will run CITE-Seq Count to generate a table of features counts from CITE-Seq or cell hashing libraries" , null , getDefaultParams ());
71+ this ("Cell Hashing Calls" , "This will run CITE-Seq Count to generate a table of features counts from CITE-Seq or cell hashing libraries. It will also run R code to generate a table of calls per cell" , getDefaultParams ());
72+ }
73+
/**
 * Constructor that lets the caller choose the handler's name, description and
 * parameter descriptors. It forwards them to the superclass constructor together with
 * the SequenceAnalysisModule instance obtained from ModuleLoader; the fourth
 * superclass argument is always passed as null (its meaning is not visible in this file).
 *
 * @param name          handler name forwarded to the superclass
 * @param description   handler description forwarded to the superclass
 * @param defaultParams parameter descriptors forwarded to the superclass
 */
74+ protected CellHashingHandler (String name , String description , List <ToolParameterDescriptor > defaultParams )
75+ {
76+ super (ModuleLoader .getInstance ().getModule (SequenceAnalysisModule .class ), name , description , null , defaultParams );
7277 }
7378
/**
 * Returns the default parameter descriptors for this handler.
 * On the added ('+') side this is a thin wrapper around
 * {@code getDefaultParams(boolean allowScanningEditDistance, String defaultTagGroup)},
 * called with {@code true} and {@code DEFAULT_TAG_GROUP}. The removed ('-') line is the
 * start of the previous inline Arrays.asList(...) construction.
 *
 * @return the list produced by the two-argument overload
 */
7479 public static List <ToolParameterDescriptor > getDefaultParams ()
7580 {
76- return Arrays .asList (
81+ return getDefaultParams (true , DEFAULT_TAG_GROUP );
82+ }
83+
84+ public static List <ToolParameterDescriptor > getDefaultParams (boolean allowScanningEditDistance , String defaultTagGroup )
85+ {
86+ List <ToolParameterDescriptor > ret = new ArrayList <>(Arrays .asList (
7787 ToolParameterDescriptor .create ("outputFilePrefix" , "Output File Basename" , null , "textfield" , new JSONObject (){{
7888 put ("allowBlank" , false );
7989 }}, "cellHashingCalls" ),
8090 ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-cbf" ), "cbf" , "Cell Barcode Start" , null , "ldk-integerfield" , null , 1 ),
8191 ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-cbl" ), "cbl" , "Cell Barcode End" , null , "ldk-integerfield" , null , 16 ),
8292 ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-umif" ), "umif" , "UMI Start" , null , "ldk-integerfield" , null , 17 ),
83- ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-umil" ), "umil" , "UMI End" , null , "ldk-integerfield" , null , 26 ),
84- ToolParameterDescriptor .create ("scanEditDistances" , "Scan Edit Distances" , "If checked, CITE-seq-count will be run using edit distances from 0-3 and the iteration with the highest singlets will be used." , "checkbox" , new JSONObject (){{
85- put ("checked" , true );
86- }}, true ),
87- ToolParameterDescriptor .create ("editDistance" , "Edit Distance" , null , "ldk-integerfield" , null , 1 ),
93+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-umil" ), "umil" , "UMI End" , null , "ldk-integerfield" , null , 26 )
94+ ));
95+
96+ if (allowScanningEditDistance )
97+ {
98+ ret .add (ToolParameterDescriptor .create ("scanEditDistances" , "Scan Edit Distances" , "If checked, CITE-seq-count will be run using edit distances from 0-3 and the iteration with the highest singlets will be used." , "checkbox" , new JSONObject ()
99+ {{
100+ put ("checked" , true );
101+ }}, true ));
102+ }
103+
104+ ret .addAll (Arrays .asList (
105+ ToolParameterDescriptor .create ("editDistance" , "Edit Distance" , null , "ldk-integerfield" , null , 3 ),
88106 ToolParameterDescriptor .create ("excludeFailedcDNA" , "Exclude Failed cDNA" , "If selected, cDNAs with non-blank status fields will be omitted" , "checkbox" , null , true ),
89107 ToolParameterDescriptor .create ("minCountPerCell" , "Min Reads/Cell" , null , "ldk-integerfield" , null , 5 ),
90108 ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("-cells" ), "cells" , "Expected Cells" , null , "ldk-integerfield" , null , 20000 ),
@@ -94,11 +112,13 @@ public static List<ToolParameterDescriptor> getDefaultParams()
94112 put ("displayField" , "group_name" );
95113 put ("valueField" , "group_name" );
96114 put ("allowBlank" , false );
97- }}, DEFAULT_TAG_GROUP ),
115+ }}, defaultTagGroup ),
98116 ToolParameterDescriptor .create ("useOutputFileContainer" , "Submit to Source File Workbook" , "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected." , "checkbox" , new JSONObject (){{
99117 put ("checked" , false );
100118 }}, false )
101- );
119+ ));
120+
121+ return ret ;
102122 }
103123
104124 @ Override
@@ -134,11 +154,18 @@ public boolean doSplitJobs()
/**
 * Creates the processor used by this handler. A new Processor is constructed on every
 * call; on the added ('+') side it is created with {@code generateHtoCalls = true}.
 *
 * @return a newly constructed Processor
 */
134154 @ Override
135155 public SequenceReadsetProcessor getProcessor ()
136156 {
137- return new Processor ();
157+ return new Processor (true );
138158 }
139159
140- protected class Processor implements SequenceReadsetProcessor
160+ public class Processor implements SequenceReadsetProcessor
141161 {
162+ private final boolean _generateHtoCalls ;
163+
/**
 * Creates a processor and records whether HTO calls should be generated.
 *
 * @param generateHtoCalls stored in {@code _generateHtoCalls}. processFilesRemote passes
 *                         it on to executeCiteSeqCount, which only runs generateFinalCalls
 *                         and parseOutputTable when it is true. When it is false,
 *                         processFilesRemote throws a PipelineJobException if the
 *                         "scanEditDistances" parameter is enabled.
 */
// NOTE(review): with generateHtoCalls == false, the map returned by executeCiteSeqCount
// appears to contain only the "citeSeqCount" key, yet the edit-distance loop in
// processFilesRemote still evaluates callMap.get("singlet").toString(). That looks like
// a NullPointerException on this path - confirm against the full method and guard it.
164+ public Processor (boolean generateHtoCalls )
165+ {
166+ _generateHtoCalls = generateHtoCalls ;
167+ }
168+
142169 @ Override
143170 public void init (PipelineJob job , SequenceAnalysisJobSupport support , List <Readset > readsets , JSONObject params , File outputDir , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
144171 {
@@ -203,11 +230,16 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
203230 Set <Integer > editDistances = new TreeSet <>();
204231 Map <Integer , Map <String , Object >> results = new HashMap <>();
205232
206- Integer highestSinglet = 0 ;
233+ int highestSinglet = 0 ;
207234 Integer bestEditDistance = null ;
208235
209236 Integer minCountPerCell = ctx .getParams ().optInt ("minCountPerCell" , 5 );
210237 boolean scanEditDistances = ctx .getParams ().optBoolean ("scanEditDistances" , false );
238+ if (!_generateHtoCalls && scanEditDistances )
239+ {
240+ throw new PipelineJobException ("Scan edit distances should not be possible to use unless cell hashing is used" );
241+ }
242+
211243 if (scanEditDistances )
212244 {
213245 editDistances .add (0 );
@@ -223,7 +255,7 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
223255
224256 for (Integer editDistance : editDistances )
225257 {
226- Map <String , Object > callMap = executeCiteSeqCount (ctx , action , rs , editDistance , minCountPerCell );
258+ Map <String , Object > callMap = executeCiteSeqCount (ctx , action , rs , editDistance , minCountPerCell , _generateHtoCalls );
227259 results .put (editDistance , callMap );
228260
229261 int singlet = Integer .parseInt (callMap .get ("singlet" ).toString ());
@@ -240,9 +272,22 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
240272 ctx .getLogger ().info ("Using edit distance: " + bestEditDistance + ", singlet: " + highestSinglet );
241273
242274 Map <String , Object > callMap = results .get (bestEditDistance );
243- String description = String .format ("Edit Distance: %,d\n Min Reads/Cell: %,d\n Total Singlet: %,d\n Doublet: %,d\n Discordant: %,d\n Seurat Called: %,d\n Negative: %,d\n Unique HTOs: %s" , bestEditDistance , minCountPerCell , callMap .get ("singlet" ), callMap .get ("doublet" ), callMap .get ("discordant" ), callMap .get ("seuratSinglet" ), callMap .get ("negative" ), callMap .get ("UniqueHtos" ));
244- File htoCalls = (File )callMap .get ("htoCalls" );
245- File html = (File )callMap .get ("html" );
275+ if (_generateHtoCalls )
276+ {
277+ String description = String .format ("Edit Distance: %,d\n Min Reads/Cell: %,d\n Total Singlet: %,d\n Doublet: %,d\n Discordant: %,d\n Seurat Called: %,d\n Negative: %,d\n Unique HTOs: %s" , bestEditDistance , minCountPerCell , callMap .get ("singlet" ), callMap .get ("doublet" ), callMap .get ("discordant" ), callMap .get ("seuratSinglet" ), callMap .get ("negative" ), callMap .get ("UniqueHtos" ));
278+ File htoCalls = (File ) callMap .get ("htoCalls" );
279+ File html = (File ) callMap .get ("html" );
280+
281+ ctx .getFileManager ().addSequenceOutput (htoCalls , rs .getName () + ": Cell Hashing Calls" ,"Cell Hashing Calls" , rs .getReadsetId (), null , null , description );
282+ ctx .getFileManager ().addSequenceOutput (html , rs .getName () + ": Cell Hashing Report" ,"Cell Hashing Report" , rs .getReadsetId (), null , null , description );
283+ }
284+ else
285+ {
286+ ctx .getLogger ().debug ("HTO calls will not be generated" );
287+
288+ File citeSeqCount = (File ) callMap .get ("citeSeqCount" );
289+ ctx .getFileManager ().addSequenceOutput (citeSeqCount , rs .getName () + ": CITE-Seq Count Matrix" ,"CITE-Seq Count Matrix" , rs .getReadsetId (), null , null , null );
290+ }
246291
247292 File origUnknown = getCiteSeqCountUnknownOutput (ctx .getSourceDirectory (), bestEditDistance );
248293 File movedUnknown = getCiteSeqCountUnknownOutput (ctx .getSourceDirectory (), null );
@@ -260,9 +305,6 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
260305 {
261306 throw new PipelineJobException (e );
262307 }
263-
264- ctx .getFileManager ().addSequenceOutput (htoCalls , rs .getName () + ": Cell Hashing Calls" ,"Cell Hashing Calls" , rs .getReadsetId (), null , null , description );
265- ctx .getFileManager ().addSequenceOutput (html , rs .getName () + ": Cell Hashing Report" ,"Cell Hashing Report" , rs .getReadsetId (), null , null , description );
266308 }
267309 else
268310 {
@@ -275,7 +317,7 @@ public void processFilesRemote(List<Readset> readsets, JobContext ctx) throws Un
275317 }
276318 }
277319
278- private Map <String , Object > executeCiteSeqCount (JobContext ctx , RecordedAction action , Readset rs , int editDistance , int minCountPerCell ) throws PipelineJobException
320+ private Map <String , Object > executeCiteSeqCount (JobContext ctx , RecordedAction action , Readset rs , int editDistance , int minCountPerCell , boolean generateHtoCalls ) throws PipelineJobException
279321 {
280322 CiteSeqCountWrapper wrapper = new CiteSeqCountWrapper (ctx .getLogger ());
281323 ReadData rd = rs .getReadData ().get (0 );
@@ -328,27 +370,32 @@ private Map<String, Object> executeCiteSeqCount(JobContext ctx, RecordedAction a
328370 throw new PipelineJobException (e );
329371 }
330372 ctx .getFileManager ().addIntermediateFile (doneFile );
373+ ctx .getFileManager ().addOutput (action , "Unknown barcodes" , unknownBarcodes );
374+ ctx .getFileManager ().addOutput (action , "CITE-seq Raw Counts" , outputMatrix );
375+ ctx .getFileManager ().addIntermediateFile (unknownBarcodes );
376+ ctx .getFileManager ().addIntermediateFile (outputDir );
331377
332- ctx .getJob ().setStatus (PipelineJob .TaskStatus .running , "Generating HTO calls for edit distance: " + editDistance );
333- File htoCalls = generateFinalCalls (outputMatrix .getParentFile (), ctx .getOutputDir (), outputBasename , ctx .getLogger (), null , true , minCountPerCell , ctx .getSourceDirectory ());
334- File html = new File (htoCalls .getParentFile (), outputBasename + ".html" );
378+ Map <String , Object > callMap = new HashMap <>();
379+ callMap .put ("citeSeqCount" , outputMatrix );
335380
336- if (! html . exists () )
381+ if (generateHtoCalls )
337382 {
338- throw new PipelineJobException ("Unable to find expected HTML file: " + html .getPath ());
339- }
383+ ctx .getJob ().setStatus (PipelineJob .TaskStatus .running , "Generating HTO calls for edit distance: " + editDistance );
384+ File htoCalls = generateFinalCalls (outputMatrix .getParentFile (), ctx .getOutputDir (), outputBasename , ctx .getLogger (), null , true , minCountPerCell , ctx .getSourceDirectory ());
385+ File html = new File (htoCalls .getParentFile (), outputBasename + ".html" );
340386
341- ctx . getFileManager (). addOutput ( action , "Unknown barcodes" , unknownBarcodes );
342- ctx . getFileManager (). addOutput ( action , "CITE-seq Raw Counts" , outputMatrix );
343- ctx . getFileManager (). addOutput ( action , "Cell Hashing Calls" , htoCalls );
344- ctx . getFileManager (). addOutput ( action , "Cell Hashing Report" , html );
387+ if (! html . exists ())
388+ {
389+ throw new PipelineJobException ( "Unable to find expected HTML file: " + html . getPath () );
390+ }
345391
346- ctx .getFileManager ().addIntermediateFile ( unknownBarcodes );
347- ctx .getFileManager ().addIntermediateFile ( outputDir );
392+ ctx .getFileManager ().addOutput ( action , "Cell Hashing Calls" , htoCalls );
393+ ctx .getFileManager ().addOutput ( action , "Cell Hashing Report" , html );
348394
349- Map <String , Object > callMap = parseOutputTable (ctx .getLogger (), htoCalls , unknownBarcodes , ctx .getSourceDirectory (), ctx .getWorkingDirectory (), true );
350- callMap .put ("htoCalls" , htoCalls );
351- callMap .put ("html" , html );
395+ callMap .putAll (parseOutputTable (ctx .getLogger (), htoCalls , unknownBarcodes , ctx .getSourceDirectory (), ctx .getWorkingDirectory (), true ));
396+ callMap .put ("htoCalls" , htoCalls );
397+ callMap .put ("html" , html );
398+ }
352399
353400 return callMap ;
354401 }
0 commit comments