1+ package org .labkey .singlecell .run ;
2+
3+ import org .apache .commons .io .FileUtils ;
4+ import org .json .JSONObject ;
5+ import org .labkey .api .module .ModuleLoader ;
6+ import org .labkey .api .pipeline .PipelineJob ;
7+ import org .labkey .api .pipeline .PipelineJobException ;
8+ import org .labkey .api .pipeline .RecordedAction ;
9+ import org .labkey .api .sequenceanalysis .SequenceOutputFile ;
10+ import org .labkey .api .sequenceanalysis .pipeline .AbstractParameterizedOutputHandler ;
11+ import org .labkey .api .sequenceanalysis .pipeline .CommandLineParam ;
12+ import org .labkey .api .sequenceanalysis .pipeline .SequenceAnalysisJobSupport ;
13+ import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
14+ import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
15+ import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
16+ import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
17+ import org .labkey .api .util .FileUtil ;
18+ import org .labkey .singlecell .SingleCellModule ;
19+
20+ import java .io .File ;
21+ import java .io .IOException ;
22+ import java .util .ArrayList ;
23+ import java .util .Arrays ;
24+ import java .util .List ;
25+
26+ public class CellBenderCiteSeqHandler extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >
27+ {
28+ public CellBenderCiteSeqHandler ()
29+ {
30+ super (ModuleLoader .getInstance ().getModule (SingleCellModule .class ), "Run CellBender (CITE-seq)" , "This will run cellbender on the input cellranger folder and create a subset matrix with background/ambient noise removed." , null , getParams ());
31+ }
32+
33+ protected static List <ToolParameterDescriptor > getParams ()
34+ {
35+ return Arrays .asList (
36+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("--expected-cells" ), "expectedCells" , "Expected Cells" , "Passed to CellBender --expected-cells" , "ldk-integerfield" , null , 5000 ),
37+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("--total-droplets-included" ), "totalDropletsIncluded" , "Total Droplets Included" , "Passed to CellBender --total-droplets-included" , "ldk-integerfield" , null , 20000 ),
38+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("--fpr" ), "fpr" , "fpr Cells" , "Passed to CellBender --fpr" , "ldk-numberfield" , new JSONObject (){{
39+ put ("decimalPrecision" , 3 );
40+ }}, 0.01 ),
41+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .create ("--epochs" ), "epochs" , "Epochs" , "Passed to CellBender --epochs" , "ldk-integerfield" , null , 150 ),
42+ ToolParameterDescriptor .createCommandLineParam (CommandLineParam .createSwitch ("--cuda" ), "useGpus" , "Use GPUs" , "If checked, the --cuda argument will be set on cellbender" , "checkbox" , null , false )
43+ );
44+ }
45+
46+ @ Override
47+ public boolean doSplitJobs ()
48+ {
49+ return true ;
50+ }
51+
52+ @ Override
53+ public boolean canProcess (SequenceOutputFile o )
54+ {
55+ return "CITE-seq Counts" .equals (o .getCategory ()) & "matrix.mtx.gz" .equals (o .getFile ().getName ());
56+ }
57+
58+ @ Override
59+ public boolean doRunRemote ()
60+ {
61+ return true ;
62+ }
63+
64+ @ Override
65+ public boolean doRunLocal ()
66+ {
67+ return false ;
68+ }
69+
70+ @ Override
71+ public SequenceOutputProcessor getProcessor ()
72+ {
73+ return new CellBenderCiteSeqHandler .Processor ();
74+ }
75+
76+ public class Processor implements SequenceOutputProcessor
77+ {
78+ @ Override
79+ public void init (JobContext ctx , List <SequenceOutputFile > inputFiles , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
80+ {
81+ if (inputFiles .size () > 1 )
82+ {
83+ throw new PipelineJobException ("Expected a single input" );
84+ }
85+
86+ File h5 = getH5 (inputFiles .get (0 ).getFile ());
87+ if (!h5 .exists ())
88+ {
89+ throw new PipelineJobException ("Unable to find file: " + h5 .getPath ());
90+ }
91+ }
92+
93+ private File getH5 (File matrix )
94+ {
95+ return new File (matrix .getParentFile ().getParentFile (), "raw_feature_bc_matrix.h5" );
96+ }
97+
98+ @ Override
99+ public void processFilesOnWebserver (PipelineJob job , SequenceAnalysisJobSupport support , List <SequenceOutputFile > inputFiles , JSONObject params , File outputDir , List <RecordedAction > actions , List <SequenceOutputFile > outputsToCreate ) throws UnsupportedOperationException , PipelineJobException
100+ {
101+
102+ }
103+
104+ @ Override
105+ public void processFilesRemote (List <SequenceOutputFile > inputFiles , JobContext ctx ) throws UnsupportedOperationException , PipelineJobException
106+ {
107+ File inputH5 = getH5 (inputFiles .get (0 ).getFile ());
108+ File outputH5 = new File (ctx .getOutputDir (), FileUtil .getBaseName (inputH5 .getName ()) + ".cellbender.h5" );
109+
110+ String exe = SequencePipelineService .get ().getExeForPackage ("CELLBENDERPATH" , "cellbender" ).getPath ();
111+ List <String > args = new ArrayList <>(Arrays .asList (
112+ exe , "--input" , inputH5 .getPath (),
113+ "--output" , outputH5 .getPath ()
114+ ));
115+ args .addAll (getClientCommandArgs (ctx .getParams ()));
116+
117+ new SimpleScriptWrapper (ctx .getLogger ()).execute (args );
118+ if (!outputH5 .exists ())
119+ {
120+ throw new PipelineJobException ("Missing file: " + outputH5 .getPath ());
121+ }
122+
123+ File filteredH5 = new File (outputH5 .getPath ().replaceAll (".h5$" , "_filtered.h5" ));
124+ if (!filteredH5 .exists ())
125+ {
126+ throw new PipelineJobException ("Missing file: " + filteredH5 .getPath ());
127+ }
128+
129+ SequenceOutputFile so = new SequenceOutputFile ();
130+ so .setReadset (inputFiles .get (0 ).getReadset ());
131+ so .setLibrary_id (inputFiles .get (0 ).getLibrary_id ());
132+ so .setFile (filteredH5 );
133+ if (so .getReadset () != null )
134+ {
135+ so .setName (ctx .getSequenceSupport ().getCachedReadset (so .getReadset ()).getName () + ": CellBender Filtered" );
136+ }
137+ else
138+ {
139+ so .setName (inputFiles .get (0 ).getName () + ": CellBender Filtered" );
140+ }
141+ so .setCategory ("CellBender Filtered CITE-Seq Counts" );
142+ ctx .addSequenceOutput (so );
143+
144+ File aggregates = new File (inputH5 .getParentFile (), "antibody_analysis/aggregate_barcodes.csv" );
145+ if (!aggregates .exists ())
146+ {
147+ throw new PipelineJobException ("Missing file: " + outputH5 .getPath ());
148+ }
149+
150+ try
151+ {
152+ File aggregatesCopy = new File (ctx .getOutputDir (), aggregates .getName ());
153+ if (aggregatesCopy .exists ())
154+ {
155+ aggregatesCopy .delete ();
156+ }
157+
158+ FileUtils .copyFile (aggregates , aggregatesCopy );
159+ }
160+ catch (IOException e )
161+ {
162+ throw new PipelineJobException (e );
163+ }
164+ }
165+ }
166+ }
0 commit comments