33import org .apache .commons .io .FileUtils ;
44import org .jetbrains .annotations .Nullable ;
55import org .json .JSONObject ;
6+ import org .labkey .api .data .Container ;
7+ import org .labkey .api .data .SimpleFilter ;
8+ import org .labkey .api .data .Sort ;
9+ import org .labkey .api .data .TableInfo ;
10+ import org .labkey .api .data .TableSelector ;
11+ import org .labkey .api .exp .api .ExpData ;
12+ import org .labkey .api .exp .api .ExperimentService ;
13+ import org .labkey .api .pipeline .PipelineJob ;
614import org .labkey .api .pipeline .PipelineJobException ;
15+ import org .labkey .api .query .FieldKey ;
16+ import org .labkey .api .query .QueryService ;
17+ import org .labkey .api .query .UserSchema ;
718import org .labkey .api .sequenceanalysis .model .Readset ;
819import org .labkey .api .sequenceanalysis .pipeline .AbstractAlignmentStepProvider ;
920import org .labkey .api .sequenceanalysis .pipeline .AlignmentOutputImpl ;
1425import org .labkey .api .sequenceanalysis .pipeline .SequenceAnalysisJobSupport ;
1526import org .labkey .api .sequenceanalysis .pipeline .ToolParameterDescriptor ;
1627import org .labkey .api .util .PageFlowUtil ;
28+ import org .labkey .singlecell .SingleCellSchema ;
1729
1830import java .io .File ;
1931import java .io .IOException ;
2032import java .util .Arrays ;
33+ import java .util .HashMap ;
2134import java .util .LinkedHashSet ;
2235import java .util .List ;
36+ import java .util .Map ;
2337
2438public class NimbleAlignmentStep extends AbstractCellRangerDependentStep
2539{
2640 public static final String REF_GENOMES = "refGenomes" ;
2741 public static final String MAX_HITS_TO_REPORT = "maxHitsToReport" ;
28- public static final String ALIGN_OUTPUT = "alignmentOutput" ;
2942 public static final String STRANDEDNESS = "strandedness" ;
43+ public static final String REQUIRE_CACHED_BARCODES = "requireCachedBarcodes" ;
3044
3145 public NimbleAlignmentStep (AlignmentStepProvider <?> provider , PipelineContext ctx , CellRangerWrapper wrapper )
3246 {
@@ -59,7 +73,10 @@ public static List<ToolParameterDescriptor> getToolParameters()
5973 }}, null ),
6074 ToolParameterDescriptor .create (MAX_HITS_TO_REPORT , "Max Hits To Report" , "If a given hit has more than this number of references, it is discarded" , "ldk-integerfield" , new JSONObject (){{
6175 put ("minValue" , 0 );
62- }}, 4 )
76+ }}, 4 ),
77+ ToolParameterDescriptor .create (REQUIRE_CACHED_BARCODES , "Fail Unless Cached Barcodes Present" , "If checked, the pipeline will expect a previously computed map of cellbarcodes and UMIs to be computed. Under default conditions, if this is missing, cellranger will be re-run. This flag can be helpful to avoid that computation if you expect the barcode file to exist." , "checkbox" , new JSONObject (){{
78+
79+ }}, false )
6380 );
6481 }
6582
@@ -68,6 +85,96 @@ public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nu
6885 {
6986 AlignmentOutputImpl output = new AlignmentOutputImpl ();
7087
88+ boolean throwIfNotFound = getProvider ().getParameterByName (REQUIRE_CACHED_BARCODES ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Boolean .class , false );
89+ File cachedBarcodes = getCachedBarcodeFile (rs , throwIfNotFound );
90+
91+ File localBam ;
92+ if (cachedBarcodes == null )
93+ {
94+ localBam = performCellRangerAlignment (output , rs , inputFastqs1 , inputFastqs2 , outputDirectory , referenceGenome , basename , readGroupId , platformUnit );
95+ }
96+ else
97+ {
98+ localBam = createNimbleBam (output , rs , inputFastqs1 , inputFastqs2 );
99+ }
100+
101+
102+ // Now run nimble itself:
103+ NimbleHelper helper = new NimbleHelper (getPipelineCtx (), getProvider (), getStepIdx ());
104+ helper .doNimbleAlign (localBam , output , rs , basename );
105+ output .setBAM (localBam );
106+
107+ return output ;
108+ }
109+
110+ private File createNimbleBam (AlignmentOutputImpl output , Readset rs , List <File > inputFastqs1 , List <File > inputFastqs2 ) throws PipelineJobException
111+ {
112+ File cellbarcodes = getCachedBarcodeFile (rs , true );
113+ File umiMapping = getUmiMapping (cellbarcodes );
114+
115+ return NimbleHelper .runFastqToBam (output , getPipelineCtx (), rs , inputFastqs1 , inputFastqs2 , cellbarcodes , umiMapping );
116+ }
117+
118+ private File getCachedBarcodeFile (Readset rs , boolean throwIfNotFound ) throws PipelineJobException
119+ {
120+ Map <Integer , Integer > map = getPipelineCtx ().getSequenceSupport ().getCachedObject (CACHE_KEY , PipelineJob .createObjectMapper ().getTypeFactory ().constructParametricType (Map .class , Integer .class , Integer .class ));
121+ Integer dataId = map .get (rs .getReadsetId ());
122+ if (dataId == null )
123+ {
124+ if (throwIfNotFound )
125+ {
126+ throw new PipelineJobException ("No cached data found for readset: " + rs .getReadsetId ());
127+ }
128+
129+ return null ;
130+ }
131+
132+ File ret = getPipelineCtx ().getSequenceSupport ().getCachedData (dataId );
133+ if (ret == null || ! ret .exists ())
134+ {
135+ throw new PipelineJobException ("Missing cached cellbarcode file: " + dataId );
136+ }
137+
138+ return ret ;
139+ }
140+
141+ private File getUmiMapping (File cellbarcodeFile ) throws PipelineJobException
142+ {
143+ File ret = new File (cellbarcodeFile .getPath ().replaceAll (".cb.txt.gz" , ".umi.txt.gz" ));
144+ if (ret == null || ! ret .exists ())
145+ {
146+ throw new PipelineJobException ("Missing cached UMI file: " + ret .getPath ());
147+ }
148+
149+ return ret ;
150+ }
151+
152+ private File findCellBarcodeFiles (Readset rs ) throws PipelineJobException
153+ {
154+ Container targetContainer = getPipelineCtx ().getJob ().getContainer ().isWorkbookOrTab () ? getPipelineCtx ().getJob ().getContainer ().getParent () : getPipelineCtx ().getJob ().getContainer ();
155+ UserSchema us = QueryService .get ().getUserSchema (getPipelineCtx ().getJob ().getUser (), targetContainer , SingleCellSchema .SEQUENCE_SCHEMA_NAME );
156+ TableInfo ti = us .getTable ("outputfiles" );
157+
158+ SimpleFilter sf = new SimpleFilter (FieldKey .fromString ("readset" ), rs .getRowId ());
159+ sf .addCondition (FieldKey .fromString ("category" ), NimbleHelper .CATEGORY_CB );
160+ List <Integer > cbs = new TableSelector (ti , PageFlowUtil .set ("dataid" ), sf , new Sort ("-rowid" )).getArrayList (Integer .class );
161+ if (!cbs .isEmpty ())
162+ {
163+ int dataId = cbs .get (0 );
164+ ExpData d = ExperimentService .get ().getExpData (dataId );
165+ if (d == null || d .getFile () == null )
166+ {
167+ throw new PipelineJobException ("Output lacks a file: " + dataId );
168+ }
169+
170+ return d .getFile ();
171+ }
172+
173+ return null ;
174+ }
175+
176+ private File performCellRangerAlignment (AlignmentOutputImpl output , Readset rs , List <File > inputFastqs1 , @ Nullable List <File > inputFastqs2 , File outputDirectory , ReferenceGenome referenceGenome , String basename , String readGroupId , @ Nullable String platformUnit ) throws PipelineJobException
177+ {
71178 // We need to ensure we keep the BAM for post-processing:
72179 setAlwaysRetainBam (true );
73180
@@ -89,12 +196,7 @@ public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nu
89196
90197 NimbleHelper .write10xBarcodes (localBam , getWrapper ().getLogger (), rs , referenceGenome , output );
91198
92- // Now run nimble itself:
93- NimbleHelper helper = new NimbleHelper (getPipelineCtx (), getProvider (), getStepIdx ());
94- helper .doNimbleAlign (localBam , output , rs , basename );
95- output .setBAM (localBam );
96-
97- return output ;
199+ return localBam ;
98200 }
99201
100202 @ Override
@@ -109,5 +211,20 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
109211 {
110212 helper .prepareGenome (id );
111213 }
214+
215+ // Try to find 10x barcodes:
216+ HashMap <Integer , File > readsetToBarcodes = new HashMap <>();
217+ for (Readset rs : support .getCachedReadsets ())
218+ {
219+ File f = findCellBarcodeFiles (rs );
220+ if (f != null )
221+ {
222+ readsetToBarcodes .put (rs .getReadsetId (), f );
223+ }
224+ }
225+
226+ support .cacheObject (CACHE_KEY , readsetToBarcodes );
112227 }
228+
229+ private static final String CACHE_KEY = "nimble.cb" ;
113230}
0 commit comments