Skip to content

Commit a38c81b

Browse files
committed
Allow nimble to cache reference data
1 parent c880068 commit a38c81b

File tree

1 file changed

+72
-14
lines changed

1 file changed

+72
-14
lines changed

singlecell/src/org/labkey/singlecell/run/NimbleHelper.java

Lines changed: 72 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
package org.labkey.singlecell.run;
22

3-
import au.com.bytecode.opencsv.CSVReader;
43
import au.com.bytecode.opencsv.CSVWriter;
54
import org.apache.commons.io.FileUtils;
65
import org.apache.commons.io.IOUtils;
@@ -10,23 +9,18 @@
109
import org.json.JSONObject;
1110
import org.labkey.api.data.ColumnInfo;
1211
import org.labkey.api.data.Container;
13-
import org.labkey.api.data.DbSchema;
14-
import org.labkey.api.data.DbSchemaType;
1512
import org.labkey.api.data.SimpleFilter;
16-
import org.labkey.api.data.Table;
1713
import org.labkey.api.data.TableInfo;
1814
import org.labkey.api.data.TableSelector;
19-
import org.labkey.api.exp.api.ExpData;
20-
import org.labkey.api.exp.api.ExperimentService;
2115
import org.labkey.api.pipeline.PipelineJob;
2216
import org.labkey.api.pipeline.PipelineJobException;
2317
import org.labkey.api.query.FieldKey;
2418
import org.labkey.api.query.QueryService;
2519
import org.labkey.api.reader.Readers;
2620
import org.labkey.api.sequenceanalysis.RefNtSequenceModel;
2721
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
28-
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
2922
import org.labkey.api.sequenceanalysis.model.Readset;
23+
import org.labkey.api.sequenceanalysis.pipeline.AlignerIndexUtil;
3024
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
3125
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput;
3226
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
@@ -36,7 +30,6 @@
3630
import org.labkey.api.util.Compress;
3731
import org.labkey.api.util.PageFlowUtil;
3832
import org.labkey.api.writer.PrintWriters;
39-
import org.labkey.singlecell.SingleCellSchema;
4033

4134
import javax.annotation.Nullable;
4235
import java.io.BufferedReader;
@@ -117,8 +110,16 @@ public void prepareGenome(int genomeId) throws PipelineJobException
117110
}
118111

119112
getPipelineCtx().getSequenceSupport().cacheGenome(rg);
113+
if (AlignerIndexUtil.hasCachedIndex(getPipelineCtx(), "nimble", rg))
114+
{
115+
getPipelineCtx().getLogger().debug("Cached index found, will not re-create");
116+
return;
117+
}
118+
120119
getPipelineCtx().getLogger().info("Preparing genome CSV/FASTA for " + rg.getName());
121-
try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(getGenomeCsv(genomeId)), ',', CSVWriter.NO_QUOTE_CHARACTER); PrintWriter fastaWriter = PrintWriters.getPrintWriter(getGenomeFasta(genomeId)))
120+
File csv = getGenomeCsv(genomeId, true);
121+
File fasta = getGenomeFasta(genomeId, true);
122+
try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(csv), ',', CSVWriter.NO_QUOTE_CHARACTER); PrintWriter fastaWriter = PrintWriters.getPrintWriter(fasta))
122123
{
123124
writer.writeNext(new String[]{"reference_genome", "name", "nt_length", "genbank", "category", "subset", "locus", "lineage", "sequence"});
124125

@@ -147,20 +148,77 @@ public void prepareGenome(int genomeId) throws PipelineJobException
147148
fastaWriter.println(seq);
148149
});
149150
}
150-
catch(IOException e)
151+
catch (IOException e)
151152
{
152153
throw new PipelineJobException(e);
153154
}
155+
156+
File indexDir = AlignerIndexUtil.getIndexDir(rg, "nimble");
157+
if (!indexDir.exists())
158+
{
159+
indexDir.mkdir();
160+
}
161+
162+
for (File f : Arrays.asList(csv, fasta))
163+
{
164+
File cached = new File(indexDir, f.getName());
165+
if (!cached.exists())
166+
{
167+
try
168+
{
169+
getPipelineCtx().getLogger().debug("Caching file: " + cached.getPath());
170+
FileUtils.copyFile(f, cached);
171+
}
172+
catch (IOException e)
173+
{
174+
throw new PipelineJobException(e);
175+
}
176+
}
177+
}
178+
}
179+
180+
private File getGenomeCsv(int genomeId) throws PipelineJobException
181+
{
182+
return getGenomeCsv(genomeId, false);
183+
}
184+
185+
private File getGenomeCsv(int genomeId, boolean forceWorkDir) throws PipelineJobException
186+
{
187+
ReferenceGenome rg = SequenceAnalysisService.get().getReferenceGenome(genomeId, getPipelineCtx().getJob().getUser());
188+
if (rg == null)
189+
{
190+
throw new PipelineJobException("Unable to find genome: " + genomeId);
191+
}
192+
193+
if (!forceWorkDir && AlignerIndexUtil.hasCachedIndex(getPipelineCtx(), "nimble", rg))
194+
{
195+
File indexDir = AlignerIndexUtil.getIndexDir(rg, "nimble");
196+
return new File(indexDir, "genome." + genomeId + ".csv");
197+
}
198+
199+
return new File(getPipelineCtx().getSourceDirectory(), "genome." + genomeId + ".csv");
154200
}
155201

156-
private File getGenomeCsv(int id)
202+
private File getGenomeFasta(int genomeId) throws PipelineJobException
157203
{
158-
return new File(getPipelineCtx().getSourceDirectory(), "genome." + id + ".csv");
204+
return getGenomeFasta(genomeId, false);
159205
}
160206

161-
private File getGenomeFasta(int id)
207+
private File getGenomeFasta(int genomeId, boolean forceWorkDir) throws PipelineJobException
162208
{
163-
return new File(getPipelineCtx().getSourceDirectory(), "genome." + id + ".fasta");
209+
ReferenceGenome rg = SequenceAnalysisService.get().getReferenceGenome(genomeId, getPipelineCtx().getJob().getUser());
210+
if (rg == null)
211+
{
212+
throw new PipelineJobException("Unable to find genome: " + genomeId);
213+
}
214+
215+
if (!forceWorkDir && AlignerIndexUtil.hasCachedIndex(getPipelineCtx(), "nimble", rg))
216+
{
217+
File indexDir = AlignerIndexUtil.getIndexDir(rg, "nimble");
218+
return new File(indexDir, "genome." + genomeId + ".fasta");
219+
}
220+
221+
return new File(getPipelineCtx().getSourceDirectory(), "genome." + genomeId + ".fasta");
164222
}
165223

166224
public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, String basename) throws UnsupportedOperationException, PipelineJobException

0 commit comments

Comments
 (0)