|
1 | 1 | package org.labkey.singlecell.run; |
2 | 2 |
|
3 | | -import au.com.bytecode.opencsv.CSVReader; |
4 | 3 | import au.com.bytecode.opencsv.CSVWriter; |
5 | 4 | import org.apache.commons.io.FileUtils; |
6 | 5 | import org.apache.commons.io.IOUtils; |
|
10 | 9 | import org.json.JSONObject; |
11 | 10 | import org.labkey.api.data.ColumnInfo; |
12 | 11 | import org.labkey.api.data.Container; |
13 | | -import org.labkey.api.data.DbSchema; |
14 | | -import org.labkey.api.data.DbSchemaType; |
15 | 12 | import org.labkey.api.data.SimpleFilter; |
16 | | -import org.labkey.api.data.Table; |
17 | 13 | import org.labkey.api.data.TableInfo; |
18 | 14 | import org.labkey.api.data.TableSelector; |
19 | | -import org.labkey.api.exp.api.ExpData; |
20 | | -import org.labkey.api.exp.api.ExperimentService; |
21 | 15 | import org.labkey.api.pipeline.PipelineJob; |
22 | 16 | import org.labkey.api.pipeline.PipelineJobException; |
23 | 17 | import org.labkey.api.query.FieldKey; |
24 | 18 | import org.labkey.api.query.QueryService; |
25 | 19 | import org.labkey.api.reader.Readers; |
26 | 20 | import org.labkey.api.sequenceanalysis.RefNtSequenceModel; |
27 | 21 | import org.labkey.api.sequenceanalysis.SequenceAnalysisService; |
28 | | -import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
29 | 22 | import org.labkey.api.sequenceanalysis.model.Readset; |
| 23 | +import org.labkey.api.sequenceanalysis.pipeline.AlignerIndexUtil; |
30 | 24 | import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; |
31 | 25 | import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput; |
32 | 26 | import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; |
|
36 | 30 | import org.labkey.api.util.Compress; |
37 | 31 | import org.labkey.api.util.PageFlowUtil; |
38 | 32 | import org.labkey.api.writer.PrintWriters; |
39 | | -import org.labkey.singlecell.SingleCellSchema; |
40 | 33 |
|
41 | 34 | import javax.annotation.Nullable; |
42 | 35 | import java.io.BufferedReader; |
@@ -117,8 +110,16 @@ public void prepareGenome(int genomeId) throws PipelineJobException |
117 | 110 | } |
118 | 111 |
|
119 | 112 | getPipelineCtx().getSequenceSupport().cacheGenome(rg); |
| 113 | + if (AlignerIndexUtil.hasCachedIndex(getPipelineCtx(), "nimble", rg)) |
| 114 | + { |
| 115 | + getPipelineCtx().getLogger().debug("Cached index found, will not re-create"); |
| 116 | + return; |
| 117 | + } |
| 118 | + |
120 | 119 | getPipelineCtx().getLogger().info("Preparing genome CSV/FASTA for " + rg.getName()); |
121 | | - try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(getGenomeCsv(genomeId)), ',', CSVWriter.NO_QUOTE_CHARACTER); PrintWriter fastaWriter = PrintWriters.getPrintWriter(getGenomeFasta(genomeId))) |
| 120 | + File csv = getGenomeCsv(genomeId, true); |
| 121 | + File fasta = getGenomeFasta(genomeId, true); |
| 122 | + try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(csv), ',', CSVWriter.NO_QUOTE_CHARACTER); PrintWriter fastaWriter = PrintWriters.getPrintWriter(fasta)) |
122 | 123 | { |
123 | 124 | writer.writeNext(new String[]{"reference_genome", "name", "nt_length", "genbank", "category", "subset", "locus", "lineage", "sequence"}); |
124 | 125 |
|
@@ -147,20 +148,77 @@ public void prepareGenome(int genomeId) throws PipelineJobException |
147 | 148 | fastaWriter.println(seq); |
148 | 149 | }); |
149 | 150 | } |
150 | | - catch(IOException e) |
| 151 | + catch (IOException e) |
151 | 152 | { |
152 | 153 | throw new PipelineJobException(e); |
153 | 154 | } |
| 155 | + |
| 156 | + File indexDir = AlignerIndexUtil.getIndexDir(rg, "nimble"); |
| 157 | + if (!indexDir.exists()) |
| 158 | + { |
| 159 | + indexDir.mkdir(); |
| 160 | + } |
| 161 | + |
| 162 | + for (File f : Arrays.asList(csv, fasta)) |
| 163 | + { |
| 164 | + File cached = new File(indexDir, f.getName()); |
| 165 | + if (!cached.exists()) |
| 166 | + { |
| 167 | + try |
| 168 | + { |
| 169 | + getPipelineCtx().getLogger().debug("Caching file: " + cached.getPath()); |
| 170 | + FileUtils.copyFile(f, cached); |
| 171 | + } |
| 172 | + catch (IOException e) |
| 173 | + { |
| 174 | + throw new PipelineJobException(e); |
| 175 | + } |
| 176 | + } |
| 177 | + } |
| 178 | + } |
| 179 | + |
| 180 | + private File getGenomeCsv(int genomeId) throws PipelineJobException |
| 181 | + { |
| 182 | + return getGenomeCsv(genomeId, false); |
| 183 | + } |
| 184 | + |
| 185 | + private File getGenomeCsv(int genomeId, boolean forceWorkDir) throws PipelineJobException |
| 186 | + { |
| 187 | + ReferenceGenome rg = SequenceAnalysisService.get().getReferenceGenome(genomeId, getPipelineCtx().getJob().getUser()); |
| 188 | + if (rg == null) |
| 189 | + { |
| 190 | + throw new PipelineJobException("Unable to find genome: " + genomeId); |
| 191 | + } |
| 192 | + |
| 193 | + if (!forceWorkDir && AlignerIndexUtil.hasCachedIndex(getPipelineCtx(), "nimble", rg)) |
| 194 | + { |
| 195 | + File indexDir = AlignerIndexUtil.getIndexDir(rg, "nimble"); |
| 196 | + return new File(indexDir, "genome." + genomeId + ".csv"); |
| 197 | + } |
| 198 | + |
| 199 | + return new File(getPipelineCtx().getSourceDirectory(), "genome." + genomeId + ".csv"); |
154 | 200 | } |
155 | 201 |
|
156 | | - private File getGenomeCsv(int id) |
| 202 | + private File getGenomeFasta(int genomeId) throws PipelineJobException |
157 | 203 | { |
158 | | - return new File(getPipelineCtx().getSourceDirectory(), "genome." + id + ".csv"); |
| 204 | + return getGenomeFasta(genomeId, false); |
159 | 205 | } |
160 | 206 |
|
161 | | - private File getGenomeFasta(int id) |
| 207 | + private File getGenomeFasta(int genomeId, boolean forceWorkDir) throws PipelineJobException |
162 | 208 | { |
163 | | - return new File(getPipelineCtx().getSourceDirectory(), "genome." + id + ".fasta"); |
| 209 | + ReferenceGenome rg = SequenceAnalysisService.get().getReferenceGenome(genomeId, getPipelineCtx().getJob().getUser()); |
| 210 | + if (rg == null) |
| 211 | + { |
| 212 | + throw new PipelineJobException("Unable to find genome: " + genomeId); |
| 213 | + } |
| 214 | + |
| 215 | + if (!forceWorkDir && AlignerIndexUtil.hasCachedIndex(getPipelineCtx(), "nimble", rg)) |
| 216 | + { |
| 217 | + File indexDir = AlignerIndexUtil.getIndexDir(rg, "nimble"); |
| 218 | + return new File(indexDir, "genome." + genomeId + ".fasta"); |
| 219 | + } |
| 220 | + |
| 221 | + return new File(getPipelineCtx().getSourceDirectory(), "genome." + genomeId + ".fasta"); |
164 | 222 | } |
165 | 223 |
|
166 | 224 | public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, String basename) throws UnsupportedOperationException, PipelineJobException |
|
0 commit comments