Skip to content

Commit 490e4e9

Browse files
committed
Prepare to cache genomes on remote filesystem
1 parent 9cacd91 commit 490e4e9

16 files changed

+427
-39
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -178,29 +178,13 @@ public static void saveCachedIndex(boolean hasCachedIndex, PipelineContext ctx,
178178
}
179179

180180
lockFile.delete();
181+
182+
ReferenceGenomeManager.get().markGenomeModified(genome);
181183
}
182184
catch (IOException e)
183185
{
184186
throw new PipelineJobException(e);
185187
}
186188
}
187189
}
188-
189-
public static void cacheGenomeLocally(ReferenceGenome genome, File localCacheDir, Logger log) throws PipelineJobException
190-
{
191-
log.info("attempting to rsync genome to local disks: " + localCacheDir.getPath());
192-
if (genome.isTemporaryGenome())
193-
{
194-
log.info("cannot cache custom genomes, skipping");
195-
return;
196-
}
197-
198-
File sourceDir = genome.getSourceFastaFile().getParentFile();
199-
200-
new SimpleScriptWrapper(log).execute(Arrays.asList(
201-
"rsync", "-r", "-vi", "-a", "--delete", "--delete-excluded", "--exclude", "tracks/*", "--exclude", "chainFiles/*", "--no-owner", "--no-group", sourceDir.getPath(), localCacheDir.getPath()
202-
));
203-
204-
genome.setWorkingFasta(new File(new File(localCacheDir, genome.getGenomeId().toString()), genome.getSourceFastaFile().getName()));
205-
}
206190
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package org.labkey.api.sequenceanalysis.pipeline;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.log4j.Logger;
5+
import org.labkey.api.pipeline.PipelineJobException;
6+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
7+
8+
import java.io.File;
9+
import java.io.IOException;
10+
import java.util.Arrays;
11+
12+
public class ReferenceGenomeManager
13+
{
14+
private static final ReferenceGenomeManager _instance = new ReferenceGenomeManager();
15+
16+
private ReferenceGenomeManager()
17+
{
18+
19+
}
20+
21+
public static ReferenceGenomeManager get()
22+
{
23+
return _instance;
24+
}
25+
26+
private File getLocalUpdateFile(ReferenceGenome genome)
27+
{
28+
return new File(genome.getSourceFastaFile().getParentFile(), ".lastUpdate");
29+
}
30+
31+
private File getRemoteSyncFile(int genomeId)
32+
{
33+
File remoteDir = new File(SequencePipelineService.get().getRemoteGenomeCacheDirectory(), String.valueOf(genomeId));
34+
35+
return new File(remoteDir, ".lastSync");
36+
}
37+
38+
private boolean isUpToDate(ReferenceGenome genome)
39+
{
40+
File localFile = getLocalUpdateFile(genome);
41+
if (!localFile.exists())
42+
{
43+
return false;
44+
}
45+
46+
File remoteFile = getRemoteSyncFile(genome.getGenomeId());
47+
if (!remoteFile.getParentFile().exists())
48+
{
49+
return false;
50+
}
51+
52+
if (!remoteFile.exists())
53+
{
54+
return false;
55+
}
56+
57+
long lastUpdated = localFile.lastModified();
58+
long lastSync = remoteFile.lastModified();
59+
60+
return lastUpdated >= lastSync;
61+
}
62+
63+
public void markGenomeModified(ReferenceGenome genome) throws IOException
64+
{
65+
File toUpdate = getLocalUpdateFile(genome);
66+
FileUtils.touch(toUpdate);
67+
}
68+
69+
public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws PipelineJobException
70+
{
71+
File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
72+
if (localCacheDir == null)
73+
{
74+
return;
75+
}
76+
77+
log.info("attempting to rsync genome to local disks: " + localCacheDir.getPath());
78+
if (genome.isTemporaryGenome())
79+
{
80+
log.info("cannot cache custom genomes, skipping");
81+
return;
82+
}
83+
84+
if (isUpToDate(genome))
85+
{
86+
log.debug("Genome up-to-date, will not repeat rsync");
87+
return;
88+
}
89+
90+
File sourceDir = genome.getSourceFastaFile().getParentFile();
91+
92+
new SimpleScriptWrapper(log).execute(Arrays.asList(
93+
"rsync", "-r", "-vi", "-a", "--delete", "--delete-excluded", "--no-owner", "--no-group", sourceDir.getPath(), localCacheDir.getPath()
94+
));
95+
96+
try
97+
{
98+
FileUtils.touch(getRemoteSyncFile(genome.getGenomeId()));
99+
}
100+
catch (IOException e)
101+
{
102+
throw new PipelineJobException(e);
103+
}
104+
105+
genome.setWorkingFasta(new File(new File(localCacheDir, genome.getGenomeId().toString()), genome.getSourceFastaFile().getName()));
106+
}
107+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,38 @@
22

33
import org.apache.commons.io.FileUtils;
44
import org.apache.log4j.Logger;
5+
import org.labkey.api.data.CompareType;
56
import org.labkey.api.data.Container;
67
import org.labkey.api.data.ContainerManager;
78
import org.labkey.api.data.SimpleFilter;
89
import org.labkey.api.data.TableInfo;
910
import org.labkey.api.data.TableSelector;
1011
import org.labkey.api.exp.api.ExpData;
1112
import org.labkey.api.exp.api.ExperimentService;
13+
import org.labkey.api.ldk.LDKService;
1214
import org.labkey.api.pipeline.PipeRoot;
1315
import org.labkey.api.pipeline.PipelineService;
1416
import org.labkey.api.query.FieldKey;
17+
import org.labkey.api.security.User;
1518
import org.labkey.api.sequenceanalysis.RefNtSequenceModel;
1619
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
20+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1721
import org.labkey.api.util.FileType;
1822
import org.labkey.api.util.FileUtil;
23+
import org.labkey.api.util.JobRunner;
1924
import org.labkey.api.util.PageFlowUtil;
2025
import org.labkey.api.util.SystemMaintenance.MaintenanceTask;
2126
import org.labkey.sequenceanalysis.model.AnalysisModelImpl;
27+
import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
2228

2329
import java.io.File;
2430
import java.io.IOException;
2531
import java.util.ArrayList;
2632
import java.util.Collections;
33+
import java.util.HashMap;
2734
import java.util.HashSet;
2835
import java.util.List;
36+
import java.util.Map;
2937
import java.util.Set;
3038

3139
/**
@@ -56,6 +64,8 @@ public void run(Logger log)
5664
//delete sequence text files and library artifacts not associated with a DB record
5765
try
5866
{
67+
possiblySubmitRemoteTask(log);
68+
5969
processContainer(ContainerManager.getRoot(), log);
6070
verifySequenceDataPresent(log);
6171
}
@@ -65,6 +75,54 @@ public void run(Logger log)
6575
}
6676
}
6777

78+
private void possiblySubmitRemoteTask(Logger log)
79+
{
80+
if (SequencePipelineService.get().getRemoteGenomeCacheDirectory() != null)
81+
{
82+
JobRunner jr = JobRunner.getDefault();
83+
jr.execute(new Runnable()
84+
{
85+
@Override
86+
public void run()
87+
{
88+
try
89+
{
90+
Map<Integer, File> genomeMap = new HashMap<>();
91+
new TableSelector(SequenceAnalysisSchema.getInstance().getSchema().getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("rowid", "fasta_file"), new SimpleFilter(FieldKey.fromString("datedisabled"), null, CompareType.ISBLANK), null).forEachResults(rs -> {
92+
int dataId = rs.getInt(FieldKey.fromString("fasta_file"));
93+
if (dataId > -1)
94+
{
95+
ExpData d = ExperimentService.get().getExpData(dataId);
96+
if (d != null && d.getFile() != null)
97+
{
98+
genomeMap.put(rs.getInt(FieldKey.fromString("rowid")), d.getFile());
99+
}
100+
}
101+
});
102+
103+
if (!genomeMap.isEmpty())
104+
{
105+
final User adminUser = LDKService.get().getBackgroundAdminUser();
106+
if (adminUser == null)
107+
{
108+
log.error("LDK module BackgroundAdminUser property not set. If this is set, JBrowseMaintenanceTask could automatically submit repair jobs.");
109+
return;
110+
}
111+
112+
CacheGenomeTrigger.cacheGenomes(ContainerManager.getSharedContainer(), adminUser, genomeMap, log);
113+
}
114+
}
115+
catch (Exception e)
116+
{
117+
log.error(e);
118+
}
119+
}
120+
});
121+
122+
jr.waitForCompletion();
123+
}
124+
}
125+
68126
private void verifySequenceDataPresent(Logger log)
69127
{
70128
log.info("verifying sequence data files present");

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.labkey.sequenceanalysis.button.RunMultiQCButton;
5050
import org.labkey.sequenceanalysis.pipeline.AlignmentAnalysisJob;
5151
import org.labkey.sequenceanalysis.pipeline.AlignmentImportJob;
52+
import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
5253
import org.labkey.sequenceanalysis.pipeline.IlluminaImportJob;
5354
import org.labkey.sequenceanalysis.pipeline.ImportFastaSequencesPipelineJob;
5455
import org.labkey.sequenceanalysis.pipeline.ImportGenomeTrackPipelineJob;
@@ -339,6 +340,8 @@ public static void registerPipelineSteps()
339340

340341
//ObjectFactory.Registry.register(AnalysisModelImpl.class, new UnderscoreBeanObjectFactory(AnalysisModelImpl.class));
341342
//ObjectFactory.Registry.register(SequenceReadsetImpl.class, new UnderscoreBeanObjectFactory(SequenceReadsetImpl.class));
343+
344+
SequenceAnalysisService.get().registerGenomeTrigger(new CacheGenomeTrigger());
342345
}
343346

344347
@Override
@@ -404,6 +407,7 @@ public Set<ExperimentRunType> getExperimentRunTypes(@Nullable Container containe
404407
PipelineService.get().registerPipelineProvider(new ImportGenomeTrackPipelineJob.Provider(this));
405408
PipelineService.get().registerPipelineProvider(new OrphanFilePipelineProvider(this));
406409
PipelineService.get().registerPipelineProvider(new SequencePipelineProvider(this));
410+
PipelineService.get().registerPipelineProvider(new CacheGenomeTrigger.CacheGenomePipelineJobProvider(this));
407411

408412
LDKService.get().registerQueryButton(new ReprocessLibraryButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_REF_LIBRARIES);
409413

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
1515
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
1616
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
17+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenomeManager;
1718
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1819
import org.labkey.api.util.FileType;
1920
import org.labkey.sequenceanalysis.run.util.FastaIndexer;
@@ -131,7 +132,7 @@ public RecordedActionSet run() throws PipelineJobException
131132
//create locally first
132133
alignmentStep.createIndex(referenceGenome, _wd.getDir());
133134

134-
//NOTE: the AlignerSteps are doing this themselves. not sure if that is the right behvior
135+
//NOTE: the AlignerSteps are doing this themselves. not sure if that is the right behavior
135136
if (!AlignerIndexUtil.hasCachedIndex(ctx, provider.getName(), referenceGenome))
136137
{
137138
AlignerIndexUtil.saveCachedIndex(false, ctx, outDir, provider.getName(), referenceGenome);
@@ -166,7 +167,7 @@ public RecordedActionSet run() throws PipelineJobException
166167
File cacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
167168
if (cacheDir != null)
168169
{
169-
AlignerIndexUtil.cacheGenomeLocally(referenceGenome, cacheDir, getJob().getLogger());
170+
ReferenceGenomeManager.get().cacheGenomeLocally(referenceGenome, getJob().getLogger());
170171
}
171172

172173
return new RecordedActionSet();

0 commit comments

Comments
 (0)