Skip to content

Commit cf0b65c

Browse files
committed
Bugfix genome cache pipeline
1 parent 2049585 commit cf0b65c

File tree

3 files changed

+52
-6
lines changed

3 files changed

+52
-6
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ public void run()
109109
return;
110110
}
111111

112-
CacheGenomeTrigger.cacheGenomes(ContainerManager.getSharedContainer(), adminUser, genomeMap, log);
112+
CacheGenomeTrigger.cacheGenomes(ContainerManager.getSharedContainer(), adminUser, genomeMap, log, true);
113113
}
114114
}
115115
catch (Exception e)

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java

Lines changed: 40 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
package org.labkey.sequenceanalysis.pipeline;
22

3+
import org.apache.commons.lang3.StringUtils;
34
import org.jetbrains.annotations.NotNull;
45
import org.labkey.api.data.Container;
56
import org.labkey.api.files.FileUrls;
@@ -10,8 +11,11 @@
1011
import org.labkey.api.pipeline.PipelineDirectory;
1112
import org.labkey.api.pipeline.PipelineJob;
1213
import org.labkey.api.pipeline.PipelineJobException;
14+
import org.labkey.api.pipeline.PipelineJobService;
1315
import org.labkey.api.pipeline.PipelineProvider;
1416
import org.labkey.api.pipeline.RecordedActionSet;
17+
import org.labkey.api.pipeline.TaskId;
18+
import org.labkey.api.pipeline.TaskPipeline;
1519
import org.labkey.api.security.User;
1620
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
1721
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenomeManager;
@@ -28,10 +32,13 @@
2832
import java.util.Collections;
2933
import java.util.List;
3034
import java.util.Map;
35+
import java.util.Set;
36+
import java.util.stream.Collectors;
3137

3238
public class CacheGenomePipelineJob extends PipelineJob
3339
{
3440
private Map<Integer, File> _genomeMap;
41+
private boolean _deleteOtherFolders = false;
3542

3643
public static class Provider extends PipelineProvider
3744
{
@@ -55,11 +62,12 @@ protected CacheGenomePipelineJob()
5562

5663
}
5764

58-
public CacheGenomePipelineJob(Container c, User user, PipeRoot pipeRoot, Map<Integer, File> genomeMap, File outputDir)
65+
public CacheGenomePipelineJob(Container c, User user, PipeRoot pipeRoot, Map<Integer, File> genomeMap, File outputDir, boolean deleteOtherFolders)
5966
{
6067
super(Provider.NAME, new ViewBackgroundInfo(c, user, null), pipeRoot);
6168

6269
_genomeMap = genomeMap;
70+
_deleteOtherFolders = deleteOtherFolders;
6371

6472
setLogFile(new File(outputDir, FileUtil.makeFileNameWithTimestamp("cacheGenomes", "log")));
6573

@@ -75,6 +83,16 @@ public void setGenomeMap(Map<Integer, File> genomeMap)
7583
_genomeMap = genomeMap;
7684
}
7785

86+
public boolean isDeleteOtherFolders()
87+
{
88+
return _deleteOtherFolders;
89+
}
90+
91+
public void setDeleteOtherFolders(boolean deleteOtherFolders)
92+
{
93+
_deleteOtherFolders = deleteOtherFolders;
94+
}
95+
7896
@Override
7997
public URLHelper getStatusHref()
8098
{
@@ -87,6 +105,12 @@ public String getDescription()
87105
return "Caches reference genomes to a remote filesystem";
88106
}
89107

108+
@Override
109+
public TaskPipeline getTaskPipeline()
110+
{
111+
return PipelineJobService.get().getTaskPipeline(new TaskId(CacheGenomePipelineJob.class));
112+
}
113+
90114
public static class CacheGenomesTask extends PipelineJob.Task<CacheGenomesTask.Factory>
91115
{
92116
protected CacheGenomesTask(Factory factory, PipelineJob job)
@@ -135,7 +159,7 @@ public boolean isJobComplete(PipelineJob job)
135159
File cacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
136160
if (cacheDir == null)
137161
{
138-
return new RecordedActionSet();
162+
throw new PipelineJobException("This job should not have been initiated unless REMOTE_GENOME_CACHE_DIR is set");
139163
}
140164

141165
if (!cacheDir.exists())
@@ -151,6 +175,20 @@ public boolean isJobComplete(PipelineJob job)
151175
ReferenceGenomeManager.get().cacheGenomeLocally(rg, getJob().getLogger());
152176
}
153177

178+
if (job.isDeleteOtherFolders())
179+
{
180+
Set<String> whitelist = job.getGenomeMap().keySet().stream().map(String::valueOf).collect(Collectors.toSet());
181+
File[] toDelete = cacheDir.listFiles((file) -> {
182+
return !whitelist.contains(file.getName());
183+
});
184+
185+
if (toDelete != null && toDelete.length > 0)
186+
{
187+
getJob().getLogger().info("Folders will be deleted: " + StringUtils.join(toDelete, ", "));
188+
//TODO: verify
189+
}
190+
}
191+
154192
return new RecordedActionSet();
155193
}
156194
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomeTrigger.java

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import org.labkey.api.sequenceanalysis.GenomeTrigger;
1212
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1313
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1415
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
1516

1617
import java.io.File;
@@ -50,7 +51,7 @@ private void possiblyCache(Container c, User u, Logger log, int genomeId)
5051
Map<Integer, File> genomeMap = new HashMap<>();
5152
ReferenceGenome rg = SequenceAnalysisService.get().getReferenceGenome(genomeId, u);
5253
genomeMap.put(rg.getGenomeId(), rg.getSourceFastaFile());
53-
cacheGenomes(c, u, genomeMap, log);
54+
cacheGenomes(c, u, genomeMap, log, false);
5455
}
5556
catch (PipelineJobException e)
5657
{
@@ -64,8 +65,15 @@ public boolean isAvailable(Container c)
6465
return c.getActiveModules().contains(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.NAME));
6566
}
6667

67-
public static void cacheGenomes(Container c, User u, Map<Integer, File> genomeMap, Logger log)
68+
public static void cacheGenomes(Container c, User u, Map<Integer, File> genomeMap, Logger log, boolean deleteOtherFolders)
6869
{
70+
//Don't start pipeline job unless REMOTE_GENOME_CACHE_DIR is set
71+
File cacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
72+
if (cacheDir == null)
73+
{
74+
return;
75+
}
76+
6977
PipeRoot pipeRoot = PipelineService.get().findPipelineRoot(c);
7078

7179
File logFileDir = new File(pipeRoot.getRootPath(), CacheGenomePipelineJob.Provider.NAME);
@@ -74,7 +82,7 @@ public static void cacheGenomes(Container c, User u, Map<Integer, File> genomeMa
7482
logFileDir.mkdirs();
7583
}
7684

77-
CacheGenomePipelineJob job = new CacheGenomePipelineJob(c, u, pipeRoot, genomeMap, logFileDir);
85+
CacheGenomePipelineJob job = new CacheGenomePipelineJob(c, u, pipeRoot, genomeMap, logFileDir, deleteOtherFolders);
7886

7987
try
8088
{

0 commit comments

Comments
 (0)