Skip to content

Commit 03b6924

Browse files
authored
Merge pull request #66 from LabKey/fb_merge_discvr-20.7
Merge discvr-20.7 to develop
2 parents 8a7b236 + 063bcf2 commit 03b6924

30 files changed

+631
-66
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -179,29 +179,13 @@ public static void saveCachedIndex(boolean hasCachedIndex, PipelineContext ctx,
179179
}
180180

181181
lockFile.delete();
182+
183+
ReferenceGenomeManager.get().markGenomeModified(genome);
182184
}
183185
catch (IOException e)
184186
{
185187
throw new PipelineJobException(e);
186188
}
187189
}
188190
}
189-
190-
/**
 * Copies a reference genome onto local disk with rsync and points the genome at the copy.
 * <p>
 * The directory containing the genome's source FASTA is rsync'ed into <code>localCacheDir</code>,
 * excluding <code>tracks/*</code> and <code>chainFiles/*</code> and deleting files at the destination
 * that are absent from (or excluded at) the source. Afterwards the genome's working FASTA is set to
 * <code>localCacheDir/&lt;genomeId&gt;/&lt;source FASTA file name&gt;</code>.
 * <p>
 * Temporary (custom) genomes are not cached; for those the method only logs and returns. Note that the
 * "attempting to rsync" message is logged before that check is made.
 * <p>
 * This is the removed side of the diff: the lines are marked as deleted here, and a method of the same
 * name appears in ReferenceGenomeManager elsewhere in this commit.
 *
 * @param genome        the genome to cache
 * @param localCacheDir destination root for the rsync
 * @param log           logger for progress messages, also handed to the script wrapper
 * @throws PipelineJobException declared by this method; presumably raised when the rsync command fails - confirm against SimpleScriptWrapper
 */
public static void cacheGenomeLocally(ReferenceGenome genome, File localCacheDir, Logger log) throws PipelineJobException
191-
{
192-
log.info("attempting to rsync genome to local disks: " + localCacheDir.getPath());
193-
if (genome.isTemporaryGenome())
194-
{
195-
log.info("cannot cache custom genomes, skipping");
196-
return;
197-
}
198-
199-
File sourceDir = genome.getSourceFastaFile().getParentFile();
200-
201-
new SimpleScriptWrapper(log).execute(Arrays.asList(
202-
"rsync", "-r", "-vi", "-a", "--delete", "--delete-excluded", "--exclude", "tracks/*", "--exclude", "chainFiles/*", "--no-owner", "--no-group", sourceDir.getPath(), localCacheDir.getPath()
203-
));
204-
205-
genome.setWorkingFasta(new File(new File(localCacheDir, genome.getGenomeId().toString()), genome.getSourceFastaFile().getName()));
206-
}
207191
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package org.labkey.api.sequenceanalysis.pipeline;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.logging.log4j.Logger;
5+
import org.labkey.api.pipeline.PipelineJobException;
6+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
7+
8+
import java.io.File;
9+
import java.io.IOException;
10+
import java.util.Arrays;
11+
12+
public class ReferenceGenomeManager
13+
{
14+
private static final ReferenceGenomeManager _instance = new ReferenceGenomeManager();
15+
16+
private ReferenceGenomeManager()
17+
{
18+
19+
}
20+
21+
public static ReferenceGenomeManager get()
22+
{
23+
return _instance;
24+
}
25+
26+
private File getLocalUpdateFile(ReferenceGenome genome)
27+
{
28+
return new File(genome.getSourceFastaFile().getParentFile(), ".lastUpdate");
29+
}
30+
31+
private File getRemoteSyncFile(int genomeId)
32+
{
33+
File remoteDir = new File(SequencePipelineService.get().getRemoteGenomeCacheDirectory(), String.valueOf(genomeId));
34+
35+
return new File(remoteDir, ".lastSync");
36+
}
37+
38+
private boolean isUpToDate(ReferenceGenome genome)
39+
{
40+
File localFile = getLocalUpdateFile(genome);
41+
if (!localFile.exists())
42+
{
43+
return false;
44+
}
45+
46+
File remoteFile = getRemoteSyncFile(genome.getGenomeId());
47+
if (!remoteFile.getParentFile().exists())
48+
{
49+
return false;
50+
}
51+
52+
if (!remoteFile.exists())
53+
{
54+
return false;
55+
}
56+
57+
long lastUpdated = localFile.lastModified();
58+
long lastSync = remoteFile.lastModified();
59+
60+
return lastUpdated >= lastSync;
61+
}
62+
63+
public void markGenomeModified(ReferenceGenome genome) throws IOException
64+
{
65+
File toUpdate = getLocalUpdateFile(genome);
66+
FileUtils.touch(toUpdate);
67+
}
68+
69+
public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws PipelineJobException
70+
{
71+
if (!SequencePipelineService.get().isRemoteGenomeCacheUsed())
72+
{
73+
return;
74+
}
75+
76+
if (genome.isTemporaryGenome())
77+
{
78+
log.info("cannot cache custom genomes, skipping");
79+
return;
80+
}
81+
82+
if (isUpToDate(genome))
83+
{
84+
log.debug("Genome up-to-date, will not repeat rsync");
85+
return;
86+
}
87+
88+
File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
89+
log.info("attempting to rsync genome to local disks: " + localCacheDir.getPath());
90+
91+
File sourceDir = genome.getSourceFastaFile().getParentFile();
92+
93+
//Note: neither source nor dest have trailing slashes, so the entire source (i.e '128', gets synced into a subdir of dest)
94+
new SimpleScriptWrapper(log).execute(Arrays.asList(
95+
"rsync", "-r", "-a", "--delete", "--delete-excluded", "--no-owner", "--no-group", sourceDir.getPath(), localCacheDir.getPath()
96+
));
97+
98+
try
99+
{
100+
File lastUpdate = getLocalUpdateFile(genome);
101+
if (!lastUpdate.exists())
102+
{
103+
FileUtils.touch(lastUpdate);
104+
}
105+
106+
FileUtils.touch(getRemoteSyncFile(genome.getGenomeId()));
107+
}
108+
catch (IOException e)
109+
{
110+
throw new PipelineJobException(e);
111+
}
112+
113+
genome.setWorkingFasta(new File(new File(localCacheDir, genome.getGenomeId().toString()), genome.getSourceFastaFile().getName()));
114+
}
115+
}

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ static public void setInstance(SequencePipelineService instance)
7979

8080
abstract public List<String> getJavaOpts(@Nullable Integer maxRamOverride);
8181

82+
/**
 * Indicates whether a remote genome cache is in use. Callers in this commit skip genome caching
 * entirely when this returns false.
 * <p>
 * SequencePipelineServiceImpl implements this by checking that the REMOTE_GENOME_CACHE_DIR
 * software package path in the pipeline config is non-blank.
 *
 * @return true if genomes should be cached under {@link #getRemoteGenomeCacheDirectory()}
 */
abstract public boolean isRemoteGenomeCacheUsed();
83+
8284
@Nullable
8385
abstract public File getRemoteGenomeCacheDirectory();
8486

SequenceAnalysis/resources/queries/sequenceanalysis/readData/.qview.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<customView xmlns="http://labkey.org/data/xml/queryCustomView" label="All">
1+
<customView xmlns="http://labkey.org/data/xml/queryCustomView" label="All" canOverride="true">
22
<columns>
33
<column name="rowid" />
44
<column name="readset/rowid" />

SequenceAnalysis/resources/queries/sequenceanalysis/readData/File Details.qview.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<customView xmlns="http://labkey.org/data/xml/queryCustomView">
1+
<customView xmlns="http://labkey.org/data/xml/queryCustomView" canOverride="true">
22
<columns>
33
<column name="rowid" />
44
<column name="readset/rowid" />
@@ -15,7 +15,7 @@
1515
<column name="archived" />
1616

1717
<column name="created" />
18-
<column name="workbook" />
18+
<column name="readset/workbook" />
1919
</columns>
2020
<sorts>
2121
<sort column="readset" />

SequenceAnalysis/resources/queries/sequenceanalysis/readData/With Filepath.qview.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<customView xmlns="http://labkey.org/data/xml/queryCustomView">
1+
<customView xmlns="http://labkey.org/data/xml/queryCustomView" canOverride="true">
22
<columns>
33
<column name="rowid" />
44
<column name="readset/rowid" />
@@ -15,7 +15,7 @@
1515
<column name="archived" />
1616

1717
<column name="created" />
18-
<column name="workbook" />
18+
<column name="readset/workbook" />
1919
</columns>
2020
<sorts>
2121
<sort column="readset" />

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,38 @@
33
import org.apache.commons.io.FileUtils;
44
import org.apache.logging.log4j.Logger;
55
import org.apache.logging.log4j.LogManager;
6+
import org.labkey.api.data.CompareType;
67
import org.labkey.api.data.Container;
78
import org.labkey.api.data.ContainerManager;
89
import org.labkey.api.data.SimpleFilter;
910
import org.labkey.api.data.TableInfo;
1011
import org.labkey.api.data.TableSelector;
1112
import org.labkey.api.exp.api.ExpData;
1213
import org.labkey.api.exp.api.ExperimentService;
14+
import org.labkey.api.ldk.LDKService;
1315
import org.labkey.api.pipeline.PipeRoot;
1416
import org.labkey.api.pipeline.PipelineService;
1517
import org.labkey.api.query.FieldKey;
18+
import org.labkey.api.security.User;
1619
import org.labkey.api.sequenceanalysis.RefNtSequenceModel;
1720
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
21+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1822
import org.labkey.api.util.FileType;
1923
import org.labkey.api.util.FileUtil;
24+
import org.labkey.api.util.JobRunner;
2025
import org.labkey.api.util.PageFlowUtil;
2126
import org.labkey.api.util.SystemMaintenance.MaintenanceTask;
2227
import org.labkey.sequenceanalysis.model.AnalysisModelImpl;
28+
import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
2329

2430
import java.io.File;
2531
import java.io.IOException;
2632
import java.util.ArrayList;
2733
import java.util.Collections;
34+
import java.util.HashMap;
2835
import java.util.HashSet;
2936
import java.util.List;
37+
import java.util.Map;
3038
import java.util.Set;
3139

3240
/**
@@ -57,6 +65,8 @@ public void run(Logger log)
5765
//delete sequence text files and library artifacts not associated with a DB record
5866
try
5967
{
68+
possiblySubmitRemoteTask(log);
69+
6070
processContainer(ContainerManager.getRoot(), log);
6171
verifySequenceDataPresent(log);
6272
}
@@ -66,6 +76,58 @@ public void run(Logger log)
6676
}
6777
}
6878

79+
private void possiblySubmitRemoteTask(Logger log)
80+
{
81+
if (SequencePipelineService.get().isRemoteGenomeCacheUsed())
82+
{
83+
JobRunner jr = JobRunner.getDefault();
84+
jr.execute(new Runnable()
85+
{
86+
@Override
87+
public void run()
88+
{
89+
try
90+
{
91+
Map<Integer, File> genomeMap = new HashMap<>();
92+
new TableSelector(SequenceAnalysisSchema.getInstance().getSchema().getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("rowid", "fasta_file"), new SimpleFilter(FieldKey.fromString("datedisabled"), null, CompareType.ISBLANK), null).forEachResults(rs -> {
93+
int dataId = rs.getInt(FieldKey.fromString("fasta_file"));
94+
if (dataId > -1)
95+
{
96+
ExpData d = ExperimentService.get().getExpData(dataId);
97+
if (d != null && d.getFile() != null)
98+
{
99+
genomeMap.put(rs.getInt(FieldKey.fromString("rowid")), d.getFile());
100+
}
101+
}
102+
});
103+
104+
if (!genomeMap.isEmpty())
105+
{
106+
final User adminUser = LDKService.get().getBackgroundAdminUser();
107+
if (adminUser == null)
108+
{
109+
log.error("LDK module BackgroundAdminUser property not set. If this is set, JBrowseMaintenanceTask could automatically submit repair jobs.");
110+
return;
111+
}
112+
113+
CacheGenomeTrigger.cacheGenomes(ContainerManager.getSharedContainer(), adminUser, genomeMap, log, true);
114+
}
115+
}
116+
catch (Exception e)
117+
{
118+
log.error(e);
119+
}
120+
}
121+
});
122+
123+
jr.waitForCompletion();
124+
}
125+
else
126+
{
127+
log.debug("Genome caching not used, skipping");
128+
}
129+
}
130+
69131
private void verifySequenceDataPresent(Logger log)
70132
{
71133
log.info("verifying sequence data files present");

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
import org.labkey.sequenceanalysis.button.RunMultiQCButton;
5151
import org.labkey.sequenceanalysis.pipeline.AlignmentAnalysisJob;
5252
import org.labkey.sequenceanalysis.pipeline.AlignmentImportJob;
53+
import org.labkey.sequenceanalysis.pipeline.CacheGenomePipelineJob;
54+
import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
5355
import org.labkey.sequenceanalysis.pipeline.IlluminaImportJob;
5456
import org.labkey.sequenceanalysis.pipeline.ImportFastaSequencesPipelineJob;
5557
import org.labkey.sequenceanalysis.pipeline.ImportGenomeTrackPipelineJob;
@@ -340,6 +342,8 @@ public static void registerPipelineSteps()
340342

341343
//ObjectFactory.Registry.register(AnalysisModelImpl.class, new UnderscoreBeanObjectFactory(AnalysisModelImpl.class));
342344
//ObjectFactory.Registry.register(SequenceReadsetImpl.class, new UnderscoreBeanObjectFactory(SequenceReadsetImpl.class));
345+
346+
SequenceAnalysisService.get().registerGenomeTrigger(new CacheGenomeTrigger());
343347
}
344348

345349
@Override
@@ -405,6 +409,7 @@ public Set<ExperimentRunType> getExperimentRunTypes(@Nullable Container containe
405409
PipelineService.get().registerPipelineProvider(new ImportGenomeTrackPipelineJob.Provider(this));
406410
PipelineService.get().registerPipelineProvider(new OrphanFilePipelineProvider(this));
407411
PipelineService.get().registerPipelineProvider(new SequencePipelineProvider(this));
412+
PipelineService.get().registerPipelineProvider(new CacheGenomePipelineJob.Provider(this));
408413

409414
LDKService.get().registerQueryButton(new ReprocessLibraryButton(), SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_REF_LIBRARIES);
410415

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,13 @@ else if (maxRam != null)
318318
return params;
319319
}
320320

321+
@Override
322+
public boolean isRemoteGenomeCacheUsed()
323+
{
324+
String dir = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("REMOTE_GENOME_CACHE_DIR");
325+
return StringUtils.trimToNull(dir) != null;
326+
}
327+
321328
@Override
322329
public File getRemoteGenomeCacheDirectory()
323330
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GenotypeGVCFHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ private File runGenotypeGVCFs(PipelineJob job, JobContext ctx, ProcessVariantsHa
356356
toolParams.add(f.getPath());
357357
}
358358

359-
if (ctx.getParams().get("variantCalling.GenotypeGVCFs.allowOldRmsMappingData") != null)
359+
if (ctx.getParams().optBoolean("variantCalling.GenotypeGVCFs.allowOldRmsMappingData", false))
360360
{
361361
toolParams.add("--allow-old-rms-mapping-quality-annotation-data");
362362
}
@@ -383,7 +383,7 @@ private File runGenotypeGVCFs(PipelineJob job, JobContext ctx, ProcessVariantsHa
383383
toolParams.add("--genomicsdb-shared-posixfs-optimizations");
384384
}
385385

386-
wrapper.execute(genome.getSourceFastaFile(), outputVcf, toolParams, inputVcf);
386+
wrapper.execute(genome.getWorkingFastaFile(), outputVcf, toolParams, inputVcf);
387387

388388
action.addOutput(outputVcf, "VCF", outputVcf.exists(), true);
389389
action.setEndTime(new Date());

0 commit comments

Comments
 (0)