Skip to content

Commit c3bd8dd

Browse files
hextraza, Sebastian Benjamin, and bbimber
authored
Add LRUQueryCache (#273)
* Add LRUQueryCache
* Custom query caching policy
* Query strategy works on BooleanQuery with MatchAllDocs predicate
* Use proper Cache and add action to report cache info
* Improve handling of lucene cache
* Add code to cache the first page of each mGAP JBrowse session
* Add logging and clear cache prior to adding default query

---------

Co-authored-by: Sebastian Benjamin <sebastiancbenjamin@gmail.com>
Co-authored-by: bbimber <bbimber@gmail.com>
1 parent 7cde18d commit c3bd8dd

File tree

8 files changed

+218
-7
lines changed

8 files changed

+218
-7
lines changed

jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ static public void setInstance(JBrowseService instance)
4848

4949
abstract public void registerLuceneIndexDetector(LuceneIndexDetector detector);
5050

51+
abstract public void cacheDefaultQuery(User u, String sessionId, String trackId);
52+
5153
public interface LuceneIndexDetector
5254
{
5355
SequenceOutputFile findMatchingLuceneIndex(SequenceOutputFile vcfFile, List<String> infoFieldsToIndex, User u, @Nullable Logger log) throws PipelineJobException;

jbrowse/src/org/labkey/jbrowse/JBrowseController.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,5 +1035,15 @@ public void setIncludeDefaultFields(boolean includeDefaultFields)
10351035
this.includeDefaultFields = includeDefaultFields;
10361036
}
10371037
}
1038+
1039+
@RequiresPermission(ReadPermission.class)
1040+
public static class GetLuceneCacheInfoAction extends ReadOnlyApiAction<Object>
1041+
{
1042+
@Override
1043+
public ApiResponse execute(Object form, BindException errors)
1044+
{
1045+
return new ApiSimpleResponse("cacheInfo", JBrowseLuceneSearch.reportCacheInfo());
1046+
}
1047+
}
10381048
}
10391049

jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.labkey.jbrowse;
22

33
import org.apache.commons.lang3.StringUtils;
4+
import org.apache.logging.log4j.Logger;
45
import org.apache.lucene.analysis.Analyzer;
56
import org.apache.lucene.analysis.standard.StandardAnalyzer;
67
import org.apache.lucene.document.Document;
@@ -15,16 +16,22 @@
1516
import org.apache.lucene.search.BooleanClause;
1617
import org.apache.lucene.search.BooleanQuery;
1718
import org.apache.lucene.search.IndexSearcher;
19+
import org.apache.lucene.search.LRUQueryCache;
1820
import org.apache.lucene.search.MatchAllDocsQuery;
1921
import org.apache.lucene.search.Query;
22+
import org.apache.lucene.search.QueryCache;
23+
import org.apache.lucene.search.QueryCachingPolicy;
2024
import org.apache.lucene.search.Sort;
2125
import org.apache.lucene.search.SortField;
2226
import org.apache.lucene.search.TopFieldDocs;
27+
import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
2328
import org.apache.lucene.store.Directory;
2429
import org.apache.lucene.store.FSDirectory;
25-
import org.apache.lucene.util.NumericUtils;
2630
import org.jetbrains.annotations.Nullable;
31+
import org.json.JSONArray;
2732
import org.json.JSONObject;
33+
import org.labkey.api.cache.Cache;
34+
import org.labkey.api.cache.CacheManager;
2835
import org.labkey.api.data.Container;
2936
import org.labkey.api.data.ContainerManager;
3037
import org.labkey.api.jbrowse.AbstractJBrowseFieldCustomizer;
@@ -33,6 +40,7 @@
3340
import org.labkey.api.module.ModuleLoader;
3441
import org.labkey.api.security.User;
3542
import org.labkey.api.settings.AppProps;
43+
import org.labkey.api.util.logging.LogHelper;
3644
import org.labkey.jbrowse.model.JBrowseSession;
3745
import org.labkey.jbrowse.model.JsonFile;
3846

@@ -51,20 +59,24 @@
5159
import java.util.StringTokenizer;
5260
import java.util.regex.Matcher;
5361
import java.util.regex.Pattern;
54-
import java.util.stream.Collectors;
5562

5663
import static org.labkey.jbrowse.JBrowseFieldUtils.VARIABLE_SAMPLES;
5764
import static org.labkey.jbrowse.JBrowseFieldUtils.getSession;
5865
import static org.labkey.jbrowse.JBrowseFieldUtils.getTrack;
5966

6067
public class JBrowseLuceneSearch
6168
{
69+
private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries");
6270
private final JBrowseSession _session;
6371
private final JsonFile _jsonFile;
6472
private final User _user;
6573
private final String[] specialStartPatterns = {"*:* -", "+", "-"};
6674
private static final String ALL_DOCS = "all";
6775
private static final String GENOMIC_POSITION = "genomicPosition";
76+
private static final int maxCachedQueries = 1000;
77+
private static final long maxRamBytesUsed = 250 * 1024 * 1024L;
78+
79+
private static final Cache<String, LRUQueryCache> _cache = CacheManager.getStringKeyCache(1000, CacheManager.UNLIMITED, "JBrowseLuceneSearchCache");
6880

6981
private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u)
7082
{
@@ -85,6 +97,17 @@ public static JBrowseLuceneSearch create(String sessionId, String trackId, User
8597
return new JBrowseLuceneSearch(session, getTrack(session, trackId, u), u);
8698
}
8799

100+
private static synchronized QueryCache getCacheForSession(String trackObjectId) {
101+
LRUQueryCache qc = _cache.get(trackObjectId);
102+
if (qc == null)
103+
{
104+
qc = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
105+
_cache.put(trackObjectId, qc);
106+
}
107+
108+
return qc;
109+
}
110+
88111
private String templateReplace(final String searchString) {
89112
String result = searchString;
90113
Pattern pattern = Pattern.compile("~(.*?)~");
@@ -148,6 +171,8 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
148171
)
149172
{
150173
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
174+
indexSearcher.setQueryCache(getCacheForSession(_jsonFile.getObjectId()));
175+
indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy());
151176

152177
List<String> stringQueryParserFields = new ArrayList<>();
153178
Map<String, SortField.Type> numericQueryParserFields = new HashMap<>();
@@ -263,7 +288,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
263288
for (int i = pageSize * offset; i < Math.min(pageSize * (offset + 1), topDocs.scoreDocs.length); i++)
264289
{
265290
JSONObject elem = new JSONObject();
266-
Document doc = indexSearcher.doc(topDocs.scoreDocs[i].doc);
291+
Document doc = indexSearcher.storedFields().document(topDocs.scoreDocs[i].doc);
267292

268293
for (IndexableField field : doc.getFields()) {
269294
String fieldName = field.name();
@@ -345,4 +370,70 @@ public boolean isAvailable(Container c, User u)
345370
return true;
346371
}
347372
}
373+
374+
public static class ForceMatchAllDocsCachingPolicy implements QueryCachingPolicy {
375+
private final UsageTrackingQueryCachingPolicy defaultPolicy = new UsageTrackingQueryCachingPolicy();
376+
377+
@Override
378+
public boolean shouldCache(Query query) throws IOException {
379+
if (query instanceof BooleanQuery bq) {
380+
for (BooleanClause clause : bq) {
381+
if (clause.getQuery() instanceof MatchAllDocsQuery) {
382+
return true;
383+
}
384+
}
385+
}
386+
387+
return defaultPolicy.shouldCache(query);
388+
}
389+
390+
@Override
391+
public void onUse(Query query) {
392+
defaultPolicy.onUse(query);
393+
}
394+
}
395+
396+
public static JSONArray reportCacheInfo()
397+
{
398+
JSONArray cacheInfo = new JSONArray();
399+
for (String sessionId : _cache.getKeys())
400+
{
401+
LRUQueryCache qc = _cache.get(sessionId);
402+
JSONObject info = new JSONObject();
403+
info.put("cacheSize", qc.getCacheSize());
404+
info.put("cacheCount", qc.getCacheCount());
405+
info.put("hitCount", qc.getHitCount());
406+
info.put("missCount", qc.getMissCount());
407+
info.put("evictionCount", qc.getEvictionCount());
408+
info.put("totalCount", qc.getTotalCount());
409+
cacheInfo.put(info);
410+
}
411+
412+
return cacheInfo;
413+
}
414+
415+
public void cacheDefaultQuery()
416+
{
417+
try
418+
{
419+
JBrowseLuceneSearch.clearCache(_jsonFile.getObjectId());
420+
doSearch(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false);
421+
}
422+
catch (ParseException | IOException e)
423+
{
424+
_log.error("Unable to cache default query for: " + _jsonFile.getObjectId(), e);
425+
}
426+
}
427+
428+
public static void clearCache(@Nullable String jbrowseTrackId)
429+
{
430+
if (jbrowseTrackId == null)
431+
{
432+
_cache.clear();
433+
}
434+
else
435+
{
436+
_cache.remove(jbrowseTrackId);
437+
}
438+
}
348439
}

jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,13 @@ public void registerLuceneIndexDetector(LuceneIndexDetector detector)
312312
_detectors.add(detector);
313313
}
314314

315+
@Override
316+
public void cacheDefaultQuery(User u, String sessionId, String trackId)
317+
{
318+
JBrowseLuceneSearch luceneSearch = JBrowseLuceneSearch.create(sessionId, trackId, u);
319+
luceneSearch.cacheDefaultQuery();
320+
}
321+
315322
public static final class DefaultLuceneIndexDetector implements LuceneIndexDetector
316323
{
317324
@Override

jbrowse/src/org/labkey/jbrowse/model/JsonFile.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@
5050
import org.labkey.api.util.PageFlowUtil;
5151
import org.labkey.api.util.Path;
5252
import org.labkey.api.view.UnauthorizedException;
53+
import org.labkey.jbrowse.JBrowseLuceneSearch;
5354
import org.labkey.jbrowse.JBrowseManager;
5455
import org.labkey.jbrowse.JBrowseSchema;
55-
import org.labkey.jbrowse.pipeline.IndexVariantsStep;
5656
import org.labkey.jbrowse.pipeline.JBrowseLucenePipelineJob;
5757
import org.labkey.sequenceanalysis.run.util.TabixRunner;
5858

@@ -964,6 +964,8 @@ public File prepareResource(User u, Logger log, boolean throwIfNotPrepared, bool
964964
}
965965
else if (existingLuceneDir != null && existingLuceneDir.exists())
966966
{
967+
JBrowseLuceneSearch.clearCache(getObjectId());
968+
967969
// Note: this could exist, but be an empty folder:
968970
if (luceneDir.exists())
969971
{
@@ -1004,7 +1006,7 @@ else if (sizeInGb > 50)
10041006
try
10051007
{
10061008
PipeRoot root = PipelineService.get().getPipelineRootSetting(getContainerObj());
1007-
PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
1009+
PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, getObjectId(), vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
10081010
}
10091011
catch (PipelineValidationException e)
10101012
{
@@ -1030,6 +1032,7 @@ else if (sizeInGb > 50)
10301032
if (forceReprocess || !doesLuceneIndexExist())
10311033
{
10321034
JBrowseLucenePipelineJob.prepareLuceneIndex(targetFile, luceneDir, log, getInfoFieldsToIndex(), allowLenientLuceneProcessing());
1035+
JBrowseLuceneSearch.clearCache(getObjectId());
10331036
}
10341037
else
10351038
{
jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLuceneFinalTask.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package org.labkey.jbrowse.pipeline;
2+
3+
import org.jetbrains.annotations.NotNull;
4+
import org.labkey.api.pipeline.AbstractTaskFactory;
5+
import org.labkey.api.pipeline.AbstractTaskFactorySettings;
6+
import org.labkey.api.pipeline.PipelineJob;
7+
import org.labkey.api.pipeline.PipelineJobException;
8+
import org.labkey.api.pipeline.PipelineJobService;
9+
import org.labkey.api.pipeline.RecordedAction;
10+
import org.labkey.api.pipeline.RecordedActionSet;
11+
import org.labkey.api.util.FileType;
12+
import org.labkey.jbrowse.JBrowseLuceneSearch;
13+
14+
import java.util.Collections;
15+
import java.util.List;
16+
17+
/**
18+
* User: bbimber
19+
* Date: 8/6/12
20+
* Time: 12:57 PM
21+
*/
22+
public class JBrowseLuceneFinalTask extends PipelineJob.Task<JBrowseLuceneFinalTask.Factory>
23+
{
24+
protected JBrowseLuceneFinalTask(Factory factory, PipelineJob job)
25+
{
26+
super(factory, job);
27+
}
28+
29+
public static class Factory extends AbstractTaskFactory<AbstractTaskFactorySettings, Factory>
30+
{
31+
public Factory()
32+
{
33+
super(JBrowseLuceneFinalTask.class);
34+
}
35+
36+
@Override
37+
public List<FileType> getInputTypes()
38+
{
39+
return Collections.emptyList();
40+
}
41+
42+
@Override
43+
public String getStatusName()
44+
{
45+
return PipelineJob.TaskStatus.running.toString();
46+
}
47+
48+
@Override
49+
public List<String> getProtocolActionNames()
50+
{
51+
return List.of("JBrowse-Lucene-Finalize");
52+
}
53+
54+
@Override
55+
public PipelineJob.Task<?> createTask(PipelineJob job)
56+
{
57+
return new JBrowseLuceneFinalTask(this, job);
58+
}
59+
60+
@Override
61+
public boolean isJobComplete(PipelineJob job)
62+
{
63+
return false;
64+
}
65+
}
66+
67+
@Override
68+
@NotNull
69+
public RecordedActionSet run() throws PipelineJobException
70+
{
71+
if (PipelineJobService.get().getLocationType() != PipelineJobService.LocationType.WebServer)
72+
{
73+
throw new PipelineJobException("This task must run on the webserver!");
74+
}
75+
76+
JBrowseLuceneSearch.clearCache(getPipelineJob().getJbrowseTrackId());
77+
return new RecordedActionSet(Collections.singleton(new RecordedAction("JBrowse-Lucene")));
78+
}
79+
80+
private JBrowseLucenePipelineJob getPipelineJob()
81+
{
82+
return (JBrowseLucenePipelineJob)getJob();
83+
}
84+
}

jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLucenePipelineJob.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
public class JBrowseLucenePipelineJob extends PipelineJob
4040
{
4141
private List<String> _infoFields;
42+
private String _jbrowseTrackId;
4243
private File _vcf;
4344
private File _targetDir;
4445
private boolean _allowLenientLuceneProcessing = false;
@@ -48,9 +49,10 @@ protected JBrowseLucenePipelineJob()
4849
{
4950
}
5051

51-
public JBrowseLucenePipelineJob(Container c, User user, PipeRoot pipeRoot, File vcf, File targetDir, List<String> infoFields, boolean allowLenientLuceneProcessing)
52+
public JBrowseLucenePipelineJob(Container c, User user, PipeRoot pipeRoot, String jbrowseTrackId, File vcf, File targetDir, List<String> infoFields, boolean allowLenientLuceneProcessing)
5253
{
5354
super(JBrowseLucenePipelineProvider.NAME, new ViewBackgroundInfo(c, user, null), pipeRoot);
55+
_jbrowseTrackId = jbrowseTrackId;
5456
_vcf = vcf;
5557
_targetDir = targetDir;
5658
_infoFields = infoFields;
@@ -90,7 +92,7 @@ public ActionURL getStatusHref()
9092
}
9193

9294
@Override
93-
public TaskPipeline getTaskPipeline()
95+
public TaskPipeline<?> getTaskPipeline()
9496
{
9597
return PipelineJobService.get().getTaskPipeline(new TaskId(JBrowseLucenePipelineJob.class));
9698
}
@@ -105,6 +107,16 @@ public void setInfoFields(List<String> infoFields)
105107
_infoFields = infoFields;
106108
}
107109

110+
public String getJbrowseTrackId()
111+
{
112+
return _jbrowseTrackId;
113+
}
114+
115+
public void setJbrowseTrackId(String jbrowseTrackId)
116+
{
117+
_jbrowseTrackId = jbrowseTrackId;
118+
}
119+
108120
public File getVcf()
109121
{
110122
return _vcf;

jbrowse/webapp/WEB-INF/jbrowse/jbrowseContext.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
<list>
99
<bean class="org.labkey.jbrowse.pipeline.JBrowseSessionTask$Factory"/>
1010
<bean class="org.labkey.jbrowse.pipeline.JBrowseLuceneTask$Factory"/>
11+
<bean class="org.labkey.jbrowse.pipeline.JBrowseLuceneFinalTask$Factory"/>
1112
</list>
1213
</property>
1314
<property name="pipelines">
@@ -25,6 +26,7 @@
2526
<property name="taskProgressionSpec">
2627
<list>
2728
<value type="java.lang.Class">org.labkey.jbrowse.pipeline.JBrowseLuceneTask</value>
29+
<value type="java.lang.Class">org.labkey.jbrowse.pipeline.JBrowseLuceneFinalTask</value>
2830
</list>
2931
</property>
3032
</bean>

0 commit comments

Comments
 (0)