Skip to content

Commit 795e2bb

Browse files
committed
Merge discvr-24.3 to develop
2 parents f74bd78 + 6122014 commit 795e2bb

File tree

20 files changed

+346
-36
lines changed

20 files changed

+346
-36
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
import org.labkey.api.data.TableInfo;
1313
import org.labkey.api.data.TableSelector;
1414
import org.labkey.api.exp.api.ExpData;
15+
import org.labkey.api.exp.api.ExpRun;
1516
import org.labkey.api.exp.api.ExperimentService;
1617
import org.labkey.api.ldk.LDKService;
1718
import org.labkey.api.pipeline.PipeRoot;
1819
import org.labkey.api.pipeline.PipelineJobException;
1920
import org.labkey.api.pipeline.PipelineService;
21+
import org.labkey.api.pipeline.PipelineStatusFile;
2022
import org.labkey.api.query.FieldKey;
2123
import org.labkey.api.security.User;
2224
import org.labkey.api.sequenceanalysis.RefNtSequenceModel;
@@ -34,6 +36,8 @@
3436

3537
import java.io.File;
3638
import java.io.IOException;
39+
import java.nio.file.Files;
40+
import java.nio.file.Path;
3741
import java.util.ArrayList;
3842
import java.util.Arrays;
3943
import java.util.Collections;
@@ -44,6 +48,7 @@
4448
import java.util.Map;
4549
import java.util.Set;
4650
import java.util.stream.Collectors;
51+
import java.util.stream.Stream;
4752

4853
/**
4954
* Created by bimber on 9/15/2014.
@@ -229,6 +234,60 @@ else if (!d.getFile().exists())
229234
log.error("Unable to find file associated with analysis: " + m.getAnalysisId() + ", " + m.getAlignmentFile() + ", " + d.getFile().getPath() + " for container: " + (c == null ? m.getContainer() : c.getPath()));
230235
}
231236
}
237+
238+
inspectForCoreFiles(m.getRunId(), log);
239+
}
240+
}
241+
242+
private void inspectForCoreFiles(Integer runId, Logger log)
243+
{
244+
if (runId == null)
245+
{
246+
return;
247+
}
248+
249+
ExpRun run = ExperimentService.get().getExpRun(runId);
250+
if (run == null)
251+
{
252+
log.info("Not ExpRun found for runId: " + runId);
253+
return;
254+
}
255+
else if (run.getJobId() == null)
256+
{
257+
log.info("ExpRun lacks jobId: " + runId);
258+
return;
259+
}
260+
261+
PipelineStatusFile sf = PipelineService.get().getStatusFile(run.getJobId());
262+
if (sf == null)
263+
{
264+
log.error("Unknown statusFile: " + run.getJobId() + ", for run: " + runId);
265+
return;
266+
}
267+
else if (sf.getFilePath() == null)
268+
{
269+
log.error("StatusFile filepath is null: " + run.getJobId() + ", for run: " + runId);
270+
return;
271+
}
272+
273+
File root = new File(sf.getFilePath());
274+
if (!root.exists())
275+
{
276+
log.error("Run fileroot does not exist: " + runId + " / " + root.getPath());
277+
return;
278+
}
279+
280+
try (Stream<Path> stream = Files.walk(root.toPath()))
281+
{
282+
List<Path> files = stream.filter(x -> x.getFileName().startsWith("core.")).toList();
283+
if (!files.isEmpty())
284+
{
285+
files.forEach(x -> log.error("Found core file: " + x.toFile().getPath()));
286+
}
287+
}
288+
catch (IOException e)
289+
{
290+
log.error("Error walking file root: " + run.getFilePathRootPath(), e);
232291
}
233292
}
234293

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSamplesStep.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import htsjdk.samtools.util.Interval;
44
import org.labkey.api.pipeline.PipelineJobException;
55
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
6+
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
67
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
78
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
89
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
@@ -41,7 +42,8 @@ public Provider()
4142
{
4243
super("SelectSamples", "Select Specific Samples", "GATK SelectVariants", "A VCF will be generated containing only the samples specified below.", Arrays.asList(
4344
ToolParameterDescriptor.create(SAMPLE_INCLUDE, "Select Sample(s) Include", "Only variants of the selected type(s) will be included", "sequenceanalysis-trimmingtextarea", null, null),
44-
ToolParameterDescriptor.create(SAMPLE_EXCLUDE, "Select Samples(s) To Exclude", "Variants of the selected type(s) will be excluded", "sequenceanalysis-trimmingtextarea", null, null)
45+
ToolParameterDescriptor.create(SAMPLE_EXCLUDE, "Select Samples(s) To Exclude", "Variants of the selected type(s) will be excluded", "sequenceanalysis-trimmingtextarea", null, null),
46+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--allow-nonoverlapping-command-line-samples"), "allowNnonoverlappingSamples", "Allow non-overlapping Samples", "Normally the job will fail is samples are selected that do not exist in the VCF. If checked, this will be allowed.", "checkbox", null, null)
4547
), PageFlowUtil.set("/sequenceanalysis/field/TrimmingTextArea.js"), "https://software.broadinstitute.org/gatk/");
4648
}
4749

@@ -72,6 +74,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
7274
});
7375
}
7476

77+
options.addAll(getClientCommandArgs());
78+
7579
File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".selectSamples.vcf.gz");
7680
getWrapper().execute(genome.getWorkingFastaFile(), inputVCF, outputVcf, options);
7781
if (!outputVcf.exists())

jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ static public void setInstance(JBrowseService instance)
4848

4949
abstract public void registerLuceneIndexDetector(LuceneIndexDetector detector);
5050

51+
/**
 * Pre-populates the Lucene query cache for one track of a JBrowse session.
 *
 * @param u         the user on whose behalf the session and track are resolved
 * @param sessionId identifier of the JBrowse session that contains the track
 * @param trackId   identifier of the track whose default query should be cached
 */
abstract public void cacheDefaultQuery(User u, String sessionId, String trackId);
52+
5153
public interface LuceneIndexDetector
5254
{
5355
SequenceOutputFile findMatchingLuceneIndex(SequenceOutputFile vcfFile, List<String> infoFieldsToIndex, User u, @Nullable Logger log) throws PipelineJobException;

jbrowse/src/org/labkey/jbrowse/JBrowseController.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,5 +1035,15 @@ public void setIncludeDefaultFields(boolean includeDefaultFields)
10351035
this.includeDefaultFields = includeDefaultFields;
10361036
}
10371037
}
1038+
1039+
@RequiresPermission(ReadPermission.class)
1040+
public static class GetLuceneCacheInfoAction extends ReadOnlyApiAction<Object>
1041+
{
1042+
@Override
1043+
public ApiResponse execute(Object form, BindException errors)
1044+
{
1045+
return new ApiSimpleResponse("cacheInfo", JBrowseLuceneSearch.reportCacheInfo());
1046+
}
1047+
}
10381048
}
10391049

jbrowse/src/org/labkey/jbrowse/JBrowseFieldUtils.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,14 @@ public static Map<String, JBrowseFieldDescriptor> getGenotypeDependentFields(@Nu
132132
else
133133
{
134134
ret.put(VARIABLE_SAMPLES, new JBrowseFieldDescriptor(VARIABLE_SAMPLES, "All samples with this variant", true, true, VCFHeaderLineType.Character, 7).multiValued(true).label("Samples With Variant"));
135+
ret.put(HOMOZYGOUS_VAR, new JBrowseFieldDescriptor(HOMOZYGOUS_VAR, "Samples that are homozygous for the variant allele", false, true, VCFHeaderLineType.Character, 8).multiValued(true).label("Samples Homozygous for Variant"));
135136
ret.put(N_HET, new JBrowseFieldDescriptor(N_HET, "The number of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Heterozygotes"));
136137
ret.put(N_HOMVAR, new JBrowseFieldDescriptor(N_HOMVAR, "The number of samples with this allele that are homozygous", false, true, VCFHeaderLineType.Integer, 9).label("# Homozygous Variant"));
137138
ret.put(N_CALLED, new JBrowseFieldDescriptor(N_CALLED, "The number of samples with called genotypes at this position", false, true, VCFHeaderLineType.Integer, 9).label("# Genotypes Called"));
138139
ret.put(FRACTION_HET, new JBrowseFieldDescriptor(FRACTION_HET, "The fraction of samples with this allele that are heterozygous", false, true, VCFHeaderLineType.Float, 9).label("Fraction Heterozygotes"));
139140

140141
ret.get(VARIABLE_SAMPLES).allowableValues(header.getSampleNamesInOrder());
142+
ret.get(HOMOZYGOUS_VAR).allowableValues(header.getSampleNamesInOrder());
141143
}
142144
}
143145
}

jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.labkey.jbrowse;
22

33
import org.apache.commons.lang3.StringUtils;
4+
import org.apache.logging.log4j.Logger;
45
import org.apache.lucene.analysis.Analyzer;
56
import org.apache.lucene.analysis.standard.StandardAnalyzer;
67
import org.apache.lucene.document.Document;
@@ -15,16 +16,22 @@
1516
import org.apache.lucene.search.BooleanClause;
1617
import org.apache.lucene.search.BooleanQuery;
1718
import org.apache.lucene.search.IndexSearcher;
19+
import org.apache.lucene.search.LRUQueryCache;
1820
import org.apache.lucene.search.MatchAllDocsQuery;
1921
import org.apache.lucene.search.Query;
22+
import org.apache.lucene.search.QueryCache;
23+
import org.apache.lucene.search.QueryCachingPolicy;
2024
import org.apache.lucene.search.Sort;
2125
import org.apache.lucene.search.SortField;
2226
import org.apache.lucene.search.TopFieldDocs;
27+
import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
2328
import org.apache.lucene.store.Directory;
2429
import org.apache.lucene.store.FSDirectory;
25-
import org.apache.lucene.util.NumericUtils;
2630
import org.jetbrains.annotations.Nullable;
31+
import org.json.JSONArray;
2732
import org.json.JSONObject;
33+
import org.labkey.api.cache.Cache;
34+
import org.labkey.api.cache.CacheManager;
2835
import org.labkey.api.data.Container;
2936
import org.labkey.api.data.ContainerManager;
3037
import org.labkey.api.jbrowse.AbstractJBrowseFieldCustomizer;
@@ -33,6 +40,7 @@
3340
import org.labkey.api.module.ModuleLoader;
3441
import org.labkey.api.security.User;
3542
import org.labkey.api.settings.AppProps;
43+
import org.labkey.api.util.logging.LogHelper;
3644
import org.labkey.jbrowse.model.JBrowseSession;
3745
import org.labkey.jbrowse.model.JsonFile;
3846

@@ -51,20 +59,24 @@
5159
import java.util.StringTokenizer;
5260
import java.util.regex.Matcher;
5361
import java.util.regex.Pattern;
54-
import java.util.stream.Collectors;
5562

5663
import static org.labkey.jbrowse.JBrowseFieldUtils.VARIABLE_SAMPLES;
5764
import static org.labkey.jbrowse.JBrowseFieldUtils.getSession;
5865
import static org.labkey.jbrowse.JBrowseFieldUtils.getTrack;
5966

6067
public class JBrowseLuceneSearch
6168
{
69+
private static final Logger _log = LogHelper.getLogger(JBrowseLuceneSearch.class, "Logger related to JBrowse/Lucene indexing and queries");
6270
private final JBrowseSession _session;
6371
private final JsonFile _jsonFile;
6472
private final User _user;
6573
private final String[] specialStartPatterns = {"*:* -", "+", "-"};
6674
private static final String ALL_DOCS = "all";
6775
private static final String GENOMIC_POSITION = "genomicPosition";
76+
private static final int maxCachedQueries = 1000;
77+
private static final long maxRamBytesUsed = 250 * 1024 * 1024L;
78+
79+
private static final Cache<String, LRUQueryCache> _cache = CacheManager.getStringKeyCache(1000, CacheManager.UNLIMITED, "JBrowseLuceneSearchCache");
6880

6981
private JBrowseLuceneSearch(final JBrowseSession session, final JsonFile jsonFile, User u)
7082
{
@@ -85,6 +97,17 @@ public static JBrowseLuceneSearch create(String sessionId, String trackId, User
8597
return new JBrowseLuceneSearch(session, getTrack(session, trackId, u), u);
8698
}
8799

100+
private static synchronized QueryCache getCacheForSession(String trackObjectId) {
101+
LRUQueryCache qc = _cache.get(trackObjectId);
102+
if (qc == null)
103+
{
104+
qc = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
105+
_cache.put(trackObjectId, qc);
106+
}
107+
108+
return qc;
109+
}
110+
88111
private String templateReplace(final String searchString) {
89112
String result = searchString;
90113
Pattern pattern = Pattern.compile("~(.*?)~");
@@ -148,6 +171,8 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina
148171
)
149172
{
150173
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
174+
indexSearcher.setQueryCache(getCacheForSession(_jsonFile.getObjectId()));
175+
indexSearcher.setQueryCachingPolicy(new ForceMatchAllDocsCachingPolicy());
151176

152177
List<String> stringQueryParserFields = new ArrayList<>();
153178
Map<String, SortField.Type> numericQueryParserFields = new HashMap<>();
@@ -245,7 +270,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
245270
throw new IllegalArgumentException("Could not find type for sort field: " + sortField);
246271
}
247272

248-
sort = new Sort(new SortField(sortField, fieldType, sortReverse));
273+
sort = new Sort(new SortField(sortField + "_sort", fieldType, sortReverse));
249274
}
250275

251276
// Get chunks of size {pageSize}. Default to 1 chunk -- add to the offset to get more.
@@ -263,7 +288,7 @@ else if (numericQueryParserFields.containsKey(fieldName))
263288
for (int i = pageSize * offset; i < Math.min(pageSize * (offset + 1), topDocs.scoreDocs.length); i++)
264289
{
265290
JSONObject elem = new JSONObject();
266-
Document doc = indexSearcher.doc(topDocs.scoreDocs[i].doc);
291+
Document doc = indexSearcher.storedFields().document(topDocs.scoreDocs[i].doc);
267292

268293
for (IndexableField field : doc.getFields()) {
269294
String fieldName = field.name();
@@ -345,4 +370,70 @@ public boolean isAvailable(Container c, User u)
345370
return true;
346371
}
347372
}
373+
374+
public static class ForceMatchAllDocsCachingPolicy implements QueryCachingPolicy {
375+
private final UsageTrackingQueryCachingPolicy defaultPolicy = new UsageTrackingQueryCachingPolicy();
376+
377+
@Override
378+
public boolean shouldCache(Query query) throws IOException {
379+
if (query instanceof BooleanQuery bq) {
380+
for (BooleanClause clause : bq) {
381+
if (clause.getQuery() instanceof MatchAllDocsQuery) {
382+
return true;
383+
}
384+
}
385+
}
386+
387+
return defaultPolicy.shouldCache(query);
388+
}
389+
390+
@Override
391+
public void onUse(Query query) {
392+
defaultPolicy.onUse(query);
393+
}
394+
}
395+
396+
public static JSONArray reportCacheInfo()
397+
{
398+
JSONArray cacheInfo = new JSONArray();
399+
for (String sessionId : _cache.getKeys())
400+
{
401+
LRUQueryCache qc = _cache.get(sessionId);
402+
JSONObject info = new JSONObject();
403+
info.put("cacheSize", qc.getCacheSize());
404+
info.put("cacheCount", qc.getCacheCount());
405+
info.put("hitCount", qc.getHitCount());
406+
info.put("missCount", qc.getMissCount());
407+
info.put("evictionCount", qc.getEvictionCount());
408+
info.put("totalCount", qc.getTotalCount());
409+
cacheInfo.put(info);
410+
}
411+
412+
return cacheInfo;
413+
}
414+
415+
public void cacheDefaultQuery()
416+
{
417+
try
418+
{
419+
JBrowseLuceneSearch.clearCache(_jsonFile.getObjectId());
420+
doSearch(_user, ALL_DOCS, 100, 0, GENOMIC_POSITION, false);
421+
}
422+
catch (ParseException | IOException e)
423+
{
424+
_log.error("Unable to cache default query for: " + _jsonFile.getObjectId(), e);
425+
}
426+
}
427+
428+
public static void clearCache(@Nullable String jbrowseTrackId)
429+
{
430+
if (jbrowseTrackId == null)
431+
{
432+
_cache.clear();
433+
}
434+
else
435+
{
436+
_cache.remove(jbrowseTrackId);
437+
}
438+
}
348439
}

jbrowse/src/org/labkey/jbrowse/JBrowseServiceImpl.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,13 @@ public void registerLuceneIndexDetector(LuceneIndexDetector detector)
312312
_detectors.add(detector);
313313
}
314314

315+
@Override
316+
public void cacheDefaultQuery(User u, String sessionId, String trackId)
317+
{
318+
JBrowseLuceneSearch luceneSearch = JBrowseLuceneSearch.create(sessionId, trackId, u);
319+
luceneSearch.cacheDefaultQuery();
320+
}
321+
315322
public static final class DefaultLuceneIndexDetector implements LuceneIndexDetector
316323
{
317324
@Override

jbrowse/src/org/labkey/jbrowse/model/JsonFile.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@
5151
import org.labkey.api.util.PageFlowUtil;
5252
import org.labkey.api.util.Path;
5353
import org.labkey.api.view.UnauthorizedException;
54+
import org.labkey.jbrowse.JBrowseLuceneSearch;
5455
import org.labkey.jbrowse.JBrowseManager;
5556
import org.labkey.jbrowse.JBrowseSchema;
56-
import org.labkey.jbrowse.pipeline.IndexVariantsStep;
5757
import org.labkey.jbrowse.pipeline.JBrowseLucenePipelineJob;
5858
import org.labkey.sequenceanalysis.run.util.TabixRunner;
5959

@@ -964,6 +964,8 @@ public File prepareResource(User u, Logger log, boolean throwIfNotPrepared, bool
964964
}
965965
else if (existingLuceneDir != null && existingLuceneDir.exists())
966966
{
967+
JBrowseLuceneSearch.clearCache(getObjectId());
968+
967969
// Note: this could exist, but be an empty folder:
968970
if (luceneDir.exists())
969971
{
@@ -1004,7 +1006,7 @@ else if (sizeInGb > 50)
10041006
try
10051007
{
10061008
PipeRoot root = PipelineService.get().getPipelineRootSetting(getContainerObj());
1007-
PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
1009+
PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, getObjectId(), vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing()));
10081010
}
10091011
catch (PipelineValidationException e)
10101012
{
@@ -1030,6 +1032,7 @@ else if (sizeInGb > 50)
10301032
if (forceReprocess || !doesLuceneIndexExist())
10311033
{
10321034
JBrowseLucenePipelineJob.prepareLuceneIndex(targetFile, luceneDir, log, getInfoFieldsToIndex(), allowLenientLuceneProcessing());
1035+
JBrowseLuceneSearch.clearCache(getObjectId());
10331036
}
10341037
else
10351038
{

0 commit comments

Comments
 (0)