Skip to content

Commit c88382a

Browse files
authored
23.3_fb merge beta branch of lucene search (#208)
Pre-release for Lucene-based indexing and search of VCFs
1 parent dbd98d0 commit c88382a

File tree

44 files changed

+3198
-395
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+3198
-395
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
/jbrowse/node_modules
44
/intellijBuild
55
/travisSettings.sh
6+
index/
67

78
# Created by npm install
89
jbrowse/jb_run.js

SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/.qview.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
<column name="comments" />
1212
<column name="barcode5" />
1313
<column name="barcode3" />
14+
<column name="concentration" />
1415
<column name="sampleid" />
1516

1617
<column name="instrument_run_id" />

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SNPEffStep.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public class SNPEffStep extends AbstractCommandPipelineStep<SnpEffWrapper> imple
3434
{
3535
public static final String GENE_PARAM = "gene_file";
3636

37-
public SNPEffStep(PipelineStepProvider provider, PipelineContext ctx)
37+
public SNPEffStep(PipelineStepProvider<?> provider, PipelineContext ctx)
3838
{
3939
super(provider, ctx, new SnpEffWrapper(ctx.getLogger()));
4040
}

SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java

Lines changed: 41 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ public void testSteps() throws Exception
130130

131131
if (_createdGenomeId == null)
132132
{
133-
_createdGenomeId = createReferenceGenome(this);
133+
_createdGenomeId = createMac239ReferenceGenome(this, 0);
134134
}
135135

136136
importReadsetMetadata();
@@ -1379,18 +1379,34 @@ public List<String> getAssociatedModules()
13791379

13801380
private static void ensureSIVmac239exists(BaseWebDriverTest test) throws Exception
13811381
{
1382+
String sequence = read239FromFile();
1383+
1384+
ensureRefSeqExists(test, GENOME_SEQ_NAME, sequence);
1385+
1386+
//verify virus_strains query
1387+
SelectRowsCommand sr = new SelectRowsCommand("sequenceanalysis", "virus_strains");
13821388
Connection cn = new Connection(WebTestHelper.getBaseURL(), PasswordUtil.getUsername(), PasswordUtil.getPassword());
1383-
SelectRowsCommand sr = new SelectRowsCommand("sequenceanalysis", "ref_nt_sequences");
13841389
sr.addFilter(new Filter("name", GENOME_SEQ_NAME, Filter.Operator.EQUAL));
1390+
SelectRowsResponse resp = sr.execute(cn, test.getContainerId());
1391+
assertTrue(resp.getRowCount().intValue() > 0);
1392+
test.log("total viral sequences: " + resp.getRowCount() + " in container: " + test.getCurrentContainerPath());
1393+
}
1394+
1395+
public static void ensureRefSeqExists(BaseWebDriverTest test, String seqName, String ntSequence) throws Exception
1396+
{
1397+
Connection cn = new Connection(WebTestHelper.getBaseURL(), PasswordUtil.getUsername(), PasswordUtil.getPassword());
1398+
SelectRowsCommand sr = new SelectRowsCommand("sequenceanalysis", "ref_nt_sequences");
1399+
sr.addFilter(new Filter("name", seqName, Filter.Operator.EQUAL));
1400+
sr.addFilter(new Filter("subset", seqName, Filter.Operator.EQUAL));
1401+
13851402
SelectRowsResponse resp = sr.execute(cn, test.getContainerId());
13861403
if (resp.getRowCount().intValue() == 0)
13871404
{
1388-
test.log("creating SIVMac239_Test sequence in container: " + test.getCurrentContainerPath());
1405+
test.log("creating " + seqName + " sequence in container: " + test.getCurrentContainerPath());
13891406
Map<String, Object> row = new HashMap<>();
1390-
row.put("name", GENOME_SEQ_NAME);
1391-
row.put("subset", GENOME_SEQ_NAME);
1392-
String sequence = read239FromFile();
1393-
row.put("sequence", sequence);
1407+
row.put("name", seqName);
1408+
row.put("subset", seqName);
1409+
row.put("sequence", ntSequence);
13941410
row.put("category", "Virus");
13951411

13961412
InsertRowsCommand ic = new InsertRowsCommand("sequenceanalysis", "ref_nt_sequences");
@@ -1399,22 +1415,20 @@ private static void ensureSIVmac239exists(BaseWebDriverTest test) throws Excepti
13991415
}
14001416
else
14011417
{
1402-
test.log("SIVMac239_Test sequence already exists for container: " + test.getCurrentContainerPath());
1418+
test.log("Sequence: " + seqName + " already exists for container: " + test.getCurrentContainerPath());
14031419
String subset = (String)resp.getRows().get(0).get("subset");
14041420
test.log("subset: " + subset);
14051421
}
1406-
1407-
//verify virus_strains query
1408-
sr = new SelectRowsCommand("sequenceanalysis", "virus_strains");
1409-
sr.addFilter(new Filter("name", GENOME_SEQ_NAME, Filter.Operator.EQUAL));
1410-
resp = sr.execute(cn, test.getContainerId());
1411-
assertTrue(resp.getRowCount().intValue() > 0);
1412-
test.log("total viral sequences: " + resp.getRowCount() + " in container: " + test.getCurrentContainerPath());
14131422
}
14141423

14151424
private static String read239FromFile() throws Exception
14161425
{
14171426
File fasta = new File(_sampleData, "Ref_DB.fasta");
1427+
return readSeqFromFile(fasta);
1428+
}
1429+
1430+
public static String readSeqFromFile(File fasta) throws Exception
1431+
{
14181432
try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fasta), StandardCharsets.UTF_8)))
14191433
{
14201434
StringBuilder sb = new StringBuilder();
@@ -1442,30 +1456,30 @@ public static int getTotalPipelineJobs(BaseWebDriverTest test) throws CommandExc
14421456
return resp.getRowCount().intValue();
14431457
}
14441458

1445-
public static Integer createReferenceGenome(BaseWebDriverTest test) throws Exception
1446-
{
1447-
return createReferenceGenome(test, 0);
1448-
}
1449-
14501459
public static final String GENOME_SEQ_NAME = "SIVmac239_Test";
14511460

1452-
public static Integer createReferenceGenome(BaseWebDriverTest test, int expectedChildJobs) throws Exception
1461+
public static Integer createMac239ReferenceGenome(BaseWebDriverTest test, int expectedChildJobs) throws Exception
14531462
{
1454-
test.log("creating SIVmac239 reference genome");
1455-
test.beginAt("/sequenceanalysis/" + test.getContainerId() + "/begin.view");
1456-
14571463
//verify SIVmac239_Test NT sequence exists:
14581464
ensureSIVmac239exists(test);
14591465

1466+
return createReferenceGenome(test, expectedChildJobs, TEST_GENOME_NAME, GENOME_SEQ_NAME);
1467+
}
1468+
1469+
public static Integer createReferenceGenome(BaseWebDriverTest test, int expectedChildJobs, String genomeName, String seqName) throws Exception
1470+
{
1471+
test.log("creating reference genome: " + genomeName);
1472+
test.beginAt("/sequenceanalysis/" + test.getContainerId() + "/begin.view");
1473+
14601474
int existingPipelineJobs = SequenceTest.getTotalPipelineJobs(test);
14611475

14621476
test.waitAndClickAndWait(Locator.linkContainingText("Reference Sequences"));
14631477
DataRegionTable dr = new DataRegionTable("query", test);
1464-
dr.setFilter("name", "Equals", GENOME_SEQ_NAME);
1478+
dr.setFilter("name", "Equals", seqName);
14651479
dr.checkCheckbox(0);
14661480
dr.clickHeaderMenu("More Actions", false, "Create Reference Genome");
14671481
new Window.WindowFinder(test.getDriver()).withTitle("Create Reference Genome").waitFor();
1468-
Ext4FieldRef.getForLabel(test, "Name").setValue(TEST_GENOME_NAME);
1482+
Ext4FieldRef.getForLabel(test, "Name").setValue(genomeName);
14691483
String description = "This is a reference genome description";
14701484
Ext4FieldRef.getForLabel(test, "Description").setValue(description);
14711485
Ext4FieldRef.getForLabel(test, "Skip Aligner Index Creation").setChecked(true); //skip this since it requires sequence tools
@@ -1477,7 +1491,7 @@ public static Integer createReferenceGenome(BaseWebDriverTest test, int expected
14771491

14781492
Connection cn = new Connection(WebTestHelper.getBaseURL(), PasswordUtil.getUsername(), PasswordUtil.getPassword());
14791493
SelectRowsCommand sr = new SelectRowsCommand("sequenceanalysis", "reference_libraries");
1480-
sr.addFilter(new Filter("name", TEST_GENOME_NAME, Filter.Operator.EQUAL));
1494+
sr.addFilter(new Filter("name", genomeName, Filter.Operator.EQUAL));
14811495
sr.addFilter(new Filter("description", description, Filter.Operator.EQUAL));
14821496
SelectRowsResponse resp = sr.execute(cn, test.getContainerId());
14831497

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package org.labkey.api.jbrowse;
2+
3+
import org.labkey.api.data.Container;
4+
import org.labkey.api.module.Module;
5+
import org.labkey.api.security.User;
6+
7+
abstract public class AbstractJBrowseFieldCustomizer implements JBrowseFieldCustomizer
8+
{
9+
private final Module _module;
10+
public AbstractJBrowseFieldCustomizer(Module owningModule)
11+
{
12+
_module = owningModule;
13+
}
14+
15+
@Override
16+
public boolean isAvailable(Container c, User u)
17+
{
18+
return c.getActiveModules().contains(_module);
19+
}
20+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package org.labkey.api.jbrowse;
2+
3+
import org.jetbrains.annotations.Nullable;
4+
import org.labkey.api.data.Container;
5+
import org.labkey.api.security.User;
6+
7+
import java.util.Set;
8+
9+
public interface GroupsProvider
10+
{
11+
@Nullable Set<String> getGroupMembers(String groupName, Container c, User u);
12+
boolean hasGroup(String groupName, Container c, User u);
13+
boolean isAvailable(Container c, User u);
14+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package org.labkey.api.jbrowse;
2+
3+
import org.labkey.api.data.Container;
4+
import org.labkey.api.security.User;
5+
6+
public interface JBrowseFieldCustomizer {
7+
public void customizeField(JBrowseFieldDescriptor field);
8+
9+
public boolean isAvailable(Container c, User u);
10+
}
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
package org.labkey.api.jbrowse;
2+
3+
import htsjdk.variant.vcf.VCFHeaderLineType;
4+
import org.jetbrains.annotations.Nullable;
5+
import org.json.JSONObject;
6+
7+
import java.util.Collections;
8+
import java.util.List;
9+
10+
public class JBrowseFieldDescriptor {
11+
private final String _fieldName;
12+
private final VCFHeaderLineType _type;
13+
private String _label;
14+
private final String _description;
15+
private final boolean _isInDefaultColumns;
16+
private final boolean _isIndexed;
17+
private boolean _isMultiValued = false;
18+
private List<String> _allowableValues = null;
19+
private boolean _isHidden = false;
20+
private String _colWidth = null;
21+
private Integer _orderKey = null;
22+
23+
// NOTE: this should support "jexl:xxxxxx" syntax, like other JBrowse formatting
24+
private String _formatString = null;
25+
26+
public JBrowseFieldDescriptor(String luceneFieldName, @Nullable String description, boolean isInDefaultColumns, boolean isIndexed, VCFHeaderLineType type, Integer orderKey) {
27+
_fieldName = luceneFieldName;
28+
_label = luceneFieldName;
29+
_description = description;
30+
_isInDefaultColumns = isInDefaultColumns;
31+
_isIndexed = isIndexed;
32+
_type = type;
33+
_orderKey = orderKey;
34+
}
35+
36+
public JBrowseFieldDescriptor hidden(boolean isHidden) {
37+
_isHidden = isHidden;
38+
return this;
39+
}
40+
41+
public JBrowseFieldDescriptor colWidth(String colWidth) {
42+
_colWidth = colWidth;
43+
return this;
44+
}
45+
46+
public JBrowseFieldDescriptor formatString(String formatString) {
47+
_formatString = formatString;
48+
return this;
49+
}
50+
51+
public JBrowseFieldDescriptor allowableValues(List<String> allowableValues) {
52+
_allowableValues = Collections.unmodifiableList(allowableValues);
53+
return this;
54+
}
55+
56+
public JBrowseFieldDescriptor multiValued(boolean isMultiValued) {
57+
_isMultiValued = isMultiValued;
58+
return this;
59+
}
60+
61+
public JBrowseFieldDescriptor label(String label) {
62+
_label = label;
63+
return this;
64+
}
65+
66+
public String getFieldName() {
67+
return _fieldName;
68+
}
69+
70+
public VCFHeaderLineType getType() {
71+
return _type;
72+
}
73+
74+
public String getLabel() {
75+
return _label;
76+
}
77+
78+
public String getDescription() {
79+
return _description;
80+
}
81+
82+
public boolean isInDefaultColumns() {
83+
return _isInDefaultColumns;
84+
}
85+
86+
public boolean isIndexed() {
87+
return _isIndexed;
88+
}
89+
90+
public boolean isMultiValued() {
91+
return _isMultiValued;
92+
}
93+
94+
public boolean isHidden() {
95+
return _isHidden;
96+
}
97+
98+
public String getColWidth() {
99+
return _colWidth;
100+
}
101+
102+
public void setLabel(String label) {
103+
_label = label;
104+
}
105+
106+
public void setMultiValued(boolean multiValued) {
107+
_isMultiValued = multiValued;
108+
}
109+
110+
public void setHidden(boolean hidden) {
111+
_isHidden = hidden;
112+
}
113+
114+
public void setColWidth(String colWidth) {
115+
_colWidth = colWidth;
116+
}
117+
118+
public void setOrderKey(Integer orderKey) {
119+
_orderKey = orderKey;
120+
}
121+
122+
public JSONObject toJSON() {
123+
JSONObject fieldDescriptorJSON = new JSONObject();
124+
fieldDescriptorJSON.put("name", _fieldName);
125+
fieldDescriptorJSON.put("label", _label == null ? _fieldName : _label);
126+
fieldDescriptorJSON.put("description", _description);
127+
fieldDescriptorJSON.put("type", _type.toString());
128+
fieldDescriptorJSON.put("isInDefaultColumns", _isInDefaultColumns);
129+
fieldDescriptorJSON.put("isIndexed", _isIndexed);
130+
fieldDescriptorJSON.put("isMultiValued", _isMultiValued);
131+
fieldDescriptorJSON.put("isHidden", _isHidden);
132+
fieldDescriptorJSON.put("colWidth", _colWidth);
133+
fieldDescriptorJSON.put("formatString", _formatString);
134+
fieldDescriptorJSON.put("orderKey", _orderKey);
135+
fieldDescriptorJSON.put("allowableValues", _allowableValues);
136+
137+
return fieldDescriptorJSON;
138+
}
139+
}

jbrowse/api-src/org/labkey/api/jbrowse/JBrowseService.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,8 @@ static public void setInstance(JBrowseService instance)
3434
abstract public void reprocessDatabase(User u, String databaseGuid) throws PipelineValidationException;
3535

3636
abstract public void registerDemographicsSource(DemographicsSource source);
37+
38+
abstract public void registerGroupsProvider(GroupsProvider provider);
39+
40+
abstract public void registerFieldCustomizer(JBrowseFieldCustomizer customizer);
3741
}

jbrowse/build.gradle

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import org.labkey.gradle.util.BuildUtils;
22

3+
final String luceneVersion="9.4.2"
4+
35
repositories {
46
mavenCentral()
57
// Added for org.clojars.chapmanb:sam dependency required by com.github.samtools:htsjdk
@@ -13,7 +15,7 @@ repositories {
1315
}
1416

1517
dependencies {
16-
implementation "com.github.samtools:htsjdk:${htsjdkVersion}"
18+
apiImplementation "com.github.samtools:htsjdk:${htsjdkVersion}"
1719
BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:LabDevKitModules:LDK", depProjectConfig: "apiJarFile")
1820
BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:DiscvrLabKeyModules:SequenceAnalysis", depProjectConfig: "apiJarFile")
1921
BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:DiscvrLabKeyModules:SequenceAnalysis", depProjectConfig: "runtimeElements")
@@ -23,6 +25,11 @@ dependencies {
2325
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:LabDevKitModules:LDK", depProjectConfig: "published", depExtension: "module")
2426
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:SequenceAnalysis", depProjectConfig: "published", depExtension: "module")
2527
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: BuildUtils.getPlatformModuleProjectPath(project.gradle, "pipeline"), depProjectConfig: "published", depExtension: "module")
28+
29+
external "org.apache.lucene:lucene-analysis-common:${luceneVersion}"
30+
external "org.apache.lucene:lucene-core:${luceneVersion}"
31+
external "org.apache.lucene:lucene-queryparser:${luceneVersion}"
32+
external "org.apache.lucene:lucene-backward-codecs:${luceneVersion}"
2633
}
2734

2835
def jbPkgTask = project.tasks.named("npm_run_jb-pkg")

0 commit comments

Comments
 (0)