Skip to content

Commit 29ca7a3

Browse files
committed
Preliminary opt-in support for full-text search on JBrowse VCFs
1 parent 16fa82a commit 29ca7a3

File tree

4 files changed

+132
-38
lines changed

4 files changed

+132
-38
lines changed

SequenceAnalysis/pipeline_code/sequence_tools_install.sh

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,6 @@ SKIP_PACKAGE_MANAGER=
4141
CLEAN_SRC=
4242
LK_HOME=
4343
LK_USER=
44-
MAVEN_OPTS="-Xss10m"
45-
46-
#NOTE: java/javac not automatically picked up
47-
if [ ! -z "${JAVA_HOME:+x}" ]; then
48-
PATH=${JAVA_HOME}/bin:$PATH
49-
fi
5044

5145
while getopts "d:u:fpc" arg;
5246
do
@@ -102,7 +96,7 @@ echo "Install location"
10296
echo ""
10397
echo "LKTOOLS_DIR: $LKTOOLS_DIR"
10498
echo "LKSRC_DIR: $LKSRC_DIR"
105-
WGET_OPTS="--read-timeout=10 --secure-protocol=auto --no-check-certificate"
99+
WGET_OPTS="--read-timeout=10 --secure-protocol=auto --no-check-certificate -q"
106100

107101
#
108102
# Install required software
@@ -113,18 +107,6 @@ echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
113107
echo "Install Required packages via the package manager"
114108
echo ""
115109

116-
# can install EPEL to get additional repositories if necessary. May be needed on RHEL/CentOS
117-
#OS=`cat /etc/redhat-release | awk {'print $1}'`
118-
#if [ "$OS" = "CentOS" ]
119-
#then
120-
# if [[ ! -e ${LKSRC_DIR}/epel-release-6-8.noarch.rpm ]];
121-
# then
122-
# cd $LKSRC_DIR
123-
# wget $WGET_OPTS http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
124-
# wget $WGET_OPTS http://rpms.famillecollet.com/enterprise/remi-release-6.rpm
125-
# rpm -Uvh remi-release-6*.rpm epel-release-6*.rpm
126-
# fi
127-
#fi
128110

129111
if [ ! -z $SKIP_PACKAGE_MANAGER ]; then
130112
echo "Skipping package install"
@@ -134,18 +116,6 @@ elif [ $(which yum) ]; then
134116
elif [ $(which apt-get) ]; then
135117
echo "Using apt-get"
136118

137-
#this is a possible setup for R
138-
#add-apt-repository "deb http://cran.cnr.berkeley.edu/bin/linux/ubuntu/ precise/"
139-
#gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 or gpg --hkp://keyserver keyserver.ubuntu.com:80 --recv-key E084DAB9
140-
#gpg -a --export E084DAB9 | sudo apt-key add -
141-
142-
#install oracle java
143-
#apt-get install python-software-properties
144-
#add-apt-repository ppa:webupd8team/java
145-
#apt-get update
146-
#apt-get install oracle-java7-installer
147-
#update-alternatives --config java
148-
#update-alternatives --config javac
149119
#apt-get -y update
150120
apt-get -q -y install bzip2 libbz2-dev libc6 libc6-dev libncurses5-dev python3-dev unzip zip ncftp gcc make perl libssl-dev libgcc1 libstdc++6 zlib1g zlib1g-dev libboost-all-dev python3-numpy python3-scipy libexpat1-dev pkg-config subversion flex subversion libgoogle-perftools-dev perl-doc git cmake maven r-base r-cran-rcpp python-pip
151121
else
@@ -460,6 +430,7 @@ then
460430
echo "Cleaning up previous installs"
461431
rm -Rf bismark_*
462432
rm -Rf Bismark_*
433+
rm -Rf 0.22.3*
463434
rm -Rf $LKTOOLS_DIR/bismark
464435
rm -Rf $LKTOOLS_DIR/bismark2bedGraph
465436
rm -Rf $LKTOOLS_DIR/bismark2report
@@ -468,12 +439,12 @@ then
468439
rm -Rf $LKTOOLS_DIR/coverage2cytosine
469440
rm -Rf $LKTOOLS_DIR/deduplicate_bismark
470441

471-
wget $WGET_OPTS https://github.com/FelixKrueger/Bismark/archive/0.17.0.tar.gz
472-
gunzip 0.17.0.tar.gz
473-
tar -xf 0.17.0.tar
442+
wget $WGET_OPTS https://github.com/FelixKrueger/Bismark/archive/0.22.3.tar.gz
443+
gunzip 0.22.3.tar.gz
444+
tar -xf 0.22.3.tar
474445
echo "Compressing TAR"
475-
gzip 0.17.0.tar
476-
cd Bismark-0.17.0
446+
gzip 0.22.3.tar
447+
cd Bismark-0.22.3
477448

478449
install ./bismark $LKTOOLS_DIR/bismark
479450
install ./bismark2bedGraph $LKTOOLS_DIR/bismark2bedGraph
@@ -1129,7 +1100,7 @@ echo "Installing lofreq"
11291100
echo ""
11301101
cd $LKSRC_DIR
11311102

1132-
if [[ ! -e ${LKTOOLS_DIR}/trimmomatic.jar || ! -z $FORCE_REINSTALL ]];
1103+
if [[ ! -e ${LKTOOLS_DIR}/lofreq || ! -z $FORCE_REINSTALL ]];
11331104
then
11341105
rm -Rf lofreq_star*
11351106
rm -Rf $LKTOOLS_DIR/lofreq_star*

jbrowse/resources/web/jbrowse/window/ModifyJsonConfigWindow.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,24 @@ Ext4.define('JBrowse.window.ModifyJsonConfigWindow', {
9595
handler: function (gridBtn) {
9696
this.addAttribute('excludeFromSearch', true, 'BOOLEAN');
9797
}
98+
},{
99+
text: 'Create Full Text Index?',
100+
scope: this,
101+
handler: function (gridBtn) {
102+
this.addAttribute('createFullTextIndex', true, 'BOOLEAN');
103+
}
104+
},{
105+
text: 'Info Fields For Full Text Search',
106+
scope: this,
107+
handler: function (gridBtn) {
108+
this.addAttribute('infoFieldsForFullTextSearch', null, 'STRING');
109+
}
110+
},{
111+
text: 'Annotations For Full Text Search',
112+
scope: this,
113+
handler: function (gridBtn) {
114+
this.addAttribute('annotationsForFullTextSearch', null, 'STRING');
115+
}
98116
},{
99117
text: 'Other',
100118
scope: this,

jbrowse/src/org/labkey/jbrowse/model/JBrowseSession.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,4 +403,17 @@ public static JBrowseSession getGenericGenomeSession(int genomeId)
403403

404404
return session;
405405
}
406+
407+
public JsonFile getTrack(User u, String trackGUID)
408+
{
409+
for (JsonFile jf : getGenomeTracks(u, false))
410+
{
411+
if (trackGUID.equalsIgnoreCase(jf.getObjectId()))
412+
{
413+
return jf;
414+
}
415+
}
416+
417+
return null;
418+
}
406419
}

jbrowse/src/org/labkey/jbrowse/model/JsonFile.java

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
3939
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
4040
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
41+
import org.labkey.api.sequenceanalysis.run.DISCVRSeqRunner;
4142
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
4243
import org.labkey.api.settings.AppProps;
4344
import org.labkey.api.util.FileType;
@@ -760,7 +761,7 @@ private JSONObject getTabixTrack(Logger log, ExpData targetFile, ReferenceGenome
760761

761762
public boolean needsProcessing()
762763
{
763-
return (needsGzip() && !isGzipped()) || doIndex();
764+
return (needsGzip() && !isGzipped()) || doIndex() || shouldHaveFreeTextSearch();
764765
}
765766

766767
public boolean isGzipped()
@@ -913,9 +914,71 @@ public File prepareResource(Logger log, boolean throwIfNotPrepared, boolean forc
913914
}
914915
}
915916

917+
if (shouldHaveFreeTextSearch())
918+
{
919+
File luceneDir = getExpectedLocationOfLuceneIndex(throwIfNotPrepared);
920+
if (forceReprocess && luceneDir.exists())
921+
{
922+
try
923+
{
924+
FileUtils.deleteDirectory(luceneDir);
925+
}
926+
catch (IOException e)
927+
{
928+
throw new PipelineJobException(e);
929+
}
930+
}
931+
932+
prepareLuceneIndex(log);
933+
}
934+
916935
return targetFile;
917936
}
918937

938+
private void prepareLuceneIndex(Logger log)
939+
{
940+
log.debug("Generating VCF full text index for file: " + getExpData().getFile().getName());
941+
942+
DISCVRSeqRunner runner = new DISCVRSeqRunner(log);
943+
List<String> args = runner.getBaseArgs("VcfToLuceneIndexer");
944+
args.add("-V");
945+
args.add(getExpData().getFile().getPath());
946+
947+
args.add("-O");
948+
args.add(getExpectedLocationOfLuceneIndex(false).getPath());
949+
950+
JSONObject config = getExtraTrackConfig();
951+
String infoFieldsForFullTextSearch = config == null ? null : StringUtils.trimToNull(config.optString("infoFieldsForFullTextSearch"));
952+
if (infoFieldsForFullTextSearch == null)
953+
{
954+
args.add("-IF");
955+
args.add("AF");
956+
}
957+
else
958+
{
959+
for (String field : infoFieldsForFullTextSearch.split(","))
960+
{
961+
args.add("-IF");
962+
args.add(field);
963+
}
964+
}
965+
966+
String annotationsForFullTextSearch = config == null ? null : StringUtils.trimToNull(config.optString("annotationsForFullTextSearch"));
967+
if (annotationsForFullTextSearch == null)
968+
{
969+
args.add("-AN");
970+
args.add("SampleList");
971+
}
972+
else
973+
{
974+
for (String field : annotationsForFullTextSearch.split(","))
975+
{
976+
args.add("-AN");
977+
args.add(field);
978+
}
979+
}
980+
}
981+
919982
protected void createIndex(File finalLocation, Logger log, File idx, boolean throwIfNotPrepared) throws PipelineJobException
920983
{
921984
if (throwIfNotPrepared)
@@ -1231,4 +1294,33 @@ public String getPrimaryExtension()
12311294
return _extensions.get(0);
12321295
}
12331296
}
1297+
1298+
public boolean shouldHaveFreeTextSearch()
1299+
{
1300+
ExpData targetFile = getExpData();
1301+
if (!TRACK_TYPES.vcf.getFileType().isType(targetFile.getFile()))
1302+
{
1303+
return false;
1304+
}
1305+
1306+
JSONObject json = getExtraTrackConfig();
1307+
return json != null && json.optBoolean("createFullTextIndex", false);
1308+
}
1309+
1310+
public File getExpectedLocationOfLuceneIndex(boolean throwIfNotFound)
1311+
{
1312+
File basedir = getLocationOfProcessedTrack(false);
1313+
if (basedir == null)
1314+
{
1315+
return null;
1316+
}
1317+
1318+
File ret = new File(basedir.getParentFile(), "lucene");
1319+
if (throwIfNotFound && !ret.exists())
1320+
{
1321+
throw new IllegalStateException("Expected search index not found: " + ret.getPath());
1322+
}
1323+
1324+
return ret;
1325+
}
12341326
}

0 commit comments

Comments
 (0)