Skip to content

Commit 0f15349

Browse files
committed
Create standalone lucene index step
1 parent 63637a1 commit 0f15349

File tree

3 files changed

+136
-0
lines changed

3 files changed

+136
-0
lines changed

jbrowse/src/org/labkey/jbrowse/JBrowseModule.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.labkey.api.query.DetailsURL;
2929
import org.labkey.api.security.permissions.AdminOperationsPermission;
3030
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
31+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
3132
import org.labkey.api.settings.AdminConsole;
3233
import org.labkey.api.util.PageFlowUtil;
3334
import org.labkey.api.util.SystemMaintenance;
@@ -38,6 +39,7 @@
3839
import org.labkey.jbrowse.button.ModifyTrackConfigButton;
3940
import org.labkey.jbrowse.button.ReprocessResourcesButton;
4041
import org.labkey.jbrowse.button.ReprocessSessionsButton;
42+
import org.labkey.jbrowse.pipeline.IndexVariantsStep;
4143
import org.labkey.jbrowse.pipeline.JBrowseSessionPipelineProvider;
4244
import org.labkey.jbrowse.query.JBrowseUserSchema;
4345

@@ -108,6 +110,8 @@ public void doStartupAfterSpringConfig(ModuleContext moduleContext)
108110

109111
WebdavService.get().registerPreGzippedExtensions("jsonz");
110112
WebdavService.get().registerPreGzippedExtensions("txtz");
113+
114+
SequencePipelineService.get().registerPipelineStep(new IndexVariantsStep.Provider());
111115
}
112116

113117
@Override
jbrowse/src/org/labkey/jbrowse/pipeline/IndexVariantsStep.java

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package org.labkey.jbrowse.pipeline;
2+
3+
import htsjdk.samtools.util.Interval;
4+
import org.apache.commons.io.FileUtils;
5+
import org.apache.commons.lang3.StringUtils;
6+
import org.apache.commons.lang3.SystemUtils;
7+
import org.labkey.api.pipeline.PipelineJobException;
8+
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
9+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
10+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
11+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
12+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
13+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
14+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
15+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
16+
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
17+
import org.labkey.api.writer.PrintWriters;
18+
19+
import javax.annotation.Nullable;
20+
import java.io.File;
21+
import java.io.IOException;
22+
import java.io.PrintWriter;
23+
import java.util.Arrays;
24+
import java.util.List;
25+
26+
public class IndexVariantsStep extends AbstractCommandPipelineStep<SelectVariantsWrapper> implements VariantProcessingStep
27+
{
28+
public static final String CATEGORY = "VCF Lucene Index";
29+
30+
public IndexVariantsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
31+
{
32+
super(provider, ctx, new SelectVariantsWrapper(ctx.getLogger()));
33+
}
34+
35+
public static class Provider extends AbstractVariantProcessingStepProvider<IndexVariantsStep> implements VariantProcessingStep.SupportsScatterGather
36+
{
37+
public Provider()
38+
{
39+
super("IndexVariantsStep", "Index Variants", "DISCVR-seq", "Create a lucene index for the selected fields", Arrays.asList(
40+
ToolParameterDescriptor.create("infoFieldsToIndex", "INFO fields to index", "A list of INFO fields to index", "sequenceanalysis-trimmingtextarea", null, null),
41+
ToolParameterDescriptor.create("allowLenientProcessing", "Allow Lenient Processing", "If selected, many error types will be logged but ignored.", "checkbox", null, false)
42+
), Arrays.asList("/sequenceanalysis/field/TrimmingTextArea.js"), "https://github.com/BimberLab/DISCVRSeq");
43+
}
44+
45+
@Override
46+
public IndexVariantsStep create(PipelineContext ctx)
47+
{
48+
return new IndexVariantsStep(this, ctx);
49+
}
50+
}
51+
52+
@Override
53+
public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
54+
{
55+
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
56+
57+
String infoFieldsRaw = StringUtils.trimToNull(getProvider().getParameterByName("infoFieldsToIndex").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class));
58+
if (infoFieldsRaw == null)
59+
{
60+
throw new PipelineJobException("Missing info fields to index");
61+
}
62+
63+
List<String> infoFields = Arrays.stream(infoFieldsRaw.split(";")).sorted().toList();
64+
boolean allowLenientProcessing = getProvider().getParameterByName("allowLenientProcessing").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
65+
66+
File indexDir = new File(outputDirectory, "lucene");
67+
JBrowseLucenePipelineJob.prepareLuceneIndex(inputVCF, indexDir, getPipelineCtx().getLogger(), infoFields, allowLenientProcessing);
68+
69+
File idx = new File(indexDir, "write.lock");
70+
if (!idx.exists())
71+
{
72+
throw new PipelineJobException("Unable to find file: " + idx.getPath());
73+
}
74+
75+
output.addSequenceOutput(idx, "Lucene index: " + inputVCF.getName(), CATEGORY, null, null, genome.getGenomeId(), "Fields indexed: " + infoFieldsRaw);
76+
77+
return output;
78+
}
79+
}

jbrowse/src/org/labkey/jbrowse/pipeline/JBrowseLucenePipelineJob.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.labkey.jbrowse.pipeline;
22

33
import org.apache.commons.io.FileUtils;
4+
import org.apache.commons.lang3.SystemUtils;
45
import org.apache.logging.log4j.Logger;
56
import org.labkey.api.assay.AssayFileWriter;
67
import org.labkey.api.data.Container;
@@ -22,10 +23,12 @@
2223
import org.labkey.api.view.ActionURL;
2324
import org.labkey.api.view.ViewBackgroundInfo;
2425
import org.labkey.api.view.ViewContext;
26+
import org.labkey.api.writer.PrintWriters;
2527
import org.labkey.jbrowse.JBrowseManager;
2628

2729
import java.io.File;
2830
import java.io.IOException;
31+
import java.io.PrintWriter;
2932
import java.util.List;
3033

3134
/**
@@ -168,6 +171,7 @@ public static void prepareLuceneIndex(File vcf, File indexDir, Logger log, List<
168171
args.add("LENIENT");
169172
}
170173

174+
infoFieldsForFullTextSearch = infoFieldsForFullTextSearch.stream().sorted().toList();
171175
for (String field : infoFieldsForFullTextSearch)
172176
{
173177
args.add("-IF");
@@ -193,10 +197,59 @@ public static void prepareLuceneIndex(File vcf, File indexDir, Logger log, List<
193197
}
194198

195199
runner.execute(args);
200+
201+
if (!SystemUtils.IS_OS_WINDOWS)
202+
{
203+
try
204+
{
205+
log.debug("Updating file permissions");
206+
recursivelyChangeDirectoryPermissions(indexDir);
207+
}
208+
catch (IOException e)
209+
{
210+
throw new PipelineJobException(e);
211+
}
212+
}
213+
214+
File fieldFile = getFieldListFile(indexDir);
215+
try (PrintWriter writer = PrintWriters.getPrintWriter(fieldFile))
216+
{
217+
infoFieldsForFullTextSearch.forEach(writer::println);
218+
}
219+
catch (IOException e)
220+
{
221+
throw new PipelineJobException(e);
222+
}
196223
}
197224

198225
public static File getExpectedLocationOfLuceneIndexStats(File indexDir)
199226
{
200227
return new File(indexDir.getPath() + ".stats.txt");
201228
}
229+
230+
private static void recursivelyChangeDirectoryPermissions(File f) throws IOException
231+
{
232+
if (f.isDirectory())
233+
{
234+
Runtime.getRuntime().exec(new String[]{"chmod", "775", f.getPath()});
235+
236+
File[] children = f.listFiles();
237+
if (children != null)
238+
{
239+
for (File child : children)
240+
{
241+
recursivelyChangeDirectoryPermissions(child);
242+
}
243+
}
244+
}
245+
else
246+
{
247+
Runtime.getRuntime().exec(new String[]{"chmod", "664", f.getPath()});
248+
}
249+
}
250+
251+
public static File getFieldListFile(File indexDir)
252+
{
253+
return new File(indexDir, "fieldList.txt");
254+
}
202255
}

0 commit comments

Comments
 (0)