|
30 | 30 | import org.labkey.api.pipeline.PipeRoot; |
31 | 31 | import org.labkey.api.pipeline.PipelineJobException; |
32 | 32 | import org.labkey.api.pipeline.PipelineService; |
| 33 | +import org.labkey.api.pipeline.PipelineValidationException; |
33 | 34 | import org.labkey.api.query.FieldKey; |
34 | 35 | import org.labkey.api.query.QueryService; |
35 | 36 | import org.labkey.api.query.UserSchema; |
|
39 | 40 | import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
40 | 41 | import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; |
41 | 42 | import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; |
42 | | -import org.labkey.api.sequenceanalysis.run.DISCVRSeqRunner; |
43 | 43 | import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; |
44 | 44 | import org.labkey.api.settings.AppProps; |
45 | 45 | import org.labkey.api.util.FileType; |
46 | 46 | import org.labkey.api.util.FileUtil; |
47 | 47 | import org.labkey.api.util.GUID; |
| 48 | +import org.labkey.api.util.JobRunner; |
48 | 49 | import org.labkey.api.util.PageFlowUtil; |
49 | 50 | import org.labkey.api.util.Path; |
50 | 51 | import org.labkey.api.view.UnauthorizedException; |
51 | 52 | import org.labkey.jbrowse.JBrowseManager; |
52 | 53 | import org.labkey.jbrowse.JBrowseSchema; |
| 54 | +import org.labkey.jbrowse.pipeline.JBrowseLucenePipelineJob; |
53 | 55 | import org.labkey.sequenceanalysis.run.util.TabixRunner; |
54 | 56 |
|
55 | 57 | import javax.annotation.Nullable; |
@@ -938,25 +940,52 @@ public File prepareResource(Logger log, boolean throwIfNotPrepared, boolean forc |
938 | 940 | if (shouldHaveFreeTextSearch()) |
939 | 941 | { |
940 | 942 | File luceneDir = getExpectedLocationOfLuceneIndex(throwIfNotPrepared); |
941 | | - if (forceReprocess && luceneDir.exists()) |
| 943 | + long sizeInGb = targetFile.length() / (1024 * 1024 * 1024); |
| 944 | + log.debug("preparing lucene index, VCF size: " + sizeInGb); |
| 945 | + |
| 946 | + if (!forceReprocess && doesLuceneIndexExist()) |
942 | 947 | { |
943 | | - try |
944 | | - { |
945 | | - FileUtils.deleteDirectory(luceneDir); |
946 | | - } |
947 | | - catch (IOException e) |
948 | | - { |
949 | | - throw new PipelineJobException(e); |
950 | | - } |
| 948 | + log.debug("Existing lucene index found, will not re-create: " + luceneDir.getPath()); |
951 | 949 | } |
952 | | - |
953 | | - if (forceReprocess || !doesLuceneIndexExist()) |
| 950 | + else if (sizeInGb > 50) |
954 | 951 | { |
955 | | - prepareLuceneIndex(log); |
| 952 | + log.info("VCF is too large, submitting VcfToLuceneIndexer as a separate pipeline job"); |
| 953 | + final File vcf = targetFile; |
| 954 | + JobRunner.getDefault().execute(() -> { |
| 955 | + try |
| 956 | + { |
| 957 | + PipeRoot root = PipelineService.get().getPipelineRootSetting(getContainerObj()); |
| 958 | + PipelineService.get().queueJob(new JBrowseLucenePipelineJob(getContainerObj(), null, root, vcf, luceneDir, getInfoFieldsToIndex(), allowLenientLuceneProcessing())); |
| 959 | + } |
| 960 | + catch (PipelineValidationException e) |
| 961 | + { |
| 962 | + log.error(e); |
| 963 | + } |
| 964 | + }); |
956 | 965 | } |
957 | 966 | else |
958 | 967 | { |
959 | | - log.debug("Existing lucene index found, will not re-create: " + luceneDir.getPath()); |
| 968 | + if (forceReprocess && luceneDir.exists()) |
| 969 | + { |
| 970 | + try |
| 971 | + { |
| 972 | + log.debug("Deleting existing index: " + luceneDir.getPath()); |
| 973 | + FileUtils.deleteDirectory(luceneDir); |
| 974 | + } |
| 975 | + catch (IOException e) |
| 976 | + { |
| 977 | + throw new PipelineJobException(e); |
| 978 | + } |
| 979 | + } |
| 980 | + |
| 981 | + if (forceReprocess || !doesLuceneIndexExist()) |
| 982 | + { |
| 983 | + JBrowseLucenePipelineJob.prepareLuceneIndex(targetFile, luceneDir, log, getInfoFieldsToIndex(), allowLenientLuceneProcessing()); |
| 984 | + } |
| 985 | + else |
| 986 | + { |
| 987 | + log.debug("Existing lucene index found, will not re-create: " + luceneDir.getPath()); |
| 988 | + } |
960 | 989 | } |
961 | 990 | } |
962 | 991 |
|
@@ -988,60 +1017,10 @@ private boolean doesLuceneIndexExist() |
988 | 1017 | return Arrays.asList(rawFields.split(",")); |
989 | 1018 | } |
990 | 1019 |
|
991 | | - private void prepareLuceneIndex(Logger log) throws PipelineJobException |
| 1020 | + private boolean allowLenientLuceneProcessing() |
992 | 1021 | { |
993 | | - log.debug("Generating VCF full text index for file: " + getExpData().getFile().getName()); |
994 | | - |
995 | | - DISCVRSeqRunner runner = new DISCVRSeqRunner(log); |
996 | | - if (!runner.jarExists()) |
997 | | - { |
998 | | - log.error("Unable to find DISCVRSeq.jar, skipping lucene index creation"); |
999 | | - return; |
1000 | | - } |
1001 | | - |
1002 | | - File indexDir = getExpectedLocationOfLuceneIndex(false); |
1003 | | - if (indexDir != null && indexDir.exists()) |
1004 | | - { |
1005 | | - try |
1006 | | - { |
1007 | | - FileUtils.deleteDirectory(getExpectedLocationOfLuceneIndex(false)); |
1008 | | - } |
1009 | | - catch (IOException e) |
1010 | | - { |
1011 | | - throw new PipelineJobException(e); |
1012 | | - } |
1013 | | - } |
1014 | | - |
1015 | | - List<String> args = runner.getBaseArgs("VcfToLuceneIndexer"); |
1016 | | - args.add("-V"); |
1017 | | - args.add(getExpData().getFile().getPath()); |
1018 | | - |
1019 | | - args.add("-O"); |
1020 | | - args.add(indexDir.getPath()); |
1021 | | - |
1022 | | - args.add("--validation-stringency"); |
1023 | | - args.add("LENIENT"); |
1024 | | - |
1025 | | - List<String> infoFieldsForFullTextSearch = getInfoFieldsToIndex(); |
1026 | | - for (String field : infoFieldsForFullTextSearch) |
1027 | | - { |
1028 | | - args.add("-IF"); |
1029 | | - args.add(field); |
1030 | | - } |
1031 | | - |
1032 | | - args.add("--allow-missing-fields"); |
1033 | | - |
1034 | | - args.add("--index-stats"); |
1035 | | - args.add(getExpectedLocationOfLuceneIndexStats(false).getPath()); |
1036 | | - |
1037 | 1022 | JSONObject config = getExtraTrackConfig(); |
1038 | | - if (config != null && !config.isNull("lenientLuceneProcessing") && config.getBoolean("lenientLuceneProcessing")) |
1039 | | - { |
1040 | | - args.add("--validation-stringency"); |
1041 | | - args.add("LENIENT"); |
1042 | | - } |
1043 | | - |
1044 | | - runner.execute(args); |
| 1023 | + return config != null && !config.isNull("lenientLuceneProcessing") && config.getBoolean("lenientLuceneProcessing"); |
1045 | 1024 | } |
1046 | 1025 |
|
1047 | 1026 | protected void createIndex(File finalLocation, Logger log, File idx, boolean throwIfNotPrepared) throws PipelineJobException |
@@ -1385,11 +1364,6 @@ public boolean shouldHaveFreeTextSearch() |
1385 | 1364 | return json != null && json.optBoolean("createFullTextIndex", false); |
1386 | 1365 | } |
1387 | 1366 |
|
1388 | | - public File getExpectedLocationOfLuceneIndexStats(boolean throwIfNotFound) |
1389 | | - { |
1390 | | - return new File(getExpectedLocationOfLuceneIndex(throwIfNotFound).getPath() + ".stats.txt"); |
1391 | | - } |
1392 | | - |
1393 | 1367 | public File getExpectedLocationOfLuceneIndex(boolean throwIfNotFound) |
1394 | 1368 | { |
1395 | 1369 | File basedir = getLocationOfProcessedTrack(false); |
|
0 commit comments