Skip to content

Commit 9b8c21e

Browse files
committed
Allow option to consolidate GenomicsDB either before or after processing
1 parent 32a2965 commit 9b8c21e

File tree

1 file changed

+49
-43
lines changed

1 file changed

+49
-43
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java

Lines changed: 49 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ protected static List<ToolParameterDescriptor> getToolParameters(boolean addCopy
103103
put("minValue", 0);
104104
}}, 36),
105105
ToolParameterDescriptor.create("consolidate", "Consolidate", "If importing data in batches, a new fragment is created for each batch. In case thousands of fragments are created, GenomicsDB feature readers will try to open ~20x as many files. Also, internally GenomicsDB would consume more memory to maintain bookkeeping data from all fragments. Use this flag to merge all fragments into one. Merging can potentially improve read performance, however overall benefit might not be noticeable as the top Java layers have significantly higher overheads. This flag has no effect if only one batch is used. Defaults to false.", "checkbox", new JSONObject(){{
106-
put("checked", false);
107-
}}, false),
106+
put("checked", true);
107+
}}, true),
108108
ToolParameterDescriptor.create("scatterGather", "Scatter/Gather Options", "If selected, this job will be divided to run job per chromosome. The final step will take the VCF from each intermediate step and combined to make a final VCF file.", "sequenceanalysis-variantscattergatherpanel", new JSONObject(){{
109109
put("defaultValue", "chunked");
110110
}}, false)
@@ -601,11 +601,6 @@ else if (genomeIds.isEmpty())
601601
GenomicsDbImportWrapper wrapper = new GenomicsDbImportWrapper(ctx.getLogger());
602602
List<String> options = new ArrayList<>(getClientCommandArgs(ctx.getParams()));
603603

604-
if (ctx.getParams().optBoolean("consolidate", false))
605-
{
606-
options.add("--consolidate");
607-
}
608-
609604
if (ctx.getParams().optBoolean("sharedPosixOptimizations", false))
610605
{
611606
options.add("--genomicsdb-shared-posixfs-optimizations");
@@ -622,48 +617,18 @@ else if (genomeIds.isEmpty())
622617
wrapper.addToEnvironment("TILEDB_DISABLE_FILE_LOCKING", "1");
623618
}
624619

625-
if (ctx.getParams().optBoolean("consolidateFirst", false))
626-
{
627-
ctx.getLogger().info("Will pre-consolidate the workspace using consolidate_genomicsdb_array");
628-
List<String> baseArgs = new ArrayList<>();
629-
baseArgs.add(SequencePipelineService.get().getExeForPackage("GENOMICSDB_PATH", "consolidate_genomicsdb_array").getPath());
630-
631-
baseArgs.add("-w");
632-
baseArgs.add(workingDestinationWorkspaceFolder.getPath());
633-
634-
if (ctx.getParams().optBoolean("sharedPosixOptimizations", false))
635-
{
636-
baseArgs.add("--shared-posixfs-optimizations");
637-
}
638-
639-
if (ctx.getParams().get("genomicsdbSegmentSize") != null)
640-
{
641-
baseArgs.add("--segment-size");
642-
baseArgs.add(String.valueOf(ctx.getParams().get("genomicsdbSegmentSize")));
643-
}
644-
645-
List<Interval> intervals = getIntervalsOrFullGenome(ctx, genome);
646-
for (Interval i : intervals)
647-
{
648-
File contigFolder = new File(workingDestinationWorkspaceFolder, getFolderNameFromInterval(i));
649-
ctx.getLogger().info("Consolidating contig folder: " + contigFolder);
650-
651-
List<String> toRun = new ArrayList<>(baseArgs);
652-
toRun.add("-a");
653-
toRun.add(contigFolder.getName());
654-
655-
new SimpleScriptWrapper(ctx.getLogger()).execute(toRun);
656-
657-
reportFragmentsPerContig(ctx, contigFolder, i.getContig());
658-
}
659-
}
660-
661620
if (!genomicsDbCompleted)
662621
{
663622
try
664623
{
665624
List<Interval> intervals = getIntervals(ctx);
666625

626+
if (ctx.getParams().optBoolean("consolidateFirst", false))
627+
{
628+
ctx.getLogger().info("Will pre-consolidate the workspace using consolidate_genomicsdb_array");
629+
doConsolidate(ctx, workingDestinationWorkspaceFolder, genome);
630+
}
631+
667632
Integer maxRam = SequencePipelineService.get().getMaxRam();
668633
Integer nativeMemoryBuffer = ctx.getParams().optInt("nativeMemoryBuffer", 0);
669634
if (maxRam != null && nativeMemoryBuffer > 0)
@@ -680,6 +645,12 @@ else if (genomeIds.isEmpty())
680645

681646
wrapper.execute(genome, vcfsToProcess, workingDestinationWorkspaceFolder, intervals, options, _append);
682647

648+
if (ctx.getParams().optBoolean("consolidate", true))
649+
{
650+
ctx.getLogger().info("Will consolidate the workspace using consolidate_genomicsdb_array");
651+
doConsolidate(ctx, workingDestinationWorkspaceFolder, genome);
652+
}
653+
683654
FileUtils.touch(doneFile);
684655
ctx.getLogger().debug("GenomicsDB complete, touching file: " + doneFile.getPath());
685656
}
@@ -781,6 +752,41 @@ else if (genomeIds.isEmpty())
781752
}
782753
}
783754

755+
private void doConsolidate(JobContext ctx, File workingDestinationWorkspaceFolder, ReferenceGenome genome) throws PipelineJobException
756+
{
757+
List<String> baseArgs = new ArrayList<>();
758+
baseArgs.add(SequencePipelineService.get().getExeForPackage("GENOMICSDB_PATH", "consolidate_genomicsdb_array").getPath());
759+
760+
baseArgs.add("-w");
761+
baseArgs.add(workingDestinationWorkspaceFolder.getPath());
762+
763+
if (ctx.getParams().optBoolean("sharedPosixOptimizations", false))
764+
{
765+
baseArgs.add("--shared-posixfs-optimizations");
766+
}
767+
768+
if (ctx.getParams().get("genomicsdbSegmentSize") != null)
769+
{
770+
baseArgs.add("--segment-size");
771+
baseArgs.add(String.valueOf(ctx.getParams().get("genomicsdbSegmentSize")));
772+
}
773+
774+
List<Interval> intervals = getIntervalsOrFullGenome(ctx, genome);
775+
for (Interval i : intervals)
776+
{
777+
File contigFolder = new File(workingDestinationWorkspaceFolder, getFolderNameFromInterval(i));
778+
ctx.getLogger().info("Consolidating contig folder: " + contigFolder);
779+
780+
List<String> toRun = new ArrayList<>(baseArgs);
781+
toRun.add("-a");
782+
toRun.add(contigFolder.getName());
783+
784+
new SimpleScriptWrapper(ctx.getLogger()).execute(toRun);
785+
786+
reportFragmentsPerContig(ctx, contigFolder, i.getContig());
787+
}
788+
}
789+
784790
private void copyWorkspace(JobContext ctx, File sourceWorkspace, File destinationWorkspaceFolder, ReferenceGenome genome, Collection<File> toDelete, boolean alwaysPerformRsync, boolean overwriteTopLevelFiles, boolean removeExistingTopLevelFiles) throws PipelineJobException
785791
{
786792
if (!destinationWorkspaceFolder.exists())

0 commit comments

Comments
 (0)