Skip to content

Commit 26c7446

Browse files
committed
Support --batch-size for consolidate_genomicsdb_array
1 parent 3eb820f commit 26c7446

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,9 @@ protected static List<ToolParameterDescriptor> getToolParameters(boolean addCopy
105105
ToolParameterDescriptor.create("consolidate", "Consolidate", "If importing data in batches, a new fragment is created for each batch. In case thousands of fragments are created, GenomicsDB feature readers will try to open ~20x as many files. Also, internally GenomicsDB would consume more memory to maintain bookkeeping data from all fragments. Use this flag to merge all fragments into one. Merging can potentially improve read performance, however overall benefit might not be noticeable as the top Java layers have significantly higher overheads. This flag has no effect if only one batch is used. Defaults to false.", "checkbox", new JSONObject(){{
106106
put("checked", true);
107107
}}, true),
108+
ToolParameterDescriptor.create("genomicsdbBatchSize", "Consolidate Batch Size", "This is passed to --batch-size of consolidate_genomicsdb_array, and can reduce memory usage.", "ldk-numberfield", new JSONObject(){{
109+
put("minValue", 0);
110+
}}, 50),
108111
ToolParameterDescriptor.create("scatterGather", "Scatter/Gather Options", "If selected, this job will be divided to run job per chromosome. The final step will take the VCF from each intermediate step and combined to make a final VCF file.", "sequenceanalysis-variantscattergatherpanel", new JSONObject(){{
109112
put("defaultValue", "chunked");
110113
}}, false)
@@ -771,6 +774,10 @@ private void doConsolidate(JobContext ctx, File workingDestinationWorkspaceFolde
771774
baseArgs.add(String.valueOf(ctx.getParams().get("genomicsdbSegmentSize")));
772775
}
773776

777+
int batchSize = ctx.getParams().optInt("genomicsdbBatchSize", 50);
778+
baseArgs.add("-b");
779+
baseArgs.add(String.valueOf(batchSize));
780+
774781
List<Interval> intervals = getIntervalsOrFullGenome(ctx, genome);
775782
for (Interval i : intervals)
776783
{

0 commit comments

Comments
 (0)