Skip to content

Commit 37e4612

Browse files
committed
Ensure consistent case for status in MCC and add automatic selection of genomicsDB consolidate batch size
1 parent 8e801ab commit 37e4612

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ protected static List<ToolParameterDescriptor> getToolParameters(boolean addCopy
105105
ToolParameterDescriptor.create("consolidate", "Consolidate", "If importing data in batches, a new fragment is created for each batch. In case thousands of fragments are created, GenomicsDB feature readers will try to open ~20x as many files. Also, internally GenomicsDB would consume more memory to maintain bookkeeping data from all fragments. Use this flag to merge all fragments into one. Merging can potentially improve read performance, however overall benefit might not be noticeable as the top Java layers have significantly higher overheads. This flag has no effect if only one batch is used. Defaults to false.", "checkbox", new JSONObject(){{
106106
put("checked", true);
107107
}}, true),
108-
ToolParameterDescriptor.create("genomicsdbBatchSize", "Consolidate Batch Size", "This is passed to --batch-size of consolidate_genomicsdb_array, and can reduce memory usage.", "ldk-numberfield", new JSONObject(){{
108+
ToolParameterDescriptor.create("genomicsdbBatchSize", "Consolidate Batch Size", "This is passed to --batch-size of consolidate_genomicsdb_array, and can reduce memory usage. If a value of -1 is used, this will auto-calculate batch size using numberOfFragments/4", "ldk-numberfield", new JSONObject(){{
109109
put("minValue", 0);
110110
}}, null),
111111
ToolParameterDescriptor.create("scatterGather", "Scatter/Gather Options", "If selected, this job will be divided to run job per chromosome. The final step will take the VCF from each intermediate step and combined to make a final VCF file.", "sequenceanalysis-variantscattergatherpanel", new JSONObject(){{
@@ -792,6 +792,19 @@ private void doConsolidate(JobContext ctx, File workingDestinationWorkspaceFolde
792792
toRun.add("-a");
793793
toRun.add(contigFolder.getName());
794794

795+
if (batchSize == -1)
796+
{
797+
int totalFragments = getFragmentsPerContig(contigFolder).size();
798+
int inferredBatchSize = Math.max(1, totalFragments / 4);
799+
800+
ctx.getLogger().debug("Inferring batch size from fragments (" + totalFragments + "). Using: " + inferredBatchSize);
801+
if (inferredBatchSize > 1)
802+
{
803+
baseArgs.add("-b");
804+
baseArgs.add(String.valueOf(inferredBatchSize));
805+
}
806+
}
807+
795808
new SimpleScriptWrapper(ctx.getLogger()).execute(toRun);
796809

797810
reportFragmentsPerContig(ctx, contigFolder, i.getContig());
@@ -902,7 +915,7 @@ private void reportFragmentsPerContig(JobContext ctx, File destContigFolder, Str
902915
}
903916
else
904917
{
905-
ctx.getLogger().info(contigName + " total fragments: " + children.size());
918+
ctx.getLogger().info(contigName + ", total fragments: " + children.size()+ ", in: " + destContigFolder.getPath());
906919
}
907920
}
908921

0 commit comments

Comments
 (0)