@@ -103,8 +103,8 @@ protected static List<ToolParameterDescriptor> getToolParameters(boolean addCopy
103103 put ("minValue" , 0 );
104104 }}, 36 ),
105105 ToolParameterDescriptor .create ("consolidate" , "Consolidate" , "If importing data in batches, a new fragment is created for each batch. In case thousands of fragments are created, GenomicsDB feature readers will try to open ~20x as many files. Also, internally GenomicsDB would consume more memory to maintain bookkeeping data from all fragments. Use this flag to merge all fragments into one. Merging can potentially improve read performance, however overall benefit might not be noticeable as the top Java layers have significantly higher overheads. This flag has no effect if only one batch is used. Defaults to false." , "checkbox" , new JSONObject (){{
106- put ("checked" , false );
107- }}, false ),
106+ put ("checked" , true );
107+ }}, true ),
108108 ToolParameterDescriptor .create ("scatterGather" , "Scatter/Gather Options" , "If selected, this job will be divided to run job per chromosome. The final step will take the VCF from each intermediate step and combined to make a final VCF file." , "sequenceanalysis-variantscattergatherpanel" , new JSONObject (){{
109109 put ("defaultValue" , "chunked" );
110110 }}, false )
@@ -601,11 +601,6 @@ else if (genomeIds.isEmpty())
601601 GenomicsDbImportWrapper wrapper = new GenomicsDbImportWrapper (ctx .getLogger ());
602602 List <String > options = new ArrayList <>(getClientCommandArgs (ctx .getParams ()));
603603
604- if (ctx .getParams ().optBoolean ("consolidate" , false ))
605- {
606- options .add ("--consolidate" );
607- }
608-
609604 if (ctx .getParams ().optBoolean ("sharedPosixOptimizations" , false ))
610605 {
611606 options .add ("--genomicsdb-shared-posixfs-optimizations" );
@@ -622,48 +617,18 @@ else if (genomeIds.isEmpty())
622617 wrapper .addToEnvironment ("TILEDB_DISABLE_FILE_LOCKING" , "1" );
623618 }
624619
625- if (ctx .getParams ().optBoolean ("consolidateFirst" , false ))
626- {
627- ctx .getLogger ().info ("Will pre-consolidate the workspace using consolidate_genomicsdb_array" );
628- List <String > baseArgs = new ArrayList <>();
629- baseArgs .add (SequencePipelineService .get ().getExeForPackage ("GENOMICSDB_PATH" , "consolidate_genomicsdb_array" ).getPath ());
630-
631- baseArgs .add ("-w" );
632- baseArgs .add (workingDestinationWorkspaceFolder .getPath ());
633-
634- if (ctx .getParams ().optBoolean ("sharedPosixOptimizations" , false ))
635- {
636- baseArgs .add ("--shared-posixfs-optimizations" );
637- }
638-
639- if (ctx .getParams ().get ("genomicsdbSegmentSize" ) != null )
640- {
641- baseArgs .add ("--segment-size" );
642- baseArgs .add (String .valueOf (ctx .getParams ().get ("genomicsdbSegmentSize" )));
643- }
644-
645- List <Interval > intervals = getIntervalsOrFullGenome (ctx , genome );
646- for (Interval i : intervals )
647- {
648- File contigFolder = new File (workingDestinationWorkspaceFolder , getFolderNameFromInterval (i ));
649- ctx .getLogger ().info ("Consolidating contig folder: " + contigFolder );
650-
651- List <String > toRun = new ArrayList <>(baseArgs );
652- toRun .add ("-a" );
653- toRun .add (contigFolder .getName ());
654-
655- new SimpleScriptWrapper (ctx .getLogger ()).execute (toRun );
656-
657- reportFragmentsPerContig (ctx , contigFolder , i .getContig ());
658- }
659- }
660-
661620 if (!genomicsDbCompleted )
662621 {
663622 try
664623 {
665624 List <Interval > intervals = getIntervals (ctx );
666625
626+ if (ctx .getParams ().optBoolean ("consolidateFirst" , false ))
627+ {
628+ ctx .getLogger ().info ("Will pre-consolidate the workspace using consolidate_genomicsdb_array" );
629+ doConsolidate (ctx , workingDestinationWorkspaceFolder , genome );
630+ }
631+
667632 Integer maxRam = SequencePipelineService .get ().getMaxRam ();
668633 Integer nativeMemoryBuffer = ctx .getParams ().optInt ("nativeMemoryBuffer" , 0 );
669634 if (maxRam != null && nativeMemoryBuffer > 0 )
@@ -680,6 +645,12 @@ else if (genomeIds.isEmpty())
680645
681646 wrapper .execute (genome , vcfsToProcess , workingDestinationWorkspaceFolder , intervals , options , _append );
682647
648+ if (ctx .getParams ().optBoolean ("consolidate" , true ))
649+ {
650+ ctx .getLogger ().info ("Will consolidate the workspace using consolidate_genomicsdb_array" );
651+ doConsolidate (ctx , workingDestinationWorkspaceFolder , genome );
652+ }
653+
683654 FileUtils .touch (doneFile );
684655 ctx .getLogger ().debug ("GenomicsDB complete, touching file: " + doneFile .getPath ());
685656 }
@@ -781,6 +752,41 @@ else if (genomeIds.isEmpty())
781752 }
782753 }
783754
755+ private void doConsolidate (JobContext ctx , File workingDestinationWorkspaceFolder , ReferenceGenome genome ) throws PipelineJobException // Runs the consolidate_genomicsdb_array executable once per interval folder of the given workspace; job parameters are read from ctx, and genome is only forwarded to getIntervalsOrFullGenome
756+ {
757+ List <String > baseArgs = new ArrayList <>(); // arguments shared by every per-folder invocation in the loop below
758+ baseArgs .add (SequencePipelineService .get ().getExeForPackage ("GENOMICSDB_PATH" , "consolidate_genomicsdb_array" ).getPath ()); // first element is the executable's path; NOTE(review): "GENOMICSDB_PATH" presumably names a config/environment location - confirm
759+
760+ baseArgs .add ("-w" ); // -w is followed by the workspace folder path
761+ baseArgs .add (workingDestinationWorkspaceFolder .getPath ());
762+
763+ if (ctx .getParams ().optBoolean ("sharedPosixOptimizations" , false )) // optional flag, treated as off when the job parameter is absent
764+ {
765+ baseArgs .add ("--shared-posixfs-optimizations" );
766+ }
767+
768+ if (ctx .getParams ().get ("genomicsdbSegmentSize" ) != null ) // segment size is only passed when the job supplies a value
769+ {
770+ baseArgs .add ("--segment-size" );
771+ baseArgs .add (String .valueOf (ctx .getParams ().get ("genomicsdbSegmentSize" )));
772+ }
773+
774+ List <Interval > intervals = getIntervalsOrFullGenome (ctx , genome ); // helper not visible here; presumably falls back to whole-genome intervals when none are configured - confirm
775+ for (Interval i : intervals )
776+ {
777+ File contigFolder = new File (workingDestinationWorkspaceFolder , getFolderNameFromInterval (i )); // one folder per interval, located directly under the workspace
778+ ctx .getLogger ().info ("Consolidating contig folder: " + contigFolder );
779+
780+ List <String > toRun = new ArrayList <>(baseArgs ); // copy, so the per-folder -a argument never accumulates in baseArgs
781+ toRun .add ("-a" ); // -a receives the folder's name only, not its full path
782+ toRun .add (contigFolder .getName ());
783+
784+ new SimpleScriptWrapper (ctx .getLogger ()).execute (toRun ); // a fresh wrapper per invocation, logging to the job logger
785+
786+ reportFragmentsPerContig (ctx , contigFolder , i .getContig ()); // helper not visible here; called after each folder has been consolidated
787+ }
788+ }
789+
784790 private void copyWorkspace (JobContext ctx , File sourceWorkspace , File destinationWorkspaceFolder , ReferenceGenome genome , Collection <File > toDelete , boolean alwaysPerformRsync , boolean overwriteTopLevelFiles , boolean removeExistingTopLevelFiles ) throws PipelineJobException
785791 {
786792 if (!destinationWorkspaceFolder .exists ())
0 commit comments