4040import java .util .ArrayList ;
4141import java .util .Arrays ;
4242import java .util .Collection ;
43+ import java .util .Collections ;
4344import java .util .Date ;
4445import java .util .HashSet ;
4546import java .util .LinkedHashSet ;
4647import java .util .List ;
4748import java .util .Map ;
4849import java .util .Set ;
50+ import java .util .stream .Collectors ;
4951
5052abstract public class AbstractGenomicsDBImportHandler extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > implements SequenceOutputHandler .TracksVCF , SequenceOutputHandler .HasCustomVariantMerge , VariantProcessingStep .MayRequirePrepareTask , VariantProcessingStep .SupportsScatterGather
5153{
@@ -302,16 +304,28 @@ Set<String> getContigsInInputs(List<File> inputVCFs, Logger log) throws Pipeline
302304 return _contigsInInputs ;
303305 }
304306
305- private void copyToLevelFiles (PipelineJob job , File sourceWorkspace , File destinationWorkspace , boolean overwrite ) throws IOException
307+ private void copyToLevelFiles (PipelineJob job , File sourceWorkspace , File destinationWorkspace , boolean removeOtherFiles , boolean overwriteExisting ) throws IOException
306308 {
307309 job .getLogger ().info ("Copying top-level files from: " + sourceWorkspace .getPath ());
310+ if (removeOtherFiles )
311+ {
312+ for (File f : destinationWorkspace .listFiles ())
313+ {
314+ if (!f .isDirectory ())
315+ {
316+ job .getLogger ().debug ("deleting existing top-level file: " + f .getPath ());
317+ f .delete ();
318+ }
319+ }
320+ }
321+
308322 for (String fn : Arrays .asList ("callset.json" , "vidmap.json" , "vcfheader.vcf" , "__tiledb_workspace.tdb" ))
309323 {
310324 File source = new File (sourceWorkspace , fn );
311325 File dest = new File (destinationWorkspace , fn );
312326 if (dest .exists ())
313327 {
314- if (!overwrite )
328+ if (!overwriteExisting )
315329 {
316330 job .getLogger ().debug ("workspace file exists, will not overwrite: " + dest .getPath ());
317331 continue ;
@@ -457,7 +471,7 @@ else if (genomeIds.isEmpty())
457471 }
458472
459473 File sourceWorkspace = getSourceWorkspace (ctx .getParams (), ctx .getSequenceSupport ());
460- copyWorkspace (ctx , sourceWorkspace , workingDestinationWorkspaceFolder , genome , toDelete , !genomicsDbCompleted );
474+ copyWorkspace (ctx , sourceWorkspace , workingDestinationWorkspaceFolder , genome , toDelete , !genomicsDbCompleted , ! genomicsDbCompleted , ! genomicsDbCompleted );
461475 }
462476 else
463477 {
@@ -587,7 +601,7 @@ else if (genomeIds.isEmpty())
587601
588602 if (!copyToSourceDone .exists ())
589603 {
590- copyWorkspace (ctx , workingDestinationWorkspaceFolder , workspaceLocalDir , genome , toDelete , true );
604+ copyWorkspace (ctx , workingDestinationWorkspaceFolder , workspaceLocalDir , genome , toDelete , true , false , false );
591605
592606 try
593607 {
@@ -635,7 +649,7 @@ else if (genomeIds.isEmpty())
635649 }
636650 }
637651
638- private void copyWorkspace (JobContext ctx , File sourceWorkspace , File destinationWorkspaceFolder , ReferenceGenome genome , Collection <File > toDelete , boolean alwaysPerformRsync ) throws PipelineJobException
652+ private void copyWorkspace (JobContext ctx , File sourceWorkspace , File destinationWorkspaceFolder , ReferenceGenome genome , Collection <File > toDelete , boolean alwaysPerformRsync , boolean overwriteTopLevelFiles , boolean removeExistingTopLevelFiles ) throws PipelineJobException
639653 {
640654 if (!destinationWorkspaceFolder .exists ())
641655 {
@@ -665,14 +679,15 @@ private void copyWorkspace(JobContext ctx, File sourceWorkspace, File destinatio
665679 else
666680 {
667681 ctx .getLogger ().info ("has been copied, skipping: " + i .getContig ());
682+ assertContigFoldersEqual (sourceFolder , destContigFolder );
668683 reportFragmentsPerContig (ctx , destContigFolder , i .getContig ());
669684 continue ;
670685 }
671686 }
672687
673688 if (!haveCopiedTopLevelFiles )
674689 {
675- copyToLevelFiles (ctx .getJob (), sourceWorkspace , destinationWorkspaceFolder , false );
690+ copyToLevelFiles (ctx .getJob (), sourceWorkspace , destinationWorkspaceFolder , removeExistingTopLevelFiles , overwriteTopLevelFiles );
676691 haveCopiedTopLevelFiles = true ;
677692 }
678693
@@ -704,6 +719,7 @@ private void copyWorkspace(JobContext ctx, File sourceWorkspace, File destinatio
704719 }
705720
706721 FileUtils .touch (copyDone );
722+ assertContigFoldersEqual (sourceFolder , destContigFolder );
707723 reportFragmentsPerContig (ctx , destContigFolder , i .getContig ());
708724 }
709725 catch (IOException e )
@@ -718,22 +734,46 @@ private File getCopyToSourceDone(JobContext ctx)
718734 return new File (ctx .getSourceDirectory (), "copyToWebserver.done" );
719735 }
720736
721- private void reportFragmentsPerContig ( JobContext ctx , File destContigFolder , String contigName )
737+ private void assertContigFoldersEqual ( File sourceFolder , File destContigFolder ) throws IllegalArgumentException
722738 {
723- if (destContigFolder .exists ())
739+ List <String > sourceFiles = getFragmentsPerContig (sourceFolder );
740+ List <String > destFiles = getFragmentsPerContig (destContigFolder );
741+
742+ if (!sourceFiles .equals (destFiles ))
724743 {
725- File [] children = destContigFolder .listFiles ( x -> {
726- return x . isDirectory () && ! "genomicsdb_meta_dir" . equals ( x . getName ());
727- });
744+ throw new IllegalArgumentException ( "Source and destination contig files not equal for: " + destContigFolder .getPath ());
745+ }
746+ }
728747
729- ctx .getLogger ().info (contigName + " total fragments: " + children .length );
748+ private void reportFragmentsPerContig (JobContext ctx , File destContigFolder , String contigName )
749+ {
750+ List <String > children = getFragmentsPerContig (destContigFolder );
751+ if (children == null )
752+ {
753+ ctx .getLogger ().warn ("expected folder not found: " + destContigFolder .getPath ());
730754 }
731755 else
732756 {
733- ctx .getLogger ().warn ( "expected folder not found : " + destContigFolder . getPath ());
757+ ctx .getLogger ().info ( contigName + " total fragments : " + children . size ());
734758 }
735759 }
736760
761+ private List <String > getFragmentsPerContig (File destContigFolder )
762+ {
763+ if (destContigFolder .exists ())
764+ {
765+ List <String > children = Arrays .stream (destContigFolder .listFiles (x -> {
766+ return x .isDirectory () && !"genomicsdb_meta_dir" .equals (x .getName ());
767+ })).map (File ::getName ).collect (Collectors .toList ());
768+
769+ Collections .sort (children );
770+
771+ return children ;
772+ }
773+
774+ return null ;
775+ }
776+
737777 private boolean doCopyLocal (JSONObject params )
738778 {
739779 return params .optBoolean ("doCopyGVcfLocal" , false );
0 commit comments