33import org .apache .commons .collections4 .CollectionUtils ;
44import org .apache .commons .io .FileUtils ;
55import org .apache .commons .lang3 .StringUtils ;
6- import org .labkey . api . data . ColumnInfo ;
6+ import org .jetbrains . annotations . NotNull ;
77import org .labkey .api .data .CompareType ;
88import org .labkey .api .data .Container ;
99import org .labkey .api .data .ContainerManager ;
4848import java .sql .ResultSet ;
4949import java .sql .SQLException ;
5050import java .util .ArrayList ;
51- import java .util .Arrays ;
5251import java .util .Collection ;
5352import java .util .Collections ;
5453import java .util .HashMap ;
@@ -91,7 +90,7 @@ public String getDescription()
9190 }
9291
9392 @ Override
94- public TaskPipeline getTaskPipeline ()
93+ public TaskPipeline <?> getTaskPipeline ()
9594 {
9695 return PipelineJobService .get ().getTaskPipeline (new TaskId (OrphanFilePipelineJob .class ));
9796 }
@@ -129,7 +128,7 @@ public List<String> getProtocolActionNames()
129128 }
130129
131130 @ Override
132- public PipelineJob .Task createTask (PipelineJob job )
131+ public PipelineJob .Task <?> createTask (PipelineJob job )
133132 {
134133 return new Task (this , job );
135134 }
@@ -142,7 +141,7 @@ public boolean isJobComplete(PipelineJob job)
142141 }
143142
144143 @ Override
145- public RecordedActionSet run () throws PipelineJobException
144+ public @ NotNull RecordedActionSet run () throws PipelineJobException
146145 {
147146 getJob ().getLogger ().info ("## The following sections list any files or pipeline jobs that appear to be orphans, not connected to any imported readsets or sequence outputs:" );
148147
@@ -165,7 +164,23 @@ public RecordedActionSet run() throws PipelineJobException
165164 knownExpDatas = Collections .unmodifiableSet (knownExpDatas );
166165 //messages.add("## total registered sequence ExpData: " + knownExpDatas.size());
167166
168- getOrphanFilesForContainer (getJob ().getContainer (), getJob ().getUser (), orphanFiles , orphanIndexes , orphanJobs , messages , probableDeletes , knownJobPaths , knownExpDatas );
167+ // Build a map of data file URI -> ExpData IDs for all known data, to cover cross-container files
168+ Map <URI , Set <Integer >> knownDataMap = new HashMap <>();
169+ for (Integer d : knownExpDatas )
170+ {
171+ ExpData ed = ExperimentService .get ().getExpData (d );
172+ if (ed != null )
173+ {
174+ if (!knownDataMap .containsKey (ed .getDataFileURI ()))
175+ {
176+ knownDataMap .put (ed .getDataFileURI (), new HashSet <>());
177+ }
178+
179+ knownDataMap .get (ed .getDataFileURI ()).add (d );
180+ }
181+ }
182+
183+ getOrphanFilesForContainer (getJob ().getContainer (), getJob ().getUser (), orphanFiles , orphanIndexes , orphanJobs , messages , probableDeletes , knownJobPaths , knownExpDatas , knownDataMap );
169184 probableDeletes .addAll (orphanIndexes );
170185
171186 if (!orphanFiles .isEmpty ())
@@ -286,7 +301,7 @@ private Set<File> getKnownSequenceJobPaths(Container c, User u, Collection<Strin
286301 return knownJobPaths ;
287302 }
288303
289- private Map <URI , Set <Integer >> getDataMapForContainer (Container c )
304+ private Map <URI , Set <Integer >> getDataMapForContainer (Container c , Map < URI , Set < Integer >> knownExpDataMap )
290305 {
291306 SimpleFilter dataFilter = new SimpleFilter (FieldKey .fromString ("container" ), c .getId ());
292307 TableInfo dataTable = ExperimentService .get ().getTinfoData ();
@@ -320,10 +335,21 @@ public void exec(ResultSet rs) throws SQLException
320335 });
321336 //messages.add("## total ExpData paths: " + dataMap.size());
322337
338+ // merge in the additional known ExpData IDs, keyed by URI:
339+ for (URI u : knownExpDataMap .keySet ())
340+ {
341+ if (!dataMap .containsKey (u ))
342+ {
343+ dataMap .put (u , new HashSet <>());
344+ }
345+
346+ dataMap .get (u ).addAll (knownExpDataMap .get (u ));
347+ }
348+
323349 return dataMap ;
324350 }
325351
326- public void getOrphanFilesForContainer (Container c , User u , Set <File > orphanFiles , Set <File > orphanIndexes , Set <PipelineStatusFile > orphanJobs , List <String > messages , Set <File > probableDeletes , Set <File > knownSequenceJobPaths , Set <Integer > knownExpDatas )
352+ public void getOrphanFilesForContainer (Container c , User u , Set <File > orphanFiles , Set <File > orphanIndexes , Set <PipelineStatusFile > orphanJobs , List <String > messages , Set <File > probableDeletes , Set <File > knownSequenceJobPaths , Set <Integer > knownExpDatas , Map < URI , Set < Integer >> knownExpDataMap )
327353 {
328354 PipeRoot root = PipelineService .get ().getPipelineRootSetting (c );
329355 if (root == null )
@@ -338,7 +364,7 @@ public void getOrphanFilesForContainer(Container c, User u, Set<File> orphanFile
338364
339365 messages .add ("## processing container: " + c .getPath ());
340366
341- Map <URI , Set <Integer >> dataMap = getDataMapForContainer (c );
367+ Map <URI , Set <Integer >> dataMap = getDataMapForContainer (c , knownExpDataMap );
342368
343369 Container parent = c .isWorkbook () ? c .getParent () : c ;
344370 TableInfo jobsTableParent = PipelineService .get ().getJobsTable (u , parent );
@@ -438,7 +464,7 @@ public boolean accept(File pathname)
438464 {
439465 if (child .isWorkbook ())
440466 {
441- getOrphanFilesForContainer (child , u , orphanFiles , orphanIndexes , orphanJobs , messages , probableDeletes , knownSequenceJobPaths , knownExpDatas );
467+ getOrphanFilesForContainer (child , u , orphanFiles , orphanIndexes , orphanJobs , messages , probableDeletes , knownSequenceJobPaths , knownExpDatas , knownExpDataMap );
442468 }
443469 }
444470 }
0 commit comments