Skip to content

Commit d9f7c41

Browse files
committed
Update orphan file job to better handle cross-workbook records
1 parent b571bbc commit d9f7c41

File tree

1 file changed

+36
-10
lines changed

1 file changed

+36
-10
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import org.apache.commons.collections4.CollectionUtils;
44
import org.apache.commons.io.FileUtils;
55
import org.apache.commons.lang3.StringUtils;
6-
import org.labkey.api.data.ColumnInfo;
6+
import org.jetbrains.annotations.NotNull;
77
import org.labkey.api.data.CompareType;
88
import org.labkey.api.data.Container;
99
import org.labkey.api.data.ContainerManager;
@@ -48,7 +48,6 @@
4848
import java.sql.ResultSet;
4949
import java.sql.SQLException;
5050
import java.util.ArrayList;
51-
import java.util.Arrays;
5251
import java.util.Collection;
5352
import java.util.Collections;
5453
import java.util.HashMap;
@@ -91,7 +90,7 @@ public String getDescription()
9190
}
9291

9392
@Override
94-
public TaskPipeline getTaskPipeline()
93+
public TaskPipeline<?> getTaskPipeline()
9594
{
9695
return PipelineJobService.get().getTaskPipeline(new TaskId(OrphanFilePipelineJob.class));
9796
}
@@ -129,7 +128,7 @@ public List<String> getProtocolActionNames()
129128
}
130129

131130
@Override
132-
public PipelineJob.Task createTask(PipelineJob job)
131+
public PipelineJob.Task<?> createTask(PipelineJob job)
133132
{
134133
return new Task(this, job);
135134
}
@@ -142,7 +141,7 @@ public boolean isJobComplete(PipelineJob job)
142141
}
143142

144143
@Override
145-
public RecordedActionSet run() throws PipelineJobException
144+
public @NotNull RecordedActionSet run() throws PipelineJobException
146145
{
147146
getJob().getLogger().info("## The following sections list any files or pipeline jobs that appear to be orphans, not connected to any imported readsets or sequence outputs:");
148147

@@ -165,7 +164,23 @@ public RecordedActionSet run() throws PipelineJobException
165164
knownExpDatas = Collections.unmodifiableSet(knownExpDatas);
166165
//messages.add("## total registered sequence ExpData: " + knownExpDatas.size());
167166

168-
getOrphanFilesForContainer(getJob().getContainer(), getJob().getUser(), orphanFiles, orphanIndexes, orphanJobs, messages, probableDeletes, knownJobPaths, knownExpDatas);
167+
// Build a map of data file URI -> ExpData IDs for all known ExpData, to cover cross-container files
168+
Map<URI, Set<Integer>> knownDataMap = new HashMap<>();
169+
for (Integer d : knownExpDatas)
170+
{
171+
ExpData ed = ExperimentService.get().getExpData(d);
172+
if (ed != null)
173+
{
174+
if (!knownDataMap.containsKey(ed.getDataFileURI()))
175+
{
176+
knownDataMap.put(ed.getDataFileURI(), new HashSet<>());
177+
}
178+
179+
knownDataMap.get(ed.getDataFileURI()).add(d);
180+
}
181+
}
182+
183+
getOrphanFilesForContainer(getJob().getContainer(), getJob().getUser(), orphanFiles, orphanIndexes, orphanJobs, messages, probableDeletes, knownJobPaths, knownExpDatas, knownDataMap);
169184
probableDeletes.addAll(orphanIndexes);
170185

171186
if (!orphanFiles.isEmpty())
@@ -286,7 +301,7 @@ private Set<File> getKnownSequenceJobPaths(Container c, User u, Collection<Strin
286301
return knownJobPaths;
287302
}
288303

289-
private Map<URI, Set<Integer>> getDataMapForContainer(Container c)
304+
private Map<URI, Set<Integer>> getDataMapForContainer(Container c, Map<URI, Set<Integer>> knownExpDataMap)
290305
{
291306
SimpleFilter dataFilter = new SimpleFilter(FieldKey.fromString("container"), c.getId());
292307
TableInfo dataTable = ExperimentService.get().getTinfoData();
@@ -320,10 +335,21 @@ public void exec(ResultSet rs) throws SQLException
320335
});
321336
//messages.add("## total ExpData paths: " + dataMap.size());
322337

338+
// merge in the known ExpData entries supplied by the caller:
339+
for (URI u : knownExpDataMap.keySet())
340+
{
341+
if (!dataMap.containsKey(u))
342+
{
343+
dataMap.put(u, new HashSet<>());
344+
}
345+
346+
dataMap.get(u).addAll(knownExpDataMap.get(u));
347+
}
348+
323349
return dataMap;
324350
}
325351

326-
public void getOrphanFilesForContainer(Container c, User u, Set<File> orphanFiles, Set<File> orphanIndexes, Set<PipelineStatusFile> orphanJobs, List<String> messages, Set<File> probableDeletes, Set<File> knownSequenceJobPaths, Set<Integer> knownExpDatas)
352+
public void getOrphanFilesForContainer(Container c, User u, Set<File> orphanFiles, Set<File> orphanIndexes, Set<PipelineStatusFile> orphanJobs, List<String> messages, Set<File> probableDeletes, Set<File> knownSequenceJobPaths, Set<Integer> knownExpDatas, Map<URI, Set<Integer>> knownExpDataMap)
327353
{
328354
PipeRoot root = PipelineService.get().getPipelineRootSetting(c);
329355
if (root == null)
@@ -338,7 +364,7 @@ public void getOrphanFilesForContainer(Container c, User u, Set<File> orphanFile
338364

339365
messages.add("## processing container: " + c.getPath());
340366

341-
Map<URI, Set<Integer>> dataMap = getDataMapForContainer(c);
367+
Map<URI, Set<Integer>> dataMap = getDataMapForContainer(c, knownExpDataMap);
342368

343369
Container parent = c.isWorkbook() ? c.getParent() : c;
344370
TableInfo jobsTableParent = PipelineService.get().getJobsTable(u, parent);
@@ -438,7 +464,7 @@ public boolean accept(File pathname)
438464
{
439465
if (child.isWorkbook())
440466
{
441-
getOrphanFilesForContainer(child, u, orphanFiles, orphanIndexes, orphanJobs, messages, probableDeletes, knownSequenceJobPaths, knownExpDatas);
467+
getOrphanFilesForContainer(child, u, orphanFiles, orphanIndexes, orphanJobs, messages, probableDeletes, knownSequenceJobPaths, knownExpDatas, knownExpDataMap);
442468
}
443469
}
444470
}

0 commit comments

Comments
 (0)