Skip to content

Commit 74fe7a7

Browse files
committed
Improve cluster error reporting
1 parent c08483e commit 74fe7a7

File tree

5 files changed

+45
-10
lines changed

5 files changed

+45
-10
lines changed

cluster/api-src/org/labkey/api/cluster/ClusterService.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ public static void setInstance(ClusterService instance)
3636
*/
3737
abstract public PipelineJob createClusterRemotePipelineJob(Container c, User u, String jobName, RemoteExecutionEngine engine, ClusterRemoteTask task, File logFile) throws PipelineValidationException;
3838

39+
abstract public File getSerializedJobFile(File jobLogFile);
40+
3941
public interface ClusterRemoteTask extends Serializable
4042
{
4143
public void run(Logger log);

cluster/src/org/labkey/cluster/ClusterController.java

Lines changed: 20 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@
2929
import org.labkey.api.data.TableInfo;
3030
import org.labkey.api.pipeline.PipeRoot;
3131
import org.labkey.api.pipeline.PipelineJob;
32-
import org.labkey.api.pipeline.PipelineJobException;
3332
import org.labkey.api.pipeline.PipelineJobService;
3433
import org.labkey.api.pipeline.PipelineService;
3534
import org.labkey.api.pipeline.PipelineStatusFile;
@@ -39,7 +38,6 @@
3938
import org.labkey.api.security.RequiresSiteAdmin;
4039
import org.labkey.api.security.permissions.AdminPermission;
4140
import org.labkey.api.security.permissions.ReadPermission;
42-
import org.labkey.api.settings.AppProps;
4341
import org.labkey.api.settings.ResourceURL;
4442
import org.labkey.api.util.HtmlString;
4543
import org.labkey.api.util.PageFlowUtil;
@@ -51,7 +49,6 @@
5149
import org.springframework.web.servlet.ModelAndView;
5250

5351
import java.io.File;
54-
import java.io.IOException;
5552
import java.util.ArrayList;
5653
import java.util.HashMap;
5754
import java.util.List;
@@ -313,26 +310,39 @@ public boolean handlePost(JobIdsForm form, BindException errors) throws Exceptio
313310
sfs.add(sf);
314311
}
315312

316-
sfs.forEach(sf -> {
313+
for (PipelineStatusFile sf : sfs)
314+
{
317315
File log = new File(sf.getFilePath());
318316
File json = AbstractClusterExecutionEngine.getSerializedJobFile(log);
319317
if (!json.exists())
320318
{
321-
return;
319+
errors.reject(ERROR_MSG, "Unable to find pipeline JSON, expected: " + json.getPath());
320+
return false;
322321
}
323322

323+
PipelineJob job = null;
324324
try
325325
{
326-
PipelineJob job = PipelineJob.readFromFile(json);
326+
job = PipelineJob.readFromFile(json);
327327

328-
_log.info("Submitting job: " + job.getJobGUID() + ": " + job.getActiveTaskStatus());
328+
job.getLogger().info("Submitting job from JSON: " + job.getJobGUID() + ": " + job.getActiveTaskStatus());
329329
PipelineService.get().setPipelineJobStatus(job, job.getActiveTaskStatus());
330330
}
331-
catch (PipelineJobException | IOException e)
331+
catch (Exception e)
332332
{
333-
_log.error(e);
333+
if (job != null)
334+
{
335+
job.getLogger().error("Unable to requeue job", e);
336+
}
337+
else
338+
{
339+
_log.error("Unable to requeue pipeline job", e);
340+
}
341+
342+
errors.reject(ERROR_MSG, "Unable to requeue pipeline job: " + e.getMessage());
343+
return false;
334344
}
335-
});
345+
}
336346

337347
return true;
338348
}

cluster/src/org/labkey/cluster/ClusterServiceImpl.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import org.labkey.api.pipeline.RemoteExecutionEngine;
99
import org.labkey.api.pipeline.TaskId;
1010
import org.labkey.api.security.User;
11+
import org.labkey.cluster.pipeline.AbstractClusterExecutionEngine;
1112
import org.labkey.cluster.pipeline.ClusterPipelineJob;
1213

1314
import java.io.File;
@@ -74,6 +75,12 @@ public String getClusterUser(Container c)
7475
return ClusterManager.get().getClusterUser(c);
7576
}
7677

78+
@Override
79+
public File getSerializedJobFile(File jobLogFile)
80+
{
81+
return AbstractClusterExecutionEngine.getSerializedJobFile(jobLogFile);
82+
}
83+
7784
@Override
7885
public PipelineJob createClusterRemotePipelineJob(Container c, User u, String jobName, RemoteExecutionEngine engine, ClusterRemoteTask task, File logFile) throws PipelineValidationException
7986
{

cluster/src/org/labkey/cluster/query/ForceCancelJobsButton.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package org.labkey.cluster.query;
22

3+
import org.labkey.api.data.ContainerManager;
4+
import org.labkey.api.data.TableInfo;
35
import org.labkey.api.ldk.table.SimpleButtonConfigFactory;
46
import org.labkey.api.module.ModuleLoader;
57
import org.labkey.api.security.permissions.AdminPermission;
@@ -19,4 +21,10 @@ public ForceCancelJobsButton()
1921

2022
setPermission(AdminPermission.class);
2123
}
24+
25+
@Override
26+
public boolean isAvailable(TableInfo ti)
27+
{
28+
return super.isAvailable(ti) || ContainerManager.getRoot().equals(ti.getUserSchema().getContainer());
29+
}
2230
}

cluster/src/org/labkey/cluster/query/RecoverCompletedJobsButton.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
package org.labkey.cluster.query;
22

3+
import org.labkey.api.data.ContainerManager;
4+
import org.labkey.api.data.TableInfo;
35
import org.labkey.api.ldk.table.SimpleButtonConfigFactory;
46
import org.labkey.api.module.ModuleLoader;
57
import org.labkey.api.security.permissions.AdminPermission;
@@ -19,4 +21,10 @@ public RecoverCompletedJobsButton()
1921

2022
setPermission(AdminPermission.class);
2123
}
24+
25+
@Override
26+
public boolean isAvailable(TableInfo ti)
27+
{
28+
return super.isAvailable(ti) || ContainerManager.getRoot().equals(ti.getUserSchema().getContainer());
29+
}
2230
}

0 commit comments

Comments
 (0)