Skip to content

Commit 9db70c4

Browse files
authored
Merge pull request #120 from LabKey/fb_merge_21.11_to_develop
Merge discvr-21.11 to develop
2 parents 6c23ad5 + 4fd18ee commit 9db70c4

File tree

25 files changed

+546
-299
lines changed

25 files changed

+546
-299
lines changed

cluster/api-src/org/labkey/api/cluster/ClusterService.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ public static void setInstance(ClusterService instance)
3434
* This creates a barebones PipelineJob configured to run on the selected RemoteExecutionEngine.
3535
* You may wish to further configure this job. This method does not submit the job, which you can do using PipelineService.queueJob()
3636
*/
37-
abstract public PipelineJob createClusterRemotePipelineJob(Container c, User u, String jobName, RemoteExecutionEngine engine, ClusterRemoteTask task, File logFile) throws PipelineValidationException;
37+
abstract public PipelineJob createClusterRemotePipelineJob(Container c, User u, String jobName, RemoteExecutionEngine<?> engine, ClusterRemoteTask task, File logFile) throws PipelineValidationException;
38+
39+
abstract public File getSerializedJobFile(File jobLogFile);
40+
41+
abstract public File getExpectedSubmitScript(PipelineJob job);
3842

3943
public interface ClusterRemoteTask extends Serializable
4044
{
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
Ext4.namespace('Cluster.Utils');
2+
3+
Cluster.Utils = new function() {
4+
return {
5+
buttonHandlerForLog: function (dataRegionName) {
6+
const checked = LABKEY.DataRegions[dataRegionName].getChecked();
7+
if (!checked.length){
8+
Ext4.Msg.alert('Error', 'No rows selected');
9+
return;
10+
}
11+
else if (checked.length > 1){
12+
Ext4.Msg.alert('Error', 'Can only select one row at a time');
13+
return;
14+
}
15+
16+
window.open(LABKEY.ActionURL.buildURL('cluster', 'viewJavaLog', null, {jobId: checked[0]}), '_blank');
17+
},
18+
19+
recoverCompletedJobs: function (dataRegionName) {
20+
const checked = LABKEY.DataRegions[dataRegionName].getChecked();
21+
if (!checked.length){
22+
Ext4.Msg.alert('Error', 'No rows selected');
23+
return;
24+
}
25+
26+
window.location = LABKEY.ActionURL.buildURL('cluster', 'recoverCompletedJobs', null, {jobIds: checked.join(',')});
27+
},
28+
29+
forcePipelineCancel: function (dataRegionName) {
30+
const checked = LABKEY.DataRegions[dataRegionName].getChecked();
31+
if (!checked.length){
32+
Ext4.Msg.alert('Error', 'No rows selected');
33+
return;
34+
}
35+
36+
window.location = LABKEY.ActionURL.buildURL('cluster', 'forcePipelineCancel', null, {jobIds: checked.join(',')});
37+
}
38+
};
39+
};

cluster/src/org/labkey/cluster/ClusterController.java

Lines changed: 121 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,19 @@
1616

1717
package org.labkey.cluster;
1818

19+
import org.apache.commons.io.FilenameUtils;
1920
import org.apache.commons.lang3.StringUtils;
2021
import org.apache.logging.log4j.LogManager;
2122
import org.apache.logging.log4j.Logger;
2223
import org.labkey.api.action.ConfirmAction;
24+
import org.labkey.api.action.SimpleRedirectAction;
2325
import org.labkey.api.action.SpringActionController;
2426
import org.labkey.api.data.DbSchema;
2527
import org.labkey.api.data.DbSchemaType;
2628
import org.labkey.api.data.Table;
2729
import org.labkey.api.data.TableInfo;
30+
import org.labkey.api.pipeline.PipeRoot;
2831
import org.labkey.api.pipeline.PipelineJob;
29-
import org.labkey.api.pipeline.PipelineJobException;
3032
import org.labkey.api.pipeline.PipelineJobService;
3133
import org.labkey.api.pipeline.PipelineService;
3234
import org.labkey.api.pipeline.PipelineStatusFile;
@@ -35,17 +37,19 @@
3537
import org.labkey.api.security.RequiresPermission;
3638
import org.labkey.api.security.RequiresSiteAdmin;
3739
import org.labkey.api.security.permissions.AdminPermission;
40+
import org.labkey.api.security.permissions.ReadPermission;
41+
import org.labkey.api.settings.ResourceURL;
3842
import org.labkey.api.util.HtmlString;
3943
import org.labkey.api.util.PageFlowUtil;
4044
import org.labkey.api.util.URLHelper;
45+
import org.labkey.api.view.ActionURL;
4146
import org.labkey.api.view.HtmlView;
4247
import org.labkey.cluster.pipeline.AbstractClusterExecutionEngine;
4348
import org.springframework.validation.BindException;
4449
import org.springframework.validation.Errors;
4550
import org.springframework.web.servlet.ModelAndView;
4651

4752
import java.io.File;
48-
import java.io.IOException;
4953
import java.util.ArrayList;
5054
import java.util.HashMap;
5155
import java.util.List;
@@ -110,9 +114,10 @@ public URLHelper getSuccessURL(JobIdsForm form)
110114

111115
public ModelAndView getConfirmView(JobIdsForm form, BindException errors) throws Exception
112116
{
117+
113118
return new HtmlView(HtmlString.unsafe("This will change the status of the pipeline job with the provided ID to Cancelled. It is intended to help the situation when the normal UI leave a job in a perpetual 'Cancelling' state." +
114119
"To continue, enter a comma-delimited list of Job IDs and hit submit:<br><br>" +
115-
"<label>Enter Job ID(s): </label><input name=\"jobIds\"><br>"));
120+
"<label>Enter Job ID(s): </label><input name=\"jobIds\" value = \"" + form.getJobIds() + "\"><br>"));
116121
}
117122

118123
public boolean handlePost(JobIdsForm form, BindException errors) throws Exception
@@ -274,7 +279,7 @@ public ModelAndView getConfirmView(JobIdsForm form, BindException errors) throws
274279
{
275280
return new HtmlView(HtmlString.unsafe("This will attempt to re-queue existing pipeline jobs using their serialized JSON text files. It is intended as a workaround for the situation where a job has been marked complete." +
276281
"To continue, enter a comma-delimited list of Job IDs and hit submit:<br><br>" +
277-
"<label>Enter Job ID(s): </label><input name=\"jobIds\"><br>"));
282+
"<label>Enter Job ID(s): </label><input name=\"jobIds\" value=\"" + form.getJobIds() + "\"><br>"));
278283
}
279284

280285
public boolean handlePost(JobIdsForm form, BindException errors) throws Exception
@@ -306,28 +311,133 @@ public boolean handlePost(JobIdsForm form, BindException errors) throws Exceptio
306311
sfs.add(sf);
307312
}
308313

309-
sfs.forEach(sf -> {
314+
for (PipelineStatusFile sf : sfs)
315+
{
310316
File log = new File(sf.getFilePath());
311317
File json = AbstractClusterExecutionEngine.getSerializedJobFile(log);
312318
if (!json.exists())
313319
{
314-
return;
320+
errors.reject(ERROR_MSG, "Unable to find pipeline JSON, expected: " + json.getPath());
321+
return false;
315322
}
316323

324+
PipelineJob job = null;
317325
try
318326
{
319-
PipelineJob job = PipelineJob.readFromFile(json);
327+
job = PipelineJob.readFromFile(json);
320328

321-
_log.info("Submitting job: " + job.getJobGUID() + ": " + job.getActiveTaskStatus());
329+
job.getLogger().info("Submitting job from JSON: " + job.getJobGUID() + ": " + job.getActiveTaskStatus());
322330
PipelineService.get().setPipelineJobStatus(job, job.getActiveTaskStatus());
323331
}
324-
catch (PipelineJobException | IOException e)
332+
catch (Exception e)
325333
{
326-
_log.error(e);
334+
if (job != null)
335+
{
336+
job.getLogger().error("Unable to requeue job", e);
337+
}
338+
else
339+
{
340+
_log.error("Unable to requeue pipeline job", e);
341+
}
342+
343+
errors.reject(ERROR_MSG, "Unable to requeue pipeline job: " + e.getMessage());
344+
return false;
327345
}
328-
});
346+
}
329347

330348
return true;
331349
}
332350
}
351+
352+
@RequiresPermission(ReadPermission.class)
353+
public static class ViewJavaLogAction extends SimpleRedirectAction<ViewJavaLogForm>
354+
{
355+
@Override
356+
public void validate(ViewJavaLogForm viewJavaLogForm, BindException errors)
357+
{
358+
super.validate(viewJavaLogForm, errors);
359+
360+
if (viewJavaLogForm.getJobId() == null)
361+
{
362+
errors.reject(ERROR_MSG, "Must provide JobId");
363+
}
364+
365+
PipelineStatusFile sf = PipelineService.get().getStatusFile(viewJavaLogForm.getJobId());
366+
if (sf == null)
367+
{
368+
errors.reject(ERROR_MSG, "Unknown job: " + viewJavaLogForm.getJobId());
369+
}
370+
else if (!sf.lookupContainer().hasPermission(getUser(), ReadPermission.class))
371+
{
372+
errors.reject(ERROR_MSG, "The current user does not have permission to view the folder: " + sf.lookupContainer().getPath());
373+
}
374+
}
375+
376+
@Override
377+
public URLHelper getRedirectURL(ViewJavaLogForm viewJavaLogForm) throws Exception
378+
{
379+
PipelineStatusFile sf = PipelineService.get().getStatusFile(viewJavaLogForm.getJobId());
380+
File parentDir = new File(sf.getFilePath()).getParentFile();
381+
if (!parentDir.exists())
382+
{
383+
throw new IllegalArgumentException("Log directory doesnt exist: " + parentDir.getPath());
384+
}
385+
386+
File[] javaLogs = parentDir.listFiles((dir, name) -> {
387+
return name.endsWith(".java.log");
388+
});
389+
390+
if (javaLogs == null || javaLogs.length == 0)
391+
{
392+
throw new IllegalArgumentException("No files ending with java.log found: " + parentDir.getPath());
393+
}
394+
395+
long lastModifiedTime = Long.MIN_VALUE;
396+
File chosenFile = null;
397+
for (File file : javaLogs)
398+
{
399+
if (file.lastModified() > lastModifiedTime)
400+
{
401+
chosenFile = file;
402+
lastModifiedTime = file.lastModified();
403+
}
404+
}
405+
406+
PipeRoot root = PipelineService.get().getPipelineRootSetting(sf.lookupContainer());
407+
if (root == null)
408+
{
409+
throw new IllegalArgumentException("Unable to find pipeline root for folder: " + sf.lookupContainer().getPath());
410+
}
411+
412+
if (!root.isUnderRoot(chosenFile))
413+
{
414+
throw new IllegalArgumentException("Log file is not under the pipeline root for folder: " + sf.lookupContainer().getPath());
415+
}
416+
417+
String relPath = root.relativePath(chosenFile);
418+
if (relPath == null)
419+
{
420+
throw new IllegalArgumentException("Unable to find log file path for folder: " + sf.lookupContainer().getPath());
421+
}
422+
423+
relPath = org.labkey.api.util.Path.parse(FilenameUtils.separatorsToUnix(relPath)).encode();
424+
425+
return new URLHelper(root.getWebdavURL() + relPath);
426+
}
427+
}
428+
429+
public static class ViewJavaLogForm
430+
{
431+
private Integer _jobId;
432+
433+
public Integer getJobId()
434+
{
435+
return _jobId;
436+
}
437+
438+
public void setJobId(Integer jobId)
439+
{
440+
_jobId = jobId;
441+
}
442+
}
333443
}

cluster/src/org/labkey/cluster/ClusterModule.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.labkey.api.data.DbSchema;
2525
import org.labkey.api.data.DbSchemaType;
2626
import org.labkey.api.ldk.ExtendedSimpleModule;
27+
import org.labkey.api.ldk.LDKService;
2728
import org.labkey.api.module.Module;
2829
import org.labkey.api.module.ModuleContext;
2930
import org.labkey.api.module.ModuleLoader;
@@ -36,7 +37,11 @@
3637
import org.labkey.api.view.WebPartFactory;
3738
import org.labkey.cluster.pipeline.ClusterPipelineJobNotificationProvider;
3839
import org.labkey.cluster.pipeline.ClusterPipelineProvider;
40+
import org.labkey.cluster.pipeline.SlurmExecutionEngine;
3941
import org.labkey.cluster.pipeline.TestCase;
42+
import org.labkey.cluster.query.ForceCancelJobsButton;
43+
import org.labkey.cluster.query.RecoverCompletedJobsButton;
44+
import org.labkey.cluster.query.ViewJavaLogButton;
4045

4146
import java.util.Arrays;
4247
import java.util.Collection;
@@ -92,6 +97,10 @@ public void doStartupAfterSpringConfig(ModuleContext moduleContext)
9297
PipelineService.get().registerPipelineProvider(new ClusterPipelineProvider(this));
9398

9499
PipelineService.get().registerPipelineJobNotificationProvider(new ClusterPipelineJobNotificationProvider());
100+
101+
LDKService.get().registerQueryButton(new ViewJavaLogButton(), "pipeline", "job");
102+
LDKService.get().registerQueryButton(new RecoverCompletedJobsButton(), "pipeline", "job");
103+
LDKService.get().registerQueryButton(new ForceCancelJobsButton(), "pipeline", "job");
95104
}
96105

97106
@Override
@@ -120,6 +129,14 @@ public Set<Class> getIntegrationTests()
120129
return testClasses;
121130
}
122131

132+
@Override
133+
public @NotNull Set<Class> getUnitTests()
134+
{
135+
return new HashSet<>(Arrays.asList(
136+
SlurmExecutionEngine.TestCase.class
137+
));
138+
}
139+
123140
@Override
124141
protected void registerSchemas()
125142
{

cluster/src/org/labkey/cluster/ClusterServiceImpl.java

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
import org.labkey.api.cluster.ClusterService;
55
import org.labkey.api.data.Container;
66
import org.labkey.api.pipeline.PipelineJob;
7+
import org.labkey.api.pipeline.PipelineJobService;
78
import org.labkey.api.pipeline.PipelineValidationException;
89
import org.labkey.api.pipeline.RemoteExecutionEngine;
910
import org.labkey.api.pipeline.TaskId;
1011
import org.labkey.api.security.User;
12+
import org.labkey.cluster.pipeline.AbstractClusterExecutionEngine;
1113
import org.labkey.cluster.pipeline.ClusterPipelineJob;
14+
import org.labkey.cluster.pipeline.HTCondorExecutionEngine;
15+
import org.labkey.cluster.pipeline.SlurmExecutionEngine;
1216

1317
import java.io.File;
1418
import java.util.ArrayList;
@@ -75,7 +79,52 @@ public String getClusterUser(Container c)
7579
}
7680

7781
@Override
78-
public PipelineJob createClusterRemotePipelineJob(Container c, User u, String jobName, RemoteExecutionEngine engine, ClusterRemoteTask task, File logFile) throws PipelineValidationException
82+
public File getSerializedJobFile(File jobLogFile)
83+
{
84+
return AbstractClusterExecutionEngine.getSerializedJobFile(jobLogFile);
85+
}
86+
87+
@Override
88+
public File getExpectedSubmitScript(PipelineJob job)
89+
{
90+
if (job.isActiveTaskLocal())
91+
{
92+
//nothing to do:
93+
return null;
94+
}
95+
else if (job.getActiveTaskFactory() != null)
96+
{
97+
String location = job.getActiveTaskFactory().getExecutionLocation();
98+
for (RemoteExecutionEngine<?> engine : PipelineJobService.get().getRemoteExecutionEngines())
99+
{
100+
if (location.equals(engine.getConfig().getLocation()))
101+
{
102+
if (SlurmExecutionEngine.TYPE.equals(engine.getType()))
103+
{
104+
return SlurmExecutionEngine.getExpectedSubmitScript(job);
105+
}
106+
else if (HTCondorExecutionEngine.TYPE.equals(engine.getType()))
107+
{
108+
return HTCondorExecutionEngine.getExpectedSubmitScript(job);
109+
}
110+
111+
job.getLogger().error("Unknown execution engine type: " + engine.getType());
112+
return null;
113+
}
114+
}
115+
116+
job.getLogger().error("Unable to find remote execution engine for location: " + location);
117+
return null;
118+
}
119+
else
120+
{
121+
job.getLogger().error("TaskFactory is null, cannot identify submit script");
122+
return null;
123+
}
124+
}
125+
126+
@Override
127+
public PipelineJob createClusterRemotePipelineJob(Container c, User u, String jobName, RemoteExecutionEngine<?> engine, ClusterRemoteTask task, File logFile) throws PipelineValidationException
79128
{
80129
return ClusterPipelineJob.createJob(c, u, jobName, task, engine, logFile);
81130
}

0 commit comments

Comments
 (0)