Skip to content

Commit a13469b

Browse files
committed
Better visibility for cluster warnings
1 parent 2cff752 commit a13469b

File tree

2 files changed

+18
-6
lines changed

2 files changed

+18
-6
lines changed

cluster/src/org/labkey/cluster/pipeline/AbstractClusterExecutionEngine.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -501,6 +501,11 @@ protected void updateJobStatus(@Nullable String status, ClusterJob j, @Nullable
501501
{
502502
statusChanged = true;
503503
}
504+
505+
if (!statusChanged && !sf.getStatus().equalsIgnoreCase(j.getStatus()))
506+
{
507+
_log.error("ClusterExecutionEngine reports status not changed, but was: " + sf.getStatus() + " / " + j.getStatus());
508+
}
504509
}
505510

506511
//no need to redundantly update PipelineJob

cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import org.apache.logging.log4j.LogManager;
55
import org.apache.logging.log4j.Logger;
66
import org.jetbrains.annotations.NotNull;
7+
import org.jetbrains.annotations.Nullable;
78
import org.labkey.api.cluster.ClusterResourceAllocator;
89
import org.labkey.api.collections.CaseInsensitiveHashSet;
910
import org.labkey.api.data.Container;
@@ -207,14 +208,15 @@ protected Pair<String, String> getStatusForJob(ClusterJob job, Container c)
207208
}
208209

209210
String command = getConfig().getHistoryCommandExpr().eval(ctx);
211+
String info = null;
210212
List<String> ret = execute(command);
211213
if (ret != null)
212214
{
213215
//verify success
214216
boolean headerFound = false;
215217
boolean foundJobLine = false;
216218
LinkedHashSet<String> statuses = new LinkedHashSet<>();
217-
List<String> header = null;
219+
List<String> header;
218220
int jobIdx = -1;
219221
int stateIdx = -1;
220222
int hostnameIdx = -1;
@@ -290,7 +292,7 @@ else if (headerFound)
290292
_log.error("more than one status returned for job " + job.getClusterId() + ": " + StringUtils.join(statuses, ";") + ", using: " + status);
291293
}
292294

293-
return translateSlurmStatusToTaskStatus(status);
295+
return translateSlurmStatusToTaskStatus(status, info);
294296
}
295297
}
296298

@@ -496,21 +498,26 @@ private File createSubmitScript(PipelineJob job) throws PipelineJobException
496498
}
497499

498500
private Pair<String, String> translateSlurmStatusToTaskStatus(String status)
501+
{
502+
return translateSlurmStatusToTaskStatus(status, null);
503+
}
504+
505+
private Pair<String, String> translateSlurmStatusToTaskStatus(String status, @Nullable String info)
499506
{
500507
if (status == null)
501508
return null;
502509

503510
try
504511
{
505512
StatusType st = StatusType.parseValue(status);
506-
return Pair.of(st.getLabkeyStatus().toUpperCase(), st.getInfo());
513+
return Pair.of(st.getLabkeyStatus().toUpperCase(), st.getInfo(info));
507514
}
508515
catch (IllegalArgumentException e)
509516
{
510517
_log.error("Unknown status type: [" + status + "]");
511518
}
512519

513-
return Pair.of(status, null);
520+
return Pair.of(status, info);
514521
}
515522

516523
public static enum StatusType
@@ -562,9 +569,9 @@ public String getLabkeyStatus()
562569
return _taskStatus == null ? _labkeyStatus : _taskStatus.name();
563570
}
564571

565-
public String getInfo()
572+
public String getInfo(@Nullable String info)
566573
{
567-
return _info;
574+
return _info == null ? info : _info + (info == null ? "" : " / " + info);
568575
}
569576

570577
public static StatusType parseValue(String value)

0 commit comments

Comments
 (0)