Skip to content

Commit d5a352d

Browse files
committed
Add more visibility on Slurm cluster submission errors
1 parent 85b0700 commit d5a352d

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ protected Set<String> updateStatusForAllJobs() throws PipelineJobException
123123
int jobIdx = -1;
124124
int stateIdx = -1;
125125
int hostnameIdx = -1;
126+
int reasonIdx = -1;
126127
for (String line : ret)
127128
{
128129
line = StringUtils.trimToNull(line);
@@ -138,6 +139,7 @@ protected Set<String> updateStatusForAllJobs() throws PipelineJobException
138139
jobIdx = header.indexOf("JOBID");
139140
stateIdx = header.indexOf("STATE");
140141
hostnameIdx = header.indexOf("NODELIST");
142+
reasonIdx = header.indexOf("REASON");
141143

142144
if (stateIdx == -1)
143145
{
@@ -177,6 +179,21 @@ protected Set<String> updateStatusForAllJobs() throws PipelineJobException
177179
}
178180

179181
Pair<String, String> status = translateSlurmStatusToTaskStatus(StringUtils.trimToNull(tokens[stateIdx]));
182+
183+
String reason = tokens.length > reasonIdx ? StringUtils.trimToNull(tokens[reasonIdx]) : null;
184+
if (reason != null)
185+
{
186+
if (!"Priority".equals(reason))
187+
{
188+
if (status == null)
189+
{
190+
status = new Pair<>("ERROR", null);
191+
}
192+
193+
status.second = "ERROR: " + reason;
194+
}
195+
}
196+
180197
updateJobStatus(status == null ? null : status.first, j, status == null ? null : status.second);
181198
jobsUpdated.add(j.getClusterId());
182199
}

0 commit comments

Comments
 (0)