Skip to content

Commit 9f2d28c

Browse files
committed
Reduce false-positive errors/warnings
1 parent d9536c1 commit 9f2d28c

File tree

1 file changed

+4
-10
lines changed

1 file changed

+4
-10
lines changed

cluster/src/org/labkey/cluster/pipeline/AbstractClusterExecutionEngine.java

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -483,10 +483,9 @@ protected void updateJobStatus(@Nullable String status, ClusterJob j, @Nullable
483483
j.setLogModified(new Date(log.lastModified()));
484484
}
485485

486-
//NOTE: in rare cases the actual job and status file can get out of sync
487-
if (status.equalsIgnoreCase(PipelineJob.TaskStatus.running.name()) && sf.getStatus().equalsIgnoreCase(PipelineJob.TaskStatus.error.name()))
486+
//NOTE: in rare cases the actual job and status file can get out of sync. This is generally a race-condition with differences in timing of ActiveMQ-messages vs. polling the cluster
487+
if (status != null && status.equalsIgnoreCase(PipelineJob.TaskStatus.running.name()) && sf.getStatus().equalsIgnoreCase(PipelineJob.TaskStatus.error.name()))
488488
{
489-
_log.error("Pipeline job and cluster out of sync: " + sf.getStatus() + " / " + status + ", for job: " + sf.getRowId());
490489
statusChanged = true;
491490
}
492491
}
@@ -545,11 +544,6 @@ protected void updateJobStatus(@Nullable String status, ClusterJob j, @Nullable
545544
{
546545
taskStatus = PipelineJob.TaskStatus.error;
547546
}
548-
else if (pj.getActiveTaskStatus() == PipelineJob.TaskStatus.running)
549-
{
550-
//this might indicate the job aborted mid-task without properly marking itself as complete
551-
pj.getLogger().warn("marking job as complete, even though JSON indicates task status is running. this might indicate the job aborted improperly?");
552-
}
553547
else if (pj.getActiveTaskStatus() != PipelineJob.TaskStatus.complete)
554548
{
555549
//this might indicate the job aborted mid-task without properly marking itself as complete
@@ -563,8 +557,8 @@ else if (pj.getErrors() > 0)
563557
}
564558
else if (pj.getActiveTaskStatus() == PipelineJob.TaskStatus.complete)
565559
{
566-
pj.getLogger().warn("Pipeline job marked complete, but the cluster status is error");
567-
560+
//NOTE: this can occur when the cluster job has a non-zero exit after the java process terminates.
561+
pj.getLogger().info("Pipeline job JSON marked complete, but the cluster status was: " + taskStatus);
568562
}
569563

570564
pj.getLogger().debug("setting active task status for job: " + j.getClusterId() + " to: " + taskStatus.name() + ". status was: " + pj.getActiveTaskStatus() + " (JSON) /" + sf.getStatus() + " (StatusFile) / " + status + " (Cluster), activeTaskId: " + (pj.getActiveTaskId() != null ? pj.getActiveTaskId().toString() : "no active task") + ", hostname: " + sf.getActiveHostName() + ", rowid: " + j.getRowId());

0 commit comments

Comments
 (0)