From cf13db6869a38531dc6d05ba1f7614900c012c34 Mon Sep 17 00:00:00 2001 From: Lee Campbell Date: Tue, 24 Mar 2026 07:37:45 +0800 Subject: [PATCH] fix: ensure agent loop continues after Claude timeout or API errors The agent loop would stop iterating when Claude exited with a non-zero, non-124 exit code. This prevented the state machine from advancing to the create-pr state after a successful execute-tasks phase where Claude timed out but sync_state had already committed and pushed the work. - Remove global CLAUDE_RC; run_claude now returns its exit code directly - Add || true to all run_claude call sites so sync_state always runs - Log Claude exit codes as warnings rather than swallowing them - entrypoint.sh no longer breaks the loop on non-zero exit codes, letting the state machine advance to the next phase Co-Authored-By: Claude Opus 4.6 (1M context) --- autonomous/agent-loop.sh | 23 ++++++++++++----------- autonomous/entrypoint.sh | 7 ++----- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/autonomous/agent-loop.sh b/autonomous/agent-loop.sh index befc8dd..a5207da 100644 --- a/autonomous/agent-loop.sh +++ b/autonomous/agent-loop.sh @@ -57,12 +57,15 @@ sync_state() { fi } -CLAUDE_RC=0 - run_claude() { local prompt="$1" + local rc=0 timeout "$CLAUDE_TIMEOUT" claude --dangerously-skip-permissions --print \ - --output-format stream-json --verbose "$prompt" || CLAUDE_RC=$? + --output-format stream-json --verbose "$prompt" || rc=$? + if [ "$rc" -ne 0 ]; then + echo "WARNING: claude exited with code $rc" >&2 + fi + return "$rc" } load_prompt() { @@ -197,25 +200,25 @@ EOF execute-tasks) ISSUE_NUM=$(get_issue_num) - run_claude "$(load_prompt execute-tasks)" + run_claude "$(load_prompt execute-tasks)" || true sync_state "feat(#${ISSUE_NUM}): implement tasks" ;; create-tasks) ISSUE_NUM=$(get_issue_num) - run_claude "$(load_prompt create-tasks)" + run_claude "$(load_prompt create-tasks)" || true sync_state "plan(#${ISSUE_NUM}): create task breakdown" ;; apply-review) ISSUE_NUM=$(get_issue_num) - run_claude "$(load_prompt apply-review)" + run_claude "$(load_prompt apply-review)" || true sync_state "plan(#${ISSUE_NUM}): apply brief review feedback" ;; review-brief) ISSUE_NUM=$(get_issue_num) - run_claude "$(load_prompt review-brief)" + run_claude "$(load_prompt review-brief)" || true sync_state "plan(#${ISSUE_NUM}): review brief" ;; @@ -269,7 +272,7 @@ EOF PROMPT=$(load_prompt pick-issue) PROMPT="${PROMPT//\{\{ISSUE_BODY\}\}/$ISSUE_BODY}" - run_claude "$PROMPT" + run_claude "$PROMPT" || true sync_state "plan(#${ISSUE_NUM}): initial brief from issue" fi else @@ -285,10 +288,8 @@ EOF PROMPT=$(load_prompt pick-issue) PROMPT="${PROMPT//\{\{ISSUE_BODY\}\}/$ISSUE_BODY}" - run_claude "$PROMPT" + run_claude "$PROMPT" || true sync_state "plan(#${ISSUE_NUM}): initial brief from issue" fi ;; esac - -exit $CLAUDE_RC diff --git a/autonomous/entrypoint.sh b/autonomous/entrypoint.sh index 0c0be31..643c2da 100644 --- a/autonomous/entrypoint.sh +++ b/autonomous/entrypoint.sh @@ -39,11 +39,8 @@ for i in $(seq 1 "$MAX_ITERATIONS"); do EXIT_CODE=0 bash /usr/local/bin/agent-loop.sh || EXIT_CODE=$? - if [ "$EXIT_CODE" -ne 0 ] && [ "$EXIT_CODE" -ne 124 ]; then - echo "Iteration $i failed with exit code $EXIT_CODE" - break - elif [ "$EXIT_CODE" -eq 124 ]; then - echo "Iteration $i timed out (exit code 124), continuing..." + if [ "$EXIT_CODE" -ne 0 ]; then + echo "Iteration $i exited with code $EXIT_CODE, continuing to next state..." fi # Done?