# NOTE(review): this text looks like a rendered diff -- each line appears to carry
# fused old/new line numbers and a +/- marker, and '-' lines seem to be the
# removed pre-change versions. Confirm against the real script before editing.
# Abort on the first failing command and on a failure anywhere in a pipeline.
1111set -eo pipefail
# Path configuration: each variable keeps a caller-provided value and otherwise
# falls back to the default given in its ${VAR:-...} expansion.
1212TASK_WORKDIR=" ${TASK_WORKDIR:-/ workspace} "
1313TASK_REPO_ROOT=" ${TASK_REPO_ROOT:- ${VERIFY_REPO:- $TASK_WORKDIR } } "
14+ TASK_OUTPUT=" ${TASK_OUTPUT:-/ workspace/ answer.json} "
15+ VERIFY_REPO=" ${VERIFY_REPO:- $TASK_REPO_ROOT } "
# ARTIFACT_REQUIRED starts as false and becomes true only when ARTIFACT_ONLY
# compares equal to true.
16+ ARTIFACT_REQUIRED=false
1417if [ " ${ARTIFACT_ONLY:- false} " = " true" ]; then
15- answer_json_fail_closed_if_missing_or_no_changes
18+ ARTIFACT_REQUIRED=true
1619fi
17- VERIFY_REPO=" ${VERIFY_REPO:- $TASK_REPO_ROOT } "
1820
# Create the verifier log directory, enter the repository root, and register it
# as a git safe.directory; a failure of the git call is ignored via `|| true`.
1921mkdir -p /logs/verifier
2022cd " $TASK_REPO_ROOT "
2123git config --global --add safe.directory " $TASK_REPO_ROOT " 2> /dev/null || true
24+
#######################################
# Record a zero-reward "invalid_output" verdict for the current task.
# Globals:
#   TASK_OUTPUT        (read) reported as output_contract.primary_path
#   ARTIFACT_REQUIRED  (read) "true" marks the artifact as required
# Arguments:
#   $1 - failure code (also stored as the legacy "gate" field)
#   $2 - human-readable failure message
# Outputs:
#   /logs/verifier/reward.json, /logs/verifier/validation_result.json and
#   /logs/verifier/reward.txt (the text "0.0" without a trailing newline)
# Returns:
#   exit status of the embedded python3 program
#######################################
write_invalid_output() {
  local gate_code="$1" gate_message="$2"
  python3 - "$gate_code" "$gate_message" "$TASK_OUTPUT" "$ARTIFACT_REQUIRED" <<'PYEOF'
import json
import sys


def dump_json(path, document):
    """Serialize document to path as 2-space indented JSON."""
    with open(path, "w") as handle:
        json.dump(document, handle, indent=2)


gate, detail, answer_path, artifact_flag = sys.argv[1:5]

# Legacy payload kept for consumers that still read reward.json directly.
legacy_reward = dict(reward=0.0, gate=gate, error=detail)

# Keyword order below is the key order of the emitted JSON document.
validation = dict(
    schema_version="validation_result.v1alpha1",
    status="invalid_output",
    scorable=False,
    scorer_family="diff_similarity",
    reward=0.0,
    pass_threshold=0.5,
    passed=False,
    output_contract=dict(
        mode="answer_json_bridge",
        primary_path=answer_path,
        required_artifact=(artifact_flag == "true"),
    ),
    sub_scores={},
    failure=dict(code=gate, message=detail, stage="output_validation"),
    legacy=dict(reward_json=legacy_reward),
)

dump_json("/logs/verifier/reward.json", legacy_reward)
dump_json("/logs/verifier/validation_result.json", validation)
with open("/logs/verifier/reward.txt", "w") as handle:
    handle.write("0.0")
PYEOF
}
70+
#######################################
# Translate /logs/verifier/reward.json into /logs/verifier/validation_result.json.
# Globals:
#   TASK_OUTPUT        (read) reported as output_contract.primary_path
#   ARTIFACT_REQUIRED  (read) "true" marks the artifact as required
# Arguments:
#   $1 - status to report when the reward payload is usable (e.g. "scored")
# Outputs:
#   writes /logs/verifier/validation_result.json
# Returns:
#   exit status of the embedded python3 program
# Notes:
#   The status is forced to "verifier_error" when reward.json is missing,
#   unparseable, not a JSON object, carries a truthy "error" field, or holds a
#   "reward" that cannot be converted to float.
#######################################
write_validation_from_reward_json() {
  local fallback_status="$1"
  python3 - "$fallback_status" "$TASK_OUTPUT" "$ARTIFACT_REQUIRED" <<'PYEOF'
import json
import os
import sys

fallback_status, primary_path, required_artifact = sys.argv[1:4]
required = required_artifact == "true"
status = fallback_status
reward_payload = {}
reward_json_path = "/logs/verifier/reward.json"

if os.path.isfile(reward_json_path):
    try:
        with open(reward_json_path) as f:
            reward_payload = json.load(f)
    except Exception as exc:
        status = "verifier_error"
        reward_payload = {"reward": 0.0, "error": f"Failed to parse reward.json: {exc}"}
    else:
        # Valid JSON is not necessarily an object: a list, number, string or
        # null would make every .get() below raise, leaving no validation
        # result behind. Report it as a verifier error instead.
        if not isinstance(reward_payload, dict):
            status = "verifier_error"
            reward_payload = {
                "reward": 0.0,
                "error": (
                    "reward.json must contain a JSON object, got "
                    f"{type(reward_payload).__name__}"
                ),
            }
else:
    status = "verifier_error"
    reward_payload = {"reward": 0.0, "error": "reward.json not written by verifier"}

reward = reward_payload.get("reward", 0.0)
try:
    reward = float(reward)
except (TypeError, ValueError, OverflowError):
    # OverflowError covers JSON integers too large to represent as a float.
    reward = 0.0
    status = "verifier_error"

# An explicit error reported by the verifier always wins over the fallback.
if reward_payload.get("error"):
    status = "verifier_error"

# Only numeric sub-scores are forwarded; anything else is dropped.
sub_scores = {}
for key in ("file_recall", "line_recall", "line_precision"):
    value = reward_payload.get(key)
    if isinstance(value, (int, float)):
        sub_scores[key] = float(value)

# Optional diagnostic fields are copied through when present.
details = {}
for key in ("expected_files", "actual_files", "expected_lines_total", "actual_lines_total", "gate", "error"):
    value = reward_payload.get(key)
    if value is not None:
        details[key] = value

failure = None
passed = False
if status == "scored":
    passed = reward >= 0.5
else:
    failure = {
        "code": "verifier_exception" if reward_payload.get("error") else "missing_reward_json",
        "message": str(reward_payload.get("error") or "Verifier did not produce a usable reward payload"),
        "stage": "scoring",
    }

payload = {
    "schema_version": "validation_result.v1alpha1",
    "status": status,
    "scorable": status == "scored",
    "scorer_family": "diff_similarity",
    "reward": reward,
    "pass_threshold": 0.5,
    "passed": passed,
    "output_contract": {
        "mode": "answer_json_bridge",
        "primary_path": primary_path,
        "required_artifact": required,
    },
    "sub_scores": sub_scores,
    "failure": failure,
    "legacy": {
        "reward_json": reward_payload,
    },
}
if details:
    payload["details"] = details

with open("/logs/verifier/validation_result.json", "w") as f:
    json.dump(payload, f, indent=2)
PYEOF
}
154+
# Fail closed in artifact-only mode: when answer.json was flagged as missing or
# as carrying no changes, record an invalid-output verdict and stop here with a
# successful exit status so the recorded verdict stands as the result.
if [ "${ARTIFACT_ONLY:-false}" = "true" ]; then
  if [ "${ANSWER_JSON_MISSING:-false}" = "true" ] \
    || [ "${ANSWER_JSON_NO_CHANGES:-false}" = "true" ]; then
    write_invalid_output "missing_required_output" \
      "answer.json missing or has no usable artifact payload at $TASK_OUTPUT"
    exit 0
  fi
fi
162+
22163# Resolve initial commit — mirrors use orphan commits with different SHAs than upstream
# PRE_FIX_REV is the commit HEAD resolves to, or the literal HEAD when
# `git rev-parse` fails.
23164PRE_FIX_REV=$( git rev-parse HEAD 2> /dev/null || echo " HEAD" )
# Run the diff verifier; its combined stdout/stderr is also copied to
# /logs/verifier/verifier.log.
# NOTE(review): the arguments between the command line and the `tee` line are
# hidden behind the hunk header and are not visible here. With
# `set -eo pipefail` in effect, a non-zero exit from verify_diff.py would abort
# the script before the steps below unless the hidden part guards against it --
# confirm.
24165python3 /tests/verify_diff.py \
@@ -28,6 +169,7 @@ python3 /tests/verify_diff.py \
28169 2>&1 | tee /logs/verifier/verifier.log
# Read the "reward" field back out of reward.json (0.0 when the field is absent
# or the file cannot be read) and mirror it into reward.txt.
29170REWARD=$( python3 -c " import json; print(json.load(open('/logs/verifier/reward.json')).get('reward', 0.0))" 2> /dev/null || echo " 0.0" )
30171echo " $REWARD " > /logs/verifier/reward.txt
# Derive validation_result.json from reward.json, passing the status to report
# when the payload is usable.
172+ write_validation_from_reward_json " scored"
31173echo " Final reward: $REWARD "
# Save the diff against PRE_FIX_REV and its --stat summary; failures of either
# git call are ignored via `|| true`.
32174git diff " $PRE_FIX_REV " > /logs/verifier/agent.diff 2> /dev/null || true
33175git diff " $PRE_FIX_REV " --stat > /logs/verifier/diff.stat 2> /dev/null || true
0 commit comments