|
| 1 | +#!/bin/bash |
| 2 | +# answer_json_verifier_lib.sh — Unified answer.json verifier for artifact configs. |
| 3 | +# |
| 4 | +# Source this at the TOP of test.sh. It detects /tmp/.artifact_only_mode and: |
| 5 | +# 1. Validates /workspace/answer.json exists and is valid JSON |
| 6 | +# 2. Extracts analysis.reasoning → $ANALYSIS_TEXT_FILE (for keyword/pattern scoring) |
| 7 | +# 3. Extracts analysis.files_examined → $ANALYSIS_FILES_FILE (for IR metrics) |
| 8 | +# 4. If changes[] has diffs: applies diffs directly to /repo_full (zero-copy, container is ephemeral) |
| 9 | +# 5. Exports VERIFY_REPO, ARTIFACT_ONLY, ANALYSIS_TEXT_FILE, etc. |
| 10 | +# |
| 11 | +# For non-artifact-only runs, this script is a no-op that sets safe defaults. |
| 12 | +# |
| 13 | +# Usage in test.sh: |
| 14 | +# #!/bin/bash |
| 15 | +# set -e |
| 16 | +# # Artifact mode: parse answer.json, apply patches, export analysis |
| 17 | +# if [ -f /tmp/.artifact_only_mode ] && [ -f /tests/answer_json_verifier_lib.sh ]; then |
| 18 | +# source /tests/answer_json_verifier_lib.sh |
| 19 | +# fi |
| 20 | +# # ... rest of test.sh uses $VERIFY_REPO, $ANALYSIS_TEXT_FILE, etc. ... |
| 21 | + |
if [ ! -f /tmp/.artifact_only_mode ]; then
  # Not in artifact-only mode — export safe defaults for backward compat.
  export VERIFY_REPO="${VERIFY_REPO:-/workspace}"
  export ARTIFACT_ONLY=false
  export ANALYSIS_TEXT_FILE=""
  export ANALYSIS_FILES_FILE=""
  export ANSWER_JSON=""
  export ANSWER_JSON_MISSING=false
  export ANSWER_JSON_NO_CHANGES=false
  # When sourced, 'return' stops here. When this file is executed directly,
  # 'return' fails — fall back to 'exit' instead of '|| true' so we never
  # fall through into the artifact-mode setup below without the marker file.
  return 0 2>/dev/null || exit 0
fi
| 33 | + |
echo "[answer_json_verifier] Detected artifact-only mode"

# Artifact-only run: point every downstream consumer at the canonical
# artifact locations and reset the failure flags.
export ARTIFACT_ONLY=true
export ANSWER_JSON_MISSING=false
export ANSWER_JSON_NO_CHANGES=false
export ANSWER_JSON=/workspace/answer.json
export ANALYSIS_TEXT_FILE=/tmp/analysis.txt
export ANALYSIS_FILES_FILE=/tmp/analysis_files.json
| 41 | + |
# Fail-closed guard: in artifact mode with answer.json missing, write a 0.0
# reward and stop the verifier immediately. Outside that case it is a no-op.
answer_json_fail_closed_if_missing() {
  [ "${ARTIFACT_ONLY:-false}" = "true" ] || return 0
  [ "${ANSWER_JSON_MISSING:-false}" = "true" ] || return 0
  mkdir -p /logs/verifier
  printf '0.0\n' > /logs/verifier/reward.txt
  echo "[answer_json_verifier] Scored 0.0 because answer.json is missing"
  exit 0
}
| 50 | + |
# Fail-closed guard: like answer_json_fail_closed_if_missing, but also treats
# an answer.json with an empty changes[] list as an unusable payload.
answer_json_fail_closed_if_missing_or_no_changes() {
  [ "${ARTIFACT_ONLY:-false}" = "true" ] || return 0
  if [ "${ANSWER_JSON_MISSING:-false}" = "true" ] || [ "${ANSWER_JSON_NO_CHANGES:-false}" = "true" ]; then
    mkdir -p /logs/verifier
    printf '0.0\n' > /logs/verifier/reward.txt
    echo "[answer_json_verifier] Scored 0.0 because answer.json has no usable artifact payload"
    exit 0
  fi
}
| 61 | + |
# Copy the extracted analysis text to an arbitrary destination path (creating
# the parent directory). Only acts in artifact mode when the text file exists;
# otherwise it silently succeeds.
answer_json_copy_analysis_text() {
  local dest="$1"
  [ "${ARTIFACT_ONLY:-false}" = "true" ] || return 0
  [ -f "${ANALYSIS_TEXT_FILE:-}" ] || return 0
  mkdir -p "$(dirname "$dest")"
  cp "$ANALYSIS_TEXT_FILE" "$dest"
  echo "[answer_json_verifier] Copied analysis text to $dest"
}
| 70 | + |
# Remove stale inter-process markers left by a previous verifier run.
rm -f /tmp/.answer_json_no_changes /tmp/.answer_json_verify_repo

# ── Validate answer.json ────────────────────────────────────────────────

if [ ! -f "$ANSWER_JSON" ]; then
  echo "[answer_json_verifier] ERROR: /workspace/answer.json not found"
  echo "[answer_json_verifier] Agent did not produce required artifact"
  export VERIFY_REPO="${VERIFY_REPO:-/workspace}"
  export ANSWER_JSON_MISSING=true
  # Signal to test.sh that there's no output — it should score 0.
  # 'return' works when sourced; when executed directly it fails, so fall
  # back to 'exit' rather than silently running the patch logic below.
  return 0 2>/dev/null || exit 0
fi
| 83 | + |
# Validate JSON and extract fields using Python.
# Side effects: writes $ANALYSIS_TEXT_FILE, $ANALYSIS_FILES_FILE, optionally
# /workspace/review.json, /workspace/fault_localization_result.json, new files
# under /workspace/, and the /tmp/.answer_json_* markers the shell reads below.
python3 - "$ANSWER_JSON" <<'PYEOF'
import json, sys, os, subprocess, tempfile, re

answer_path = sys.argv[1]
analysis_text_file = os.environ.get("ANALYSIS_TEXT_FILE", "/tmp/analysis.txt")
analysis_files_file = os.environ.get("ANALYSIS_FILES_FILE", "/tmp/analysis_files.json")

# ── Parse answer.json ─────────────────────────────────────────────────────
# Fail soft on any parse problem: downstream steps treat answer == {} as
# "no usable payload" instead of crashing the verifier.
try:
    with open(answer_path) as f:
        raw = f.read()

    # Strip markdown code fences if agent wrapped JSON in ```json blocks
    m = re.search(r'```(?:json)?\s*\n(.*?)```', raw, re.DOTALL)
    if m:
        raw = m.group(1).strip()

    answer = json.loads(raw)
    if not isinstance(answer, dict):
        print("[answer_json_verifier] WARNING: answer.json is not a JSON object", file=sys.stderr)
        answer = {}
except (json.JSONDecodeError, ValueError) as e:
    print(f"[answer_json_verifier] ERROR: Failed to parse answer.json: {e}", file=sys.stderr)
    answer = {}
except FileNotFoundError:
    print("[answer_json_verifier] ERROR: answer.json not found", file=sys.stderr)
    answer = {}

# ── Extract analysis fields ───────────────────────────────────────────────
analysis = answer.get("analysis", {})
if not isinstance(analysis, dict):
    analysis = {}

# Build analysis text from summary + reasoning (what verifiers will grep).
# str() coercion guards against non-string values, which would otherwise
# make the join below raise TypeError.
parts = []
summary = analysis.get("summary", "")
if summary:
    parts.append(str(summary))
reasoning = analysis.get("reasoning", "")
if reasoning:
    parts.append(str(reasoning))
analysis_text = "\n\n".join(parts)

with open(analysis_text_file, "w") as f:
    f.write(analysis_text)
print(f"[answer_json_verifier] Wrote analysis text ({len(analysis_text)} chars) to {analysis_text_file}")

# Extract files_examined for IR metrics
files_examined = analysis.get("files_examined", [])
if not isinstance(files_examined, list):
    files_examined = []
with open(analysis_files_file, "w") as f:
    json.dump(files_examined, f, indent=2)
print(f"[answer_json_verifier] Wrote {len(files_examined)} examined files to {analysis_files_file}")

# ── Extract and apply diffs from changes[] ────────────────────────────────
changes = answer.get("changes", [])
if not isinstance(changes, list):
    changes = []
# Drop non-object entries so the .get() calls below cannot raise.
changes = [c for c in changes if isinstance(c, dict)]

if not changes:
    print("[answer_json_verifier] No changes[] in answer.json (analysis-only task)")
    # Signal no patches needed
    with open("/tmp/.answer_json_no_changes", "w") as f:
        f.write("1")

# ── Generate synthetic review.json for code-review verifiers ──────────────
# Code-review verifiers expect /workspace/review.json with [{file, description, fix_patch}]
# Generate this from answer.json changes[] so existing F1 scoring works unchanged.
if changes:
    review_entries = []
    for change in changes:
        entry = {
            "file": change.get("file", ""),
            "description": change.get("description", ""),
            "fix_patch": change.get("diff", ""),
        }
        review_entries.append(entry)
    review_json_path = "/workspace/review.json"
    with open(review_json_path, "w") as f:
        json.dump(review_entries, f, indent=2)
    print(f"[answer_json_verifier] Generated synthetic review.json ({len(review_entries)} entries)")

# ── Extract new-file diffs to /workspace/ ─────────────────────────────────
# For find-and-prove tasks: agent writes regression tests as new-file diffs.
# Extract file content from diffs like "--- /dev/null\n+++ b/regression_test.py"
# and write directly to /workspace/.
new_files_written = 0
for change in changes:
    diff_text = change.get("diff", "")
    file_path = change.get("file", "")
    if not diff_text or not file_path:
        continue

    # Detect new-file diff: starts from /dev/null
    if "/dev/null" in diff_text:
        # Extract added lines (lines starting with +, excluding +++ header)
        lines = diff_text.split("\n")
        content_lines = []
        in_hunk = False
        for line in lines:
            if line.startswith("@@"):
                in_hunk = True
                continue
            if in_hunk:
                if line.startswith("+"):
                    content_lines.append(line[1:])  # Strip leading +
                elif line.startswith("-"):
                    pass  # skip removed lines (shouldn't exist in new-file)
                elif line.startswith("\\"):
                    pass  # "\ No newline at end of file"
                else:
                    content_lines.append(line)  # context line

        if content_lines:
            # Use only the basename — new files always land directly in /workspace/.
            target = os.path.join("/workspace", os.path.basename(file_path))
            os.makedirs(os.path.dirname(target), exist_ok=True)
            # Both writes stay inside the with-block so the handle is still
            # open when the trailing newline is appended.
            with open(target, "w") as f:
                f.write("\n".join(content_lines))
                if content_lines[-1] != "":
                    f.write("\n")
            new_files_written += 1
            print(f"[answer_json_verifier] Extracted new file: {target}")

if new_files_written > 0:
    print(f"[answer_json_verifier] Extracted {new_files_written} new files to /workspace/")

# ── Generate fault_localization_result.json ───────────────────────────────
# Fault-loc verifiers expect /workspace/fault_localization_result.json with
# {buggy_files, buggy_functions, reasoning, confidence}. Populate from analysis.
if analysis:
    fl_result = {}
    # buggy_files: accept both dict entries ({"path": ...}) and bare string
    # paths — agents emit either shape; the old dict-only code crashed on strings.
    fl_files = []
    for fe in files_examined:
        if isinstance(fe, dict):
            path = fe.get("path", "")
        elif isinstance(fe, str):
            path = fe
        else:
            path = ""
        if path:
            fl_files.append(path)
    if fl_files:
        fl_result["buggy_files"] = fl_files
    # buggy_functions: look for a "functions" or "buggy_functions" key in analysis
    fl_funcs = analysis.get("buggy_functions", analysis.get("functions", []))
    if isinstance(fl_funcs, list) and fl_funcs:
        fl_result["buggy_functions"] = fl_funcs
    # reasoning: use the full analysis text
    if reasoning:
        fl_result["reasoning"] = reasoning
    # confidence: look for a confidence key
    confidence = analysis.get("confidence", None)
    if isinstance(confidence, (int, float)):
        fl_result["confidence"] = confidence
    # Only write if we have substantive content and nothing wrote it already
    fl_path = "/workspace/fault_localization_result.json"
    if fl_result and not os.path.exists(fl_path):
        with open(fl_path, "w") as f:
            json.dump(fl_result, f, indent=2)
        print("[answer_json_verifier] Generated fault_localization_result.json")

# ── Apply diffs directly to /repo_full (zero-copy) ────────────────────────
if not changes:
    sys.exit(0)

# Apply diffs in-place — container is ephemeral, no need to preserve /repo_full
repo_full = "/repo_full"
verify_repo = repo_full

if not os.path.isdir(repo_full):
    print(f"[answer_json_verifier] WARNING: {repo_full} not found. Cannot apply diffs.")
    with open("/tmp/.answer_json_no_changes", "w") as f:
        f.write("1")
    sys.exit(0)

# Ensure verifier (root) can write to repo_full
subprocess.run(["chmod", "-R", "u+w", repo_full], capture_output=True)
print(f"[answer_json_verifier] Applying diffs to {repo_full} (in-place, zero-copy)...")
subprocess.run(
    ["git", "config", "--global", "--add", "safe.directory", repo_full],
    capture_output=True
)

# Apply each diff, trying progressively fuzzier tools: strict git apply,
# then patch(1) with fuzz, then a 3-way git apply. First success wins.
strategies = [
    ("git apply", ["git", "apply", "--allow-empty"]),
    ("patch -p1", ["patch", "-p1", "--fuzz=3", "-i"]),
    ("git apply --3way", ["git", "apply", "--allow-empty", "--3way"]),
]

applied = 0
failed = 0

for entry in changes:
    diff_text = entry.get("diff", "")
    if not diff_text or not diff_text.strip():
        continue

    file_name = entry.get("file", "unknown")

    with tempfile.NamedTemporaryFile(mode='w', suffix='.patch', delete=False, dir='/tmp') as pf:
        pf.write(diff_text)
        pf.flush()
        pf_path = pf.name

    ok = False
    for label, cmd in strategies:
        result = subprocess.run(
            cmd + [pf_path], cwd=verify_repo, capture_output=True, text=True
        )
        if result.returncode == 0:
            applied += 1
            print(f"[answer_json_verifier] Applied diff for {file_name} ({label})")
            ok = True
            break

    if not ok:
        failed += 1
        print(f"[answer_json_verifier] WARNING: Diff for {file_name} failed to apply", file=sys.stderr)
    os.unlink(pf_path)

print(f"[answer_json_verifier] Diffs applied: {applied}, failed: {failed}")

# Write verify_repo path for shell to pick up
with open("/tmp/.answer_json_verify_repo", "w") as f:
    f.write(verify_repo)
PYEOF
| 321 | + |
# Pick up VERIFY_REPO and the no-changes flag from the markers the Python
# step may have written, then clean the markers up.
if [ -f /tmp/.answer_json_no_changes ]; then
  export ANSWER_JSON_NO_CHANGES=true
fi
if [ -f /tmp/.answer_json_verify_repo ]; then
  export VERIFY_REPO="$(cat /tmp/.answer_json_verify_repo)"
  # Guard the cd: a bogus marker must not silently leave us in the wrong cwd.
  cd "$VERIFY_REPO" || echo "[answer_json_verifier] WARNING: cd to $VERIFY_REPO failed" >&2
  echo "[answer_json_verifier] VERIFY_REPO set to $VERIFY_REPO"
elif [ -f /tmp/.answer_json_no_changes ]; then
  # Analysis-only: no repo copy needed, use /workspace or /repo_full as fallback
  if [ -d /repo_full ]; then
    export VERIFY_REPO="/repo_full"
  else
    export VERIFY_REPO="${VERIFY_REPO:-/workspace}"
  fi
  echo "[answer_json_verifier] Analysis-only mode, VERIFY_REPO=$VERIFY_REPO"
else
  export VERIFY_REPO="${VERIFY_REPO:-/workspace}"
  echo "[answer_json_verifier] WARNING: Using fallback VERIFY_REPO=$VERIFY_REPO"
fi

# Clean up temp markers
rm -f /tmp/.answer_json_verify_repo /tmp/.answer_json_no_changes

echo "[answer_json_verifier] Library loaded (ARTIFACT_ONLY=$ARTIFACT_ONLY, VERIFY_REPO=$VERIFY_REPO)"
0 commit comments