diff --git a/src/swe/docker_sandbox.rs b/src/swe/docker_sandbox.rs
index 2adf200..93bfb87 100644
--- a/src/swe/docker_sandbox.rs
+++ b/src/swe/docker_sandbox.rs
@@ -5,11 +5,36 @@
 
 use anyhow::Result;
 use std::process::Stdio;
+use std::sync::atomic::{AtomicU16, Ordering};
 use tokio::process::Command;
 
 use crate::swe::tool_server::TOOL_SERVER_PY;
 use crate::swe::{validate_file_path, validate_git_ref, validate_repo_name};
 
+/// Global atomic port counter to guarantee unique ports across all concurrent containers.
+static NEXT_PORT: AtomicU16 = AtomicU16::new(10_000);
+
+/// Allocate a unique port for a tool server.
+///
+/// Uses an atomic counter to guarantee no two concurrent sandboxes get the same port.
+/// Wraps around from 60_000 back to 10_000.
+fn allocate_port() -> u16 {
+    loop {
+        let current = NEXT_PORT.load(Ordering::Relaxed);
+        let next = if current >= 60_000 {
+            10_000
+        } else {
+            current + 1
+        };
+        if NEXT_PORT
+            .compare_exchange(current, next, Ordering::Relaxed, Ordering::Relaxed)
+            .is_ok()
+        {
+            return current;
+        }
+    }
+}
+
 /// Shell command output from inside the container.
 pub struct SandboxOutput {
     pub stdout: String,
@@ -22,6 +47,8 @@ pub struct DockerSandbox {
     container_name: String,
     /// Unique port for the tool server (needed because --network=host shares port space).
     tool_port: u16,
+    /// Whether the tool server started successfully.
+    tool_server_ok: bool,
 }
 
 /// Pick a Docker image appropriate for the given language.
@@ -51,8 +78,7 @@ impl DockerSandbox {
             .as_millis()
             % 1_000_000;
         let container_name = format!("swe-mine-{}-{}", safe_name, ts_suffix);
-        // Derive unique tool server port from timestamp (range 10000-59999)
-        let tool_port = 10_000 + (ts_suffix as u16 % 50_000);
+        let tool_port = allocate_port();
 
         // Remove stale container if it exists
         if let Err(e) = Command::new("docker")
@@ -90,9 +116,10 @@ impl DockerSandbox {
             );
         }
 
-        let sandbox = Self {
+        let mut sandbox = Self {
             container_name,
             tool_port,
+            tool_server_ok: false,
         };
 
         // Install git (only hard dependency; agent installs everything else)
@@ -103,10 +130,11 @@ impl DockerSandbox {
             )
             .await;
         if install.exit_code != 0 {
-            tracing::warn!(
-                container = %sandbox.container_name,
-                stderr = %install.stderr,
-                "git install failed (continuing)"
+            sandbox.destroy().await;
+            anyhow::bail!(
+                "git install failed in container '{}': {}",
+                sandbox.container_name,
+                install.stderr
             );
         }
 
@@ -131,17 +159,23 @@ impl DockerSandbox {
                 )
                 .await;
             if checkout.exit_code != 0 {
-                tracing::warn!(
-                    container = %sandbox.container_name,
-                    commit = base_commit,
-                    stderr = %checkout.stderr,
-                    "Checkout failed (continuing on HEAD)"
+                sandbox.destroy().await;
+                anyhow::bail!(
+                    "Checkout of commit {} failed in container '{}': {}",
+                    base_commit,
+                    sandbox.container_name,
+                    truncate(&checkout.stderr, 500)
                 );
             }
         }
 
         // Inject and start the tool server
-        sandbox.start_tool_server().await;
+        sandbox.tool_server_ok = sandbox.start_tool_server().await;
+        if sandbox.tool_server_ok {
+            tracing::debug!(container = %sandbox.container_name, port = sandbox.tool_port, "Tool server started");
+        } else {
+            tracing::debug!(container = %sandbox.container_name, "Tool server unavailable, shell fallback will be used");
+        }
 
         tracing::info!(
             container = %sandbox.container_name,
@@ -154,53 +188,103 @@ impl DockerSandbox {
     }
 
     /// Write and start the Python tool server inside the container.
-    async fn start_tool_server(&self) {
-        // Write the server script
-        let mkdir = self.exec("mkdir -p /tools", 10_000).await;
-        if mkdir.exit_code != 0 {
-            tracing::warn!(container = %self.container_name, "Failed to create /tools dir");
-            return;
-        }
+    ///
+    /// Returns `true` if the tool server started successfully, `false` otherwise.
+    /// On failure, the caller should fall back to shell-based tool execution.
+    async fn start_tool_server(&self) -> bool {
+        for retry in 0..2 {
+            if retry > 0 {
+                tracing::debug!(
+                    container = %self.container_name,
+                    retry = retry,
+                    "Retrying tool server startup"
+                );
+                // Kill any leftover process from previous attempt
+                self.exec(
+                    "pkill -f 'python3.*server.py' 2>/dev/null; rm -f /tools/server.log",
+                    5_000,
+                )
+                .await;
+                tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+            }
 
-        // Use write_file to inject the server script
-        if let Err(e) = self
-            .write_file_abs("/tools/server.py", TOOL_SERVER_PY)
-            .await
-        {
-            tracing::warn!(container = %self.container_name, error = %e, "Failed to write tool server");
-            return;
-        }
+            // Write the server script
+            let mkdir = self.exec("mkdir -p /tools", 10_000).await;
+            if mkdir.exit_code != 0 {
+                tracing::debug!(container = %self.container_name, "Failed to create /tools dir");
+                continue;
+            }
 
-        // Start server in background with unique port (--network=host shares port space)
-        let start_cmd = format!(
-            "nohup python3 -u /tools/server.py --port {} --cwd /repo > /tools/server.log 2>&1 &",
-            self.tool_port
-        );
-        let start = self.exec(&start_cmd, 5_000).await;
-        if start.exit_code != 0 {
-            tracing::warn!(
-                container = %self.container_name,
-                stderr = %start.stderr,
-                "Tool server start may have failed"
+            if let Err(e) = self
+                .write_file_abs("/tools/server.py", TOOL_SERVER_PY)
+                .await
+            {
+                tracing::debug!(container = %self.container_name, error = %e, "Failed to write tool server");
+                continue;
+            }
+
+            // Verify the script was written correctly
+            let verify = self
+                .exec("wc -c < /tools/server.py 2>/dev/null", 5_000)
+                .await;
+            let written_bytes: usize = verify.stdout.trim().parse().unwrap_or(0);
+            if written_bytes < TOOL_SERVER_PY.len() / 2 {
+                tracing::debug!(
+                    container = %self.container_name,
+                    expected = TOOL_SERVER_PY.len(),
+                    actual = written_bytes,
+                    "Tool server script truncated, retrying"
+                );
+                continue;
+            }
+
+            // Start server in background with unique port (--network=host shares port space)
+            let start_cmd = format!(
+                "nohup python3 -u /tools/server.py --port {} --cwd /repo > /tools/server.log 2>&1 &",
+                self.tool_port
             );
-        }
+            let start = self.exec(&start_cmd, 5_000).await;
+            if start.exit_code != 0 {
+                tracing::debug!(
+                    container = %self.container_name,
+                    stderr = %start.stderr,
+                    "Tool server start command failed"
+                );
+                continue;
+            }
 
-        // Health check: use python3 urllib (curl is not installed in slim images)
-        for attempt in 0..6 {
-            tokio::time::sleep(std::time::Duration::from_millis(500)).await;
-            let health = self.tool_server_health().await;
-            if health {
-                tracing::debug!(container = %self.container_name, attempt = attempt, "Tool server healthy");
-                return;
+            // Health check: 12 attempts × 500ms = 6s total
+            for attempt in 0..12 {
+                tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+                if self.tool_server_health().await {
+                    tracing::debug!(
+                        container = %self.container_name,
+                        attempt = attempt,
+                        retry = retry,
+                        "Tool server healthy"
+                    );
+                    return true;
+                }
             }
+
+            // Log server output for debugging on this retry
+            let log = self.exec("cat /tools/server.log 2>/dev/null", 5_000).await;
+            tracing::debug!(
+                container = %self.container_name,
+                retry = retry,
+                server_log = %log.stdout,
+                "Tool server health check failed after 6s"
+            );
         }
-        // Log server output for debugging
+
+        // All retries exhausted
         let log = self.exec("cat /tools/server.log 2>/dev/null", 5_000).await;
         tracing::warn!(
             container = %self.container_name,
             server_log = %log.stdout,
-            "Tool server health check failed after 3s, tools may not work"
+            "Tool server failed to start after retries, falling back to shell tools"
         );
+        false
     }
 
     /// Check if the tool server is healthy via python3 urllib inside the container.
@@ -229,6 +313,11 @@ impl DockerSandbox {
         matches!(result, Ok(Ok(status)) if status.success())
     }
 
+    /// Whether the tool server is available for HTTP-based tool requests.
+    pub fn has_tool_server(&self) -> bool {
+        self.tool_server_ok
+    }
+
     /// Call a tool on the HTTP tool server running inside the container.
     /// Pipes the JSON args via stdin to avoid shell escaping issues.
     pub async fn tool_request(&self, tool_name: &str, args_json: &str) -> SandboxOutput {
@@ -590,4 +679,21 @@ mod tests {
         assert!(output.stdout.is_empty());
         assert_eq!(output.exit_code, 0);
     }
+
+    #[test]
+    fn test_allocate_port_returns_valid_range() {
+        let port = allocate_port();
+        assert!(port >= 10_000);
+        assert!(port <= 60_000);
+    }
+
+    #[test]
+    fn test_allocate_port_sequential_unique() {
+        let p1 = allocate_port();
+        let p2 = allocate_port();
+        let p3 = allocate_port();
+        assert_ne!(p1, p2);
+        assert_ne!(p2, p3);
+        assert_ne!(p1, p3);
+    }
 }
diff --git a/src/swe/test_generator.rs b/src/swe/test_generator.rs
index 653be13..b22c5ce 100644
--- a/src/swe/test_generator.rs
+++ b/src/swe/test_generator.rs
@@ -12,7 +12,7 @@ use crate::llm::{
     GenerationRequest, LlmProvider, Message, ToolCallInfo, ToolChoice, ToolDefinition,
 };
 use crate::swe::docker_sandbox::DockerSandbox;
-use crate::swe::SweTask;
+use crate::swe::{validate_file_path, SweTask};
 
 const MAX_AGENT_TURNS: usize = 200;
 const MAX_VALIDATION_RETRIES: usize = 3;
@@ -871,44 +871,36 @@ impl TestGenerator {
                 }
             }
             "read_file" | "list_dir" | "grep_files" | "search_files" | "apply_patch" => {
-                let result = sandbox
-                    .tool_request(tc.function.name.as_str(), &tc.function.arguments)
-                    .await;
-                let output = if !result.stdout.is_empty() {
-                    // Parse JSON response from tool server
-                    match serde_json::from_str::<serde_json::Value>(result.stdout.trim()) {
-                        Ok(v) => {
-                            if let Some(err) = v.get("error").and_then(|e| e.as_str()) {
-                                err.to_string()
-                            } else if let Some(out) = v.get("output").and_then(|o| o.as_str()) {
-                                let mut s = out.to_string();
-                                if let Some(total) = v.get("total_lines").and_then(|t| t.as_u64()) {
-                                    if v.get("truncated")
-                                        .and_then(|t| t.as_bool())
-                                        .unwrap_or(false)
-                                    {
-                                        s.push_str(&format!("\n\n[Showing {}/{} lines. Use offset/limit to see more.]",
-                                            v.get("shown_lines").and_then(|s| s.as_u64()).unwrap_or(0), total));
-                                    } else {
-                                        s.push_str(&format!("\n\n[{} total lines]", total));
-                                    }
-                                }
-                                s
-                            } else {
-                                result.stdout.clone()
-                            }
-                        }
-                        Err(_) => result.stdout.clone(),
+                let tool_name = tc.function.name.as_str();
+                let args_json = &tc.function.arguments;
+
+                let output = if sandbox.has_tool_server() {
+                    let result = sandbox.tool_request(tool_name, args_json).await;
+                    let server_down = result.exit_code != 0
+                        && result.stdout.is_empty()
+                        && (result.stderr.contains("Connection refused")
+                            || result.stderr.contains("URLError")
+                            || result.stderr.contains("Tool request error"));
+                    if server_down {
+                        tracing::debug!(
+                            task_id = task_id,
+                            turn = turn,
+                            tool = tool_name,
+                            "Tool server unavailable, falling back to shell"
+                        );
+                        shell_fallback(sandbox, tool_name, args_json).await
+                    } else {
+                        parse_tool_response(&result)
                     }
-                } else if !result.stderr.is_empty() {
-                    format!("Error: {}", truncate_utf8(&result.stderr, 1500))
                 } else {
-                    "No output".to_string()
+                    shell_fallback(sandbox, tool_name, args_json).await
                 };
+
                 tracing::debug!(
-                    task_id = task_id, turn = turn,
-                    tool = %tc.function.name,
-                    "Agent tool call via HTTP server"
+                    task_id = task_id,
+                    turn = turn,
+                    tool = tool_name,
+                    "Agent tool call"
                 );
                 ToolResult::ShellOutput(truncate_utf8(&output, 4000).to_string())
             }
@@ -927,6 +919,190 @@ enum ToolResult {
     Error(String),
 }
 
+/// Parse a JSON response from the tool server into a user-friendly string.
+fn parse_tool_response(result: &crate::swe::docker_sandbox::SandboxOutput) -> String {
+    if !result.stdout.is_empty() {
+        match serde_json::from_str::<serde_json::Value>(result.stdout.trim()) {
+            Ok(v) => {
+                if let Some(err) = v.get("error").and_then(|e| e.as_str()) {
+                    err.to_string()
+                } else if let Some(out) = v.get("output").and_then(|o| o.as_str()) {
+                    let mut s = out.to_string();
+                    if let Some(total) = v.get("total_lines").and_then(|t| t.as_u64()) {
+                        if v.get("truncated")
+                            .and_then(|t| t.as_bool())
+                            .unwrap_or(false)
+                        {
+                            s.push_str(&format!(
+                                "\n\n[Showing {}/{} lines. Use offset/limit to see more.]",
+                                v.get("shown_lines").and_then(|sl| sl.as_u64()).unwrap_or(0),
+                                total
+                            ));
+                        } else {
+                            s.push_str(&format!("\n\n[{} total lines]", total));
+                        }
+                    }
+                    s
+                } else {
+                    result.stdout.clone()
+                }
+            }
+            Err(_) => result.stdout.clone(),
+        }
+    } else if !result.stderr.is_empty() {
+        format!("Error: {}", truncate_utf8(&result.stderr, 1500))
+    } else {
+        "No output".to_string()
+    }
+}
+
+/// Escape a string for safe inclusion inside a POSIX single-quoted shell argument.
+///
+/// Replaces each `'` with `'\''` (end quote, backslash-escaped literal quote,
+/// re-open quote). This is the standard POSIX technique for embedding single
+/// quotes inside single-quoted strings and prevents shell breakout.
+fn sanitize_shell_arg(s: &str) -> String {
+    s.replace('\'', "'\\''")
+}
+
+/// Execute a tool via direct shell commands when the HTTP tool server is unavailable.
+///
+/// All values interpolated into shell commands are either validated via
+/// [`validate_file_path`] (for filesystem paths) or escaped via
+/// [`sanitize_shell_arg`] (for patterns/globs that may contain special chars).
+async fn shell_fallback(sandbox: &DockerSandbox, tool_name: &str, args_json: &str) -> String {
+    let args: serde_json::Value = match serde_json::from_str(args_json) {
+        Ok(v) => v,
+        Err(e) => return format!("Invalid tool args: {}", e),
+    };
+
+    match tool_name {
+        "read_file" => {
+            let file_path = args.get("file_path").and_then(|v| v.as_str()).unwrap_or("");
+            if file_path.is_empty() {
+                return "Error: missing file_path".to_string();
+            }
+            if let Err(e) = validate_file_path(file_path) {
+                return format!("Error: invalid file_path: {}", e);
+            }
+            let offset = args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
+            let limit = args.get("limit").and_then(|v| v.as_u64());
+            let cmd = match limit {
+                Some(lim) => format!(
+                    "awk 'NR>{} && NR<={}{{print NR\": \"$0}}' '{}'",
+                    offset,
+                    offset + lim,
+                    file_path
+                ),
+                None => format!("awk '{{print NR\": \"$0}}' '{}'", file_path),
+            };
+            let result = sandbox.exec(&cmd, 10_000).await;
+            if result.exit_code != 0 {
+                format!("Error reading file: {}", result.stderr)
+            } else if result.stdout.is_empty() {
+                "(empty file)".to_string()
+            } else {
+                result.stdout
+            }
+        }
+        "list_dir" => {
+            let dir = args
+                .get("directory_path")
+                .and_then(|v| v.as_str())
+                .unwrap_or(".");
+            if dir != "." {
+                if let Err(e) = validate_file_path(dir) {
+                    return format!("Error: invalid directory_path: {}", e);
+                }
+            }
+            let cmd = format!("ls -la '{}'", sanitize_shell_arg(dir));
+            let result = sandbox.exec(&cmd, 10_000).await;
+            if result.exit_code != 0 {
+                format!("Error listing directory: {}", result.stderr)
+            } else {
+                result.stdout
+            }
+        }
+        "grep_files" => {
+            let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
+            if pattern.is_empty() {
+                return "Error: missing pattern".to_string();
+            }
+            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
+            if path != "." {
+                if let Err(e) = validate_file_path(path) {
+                    return format!("Error: invalid path: {}", e);
+                }
+            }
+            let include = args.get("include").and_then(|v| v.as_str());
+            let limit = args.get("limit").and_then(|v| v.as_u64()).unwrap_or(100);
+            let include_arg = match include {
+                Some(glob) => format!(" --include='{}'", sanitize_shell_arg(glob)),
+                None => String::new(),
+            };
+            let cmd = format!(
+                "grep -rn --color=never{} '{}' '{}' | head -n {}",
+                include_arg,
+                sanitize_shell_arg(pattern),
+                sanitize_shell_arg(path),
+                limit
+            );
+            let result = sandbox.exec(&cmd, 30_000).await;
+            if result.stdout.is_empty() {
+                "No matches found.".to_string()
+            } else {
+                result.stdout
+            }
+        }
+        "search_files" => {
+            let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
+            if pattern.is_empty() {
+                return "Error: missing pattern".to_string();
+            }
+            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
+            if path != "." {
+                if let Err(e) = validate_file_path(path) {
+                    return format!("Error: invalid path: {}", e);
+                }
+            }
+            let cmd = format!(
+                "find '{}' -name '{}' -not -path '*/.git/*' -not -path '*/node_modules/*' | sort",
+                sanitize_shell_arg(path),
+                sanitize_shell_arg(pattern)
+            );
+            let result = sandbox.exec(&cmd, 30_000).await;
+            if result.stdout.is_empty() {
+                format!("No files matching '{}'", pattern)
+            } else {
+                result.stdout
+            }
+        }
+        "apply_patch" => {
+            let patch = args.get("patch").and_then(|v| v.as_str()).unwrap_or("");
+            if patch.is_empty() {
+                return "Error: missing patch".to_string();
+            }
+            match sandbox.write_file(".swe_forge_tool_patch.tmp", patch).await {
+                Ok(_) => {
+                    let result = sandbox
+                        .exec(
+                            "git apply --allow-empty .swe_forge_tool_patch.tmp 2>&1 && rm -f .swe_forge_tool_patch.tmp",
+                            30_000,
+                        )
+                        .await;
+                    if result.exit_code == 0 {
+                        "Patch applied successfully.".to_string()
+                    } else {
+                        format!("git apply failed: {}", result.stdout)
+                    }
+                }
+                Err(e) => format!("Failed to write patch file: {}", e),
+            }
+        }
+        _ => format!("Unknown tool: {}", tool_name),
+    }
+}
+
 /// Scan test files for string-matching anti-patterns and return a rejection reason if found.
 fn reject_string_matching_tests(files: &[TestFile]) -> Option<String> {
     let patterns: &[(&str, &str)] = &[
diff --git a/src/swe/tool_server.rs b/src/swe/tool_server.rs
index 8683377..b0f6c0e 100644
--- a/src/swe/tool_server.rs
+++ b/src/swe/tool_server.rs
@@ -2,7 +2,7 @@
 /// Adapted from baseagent (https://github.com/PlatformNetwork/baseagent).
 ///
 /// Provides structured, token-efficient tools: read_file, list_dir,
-/// grep_files, search_files, apply_patch. Runs on port 8080.
+/// grep_files, search_files, apply_patch. Default port 8080, overridden via `--port` flag.
 pub const TOOL_SERVER_PY: &str = r#####"#!/usr/bin/env python3
 """HTTP tool server for swe-forge Docker containers.
 Adapted from baseagent tools. Provides structured file exploration tools
@@ -305,7 +305,12 @@ def main():
         elif arg == "--cwd" and i < len(sys.argv) - 1:
             CWD = Path(sys.argv[i + 1])
 
-    server = HTTPServer(("0.0.0.0", port), ToolHandler)
+    HTTPServer.allow_reuse_address = True
+    try:
+        server = HTTPServer(("0.0.0.0", port), ToolHandler)
+    except OSError as e:
+        print(f"FATAL: Cannot bind to port {port}: {e}", flush=True)
+        sys.exit(1)
     print(f"Tool server listening on port {port}, cwd={CWD}", flush=True)
     server.serve_forever()