diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3412b31 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/AGENTS.md diff --git a/README.md b/README.md index 53762ab..fa3557e 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,14 @@ This repo is the community ecosystem around OpenShell -- a hub for contributed s ### Quick Start with Brev -TODO: Add Brev instructions +Skip the setup and launch OpenShell Community on a fully configured Brev instance, whether you want to use Brev as a remote OpenShell gateway with or without GPU accelerators, or as an all-in-one playground for sandboxes, inference, and UI workflows. + +| Instance | Best For | Deploy | +| -------- | -------- | ------ | +| CPU-only | Remote OpenShell gateway deployments, external inference endpoints, remote APIs, and lighter-weight sandbox workflows | Deploy on Brev | +| NVIDIA H100 | All-in-one OpenShell playgrounds, locally hosted LLM endpoints, GPU-heavy sandboxes, and higher-throughput agent workloads | Deploy on Brev | + +After the Brev instance is ready, open the Welcome UI to inject provider keys and access your OpenClaw sandbox. 
### Using Sandboxes diff --git a/brev/.gitignore b/brev/.gitignore index c26c3f6..54affb1 100644 --- a/brev/.gitignore +++ b/brev/.gitignore @@ -1 +1,2 @@ -brev-start-vm.sh \ No newline at end of file +brev-start-vm.sh +reset.sh \ No newline at end of file diff --git a/brev/launch.sh b/brev/launch.sh index dfee5f8..782be4e 100755 --- a/brev/launch.sh +++ b/brev/launch.sh @@ -32,6 +32,15 @@ CLI_RETRY_COUNT="${CLI_RETRY_COUNT:-5}" CLI_RETRY_DELAY_SECS="${CLI_RETRY_DELAY_SECS:-3}" GHCR_LOGIN="${GHCR_LOGIN:-auto}" GHCR_USER="${GHCR_USER:-}" +DEFAULT_NEMOCLAW_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest" +if [[ -n "${NEMOCLAW_IMAGE+x}" ]]; then + NEMOCLAW_IMAGE_EXPLICIT=1 +else + NEMOCLAW_IMAGE_EXPLICIT=0 +fi +NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-$DEFAULT_NEMOCLAW_IMAGE}" +SKIP_NEMOCLAW_IMAGE_BUILD="${SKIP_NEMOCLAW_IMAGE_BUILD:-}" +CLUSTER_CONTAINER_NAME="${CLUSTER_CONTAINER_NAME:-openshell-cluster-openshell}" mkdir -p "$(dirname "$LAUNCH_LOG")" touch "$LAUNCH_LOG" @@ -252,6 +261,136 @@ docker_login_ghcr_if_needed() { fi } +should_build_nemoclaw_image() { + if [[ "$SKIP_NEMOCLAW_IMAGE_BUILD" == "1" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "true" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "yes" ]]; then + return 1 + fi + [[ -n "$COMMUNITY_REF" && "$COMMUNITY_REF" != "main" ]] +} + +maybe_use_branch_local_nemoclaw_tag() { + if ! should_build_nemoclaw_image; then + return + fi + + if [[ "$NEMOCLAW_IMAGE_EXPLICIT" == "1" || "$NEMOCLAW_IMAGE" != "$DEFAULT_NEMOCLAW_IMAGE" ]]; then + return + fi + + NEMOCLAW_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:local-dev" + log "Using non-main branch NeMoClaw image tag: $NEMOCLAW_IMAGE" +} + +build_nemoclaw_image_if_needed() { + local docker_cmd=() + local image_context="$REPO_ROOT/sandboxes/nemoclaw" + local dockerfile_path="$image_context/Dockerfile" + + if ! 
should_build_nemoclaw_image; then + if [[ "$SKIP_NEMOCLAW_IMAGE_BUILD" == "1" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "true" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "yes" ]]; then + log "Skipping local NeMoClaw image build by override (SKIP_NEMOCLAW_IMAGE_BUILD=${SKIP_NEMOCLAW_IMAGE_BUILD})." + else + log "Skipping local NeMoClaw image build (COMMUNITY_REF=${COMMUNITY_REF:-})." + fi + return + fi + + if [[ ! -f "$dockerfile_path" ]]; then + log "NeMoClaw Dockerfile not found: $dockerfile_path" + exit 1 + fi + + if command -v docker >/dev/null 2>&1; then + docker_cmd=(docker) + elif command -v sudo >/dev/null 2>&1; then + docker_cmd=(sudo docker) + else + log "Docker is required to build the NeMoClaw sandbox image." + exit 1 + fi + + log "Building local NeMoClaw image for non-main ref '$COMMUNITY_REF': $NEMOCLAW_IMAGE" + if ! "${docker_cmd[@]}" build \ + --pull \ + --tag "$NEMOCLAW_IMAGE" \ + --file "$dockerfile_path" \ + "$image_context"; then + log "Local NeMoClaw image build failed." + exit 1 + fi + + log "Local NeMoClaw image ready: $NEMOCLAW_IMAGE" +} + +resolve_docker_cmd() { + if command -v docker >/dev/null 2>&1; then + printf 'docker' + return 0 + fi + if command -v sudo >/dev/null 2>&1; then + printf 'sudo docker' + return 0 + fi + return 1 +} + +resolve_cluster_container_name() { + local docker_bin + + if [[ -n "$CLUSTER_CONTAINER_NAME" ]]; then + printf '%s' "$CLUSTER_CONTAINER_NAME" + return 0 + fi + + docker_bin="$(resolve_docker_cmd)" || return 1 + + CLUSTER_CONTAINER_NAME="$($docker_bin ps --format '{{.Names}}\t{{.Image}}' | awk '$1 ~ /^openshell-cluster-/ { print $1; exit }')" + if [[ -z "$CLUSTER_CONTAINER_NAME" ]]; then + CLUSTER_CONTAINER_NAME="$($docker_bin ps --format '{{.Names}}\t{{.Image}}' | awk '$2 ~ /ghcr.io\\/nvidia\\/openshell\\/cluster/ { print $1; exit }')" + fi + + [[ -n "$CLUSTER_CONTAINER_NAME" ]] +} + +import_nemoclaw_image_into_cluster_if_needed() { + local docker_bin cluster_name + + if ! 
should_build_nemoclaw_image && [[ "$NEMOCLAW_IMAGE_EXPLICIT" != "1" ]]; then + log "Skipping cluster image import; using registry-backed image: $NEMOCLAW_IMAGE" + return + fi + + docker_bin="$(resolve_docker_cmd)" || { + log "Docker not available; skipping cluster image import." + return + } + + if ! $docker_bin image inspect "$NEMOCLAW_IMAGE" >/dev/null 2>&1; then + log "Local NeMoClaw image not present on host; skipping cluster image import: $NEMOCLAW_IMAGE" + return + fi + + if ! cluster_name="$(resolve_cluster_container_name)"; then + log "OpenShell cluster container not found; skipping cluster image import." + return + fi + + log "Importing NeMoClaw image into cluster containerd: $NEMOCLAW_IMAGE -> $cluster_name" + if ! $docker_bin save "$NEMOCLAW_IMAGE" | $docker_bin exec -i "$cluster_name" sh -lc 'ctr -n k8s.io images import -'; then + log "Failed to import NeMoClaw image into cluster containerd." + exit 1 + fi + + if ! $docker_bin exec -i "$cluster_name" sh -lc "ctr -n k8s.io images ls | awk '{print \$1}' | grep -Fx '$NEMOCLAW_IMAGE' >/dev/null"; then + log "Imported image tag not found in cluster containerd: $NEMOCLAW_IMAGE" + log "Cluster image list:" + $docker_bin exec -i "$cluster_name" sh -lc "ctr -n k8s.io images ls | grep 'sandboxes/nemoclaw' || true" + exit 1 + fi + + log "Cluster image import complete: $NEMOCLAW_IMAGE" +} + checkout_repo_ref() { if [[ -z "$COMMUNITY_REF" ]]; then return @@ -518,7 +657,12 @@ start_welcome_ui() { log "Starting welcome UI in background..." log "Welcome UI log: $WELCOME_UI_LOG" - nohup env PORT="$PORT" REPO_ROOT="$REPO_ROOT" CLI_BIN="$CLI_BIN" node server.js >> "$WELCOME_UI_LOG" 2>&1 & + nohup env \ + PORT="$PORT" \ + REPO_ROOT="$REPO_ROOT" \ + CLI_BIN="$CLI_BIN" \ + NEMOCLAW_IMAGE="$NEMOCLAW_IMAGE" \ + node server.js >> "$WELCOME_UI_LOG" 2>&1 & WELCOME_UI_PID=$! 
export WELCOME_UI_PID log "Welcome UI PID: $WELCOME_UI_PID" @@ -542,8 +686,11 @@ main() { step "Resolving CLI" resolve_cli ensure_cli_compat_aliases + maybe_use_branch_local_nemoclaw_tag step "Authenticating registries" docker_login_ghcr_if_needed + step "Preparing NeMoClaw image" + build_nemoclaw_image_if_needed step "Ensuring Node.js" ensure_node @@ -555,6 +702,8 @@ main() { step "Starting gateway" start_gateway + step "Importing NeMoClaw image into cluster" + import_nemoclaw_image_into_cluster_if_needed step "Configuring providers" run_provider_create_or_replace \ diff --git a/brev/welcome-ui/OpenShell-Icon-Logo.svg b/brev/welcome-ui/OpenShell-Icon-Logo.svg new file mode 100644 index 0000000..91e389d --- /dev/null +++ b/brev/welcome-ui/OpenShell-Icon-Logo.svg @@ -0,0 +1,20 @@ + + + + + + + + + + + diff --git a/brev/welcome-ui/OpenShell-Icon.svg b/brev/welcome-ui/OpenShell-Icon.svg new file mode 100644 index 0000000..81bcd2c --- /dev/null +++ b/brev/welcome-ui/OpenShell-Icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/brev/welcome-ui/favicon.ico b/brev/welcome-ui/favicon.ico index dce0622..76d821c 100644 Binary files a/brev/welcome-ui/favicon.ico and b/brev/welcome-ui/favicon.ico differ diff --git a/brev/welcome-ui/index.html b/brev/welcome-ui/index.html index 19dcc37..4d95a34 100644 --- a/brev/welcome-ui/index.html +++ b/brev/welcome-ui/index.html @@ -4,7 +4,7 @@ OpenShell — Agent Sandbox - + @@ -16,7 +16,7 @@
- + OpenShell Sandbox diff --git a/brev/welcome-ui/openshell-mark.svg b/brev/welcome-ui/openshell-mark.svg deleted file mode 100644 index 300ba64..0000000 --- a/brev/welcome-ui/openshell-mark.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/brev/welcome-ui/server.js b/brev/welcome-ui/server.js index abc63b8..a6f9036 100644 --- a/brev/welcome-ui/server.js +++ b/brev/welcome-ui/server.js @@ -38,6 +38,7 @@ const SANDBOX_START_CMD = process.env.SANDBOX_START_CMD || "nemoclaw-start"; const SANDBOX_BASE_IMAGE = process.env.SANDBOX_BASE_IMAGE || "ghcr.io/nvidia/openshell-community/sandboxes/openclaw:latest"; +const NEMOCLAW_IMAGE = (process.env.NEMOCLAW_IMAGE || "").trim(); const POLICY_FILE = path.join(SANDBOX_DIR, "policy.yaml"); const LOG_FILE = "/tmp/nemoclaw-sandbox-create.log"; @@ -264,6 +265,12 @@ const injectKeyState = { keyHash: null, }; +// Raw API key stored in memory so it can be passed to the sandbox at +// creation time. Not persisted to disk. +let _nvidiaApiKey = process.env.NVIDIA_INFERENCE_API_KEY + || process.env.NVIDIA_INTEGRATE_API_KEY + || ""; + // ── Brev ID detection & URL building ─────────────────────────────────────── function extractBrevId(host) { @@ -286,7 +293,7 @@ function buildOpenclawUrl(token) { } else { url = `http://127.0.0.1:${PORT}/`; } - if (token) url += `?token=${token}`; + if (token) url += `#token=${token}`; return url; } @@ -627,18 +634,44 @@ function runSandboxCreate() { const cmd = [ CLI_BIN, "sandbox", "create", "--name", SANDBOX_NAME, - "--from", SANDBOX_DIR, + "--from", NEMOCLAW_IMAGE || SANDBOX_DIR, "--forward", "18789", ]; if (policyPath) cmd.push("--policy", policyPath); - cmd.push( - "--", - "env", - `CHAT_UI_URL=${chatUiUrl}`, - SANDBOX_START_CMD - ); + const envArgs = [`CHAT_UI_URL=${chatUiUrl}`]; + const loopbackNoProxy = [ + "127.0.0.1", + "localhost", + "::1", + "navigator.navigator.svc.cluster.local", + ".svc", + ".svc.cluster.local", + "10.42.0.0/16", + "10.43.0.0/16", + ].join(","); + const mergedNoProxy 
= [ + process.env.NO_PROXY || process.env.no_proxy || "", + loopbackNoProxy, + ] + .filter(Boolean) + .join(","); + envArgs.push(`NO_PROXY=${mergedNoProxy}`); + envArgs.push(`no_proxy=${mergedNoProxy}`); + const nvapiKey = _nvidiaApiKey + || process.env.NVIDIA_INFERENCE_API_KEY + || process.env.NVIDIA_INTEGRATE_API_KEY + || ""; + if (nvapiKey) { + envArgs.push(`NVIDIA_INFERENCE_API_KEY=${nvapiKey}`); + envArgs.push(`NVIDIA_INTEGRATE_API_KEY=${nvapiKey}`); + } + + cmd.push("--", "env", ...envArgs, SANDBOX_START_CMD); const cmdDisplay = cmd.slice(0, 8).join(" ") + " -- ..."; + if (NEMOCLAW_IMAGE) { + logWelcome(`Using NeMoClaw image override: ${NEMOCLAW_IMAGE}`); + } logWelcome(`Running: ${cmdDisplay}`); const logFd = fs.openSync(LOG_FILE, "w"); @@ -1077,6 +1110,9 @@ async function handleClusterInferenceSet(req, res) { // ── Reverse proxy (HTTP) ─────────────────────────────────────────────────── function proxyToSandbox(clientReq, clientRes) { + logWelcome( + `proxy http in ${clientReq.method || "GET"} ${clientReq.url || "/"} -> 127.0.0.1:${SANDBOX_PORT}` + ); const headers = {}; for (const [key, val] of Object.entries(clientReq.headers)) { if (key.toLowerCase() === "host") continue; @@ -1094,6 +1130,9 @@ function proxyToSandbox(clientReq, clientRes) { }; const upstream = http.request(opts, (upstreamRes) => { + logWelcome( + `proxy http out ${clientReq.method || "GET"} ${clientReq.url || "/"} status=${upstreamRes.statusCode || 0}` + ); // Filter hop-by-hop + content-length (we'll set our own) const outHeaders = {}; for (const [key, val] of Object.entries(upstreamRes.headers)) { @@ -1132,6 +1171,7 @@ function proxyToSandbox(clientReq, clientRes) { // ── Reverse proxy (WebSocket) ────────────────────────────────────────────── function proxyWebSocket(req, clientSocket, head) { + logWelcome(`proxy ws in ${req.method || "GET"} ${req.url || "/"} -> 127.0.0.1:${SANDBOX_PORT}`); const upstream = net.createConnection( { host: "127.0.0.1", port: SANDBOX_PORT }, () => { @@ 
-1271,8 +1311,10 @@ async function handleInjectKey(req, res) { injectKeyState.status = "injecting"; injectKeyState.error = null; injectKeyState.keyHash = keyH; + _nvidiaApiKey = key; runInjectKey(key, keyH); + return jsonResponse(res, 202, { ok: true, started: true }); } @@ -1561,6 +1603,7 @@ function _resetForTesting() { detectedBrevId = ""; _brevEnvId = ""; renderedIndex = null; + _nvidiaApiKey = ""; } function _setMocksForTesting(mocks) { diff --git a/sandboxes/nemoclaw/Dockerfile b/sandboxes/nemoclaw/Dockerfile index bb10e19..686c3c3 100644 --- a/sandboxes/nemoclaw/Dockerfile +++ b/sandboxes/nemoclaw/Dockerfile @@ -16,6 +16,16 @@ FROM ${BASE_IMAGE} USER root +RUN apt-get update && \ + apt-get install -y --no-install-recommends jq && \ + rm -rf /var/lib/apt/lists/* + +# Bake the NeMoClaw default policy into the same location used by the +# OpenClaw base image so direct image launches and create-time --policy +# launches start from the same policy. +RUN mkdir -p /etc/navigator +COPY policy.yaml /etc/navigator/policy.yaml + # Override the startup script with our version (adds runtime API key injection) COPY nemoclaw-start.sh /usr/local/bin/nemoclaw-start RUN chmod +x /usr/local/bin/nemoclaw-start @@ -30,6 +40,10 @@ RUN npm install -g @grpc/grpc-js @grpc/proto-loader js-yaml # Fix @hono/node-server authorization bypass (GHSA-wc8c-qw6v-h7f6) RUN npm install -g @hono/node-server@1.19.11 +# Allow the sandbox user to read the default policy (the startup script +# copies it to a writable location; this chown covers non-Landlock envs). +# Some base image variants do not pre-create /etc/navigator. 
+RUN mkdir -p /etc/navigator && chown -R sandbox:sandbox /etc/navigator # Stage the NeMoClaw DevX extension source COPY nemoclaw-ui-extension/extension/ /opt/nemoclaw-devx/ diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index e1756f9..5d70d53 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -68,6 +68,9 @@ fi # -------------------------------------------------------------------------- # Onboard and start the gateway # -------------------------------------------------------------------------- +_DEFAULT_MODEL="moonshotai/kimi-k2.5" +_DEFAULT_CONTEXT_WINDOW=200000 +_DEFAULT_MAX_TOKENS=8192 export NVIDIA_API_KEY="${NVIDIA_INFERENCE_API_KEY:- }" _ONBOARD_KEY="${NVIDIA_INFERENCE_API_KEY:-not-used}" openclaw onboard \ @@ -107,10 +110,18 @@ cfg['gateway']['controlUi'] = { 'allowInsecureAuth': True, 'allowedOrigins': origins, } +for provider in cfg.get('models', {}).get('providers', {}).values(): + if not isinstance(provider, dict): + continue + for model in provider.get('models', []): + if isinstance(model, dict) and model.get('id') in ('${_DEFAULT_MODEL}', '-'): + model['contextWindow'] = ${_DEFAULT_CONTEXT_WINDOW} + model['maxTokens'] = ${_DEFAULT_MAX_TOKENS} json.dump(cfg, open(os.environ['HOME'] + '/.openclaw/openclaw.json', 'w'), indent=2) " nohup openclaw gateway > /tmp/gateway.log 2>&1 & +echo "[gateway] openclaw gateway launched (pid $!)" # Copy the default policy to a writable location so that policy-proxy can # update it at runtime. /etc is read-only under Landlock, but /sandbox is @@ -123,23 +134,80 @@ if [ ! -f "$_POLICY_DST" ] && [ -f "$_POLICY_SRC" ]; then fi _POLICY_PATH="${_POLICY_DST}" [ -f "$_POLICY_PATH" ] || _POLICY_PATH="$_POLICY_SRC" +echo "[gateway] policy path selected: ${_POLICY_PATH} (src=${_POLICY_SRC} dst=${_POLICY_DST})" # Start the policy reverse proxy on the public-facing port. 
It forwards all # traffic to the OpenClaw gateway on the internal port and intercepts # /api/policy requests to read/write the sandbox policy file. NODE_PATH=$(npm root -g) POLICY_PATH=${_POLICY_PATH} UPSTREAM_PORT=${INTERNAL_GATEWAY_PORT} LISTEN_PORT=${PUBLIC_PORT} \ nohup node /usr/local/lib/policy-proxy.js >> /tmp/gateway.log 2>&1 & +echo "[gateway] policy-proxy launched (pid $!) upstream=${INTERNAL_GATEWAY_PORT} public=${PUBLIC_PORT}" # Auto-approve pending device pairing requests so the browser is paired # before the user notices the "pairing required" prompt in the Control UI. ( - _pair_deadline=$(($(date +%s) + 300)) - while [ "$(date +%s)" -lt "$_pair_deadline" ]; do - sleep 0.5 - if openclaw devices approve --latest --json 2>/dev/null | grep -q '"ok"'; then - echo "[auto-pair] Approved pending device pairing request." + echo "[auto-pair] watcher starting" + _pair_timeout_secs="${AUTO_PAIR_TIMEOUT_SECS:-0}" + _pair_sleep_secs="0.5" + _pair_heartbeat_every=120 + _json_has_approval() { + jq -e ' + .device + | objects + | (.approvedAtMs? // empty) or ((.tokens? // []) | length > 0) + ' >/dev/null 2>&1 + } + + _summarize_device_list() { + jq -r ' + def labels($entries): + ($entries // []) + | map(select(type == "object" and (.deviceId? 
// "") != "") + | "\((.clientId // "unknown")):\((.deviceId // "")[0:12])"); + (labels(.pending)) as $pending + | (labels(.paired)) as $paired + | "pending=\($pending | length) [\(($pending | if length > 0 then join(", ") else "-" end))] paired=\($paired | length) [\(($paired | if length > 0 then join(", ") else "-" end))]" + ' 2>/dev/null || echo "unparseable" + } + + if [ "${_pair_timeout_secs}" -gt 0 ] 2>/dev/null; then + _pair_deadline=$(($(date +%s) + _pair_timeout_secs)) + echo "[auto-pair] watcher timeout=${_pair_timeout_secs}s" + else + _pair_deadline=0 + echo "[auto-pair] watcher timeout=disabled" + fi + _pair_attempts=0 + _pair_approved=0 + _pair_errors=0 + while true; do + if [ "${_pair_deadline}" -gt 0 ] && [ "$(date +%s)" -ge "${_pair_deadline}" ]; then + break + fi + + sleep "${_pair_sleep_secs}" + _pair_attempts=$((_pair_attempts + 1)) + _approve_output="$(openclaw devices approve --latest --json 2>&1 || true)" + + if printf '%s\n' "$_approve_output" | _json_has_approval; then + _pair_approved=$((_pair_approved + 1)) + _approved_device_id="$(printf '%s\n' "$_approve_output" | jq -r '.device.deviceId // ""' 2>/dev/null | cut -c1-12)" + echo "[auto-pair] approved request attempts=${_pair_attempts} count=${_pair_approved} device=${_approved_device_id:-unknown}" + continue + fi + + if [ -n "$_approve_output" ] && ! 
printf '%s\n' "$_approve_output" | grep -qiE 'no pending|no device|not paired|nothing to approve'; then + _pair_errors=$((_pair_errors + 1)) + echo "[auto-pair] approve --latest unexpected output attempts=${_pair_attempts} errors=${_pair_errors}: ${_approve_output}" + fi + + if [ $((_pair_attempts % _pair_heartbeat_every)) -eq 0 ]; then + _list_output="$(openclaw devices list --json 2>&1 || true)" + _device_summary="$(printf '%s\n' "$_list_output" | _summarize_device_list)" + echo "[auto-pair] heartbeat attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors} ${_device_summary}" fi done + echo "[auto-pair] watcher exiting attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors}" ) >> /tmp/gateway.log 2>&1 & CONFIG_FILE="${HOME}/.openclaw/openclaw.json" @@ -147,8 +215,8 @@ token=$(grep -o '"token"\s*:\s*"[^"]*"' "${CONFIG_FILE}" 2>/dev/null | head -1 | CHAT_UI_BASE="${CHAT_UI_URL%/}" if [ -n "${token}" ]; then - LOCAL_URL="http://127.0.0.1:18789/?token=${token}" - CHAT_URL="${CHAT_UI_BASE}/?token=${token}" + LOCAL_URL="http://127.0.0.1:18789/#token=${token}" + CHAT_URL="${CHAT_UI_BASE}/#token=${token}" else LOCAL_URL="http://127.0.0.1:18789/" CHAT_URL="${CHAT_UI_BASE}/" diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts index 8da56c0..dcdcce5 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts @@ -112,3 +112,39 @@ export function waitForReconnect(timeoutMs = 15_000): Promise { }, 500); }); } + +/** + * Wait until the app remains connected for a continuous stability window. + * + * This helps distinguish "socket connected for a moment" from "dashboard is + * actually ready to be revealed after pairing/bootstrap settles". 
+ */ +export function waitForStableConnection( + stableForMs = 3_000, + timeoutMs = 15_000, +): Promise { + return new Promise((resolve, reject) => { + const start = Date.now(); + let connectedSince = isAppConnected() ? Date.now() : 0; + + const interval = setInterval(() => { + const now = Date.now(); + + if (isAppConnected()) { + if (!connectedSince) connectedSince = now; + if (now - connectedSince >= stableForMs) { + clearInterval(interval); + resolve(); + return; + } + } else { + connectedSince = 0; + } + + if (now - start > timeoutMs) { + clearInterval(interval); + reject(new Error("Timed out waiting for stable gateway connection")); + } + }, 500); + }); +} diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 5ff25a2..b167a0a 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -15,9 +15,17 @@ import { injectButton } from "./deploy-modal.ts"; import { injectNavGroup, activateNemoPage, watchOpenClawNavClicks } from "./nav-group.ts"; import { injectModelSelector, watchChatCompose } from "./model-selector.ts"; import { ingestKeysFromUrl, DEFAULT_MODEL, resolveApiKey, isKeyConfigured } from "./model-registry.ts"; -import { waitForClient, waitForReconnect, patchConfig } from "./gateway-bridge.ts"; +import { waitForReconnect, waitForStableConnection } from "./gateway-bridge.ts"; import { syncKeysToProviders } from "./api-keys-page.ts"; +const INITIAL_CONNECT_TIMEOUT_MS = 30_000; +const EXTENDED_CONNECT_TIMEOUT_MS = 300_000; +const POST_PAIRING_SETTLE_DELAY_MS = 15_000; +const STABLE_CONNECTION_WINDOW_MS = 10_000; +const STABLE_CONNECTION_TIMEOUT_MS = 45_000; +const PAIRING_RELOAD_FLAG = "nemoclaw:pairing-bootstrap-reloaded"; +const FORCED_RELOAD_DELAY_MS = 1_000; + function inject(): boolean { const hasButton = injectButton(); const hasNav = injectNavGroup(); @@ -56,6 +64,11 @@ function 
showConnectOverlay(): void { document.body.prepend(overlay); } +function setConnectOverlayText(text: string): void { + const textNode = document.querySelector(".nemoclaw-connect-overlay__text"); + if (textNode) textNode.textContent = text; +} + function revealApp(): void { document.body.setAttribute("data-nemoclaw-ready", ""); const overlay = document.querySelector(".nemoclaw-connect-overlay"); @@ -65,51 +78,82 @@ function revealApp(): void { } } -/** - * Read the live OpenClaw config, find the active model.primary ref, and - * patch streaming: true for it. For proxy-managed models the model.primary - * never changes after onboard, so enabling it once covers every proxy model - * switch. - */ -async function enableStreamingForActiveModel(): Promise { - const client = await waitForClient(); - const snapshot = await client.request>("config.get", {}); +function shouldForcePairingReload(): boolean { + try { + return sessionStorage.getItem(PAIRING_RELOAD_FLAG) !== "1"; + } catch { + return true; + } +} - const agents = snapshot?.agents as Record | undefined; - const defaults = agents?.defaults as Record | undefined; - const model = defaults?.model as Record | undefined; - const primary = model?.primary as string | undefined; +function markPairingReloadComplete(): void { + try { + sessionStorage.setItem(PAIRING_RELOAD_FLAG, "1"); + } catch { + // ignore storage failures + } +} - if (!primary) { - console.warn("[NeMoClaw] Could not determine active model primary from config"); - return; +function clearPairingReloadFlag(): void { + try { + sessionStorage.removeItem(PAIRING_RELOAD_FLAG); + } catch { + // ignore storage failures } +} - const models = defaults?.models as Record> | undefined; - if (models?.[primary]?.streaming === true) return; - - await patchConfig({ - agents: { - defaults: { - models: { - [primary]: { streaming: true }, - }, - }, - }, - }); +function forcePairingReload(reason: string, overlayText: string): void { + console.info(`[NeMoClaw] pairing bootstrap: 
forcing one-time reload (${reason})`); + markPairingReloadComplete(); + setConnectOverlayText(overlayText); + window.setTimeout(() => window.location.reload(), FORCED_RELOAD_DELAY_MS); } function bootstrap() { + console.info("[NeMoClaw] pairing bootstrap: start"); showConnectOverlay(); - waitForReconnect(30_000) - .then(() => { - revealApp(); - enableStreamingForActiveModel().catch((err) => - console.warn("[NeMoClaw] Failed to enable streaming:", err), + const finalizeConnectedState = async () => { + setConnectOverlayText("Device pairing approved. Finalizing dashboard..."); + console.info("[NeMoClaw] pairing bootstrap: reconnect detected"); + if (shouldForcePairingReload()) { + forcePairingReload("post-reconnect", "Device pairing approved. Reloading dashboard..."); + return; + } + setConnectOverlayText("Device pairing approved. Verifying dashboard health..."); + try { + console.info("[NeMoClaw] pairing bootstrap: waiting for stable post-reload connection"); + await waitForStableConnection( + STABLE_CONNECTION_WINDOW_MS, + STABLE_CONNECTION_TIMEOUT_MS, ); - }) - .catch(revealApp); + } catch { + console.warn("[NeMoClaw] pairing bootstrap: stable post-reload connection check timed out; delaying reveal"); + await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); + } + console.info("[NeMoClaw] pairing bootstrap: reveal app"); + clearPairingReloadFlag(); + revealApp(); + }; + + waitForReconnect(INITIAL_CONNECT_TIMEOUT_MS) + .then(finalizeConnectedState) + .catch(async () => { + console.warn("[NeMoClaw] pairing bootstrap: initial reconnect timed out; extending wait"); + if (shouldForcePairingReload()) { + forcePairingReload("initial-timeout", "Pairing is still settling. 
Reloading dashboard..."); + return; + } + setConnectOverlayText("Still waiting for device pairing approval..."); + try { + await waitForReconnect(EXTENDED_CONNECT_TIMEOUT_MS); + await finalizeConnectedState(); + } catch { + console.warn("[NeMoClaw] pairing bootstrap: extended reconnect timed out; revealing app anyway"); + clearPairingReloadFlag(); + revealApp(); + } + }); const keysIngested = ingestKeysFromUrl(); diff --git a/sandboxes/nemoclaw/policy-proxy.js b/sandboxes/nemoclaw/policy-proxy.js index ea479f6..e699e53 100644 --- a/sandboxes/nemoclaw/policy-proxy.js +++ b/sandboxes/nemoclaw/policy-proxy.js @@ -14,6 +14,7 @@ const http = require("http"); const fs = require("fs"); const os = require("os"); const net = require("net"); +const crypto = require("crypto"); const POLICY_PATH = process.env.POLICY_PATH || "/etc/openshell/policy.yaml"; const UPSTREAM_PORT = parseInt(process.env.UPSTREAM_PORT || "18788", 10); @@ -37,6 +38,11 @@ const WELL_KNOWN_ENDPOINT = "https://navigator.navigator.svc.cluster.local:8080" let gatewayEndpoint = ""; let sandboxName = ""; +function formatRequestLine(req) { + const host = req.headers.host || "unknown-host"; + return `${req.method || "GET"} ${req.url || "/"} host=${host}`; +} + // --------------------------------------------------------------------------- // Discovery helpers // --------------------------------------------------------------------------- @@ -307,11 +313,151 @@ function pushPolicyToGateway(yamlBody) { }); } +function sha256Hex(text) { + return crypto.createHash("sha256").update(text, "utf8").digest("hex"); +} + +function hasCriticalNavigatorRule(parsed) { + const rule = parsed + && parsed.network_policies + && parsed.network_policies.allow_navigator_navigator_svc_cluster_local_8080; + if (!rule || !Array.isArray(rule.endpoints) || !Array.isArray(rule.binaries)) { + return false; + } + const hasEndpoint = rule.endpoints.some( + (ep) => ep && ep.host === "navigator.navigator.svc.cluster.local" && Number(ep.port) === 
8080 + ); + const hasBinary = rule.binaries.some((bin) => bin && bin.path === "/usr/bin/node"); + return hasEndpoint && hasBinary; +} + +function policyStatusName(status) { + switch (status) { + case 1: return "PENDING"; + case 2: return "LOADED"; + case 3: return "FAILED"; + case 4: return "SUPERSEDED"; + default: return "UNSPECIFIED"; + } +} + +function auditStartupPolicyFile() { + let yaml; + try { + yaml = require("js-yaml"); + } catch (e) { + console.warn(`[policy-proxy] startup audit skipped: js-yaml unavailable (${e.message})`); + return; + } + + let raw; + try { + raw = fs.readFileSync(POLICY_PATH, "utf8"); + } catch (e) { + console.error(`[policy-proxy] startup audit failed: could not read ${POLICY_PATH}: ${e.message}`); + return; + } + + let parsed; + try { + parsed = yaml.load(raw); + } catch (e) { + console.error(`[policy-proxy] startup audit failed: YAML parse error in ${POLICY_PATH}: ${e.message}`); + return; + } + + const criticalRulePresent = hasCriticalNavigatorRule(parsed); + console.log( + `[policy-proxy] startup policy audit path=${POLICY_PATH} ` + + `sha256=${sha256Hex(raw)} version=${parsed && parsed.version ? 
parsed.version : 0} ` + + `critical_rule.allow_navigator_navigator_svc_cluster_local_8080=${criticalRulePresent}` + ); +} + +function listSandboxPolicies(request) { + return new Promise((resolve, reject) => { + grpcClient.ListSandboxPolicies(request, (err, response) => { + if (err) { + reject(err); + return; + } + resolve(response); + }); + }); +} + +function getSandboxPolicyStatus(request) { + return new Promise((resolve, reject) => { + grpcClient.GetSandboxPolicyStatus(request, (err, response) => { + if (err) { + reject(err); + return; + } + resolve(response); + }); + }); +} + +async function auditNavigatorPolicyState() { + if (!grpcEnabled || !grpcClient || grpcPermanentlyDisabled) { + console.log( + `[policy-proxy] startup navigator audit skipped: ` + + `grpcEnabled=${grpcEnabled} grpcClient=${!!grpcClient} disabled=${grpcPermanentlyDisabled}` + ); + return; + } + + try { + const listed = await listSandboxPolicies({ name: sandboxName, limit: 1, offset: 0 }); + const revision = listed && Array.isArray(listed.revisions) ? 
listed.revisions[0] : null; + if (!revision) { + console.log(`[policy-proxy] startup navigator audit: no policy revisions found for sandbox=${sandboxName}`); + return; + } + + const statusResp = await getSandboxPolicyStatus({ name: sandboxName, version: revision.version || 0 }); + console.log( + `[policy-proxy] startup navigator audit sandbox=${sandboxName} ` + + `latest_version=${revision.version || 0} latest_hash=${revision.policy_hash || ""} ` + + `latest_status=${policyStatusName(revision.status)} active_version=${statusResp.active_version || 0}` + ); + } catch (e) { + console.warn(`[policy-proxy] startup navigator audit failed: ${e.message}`); + } +} + +function scheduleStartupAudit(attempt = 1) { + const maxAttempts = 5; + const delayMs = 1500; + + setTimeout(async () => { + if (grpcEnabled && grpcClient && !grpcPermanentlyDisabled) { + await auditNavigatorPolicyState(); + return; + } + + if (attempt >= maxAttempts) { + console.log( + `[policy-proxy] startup navigator audit gave up after ${attempt} attempts ` + + `(grpcEnabled=${grpcEnabled} grpcClient=${!!grpcClient} disabled=${grpcPermanentlyDisabled})` + ); + return; + } + + console.log( + `[policy-proxy] startup navigator audit retry ${attempt}/${maxAttempts} ` + + `(grpcEnabled=${grpcEnabled} grpcClient=${!!grpcClient} disabled=${grpcPermanentlyDisabled})` + ); + scheduleStartupAudit(attempt + 1); + }, delayMs); +} + // --------------------------------------------------------------------------- // HTTP proxy helpers // --------------------------------------------------------------------------- function proxyRequest(clientReq, clientRes) { + console.log(`[policy-proxy] http in ${formatRequestLine(clientReq)} -> ${UPSTREAM_HOST}:${UPSTREAM_PORT}`); const opts = { hostname: UPSTREAM_HOST, port: UPSTREAM_PORT, @@ -321,6 +467,10 @@ function proxyRequest(clientReq, clientRes) { }; const upstream = http.request(opts, (upstreamRes) => { + console.log( + `[policy-proxy] http out ${clientReq.method || "GET"} 
${clientReq.url || "/"} ` + + `status=${upstreamRes.statusCode || 0}` + ); clientRes.writeHead(upstreamRes.statusCode, upstreamRes.headers); upstreamRes.pipe(clientRes, { end: true }); }); @@ -341,6 +491,7 @@ function proxyRequest(clientReq, clientRes) { // --------------------------------------------------------------------------- function handlePolicyGet(req, res) { + console.log(`[policy-proxy] policy get ${formatRequestLine(req)}`); fs.readFile(POLICY_PATH, "utf8", (err, data) => { if (err) { res.writeHead(err.code === "ENOENT" ? 404 : 500, { @@ -356,7 +507,7 @@ function handlePolicyGet(req, res) { function handlePolicyPost(req, res) { const t0 = Date.now(); - console.log(`[policy-proxy] ── POST /api/policy received`); + console.log(`[policy-proxy] policy post ${formatRequestLine(req)}`); const chunks = []; req.on("data", (chunk) => chunks.push(chunk)); req.on("end", () => { @@ -447,6 +598,7 @@ const server = http.createServer((req, res) => { // WebSocket upgrade — pipe raw TCP to upstream server.on("upgrade", (req, socket, head) => { + console.log(`[policy-proxy] ws in ${formatRequestLine(req)} -> ${UPSTREAM_HOST}:${UPSTREAM_PORT}`); const upstream = net.createConnection({ host: UPSTREAM_HOST, port: UPSTREAM_PORT }, () => { const reqLine = `${req.method} ${req.url} HTTP/${req.httpVersion}\r\n`; let headers = ""; @@ -472,7 +624,9 @@ server.on("upgrade", (req, socket, head) => { // Initialize gRPC client before starting the HTTP server. initGrpcClient(); +auditStartupPolicyFile(); server.listen(LISTEN_PORT, "127.0.0.1", () => { console.log(`[policy-proxy] Listening on 127.0.0.1:${LISTEN_PORT}, upstream 127.0.0.1:${UPSTREAM_PORT}`); + scheduleStartupAudit(); }); diff --git a/sandboxes/nemoclaw/policy.yaml b/sandboxes/nemoclaw/policy.yaml index 3a1422e..ae34f93 100644 --- a/sandboxes/nemoclaw/policy.yaml +++ b/sandboxes/nemoclaw/policy.yaml @@ -36,6 +36,20 @@ process: # SHA256 integrity is enforced in Rust via trust-on-first-use, not here. 
network_policies: + allow_navigator_navigator_svc_cluster_local_8080: + name: allow_navigator_navigator_svc_cluster_local_8080 + endpoints: + - host: navigator.navigator.svc.cluster.local + port: 8080 + binaries: + - path: /usr/bin/node + allow_registry_npmjs_org_443: + name: allow_registry_npmjs_org_443 + endpoints: + - host: registry.npmjs.org + port: 443 + binaries: + - path: /usr/bin/node claude_code: name: claude_code endpoints: @@ -82,10 +96,13 @@ network_policies: name: nvidia endpoints: - { host: integrate.api.nvidia.com, port: 443 } + - { host: inference-api.nvidia.com, port: 443 } binaries: - { path: /usr/bin/curl } - { path: /bin/bash } - { path: /usr/local/bin/opencode } + - { path: /usr/bin/python3 } + - { path: /usr/bin/python3.12 } nvidia_web: name: nvidia_web endpoints: