From f81e586500aa4e31d35513fae2e65fb4ce522d35 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Wed, 11 Mar 2026 15:03:18 -0700 Subject: [PATCH 01/25] Add rebrand for welcome UI, include launch.sh script --- brev/launch.sh | 51 ++++++++++++++++++++++++++- brev/welcome-ui/server.js | 73 ++++++++++++++++++++++++++++++++++----- 2 files changed, 115 insertions(+), 9 deletions(-) diff --git a/brev/launch.sh b/brev/launch.sh index dfee5f8..881a1af 100755 --- a/brev/launch.sh +++ b/brev/launch.sh @@ -32,6 +32,7 @@ CLI_RETRY_COUNT="${CLI_RETRY_COUNT:-5}" CLI_RETRY_DELAY_SECS="${CLI_RETRY_DELAY_SECS:-3}" GHCR_LOGIN="${GHCR_LOGIN:-auto}" GHCR_USER="${GHCR_USER:-}" +NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest}" mkdir -p "$(dirname "$LAUNCH_LOG")" touch "$LAUNCH_LOG" @@ -252,6 +253,47 @@ docker_login_ghcr_if_needed() { fi } +should_build_nemoclaw_image() { + [[ -n "$COMMUNITY_REF" && "$COMMUNITY_REF" != "main" ]] +} + +build_nemoclaw_image_if_needed() { + local docker_cmd=() + local image_context="$REPO_ROOT/sandboxes/nemoclaw" + local dockerfile_path="$image_context/Dockerfile" + + if ! should_build_nemoclaw_image; then + log "Skipping local NeMoClaw image build (COMMUNITY_REF=${COMMUNITY_REF:-})." + return + fi + + if [[ ! -f "$dockerfile_path" ]]; then + log "NeMoClaw Dockerfile not found: $dockerfile_path" + exit 1 + fi + + if command -v docker >/dev/null 2>&1; then + docker_cmd=(docker) + elif command -v sudo >/dev/null 2>&1; then + docker_cmd=(sudo docker) + else + log "Docker is required to build the NeMoClaw sandbox image." + exit 1 + fi + + log "Building local NeMoClaw image for non-main ref '$COMMUNITY_REF': $NEMOCLAW_IMAGE" + if ! "${docker_cmd[@]}" build \ + --pull \ + --tag "$NEMOCLAW_IMAGE" \ + --file "$dockerfile_path" \ + "$image_context"; then + log "Local NeMoClaw image build failed." 
+ exit 1 + fi + + log "Local NeMoClaw image ready: $NEMOCLAW_IMAGE" +} + checkout_repo_ref() { if [[ -z "$COMMUNITY_REF" ]]; then return @@ -518,7 +560,12 @@ start_welcome_ui() { log "Starting welcome UI in background..." log "Welcome UI log: $WELCOME_UI_LOG" - nohup env PORT="$PORT" REPO_ROOT="$REPO_ROOT" CLI_BIN="$CLI_BIN" node server.js >> "$WELCOME_UI_LOG" 2>&1 & + nohup env \ + PORT="$PORT" \ + REPO_ROOT="$REPO_ROOT" \ + CLI_BIN="$CLI_BIN" \ + NEMOCLAW_IMAGE="$NEMOCLAW_IMAGE" \ + node server.js >> "$WELCOME_UI_LOG" 2>&1 & WELCOME_UI_PID=$! export WELCOME_UI_PID log "Welcome UI PID: $WELCOME_UI_PID" @@ -544,6 +591,8 @@ main() { ensure_cli_compat_aliases step "Authenticating registries" docker_login_ghcr_if_needed + step "Preparing NeMoClaw image" + build_nemoclaw_image_if_needed step "Ensuring Node.js" ensure_node diff --git a/brev/welcome-ui/server.js b/brev/welcome-ui/server.js index abc63b8..240947b 100644 --- a/brev/welcome-ui/server.js +++ b/brev/welcome-ui/server.js @@ -38,6 +38,7 @@ const SANDBOX_START_CMD = process.env.SANDBOX_START_CMD || "nemoclaw-start"; const SANDBOX_BASE_IMAGE = process.env.SANDBOX_BASE_IMAGE || "ghcr.io/nvidia/openshell-community/sandboxes/openclaw:latest"; +const NEMOCLAW_IMAGE = (process.env.NEMOCLAW_IMAGE || "").trim(); const POLICY_FILE = path.join(SANDBOX_DIR, "policy.yaml"); const LOG_FILE = "/tmp/nemoclaw-sandbox-create.log"; @@ -264,6 +265,13 @@ const injectKeyState = { keyHash: null, }; +// Raw API key stored in memory so it can be passed to the sandbox at +// creation time and forwarded to LiteLLM for inference. Not persisted +// to disk. 
+let _nvidiaApiKey = process.env.NVIDIA_INFERENCE_API_KEY + || process.env.NVIDIA_INTEGRATE_API_KEY + || ""; + // ── Brev ID detection & URL building ─────────────────────────────────────── function extractBrevId(host) { @@ -286,7 +294,7 @@ function buildOpenclawUrl(token) { } else { url = `http://127.0.0.1:${PORT}/`; } - if (token) url += `?token=${token}`; + if (token) url += `#token=${token}`; return url; } @@ -627,18 +635,26 @@ function runSandboxCreate() { const cmd = [ CLI_BIN, "sandbox", "create", "--name", SANDBOX_NAME, - "--from", SANDBOX_DIR, + "--from", NEMOCLAW_IMAGE || SANDBOX_DIR, "--forward", "18789", ]; if (policyPath) cmd.push("--policy", policyPath); - cmd.push( - "--", - "env", - `CHAT_UI_URL=${chatUiUrl}`, - SANDBOX_START_CMD - ); + const envArgs = [`CHAT_UI_URL=${chatUiUrl}`]; + const nvapiKey = _nvidiaApiKey + || process.env.NVIDIA_INFERENCE_API_KEY + || process.env.NVIDIA_INTEGRATE_API_KEY + || ""; + if (nvapiKey) { + envArgs.push(`NVIDIA_INFERENCE_API_KEY=${nvapiKey}`); + envArgs.push(`NVIDIA_INTEGRATE_API_KEY=${nvapiKey}`); + } + + cmd.push("--", "env", ...envArgs, SANDBOX_START_CMD); const cmdDisplay = cmd.slice(0, 8).join(" ") + " -- ..."; + if (NEMOCLAW_IMAGE) { + logWelcome(`Using NeMoClaw image override: ${NEMOCLAW_IMAGE}`); + } logWelcome(`Running: ${cmdDisplay}`); const logFd = fs.openSync(LOG_FILE, "w"); @@ -788,6 +804,38 @@ function runInjectKey(key, keyHash) { }); } +/** + * Forward the API key to the sandbox's LiteLLM instance via the + * policy-proxy's /api/litellm-key endpoint. This triggers a config + * regeneration and LiteLLM restart with the new key. 
+ */ +function forwardKeyToSandbox(key) { + const body = JSON.stringify({ apiKey: key }); + const opts = { + hostname: "127.0.0.1", + port: SANDBOX_PORT, + path: "/api/litellm-key", + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(body), + }, + timeout: 10000, + }; + const req = http.request(opts, (res) => { + res.resume(); + if (res.statusCode === 200) { + log("inject-key", "Forwarded API key to sandbox LiteLLM"); + } else { + log("inject-key", `Sandbox LiteLLM key forward returned ${res.statusCode}`); + } + }); + req.on("error", (err) => { + log("inject-key", `Failed to forward key to sandbox: ${err.message}`); + }); + req.end(body); +} + // ── Provider CRUD ────────────────────────────────────────────────────────── function parseProviderDetail(stdout) { @@ -1271,8 +1319,16 @@ async function handleInjectKey(req, res) { injectKeyState.status = "injecting"; injectKeyState.error = null; injectKeyState.keyHash = keyH; + _nvidiaApiKey = key; runInjectKey(key, keyH); + + // If the sandbox is already running, forward the key to LiteLLM inside + // the sandbox so it can authenticate with upstream NVIDIA APIs. 
+ if (sandboxState.status === "running") { + forwardKeyToSandbox(key); + } + return jsonResponse(res, 202, { ok: true, started: true }); } @@ -1561,6 +1617,7 @@ function _resetForTesting() { detectedBrevId = ""; _brevEnvId = ""; renderedIndex = null; + _nvidiaApiKey = ""; } function _setMocksForTesting(mocks) { From 53258bc6076094830eed0099414297f53dc5ff64 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Wed, 11 Mar 2026 16:40:54 -0700 Subject: [PATCH 02/25] Remove BASH_SOURCE dependency --- brev/launch.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/brev/launch.sh b/brev/launch.sh index 881a1af..1fdd5d5 100755 --- a/brev/launch.sh +++ b/brev/launch.sh @@ -38,6 +38,10 @@ mkdir -p "$(dirname "$LAUNCH_LOG")" touch "$LAUNCH_LOG" exec > >(tee -a "$LAUNCH_LOG") 2>&1 +mkdir -p "$(dirname "$LAUNCH_LOG")" +touch "$LAUNCH_LOG" +exec > >(tee -a "$LAUNCH_LOG") 2>&1 + log() { printf '[launch.sh] %s\n' "$*" } From 9842d40d712120aa28ef290d65d48bb8ba30b0f3 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Wed, 11 Mar 2026 17:03:29 -0700 Subject: [PATCH 03/25] Address silent fail on launch.sh --- brev/launch.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/brev/launch.sh b/brev/launch.sh index 1fdd5d5..881a1af 100755 --- a/brev/launch.sh +++ b/brev/launch.sh @@ -38,10 +38,6 @@ mkdir -p "$(dirname "$LAUNCH_LOG")" touch "$LAUNCH_LOG" exec > >(tee -a "$LAUNCH_LOG") 2>&1 -mkdir -p "$(dirname "$LAUNCH_LOG")" -touch "$LAUNCH_LOG" -exec > >(tee -a "$LAUNCH_LOG") 2>&1 - log() { printf '[launch.sh] %s\n' "$*" } From eb74ff4c9f5771f8835dce89ec4b4d2b6687df20 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Wed, 11 Mar 2026 17:22:00 -0700 Subject: [PATCH 04/25] Add favicon, handle ghcr.io login if params present, fix logo --- brev/welcome-ui/favicon.ico | Bin 32038 -> 32038 bytes brev/welcome-ui/index.html | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/brev/welcome-ui/favicon.ico b/brev/welcome-ui/favicon.ico index 
dce06225d9aa9a234d2faa96a7807ab14f790700..76d821c156c32261e5063776228679bc49c1915d 100644 GIT binary patch literal 32038 zcmdU&37A|(wSYSeWC%+-fC&&FlK??fWDyKJ5hj3&M%h6Y7Xm)^9U<&w@`40W2!fz2 z0)h_(1=$3HERzHbAc%q>vQCI7pu$8HMi|rY|NHcvdwZSk1jRQWKUKHtoKvT^Q`J2^ zom_56Zt2{J5joPaxr3L^iPF1P3- zx!n5bAc}=3C&^*9{?Du_xm?N^@&~|KFar*U@sP??TADRw2=(fH5l(1KEo!}HW z5Z((*m8pl_bt#_)ov<9(mbyp7mthKQ0aIZDq$WakPs%=CERW}*w>T>;?l(74x&EY(@I{Qhq7YqhP(#^M8>Z4;z7fJrUjqSsSBxc8-R@ zYM{)r&<5(R1N!Upmteo*b7H~yi#Fd+`Y~|MmAC)e8VSR}F}W6wg|u_L|9~>9gZkn+*bW?1?Vk%@1^bf7m^zOafpg(lY!7)@ z4NihRz`AWfZN%|hjb^_-> znfHQa+SImvv#msaU`Wc!W=&a%XV>1D@HKE8R)s3PzK&e3o0*vd53Q8Toh8TQa_5~y z{MaIwTMEXm8Ej0NBkpupPvx z#ce06SKg*$q}fgx*@C!r-E~J8yNEjZMrBkYswC!kAeM9_G!6p zw5^}lz>V+)aBuz`PJ_h1waDKB?zeIqwVoo@8QWY_VWUzm%7;>aKKvcdfl*+rz7?hx zRTBL;6XsB!uQXNlJZs7b#O01U^|-)D25N?Df=}zj{{Xz znUHn={0i2tQ$L)#888>R;m6?EK37WDBrR9Ny?Kn&K}$QK(Ri~w_lPs_Eoo= z>RD%ea1IR3)jy%XK}n|eUWs${K4f+Q?D^tuogpD(iy&BRxMc&;vB>_qKE<&& z=8uMQoyw=M@p0rHhBYdyDH{XM?LHo5DN_x*ms0NDUI~)9N>bO1WAS-tJP#2!9Ix}rWFzKXNZB{b%5@~x?W3d{ zL$sMVm^{y#awv8BP(7}RY}itNFEGAwqbdEwc{7hP*K|@EY20&NXUh##r0sg=-WNirEw$lR;FwkxSm{>lVSBL{mOzkqkn{~ zaV}|JJJ&$^ zJzbZfe==qIbPhO{KD!TU7^pBA`mQDQJ_Mdg=08v&8IN)IGH$-NSP(JZJJ?ZhIvCU3 z7uUnVHEmQD#_Pyg$R7`$q23QXyDKG~AMY5h&7F(&(f+mMkAyYgK{x{RV{)~D8YM9X ziSO!v1D1wlJ)}ypXD^`c@{Ek~t~uq6=a0j4uuG~~BbT)w>*_UhwDSmD3GRc4W971& zQvMk11h#3cnGLQTxfyzaS0xoc<#v`rW+U*v@DSVx_B&hV+Zr7o&&!C*G}SN zq;@|o)Bd*bUC>|S>jX&WIvK81NBuEyEZht3%`d?yu+F=N{_Eqei8}dgyN;b>=vX?= zXTyfzIHqy~%9%{IK3;Plh>m-G4jcueK|A?Un%ax8a?Bh%{d0{wrm0M(sfO9CPQ1rs z95lwb7f%7_JhAK8><^BWW9Ha7hDm+HrW#geZNysN5xYMG`*$XM8Qcqwkz=(QEP1+dgw) zPk!El7dq!H=qWOHZg=N`d0ia~=H>JAXLQV5ICD&X;mkGj3-hbx`#MGp(BCm6-`9Rt zzGq0+%&zWLN5RW5Z(Zaj!@&C#>+)`&gAMVkX*F>wXHIz@= zHwJ$Mynnt7e}}}+;pE%E`S?26uQ$Q@_yW8K{=NN3+Z#sP+rjtXHSnCf7Pg1wAoVNq zuB-K-3!a95fU)8bFm|T$4F_&9Dzmm@4I3kF0N2Eyz;k13C_m$~O_kdihwSn27`z3K z!f`Mb-r0Vn_Ey67XTjL!8gWfu03U|*+*CgMlj^5_rDewCL*YTNA5XzFaGsp6tfj@v z$9o5^RnJM+?(;AMJ_w$dsb7^Xq;+c}Hx-OYV!vm?B+$>1upi6@*VPN)eo?Q|YV*(0 
z{se5i3eSP(>v*W|bJlvut&4m&^g&OF1*Ct2KfuP2*z->19Nz)|1Y=p_*RI+2D)V92 zb?Myh4M{tdlGV>4I~twaVIdp=-W@t%9GnIHuzgWEYdg+*&);7{`Rn%Tmg<+pJ=FFc zH`}c*TUitL!v96505`*yklo)W@SOUe=%@W~eK|*4K~md%8g|}??n_`?tbPq!%UasL zarCdCkDg`5*dxI{>;k`mKf{Nr%!CbNoO|Iks1g~pAZ)m&-Ujb)gQ^r|Vm})H*MR>9 z@Z}Eb^3lW7|(~oieMj} z2G`S4(Ae*+H}QL9`+pszzmLtz)XK*iJ`WxD|3{&gRFs83dn4l>y#_p|lkxL>eG2Y^ z!NcyGqo`w*;LSI&xf55V~7K6xJ`zMFmsdf^;s{TvEg$D`{y z*c}?WQmhOeW3FTGnrPf^tdrZ2Gd>#Yj>_Qvcl{d225-AkB7E5#UE|fap!{K z^!h*HyL)mII0M|fSzI?;LmSk$)2Jr);#ugszqV^=C;ZtE8P7#yZ~ZejWS2+A_32&O z`SOmI#dZ5f@UE~7H1f1|U9|H_bhYc*Qd`c_kUg06ZP>T2Y{+@WehuCP=WB16RKXrR z-wod1c7|x9z6Z5y;`e#KMSnJ|P+O?zzhh*qeF?dmQLe57bt&v~7 z{=THXR(Va>_#C>f$HSq#-lvdpzLjxot7SsY{rXKXzJ98%T%sLzU29ju-(jDms--mE zn|7bw2-RnC%#ktHwd(lRHXnBbW7%Breo$M^((v8Xb?LY*f=lWvG^mMt_f6<8fOV=Q z@%r7p`y;3x|7gcJb1fL3jGykS{lWcyJB+P1+(b!yrfx3!Pe6L6$Cy~|{HEo# zOh&By9dfS4^7AcxUz>7$c7J<5><0GLu@U3Yq*|s2EerdOv*Yp!NcPxfkla^cmes}>qPqu-jz_D}hZ&b^4Q)OwKxeq%l!LndXT>yjsh9LTU zA+qkH4a?e$_x)a`{5)s}_w4Ji3ph>}fpf85nO-XoF%I9xhV5?!j+6J?8=(4JsI(n^ z9E_}SY#&G^;&YI$m-CC|@w(qRaJ`KNW0w6JmnyZGi}pW@4fm1j^Pp1xP}1g%#rd`n z`HLYN$Mn&=&Mhz$T%RX`eLV&41pCwYUf#%y@P8~eo`YY&ZSV%{3a(M-xse6ySAaXf zdGkC7`FQPc56@4)sA741j&~mU7eV=lq4oC!6Tc!|3mcxZ{{+{KYxECr0Zf3UA^MT{ zmhDm0?S_o~z8r>SIKPDJ*Zx^`7j1`|%q%0oH+}ANJRE<~l4ln#w4v z%pTy{bd9=JE`-e?bSw4Ct+n=4-nQ%I0JsnO;8{2WJ^=ck^d;KJKF8Xdj7&GE+XJpq z_o{0)bX)VTdFjM{q^r>0-f#!J0iI9i!KRSJ)9@$We?E-z2f?{}5?qt6&9u+Wd$MGb z<^5O@A9jQvgK_5-_zrl6>E9G^t?Bo#;aG4jCVg8nUzSum^}{n{JGcsrA+N#<@Fx5m z4usX=Kf~|Jey4s7#m9;8BQO>n45Q#3_q$N&<%3GOTzpu`v_yYfcc=N@JfCDD?e1JO z#57mv;(xh9dm%S@Drv`}+=--%I{H^ArMXi(dG72jrv05gL;88{@9b_{WIDAgzo>)1 zrJ6dlBhuj=h2nFrv%tF^eT7JSlXOw8qcAg9G~c&h zF>UJ~H@(oqMD?#Uy+G2vb#7iiH}2lNLql;}7enK?bTc%hJ)88mnfA3au%wHI#5fkZ zhQ!$V0WHe?kQS+La!!OU82Fr)1@FdBc74Se;9cZ<;QlGV6L2JqhIf1|z8hUGwwdiq z+z+>=f0u!0qW$)qdAXGLwbcD^1Z3|I@0$CI-Eeuom!m)1!}r0sX}_I!&oQBFd2uZ$$telfPeiR zWU(h4#k4Q0kpCPccNhEe8e9Xe1MjsVpR|`eM>>}JW8fk1uCKmlt@ob~!#jV6O#Emy 
zE%$pheDm%(7v2K>ehGZdWxQ*L@_tvgm()jUEc9M+FJ$c*Ka3-(J>~zMKjnUo!Iz`K z`C0_o`EqZk_tj>7EVut2Ygyg(1j#RIwfAu(|etEAR52rwOzdprtvR~7?_h5dd zcC-7aGH<-t4Q_$If#X~|o-{kot;)sk%Z)|$(b%L9@)(>9j!C)SgSC;$t9uZ*_bb_% zvLtS-KzjVQKQag7fMbIJ(ME;zN91*Yn2M@NuXTU7~`xlRk-E&*U#H zk^S&_O>Eo*&eQG}hby zDJ|Pg`|Z5Cr;S;z|E zx5Sobt@keT&1j}FVPhnEj@>M{3+{wF!1biB;~_Z*?6+(FMzFoOOQjZT=OS*NhrIgg z4y!HLLTUJZBDNe;U!Skkf_AcxS3~a_a2+Rovw!0A+u)t&d~p353w$471cZ%d8Vy(0sp> z_DtjbL;Y{<)GywnFy{=s0e2G#L9*t%z5TP-g!R zDXUARTFXVdjL}3^iJuS^O2VA z$KNn}A94L2+me2??>Zj|2famUoBw=!=$=;6y_>YT*HY03&v)+$$+=c< z-Dk_6Mt&l+Jk50#!WaGc2e`+Yf6W?qzHW4kAFkWF%?wneecuQ9!+I| z(*x?914ChnyIwR9G5o5M?Q2K}e_t^CJfx&!{2ToK6gul7o2+^F{1}M7*XvExd*?k2 zya#v>+7;?)v{oMWo%esj6HxtqVU-r+wfT+6Iv*Fq;CF|(*BWbFXOBbk@hp5Cj?VYN zHXTF9veL8GpTo9y!585zXnf9w{CMP^1@F(kmTcT^=-6M^y|Ko-cVnH(+OVOY&XHqq zDJ=);5G*o)g_&M6u&Y|!&^umW5 zD>YdgwvWS}{htT*KjR#)O)oFWUPoH{Z{V{{M~q2ye4R7+eo6Gxez&2wCEN`9`4ViD zh%HeXZFv7Pw!98oL2Z*Ucb-}9t6@Y8*Tgn4u`?V zKI=!Le7uh|w`Bh`(rW4O=M&Vu3Hq{0wNQCStzqa)hF_O-ZP&QF zHKgNHrMwsgai9BYshyvaR=?JYHqS1}cz!h(e`5Y`FX>&-(7Uu>Ya{QT@%6dm<~i%V z$+h5F=vm@AtskozHdIj=?M}j;@!7eJ^C>G9Yh*Tc`g00oYwPFZ>`UGwxv;KY+V@S7 zN!H?PUh;*UJ7D_5fqk)nL1p8>dX4fx*w8=Dc)28DqZ#+V-xI#+UGI3T2M{gHZpB zsBACQ)$X-W@9&dWpza1x&U4aOa}q27%U*?VLgL56|wDSHd_tFDQf6|r~ihc?!!BjecHuLocyu$?#Hudp*DG5-+q z${D9yd(Ww6!!2+y`LJW3tjh&WAid3=i%w8@)G8Bfcq@fh&_K7Wwv7*Q5U|g!gEq*y=&pHRCzIT zVLO>m+t>$Yf$QCN{tDi?kAa6E83*UUyVaz7+y1!2+Km_ zaftfuk$VZ8!|%Z;NZR-~dG9~o^)@S(Cmo4#-jA{?;CN{L?vu7-dmV5z`~s4B?ICZx zwe6T!+b`F1o$L+XKazFw8hPXKB=8=a>LmWBcHB2#0{iF~dX9|;bykK4L4Qt#R6Dg9 zer4-Ysp?#+m-w_EcD+M92DUFx!5PqaUzgkRezGIzzh|X(oV#0LcNhUlo2g$>W*-g$ z*N=J+gL^LzQ^B?2J}JK|*Rq#duFj?Mj`;^+1{lL^Q;hG%q;;X%*J_JVl9ly+fcUrB zt|I*tXtxjUhJ7KszO(vm)H#01Jb2f4O}X#X{|m%9SkI4o<*9#;voY!dFlOnS@%I5Z z0@i}&`(4>yRyXdfAHm*5@FLijcjkF;7_13tTWQ|)_;vU_sFUE_4E}z6)~D2_v1lR~ zn_dBJI;Qu){;(>fzGQ7IZa%fQG`2T^v*9_gZP&B+-xFXRNZXFQYeAhZxDRfJPr~5$ z+j3jRo=I>WBxi+t@+Yt-7+=!17q_pC^iuntXY0f1@B}3D`aAN|;r*aY>QChD!?1z! 
zdTo{K8Ef_cV~_o{E&KmN_zbKFX*>07zLVwI_Tu}skHv?F!L^`&^Wj|B6kIc@e@VWU zKS^DrBhlLz?uO({HU@ncwuN*Ym47#W%Kcp(pALk%;2p+s@GfvEYz6HQ<51~qQXc6V z$Q=sC7UP+Aj5)?0W6)4Y$FtJbyYZa$$8#;tJLCNup#QJJHQ>Hq9#a3){MwXHfkz-& zN6(S}8W=y)b@XoZv6(j0zSzIvuoGMl$sOW#@;Ae7;J!9S7%Qg1li*m%Z=nm`2TMVW zS2I5RC&(oJM!FpSe+;ey_pkl)yt)It4?GV^zn%NzU>u}tH+42?A>TW7 zIxdj%f4pk>z~FqgE~bZ()&hjjMb!i;Xx`O{13 z4APAm1=A6v?S&b0sOLKyrkB#WbFs_w>|#2El=ixFQ+sC5X6U**diR^n*fZ*5-$cEC zh|P9&6mqBXoeyNYJNOO=#<#bFI|iP358Y{5VE>}65o8`k6#qw Nk6#qwPhSWB`#j$;a+@= zt#*c1D5Z&570?kMAX*F&BM3e^(`lXIVSvF)J8j1&AhaORtp0y%optuP`|jsG=Ugs& z@60djJoaAe`@Z$~zV+BAK@bMDV9Jz0>73yFS`e%af?&=Z_x?-z{A>Db@#5zD`9W~< zUO~{`@7^ywHVAeu4}yidN0%xG_qmcl*{1((-53OpU6t-9`V!bvpH;;0eB6CCeSW;S zUF;D59^+}f|Gv0J>@R$Iq@ma#el3h;+Z87G&ZgnqtIyo`_b8n$d>-@knlepruekeC zQ~AWnrh3jXrh4`&Q{5oWTW!M6o}=$ud0g9e{~3DyJA41PFE`bT-e&4^_cZl6(<0{Y zWhx&z(FDllWOz{Nbb$=yj}CBNztsudtd7pU!|!j(5dQs{E|0hldXH{_bX0lN&&d19 z1G|6gvX<;t2g#34=oUyv*)jS4Z+*~IuK81Eb1!={tsl{62Mmi~8MVrp+c z*feHOjciXn95Bt)-nPh~TeA*-s(Was*#E8KS%F?A_}axLy!`?besMU#U6k9Zzgu_4(yu zAO>yhEHO`1M09Tlub!P01mCL$!C=@#c@1Wv>%J$3bdND^oSCER`vv2b{bS&n_$zJ! 
zSNBU)r`L-ik<@n5K4aYbq&QrtW=j5T-MCQqJt@-iFY`IF$Ric}+q$v8OXbF8?9Y_| zIlH5Cylnp-7o7a({GIbxy_11DO)HwOnqyJ^;VI=aU+PxOnWhImK0O|Gac|na-S|8|4t_1;UtcjU z4o;1SO12Z~G(GP1WyVt%4-=Czk0~n)eVP75n~vu^@qV0Gz%>7M2Ej3}20`CTK`{B* zAeb~11l4Xqe&kF3l;g`JrBN;xXNuN3#XP;4_cZ>p?wzhY`o+!S=i*h- ziM&Pb{U4R_Bf+{aZEKRij>XzjBtNp#ROUyJI~X=$&Rk{ zqm#rp1hN;gTQ9D8RAv9Ac$3I)OHO{;`%-bMC@vFwQHlEqY5NQQ^V~A12l{rjj~1_o zsLjyt`^0APAENj2=a)fSVw_tq+_*&A{uh1D*tpZW?`4P4&G;I~^#GHke-S7xdjxg7)1meN-n~ zH!jzApBBFm>3P|E%8snJy4}i8T37(RUAs7euoB$bOI8@2p;^z2AdpN{(GjW7nE#>{fzZs-`i0S5sd$ z*Hi|zF7Eq(TDh)2(l7f&n*0~*yZ;uih-CTMlfLT`Q~A`XZtv8$ll>-}`l5YI{g8c4 z{mrukX$A+)FtwHQOy#QMO?cb;lgk<|!^@IsyI?F$lmA2dF56Gtx5KQBSetvM@l_uDJRWX#8-G7hvsq=JrSn?6?rrAGVe#sWza*nAl=^Hk_ zR5$#Q3I6FacjlqAA1h>gbn9Gx*_ZZOJMjopKX7_WhuSGenef*0ZQr)%HcII*hVpaX zLT$FP={QqgexMucH5S$n-`}7E^-w*xc@CuAU&UW@)vU@$9pP2 zZ74ymBA+2UajbkEe#6Vp7(hR=F&*{OZTXRn^MZ^$AAa}nbBl2wmGvGUC%Yp%zR$*i z8iN_f7z=D%N1N=n{NDdwsqtpTJXbDbdHdKtn%DRO`9gdm{ojrW*s9y|6Mri1C3a{2 z(XrgJB@ka->x^+KhvBYUu5T88FQ_qc^LN) z^XdF=^^&zwn<4+?vNj2 zIc^lJYa*YQc4X?lujv}r5t+;Xmq=d=>b-u9<+O!W!tDX%%a4xeaHuHOmwTtj-I9Sl z6rN>qw$GBE8i}rB4}-oiAn+U5g1wPmw<*e?-ftK05z__h;v?Hj)+bI6>S2a>zxa-L zOpy08;w91P{vEbpY+=ufc0MH7le|S7Ep`*mrdhHkedq3DF9{u}ze5E6?J9AHcvSpW zbW$JmE##(tZxS2CO3^2F6_h_Z`P+4Jc~FOw#Uk-1;%sq+80>&+bnm6&G=bc+M1DVQ zmu=$pga!UUE#S@!@oW=+N{4hnsXfIm1A4}iv@c3^?noz(f*_viOp-DPg5Q_a>ZfPM z;(FTmZT67KteslvP}7dyt|=+!wED%MXr0sIgNeTh3rtv`GYj~*#hq7}q_}|CgZN;9 zI7}=P#0CJuE zk9^3Fapx&fR=%;m9vP9<+rX`Hc)g;LE%}wd&W$7VCE61Vxr&KZ)ju&)N(Z}TcF$77OtK&%wgM6rJ0uX8@` z!@BPwf!`^wKYJF)jO^&ZzB_vhqoLgf#bkL#GXF??RN$|(_4nVqls?lh@GE|-E3Q8} ze_OiqJ2U#7m+%Ycn{e~{OnAe36JCGX=(y=jyT2Fcw_WUcvV0q`2Kl_WTdWq@<3OZ; zL)Y#th%4|Z#r5V|Ti>c@&Yx%xo#$Drm%iOTTU~qWB2zzlu7h(mMbJ6W`mzH|?S#ed zOeK53*r0e{AVygy_7Uio+z$NbE}kc@B(@^HD6T&;>9>3AZ~f@}bLB%PnCh7;O>NCV zb}!SOL4Kpo7X#;vi@>$opXGOyYVSDIRJ6Zt&mj2mAYVRoxI!!u%vJLAXI?>UO?>$? 
zQB414y`y{OV<$PiSIu{4XC-@M=I-uXGJDG~xgh@fJ$05?=dx?Z9qP_y`|%)Oz9H$b zMbMsSi9G%1>l*Auj7^+bOn>@2ZC>?Go!5`lne%;hhR@fr*Q2Q3{yKl(==CQq-!85e z3q+p&%rA)_c8C{5zW$8!8tZI-SDUY?-mUJOLr4DIpu<*)^?Qx;cC4fBIXj0Q=&p8OUo^v>rQl3^p)s$0T?+kbESz0OR^$1%>^v5x+3>rv;9qz%V|C_t@7>(_U9Z2|Z{<_#oGgslo}zJZ>w5Ju+t&)VOW!`D^!2I#Fgtl0 zD2?+mXyIh&d>~SX=0w z{>acP|JR%^Fn8ehY49=lS^G>1XXEG}#ca?U{jq(o;~!!mp6SrpM*J)BC36d6P5VrN z<{SlV&^!IHL9gRqVo72t#TLX@#F*9&nrm1aTkyZjf35u|HS73K0emmf;k2@ zu+Q4k7pQ~uv7op5+kT*Npxg6buWu_=OigS-dyoxi2evQpj9T=&zDaiIz5d7}e_wXD)<6C8K0M#3XQJ&hrt}BefaV&^IqdThnuD}rBrijh zj=lbr&A4dCMy11ISz@^Y`1qex`7xJZ9^IN>^L&Wr9LznKgV;4@@{RQNuY0e5yuNL` zT0qAvJG`#+r;Y_Ok7kaWcF*gf{qu~s)}?s{CuFUn?D|I9_2Yf~^KBm8wKmW19R}{2s}IQO$o?(`9WpZ~c!o zV%CaTGiL2LTmP|W|MVlZDgOTo*3bDp02}k)RA~Oo8a`|Jtm(71pQk^21e*nW2J9W= z=--TSdNuy}^^arTWNOF1MZaO8|HI_3PARkgK|QjU!=4U%J9+xEM|8E=F4#ND*57~M zEC1*HqMi%2epUbfjQvfH;_F}R;jx#;o*sL9dHT=NHS85`5ksPw{#@td+-}D|HZD?3 z~tgaCV?fS>y3q&T!D4_ZCIYKhQonE3#T}c7!t|`F>#h*@o!X!&TMgZi!)sL=buLE&zUh|p#kxj$SP|*e)p(kzD67^ zri$co&+F||f1Nus$l1ZQ;x_TLcu|Z;UE5_sMr7s8<{H5n&ZzBsJ=4-ie?Jb)*8L|6 z;s;`)_Oiy~Yh*-LWJdPr3^ckG{fq3-e*PJoJ}&&(kezz{TH62_{8;ydUei{+?#N!O z-N)_X?;C#po%VFCz^7n?SHx?rKf3=`a3+6H&|j%{bdTyju3ypjksWHvkG{lsa-G1h zUq&iLb?Ee8Uw1u_+ diff --git a/brev/welcome-ui/index.html b/brev/welcome-ui/index.html index 19dcc37..4d95a34 100644 --- a/brev/welcome-ui/index.html +++ b/brev/welcome-ui/index.html @@ -4,7 +4,7 @@ OpenShell — Agent Sandbox - + @@ -16,7 +16,7 @@
- + OpenShell Sandbox From 485a94ea58c4b67dc53f31f9cb4c4b5e0a98fe2c Mon Sep 17 00:00:00 2001 From: nv-kasikritc Date: Thu, 12 Mar 2026 10:12:45 +0000 Subject: [PATCH 05/25] Init LiteLLM implementation --- sandboxes/nemoclaw/Dockerfile | 12 + sandboxes/nemoclaw/nemoclaw-start.sh | 70 +++++- .../nemoclaw-ui-extension/extension/index.ts | 43 +--- .../extension/model-registry.ts | 16 +- .../extension/model-selector.ts | 23 +- sandboxes/nemoclaw/policy-proxy.js | 226 +++++++++++++++++- 6 files changed, 323 insertions(+), 67 deletions(-) diff --git a/sandboxes/nemoclaw/Dockerfile b/sandboxes/nemoclaw/Dockerfile index bb10e19..9a5d96e 100644 --- a/sandboxes/nemoclaw/Dockerfile +++ b/sandboxes/nemoclaw/Dockerfile @@ -16,6 +16,13 @@ FROM ${BASE_IMAGE} USER root +ENV NO_PROXY=127.0.0.1,localhost,::1 +ENV no_proxy=127.0.0.1,localhost,::1 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends jq && \ + rm -rf /var/lib/apt/lists/* + # Override the startup script with our version (adds runtime API key injection) COPY nemoclaw-start.sh /usr/local/bin/nemoclaw-start RUN chmod +x /usr/local/bin/nemoclaw-start @@ -27,6 +34,11 @@ COPY policy-proxy.js /usr/local/lib/policy-proxy.js COPY proto/ /usr/local/lib/nemoclaw-proto/ RUN npm install -g @grpc/grpc-js @grpc/proto-loader js-yaml +# Install LiteLLM proxy for streaming-capable local LLM inference routing. +# LiteLLM handles SSE streaming natively, bypassing the sandbox proxy's +# inference interception path which buffers responses and times out. 
+RUN python3 -m pip install --no-cache-dir --break-system-packages 'litellm[proxy]' + # Fix @hono/node-server authorization bypass (GHSA-wc8c-qw6v-h7f6) RUN npm install -g @hono/node-server@1.19.11 diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index e1756f9..ba22672 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -65,6 +65,70 @@ if [ -f "$BUNDLE" ]; then ) || echo "Note: API key injection into UI bundle skipped (read-only /usr). Keys can be set via the API Keys page." fi +# -------------------------------------------------------------------------- +# LiteLLM streaming inference proxy +# +# LiteLLM runs on localhost:4000 and provides streaming-capable inference +# routing. This bypasses the sandbox proxy's inference.local interception +# path which buffers entire responses and has a 60s hard timeout. +# -------------------------------------------------------------------------- +LITELLM_PORT=4000 +LITELLM_CONFIG="/tmp/litellm_config.yaml" +LITELLM_LOG="/tmp/litellm.log" + +export NVIDIA_NIM_API_KEY="${NVIDIA_INFERENCE_API_KEY:-${NVIDIA_INTEGRATE_API_KEY:-not-set}}" + +_DEFAULT_MODEL="moonshotai/kimi-k2.5" +_DEFAULT_PROVIDER="nvidia-endpoints" + +generate_litellm_config() { + local model_id="${1:-$_DEFAULT_MODEL}" + local provider="${2:-$_DEFAULT_PROVIDER}" + local api_base="" + local litellm_prefix="nvidia_nim" + + case "$provider" in + nvidia-endpoints) + api_base="https://integrate.api.nvidia.com/v1" ;; + nvidia-inference) + api_base="https://inference-api.nvidia.com/v1" ;; + *) + api_base="https://integrate.api.nvidia.com/v1" ;; + esac + + cat > "$LITELLM_CONFIG" <> "$LITELLM_LOG" 2>&1 & +echo "[litellm] Starting on 127.0.0.1:${LITELLM_PORT} (pid $!)" + +# Wait for LiteLLM to accept connections before proceeding. +_litellm_deadline=$(($(date +%s) + 30)) +while ! 
curl -sf "http://127.0.0.1:${LITELLM_PORT}/health" >/dev/null 2>&1; do + if [ "$(date +%s)" -ge "$_litellm_deadline" ]; then + echo "[litellm] WARNING: LiteLLM did not become ready within 30s. Continuing anyway." + break + fi + sleep 0.5 +done + # -------------------------------------------------------------------------- # Onboard and start the gateway # -------------------------------------------------------------------------- @@ -78,9 +142,9 @@ openclaw onboard \ --skip-skills \ --skip-health \ --auth-choice custom-api-key \ - --custom-base-url "https://inference.local/v1" \ - --custom-model-id "-" \ - --custom-api-key "$_ONBOARD_KEY" \ + --custom-base-url "http://127.0.0.1:${LITELLM_PORT}/v1" \ + --custom-model-id "$_DEFAULT_MODEL" \ + --custom-api-key "sk-nemoclaw-local" \ --secret-input-mode plaintext \ --custom-compatibility openai \ --gateway-port 18788 \ diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 5ff25a2..2d4a239 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -15,7 +15,7 @@ import { injectButton } from "./deploy-modal.ts"; import { injectNavGroup, activateNemoPage, watchOpenClawNavClicks } from "./nav-group.ts"; import { injectModelSelector, watchChatCompose } from "./model-selector.ts"; import { ingestKeysFromUrl, DEFAULT_MODEL, resolveApiKey, isKeyConfigured } from "./model-registry.ts"; -import { waitForClient, waitForReconnect, patchConfig } from "./gateway-bridge.ts"; +import { waitForReconnect } from "./gateway-bridge.ts"; import { syncKeysToProviders } from "./api-keys-page.ts"; function inject(): boolean { @@ -65,50 +65,11 @@ function revealApp(): void { } } -/** - * Read the live OpenClaw config, find the active model.primary ref, and - * patch streaming: true for it. 
For proxy-managed models the model.primary - * never changes after onboard, so enabling it once covers every proxy model - * switch. - */ -async function enableStreamingForActiveModel(): Promise { - const client = await waitForClient(); - const snapshot = await client.request>("config.get", {}); - - const agents = snapshot?.agents as Record | undefined; - const defaults = agents?.defaults as Record | undefined; - const model = defaults?.model as Record | undefined; - const primary = model?.primary as string | undefined; - - if (!primary) { - console.warn("[NeMoClaw] Could not determine active model primary from config"); - return; - } - - const models = defaults?.models as Record> | undefined; - if (models?.[primary]?.streaming === true) return; - - await patchConfig({ - agents: { - defaults: { - models: { - [primary]: { streaming: true }, - }, - }, - }, - }); -} - function bootstrap() { showConnectOverlay(); waitForReconnect(30_000) - .then(() => { - revealApp(); - enableStreamingForActiveModel().catch((err) => - console.warn("[NeMoClaw] Failed to enable streaming:", err), - ); - }) + .then(revealApp) .catch(revealApp); const keysIngested = ingestKeysFromUrl(); diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts index 9016971..da97edc 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts @@ -118,8 +118,8 @@ export interface ModelEntry { } // --------------------------------------------------------------------------- -// Curated models — hardcoded presets routed through inference.local. -// The NemoClaw proxy injects credentials based on the providerName. +// Curated models — hardcoded presets routed through the local LiteLLM proxy. +// LiteLLM handles upstream credential injection and SSE streaming natively. 
// --------------------------------------------------------------------------- export interface CuratedModel { @@ -179,7 +179,7 @@ export function curatedToModelEntry(c: CuratedModel): ModelEntry { keyType: "inference", isDynamic: true, providerConfig: { - baseUrl: "https://inference.local/v1", + baseUrl: "http://127.0.0.1:4000/v1", api: "openai-completions", models: [ { @@ -215,7 +215,7 @@ export const MODEL_REGISTRY: readonly ModelEntry[] = [ modelRef: `${DEFAULT_PROVIDER_KEY}/moonshotai/kimi-k2.5`, keyType: "inference", providerConfig: { - baseUrl: "https://inference.local/v1", + baseUrl: "http://127.0.0.1:4000/v1", api: "openai-completions", models: [ { @@ -267,8 +267,8 @@ export function getModelByCuratedModelId(modelId: string): ModelEntry | undefine /** * Build a ModelEntry for a provider managed through the inference tab. - * These route through inference.local where the proxy injects credentials, - * so no client-side API key is needed. + * These route through the local LiteLLM proxy which handles credentials + * and streaming, so no client-side API key is needed. 
*/ export function buildDynamicEntry( providerName: string, @@ -288,7 +288,7 @@ export function buildDynamicEntry( keyType: "inference", isDynamic: true, providerConfig: { - baseUrl: "https://inference.local/v1", + baseUrl: "http://127.0.0.1:4000/v1", api: "openai-completions", models: [ { @@ -328,7 +328,7 @@ export function buildQuickSelectEntry( keyType: "inference", isDynamic: true, providerConfig: { - baseUrl: "https://inference.local/v1", + baseUrl: "http://127.0.0.1:4000/v1", api: "openai-completions", models: [ { diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts index 3c897ce..7b2fbe6 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts @@ -2,11 +2,11 @@ * NeMoClaw DevX — Model Selector * * Dropdown injected into the chat compose area that lets users pick a - * model. For models routed through inference.local (curated + dynamic), - * switching only updates the NemoClaw cluster-inference route — no - * OpenClaw config.patch is needed because the NemoClaw proxy rewrites - * the model field in every request body. This avoids the gateway - * disconnect that config.patch causes. + * model. For models routed through the local LiteLLM proxy (curated + + * dynamic), switching only updates the NemoClaw cluster-inference route + * — no OpenClaw config.patch is needed because the LiteLLM proxy + * handles model routing and streaming natively. This avoids the + * gateway disconnect that config.patch causes. * * Models are fetched dynamically from the NemoClaw runtime (providers * and active route configured in the Inference tab). 
@@ -264,14 +264,14 @@ function dismissTransitionBanner(): void { // --------------------------------------------------------------------------- /** - * Returns true if the model routes through inference.local, meaning the - * NemoClaw proxy manages credential injection and model rewriting. + * Returns true if the model routes through the local LiteLLM proxy, + * meaning credential injection and streaming are handled server-side. * For these models we only need to update the cluster-inference route — * no OpenClaw config.patch (and therefore no gateway disconnect). */ function isProxyManaged(entry: ModelEntry): boolean { return entry.isDynamic === true || - entry.providerConfig.baseUrl === "https://inference.local/v1"; + entry.providerConfig.baseUrl === "http://127.0.0.1:4000/v1"; } async function applyModelSelection( @@ -295,10 +295,9 @@ async function applyModelSelection( try { if (isProxyManaged(entry)) { - // Proxy-managed models route through inference.local. We update the - // NemoClaw cluster-inference route (no OpenClaw config.patch, no - // gateway disconnect). The sandbox polls every ~30s for route - // updates, so we show an honest propagation countdown. + // Proxy-managed models route through the local LiteLLM proxy. We + // update the cluster-inference route and LiteLLM is restarted with the + // new model config (no OpenClaw config.patch, no gateway disconnect). 
const curated = getCuratedByModelId(entry.providerConfig.models[0]?.id || ""); const provName = curated?.providerName || entry.providerKey.replace(/^dynamic-/, ""); const modelId = entry.providerConfig.models[0]?.id || ""; diff --git a/sandboxes/nemoclaw/policy-proxy.js b/sandboxes/nemoclaw/policy-proxy.js index ea479f6..8b92b14 100644 --- a/sandboxes/nemoclaw/policy-proxy.js +++ b/sandboxes/nemoclaw/policy-proxy.js @@ -418,15 +418,218 @@ function syncAndRespond(yamlBody, res, t0) { }); } +// --------------------------------------------------------------------------- +// LiteLLM config manager +// +// When the user switches models via the UI, the extension POSTs to +// /api/cluster-inference. After forwarding to the gateway we regenerate +// the LiteLLM config and restart the proxy so the new model takes effect. +// --------------------------------------------------------------------------- + +const { execFile } = require("child_process"); + +const LITELLM_PORT = 4000; +const LITELLM_CONFIG_PATH = "/tmp/litellm_config.yaml"; +const LITELLM_LOG_PATH = "/tmp/litellm.log"; + +const PROVIDER_MAP = { + "nvidia-endpoints": { + litellmPrefix: "nvidia_nim", + apiBase: "https://integrate.api.nvidia.com/v1", + apiKeyEnv: "NVIDIA_NIM_API_KEY", + }, + "nvidia-inference": { + litellmPrefix: "nvidia_nim", + apiBase: "https://inference-api.nvidia.com/v1", + apiKeyEnv: "NVIDIA_NIM_API_KEY", + }, +}; + +let litellmPid = null; + +function generateLitellmConfig(providerName, modelId) { + const provider = PROVIDER_MAP[providerName] || PROVIDER_MAP["nvidia-endpoints"]; + const fullModel = `${provider.litellmPrefix}/${modelId}`; + + const config = [ + "model_list:", + ' - model_name: "*"', + " litellm_params:", + ` model: "${fullModel}"`, + ` api_key: os.environ/${provider.apiKeyEnv}`, + ` api_base: "${provider.apiBase}"`, + "general_settings:", + " master_key: sk-nemoclaw-local", + "litellm_settings:", + " request_timeout: 600", + " drop_params: true", + " num_retries: 0", + "", + 
].join("\n"); + + fs.writeFileSync(LITELLM_CONFIG_PATH, config, "utf8"); + console.log(`[litellm-mgr] Config written: model=${fullModel} api_base=${provider.apiBase}`); +} + +function restartLitellm() { + return new Promise((resolve) => { + if (litellmPid) { + try { + process.kill(litellmPid, "SIGTERM"); + console.log(`[litellm-mgr] Sent SIGTERM to old LiteLLM (pid ${litellmPid})`); + } catch (e) { + // Process may have already exited. + } + litellmPid = null; + } + + // Brief grace period for the old process to release the port. + setTimeout(() => { + const logFd = fs.openSync(LITELLM_LOG_PATH, "a"); + const child = execFile( + "litellm", + ["--config", LITELLM_CONFIG_PATH, "--port", String(LITELLM_PORT), "--host", "127.0.0.1"], + { stdio: ["ignore", logFd, logFd], detached: true } + ); + child.unref(); + litellmPid = child.pid; + console.log(`[litellm-mgr] Started new LiteLLM (pid ${litellmPid})`); + fs.closeSync(logFd); + + // Wait for the health endpoint to become available. + let attempts = 0; + const maxAttempts = 20; + const poll = setInterval(() => { + attempts++; + const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health`, (healthRes) => { + if (healthRes.statusCode === 200) { + clearInterval(poll); + console.log(`[litellm-mgr] LiteLLM ready after ${attempts * 500}ms`); + resolve(true); + } + healthRes.resume(); + }); + healthReq.on("error", () => {}); + healthReq.setTimeout(400, () => healthReq.destroy()); + if (attempts >= maxAttempts) { + clearInterval(poll); + console.warn("[litellm-mgr] LiteLLM did not become ready within 10s"); + resolve(false); + } + }, 500); + }, 500); + }); +} + +// Discover existing LiteLLM pid at startup so we can manage restarts. 
+try { + const { execSync } = require("child_process"); + const pidStr = execSync(`pgrep -f "litellm.*--port ${LITELLM_PORT}" 2>/dev/null || true`, { encoding: "utf8" }).trim(); + if (pidStr) { + litellmPid = parseInt(pidStr.split("\n")[0], 10); + console.log(`[litellm-mgr] Discovered existing LiteLLM pid: ${litellmPid}`); + } +} catch (e) {} + +// --------------------------------------------------------------------------- +// /api/cluster-inference intercept +// --------------------------------------------------------------------------- + +function handleClusterInferencePost(clientReq, clientRes) { + const chunks = []; + clientReq.on("data", (chunk) => chunks.push(chunk)); + clientReq.on("end", () => { + const rawBody = Buffer.concat(chunks); + let payload; + try { + payload = JSON.parse(rawBody.toString("utf8")); + } catch (e) { + clientRes.writeHead(400, { "Content-Type": "application/json" }); + clientRes.end(JSON.stringify({ error: "invalid JSON" })); + return; + } + + // Forward the original request to the upstream gateway first. + const opts = { + hostname: UPSTREAM_HOST, + port: UPSTREAM_PORT, + path: clientReq.url, + method: clientReq.method, + headers: { ...clientReq.headers, "content-length": rawBody.length }, + }; + + const upstream = http.request(opts, (upstreamRes) => { + const upChunks = []; + upstreamRes.on("data", (c) => upChunks.push(c)); + upstreamRes.on("end", () => { + const upBody = Buffer.concat(upChunks); + clientRes.writeHead(upstreamRes.statusCode, upstreamRes.headers); + clientRes.end(upBody); + + // On success, regenerate LiteLLM config and restart. 
+ if (upstreamRes.statusCode >= 200 && upstreamRes.statusCode < 300) { + const providerName = payload.providerName || "nvidia-endpoints"; + const modelId = payload.modelId || payload.model || ""; + if (modelId) { + console.log(`[litellm-mgr] Model switch detected: provider=${providerName} model=${modelId}`); + generateLitellmConfig(providerName, modelId); + restartLitellm().then((ready) => { + console.log(`[litellm-mgr] Restart complete, ready=${ready}`); + }); + } + } + }); + }); + + upstream.on("error", (err) => { + console.error("[litellm-mgr] upstream error on cluster-inference forward:", err.message); + if (!clientRes.headersSent) { + clientRes.writeHead(502, { "Content-Type": "application/json" }); + } + clientRes.end(JSON.stringify({ error: "upstream unavailable" })); + }); + + upstream.end(rawBody); + }); +} + +// --------------------------------------------------------------------------- +// /api/litellm-health handler +// --------------------------------------------------------------------------- + +function handleLitellmHealth(req, res) { + const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health`, (healthRes) => { + const chunks = []; + healthRes.on("data", (c) => chunks.push(c)); + healthRes.on("end", () => { + res.writeHead(healthRes.statusCode, { "Content-Type": "application/json" }); + res.end(Buffer.concat(chunks)); + }); + }); + healthReq.on("error", (err) => { + res.writeHead(503, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "litellm unreachable", detail: err.message, pid: litellmPid })); + }); + healthReq.setTimeout(3000, () => { + healthReq.destroy(); + res.writeHead(504, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "litellm health check timed out", pid: litellmPid })); + }); +} + // --------------------------------------------------------------------------- // HTTP server // --------------------------------------------------------------------------- +function 
setCorsHeaders(res) { + res.setHeader("Access-Control-Allow-Origin", "*"); + res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); + res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization"); +} + const server = http.createServer((req, res) => { if (req.url === "/api/policy") { - res.setHeader("Access-Control-Allow-Origin", "*"); - res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); - res.setHeader("Access-Control-Allow-Headers", "Content-Type"); + setCorsHeaders(res); if (req.method === "OPTIONS") { res.writeHead(204); @@ -442,6 +645,23 @@ const server = http.createServer((req, res) => { return; } + if (req.url === "/api/cluster-inference" && req.method === "POST") { + setCorsHeaders(res); + handleClusterInferencePost(req, res); + return; + } + + if (req.url === "/api/litellm-health") { + setCorsHeaders(res); + if (req.method === "OPTIONS") { + res.writeHead(204); + res.end(); + } else { + handleLitellmHealth(req, res); + } + return; + } + proxyRequest(req, res); }); From 754c756b33d8f062fad70acb7edcf1a4e23bcd5e Mon Sep 17 00:00:00 2001 From: nv-kasikritc Date: Thu, 12 Mar 2026 10:57:49 +0000 Subject: [PATCH 06/25] LiteLLM working --- sandboxes/nemoclaw/nemoclaw-start.sh | 51 +++++++++++--- sandboxes/nemoclaw/policy-proxy.js | 100 +++++++++++++++++++++++---- sandboxes/nemoclaw/policy.yaml | 3 + 3 files changed, 133 insertions(+), 21 deletions(-) diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index ba22672..e1c35ce 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -76,7 +76,22 @@ LITELLM_PORT=4000 LITELLM_CONFIG="/tmp/litellm_config.yaml" LITELLM_LOG="/tmp/litellm.log" -export NVIDIA_NIM_API_KEY="${NVIDIA_INFERENCE_API_KEY:-${NVIDIA_INTEGRATE_API_KEY:-not-set}}" +NVIDIA_NIM_API_KEY="${NVIDIA_INFERENCE_API_KEY:-${NVIDIA_INTEGRATE_API_KEY:-}}" +export NVIDIA_NIM_API_KEY + +# Persist the API key to a well-known file so the 
policy-proxy can read +# it later when regenerating the LiteLLM config (e.g. on model switch or +# late key injection from the welcome UI). +LITELLM_KEY_FILE="/tmp/litellm_api_key" +if [ -n "$NVIDIA_NIM_API_KEY" ]; then + echo -n "$NVIDIA_NIM_API_KEY" > "$LITELLM_KEY_FILE" + chmod 600 "$LITELLM_KEY_FILE" +fi + +# Use the local bundled cost map to avoid a blocked HTTPS fetch to GitHub +# at startup (the sandbox network policy doesn't allow Python to reach +# raw.githubusercontent.com, causing a ~5s timeout on every start). +export LITELLM_LOCAL_MODEL_COST_MAP="True" _DEFAULT_MODEL="moonshotai/kimi-k2.5" _DEFAULT_PROVIDER="nvidia-endpoints" @@ -86,6 +101,12 @@ generate_litellm_config() { local provider="${2:-$_DEFAULT_PROVIDER}" local api_base="" local litellm_prefix="nvidia_nim" + local api_key="${NVIDIA_NIM_API_KEY:-}" + + # Read from persisted key file if env var is empty. + if [ -z "$api_key" ] && [ -f "$LITELLM_KEY_FILE" ]; then + api_key="$(cat "$LITELLM_KEY_FILE")" + fi case "$provider" in nvidia-endpoints) @@ -96,12 +117,23 @@ generate_litellm_config() { api_base="https://integrate.api.nvidia.com/v1" ;; esac + # Write the actual key value into the config. Using os.environ/ references + # is fragile inside the sandbox where env vars may not be propagated to all + # child processes. If no key is available yet, use a placeholder — the + # policy-proxy will regenerate the config when the key arrives. + local key_yaml + if [ -n "$api_key" ]; then + key_yaml=" api_key: \"${api_key}\"" + else + key_yaml=" api_key: \"key-not-yet-configured\"" + fi + cat > "$LITELLM_CONFIG" <> "$LITELLM_LOG" 2>&1 & echo "[litellm] Starting on 127.0.0.1:${LITELLM_PORT} (pid $!)" # Wait for LiteLLM to accept connections before proceeding. -_litellm_deadline=$(($(date +%s) + 30)) -while ! 
curl -sf "http://127.0.0.1:${LITELLM_PORT}/health" >/dev/null 2>&1; do +# Use /health/liveliness (basic liveness, no model checks) and --noproxy +# to bypass the sandbox HTTP proxy for localhost connections. +_litellm_deadline=$(($(date +%s) + 60)) +while ! curl -sf --noproxy 127.0.0.1 "http://127.0.0.1:${LITELLM_PORT}/health/liveliness" >/dev/null 2>&1; do if [ "$(date +%s)" -ge "$_litellm_deadline" ]; then - echo "[litellm] WARNING: LiteLLM did not become ready within 30s. Continuing anyway." + echo "[litellm] WARNING: LiteLLM did not become ready within 60s. Continuing anyway." break fi - sleep 0.5 + sleep 1 done # -------------------------------------------------------------------------- diff --git a/sandboxes/nemoclaw/policy-proxy.js b/sandboxes/nemoclaw/policy-proxy.js index 8b92b14..308cc8b 100644 --- a/sandboxes/nemoclaw/policy-proxy.js +++ b/sandboxes/nemoclaw/policy-proxy.js @@ -431,32 +431,44 @@ const { execFile } = require("child_process"); const LITELLM_PORT = 4000; const LITELLM_CONFIG_PATH = "/tmp/litellm_config.yaml"; const LITELLM_LOG_PATH = "/tmp/litellm.log"; +const LITELLM_KEY_FILE = "/tmp/litellm_api_key"; const PROVIDER_MAP = { "nvidia-endpoints": { litellmPrefix: "nvidia_nim", apiBase: "https://integrate.api.nvidia.com/v1", - apiKeyEnv: "NVIDIA_NIM_API_KEY", }, "nvidia-inference": { litellmPrefix: "nvidia_nim", apiBase: "https://inference-api.nvidia.com/v1", - apiKeyEnv: "NVIDIA_NIM_API_KEY", }, }; let litellmPid = null; +function readApiKey() { + try { + const key = fs.readFileSync(LITELLM_KEY_FILE, "utf8").trim(); + if (key) return key; + } catch (e) {} + return process.env.NVIDIA_NIM_API_KEY || ""; +} + +function writeApiKey(key) { + fs.writeFileSync(LITELLM_KEY_FILE, key, { mode: 0o600 }); +} + function generateLitellmConfig(providerName, modelId) { const provider = PROVIDER_MAP[providerName] || PROVIDER_MAP["nvidia-endpoints"]; const fullModel = `${provider.litellmPrefix}/${modelId}`; + const apiKey = readApiKey() || 
"key-not-yet-configured"; const config = [ "model_list:", ' - model_name: "*"', " litellm_params:", ` model: "${fullModel}"`, - ` api_key: os.environ/${provider.apiKeyEnv}`, + ` api_key: "${apiKey}"`, ` api_base: "${provider.apiBase}"`, "general_settings:", " master_key: sk-nemoclaw-local", @@ -468,7 +480,8 @@ function generateLitellmConfig(providerName, modelId) { ].join("\n"); fs.writeFileSync(LITELLM_CONFIG_PATH, config, "utf8"); - console.log(`[litellm-mgr] Config written: model=${fullModel} api_base=${provider.apiBase}`); + const keyStatus = apiKey === "key-not-yet-configured" ? "missing" : "present"; + console.log(`[litellm-mgr] Config written: model=${fullModel} api_base=${provider.apiBase} key=${keyStatus}`); } function restartLitellm() { @@ -486,37 +499,38 @@ function restartLitellm() { // Brief grace period for the old process to release the port. setTimeout(() => { const logFd = fs.openSync(LITELLM_LOG_PATH, "a"); + const env = { ...process.env, LITELLM_LOCAL_MODEL_COST_MAP: "True" }; const child = execFile( "litellm", ["--config", LITELLM_CONFIG_PATH, "--port", String(LITELLM_PORT), "--host", "127.0.0.1"], - { stdio: ["ignore", logFd, logFd], detached: true } + { stdio: ["ignore", logFd, logFd], detached: true, env } ); child.unref(); litellmPid = child.pid; console.log(`[litellm-mgr] Started new LiteLLM (pid ${litellmPid})`); fs.closeSync(logFd); - // Wait for the health endpoint to become available. + // Wait for the liveness endpoint (no model connectivity checks). 
let attempts = 0; - const maxAttempts = 20; + const maxAttempts = 60; const poll = setInterval(() => { attempts++; - const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health`, (healthRes) => { + const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health/liveliness`, (healthRes) => { if (healthRes.statusCode === 200) { clearInterval(poll); - console.log(`[litellm-mgr] LiteLLM ready after ${attempts * 500}ms`); + console.log(`[litellm-mgr] LiteLLM ready after ${attempts}s`); resolve(true); } healthRes.resume(); }); healthReq.on("error", () => {}); - healthReq.setTimeout(400, () => healthReq.destroy()); + healthReq.setTimeout(800, () => healthReq.destroy()); if (attempts >= maxAttempts) { clearInterval(poll); - console.warn("[litellm-mgr] LiteLLM did not become ready within 10s"); + console.warn("[litellm-mgr] LiteLLM did not become ready within 60s"); resolve(false); } - }, 500); + }, 1000); }, 500); }); } @@ -593,12 +607,66 @@ function handleClusterInferencePost(clientReq, clientRes) { }); } +// --------------------------------------------------------------------------- +// /api/litellm-key handler — accepts an API key update from the welcome UI +// --------------------------------------------------------------------------- + +function handleLitellmKey(req, res) { + const chunks = []; + req.on("data", (c) => chunks.push(c)); + req.on("end", () => { + let body; + try { + body = JSON.parse(Buffer.concat(chunks).toString("utf8")); + } catch (e) { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "invalid JSON" })); + return; + } + + const apiKey = (body.apiKey || "").trim(); + if (!apiKey) { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "missing apiKey" })); + return; + } + + console.log(`[litellm-mgr] API key update received (${apiKey.length} chars)`); + writeApiKey(apiKey); + + // Read the current config to extract the model/provider, then regenerate + 
// with the new key. + let currentModel = "moonshotai/kimi-k2.5"; + let currentProvider = "nvidia-endpoints"; + try { + const cfg = fs.readFileSync(LITELLM_CONFIG_PATH, "utf8"); + const modelMatch = cfg.match(/model:\s*"[^/]+\/(.+?)"/); + if (modelMatch) currentModel = modelMatch[1]; + const baseMatch = cfg.match(/api_base:\s*"(.+?)"/); + if (baseMatch) { + const base = baseMatch[1]; + for (const [name, p] of Object.entries(PROVIDER_MAP)) { + if (p.apiBase === base) { currentProvider = name; break; } + } + } + } catch (e) {} + + generateLitellmConfig(currentProvider, currentModel); + restartLitellm().then((ready) => { + console.log(`[litellm-mgr] Restarted with new key, ready=${ready}`); + }); + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ ok: true })); + }); +} + // --------------------------------------------------------------------------- // /api/litellm-health handler // --------------------------------------------------------------------------- function handleLitellmHealth(req, res) { - const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health`, (healthRes) => { + const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health/liveliness`, (healthRes) => { const chunks = []; healthRes.on("data", (c) => chunks.push(c)); healthRes.on("end", () => { @@ -651,6 +719,12 @@ const server = http.createServer((req, res) => { return; } + if (req.url === "/api/litellm-key" && req.method === "POST") { + setCorsHeaders(res); + handleLitellmKey(req, res); + return; + } + if (req.url === "/api/litellm-health") { setCorsHeaders(res); if (req.method === "OPTIONS") { diff --git a/sandboxes/nemoclaw/policy.yaml b/sandboxes/nemoclaw/policy.yaml index 3a1422e..749a058 100644 --- a/sandboxes/nemoclaw/policy.yaml +++ b/sandboxes/nemoclaw/policy.yaml @@ -82,10 +82,13 @@ network_policies: name: nvidia endpoints: - { host: integrate.api.nvidia.com, port: 443 } + - { host: inference-api.nvidia.com, port: 443 } binaries: - { path: 
/usr/bin/curl } - { path: /bin/bash } - { path: /usr/local/bin/opencode } + - { path: /usr/bin/python3 } + - { path: /usr/bin/python3.12 } nvidia_web: name: nvidia_web endpoints: From dc1d7acc61bc1daa308c43c618ffd8982afeb36b Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Thu, 12 Mar 2026 11:37:11 -0700 Subject: [PATCH 07/25] Update welcome UI icon assets --- brev/welcome-ui/OpenShell-Icon.svg | 1 + brev/welcome-ui/openshell-mark.svg | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) create mode 100644 brev/welcome-ui/OpenShell-Icon.svg delete mode 100644 brev/welcome-ui/openshell-mark.svg diff --git a/brev/welcome-ui/OpenShell-Icon.svg b/brev/welcome-ui/OpenShell-Icon.svg new file mode 100644 index 0000000..81bcd2c --- /dev/null +++ b/brev/welcome-ui/OpenShell-Icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/brev/welcome-ui/openshell-mark.svg b/brev/welcome-ui/openshell-mark.svg deleted file mode 100644 index 300ba64..0000000 --- a/brev/welcome-ui/openshell-mark.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - From ce8197af187006d9aeba3d31722c9f9bba36eab5 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Thu, 12 Mar 2026 14:05:58 -0700 Subject: [PATCH 08/25] Add on-demand nemoclaw build; improve auto-pair --- sandboxes/nemoclaw/nemoclaw-start.sh | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index e1c35ce..541f2ed 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -210,6 +210,7 @@ json.dump(cfg, open(os.environ['HOME'] + '/.openclaw/openclaw.json', 'w'), inden " nohup openclaw gateway > /tmp/gateway.log 2>&1 & +echo "[gateway] openclaw gateway launched (pid $!)" # Copy the default policy to a writable location so that policy-proxy can # update it at runtime. 
/etc is read-only under Landlock, but /sandbox is @@ -228,17 +229,38 @@ _POLICY_PATH="${_POLICY_DST}" # /api/policy requests to read/write the sandbox policy file. NODE_PATH=$(npm root -g) POLICY_PATH=${_POLICY_PATH} UPSTREAM_PORT=${INTERNAL_GATEWAY_PORT} LISTEN_PORT=${PUBLIC_PORT} \ nohup node /usr/local/lib/policy-proxy.js >> /tmp/gateway.log 2>&1 & +echo "[gateway] policy-proxy launched (pid $!) upstream=${INTERNAL_GATEWAY_PORT} public=${PUBLIC_PORT}" # Auto-approve pending device pairing requests so the browser is paired # before the user notices the "pairing required" prompt in the Control UI. ( + echo "[auto-pair] watcher starting" _pair_deadline=$(($(date +%s) + 300)) + _pair_attempts=0 + _pair_approved=0 + _pair_errors=0 while [ "$(date +%s)" -lt "$_pair_deadline" ]; do sleep 0.5 - if openclaw devices approve --latest --json 2>/dev/null | grep -q '"ok"'; then - echo "[auto-pair] Approved pending device pairing request." + _pair_attempts=$((_pair_attempts + 1)) + _approve_output="$(openclaw devices approve --latest --json 2>&1 || true)" + + if printf '%s\n' "$_approve_output" | grep -q '"ok"[[:space:]]*:[[:space:]]*true'; then + _pair_approved=$((_pair_approved + 1)) + echo "[auto-pair] Approved pending device pairing request: ${_approve_output}" + continue + fi + + if [ -n "$_approve_output" ] && ! 
printf '%s\n' "$_approve_output" | grep -qiE 'no pending|no device|not paired|nothing to approve'; then + _pair_errors=$((_pair_errors + 1)) + echo "[auto-pair] approve --latest returned non-success output: ${_approve_output}" + fi + + if [ $((_pair_attempts % 20)) -eq 0 ]; then + _list_output="$(openclaw devices list --json 2>&1 || true)" + echo "[auto-pair] heartbeat attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors} devices=${_list_output}" fi done + echo "[auto-pair] watcher exiting attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors}" ) >> /tmp/gateway.log 2>&1 & CONFIG_FILE="${HOME}/.openclaw/openclaw.json" @@ -246,8 +268,8 @@ token=$(grep -o '"token"\s*:\s*"[^"]*"' "${CONFIG_FILE}" 2>/dev/null | head -1 | CHAT_UI_BASE="${CHAT_UI_URL%/}" if [ -n "${token}" ]; then - LOCAL_URL="http://127.0.0.1:18789/?token=${token}" - CHAT_URL="${CHAT_UI_BASE}/?token=${token}" + LOCAL_URL="http://127.0.0.1:18789/#token=${token}" + CHAT_URL="${CHAT_UI_BASE}/#token=${token}" else LOCAL_URL="http://127.0.0.1:18789/" CHAT_URL="${CHAT_UI_BASE}/" From 6c319fa6f30880def7290694d04eb29694778439 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Thu, 12 Mar 2026 15:38:44 -0700 Subject: [PATCH 09/25] Logo fixup, improve auto-approve cycle, NO_PROXY for localhost --- brev/welcome-ui/OpenShell-Icon-Logo.svg | 20 +++++++++++ sandboxes/nemoclaw/nemoclaw-start.sh | 36 ++++++++++++++++--- .../nemoclaw-ui-extension/extension/index.ts | 16 +++++++-- 3 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 brev/welcome-ui/OpenShell-Icon-Logo.svg diff --git a/brev/welcome-ui/OpenShell-Icon-Logo.svg b/brev/welcome-ui/OpenShell-Icon-Logo.svg new file mode 100644 index 0000000..91e389d --- /dev/null +++ b/brev/welcome-ui/OpenShell-Icon-Logo.svg @@ -0,0 +1,20 @@ + + + + + + + + + + + diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index 541f2ed..06912a5 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh 
+++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -48,6 +48,12 @@ if [ -z "${CHAT_UI_URL:-}" ]; then exit 1 fi +# Keep local service-to-service traffic off the sandbox forward proxy. +# LiteLLM/OpenClaw must talk to 127.0.0.1 directly, while upstream NVIDIA +# requests should continue using the configured HTTP(S) proxy. +export NO_PROXY="${NO_PROXY:+${NO_PROXY},}127.0.0.1,localhost,::1" +export no_proxy="${no_proxy:+${no_proxy},}127.0.0.1,localhost,::1" + BUNDLE="$(npm root -g)/openclaw/dist/control-ui/assets/nemoclaw-devx.js" if [ -f "$BUNDLE" ]; then @@ -235,6 +241,26 @@ echo "[gateway] policy-proxy launched (pid $!) upstream=${INTERNAL_GATEWAY_PORT} # before the user notices the "pairing required" prompt in the Control UI. ( echo "[auto-pair] watcher starting" + _json_has_approval() { + jq -e ' + .device + | objects + | (.approvedAtMs? // empty) or ((.tokens? // []) | length > 0) + ' >/dev/null 2>&1 + } + + _summarize_device_list() { + jq -r ' + def labels($entries): + ($entries // []) + | map(select(type == "object" and (.deviceId? // "") != "") + | "\((.clientId // "unknown")):\((.deviceId // "")[0:12])"); + (labels(.pending)) as $pending + | (labels(.paired)) as $paired + | "pending=\($pending | length) [\(($pending | if length > 0 then join(", ") else "-" end))] paired=\($paired | length) [\(($paired | if length > 0 then join(", ") else "-" end))]" + ' 2>/dev/null || echo "unparseable" + } + _pair_deadline=$(($(date +%s) + 300)) _pair_attempts=0 _pair_approved=0 @@ -244,20 +270,22 @@ echo "[gateway] policy-proxy launched (pid $!) 
upstream=${INTERNAL_GATEWAY_PORT} _pair_attempts=$((_pair_attempts + 1)) _approve_output="$(openclaw devices approve --latest --json 2>&1 || true)" - if printf '%s\n' "$_approve_output" | grep -q '"ok"[[:space:]]*:[[:space:]]*true'; then + if printf '%s\n' "$_approve_output" | _json_has_approval; then _pair_approved=$((_pair_approved + 1)) - echo "[auto-pair] Approved pending device pairing request: ${_approve_output}" + _approved_device_id="$(printf '%s\n' "$_approve_output" | jq -r '.device.deviceId // ""' 2>/dev/null | cut -c1-12)" + echo "[auto-pair] approved request attempts=${_pair_attempts} count=${_pair_approved} device=${_approved_device_id:-unknown}" continue fi if [ -n "$_approve_output" ] && ! printf '%s\n' "$_approve_output" | grep -qiE 'no pending|no device|not paired|nothing to approve'; then _pair_errors=$((_pair_errors + 1)) - echo "[auto-pair] approve --latest returned non-success output: ${_approve_output}" + echo "[auto-pair] approve --latest unexpected output attempts=${_pair_attempts} errors=${_pair_errors}: ${_approve_output}" fi if [ $((_pair_attempts % 20)) -eq 0 ]; then _list_output="$(openclaw devices list --json 2>&1 || true)" - echo "[auto-pair] heartbeat attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors} devices=${_list_output}" + _device_summary="$(printf '%s\n' "$_list_output" | _summarize_device_list)" + echo "[auto-pair] heartbeat attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors} ${_device_summary}" fi done echo "[auto-pair] watcher exiting attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors}" diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 2d4a239..939ccdb 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -18,6 +18,9 @@ import { ingestKeysFromUrl, DEFAULT_MODEL, 
resolveApiKey, isKeyConfigured } from import { waitForReconnect } from "./gateway-bridge.ts"; import { syncKeysToProviders } from "./api-keys-page.ts"; +const INITIAL_CONNECT_TIMEOUT_MS = 30_000; +const POST_PAIRING_SETTLE_DELAY_MS = 15_000; + function inject(): boolean { const hasButton = injectButton(); const hasNav = injectNavGroup(); @@ -56,6 +59,11 @@ function showConnectOverlay(): void { document.body.prepend(overlay); } +function setConnectOverlayText(text: string): void { + const textNode = document.querySelector(".nemoclaw-connect-overlay__text"); + if (textNode) textNode.textContent = text; +} + function revealApp(): void { document.body.setAttribute("data-nemoclaw-ready", ""); const overlay = document.querySelector(".nemoclaw-connect-overlay"); @@ -68,8 +76,12 @@ function revealApp(): void { function bootstrap() { showConnectOverlay(); - waitForReconnect(30_000) - .then(revealApp) + waitForReconnect(INITIAL_CONNECT_TIMEOUT_MS) + .then(async () => { + setConnectOverlayText("Device pairing approved. 
Finalizing dashboard..."); + await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); + revealApp(); + }) .catch(revealApp); const keysIngested = ingestKeysFromUrl(); From 536e63c9efe34ae90ea1ede2fa9488e4d1e3ca6a Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Thu, 12 Mar 2026 17:10:40 -0700 Subject: [PATCH 10/25] Bump defualt context window, set NO_PROXY widely --- sandboxes/nemoclaw/nemoclaw-start.sh | 8 ++++++++ sandboxes/openclaw/policy.yaml | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index 06912a5..a6b5518 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -101,6 +101,8 @@ export LITELLM_LOCAL_MODEL_COST_MAP="True" _DEFAULT_MODEL="moonshotai/kimi-k2.5" _DEFAULT_PROVIDER="nvidia-endpoints" +_DEFAULT_CONTEXT_WINDOW=200000 +_DEFAULT_MAX_TOKENS=8192 generate_litellm_config() { local model_id="${1:-$_DEFAULT_MODEL}" @@ -212,6 +214,12 @@ cfg['gateway']['controlUi'] = { 'allowInsecureAuth': True, 'allowedOrigins': origins, } +provider = cfg.get('models', {}).get('providers', {}).get('custom-127-0-0-1-4000') +if isinstance(provider, dict): + for model in provider.get('models', []): + if isinstance(model, dict) and model.get('id') == '${_DEFAULT_MODEL}': + model['contextWindow'] = ${_DEFAULT_CONTEXT_WINDOW} + model['maxTokens'] = ${_DEFAULT_MAX_TOKENS} json.dump(cfg, open(os.environ['HOME'] + '/.openclaw/openclaw.json', 'w'), indent=2) " diff --git a/sandboxes/openclaw/policy.yaml b/sandboxes/openclaw/policy.yaml index a91da84..a12c46b 100644 --- a/sandboxes/openclaw/policy.yaml +++ b/sandboxes/openclaw/policy.yaml @@ -125,3 +125,7 @@ network_policies: binaries: - { path: /usr/local/bin/claude } - { path: /usr/bin/gh } + +inference: + allowed_routes: + - local From ec4895452e19d9e0f3ae80d599e69262f6d1f9e3 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 08:21:22 -0700 Subject: [PATCH 11/25] Extend 
timer for device auto approval, minimize wait --- sandboxes/nemoclaw/nemoclaw-start.sh | 21 ++++++++--- .../extension/gateway-bridge.ts | 36 +++++++++++++++++++ .../nemoclaw-ui-extension/extension/index.ts | 12 +++++-- 3 files changed, 63 insertions(+), 6 deletions(-) diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index a6b5518..522e648 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -249,6 +249,9 @@ echo "[gateway] policy-proxy launched (pid $!) upstream=${INTERNAL_GATEWAY_PORT} # before the user notices the "pairing required" prompt in the Control UI. ( echo "[auto-pair] watcher starting" + _pair_timeout_secs="${AUTO_PAIR_TIMEOUT_SECS:-1800}" + _pair_sleep_secs="0.5" + _pair_heartbeat_every=120 _json_has_approval() { jq -e ' .device @@ -269,12 +272,22 @@ echo "[gateway] policy-proxy launched (pid $!) upstream=${INTERNAL_GATEWAY_PORT} ' 2>/dev/null || echo "unparseable" } - _pair_deadline=$(($(date +%s) + 300)) + if [ "${_pair_timeout_secs}" -gt 0 ] 2>/dev/null; then + _pair_deadline=$(($(date +%s) + _pair_timeout_secs)) + echo "[auto-pair] watcher timeout=${_pair_timeout_secs}s" + else + _pair_deadline=0 + echo "[auto-pair] watcher timeout=disabled" + fi _pair_attempts=0 _pair_approved=0 _pair_errors=0 - while [ "$(date +%s)" -lt "$_pair_deadline" ]; do - sleep 0.5 + while true; do + if [ "${_pair_deadline}" -gt 0 ] && [ "$(date +%s)" -ge "${_pair_deadline}" ]; then + break + fi + + sleep "${_pair_sleep_secs}" _pair_attempts=$((_pair_attempts + 1)) _approve_output="$(openclaw devices approve --latest --json 2>&1 || true)" @@ -290,7 +303,7 @@ echo "[gateway] policy-proxy launched (pid $!) 
upstream=${INTERNAL_GATEWAY_PORT} echo "[auto-pair] approve --latest unexpected output attempts=${_pair_attempts} errors=${_pair_errors}: ${_approve_output}" fi - if [ $((_pair_attempts % 20)) -eq 0 ]; then + if [ $((_pair_attempts % _pair_heartbeat_every)) -eq 0 ]; then _list_output="$(openclaw devices list --json 2>&1 || true)" _device_summary="$(printf '%s\n' "$_list_output" | _summarize_device_list)" echo "[auto-pair] heartbeat attempts=${_pair_attempts} approved=${_pair_approved} errors=${_pair_errors} ${_device_summary}" diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts index 8da56c0..dcdcce5 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/gateway-bridge.ts @@ -112,3 +112,39 @@ export function waitForReconnect(timeoutMs = 15_000): Promise { }, 500); }); } + +/** + * Wait until the app remains connected for a continuous stability window. + * + * This helps distinguish "socket connected for a moment" from "dashboard is + * actually ready to be revealed after pairing/bootstrap settles". + */ +export function waitForStableConnection( + stableForMs = 3_000, + timeoutMs = 15_000, +): Promise { + return new Promise((resolve, reject) => { + const start = Date.now(); + let connectedSince = isAppConnected() ? 
Date.now() : 0; + + const interval = setInterval(() => { + const now = Date.now(); + + if (isAppConnected()) { + if (!connectedSince) connectedSince = now; + if (now - connectedSince >= stableForMs) { + clearInterval(interval); + resolve(); + return; + } + } else { + connectedSince = 0; + } + + if (now - start > timeoutMs) { + clearInterval(interval); + reject(new Error("Timed out waiting for stable gateway connection")); + } + }, 500); + }); +} diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 939ccdb..fc48f58 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -15,11 +15,12 @@ import { injectButton } from "./deploy-modal.ts"; import { injectNavGroup, activateNemoPage, watchOpenClawNavClicks } from "./nav-group.ts"; import { injectModelSelector, watchChatCompose } from "./model-selector.ts"; import { ingestKeysFromUrl, DEFAULT_MODEL, resolveApiKey, isKeyConfigured } from "./model-registry.ts"; -import { waitForReconnect } from "./gateway-bridge.ts"; +import { waitForReconnect, waitForStableConnection } from "./gateway-bridge.ts"; import { syncKeysToProviders } from "./api-keys-page.ts"; const INITIAL_CONNECT_TIMEOUT_MS = 30_000; const POST_PAIRING_SETTLE_DELAY_MS = 15_000; +const STABLE_CONNECTION_WINDOW_MS = 3_000; function inject(): boolean { const hasButton = injectButton(); @@ -79,7 +80,14 @@ function bootstrap() { waitForReconnect(INITIAL_CONNECT_TIMEOUT_MS) .then(async () => { setConnectOverlayText("Device pairing approved. 
Finalizing dashboard..."); - await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); + try { + await waitForStableConnection( + STABLE_CONNECTION_WINDOW_MS, + POST_PAIRING_SETTLE_DELAY_MS, + ); + } catch { + await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); + } revealApp(); }) .catch(revealApp); From 7d6355f8a46205ea0a4497562f04788f602f6d0a Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 08:55:32 -0700 Subject: [PATCH 12/25] Reload dashboard once after pairing approval --- .../nemoclaw-ui-extension/extension/index.ts | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index fc48f58..37f0e70 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -21,6 +21,7 @@ import { syncKeysToProviders } from "./api-keys-page.ts"; const INITIAL_CONNECT_TIMEOUT_MS = 30_000; const POST_PAIRING_SETTLE_DELAY_MS = 15_000; const STABLE_CONNECTION_WINDOW_MS = 3_000; +const PAIRING_RELOAD_FLAG = "nemoclaw:pairing-bootstrap-reloaded"; function inject(): boolean { const hasButton = injectButton(); @@ -74,6 +75,30 @@ function revealApp(): void { } } +function shouldForcePairingReload(): boolean { + try { + return sessionStorage.getItem(PAIRING_RELOAD_FLAG) !== "1"; + } catch { + return true; + } +} + +function markPairingReloadComplete(): void { + try { + sessionStorage.setItem(PAIRING_RELOAD_FLAG, "1"); + } catch { + // ignore storage failures + } +} + +function clearPairingReloadFlag(): void { + try { + sessionStorage.removeItem(PAIRING_RELOAD_FLAG); + } catch { + // ignore storage failures + } +} + function bootstrap() { showConnectOverlay(); @@ -88,9 +113,19 @@ function bootstrap() { } catch { await new Promise((resolve) => setTimeout(resolve, 
POST_PAIRING_SETTLE_DELAY_MS)); } + if (shouldForcePairingReload()) { + markPairingReloadComplete(); + setConnectOverlayText("Device pairing approved. Reloading dashboard..."); + window.location.reload(); + return; + } + clearPairingReloadFlag(); revealApp(); }) - .catch(revealApp); + .catch(() => { + clearPairingReloadFlag(); + revealApp(); + }); const keysIngested = ingestKeysFromUrl(); From e512644017b8f136478cf6c101e94123fdb40222 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 10:24:29 -0700 Subject: [PATCH 13/25] Revert nemoclaw runtime back to inference.local --- brev/welcome-ui/server.js | 41 +-- sandboxes/nemoclaw/Dockerfile | 8 - sandboxes/nemoclaw/nemoclaw-start.sh | 117 +------ .../extension/model-registry.ts | 16 +- .../extension/model-selector.ts | 23 +- sandboxes/nemoclaw/policy-proxy.js | 300 +----------------- 6 files changed, 31 insertions(+), 474 deletions(-) diff --git a/brev/welcome-ui/server.js b/brev/welcome-ui/server.js index 240947b..0a12223 100644 --- a/brev/welcome-ui/server.js +++ b/brev/welcome-ui/server.js @@ -266,8 +266,7 @@ const injectKeyState = { }; // Raw API key stored in memory so it can be passed to the sandbox at -// creation time and forwarded to LiteLLM for inference. Not persisted -// to disk. +// creation time. Not persisted to disk. let _nvidiaApiKey = process.env.NVIDIA_INFERENCE_API_KEY || process.env.NVIDIA_INTEGRATE_API_KEY || ""; @@ -804,38 +803,6 @@ function runInjectKey(key, keyHash) { }); } -/** - * Forward the API key to the sandbox's LiteLLM instance via the - * policy-proxy's /api/litellm-key endpoint. This triggers a config - * regeneration and LiteLLM restart with the new key. 
- */ -function forwardKeyToSandbox(key) { - const body = JSON.stringify({ apiKey: key }); - const opts = { - hostname: "127.0.0.1", - port: SANDBOX_PORT, - path: "/api/litellm-key", - method: "POST", - headers: { - "Content-Type": "application/json", - "Content-Length": Buffer.byteLength(body), - }, - timeout: 10000, - }; - const req = http.request(opts, (res) => { - res.resume(); - if (res.statusCode === 200) { - log("inject-key", "Forwarded API key to sandbox LiteLLM"); - } else { - log("inject-key", `Sandbox LiteLLM key forward returned ${res.statusCode}`); - } - }); - req.on("error", (err) => { - log("inject-key", `Failed to forward key to sandbox: ${err.message}`); - }); - req.end(body); -} - // ── Provider CRUD ────────────────────────────────────────────────────────── function parseProviderDetail(stdout) { @@ -1323,12 +1290,6 @@ async function handleInjectKey(req, res) { runInjectKey(key, keyH); - // If the sandbox is already running, forward the key to LiteLLM inside - // the sandbox so it can authenticate with upstream NVIDIA APIs. - if (sandboxState.status === "running") { - forwardKeyToSandbox(key); - } - return jsonResponse(res, 202, { ok: true, started: true }); } diff --git a/sandboxes/nemoclaw/Dockerfile b/sandboxes/nemoclaw/Dockerfile index 9a5d96e..c07b6d6 100644 --- a/sandboxes/nemoclaw/Dockerfile +++ b/sandboxes/nemoclaw/Dockerfile @@ -16,9 +16,6 @@ FROM ${BASE_IMAGE} USER root -ENV NO_PROXY=127.0.0.1,localhost,::1 -ENV no_proxy=127.0.0.1,localhost,::1 - RUN apt-get update && \ apt-get install -y --no-install-recommends jq && \ rm -rf /var/lib/apt/lists/* @@ -34,11 +31,6 @@ COPY policy-proxy.js /usr/local/lib/policy-proxy.js COPY proto/ /usr/local/lib/nemoclaw-proto/ RUN npm install -g @grpc/grpc-js @grpc/proto-loader js-yaml -# Install LiteLLM proxy for streaming-capable local LLM inference routing. -# LiteLLM handles SSE streaming natively, bypassing the sandbox proxy's -# inference interception path which buffers responses and times out. 
-RUN python3 -m pip install --no-cache-dir --break-system-packages 'litellm[proxy]' - # Fix @hono/node-server authorization bypass (GHSA-wc8c-qw6v-h7f6) RUN npm install -g @hono/node-server@1.19.11 diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index 522e648..e1f1282 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -48,12 +48,6 @@ if [ -z "${CHAT_UI_URL:-}" ]; then exit 1 fi -# Keep local service-to-service traffic off the sandbox forward proxy. -# LiteLLM/OpenClaw must talk to 127.0.0.1 directly, while upstream NVIDIA -# requests should continue using the configured HTTP(S) proxy. -export NO_PROXY="${NO_PROXY:+${NO_PROXY},}127.0.0.1,localhost,::1" -export no_proxy="${no_proxy:+${no_proxy},}127.0.0.1,localhost,::1" - BUNDLE="$(npm root -g)/openclaw/dist/control-ui/assets/nemoclaw-devx.js" if [ -f "$BUNDLE" ]; then @@ -72,109 +66,11 @@ if [ -f "$BUNDLE" ]; then fi # -------------------------------------------------------------------------- -# LiteLLM streaming inference proxy -# -# LiteLLM runs on localhost:4000 and provides streaming-capable inference -# routing. This bypasses the sandbox proxy's inference.local interception -# path which buffers entire responses and has a 60s hard timeout. +# Onboard and start the gateway # -------------------------------------------------------------------------- -LITELLM_PORT=4000 -LITELLM_CONFIG="/tmp/litellm_config.yaml" -LITELLM_LOG="/tmp/litellm.log" - -NVIDIA_NIM_API_KEY="${NVIDIA_INFERENCE_API_KEY:-${NVIDIA_INTEGRATE_API_KEY:-}}" -export NVIDIA_NIM_API_KEY - -# Persist the API key to a well-known file so the policy-proxy can read -# it later when regenerating the LiteLLM config (e.g. on model switch or -# late key injection from the welcome UI). 
-LITELLM_KEY_FILE="/tmp/litellm_api_key" -if [ -n "$NVIDIA_NIM_API_KEY" ]; then - echo -n "$NVIDIA_NIM_API_KEY" > "$LITELLM_KEY_FILE" - chmod 600 "$LITELLM_KEY_FILE" -fi - -# Use the local bundled cost map to avoid a blocked HTTPS fetch to GitHub -# at startup (the sandbox network policy doesn't allow Python to reach -# raw.githubusercontent.com, causing a ~5s timeout on every start). -export LITELLM_LOCAL_MODEL_COST_MAP="True" - _DEFAULT_MODEL="moonshotai/kimi-k2.5" -_DEFAULT_PROVIDER="nvidia-endpoints" _DEFAULT_CONTEXT_WINDOW=200000 _DEFAULT_MAX_TOKENS=8192 - -generate_litellm_config() { - local model_id="${1:-$_DEFAULT_MODEL}" - local provider="${2:-$_DEFAULT_PROVIDER}" - local api_base="" - local litellm_prefix="nvidia_nim" - local api_key="${NVIDIA_NIM_API_KEY:-}" - - # Read from persisted key file if env var is empty. - if [ -z "$api_key" ] && [ -f "$LITELLM_KEY_FILE" ]; then - api_key="$(cat "$LITELLM_KEY_FILE")" - fi - - case "$provider" in - nvidia-endpoints) - api_base="https://integrate.api.nvidia.com/v1" ;; - nvidia-inference) - api_base="https://inference-api.nvidia.com/v1" ;; - *) - api_base="https://integrate.api.nvidia.com/v1" ;; - esac - - # Write the actual key value into the config. Using os.environ/ references - # is fragile inside the sandbox where env vars may not be propagated to all - # child processes. If no key is available yet, use a placeholder — the - # policy-proxy will regenerate the config when the key arrives. - local key_yaml - if [ -n "$api_key" ]; then - key_yaml=" api_key: \"${api_key}\"" - else - key_yaml=" api_key: \"key-not-yet-configured\"" - fi - - cat > "$LITELLM_CONFIG" <> "$LITELLM_LOG" 2>&1 & -echo "[litellm] Starting on 127.0.0.1:${LITELLM_PORT} (pid $!)" - -# Wait for LiteLLM to accept connections before proceeding. -# Use /health/liveliness (basic liveness, no model checks) and --noproxy -# to bypass the sandbox HTTP proxy for localhost connections. -_litellm_deadline=$(($(date +%s) + 60)) -while ! 
curl -sf --noproxy 127.0.0.1 "http://127.0.0.1:${LITELLM_PORT}/health/liveliness" >/dev/null 2>&1; do - if [ "$(date +%s)" -ge "$_litellm_deadline" ]; then - echo "[litellm] WARNING: LiteLLM did not become ready within 60s. Continuing anyway." - break - fi - sleep 1 -done - -# -------------------------------------------------------------------------- -# Onboard and start the gateway -# -------------------------------------------------------------------------- export NVIDIA_API_KEY="${NVIDIA_INFERENCE_API_KEY:- }" _ONBOARD_KEY="${NVIDIA_INFERENCE_API_KEY:-not-used}" openclaw onboard \ @@ -185,9 +81,9 @@ openclaw onboard \ --skip-skills \ --skip-health \ --auth-choice custom-api-key \ - --custom-base-url "http://127.0.0.1:${LITELLM_PORT}/v1" \ - --custom-model-id "$_DEFAULT_MODEL" \ - --custom-api-key "sk-nemoclaw-local" \ + --custom-base-url "https://inference.local/v1" \ + --custom-model-id "-" \ + --custom-api-key "$_ONBOARD_KEY" \ --secret-input-mode plaintext \ --custom-compatibility openai \ --gateway-port 18788 \ @@ -214,8 +110,9 @@ cfg['gateway']['controlUi'] = { 'allowInsecureAuth': True, 'allowedOrigins': origins, } -provider = cfg.get('models', {}).get('providers', {}).get('custom-127-0-0-1-4000') -if isinstance(provider, dict): +for provider in cfg.get('models', {}).get('providers', {}).values(): + if not isinstance(provider, dict): + continue for model in provider.get('models', []): if isinstance(model, dict) and model.get('id') == '${_DEFAULT_MODEL}': model['contextWindow'] = ${_DEFAULT_CONTEXT_WINDOW} diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts index da97edc..9016971 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-registry.ts @@ -118,8 +118,8 @@ export interface ModelEntry { } // 
--------------------------------------------------------------------------- -// Curated models — hardcoded presets routed through the local LiteLLM proxy. -// LiteLLM handles upstream credential injection and SSE streaming natively. +// Curated models — hardcoded presets routed through inference.local. +// The NemoClaw proxy injects credentials based on the providerName. // --------------------------------------------------------------------------- export interface CuratedModel { @@ -179,7 +179,7 @@ export function curatedToModelEntry(c: CuratedModel): ModelEntry { keyType: "inference", isDynamic: true, providerConfig: { - baseUrl: "http://127.0.0.1:4000/v1", + baseUrl: "https://inference.local/v1", api: "openai-completions", models: [ { @@ -215,7 +215,7 @@ export const MODEL_REGISTRY: readonly ModelEntry[] = [ modelRef: `${DEFAULT_PROVIDER_KEY}/moonshotai/kimi-k2.5`, keyType: "inference", providerConfig: { - baseUrl: "http://127.0.0.1:4000/v1", + baseUrl: "https://inference.local/v1", api: "openai-completions", models: [ { @@ -267,8 +267,8 @@ export function getModelByCuratedModelId(modelId: string): ModelEntry | undefine /** * Build a ModelEntry for a provider managed through the inference tab. - * These route through the local LiteLLM proxy which handles credentials - * and streaming, so no client-side API key is needed. + * These route through inference.local where the proxy injects credentials, + * so no client-side API key is needed. 
*/ export function buildDynamicEntry( providerName: string, @@ -288,7 +288,7 @@ export function buildDynamicEntry( keyType: "inference", isDynamic: true, providerConfig: { - baseUrl: "http://127.0.0.1:4000/v1", + baseUrl: "https://inference.local/v1", api: "openai-completions", models: [ { @@ -328,7 +328,7 @@ export function buildQuickSelectEntry( keyType: "inference", isDynamic: true, providerConfig: { - baseUrl: "http://127.0.0.1:4000/v1", + baseUrl: "https://inference.local/v1", api: "openai-completions", models: [ { diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts index 7b2fbe6..3c897ce 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/model-selector.ts @@ -2,11 +2,11 @@ * NeMoClaw DevX — Model Selector * * Dropdown injected into the chat compose area that lets users pick a - * model. For models routed through the local LiteLLM proxy (curated + - * dynamic), switching only updates the NemoClaw cluster-inference route - * — no OpenClaw config.patch is needed because the LiteLLM proxy - * handles model routing and streaming natively. This avoids the - * gateway disconnect that config.patch causes. + * model. For models routed through inference.local (curated + dynamic), + * switching only updates the NemoClaw cluster-inference route — no + * OpenClaw config.patch is needed because the NemoClaw proxy rewrites + * the model field in every request body. This avoids the gateway + * disconnect that config.patch causes. * * Models are fetched dynamically from the NemoClaw runtime (providers * and active route configured in the Inference tab). 
@@ -264,14 +264,14 @@ function dismissTransitionBanner(): void { // --------------------------------------------------------------------------- /** - * Returns true if the model routes through the local LiteLLM proxy, - * meaning credential injection and streaming are handled server-side. + * Returns true if the model routes through inference.local, meaning the + * NemoClaw proxy manages credential injection and model rewriting. * For these models we only need to update the cluster-inference route — * no OpenClaw config.patch (and therefore no gateway disconnect). */ function isProxyManaged(entry: ModelEntry): boolean { return entry.isDynamic === true || - entry.providerConfig.baseUrl === "http://127.0.0.1:4000/v1"; + entry.providerConfig.baseUrl === "https://inference.local/v1"; } async function applyModelSelection( @@ -295,9 +295,10 @@ async function applyModelSelection( try { if (isProxyManaged(entry)) { - // Proxy-managed models route through the local LiteLLM proxy. We - // update the cluster-inference route and LiteLLM is restarted with the - // new model config (no OpenClaw config.patch, no gateway disconnect). + // Proxy-managed models route through inference.local. We update the + // NemoClaw cluster-inference route (no OpenClaw config.patch, no + // gateway disconnect). The sandbox polls every ~30s for route + // updates, so we show an honest propagation countdown. 
const curated = getCuratedByModelId(entry.providerConfig.models[0]?.id || ""); const provName = curated?.providerName || entry.providerKey.replace(/^dynamic-/, ""); const modelId = entry.providerConfig.models[0]?.id || ""; diff --git a/sandboxes/nemoclaw/policy-proxy.js b/sandboxes/nemoclaw/policy-proxy.js index 308cc8b..ea479f6 100644 --- a/sandboxes/nemoclaw/policy-proxy.js +++ b/sandboxes/nemoclaw/policy-proxy.js @@ -418,286 +418,15 @@ function syncAndRespond(yamlBody, res, t0) { }); } -// --------------------------------------------------------------------------- -// LiteLLM config manager -// -// When the user switches models via the UI, the extension POSTs to -// /api/cluster-inference. After forwarding to the gateway we regenerate -// the LiteLLM config and restart the proxy so the new model takes effect. -// --------------------------------------------------------------------------- - -const { execFile } = require("child_process"); - -const LITELLM_PORT = 4000; -const LITELLM_CONFIG_PATH = "/tmp/litellm_config.yaml"; -const LITELLM_LOG_PATH = "/tmp/litellm.log"; -const LITELLM_KEY_FILE = "/tmp/litellm_api_key"; - -const PROVIDER_MAP = { - "nvidia-endpoints": { - litellmPrefix: "nvidia_nim", - apiBase: "https://integrate.api.nvidia.com/v1", - }, - "nvidia-inference": { - litellmPrefix: "nvidia_nim", - apiBase: "https://inference-api.nvidia.com/v1", - }, -}; - -let litellmPid = null; - -function readApiKey() { - try { - const key = fs.readFileSync(LITELLM_KEY_FILE, "utf8").trim(); - if (key) return key; - } catch (e) {} - return process.env.NVIDIA_NIM_API_KEY || ""; -} - -function writeApiKey(key) { - fs.writeFileSync(LITELLM_KEY_FILE, key, { mode: 0o600 }); -} - -function generateLitellmConfig(providerName, modelId) { - const provider = PROVIDER_MAP[providerName] || PROVIDER_MAP["nvidia-endpoints"]; - const fullModel = `${provider.litellmPrefix}/${modelId}`; - const apiKey = readApiKey() || "key-not-yet-configured"; - - const config = [ - "model_list:", - ' 
- model_name: "*"', - " litellm_params:", - ` model: "${fullModel}"`, - ` api_key: "${apiKey}"`, - ` api_base: "${provider.apiBase}"`, - "general_settings:", - " master_key: sk-nemoclaw-local", - "litellm_settings:", - " request_timeout: 600", - " drop_params: true", - " num_retries: 0", - "", - ].join("\n"); - - fs.writeFileSync(LITELLM_CONFIG_PATH, config, "utf8"); - const keyStatus = apiKey === "key-not-yet-configured" ? "missing" : "present"; - console.log(`[litellm-mgr] Config written: model=${fullModel} api_base=${provider.apiBase} key=${keyStatus}`); -} - -function restartLitellm() { - return new Promise((resolve) => { - if (litellmPid) { - try { - process.kill(litellmPid, "SIGTERM"); - console.log(`[litellm-mgr] Sent SIGTERM to old LiteLLM (pid ${litellmPid})`); - } catch (e) { - // Process may have already exited. - } - litellmPid = null; - } - - // Brief grace period for the old process to release the port. - setTimeout(() => { - const logFd = fs.openSync(LITELLM_LOG_PATH, "a"); - const env = { ...process.env, LITELLM_LOCAL_MODEL_COST_MAP: "True" }; - const child = execFile( - "litellm", - ["--config", LITELLM_CONFIG_PATH, "--port", String(LITELLM_PORT), "--host", "127.0.0.1"], - { stdio: ["ignore", logFd, logFd], detached: true, env } - ); - child.unref(); - litellmPid = child.pid; - console.log(`[litellm-mgr] Started new LiteLLM (pid ${litellmPid})`); - fs.closeSync(logFd); - - // Wait for the liveness endpoint (no model connectivity checks). 
- let attempts = 0; - const maxAttempts = 60; - const poll = setInterval(() => { - attempts++; - const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health/liveliness`, (healthRes) => { - if (healthRes.statusCode === 200) { - clearInterval(poll); - console.log(`[litellm-mgr] LiteLLM ready after ${attempts}s`); - resolve(true); - } - healthRes.resume(); - }); - healthReq.on("error", () => {}); - healthReq.setTimeout(800, () => healthReq.destroy()); - if (attempts >= maxAttempts) { - clearInterval(poll); - console.warn("[litellm-mgr] LiteLLM did not become ready within 60s"); - resolve(false); - } - }, 1000); - }, 500); - }); -} - -// Discover existing LiteLLM pid at startup so we can manage restarts. -try { - const { execSync } = require("child_process"); - const pidStr = execSync(`pgrep -f "litellm.*--port ${LITELLM_PORT}" 2>/dev/null || true`, { encoding: "utf8" }).trim(); - if (pidStr) { - litellmPid = parseInt(pidStr.split("\n")[0], 10); - console.log(`[litellm-mgr] Discovered existing LiteLLM pid: ${litellmPid}`); - } -} catch (e) {} - -// --------------------------------------------------------------------------- -// /api/cluster-inference intercept -// --------------------------------------------------------------------------- - -function handleClusterInferencePost(clientReq, clientRes) { - const chunks = []; - clientReq.on("data", (chunk) => chunks.push(chunk)); - clientReq.on("end", () => { - const rawBody = Buffer.concat(chunks); - let payload; - try { - payload = JSON.parse(rawBody.toString("utf8")); - } catch (e) { - clientRes.writeHead(400, { "Content-Type": "application/json" }); - clientRes.end(JSON.stringify({ error: "invalid JSON" })); - return; - } - - // Forward the original request to the upstream gateway first. 
- const opts = { - hostname: UPSTREAM_HOST, - port: UPSTREAM_PORT, - path: clientReq.url, - method: clientReq.method, - headers: { ...clientReq.headers, "content-length": rawBody.length }, - }; - - const upstream = http.request(opts, (upstreamRes) => { - const upChunks = []; - upstreamRes.on("data", (c) => upChunks.push(c)); - upstreamRes.on("end", () => { - const upBody = Buffer.concat(upChunks); - clientRes.writeHead(upstreamRes.statusCode, upstreamRes.headers); - clientRes.end(upBody); - - // On success, regenerate LiteLLM config and restart. - if (upstreamRes.statusCode >= 200 && upstreamRes.statusCode < 300) { - const providerName = payload.providerName || "nvidia-endpoints"; - const modelId = payload.modelId || payload.model || ""; - if (modelId) { - console.log(`[litellm-mgr] Model switch detected: provider=${providerName} model=${modelId}`); - generateLitellmConfig(providerName, modelId); - restartLitellm().then((ready) => { - console.log(`[litellm-mgr] Restart complete, ready=${ready}`); - }); - } - } - }); - }); - - upstream.on("error", (err) => { - console.error("[litellm-mgr] upstream error on cluster-inference forward:", err.message); - if (!clientRes.headersSent) { - clientRes.writeHead(502, { "Content-Type": "application/json" }); - } - clientRes.end(JSON.stringify({ error: "upstream unavailable" })); - }); - - upstream.end(rawBody); - }); -} - -// --------------------------------------------------------------------------- -// /api/litellm-key handler — accepts an API key update from the welcome UI -// --------------------------------------------------------------------------- - -function handleLitellmKey(req, res) { - const chunks = []; - req.on("data", (c) => chunks.push(c)); - req.on("end", () => { - let body; - try { - body = JSON.parse(Buffer.concat(chunks).toString("utf8")); - } catch (e) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "invalid JSON" })); - return; - } - - const apiKey = 
(body.apiKey || "").trim(); - if (!apiKey) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "missing apiKey" })); - return; - } - - console.log(`[litellm-mgr] API key update received (${apiKey.length} chars)`); - writeApiKey(apiKey); - - // Read the current config to extract the model/provider, then regenerate - // with the new key. - let currentModel = "moonshotai/kimi-k2.5"; - let currentProvider = "nvidia-endpoints"; - try { - const cfg = fs.readFileSync(LITELLM_CONFIG_PATH, "utf8"); - const modelMatch = cfg.match(/model:\s*"[^/]+\/(.+?)"/); - if (modelMatch) currentModel = modelMatch[1]; - const baseMatch = cfg.match(/api_base:\s*"(.+?)"/); - if (baseMatch) { - const base = baseMatch[1]; - for (const [name, p] of Object.entries(PROVIDER_MAP)) { - if (p.apiBase === base) { currentProvider = name; break; } - } - } - } catch (e) {} - - generateLitellmConfig(currentProvider, currentModel); - restartLitellm().then((ready) => { - console.log(`[litellm-mgr] Restarted with new key, ready=${ready}`); - }); - - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ ok: true })); - }); -} - -// --------------------------------------------------------------------------- -// /api/litellm-health handler -// --------------------------------------------------------------------------- - -function handleLitellmHealth(req, res) { - const healthReq = http.get(`http://127.0.0.1:${LITELLM_PORT}/health/liveliness`, (healthRes) => { - const chunks = []; - healthRes.on("data", (c) => chunks.push(c)); - healthRes.on("end", () => { - res.writeHead(healthRes.statusCode, { "Content-Type": "application/json" }); - res.end(Buffer.concat(chunks)); - }); - }); - healthReq.on("error", (err) => { - res.writeHead(503, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "litellm unreachable", detail: err.message, pid: litellmPid })); - }); - healthReq.setTimeout(3000, () => { - 
healthReq.destroy(); - res.writeHead(504, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "litellm health check timed out", pid: litellmPid })); - }); -} - // --------------------------------------------------------------------------- // HTTP server // --------------------------------------------------------------------------- -function setCorsHeaders(res) { - res.setHeader("Access-Control-Allow-Origin", "*"); - res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); - res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization"); -} - const server = http.createServer((req, res) => { if (req.url === "/api/policy") { - setCorsHeaders(res); + res.setHeader("Access-Control-Allow-Origin", "*"); + res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); + res.setHeader("Access-Control-Allow-Headers", "Content-Type"); if (req.method === "OPTIONS") { res.writeHead(204); @@ -713,29 +442,6 @@ const server = http.createServer((req, res) => { return; } - if (req.url === "/api/cluster-inference" && req.method === "POST") { - setCorsHeaders(res); - handleClusterInferencePost(req, res); - return; - } - - if (req.url === "/api/litellm-key" && req.method === "POST") { - setCorsHeaders(res); - handleLitellmKey(req, res); - return; - } - - if (req.url === "/api/litellm-health") { - setCorsHeaders(res); - if (req.method === "OPTIONS") { - res.writeHead(204); - res.end(); - } else { - handleLitellmHealth(req, res); - } - return; - } - proxyRequest(req, res); }); From 10d871a91035d858afeecd5e69961a28e96d40a8 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 10:26:25 -0700 Subject: [PATCH 14/25] Keep pairing watcher alive until approval --- sandboxes/nemoclaw/nemoclaw-start.sh | 2 +- .../nemoclaw-ui-extension/extension/index.ts | 47 +++++++++++-------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index 
e1f1282..bc82fa9 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -146,7 +146,7 @@ echo "[gateway] policy-proxy launched (pid $!) upstream=${INTERNAL_GATEWAY_PORT} # before the user notices the "pairing required" prompt in the Control UI. ( echo "[auto-pair] watcher starting" - _pair_timeout_secs="${AUTO_PAIR_TIMEOUT_SECS:-1800}" + _pair_timeout_secs="${AUTO_PAIR_TIMEOUT_SECS:-0}" _pair_sleep_secs="0.5" _pair_heartbeat_every=120 _json_has_approval() { diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 37f0e70..249538b 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -19,6 +19,7 @@ import { waitForReconnect, waitForStableConnection } from "./gateway-bridge.ts"; import { syncKeysToProviders } from "./api-keys-page.ts"; const INITIAL_CONNECT_TIMEOUT_MS = 30_000; +const EXTENDED_CONNECT_TIMEOUT_MS = 300_000; const POST_PAIRING_SETTLE_DELAY_MS = 15_000; const STABLE_CONNECTION_WINDOW_MS = 3_000; const PAIRING_RELOAD_FLAG = "nemoclaw:pairing-bootstrap-reloaded"; @@ -102,29 +103,37 @@ function clearPairingReloadFlag(): void { function bootstrap() { showConnectOverlay(); + const finalizeConnectedState = async () => { + setConnectOverlayText("Device pairing approved. Finalizing dashboard..."); + try { + await waitForStableConnection( + STABLE_CONNECTION_WINDOW_MS, + POST_PAIRING_SETTLE_DELAY_MS, + ); + } catch { + await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); + } + if (shouldForcePairingReload()) { + markPairingReloadComplete(); + setConnectOverlayText("Device pairing approved. Reloading dashboard..."); + window.location.reload(); + return; + } + clearPairingReloadFlag(); + revealApp(); + }; + waitForReconnect(INITIAL_CONNECT_TIMEOUT_MS) - .then(async () => { - setConnectOverlayText("Device pairing approved. 
Finalizing dashboard..."); + .then(finalizeConnectedState) + .catch(async () => { + setConnectOverlayText("Still waiting for device pairing approval..."); try { - await waitForStableConnection( - STABLE_CONNECTION_WINDOW_MS, - POST_PAIRING_SETTLE_DELAY_MS, - ); + await waitForReconnect(EXTENDED_CONNECT_TIMEOUT_MS); + await finalizeConnectedState(); } catch { - await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); - } - if (shouldForcePairingReload()) { - markPairingReloadComplete(); - setConnectOverlayText("Device pairing approved. Reloading dashboard..."); - window.location.reload(); - return; + clearPairingReloadFlag(); + revealApp(); } - clearPairingReloadFlag(); - revealApp(); - }) - .catch(() => { - clearPairingReloadFlag(); - revealApp(); }); const keysIngested = ingestKeysFromUrl(); From 9483694f53f259e1fa49f3a6b81a0b8e0e0d406e Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 12:21:59 -0700 Subject: [PATCH 15/25] Add proxy request tracing for sandbox launch --- brev/welcome-ui/server.js | 7 +++++++ sandboxes/nemoclaw/policy-proxy.js | 14 +++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/brev/welcome-ui/server.js b/brev/welcome-ui/server.js index 0a12223..b0d58f9 100644 --- a/brev/welcome-ui/server.js +++ b/brev/welcome-ui/server.js @@ -1092,6 +1092,9 @@ async function handleClusterInferenceSet(req, res) { // ── Reverse proxy (HTTP) ─────────────────────────────────────────────────── function proxyToSandbox(clientReq, clientRes) { + logWelcome( + `proxy http in ${clientReq.method || "GET"} ${clientReq.url || "/"} -> 127.0.0.1:${SANDBOX_PORT}` + ); const headers = {}; for (const [key, val] of Object.entries(clientReq.headers)) { if (key.toLowerCase() === "host") continue; @@ -1109,6 +1112,9 @@ function proxyToSandbox(clientReq, clientRes) { }; const upstream = http.request(opts, (upstreamRes) => { + logWelcome( + `proxy http out ${clientReq.method || "GET"} ${clientReq.url || "/"} 
status=${upstreamRes.statusCode || 0}` + ); // Filter hop-by-hop + content-length (we'll set our own) const outHeaders = {}; for (const [key, val] of Object.entries(upstreamRes.headers)) { @@ -1147,6 +1153,7 @@ function proxyToSandbox(clientReq, clientRes) { // ── Reverse proxy (WebSocket) ────────────────────────────────────────────── function proxyWebSocket(req, clientSocket, head) { + logWelcome(`proxy ws in ${req.method || "GET"} ${req.url || "/"} -> 127.0.0.1:${SANDBOX_PORT}`); const upstream = net.createConnection( { host: "127.0.0.1", port: SANDBOX_PORT }, () => { diff --git a/sandboxes/nemoclaw/policy-proxy.js b/sandboxes/nemoclaw/policy-proxy.js index ea479f6..9030097 100644 --- a/sandboxes/nemoclaw/policy-proxy.js +++ b/sandboxes/nemoclaw/policy-proxy.js @@ -37,6 +37,11 @@ const WELL_KNOWN_ENDPOINT = "https://navigator.navigator.svc.cluster.local:8080" let gatewayEndpoint = ""; let sandboxName = ""; +function formatRequestLine(req) { + const host = req.headers.host || "unknown-host"; + return `${req.method || "GET"} ${req.url || "/"} host=${host}`; +} + // --------------------------------------------------------------------------- // Discovery helpers // --------------------------------------------------------------------------- @@ -312,6 +317,7 @@ function pushPolicyToGateway(yamlBody) { // --------------------------------------------------------------------------- function proxyRequest(clientReq, clientRes) { + console.log(`[policy-proxy] http in ${formatRequestLine(clientReq)} -> ${UPSTREAM_HOST}:${UPSTREAM_PORT}`); const opts = { hostname: UPSTREAM_HOST, port: UPSTREAM_PORT, @@ -321,6 +327,10 @@ function proxyRequest(clientReq, clientRes) { }; const upstream = http.request(opts, (upstreamRes) => { + console.log( + `[policy-proxy] http out ${clientReq.method || "GET"} ${clientReq.url || "/"} ` + + `status=${upstreamRes.statusCode || 0}` + ); clientRes.writeHead(upstreamRes.statusCode, upstreamRes.headers); upstreamRes.pipe(clientRes, { end: true }); 
}); @@ -341,6 +351,7 @@ function proxyRequest(clientReq, clientRes) { // --------------------------------------------------------------------------- function handlePolicyGet(req, res) { + console.log(`[policy-proxy] policy get ${formatRequestLine(req)}`); fs.readFile(POLICY_PATH, "utf8", (err, data) => { if (err) { res.writeHead(err.code === "ENOENT" ? 404 : 500, { @@ -356,7 +367,7 @@ function handlePolicyGet(req, res) { function handlePolicyPost(req, res) { const t0 = Date.now(); - console.log(`[policy-proxy] ── POST /api/policy received`); + console.log(`[policy-proxy] policy post ${formatRequestLine(req)}`); const chunks = []; req.on("data", (chunk) => chunks.push(chunk)); req.on("end", () => { @@ -447,6 +458,7 @@ const server = http.createServer((req, res) => { // WebSocket upgrade — pipe raw TCP to upstream server.on("upgrade", (req, socket, head) => { + console.log(`[policy-proxy] ws in ${formatRequestLine(req)} -> ${UPSTREAM_HOST}:${UPSTREAM_PORT}`); const upstream = net.createConnection({ host: UPSTREAM_HOST, port: UPSTREAM_PORT }, () => { const reqLine = `${req.method} ${req.url} HTTP/${req.httpVersion}\r\n`; let headers = ""; From b2f361c8bf2a7ad0092c239958ccdc0ee535af87 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 12:24:46 -0700 Subject: [PATCH 16/25] Add override to skip nemoclaw image build --- brev/launch.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/brev/launch.sh b/brev/launch.sh index 881a1af..b429498 100755 --- a/brev/launch.sh +++ b/brev/launch.sh @@ -33,6 +33,7 @@ CLI_RETRY_DELAY_SECS="${CLI_RETRY_DELAY_SECS:-3}" GHCR_LOGIN="${GHCR_LOGIN:-auto}" GHCR_USER="${GHCR_USER:-}" NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest}" +SKIP_NEMOCLAW_IMAGE_BUILD="${SKIP_NEMOCLAW_IMAGE_BUILD:-}" mkdir -p "$(dirname "$LAUNCH_LOG")" touch "$LAUNCH_LOG" @@ -254,6 +255,9 @@ docker_login_ghcr_if_needed() { } should_build_nemoclaw_image() { + if [[ 
"$SKIP_NEMOCLAW_IMAGE_BUILD" == "1" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "true" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "yes" ]]; then + return 1 + fi [[ -n "$COMMUNITY_REF" && "$COMMUNITY_REF" != "main" ]] } @@ -263,7 +267,11 @@ build_nemoclaw_image_if_needed() { local dockerfile_path="$image_context/Dockerfile" if ! should_build_nemoclaw_image; then - log "Skipping local NeMoClaw image build (COMMUNITY_REF=${COMMUNITY_REF:-})." + if [[ "$SKIP_NEMOCLAW_IMAGE_BUILD" == "1" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "true" || "$SKIP_NEMOCLAW_IMAGE_BUILD" == "yes" ]]; then + log "Skipping local NeMoClaw image build by override (SKIP_NEMOCLAW_IMAGE_BUILD=${SKIP_NEMOCLAW_IMAGE_BUILD})." + else + log "Skipping local NeMoClaw image build (COMMUNITY_REF=${COMMUNITY_REF:-})." + fi return fi From 6784eae86beba7da30a3b7eb3710ebcc8b0f8a2c Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 14:16:59 -0700 Subject: [PATCH 17/25] Add revised policy and NO_PROXY --- brev/welcome-ui/server.js | 9 +++++++++ sandboxes/nemoclaw/policy.yaml | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/brev/welcome-ui/server.js b/brev/welcome-ui/server.js index b0d58f9..4631874 100644 --- a/brev/welcome-ui/server.js +++ b/brev/welcome-ui/server.js @@ -639,6 +639,15 @@ function runSandboxCreate() { ]; if (policyPath) cmd.push("--policy", policyPath); const envArgs = [`CHAT_UI_URL=${chatUiUrl}`]; + const loopbackNoProxy = "127.0.0.1,localhost,::1"; + const mergedNoProxy = [ + process.env.NO_PROXY || process.env.no_proxy || "", + loopbackNoProxy, + ] + .filter(Boolean) + .join(","); + envArgs.push(`NO_PROXY=${mergedNoProxy}`); + envArgs.push(`no_proxy=${mergedNoProxy}`); const nvapiKey = _nvidiaApiKey || process.env.NVIDIA_INFERENCE_API_KEY || process.env.NVIDIA_INTEGRATE_API_KEY diff --git a/sandboxes/nemoclaw/policy.yaml b/sandboxes/nemoclaw/policy.yaml index 749a058..ae34f93 100644 --- a/sandboxes/nemoclaw/policy.yaml +++ b/sandboxes/nemoclaw/policy.yaml @@ -36,6 +36,20 @@ process: # 
SHA256 integrity is enforced in Rust via trust-on-first-use, not here. network_policies: + allow_navigator_navigator_svc_cluster_local_8080: + name: allow_navigator_navigator_svc_cluster_local_8080 + endpoints: + - host: navigator.navigator.svc.cluster.local + port: 8080 + binaries: + - path: /usr/bin/node + allow_registry_npmjs_org_443: + name: allow_registry_npmjs_org_443 + endpoints: + - host: registry.npmjs.org + port: 443 + binaries: + - path: /usr/bin/node claude_code: name: claude_code endpoints: From 29720c9fb4a007329a71920d074dea634314c27e Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 18:38:48 -0700 Subject: [PATCH 18/25] Fix unconditional chown --- sandboxes/nemoclaw/Dockerfile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sandboxes/nemoclaw/Dockerfile b/sandboxes/nemoclaw/Dockerfile index c07b6d6..d04d19a 100644 --- a/sandboxes/nemoclaw/Dockerfile +++ b/sandboxes/nemoclaw/Dockerfile @@ -16,6 +16,10 @@ FROM ${BASE_IMAGE} USER root +RUN apt-get update && \ + apt-get install -y --no-install-recommends jq && \ + rm -rf /var/lib/apt/lists/* + RUN apt-get update && \ apt-get install -y --no-install-recommends jq && \ rm -rf /var/lib/apt/lists/* @@ -34,6 +38,10 @@ RUN npm install -g @grpc/grpc-js @grpc/proto-loader js-yaml # Fix @hono/node-server authorization bypass (GHSA-wc8c-qw6v-h7f6) RUN npm install -g @hono/node-server@1.19.11 +# Allow the sandbox user to read the default policy (the startup script +# copies it to a writable location; this chown covers non-Landlock envs). +# Some base image variants do not pre-create /etc/navigator. 
+RUN mkdir -p /etc/navigator && chown -R sandbox:sandbox /etc/navigator # Stage the NeMoClaw DevX extension source COPY nemoclaw-ui-extension/extension/ /opt/nemoclaw-devx/ From 61e84fa3a0ebf4adfbfab7e112165fcecb919751 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 19:16:16 -0700 Subject: [PATCH 19/25] Added guarded reload for pairing; ensure custom policy.yaml bake-in --- README.md | 19 ++- sandboxes/nemoclaw/Dockerfile | 8 +- sandboxes/nemoclaw/nemoclaw-start.sh | 1 + .../nemoclaw-ui-extension/extension/index.ts | 14 +- sandboxes/nemoclaw/policy-proxy.js | 142 ++++++++++++++++++ 5 files changed, 178 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 53762ab..489021e 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,24 @@ This repo is the community ecosystem around OpenShell -- a hub for contributed s ### Quick Start with Brev -TODO: Add Brev instructions +#### Deploy Instantly with NVIDIA Brev + +Skip the setup and launch OpenShell Community on a fully configured Brev instance. + +| Instance | Best For | Deploy | +| -------- | -------- | ------ | +| CPU-only | External inference endpoints, remote APIs, lighter-weight sandbox workflows | Deploy on Brev | +| NVIDIA H100 | Locally hosted LLM endpoints, GPU-heavy sandboxes, higher-throughput agent workloads | Deploy on Brev | + +After the Brev instance is ready, bootstrap the Welcome UI: + +```bash +git clone https://github.com/NVIDIA/OpenShell-Community.git +cd OpenShell-Community +bash brev/launch.sh +``` + +The launcher brings up the Welcome UI on `http://localhost:8081`, where you can inject provider keys and create the NeMoClaw sandbox flow. 
### Using Sandboxes diff --git a/sandboxes/nemoclaw/Dockerfile b/sandboxes/nemoclaw/Dockerfile index d04d19a..686c3c3 100644 --- a/sandboxes/nemoclaw/Dockerfile +++ b/sandboxes/nemoclaw/Dockerfile @@ -20,9 +20,11 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends jq && \ rm -rf /var/lib/apt/lists/* -RUN apt-get update && \ - apt-get install -y --no-install-recommends jq && \ - rm -rf /var/lib/apt/lists/* +# Bake the NeMoClaw default policy into the same location used by the +# OpenClaw base image so direct image launches and create-time --policy +# launches start from the same policy. +RUN mkdir -p /etc/navigator +COPY policy.yaml /etc/navigator/policy.yaml # Override the startup script with our version (adds runtime API key injection) COPY nemoclaw-start.sh /usr/local/bin/nemoclaw-start diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index bc82fa9..6e65f66 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -134,6 +134,7 @@ if [ ! -f "$_POLICY_DST" ] && [ -f "$_POLICY_SRC" ]; then fi _POLICY_PATH="${_POLICY_DST}" [ -f "$_POLICY_PATH" ] || _POLICY_PATH="$_POLICY_SRC" +echo "[gateway] policy path selected: ${_POLICY_PATH} (src=${_POLICY_SRC} dst=${_POLICY_DST})" # Start the policy reverse proxy on the public-facing port. 
It forwards all # traffic to the OpenClaw gateway on the internal port and intercepts diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 249538b..89b1d96 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -21,7 +21,8 @@ import { syncKeysToProviders } from "./api-keys-page.ts"; const INITIAL_CONNECT_TIMEOUT_MS = 30_000; const EXTENDED_CONNECT_TIMEOUT_MS = 300_000; const POST_PAIRING_SETTLE_DELAY_MS = 15_000; -const STABLE_CONNECTION_WINDOW_MS = 3_000; +const STABLE_CONNECTION_WINDOW_MS = 10_000; +const STABLE_CONNECTION_TIMEOUT_MS = 45_000; const PAIRING_RELOAD_FLAG = "nemoclaw:pairing-bootstrap-reloaded"; function inject(): boolean { @@ -108,7 +109,7 @@ function bootstrap() { try { await waitForStableConnection( STABLE_CONNECTION_WINDOW_MS, - POST_PAIRING_SETTLE_DELAY_MS, + STABLE_CONNECTION_TIMEOUT_MS, ); } catch { await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); @@ -119,6 +120,15 @@ function bootstrap() { window.location.reload(); return; } + setConnectOverlayText("Device pairing approved. 
Verifying dashboard health..."); + try { + await waitForStableConnection( + STABLE_CONNECTION_WINDOW_MS, + STABLE_CONNECTION_TIMEOUT_MS, + ); + } catch { + await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); + } clearPairingReloadFlag(); revealApp(); }; diff --git a/sandboxes/nemoclaw/policy-proxy.js b/sandboxes/nemoclaw/policy-proxy.js index 9030097..e699e53 100644 --- a/sandboxes/nemoclaw/policy-proxy.js +++ b/sandboxes/nemoclaw/policy-proxy.js @@ -14,6 +14,7 @@ const http = require("http"); const fs = require("fs"); const os = require("os"); const net = require("net"); +const crypto = require("crypto"); const POLICY_PATH = process.env.POLICY_PATH || "/etc/openshell/policy.yaml"; const UPSTREAM_PORT = parseInt(process.env.UPSTREAM_PORT || "18788", 10); @@ -312,6 +313,145 @@ function pushPolicyToGateway(yamlBody) { }); } +function sha256Hex(text) { + return crypto.createHash("sha256").update(text, "utf8").digest("hex"); +} + +function hasCriticalNavigatorRule(parsed) { + const rule = parsed + && parsed.network_policies + && parsed.network_policies.allow_navigator_navigator_svc_cluster_local_8080; + if (!rule || !Array.isArray(rule.endpoints) || !Array.isArray(rule.binaries)) { + return false; + } + const hasEndpoint = rule.endpoints.some( + (ep) => ep && ep.host === "navigator.navigator.svc.cluster.local" && Number(ep.port) === 8080 + ); + const hasBinary = rule.binaries.some((bin) => bin && bin.path === "/usr/bin/node"); + return hasEndpoint && hasBinary; +} + +function policyStatusName(status) { + switch (status) { + case 1: return "PENDING"; + case 2: return "LOADED"; + case 3: return "FAILED"; + case 4: return "SUPERSEDED"; + default: return "UNSPECIFIED"; + } +} + +function auditStartupPolicyFile() { + let yaml; + try { + yaml = require("js-yaml"); + } catch (e) { + console.warn(`[policy-proxy] startup audit skipped: js-yaml unavailable (${e.message})`); + return; + } + + let raw; + try { + raw = fs.readFileSync(POLICY_PATH, 
"utf8"); + } catch (e) { + console.error(`[policy-proxy] startup audit failed: could not read ${POLICY_PATH}: ${e.message}`); + return; + } + + let parsed; + try { + parsed = yaml.load(raw); + } catch (e) { + console.error(`[policy-proxy] startup audit failed: YAML parse error in ${POLICY_PATH}: ${e.message}`); + return; + } + + const criticalRulePresent = hasCriticalNavigatorRule(parsed); + console.log( + `[policy-proxy] startup policy audit path=${POLICY_PATH} ` + + `sha256=${sha256Hex(raw)} version=${parsed && parsed.version ? parsed.version : 0} ` + + `critical_rule.allow_navigator_navigator_svc_cluster_local_8080=${criticalRulePresent}` + ); +} + +function listSandboxPolicies(request) { + return new Promise((resolve, reject) => { + grpcClient.ListSandboxPolicies(request, (err, response) => { + if (err) { + reject(err); + return; + } + resolve(response); + }); + }); +} + +function getSandboxPolicyStatus(request) { + return new Promise((resolve, reject) => { + grpcClient.GetSandboxPolicyStatus(request, (err, response) => { + if (err) { + reject(err); + return; + } + resolve(response); + }); + }); +} + +async function auditNavigatorPolicyState() { + if (!grpcEnabled || !grpcClient || grpcPermanentlyDisabled) { + console.log( + `[policy-proxy] startup navigator audit skipped: ` + + `grpcEnabled=${grpcEnabled} grpcClient=${!!grpcClient} disabled=${grpcPermanentlyDisabled}` + ); + return; + } + + try { + const listed = await listSandboxPolicies({ name: sandboxName, limit: 1, offset: 0 }); + const revision = listed && Array.isArray(listed.revisions) ? 
listed.revisions[0] : null; + if (!revision) { + console.log(`[policy-proxy] startup navigator audit: no policy revisions found for sandbox=${sandboxName}`); + return; + } + + const statusResp = await getSandboxPolicyStatus({ name: sandboxName, version: revision.version || 0 }); + console.log( + `[policy-proxy] startup navigator audit sandbox=${sandboxName} ` + + `latest_version=${revision.version || 0} latest_hash=${revision.policy_hash || ""} ` + + `latest_status=${policyStatusName(revision.status)} active_version=${statusResp.active_version || 0}` + ); + } catch (e) { + console.warn(`[policy-proxy] startup navigator audit failed: ${e.message}`); + } +} + +function scheduleStartupAudit(attempt = 1) { + const maxAttempts = 5; + const delayMs = 1500; + + setTimeout(async () => { + if (grpcEnabled && grpcClient && !grpcPermanentlyDisabled) { + await auditNavigatorPolicyState(); + return; + } + + if (attempt >= maxAttempts) { + console.log( + `[policy-proxy] startup navigator audit gave up after ${attempt} attempts ` + + `(grpcEnabled=${grpcEnabled} grpcClient=${!!grpcClient} disabled=${grpcPermanentlyDisabled})` + ); + return; + } + + console.log( + `[policy-proxy] startup navigator audit retry ${attempt}/${maxAttempts} ` + + `(grpcEnabled=${grpcEnabled} grpcClient=${!!grpcClient} disabled=${grpcPermanentlyDisabled})` + ); + scheduleStartupAudit(attempt + 1); + }, delayMs); +} + // --------------------------------------------------------------------------- // HTTP proxy helpers // --------------------------------------------------------------------------- @@ -484,7 +624,9 @@ server.on("upgrade", (req, socket, head) => { // Initialize gRPC client before starting the HTTP server. 
initGrpcClient(); +auditStartupPolicyFile(); server.listen(LISTEN_PORT, "127.0.0.1", () => { console.log(`[policy-proxy] Listening on 127.0.0.1:${LISTEN_PORT}, upstream 127.0.0.1:${UPSTREAM_PORT}`); + scheduleStartupAudit(); }); From 93436b09962b8148337756c3c1e26441aead1c38 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 19:51:15 -0700 Subject: [PATCH 20/25] Add console logging for device pairing; extend NO_PROXY --- brev/.gitignore | 3 +- brev/reset.sh.log | 81 +++++++++++++++++++ brev/welcome-ui/server.js | 11 ++- .../nemoclaw-ui-extension/extension/index.ts | 16 ++-- 4 files changed, 101 insertions(+), 10 deletions(-) create mode 100644 brev/reset.sh.log diff --git a/brev/.gitignore b/brev/.gitignore index c26c3f6..54affb1 100644 --- a/brev/.gitignore +++ b/brev/.gitignore @@ -1 +1,2 @@ -brev-start-vm.sh \ No newline at end of file +brev-start-vm.sh +reset.sh \ No newline at end of file diff --git a/brev/reset.sh.log b/brev/reset.sh.log new file mode 100644 index 0000000..d2acd08 --- /dev/null +++ b/brev/reset.sh.log @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +set -euo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd)" + +CLI_BIN="${CLI_BIN:-openshell}" +SANDBOX_NAME="${SANDBOX_NAME:-nemoclaw}" +WELCOME_UI_PATTERN="${WELCOME_UI_PATTERN:-node server.js}" +NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest}" +REMOVE_IMAGE="${REMOVE_IMAGE:-0}" + +log() { + printf '[reset] %s\n' "$*" +} + +try_run() { + if "$@"; then + return 0 + fi + return 1 +} + +stop_welcome_ui() { + if pgrep -f "$WELCOME_UI_PATTERN" >/dev/null 2>&1; then + log "Stopping Welcome UI processes matching: $WELCOME_UI_PATTERN" + pkill -f "$WELCOME_UI_PATTERN" || true + else + log "No Welcome UI process found" + fi +} + +delete_sandbox() { + log "Deleting sandbox: $SANDBOX_NAME" + if ! 
try_run "$CLI_BIN" sandbox delete "$SANDBOX_NAME"; then + log "Sandbox delete returned non-zero; continuing" + fi +} + +stop_forward() { + if "$CLI_BIN" forward --help >/dev/null 2>&1; then + log "Stopping forwarded port 18789 for $SANDBOX_NAME" + if ! try_run "$CLI_BIN" forward stop 18789 "$SANDBOX_NAME"; then + log "Forward stop returned non-zero; continuing" + fi + else + log "openshell forward subcommand unavailable; skipping forward stop" + fi +} + +cleanup_logs() { + log "Removing temporary logs and generated policy files" + rm -f \ + /tmp/welcome-ui.log \ + /tmp/nemoclaw-sandbox-create.log \ + /tmp/sandbox-policy-*.yaml +} + +remove_image() { + if [[ "$REMOVE_IMAGE" == "1" || "$REMOVE_IMAGE" == "true" || "$REMOVE_IMAGE" == "yes" ]]; then + log "Removing local image: $NEMOCLAW_IMAGE" + if ! try_run docker rmi "$NEMOCLAW_IMAGE"; then + log "Image removal returned non-zero; continuing" + fi + else + log "Leaving local image in place (set REMOVE_IMAGE=1 to remove it)" + fi +} + +main() { + log "Repo root: $REPO_ROOT" + stop_welcome_ui + delete_sandbox + stop_forward + cleanup_logs + remove_image + log "Reset complete" +} + +main "$@" diff --git a/brev/welcome-ui/server.js b/brev/welcome-ui/server.js index 4631874..a6f9036 100644 --- a/brev/welcome-ui/server.js +++ b/brev/welcome-ui/server.js @@ -639,7 +639,16 @@ function runSandboxCreate() { ]; if (policyPath) cmd.push("--policy", policyPath); const envArgs = [`CHAT_UI_URL=${chatUiUrl}`]; - const loopbackNoProxy = "127.0.0.1,localhost,::1"; + const loopbackNoProxy = [ + "127.0.0.1", + "localhost", + "::1", + "navigator.navigator.svc.cluster.local", + ".svc", + ".svc.cluster.local", + "10.42.0.0/16", + "10.43.0.0/16", + ].join(","); const mergedNoProxy = [ process.env.NO_PROXY || process.env.no_proxy || "", loopbackNoProxy, diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 89b1d96..2127453 100644 --- 
a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -102,19 +102,14 @@ function clearPairingReloadFlag(): void { } function bootstrap() { + console.info("[NeMoClaw] pairing bootstrap: start"); showConnectOverlay(); const finalizeConnectedState = async () => { setConnectOverlayText("Device pairing approved. Finalizing dashboard..."); - try { - await waitForStableConnection( - STABLE_CONNECTION_WINDOW_MS, - STABLE_CONNECTION_TIMEOUT_MS, - ); - } catch { - await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); - } + console.info("[NeMoClaw] pairing bootstrap: reconnect detected"); if (shouldForcePairingReload()) { + console.info("[NeMoClaw] pairing bootstrap: forcing one-time reload"); markPairingReloadComplete(); setConnectOverlayText("Device pairing approved. Reloading dashboard..."); window.location.reload(); @@ -122,13 +117,16 @@ function bootstrap() { } setConnectOverlayText("Device pairing approved. 
Verifying dashboard health..."); try { + console.info("[NeMoClaw] pairing bootstrap: waiting for stable post-reload connection"); await waitForStableConnection( STABLE_CONNECTION_WINDOW_MS, STABLE_CONNECTION_TIMEOUT_MS, ); } catch { + console.warn("[NeMoClaw] pairing bootstrap: stable post-reload connection check timed out; delaying reveal"); await new Promise((resolve) => setTimeout(resolve, POST_PAIRING_SETTLE_DELAY_MS)); } + console.info("[NeMoClaw] pairing bootstrap: reveal app"); clearPairingReloadFlag(); revealApp(); }; @@ -136,11 +134,13 @@ function bootstrap() { waitForReconnect(INITIAL_CONNECT_TIMEOUT_MS) .then(finalizeConnectedState) .catch(async () => { + console.warn("[NeMoClaw] pairing bootstrap: initial reconnect timed out; extending wait"); setConnectOverlayText("Still waiting for device pairing approval..."); try { await waitForReconnect(EXTENDED_CONNECT_TIMEOUT_MS); await finalizeConnectedState(); } catch { + console.warn("[NeMoClaw] pairing bootstrap: extended reconnect timed out; revealing app anyway"); clearPairingReloadFlag(); revealApp(); } From 59b4389649bf7d37a3c7f6f09a72bccfdc892a4b Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 20:22:47 -0700 Subject: [PATCH 21/25] Handle context mod for inference.local --- .gitignore | 1 + sandboxes/nemoclaw/nemoclaw-start.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3412b31 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/AGENTS.md diff --git a/sandboxes/nemoclaw/nemoclaw-start.sh b/sandboxes/nemoclaw/nemoclaw-start.sh index 6e65f66..5d70d53 100644 --- a/sandboxes/nemoclaw/nemoclaw-start.sh +++ b/sandboxes/nemoclaw/nemoclaw-start.sh @@ -114,7 +114,7 @@ for provider in cfg.get('models', {}).get('providers', {}).values(): if not isinstance(provider, dict): continue for model in provider.get('models', []): - if isinstance(model, dict) and model.get('id') == '${_DEFAULT_MODEL}': 
+ if isinstance(model, dict) and model.get('id') in ('${_DEFAULT_MODEL}', '-'): model['contextWindow'] = ${_DEFAULT_CONTEXT_WINDOW} model['maxTokens'] = ${_DEFAULT_MAX_TOKENS} json.dump(cfg, open(os.environ['HOME'] + '/.openclaw/openclaw.json', 'w'), indent=2) From c35e759dbce5a53b6c8431167fe17a3a49f35e20 Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 21:04:54 -0700 Subject: [PATCH 22/25] Fix k3s image import on build; force reload on first pass timeout --- brev/launch.sh | 94 ++++++++++++++++++- brev/reset.sh.log | 81 ---------------- .../nemoclaw-ui-extension/extension/index.ts | 17 +++- 3 files changed, 106 insertions(+), 86 deletions(-) delete mode 100644 brev/reset.sh.log diff --git a/brev/launch.sh b/brev/launch.sh index b429498..782be4e 100755 --- a/brev/launch.sh +++ b/brev/launch.sh @@ -32,8 +32,15 @@ CLI_RETRY_COUNT="${CLI_RETRY_COUNT:-5}" CLI_RETRY_DELAY_SECS="${CLI_RETRY_DELAY_SECS:-3}" GHCR_LOGIN="${GHCR_LOGIN:-auto}" GHCR_USER="${GHCR_USER:-}" -NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest}" +DEFAULT_NEMOCLAW_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest" +if [[ -n "${NEMOCLAW_IMAGE+x}" ]]; then + NEMOCLAW_IMAGE_EXPLICIT=1 +else + NEMOCLAW_IMAGE_EXPLICIT=0 +fi +NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-$DEFAULT_NEMOCLAW_IMAGE}" SKIP_NEMOCLAW_IMAGE_BUILD="${SKIP_NEMOCLAW_IMAGE_BUILD:-}" +CLUSTER_CONTAINER_NAME="${CLUSTER_CONTAINER_NAME:-openshell-cluster-openshell}" mkdir -p "$(dirname "$LAUNCH_LOG")" touch "$LAUNCH_LOG" @@ -261,6 +268,19 @@ should_build_nemoclaw_image() { [[ -n "$COMMUNITY_REF" && "$COMMUNITY_REF" != "main" ]] } +maybe_use_branch_local_nemoclaw_tag() { + if ! 
should_build_nemoclaw_image; then + return + fi + + if [[ "$NEMOCLAW_IMAGE_EXPLICIT" == "1" || "$NEMOCLAW_IMAGE" != "$DEFAULT_NEMOCLAW_IMAGE" ]]; then + return + fi + + NEMOCLAW_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:local-dev" + log "Using non-main branch NeMoClaw image tag: $NEMOCLAW_IMAGE" +} + build_nemoclaw_image_if_needed() { local docker_cmd=() local image_context="$REPO_ROOT/sandboxes/nemoclaw" @@ -302,6 +322,75 @@ build_nemoclaw_image_if_needed() { log "Local NeMoClaw image ready: $NEMOCLAW_IMAGE" } +resolve_docker_cmd() { + if command -v docker >/dev/null 2>&1; then + printf 'docker' + return 0 + fi + if command -v sudo >/dev/null 2>&1; then + printf 'sudo docker' + return 0 + fi + return 1 +} + +resolve_cluster_container_name() { + local docker_bin + + if [[ -n "$CLUSTER_CONTAINER_NAME" ]]; then + printf '%s' "$CLUSTER_CONTAINER_NAME" + return 0 + fi + + docker_bin="$(resolve_docker_cmd)" || return 1 + + CLUSTER_CONTAINER_NAME="$($docker_bin ps --format '{{.Names}}\t{{.Image}}' | awk '$1 ~ /^openshell-cluster-/ { print $1; exit }')" + if [[ -z "$CLUSTER_CONTAINER_NAME" ]]; then + CLUSTER_CONTAINER_NAME="$($docker_bin ps --format '{{.Names}}\t{{.Image}}' | awk '$2 ~ /ghcr.io\\/nvidia\\/openshell\\/cluster/ { print $1; exit }')" + fi + + [[ -n "$CLUSTER_CONTAINER_NAME" ]] +} + +import_nemoclaw_image_into_cluster_if_needed() { + local docker_bin cluster_name + + if ! should_build_nemoclaw_image && [[ "$NEMOCLAW_IMAGE_EXPLICIT" != "1" ]]; then + log "Skipping cluster image import; using registry-backed image: $NEMOCLAW_IMAGE" + return + fi + + docker_bin="$(resolve_docker_cmd)" || { + log "Docker not available; skipping cluster image import." + return + } + + if ! $docker_bin image inspect "$NEMOCLAW_IMAGE" >/dev/null 2>&1; then + log "Local NeMoClaw image not present on host; skipping cluster image import: $NEMOCLAW_IMAGE" + return + fi + + if ! 
cluster_name="$(resolve_cluster_container_name)"; then + log "OpenShell cluster container not found; skipping cluster image import." + return + fi + + log "Importing NeMoClaw image into cluster containerd: $NEMOCLAW_IMAGE -> $cluster_name" + if ! $docker_bin save "$NEMOCLAW_IMAGE" | $docker_bin exec -i "$cluster_name" sh -lc 'ctr -n k8s.io images import -'; then + log "Failed to import NeMoClaw image into cluster containerd." + exit 1 + fi + + if ! $docker_bin exec -i "$cluster_name" sh -lc "ctr -n k8s.io images ls | awk '{print \$1}' | grep -Fx '$NEMOCLAW_IMAGE' >/dev/null"; then + log "Imported image tag not found in cluster containerd: $NEMOCLAW_IMAGE" + log "Cluster image list:" + $docker_bin exec -i "$cluster_name" sh -lc "ctr -n k8s.io images ls | grep 'sandboxes/nemoclaw' || true" + exit 1 + fi + + log "Cluster image import complete: $NEMOCLAW_IMAGE" +} + checkout_repo_ref() { if [[ -z "$COMMUNITY_REF" ]]; then return @@ -597,6 +686,7 @@ main() { step "Resolving CLI" resolve_cli ensure_cli_compat_aliases + maybe_use_branch_local_nemoclaw_tag step "Authenticating registries" docker_login_ghcr_if_needed step "Preparing NeMoClaw image" @@ -612,6 +702,8 @@ main() { step "Starting gateway" start_gateway + step "Importing NeMoClaw image into cluster" + import_nemoclaw_image_into_cluster_if_needed step "Configuring providers" run_provider_create_or_replace \ diff --git a/brev/reset.sh.log b/brev/reset.sh.log deleted file mode 100644 index d2acd08..0000000 --- a/brev/reset.sh.log +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd -- "$SCRIPT_DIR/.." 
&& pwd)" - -CLI_BIN="${CLI_BIN:-openshell}" -SANDBOX_NAME="${SANDBOX_NAME:-nemoclaw}" -WELCOME_UI_PATTERN="${WELCOME_UI_PATTERN:-node server.js}" -NEMOCLAW_IMAGE="${NEMOCLAW_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/nemoclaw:latest}" -REMOVE_IMAGE="${REMOVE_IMAGE:-0}" - -log() { - printf '[reset] %s\n' "$*" -} - -try_run() { - if "$@"; then - return 0 - fi - return 1 -} - -stop_welcome_ui() { - if pgrep -f "$WELCOME_UI_PATTERN" >/dev/null 2>&1; then - log "Stopping Welcome UI processes matching: $WELCOME_UI_PATTERN" - pkill -f "$WELCOME_UI_PATTERN" || true - else - log "No Welcome UI process found" - fi -} - -delete_sandbox() { - log "Deleting sandbox: $SANDBOX_NAME" - if ! try_run "$CLI_BIN" sandbox delete "$SANDBOX_NAME"; then - log "Sandbox delete returned non-zero; continuing" - fi -} - -stop_forward() { - if "$CLI_BIN" forward --help >/dev/null 2>&1; then - log "Stopping forwarded port 18789 for $SANDBOX_NAME" - if ! try_run "$CLI_BIN" forward stop 18789 "$SANDBOX_NAME"; then - log "Forward stop returned non-zero; continuing" - fi - else - log "openshell forward subcommand unavailable; skipping forward stop" - fi -} - -cleanup_logs() { - log "Removing temporary logs and generated policy files" - rm -f \ - /tmp/welcome-ui.log \ - /tmp/nemoclaw-sandbox-create.log \ - /tmp/sandbox-policy-*.yaml -} - -remove_image() { - if [[ "$REMOVE_IMAGE" == "1" || "$REMOVE_IMAGE" == "true" || "$REMOVE_IMAGE" == "yes" ]]; then - log "Removing local image: $NEMOCLAW_IMAGE" - if ! 
try_run docker rmi "$NEMOCLAW_IMAGE"; then - log "Image removal returned non-zero; continuing" - fi - else - log "Leaving local image in place (set REMOVE_IMAGE=1 to remove it)" - fi -} - -main() { - log "Repo root: $REPO_ROOT" - stop_welcome_ui - delete_sandbox - stop_forward - cleanup_logs - remove_image - log "Reset complete" -} - -main "$@" diff --git a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts index 2127453..b167a0a 100644 --- a/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts +++ b/sandboxes/nemoclaw/nemoclaw-ui-extension/extension/index.ts @@ -24,6 +24,7 @@ const POST_PAIRING_SETTLE_DELAY_MS = 15_000; const STABLE_CONNECTION_WINDOW_MS = 10_000; const STABLE_CONNECTION_TIMEOUT_MS = 45_000; const PAIRING_RELOAD_FLAG = "nemoclaw:pairing-bootstrap-reloaded"; +const FORCED_RELOAD_DELAY_MS = 1_000; function inject(): boolean { const hasButton = injectButton(); @@ -101,6 +102,13 @@ function clearPairingReloadFlag(): void { } } +function forcePairingReload(reason: string, overlayText: string): void { + console.info(`[NeMoClaw] pairing bootstrap: forcing one-time reload (${reason})`); + markPairingReloadComplete(); + setConnectOverlayText(overlayText); + window.setTimeout(() => window.location.reload(), FORCED_RELOAD_DELAY_MS); +} + function bootstrap() { console.info("[NeMoClaw] pairing bootstrap: start"); showConnectOverlay(); @@ -109,10 +117,7 @@ function bootstrap() { setConnectOverlayText("Device pairing approved. Finalizing dashboard..."); console.info("[NeMoClaw] pairing bootstrap: reconnect detected"); if (shouldForcePairingReload()) { - console.info("[NeMoClaw] pairing bootstrap: forcing one-time reload"); - markPairingReloadComplete(); - setConnectOverlayText("Device pairing approved. Reloading dashboard..."); - window.location.reload(); + forcePairingReload("post-reconnect", "Device pairing approved. 
Reloading dashboard..."); return; } setConnectOverlayText("Device pairing approved. Verifying dashboard health..."); @@ -135,6 +140,10 @@ function bootstrap() { .then(finalizeConnectedState) .catch(async () => { console.warn("[NeMoClaw] pairing bootstrap: initial reconnect timed out; extending wait"); + if (shouldForcePairingReload()) { + forcePairingReload("initial-timeout", "Pairing is still settling. Reloading dashboard..."); + return; + } setConnectOverlayText("Still waiting for device pairing approval..."); try { await waitForReconnect(EXTENDED_CONNECT_TIMEOUT_MS); From 9ef9e78719f4b3bd23d4dcbb250f79b858951ddd Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 21:16:00 -0700 Subject: [PATCH 23/25] Revise Brev README --- README.md | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 489021e..a9e2a18 100644 --- a/README.md +++ b/README.md @@ -32,22 +32,14 @@ This repo is the community ecosystem around OpenShell -- a hub for contributed s #### Deploy Instantly with NVIDIA Brev -Skip the setup and launch OpenShell Community on a fully configured Brev instance. +Skip the setup and launch OpenShell Community on a fully configured Brev instance, whether you want to use Brev as a remote OpenShell gateway with or without GPU accelerators, or as an all-in-one playground for sandboxes, inference, and UI workflows. 
| Instance | Best For | Deploy |
| -------- | -------- | ------ |
-| CPU-only | External inference endpoints, remote APIs, lighter-weight sandbox workflows | Deploy on Brev |
-| NVIDIA H100 | Locally hosted LLM endpoints, GPU-heavy sandboxes, higher-throughput agent workloads | Deploy on Brev |
+| CPU-only | Remote OpenShell gateway deployments, external inference endpoints, remote APIs, and lighter-weight sandbox workflows | Deploy on Brev |
+| NVIDIA H100 | All-in-one OpenShell playgrounds, locally hosted LLM endpoints, GPU-heavy sandboxes, and higher-throughput agent workloads | Deploy on Brev |
 
-After the Brev instance is ready, bootstrap the Welcome UI:
-
-```bash
-git clone https://github.com/NVIDIA/OpenShell-Community.git
-cd OpenShell-Community
-bash brev/launch.sh
-```
-
-The launcher brings up the Welcome UI on `http://localhost:8081`, where you can inject provider keys and create the NeMoClaw sandbox flow.
+After the Brev instance is ready, open the Welcome UI to inject provider keys and access your OpenClaw sandbox.
 
 ### Using Sandboxes
 

From efeb9aa8c27a2ba325407a5b08748f2e9881b1a1 Mon Sep 17 00:00:00 2001
From: JR Morgan
Date: Fri, 13 Mar 2026 21:29:09 -0700
Subject: [PATCH 24/25] Cleanup Brev section

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index a9e2a18..fa3557e 100644
--- a/README.md
+++ b/README.md
@@ -30,8 +30,6 @@ This repo is the community ecosystem around OpenShell -- a hub for contributed s
 
 ### Quick Start with Brev
 
-#### Deploy Instantly with NVIDIA Brev
-
 Skip the setup and launch OpenShell Community on a fully configured Brev instance, whether you want to use Brev as a remote OpenShell gateway with or without GPU accelerators, or as an all-in-one playground for sandboxes, inference, and UI workflows.
| Instance | Best For | Deploy | From 7aa8d1811595f22c1799da3e9879438d93bbb10c Mon Sep 17 00:00:00 2001 From: JR Morgan Date: Fri, 13 Mar 2026 22:08:06 -0700 Subject: [PATCH 25/25] Revert policy.yaml to orig --- sandboxes/openclaw/policy.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sandboxes/openclaw/policy.yaml b/sandboxes/openclaw/policy.yaml index a12c46b..a91da84 100644 --- a/sandboxes/openclaw/policy.yaml +++ b/sandboxes/openclaw/policy.yaml @@ -125,7 +125,3 @@ network_policies: binaries: - { path: /usr/local/bin/claude } - { path: /usr/bin/gh } - -inference: - allowed_routes: - - local