Skip to content

Commit 4e32be1

Browse files
committed
Refactor llama-swap configuration and command generation for GGUF support

- Updated environment variable handling to ensure `LLAMA_STUDIO_*` keys are reserved and ignored in user configurations.
- Refactored command generation to treat `LD_LIBRARY_PATH` as a literal entry in YAML `env`, enhancing clarity and consistency.
- Renamed functions and adjusted tests to reflect changes in environment handling and command structure, ensuring accurate validation of new behavior.
- Improved frontend documentation to clarify the usage of macros and environment variables in GGUF models.
1 parent 758d681 commit 4e32be1

File tree

3 files changed: +57 −98 lines changed

backend/llama_swap_config.py

Lines changed: 34 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,7 @@
3333
_LLAMA_SWAP_MACRO_TOKEN_RE = re.compile(r"^\$\{[A-Za-z0-9_-]+\}$")
3434
_UNQUOTED_CMD_TOKEN_RE = re.compile(r"^[A-Za-z0-9_./:=+,@%${}-]+$")
3535

36-
# llama-swap ``env`` keeps generated per-model context for observability / child-process env.
37-
# Shared engine binary + base ``LD_LIBRARY_PATH`` live in top-level ``macros``.
38-
_STUDIO_ENV_MODEL_PATH = "LLAMA_STUDIO_MODEL_PATH"
39-
_STUDIO_ENV_HF_REPO = "LLAMA_STUDIO_HF_REPO"
40-
_STUDIO_ENV_MMPROJ_PATH = "LLAMA_STUDIO_MMPROJ_PATH"
36+
# User ``swap_env`` keys prefixed with ``LLAMA_STUDIO_`` are reserved (ignored).
4137
_STUDIO_ENV_PREFIX = "LLAMA_STUDIO_"
4238

4339
_MODEL_MACRO_MODEL_PATH = "studio_model_path"
@@ -282,10 +278,10 @@ def _register_gguf_engine_macros(
282278
engine: str,
283279
abs_bin: str,
284280
ld: str,
285-
) -> tuple[str, Optional[str]]:
281+
) -> str:
286282
"""
287-
Register llama-swap macros for one GGUF engine (binary path + base library path).
288-
Returns ``(bin_macro_name, ld_macro_name_or_none)``; ``ld_macro`` is omitted when ``ld`` is empty.
283+
Register shared GGUF engine binary macro; validate ``ld`` (library path base) is consistent
284+
per engine. ``ld`` is emitted as literal ``LD_LIBRARY_PATH`` in ``env``, not as a macro.
289285
"""
290286
bin_macro, ld_macro = _gguf_macro_names(engine)
291287
norm_bin = os.path.abspath(abs_bin) if os.path.exists(abs_bin) else abs_bin
@@ -296,23 +292,21 @@ def _register_gguf_engine_macros(
296292
raise ValueError(
297293
f"llama-swap config: inconsistent binary or library path for engine {engine!r}"
298294
)
299-
return ex["bin_macro"], (ex["ld_macro"] if ex["ld"] else None)
295+
return ex["bin_macro"]
300296
registry[engine] = {
301297
"bin_macro": bin_macro,
302298
"ld_macro": ld_macro,
303299
"abs_bin": norm_bin,
304300
"ld": ld,
305301
}
306-
return bin_macro, (ld_macro if ld else None)
302+
return bin_macro
307303

308304

309305
def _macros_dict_for_yaml(registry: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
310-
"""Ordered map: ld macros before bin macros per engine (llama-swap macro-in-macro ordering)."""
306+
"""Global GGUF macros: engine binary path only (``LD_LIBRARY_PATH`` is literal ``env``)."""
311307
out: Dict[str, str] = {}
312308
for eng in sorted(registry.keys()):
313309
inf = registry[eng]
314-
if inf["ld"]:
315-
out[inf["ld_macro"]] = inf["ld"]
316310
out[inf["bin_macro"]] = inf["abs_bin"]
317311
return out
318312

@@ -342,27 +336,6 @@ def _merge_macro_maps(*maps: Optional[Dict[str, str]]) -> Optional[Dict[str, str
342336
return out or None
343337

344338

345-
def _gguf_ld_env_assignment(
346-
ld_macro: Optional[str], resolved_ld: str, user_ld: Optional[str]
347-
) -> Optional[str]:
348-
"""
349-
Single ``env`` argument ``LD_LIBRARY_PATH=…`` for the ``env`` prefix on ``cmd``.
350-
Uses ``${ld_macro}`` when the base path is non-empty (llama-swap expands macros in ``cmd`` only).
351-
"""
352-
resolved_ld = (resolved_ld or "").strip()
353-
if ld_macro and resolved_ld:
354-
val = _macro_ref(ld_macro)
355-
elif resolved_ld:
356-
val = resolved_ld
357-
else:
358-
val = ""
359-
if user_ld:
360-
val = f"{val}:{user_ld}" if val else user_ld
361-
if not val:
362-
return None
363-
return f"LD_LIBRARY_PATH={val}"
364-
365-
366339
def _normalize_swap_env(config: Optional[Dict[str, Any]]) -> Dict[str, str]:
367340
"""Parse per-model ``swap_env`` (llama-swap YAML ``env``) from effective config."""
368341
raw = (config or {}).get("swap_env")
@@ -384,7 +357,7 @@ def _normalize_swap_env(config: Optional[Dict[str, Any]]) -> Dict[str, str]:
384357

385358
def _gguf_user_env_and_ld_suffix(user_env: Dict[str, str]) -> tuple[Dict[str, str], Optional[str]]:
386359
"""
387-
Drop reserved ``LLAMA_STUDIO_*`` keys; split out ``LD_LIBRARY_PATH`` (inlined into GGUF ``cmd``).
360+
Drop reserved ``LLAMA_STUDIO_*`` keys; split out ``LD_LIBRARY_PATH`` (merged into GGUF ``env``).
388361
"""
389362
merged = {
390363
k: v
@@ -397,14 +370,23 @@ def _gguf_user_env_and_ld_suffix(user_env: Dict[str, str]) -> tuple[Dict[str, st
397370
return merged, None
398371

399372

400-
def _llama_swap_env_lines(
373+
def _gguf_swap_env_lines(
401374
user_merged: Dict[str, str],
402-
studio_env: Optional[Dict[str, str]] = None,
375+
*,
376+
resolved_ld_library_path: str,
377+
user_ld_suffix: Optional[str],
403378
) -> List[str]:
404-
"""Build llama-swap ``env`` lines (no ``LD_LIBRARY_PATH`` for GGUF — that uses ``cmd`` + macros)."""
379+
"""
380+
GGUF ``env``: user ``swap_env`` (minus reserved keys and split ``LD_LIBRARY_PATH``) plus
381+
merged ``LD_LIBRARY_PATH`` as a **literal** (llama-swap does not expand ``${…}`` in ``env``).
382+
Model / mmproj / hf-repo paths live only in per-model ``macros``, not here.
383+
"""
405384
merged = dict(user_merged)
406-
if studio_env:
407-
merged.update(studio_env)
385+
ld = (resolved_ld_library_path or "").strip()
386+
if user_ld_suffix:
387+
ld = f"{ld}:{user_ld_suffix}" if ld else user_ld_suffix
388+
if ld:
389+
merged["LD_LIBRARY_PATH"] = ld
408390
return [f"{k}={merged[k]}" for k in sorted(merged.keys())]
409391

410392

@@ -597,31 +579,19 @@ def _resolve_mmproj_path(
597579
def _build_llama_swap_gguf_cmd(
598580
*,
599581
bin_macro: str,
600-
ld_macro: Optional[str],
601-
resolved_ld_library_path: str,
602-
user_ld_suffix: Optional[str],
603582
proxy_model_name: str,
604583
model_macros: Dict[str, str],
605584
structured_argv: List[str],
606585
) -> str:
607586
"""
608-
Single-line ``cmd`` for llama-swap: no ``bash -c``.
609-
610-
Shared binary and base ``LD_LIBRARY_PATH`` use llama-swap ``macros`` (e.g. ``${studio_gguf_bin_llama_cpp}``).
611-
Because macros are expanded in ``cmd`` but not in per-model ``env``, any
612-
``LD_LIBRARY_PATH`` (macro + optional user suffix from ``swap_env``) is passed via an
613-
``env`` *prefix* on this line.
587+
Single-line ``cmd`` for llama-swap: no ``bash -c``, no ``env`` prefix.
614588
615-
Per-model model / repo / mmproj values also use llama-swap macros so the command stays
616-
valid without a shell wrapper. Generated ``LLAMA_STUDIO_*`` entries remain in ``env``
617-
for visibility and any child-process tooling that wants them.
589+
Shared engine binary uses a top-level macro (e.g. ``${studio_gguf_bin_llama_cpp}``).
590+
Per-model model / hf-repo / mmproj paths use per-model ``macros`` (``${studio_model_path}``, …).
591+
``LD_LIBRARY_PATH`` is **not** in ``cmd``; it is a literal entry in YAML ``env`` (llama-swap
592+
does not expand ``${…}`` inside ``env`` values).
618593
"""
619594
parts: List[str] = []
620-
ld_assign = _gguf_ld_env_assignment(
621-
ld_macro, resolved_ld_library_path, user_ld_suffix
622-
)
623-
if ld_assign:
624-
parts.extend(["env", ld_assign])
625595
parts.append(_macro_ref(bin_macro))
626596
if _MODEL_MACRO_HF_REPO in model_macros:
627597
parts.extend(["--hf-repo", _macro_ref(_MODEL_MACRO_HF_REPO)])
@@ -667,13 +637,6 @@ def _build_llama_command(
667637
config, engine=structured_engine, param_index=param_index
668638
)
669639

670-
studio_env: Dict[str, str] = {}
671-
if hf_repo_arg:
672-
studio_env[_STUDIO_ENV_HF_REPO] = str(hf_repo_arg)
673-
else:
674-
studio_env[_STUDIO_ENV_MODEL_PATH] = str(model_path)
675-
if mmproj_path:
676-
studio_env[_STUDIO_ENV_MMPROJ_PATH] = str(mmproj_path)
677640
model_macros = _llama_swap_model_macros(
678641
model_path=model_path,
679642
hf_repo_arg=hf_repo_arg,
@@ -682,26 +645,26 @@ def _build_llama_command(
682645

683646
user_merged, user_ld = _gguf_user_env_and_ld_suffix(_normalize_swap_env(config))
684647
if gguf_macro_registry is not None:
685-
bin_macro, ld_macro = _register_gguf_engine_macros(
648+
bin_macro = _register_gguf_engine_macros(
686649
gguf_macro_registry,
687650
engine=structured_engine,
688651
abs_bin=exec_path,
689652
ld=library_path,
690653
)
691654
else:
692-
bin_macro, ld_name = _gguf_macro_names(structured_engine)
693-
ld_macro = ld_name if (library_path or "").strip() else None
655+
bin_macro = _gguf_macro_names(structured_engine)[0]
694656

695657
cmd = _build_llama_swap_gguf_cmd(
696658
bin_macro=bin_macro,
697-
ld_macro=ld_macro,
698-
resolved_ld_library_path=library_path,
699-
user_ld_suffix=user_ld,
700659
proxy_model_name=proxy_model_name,
701660
model_macros=model_macros,
702661
structured_argv=structured_argv,
703662
)
704-
env_list = _llama_swap_env_lines(user_merged, studio_env=studio_env)
663+
env_list = _gguf_swap_env_lines(
664+
user_merged,
665+
resolved_ld_library_path=library_path,
666+
user_ld_suffix=user_ld,
667+
)
705668
return cmd, env_list, model_macros
706669

707670

backend/tests/test_llama_swap_config.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -301,21 +301,20 @@ def get_active_engine_version(self, engine):
301301
assert llama_swap_config.is_ik_llama_cpp("/tmp/llama") is False
302302

303303

304-
def test_llama_swap_env_lines_drops_user_llama_studio_keys_and_merges_studio_env():
304+
def test_gguf_swap_env_lines_drops_reserved_llama_studio_keys():
305305
user = llama_swap_config._normalize_swap_env(
306306
{"swap_env": {"LLAMA_STUDIO_MODEL_PATH": "/evil.gguf", "FOO": "bar"}}
307307
)
308308
merged, user_ld = llama_swap_config._gguf_user_env_and_ld_suffix(user)
309309
assert user_ld is None
310-
out = llama_swap_config._llama_swap_env_lines(
310+
out = llama_swap_config._gguf_swap_env_lines(
311311
merged,
312-
studio_env={
313-
llama_swap_config._STUDIO_ENV_MODEL_PATH: "/real.gguf",
314-
},
312+
resolved_ld_library_path="/lib",
313+
user_ld_suffix=None,
315314
)
316315
assert out == [
317316
"FOO=bar",
318-
"LLAMA_STUDIO_MODEL_PATH=/real.gguf",
317+
"LD_LIBRARY_PATH=/lib",
319318
]
320319

321320

@@ -339,8 +338,13 @@ def test_normalize_swap_env_and_gguf_user_env_ld_split():
339338
llama_swap_config._normalize_swap_env(raw)
340339
)
341340
assert uld == "/extra/lib"
342-
assert llama_swap_config._llama_swap_env_lines(um) == [
341+
assert llama_swap_config._gguf_swap_env_lines(
342+
um,
343+
resolved_ld_library_path="/base",
344+
user_ld_suffix="/extra/lib",
345+
) == [
343346
"CUDA_VISIBLE_DEVICES=0",
347+
"LD_LIBRARY_PATH=/base:/extra/lib",
344348
]
345349

346350

@@ -379,16 +383,11 @@ def test_render_bash_command_round_trips_shell_escaping():
379383
def test_build_llama_swap_gguf_cmd_keeps_structured_args_as_separate_tokens():
380384
cmd = llama_swap_config._build_llama_swap_gguf_cmd(
381385
bin_macro="studio_gguf_bin_llama_cpp",
382-
ld_macro="studio_gguf_ld_llama_cpp",
383-
resolved_ld_library_path="/base/lib",
384-
user_ld_suffix="/extra/lib",
385386
proxy_model_name="org-model.q4_k_m",
386387
model_macros={"studio_model_path": "/m"},
387388
structured_argv=["--ctx-size", "4096", "--stop", "END HERE"],
388389
)
389390
assert shlex.split(cmd) == [
390-
"env",
391-
"LD_LIBRARY_PATH=${studio_gguf_ld_llama_cpp}:/extra/lib",
392391
"${studio_gguf_bin_llama_cpp}",
393392
"--model",
394393
"${studio_model_path}",
@@ -561,9 +560,8 @@ def test_preview_llama_swap_command_uses_catalog_metadata(monkeypatch, tmp_path)
561560
assert "bash -c" not in preview["cmd"]
562561
assert "${studio_model_path}" in preview["cmd"]
563562
assert "${studio_gguf_bin_llama_cpp}" in preview["cmd"]
564-
assert "${studio_gguf_ld_llama_cpp}" in preview["cmd"]
563+
assert "studio_gguf_ld_" not in preview["cmd"]
565564
assert preview["macros"] == {
566-
"studio_gguf_ld_llama_cpp": "/fake/lib",
567565
"studio_gguf_bin_llama_cpp": str(binary_path),
568566
"studio_model_path": str(model_path),
569567
}
@@ -576,10 +574,10 @@ def test_preview_llama_swap_command_uses_catalog_metadata(monkeypatch, tmp_path)
576574
assert preview["use_model_name"] is None
577575
env_lines = sorted(preview["env"] or [])
578576
assert env_lines == [
579-
f"LLAMA_STUDIO_MODEL_PATH={model_path}",
577+
"LD_LIBRARY_PATH=/fake/lib",
580578
]
581-
assert preview["cmd"].startswith("env LD_LIBRARY_PATH=")
582-
assert "$LLAMA_STUDIO_MODEL_PATH" not in preview["cmd"]
579+
assert not preview["cmd"].lstrip().startswith("env ")
580+
assert "LLAMA_STUDIO_" not in "\n".join(env_lines)
583581

584582

585583
def test_preview_lmdeploy_command_uses_catalog_metadata(monkeypatch):
@@ -740,13 +738,12 @@ def fake_param_index(engine):
740738
assert "--temperature 0.9" in doc["models"]["org-model.q4_k_m"]["cmd"]
741739
gguf_env = sorted(doc["models"]["org-model.q4_k_m"]["env"])
742740
assert gguf_env == [
743-
f"LLAMA_STUDIO_MODEL_PATH={str(model_path)}",
741+
"LD_LIBRARY_PATH=/fake/lib",
744742
]
745743
assert "bash -c" not in doc["models"]["org-model.q4_k_m"]["cmd"]
746744
assert "${studio_gguf_bin_llama_cpp}" in doc["models"]["org-model.q4_k_m"]["cmd"]
747745
assert "${studio_model_path}" in doc["models"]["org-model.q4_k_m"]["cmd"]
748746
assert doc["macros"] == {
749-
"studio_gguf_ld_llama_cpp": "/fake/lib",
750747
"studio_gguf_bin_llama_cpp": str(binary_path),
751748
}
752749
assert doc["models"]["org-model.q4_k_m"]["macros"] == {
@@ -852,13 +849,12 @@ def inv_paths(path):
852849
cmd = doc["models"]["org-model.q4_k_m"]["cmd"]
853850
assert "bash -c" not in cmd
854851
assert "${studio_gguf_bin_ik_llama}" in cmd
855-
assert "${studio_gguf_ld_ik_llama}" in cmd
852+
assert "studio_gguf_ld_" not in cmd
856853
assert "${studio_model_path}" in cmd
857854
assert sorted(doc["models"]["org-model.q4_k_m"]["env"]) == [
858-
f"LLAMA_STUDIO_MODEL_PATH={str(model_path)}",
855+
"LD_LIBRARY_PATH=/fake/lib",
859856
]
860857
assert doc["macros"] == {
861-
"studio_gguf_ld_ik_llama": "/fake/lib",
862858
"studio_gguf_bin_ik_llama": str(ik_bin),
863859
}
864860
assert doc["models"]["org-model.q4_k_m"]["macros"] == {

frontend/src/views/ModelConfig.vue

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -371,11 +371,11 @@
371371
href="https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml"
372372
target="_blank"
373373
rel="noopener noreferrer"
374-
>llama-swap config.example.yaml</a>). GGUF models use top-level llama-swap
375-
<code>macros</code> for the engine binary and base <code>LD_LIBRARY_PATH</code>; your
376-
<code>LD_LIBRARY_PATH</code> here is appended on the <code>cmd</code> line. Per-model
377-
<code>LLAMA_STUDIO_*</code> entries (model path, optional mmproj / hf-repo) are generated;
378-
do not set those names manually—they are ignored if present.
374+
>llama-swap config.example.yaml</a>). For GGUF, model paths and the engine binary use
375+
llama-swap <code>macros</code> (shown in the preview). Real environment variables
376+
(<code>CUDA_VISIBLE_DEVICES</code>, merged <code>LD_LIBRARY_PATH</code>, etc.) live only in
377+
YAML <code>env</code>. Do not set <code>LLAMA_STUDIO_*</code> keys in swap env—they are
378+
reserved and ignored.
379379
<template v-if="showNvidiaGpuBind">
380380
<code>CUDA_VISIBLE_DEVICES</code> is configured above when NVIDIA GPUs are detected.
381381
</template>

0 commit comments

Comments (0)