
Commit 6dbddac
Author: Ralf Waldukat
CRITICAL FIX: Add missing parameters to llama_params_fit

Found by Gemini-3-Flash deep review. The llama_params_fit binding declared only 6 parameters, missing margin, n_ctx_min, and log_level (and keeping a stale n_buft_overrides), so any actual call through it would have corrupted the stack.

Before (WRONG - 6 parameters):
- path_model, mparams, cparams, tensor_split, tensor_buft_overrides, n_buft_overrides

After (CORRECT - 8 parameters, per llama.h:480-488):
- path_model, mparams, cparams, tensor_split, tensor_buft_overrides, margin, n_ctx_min, log_level

Impact: prevents a stack corruption/segfault. llama_params_fit (memory fitting) is rarely used, so the bug was latent but critical.
1 parent: d53fc2e
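For orientation, here is the corrected binding assembled into one piece from the three hunks below. This is a sketch, not verbatim file contents: the first two argtypes and wrapper parameters (path_model and mparams) are not visible in the diff and are inferred from the C prototype comments, so treat those lines as assumptions.

```python
# Sketch of the corrected 8-parameter binding after this commit.
# The first two entries are inferred; the rest appear in the diff hunks below.
@ctypes_function(
    "llama_params_fit",
    [
        ctypes.c_char_p,                       # const char * path_model (inferred)
        ctypes.POINTER(llama_model_params),    # struct llama_model_params * mparams (inferred)
        ctypes.POINTER(llama_context_params),  # struct llama_context_params * cparams
        ctypes.POINTER(ctypes.c_float),        # float * tensor_split
        ctypes.c_void_p,                       # tensor_buft_overrides - not fully bound
        ctypes.c_size_t,                       # size_t margin
        ctypes.c_uint32,                       # uint32_t n_ctx_min
        ctypes.c_int,                          # enum ggml_log_level log_level
    ],
    ctypes.c_int,
)
def llama_params_fit(
    path_model: bytes,                                  # inferred
    mparams: CtypesPointerOrRef[llama_model_params],    # inferred
    cparams: CtypesPointerOrRef[llama_context_params],
    tensor_split: CtypesArray[ctypes.c_float],
    tensor_buft_overrides: Optional[ctypes.c_void_p],
    margin: Union[ctypes.c_size_t, int],
    n_ctx_min: Union[ctypes.c_uint32, int],
    log_level: int,
    /,
) -> int: ...
```

The danger fixed here is that ctypes trusts the declared argtypes completely: with only 6 argtypes declared, the C function would read margin, n_ctx_min, and log_level from registers or stack slots the caller never populated.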

File tree

1 file changed: +14 −3 lines


llama_cpp/llama_cpp.py

Lines changed: 14 additions & 3 deletions
```diff
@@ -1409,7 +1409,9 @@ def llama_max_tensor_buft_overrides() -> int:
 # struct llama_context_params * cparams,
 # float * tensor_split,
 # struct llama_model_tensor_buft_override * tensor_buft_overrides,
-# size_t n_buft_overrides);
+# size_t margin,
+# uint32_t n_ctx_min,
+# enum ggml_log_level log_level);
 @ctypes_function(
     "llama_params_fit",
     [
@@ -1418,7 +1420,9 @@ def llama_max_tensor_buft_overrides() -> int:
         ctypes.POINTER(llama_context_params),
         ctypes.POINTER(ctypes.c_float),
         ctypes.c_void_p,  # tensor_buft_overrides - not fully bound
-        ctypes.c_size_t,
+        ctypes.c_size_t,  # margin
+        ctypes.c_uint32,  # n_ctx_min
+        ctypes.c_int,  # ggml_log_level (enum)
     ],
     ctypes.c_int,
 )
@@ -1428,11 +1432,18 @@ def llama_params_fit(
     cparams: CtypesPointerOrRef[llama_context_params],
     tensor_split: CtypesArray[ctypes.c_float],
     tensor_buft_overrides: Optional[ctypes.c_void_p],
-    n_buft_overrides: Union[ctypes.c_size_t, int],
+    margin: Union[ctypes.c_size_t, int],
+    n_ctx_min: Union[ctypes.c_uint32, int],
+    log_level: int,
     /,
 ) -> int:
     """Check if model parameters will fit in memory
 
+    Args:
+        margin: Memory margin to leave per device in bytes
+        n_ctx_min: Minimum context size when trying to reduce memory
+        log_level: Minimum log level (ggml_log_level enum)
+
     Returns:
         LLAMA_PARAMS_FIT_STATUS_SUCCESS (0) - found allocations that are projected to fit
         LLAMA_PARAMS_FIT_STATUS_FAILURE (1) - could not find allocations that are projected to fit
```
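A minimal usage sketch of the corrected call, assuming the underlying libllama build actually exports llama_params_fit. The model path, margin, n_ctx_min, and log_level values are illustrative assumptions; llama_model_default_params, llama_context_default_params, and llama_max_devices are existing helpers in these bindings.

```python
import ctypes
import llama_cpp

# Default parameter structs provided by the bindings.
mparams = llama_cpp.llama_model_default_params()
cparams = llama_cpp.llama_context_default_params()

# One float per device; all zeros lets the library choose the split.
tensor_split = (ctypes.c_float * llama_cpp.llama_max_devices())()

status = llama_cpp.llama_params_fit(
    b"/path/to/model.gguf",  # hypothetical model path
    ctypes.byref(mparams),
    ctypes.byref(cparams),
    tensor_split,
    None,                    # tensor_buft_overrides - not fully bound
    512 * 1024 * 1024,       # margin: leave ~512 MiB free per device (assumed value)
    512,                     # n_ctx_min: context floor when reducing memory (assumed value)
    2,                       # log_level: ggml_log_level value (2 is INFO in current ggml)
)

if status == 0:   # LLAMA_PARAMS_FIT_STATUS_SUCCESS
    print("allocations are projected to fit")
else:             # LLAMA_PARAMS_FIT_STATUS_FAILURE (1)
    print("could not find allocations that are projected to fit")
```

Note that with the pre-fix 6-parameter binding, exactly this call would have raised a TypeError or, worse, passed garbage for margin, n_ctx_min, and log_level, which is the latent bug this commit closes.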
