From 9f4eb71f6c3034868077fc05319b37edf361ee8f Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Mon, 26 Jan 2026 01:43:33 +0200
Subject: [PATCH 1/2] fix: adapt to `llama.cpp` changes

---
 llama/addon/AddonModel.cpp                 | 13 +++++++------
 llama/addon/AddonModelLora.cpp             |  4 +---
 src/evaluator/LlamaContext/LlamaContext.ts | 11 -----------
 src/evaluator/LlamaContext/types.ts        |  2 ++
 src/evaluator/LlamaModel/LlamaModel.ts     | 18 ------------------
 5 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/llama/addon/AddonModel.cpp b/llama/addon/AddonModel.cpp
index d58f8731..94b4e576 100644
--- a/llama/addon/AddonModel.cpp
+++ b/llama/addon/AddonModel.cpp
@@ -362,6 +362,13 @@ void AddonModel::dispose() {
     }
 
     disposed = true;
+
+    if (data != nullptr) {
+        auto currentData = data;
+        data = nullptr;
+        delete currentData;
+    }
+
     if (modelLoaded) {
         modelLoaded = false;
         llama_model_free(model);
@@ -370,12 +377,6 @@
         loadedModelSize = 0;
     }
 
-    if (data != nullptr) {
-        auto currentData = data;
-        data = nullptr;
-        delete currentData;
-    }
-
     if (hasAddonExportsRef) {
         addonExportsRef.Unref();
         hasAddonExportsRef = false;
diff --git a/llama/addon/AddonModelLora.cpp b/llama/addon/AddonModelLora.cpp
index 211f35e8..cf70f939 100644
--- a/llama/addon/AddonModelLora.cpp
+++ b/llama/addon/AddonModelLora.cpp
@@ -53,10 +53,8 @@ AddonModelLora::~AddonModelLora() {
 
 void AddonModelLora::dispose(bool skipErase) {
     if (lora_adapter != nullptr) {
-        auto loraAdapterToDispose = lora_adapter;
         lora_adapter = nullptr;
-        llama_adapter_lora_free(loraAdapterToDispose);
-
+
         if (!skipErase && model->data != nullptr) {
             model->data->removeLora(this);
         }
diff --git a/src/evaluator/LlamaContext/LlamaContext.ts b/src/evaluator/LlamaContext/LlamaContext.ts
index bffc1ad6..c1e669a5 100644
--- a/src/evaluator/LlamaContext/LlamaContext.ts
+++ b/src/evaluator/LlamaContext/LlamaContext.ts
@@ -60,7 +60,6 @@ export class LlamaContext {
     /** @internal */ private readonly _disposeAggregator = new AsyncDisposeAggregator();
     /** @internal */ private readonly _modelPreventDisposalHandle: DisposalPreventionHandle;
     /** @internal */ private readonly _loraAdapters = new Set<AddonModelLora>();
-    /** @internal */ private readonly _gcRegistry: FinalizationRegistry<Set<AddonModelLora>>;
     /** @internal */ private _nextGeneratedSequenceId = 0;
     /** @internal */ private _dispatchDecodeScheduled = false;
     /** @internal */ private _batchDispatchPending = false;
@@ -146,8 +145,6 @@
             dispatchSchedule: batchingDispatchSchedule,
             itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
         };
-        this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
-        this._gcRegistry.register(this, this._loraAdapters);
 
         this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
         this._freeReservedThreads = this._freeReservedThreads.bind(this);
@@ -155,7 +152,6 @@
         this._disposeAggregator.add(() => {
             this._disposed = true;
         });
-        this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
         this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
         this._disposeAggregator.add(this.onDispose.dispatchEvent);
         this._disposeAggregator.add(
@@ -163,13 +159,6 @@
                 disposeContextIfReferenced.bind(null, new WeakRef(this))
             )
         );
-        this._disposeAggregator.add((): Promise<void> | void => {
-            if (this._loraAdapters.size > 0) {
-                const loraAdapters = new Set(this._loraAdapters);
-                this._loraAdapters.clear();
-                return this._model._removeLoraUsage(loraAdapters);
-            }
-        });
         this._disposeAggregator.add(async () => {
             await this._backendContextDisposeGuard.acquireDisposeLock();
diff --git a/src/evaluator/LlamaContext/types.ts b/src/evaluator/LlamaContext/types.ts
index f8a00ae2..890f3af4 100644
--- a/src/evaluator/LlamaContext/types.ts
+++ b/src/evaluator/LlamaContext/types.ts
@@ -126,6 +126,8 @@ export type LlamaContextOptions = {
      * without the need for extensive retraining from scratch.
      *
      * If a string is provided, it will be treated as a path to a single LoRA adapter file.
+     *
+     * The adapters will be released from memory once the model (not just the context) is disposed.
      */
     lora?: string | {
         adapters: Array<{
diff --git a/src/evaluator/LlamaModel/LlamaModel.ts b/src/evaluator/LlamaModel/LlamaModel.ts
index 2da930e6..7bce656a 100644
--- a/src/evaluator/LlamaModel/LlamaModel.ts
+++ b/src/evaluator/LlamaModel/LlamaModel.ts
@@ -275,8 +275,6 @@
             this._llamaPreventDisposalHandle.dispose();
         });
 
-        this._removeLoraUsage = this._removeLoraUsage.bind(this);
-
         this.tokenize = this.tokenize.bind(this);
         this.detokenize = this.detokenize.bind(this);
         this.isSpecialToken = this.isSpecialToken.bind(this);
@@ -703,22 +701,6 @@
         });
     }
 
-    /** @internal */
-    public async _removeLoraUsage(loraAdapters: Set<AddonModelLora>) {
-        return await withLock([this._loraAdapters, "modify"], async () => {
-            await Promise.all(
-                [...loraAdapters].map(async (lora) => {
-                    lora.usages--;
-
-                    if (lora.usages <= 0 && this._loraAdapters.get(lora.filePath) === lora) {
-                        this._loraAdapters.delete(lora.filePath);
-                        await lora.dispose();
-                    }
-                })
-            );
-        });
-    }
-
     /** @internal */
     public static async _create(modelOptions: LlamaModelOptions, {
         _llama

From 1cdae6479cdc929a1597532882e85761458de3c5 Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Mon, 26 Jan 2026 01:44:01 +0200
Subject: [PATCH 2/2] fix: change level of common logs

---
 src/bindings/Llama.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts
index 55c8592a..f1700303 100644
--- a/src/bindings/Llama.ts
+++ b/src/bindings/Llama.ts
@@ -690,6 +690,10 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil
         return LlamaLogLevel.info;
     else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
         return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is enabled, disabling mmap"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is not available, using mmap"))
+        return LlamaLogLevel.info;
     else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
         return LlamaLogLevel.info;
     else if (gpu === "metal" && level === LlamaLogLevel.warn && message.startsWith("ggml_metal_device_init: tensor API disabled for"))