From 9f4eb71f6c3034868077fc05319b37edf361ee8f Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Mon, 26 Jan 2026 01:43:33 +0200
Subject: [PATCH 1/2] fix: adapt to `llama.cpp` changes

---
 llama/addon/AddonModel.cpp                 | 13 +++++++------
 llama/addon/AddonModelLora.cpp             |  4 +---
 src/evaluator/LlamaContext/LlamaContext.ts | 11 -----------
 src/evaluator/LlamaContext/types.ts        |  2 ++
 src/evaluator/LlamaModel/LlamaModel.ts     | 18 ------------------
 5 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/llama/addon/AddonModel.cpp b/llama/addon/AddonModel.cpp
index d58f8731..94b4e576 100644
--- a/llama/addon/AddonModel.cpp
+++ b/llama/addon/AddonModel.cpp
@@ -362,6 +362,13 @@ void AddonModel::dispose() {
     }
 
     disposed = true;
+
+    if (data != nullptr) {
+        auto currentData = data;
+        data = nullptr;
+        delete currentData;
+    }
+
     if (modelLoaded) {
         modelLoaded = false;
         llama_model_free(model);
@@ -370,12 +377,6 @@
         loadedModelSize = 0;
     }
 
-    if (data != nullptr) {
-        auto currentData = data;
-        data = nullptr;
-        delete currentData;
-    }
-
     if (hasAddonExportsRef) {
         addonExportsRef.Unref();
         hasAddonExportsRef = false;
diff --git a/llama/addon/AddonModelLora.cpp b/llama/addon/AddonModelLora.cpp
index 211f35e8..cf70f939 100644
--- a/llama/addon/AddonModelLora.cpp
+++ b/llama/addon/AddonModelLora.cpp
@@ -53,10 +53,8 @@ AddonModelLora::~AddonModelLora() {
 
 void AddonModelLora::dispose(bool skipErase) {
     if (lora_adapter != nullptr) {
-        auto loraAdapterToDispose = lora_adapter;
         lora_adapter = nullptr;
-        llama_adapter_lora_free(loraAdapterToDispose);
-
+
         if (!skipErase && model->data != nullptr) {
             model->data->removeLora(this);
         }
diff --git a/src/evaluator/LlamaContext/LlamaContext.ts b/src/evaluator/LlamaContext/LlamaContext.ts
index bffc1ad6..c1e669a5 100644
--- a/src/evaluator/LlamaContext/LlamaContext.ts
+++ b/src/evaluator/LlamaContext/LlamaContext.ts
@@ -60,7 +60,6 @@ export class LlamaContext {
     /** @internal */ private readonly _disposeAggregator = new AsyncDisposeAggregator();
     /** @internal */ private readonly _modelPreventDisposalHandle: DisposalPreventionHandle;
     /** @internal */ private readonly _loraAdapters = new Set<AddonModelLora>();
-    /** @internal */ private readonly _gcRegistry: FinalizationRegistry<Set<AddonModelLora>>;
     /** @internal */ private _nextGeneratedSequenceId = 0;
     /** @internal */ private _dispatchDecodeScheduled = false;
     /** @internal */ private _batchDispatchPending = false;
@@ -146,8 +145,6 @@
             dispatchSchedule: batchingDispatchSchedule,
             itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
         };
-        this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
-        this._gcRegistry.register(this, this._loraAdapters);
 
         this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
         this._freeReservedThreads = this._freeReservedThreads.bind(this);
@@ -155,7 +152,6 @@
         this._disposeAggregator.add(() => {
             this._disposed = true;
         });
-        this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
         this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
         this._disposeAggregator.add(this.onDispose.dispatchEvent);
         this._disposeAggregator.add(
@@ -163,13 +159,6 @@
                 disposeContextIfReferenced.bind(null, new WeakRef(this))
             )
         );
-        this._disposeAggregator.add((): Promise<void> | void => {
-            if (this._loraAdapters.size > 0) {
-                const loraAdapters = new Set(this._loraAdapters);
-                this._loraAdapters.clear();
-                return this._model._removeLoraUsage(loraAdapters);
-            }
-        });
         this._disposeAggregator.add(async () => {
             await this._backendContextDisposeGuard.acquireDisposeLock();
diff --git a/src/evaluator/LlamaContext/types.ts b/src/evaluator/LlamaContext/types.ts
index f8a00ae2..890f3af4 100644
--- a/src/evaluator/LlamaContext/types.ts
+++ b/src/evaluator/LlamaContext/types.ts
@@ -126,6 +126,8 @@ export type LlamaContextOptions = {
      * without the need for extensive retraining from scratch.
      *
      * If a string is provided, it will be treated as a path to a single LoRA adapter file.
+     *
+     * The adapters will be released from memory once the model (not just the context) is disposed.
      */
     lora?: string | {
         adapters: Array<{
diff --git a/src/evaluator/LlamaModel/LlamaModel.ts b/src/evaluator/LlamaModel/LlamaModel.ts
index 2da930e6..7bce656a 100644
--- a/src/evaluator/LlamaModel/LlamaModel.ts
+++ b/src/evaluator/LlamaModel/LlamaModel.ts
@@ -275,8 +275,6 @@
             this._llamaPreventDisposalHandle.dispose();
         });
 
-        this._removeLoraUsage = this._removeLoraUsage.bind(this);
-
         this.tokenize = this.tokenize.bind(this);
         this.detokenize = this.detokenize.bind(this);
         this.isSpecialToken = this.isSpecialToken.bind(this);
@@ -703,22 +701,6 @@
         });
     }
 
-    /** @internal */
-    public async _removeLoraUsage(loraAdapters: Set<AddonModelLora>) {
-        return await withLock([this._loraAdapters, "modify"], async () => {
-            await Promise.all(
-                [...loraAdapters].map(async (lora) => {
-                    lora.usages--;
-
-                    if (lora.usages <= 0 && this._loraAdapters.get(lora.filePath) === lora) {
-                        this._loraAdapters.delete(lora.filePath);
-                        await lora.dispose();
-                    }
-                })
-            );
-        });
-    }
-
     /** @internal */
     public static async _create(modelOptions: LlamaModelOptions, {
         _llama

From 1cdae6479cdc929a1597532882e85761458de3c5 Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Mon, 26 Jan 2026 01:44:01 +0200
Subject: [PATCH 2/2] fix: change level of common logs

---
 src/bindings/Llama.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts
index 55c8592a..f1700303 100644
--- a/src/bindings/Llama.ts
+++ b/src/bindings/Llama.ts
@@ -690,6 +690,10 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil
         return LlamaLogLevel.info;
     else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
         return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is enabled, disabling mmap"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is not available, using mmap"))
+        return LlamaLogLevel.info;
     else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
         return LlamaLogLevel.info;
     else if (gpu === "metal" && level === LlamaLogLevel.warn && message.startsWith("ggml_metal_device_init: tensor API disabled for"))