llama/addon/AddonModel.cpp (7 additions & 6 deletions)

@@ -362,6 +362,13 @@ void AddonModel::dispose() {
     }

     disposed = true;
+
+    if (data != nullptr) {
+        auto currentData = data;
+        data = nullptr;
+        delete currentData;
+    }
+
     if (modelLoaded) {
         modelLoaded = false;
         llama_model_free(model);
@@ -370,12 +377,6 @@
         loadedModelSize = 0;
     }

-    if (data != nullptr) {
-        auto currentData = data;
-        data = nullptr;
-        delete currentData;
-    }
-
     if (hasAddonExportsRef) {
         addonExportsRef.Unref();
         hasAddonExportsRef = false;
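
Note: the two hunks above move the `delete data` block so it runs before `llama_model_free(model)` instead of after it, presumably so that per-model bookkeeping (including registered LoRA adapters) is torn down while the native model handle is still valid. The block itself uses a detach-then-release shape that makes re-entrant disposal harmless. A minimal sketch of that guard, transliterated to TypeScript (illustrative names, not the addon's API):

    // Sketch of the "detach, then release" teardown guard from the hunk above.
    class NativeHandleOwner {
        private data: {release(): void} | null = {release() {}};

        public dispose() {
            if (this.data != null) {
                const currentData = this.data;
                this.data = null; // detach first, so a re-entrant dispose() is a no-op
                currentData.release(); // safe even if release() calls back into dispose()
            }
        }
    }
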
llama/addon/AddonModelLora.cpp (1 addition & 3 deletions)

@@ -53,10 +53,8 @@ AddonModelLora::~AddonModelLora() {

 void AddonModelLora::dispose(bool skipErase) {
     if (lora_adapter != nullptr) {
-        auto loraAdapterToDispose = lora_adapter;
         lora_adapter = nullptr;
-        llama_adapter_lora_free(loraAdapterToDispose);
-
+
         if (!skipErase && model->data != nullptr) {
             model->data->removeLora(this);
         }
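
Note: `dispose` no longer calls `llama_adapter_lora_free` itself; it only clears the wrapper's pointer and unregisters the adapter from the model's bookkeeping. Read together with the doc-comment addition in types.ts further down, the adapter's native memory is presumably reclaimed when the model itself is freed. The `skipErase` flag fits a familiar pattern: skip per-item unregistration when the owner is disposing its whole collection at once. A hedged TypeScript sketch of that assumed pattern (illustrative names only):

    // Sketch of the assumed `skipErase` pattern: during bulk teardown the
    // owner clears its set itself, so each item skips the redundant removal.
    class Registry {
        private readonly items = new Set<Item>();

        public add(item: Item) { this.items.add(item); }
        public remove(item: Item) { this.items.delete(item); }

        public disposeAll() {
            for (const item of this.items)
                item.dispose(true); // skipErase: the registry clears itself below
            this.items.clear();
        }
    }

    class Item {
        public constructor(private readonly owner: Registry) {
            owner.add(this);
        }

        public dispose(skipErase = false) {
            if (!skipErase)
                this.owner.remove(this);
        }
    }
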
src/bindings/Llama.ts (4 additions & 0 deletions)

@@ -690,6 +690,10 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil
         return LlamaLogLevel.info;
     else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
         return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is enabled, disabling mmap"))
+        return LlamaLogLevel.info;
+    else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is not available, using mmap"))
+        return LlamaLogLevel.info;
     else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
         return LlamaLogLevel.info;
     else if (gpu === "metal" && level === LlamaLogLevel.warn && message.startsWith("ggml_metal_device_init: tensor API disabled for"))
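
Note: the two new branches extend the existing prefix-matching pattern in `getTransformedLogLevel`, downgrading known-benign `llama_model_loader` notices about direct I/O from `warn` to `info`. The same pattern, distilled into a standalone sketch (the enum and function shape here are stand-ins for illustration, not the library's code):

    // Standalone sketch of the prefix-based warning downgrade pattern.
    enum LogLevel { info = "info", warn = "warn" }

    // Known-benign loader notices that should not surface as warnings.
    const benignWarningPrefixes = [
        "llama_model_loader: direct I/O is enabled, disabling mmap",
        "llama_model_loader: direct I/O is not available, using mmap"
    ];

    function transformLogLevel(level: LogLevel, message: string): LogLevel {
        if (level === LogLevel.warn && benignWarningPrefixes.some((prefix) => message.startsWith(prefix)))
            return LogLevel.info;

        return level;
    }
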
src/evaluator/LlamaContext/LlamaContext.ts (0 additions & 11 deletions)

@@ -60,7 +60,6 @@ export class LlamaContext {
     /** @internal */ private readonly _disposeAggregator = new AsyncDisposeAggregator();
     /** @internal */ private readonly _modelPreventDisposalHandle: DisposalPreventionHandle;
     /** @internal */ private readonly _loraAdapters = new Set<AddonModelLora>();
-    /** @internal */ private readonly _gcRegistry: FinalizationRegistry<Set<AddonModelLora>>;
     /** @internal */ private _nextGeneratedSequenceId = 0;
     /** @internal */ private _dispatchDecodeScheduled = false;
     /** @internal */ private _batchDispatchPending = false;
@@ -146,30 +145,20 @@
             dispatchSchedule: batchingDispatchSchedule,
             itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
         };
-        this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
-        this._gcRegistry.register(this, this._loraAdapters);

         this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
         this._freeReservedThreads = this._freeReservedThreads.bind(this);

         this._disposeAggregator.add(() => {
             this._disposed = true;
         });
-        this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
         this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
         this._disposeAggregator.add(this.onDispose.dispatchEvent);
         this._disposeAggregator.add(
             this.model.onDispose.createListener(
                 disposeContextIfReferenced.bind(null, new WeakRef(this))
             )
         );
-        this._disposeAggregator.add((): Promise<void> | void => {
-            if (this._loraAdapters.size > 0) {
-                const loraAdapters = new Set(this._loraAdapters);
-                this._loraAdapters.clear();
-                return this._model._removeLoraUsage(loraAdapters);
-            }
-        });

         this._disposeAggregator.add(async () => {
             await this._backendContextDisposeGuard.acquireDisposeLock();
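
Note: these deletions remove the `FinalizationRegistry` safety net that decremented LoRA usage counts when a context was garbage-collected without being disposed; with adapter lifetime now tied to the model, that per-context cleanup has no work left to do. For reference, a generic sketch of the GC-cleanup pattern being removed (illustrative types, not the library's API):

    // A held value (a set of resources) is cleaned up if the owner object is
    // garbage-collected without an explicit dispose() call.
    type Resource = {release(): void};

    const registry = new FinalizationRegistry<Set<Resource>>((resources) => {
        for (const resource of resources)
            resource.release();
    });

    class Owner {
        private readonly resources = new Set<Resource>();

        public constructor() {
            registry.register(this, this.resources, this);
        }

        public dispose() {
            registry.unregister(this); // explicit disposal supersedes the GC hook

            for (const resource of this.resources)
                resource.release();

            this.resources.clear();
        }
    }
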
src/evaluator/LlamaContext/types.ts (2 additions & 0 deletions)

@@ -126,6 +126,8 @@ export type LlamaContextOptions = {
      * without the need for extensive retraining from scratch.
      *
      * If a string is provided, it will be treated as a path to a single LoRA adapter file.
+     *
+     * The adapters will be released from memory once the model (not just the context) is disposed.
      */
     lora?: string | {
         adapters: Array<{
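
Note: the new doc comment pins down adapter lifetime: disposing a context does not unload its adapters; they stay in memory until the model itself is disposed. A usage sketch of that documented behavior (the `getLlama`/`loadModel`/`createContext` calls follow node-llama-cpp's public API; file paths are placeholders):

    import {getLlama} from "node-llama-cpp";

    const llama = await getLlama();
    const model = await llama.loadModel({modelPath: "model.gguf"});

    // a string `lora` value is treated as a path to a single adapter file
    const context = await model.createContext({lora: "adapter.gguf"});

    await context.dispose(); // the adapter remains loaded: its lifetime follows the model
    await model.dispose();   // the adapter is released together with the model
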
src/evaluator/LlamaModel/LlamaModel.ts (0 additions & 18 deletions)

@@ -275,8 +275,6 @@ export class LlamaModel {
             this._llamaPreventDisposalHandle.dispose();
         });

-        this._removeLoraUsage = this._removeLoraUsage.bind(this);
-
         this.tokenize = this.tokenize.bind(this);
         this.detokenize = this.detokenize.bind(this);
         this.isSpecialToken = this.isSpecialToken.bind(this);
@@ -703,22 +701,6 @@
         });
     }

-    /** @internal */
-    public async _removeLoraUsage(loraAdapters: Set<AddonModelLora>) {
-        return await withLock([this._loraAdapters, "modify"], async () => {
-            await Promise.all(
-                [...loraAdapters].map(async (lora) => {
-                    lora.usages--;
-
-                    if (lora.usages <= 0 && this._loraAdapters.get(lora.filePath) === lora) {
-                        this._loraAdapters.delete(lora.filePath);
-                        await lora.dispose();
-                    }
-                })
-            );
-        });
-    }
-
     /** @internal */
     public static async _create(modelOptions: LlamaModelOptions, {
         _llama
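
Note: the removed `_removeLoraUsage` performed lock-guarded, reference-counted adapter disposal; once adapters are released together with the model, per-context usage counting becomes unnecessary. For contrast, a generic sketch of the refcounting shape being retired (illustrative types and names, not the library's API):

    // An adapter is disposed only when its last user releases it and it is
    // still the instance tracked by the owner's map.
    type CountedAdapter = {filePath: string, usages: number, dispose(): Promise<void>};

    async function releaseUsages(
        tracked: Map<string, CountedAdapter>,
        released: Set<CountedAdapter>
    ): Promise<void> {
        await Promise.all(
            [...released].map(async (adapter) => {
                adapter.usages--;

                if (adapter.usages <= 0 && tracked.get(adapter.filePath) === adapter) {
                    tracked.delete(adapter.filePath);
                    await adapter.dispose();
                }
            })
        );
    }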