From afbfe6370e356f3b70e677df3cfeeb94e6dd4809 Mon Sep 17 00:00:00 2001
From: mudler <2420543+mudler@users.noreply.github.com>
Date: Tue, 27 Jan 2026 11:42:31 +0000
Subject: [PATCH] chore(model gallery): :robot: add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 gallery/index.yaml | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 941f041c67f0..c205e5999f84 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,38 @@
 ---
+- name: "glm-4.7-flash-claude-4.5-opus-i1-mxfp4_moe_xl-exp"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/noctrex/GLM-4.7-Flash-Claude-4.5-Opus-i1-MXFP4_MOE_XL-exp-GGUF
+  description: |
+    **Model Description:**
+    This is an experimental quantized version of the GLM-4.7-Flash model, optimized with MXFP4 MOE for efficient deployment. It is derived from the `TeichAI/GLM-4.7-Flash-Claude-Opus-4.5-High-Reasoning-Distill` base model, trained for high-reasoning tasks like coding, science, and research. The model uses Unsloth for accelerated training and is designed for tasks requiring deep reasoning. It is still experimental, with no official benchmarks, but offers a lightweight, high-reasoning variant.
+
+    **Key Features:**
+    - **Quantization:** MXFP4 MOE (4-bit) with dynamic precision (BF16/Q8_0) based on tensor importance.
+    - **Use Cases:** Coding, scientific research, and deep reasoning.
+    - **License:** Apache-2.0.
+    - **Optimized for:** Speed and efficiency with Unsloth.
+
+    *Note: This is an experimental variant; results may vary, and benchmarks are pending.*
+  overrides:
+    parameters:
+      model: llama-cpp/models/imatrix.gguf  # NOTE(review): "imatrix.gguf" looks like an importance-matrix file, not model weights - confirm against the repo file list (same file is referenced under `files` below)
+    name: GLM-4.7-Flash-Claude-4.5-Opus-i1-MXFP4_MOE_XL-exp-GGUF
+    backend: llama-cpp
+    template:
+      use_tokenizer_template: true
+    known_usecases:
+      - chat
+    function:
+      grammar:
+        disable: true
+    description: Imported from https://huggingface.co/noctrex/GLM-4.7-Flash-Claude-4.5-Opus-i1-MXFP4_MOE_XL-exp-GGUF
+    options:
+      - use_jinja:true
+  files:
+    - filename: llama-cpp/models/imatrix.gguf
+      sha256: 3a27a5d7a01954883f9157b7ef1ca6123886ded8f6916dbd3f354ecd2cb670be
+      uri: https://huggingface.co/noctrex/GLM-4.7-Flash-Claude-4.5-Opus-i1-MXFP4_MOE_XL-exp-GGUF/resolve/main/imatrix.gguf
 - name: "glm-4.7-flash-derestricted"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls: