From 893e921a764da92b243d43c90fd3c8297d3dba40 Mon Sep 17 00:00:00 2001
From: Piotr Stachaczynski <piotr.stachaczynski@priva.com>
Date: Mon, 25 May 2026 14:24:58 +0200
Subject: [PATCH] feat: InferPage dockerization, model catalog expansion, and
 Ollama improvements

- Add Dockerfile.inferpage with cpu, cuda, ollama, and ollama-bundled targets
- Add entrypoint-ollama.sh for bundled Ollama+InferPage single image
- Add docker-compose.inferpage-ollama.yml compose stack
- Publish to GHCR via GitHub Actions (no DockerHub secrets needed)
- Make Ollama base URL configurable via MaIN__OllamaBaseUrl env var
- Add latest Claude (Opus 4.7, Sonnet 4.6, Haiku 4.5), OpenAI (GPT-5.x series,
  o3, o4-mini, Codex Mini), Gemini (3.x series), Groq (Llama 4, Qwen3, Compound),
  and Ollama cloud model definitions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../workflows/docker-publish-inferpage.yml    |  66 +++
 scripts/docker-compose.inferpage-ollama.yml   |  33 ++
 src/.dockerignore                             |  10 +
 src/Dockerfile.inferpage                      | 111 +++++
 src/MaIN.Core/.nuspec                         |   2 +-
 .../Interfaces/ModelContext/IModelContext.cs  |   8 +-
 src/MaIN.Core/Hub/Contexts/ModelContext.cs    | 148 ++++--
 src/MaIN.Core/MaIN.Core.csproj                |   2 +-
 src/MaIN.Domain/Configuration/MaINSettings.cs |   1 +
 src/MaIN.Domain/Entities/Chat.cs              |   2 -
 src/MaIN.Domain/MaIN.Domain.csproj            |   2 +-
 .../Models/Concrete/CloudModels.cs            | 440 +++++++++++++++++-
 src/MaIN.Domain/Models/Models.cs              |  69 ++-
 .../Components/Pages/Settings.razor           | 236 +++++++++-
 src/MaIN.InferPage/MaIN.InferPage.csproj      |   2 +-
 src/MaIN.InferPage/Program.cs                 |  17 +-
 src/MaIN.InferPage/Utils.cs                   |  16 +
 .../appsettings.Production.json               |  14 +
 .../Models/DownloadProgress.cs                |  15 +
 src/MaIN.Services/MaIN.Services.csproj        |   6 +-
 .../Services/DataSourceProvider.cs            |   6 +-
 .../Embeddings/LLamaEmbedderMaINClone.cs      |   2 +-
 .../Services/LLMService/OllamaService.cs      |   6 +-
 src/entrypoint-ollama.sh                      |  31 ++
 24 files changed, 1137 insertions(+), 108 deletions(-)
 create mode 100644 .github/workflows/docker-publish-inferpage.yml
 create mode 100644 scripts/docker-compose.inferpage-ollama.yml
 create mode 100644 src/.dockerignore
 create mode 100644 src/Dockerfile.inferpage
 create mode 100644 src/MaIN.InferPage/appsettings.Production.json
 create mode 100644 src/MaIN.Infrastructure/Models/DownloadProgress.cs
 create mode 100644 src/entrypoint-ollama.sh

diff --git a/.github/workflows/docker-publish-inferpage.yml b/.github/workflows/docker-publish-inferpage.yml
new file mode 100644
index 00000000..8affd48d
--- /dev/null
+++ b/.github/workflows/docker-publish-inferpage.yml
@@ -0,0 +1,66 @@
+name: Build and Publish InferPage Docker Images
+
+on:
+  push:
+    branches: [main]
+
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write  # lets GITHUB_TOKEN push images to ghcr.io
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up QEMU (for linux/arm64 cross-build)
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io  # NOTE(review): image names must be lowercase; repository_owner keeps the account's casing - confirm
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push CPU image (linux/amd64 + linux/arm64)
+        uses: docker/build-push-action@v6
+        with:
+          context: ./src
+          file: ./src/Dockerfile.inferpage
+          target: runtime-cpu
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: |
+            ghcr.io/${{ github.repository_owner }}/main-inferpage:cpu
+            ghcr.io/${{ github.repository_owner }}/main-inferpage:latest
+          cache-from: type=gha,scope=inferpage-cpu
+          cache-to: type=gha,mode=max,scope=inferpage-cpu  # one scope per target; the shared default scope is overwritten by each later build
+
+      - name: Build and push CUDA image (linux/amd64)
+        uses: docker/build-push-action@v6
+        with:
+          context: ./src
+          file: ./src/Dockerfile.inferpage
+          target: runtime-cuda
+          platforms: linux/amd64
+          push: true
+          tags: ghcr.io/${{ github.repository_owner }}/main-inferpage:cuda
+          cache-from: type=gha,scope=inferpage-cuda
+          cache-to: type=gha,mode=max,scope=inferpage-cuda
+
+      - name: Build and push Ollama-bundled image (linux/amd64 + linux/arm64)
+        uses: docker/build-push-action@v6
+        with:
+          context: ./src
+          file: ./src/Dockerfile.inferpage
+          target: runtime-ollama-bundled
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ghcr.io/${{ github.repository_owner }}/main-inferpage:ollama
+          cache-from: type=gha,scope=inferpage-ollama-bundled
+          cache-to: type=gha,mode=max,scope=inferpage-ollama-bundled
diff --git a/scripts/docker-compose.inferpage-ollama.yml b/scripts/docker-compose.inferpage-ollama.yml
new file mode 100644
index 00000000..099f76b7
--- /dev/null
+++ b/scripts/docker-compose.inferpage-ollama.yml
@@ -0,0 +1,33 @@
+services:
+
+  ollama:
+    image: ollama/ollama:latest
+    container_name: ollama
+    restart: unless-stopped
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama-models:/root/.ollama
+    # Uncomment to enable NVIDIA GPU pass-through:
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+
+  inferpage:
+    image: "ghcr.io/${GITHUB_OWNER:?GITHUB_OWNER must be set to the lowercase GHCR owner}/main-inferpage:ollama"
+    container_name: inferpage
+    restart: unless-stopped
+    ports:
+      - "5555:5555"
+    environment:
+      MaIN__OllamaBaseUrl: http://ollama:11434  # the "ollama" service name resolves on the compose network
+      MaIN__BackendType: "7"  # quoted so the value reaches the container as a string
+    depends_on:
+      - ollama  # start order only; does not wait for the Ollama API to be ready
+
+volumes:
+  ollama-models:
diff --git a/src/.dockerignore b/src/.dockerignore
new file mode 100644
index 00000000..8000416c
--- /dev/null
+++ b/src/.dockerignore
@@ -0,0 +1,10 @@
+**/bin/
+**/obj/
+**/.vs/
+**/.idea/
+**/*.user
+**/*.suo
+MaIN.Core.UnitTests/
+MaIN.Core.IntegrationTests/
+MaIN.Core.E2ETests/
+**/appsettings.*.local.json
diff --git a/src/Dockerfile.inferpage b/src/Dockerfile.inferpage
new file mode 100644
index 00000000..392afe4d
--- /dev/null
+++ b/src/Dockerfile.inferpage
@@ -0,0 +1,111 @@
+FROM mcr.microsoft.com/dotnet/sdk:10.0 AS restore
+WORKDIR /src
+# Only the project files are copied here, so the restore layer below is reused until a .csproj changes.
+COPY MaIN.Domain/MaIN.Domain.csproj                 MaIN.Domain/
+COPY MaIN.Infrastructure/MaIN.Infrastructure.csproj MaIN.Infrastructure/
+COPY MaIN.Services/MaIN.Services.csproj              MaIN.Services/
+COPY MaIN.Core/MaIN.Core.csproj                      MaIN.Core/
+COPY MaIN.InferPage/MaIN.InferPage.csproj            MaIN.InferPage/
+# Restore NuGet packages in their own layer, ahead of the full source copy done by the publish stage.
+RUN dotnet restore MaIN.InferPage/MaIN.InferPage.csproj
+
+FROM restore AS publish
+# Bring in the full sources on top of the already-restored project files.
+COPY MaIN.Domain/         MaIN.Domain/
+COPY MaIN.Infrastructure/ MaIN.Infrastructure/
+COPY MaIN.Services/       MaIN.Services/
+COPY MaIN.Core/           MaIN.Core/
+COPY MaIN.InferPage/      MaIN.InferPage/
+# --no-restore relies on the packages restored in the previous stage; every runtime stage copies /app/out.
+RUN dotnet publish MaIN.InferPage/MaIN.InferPage.csproj \
+    --framework net10.0 \
+    --configuration Release \
+    --no-restore \
+    --output /app/out
+
+FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda
+# OS packages for the app; the .NET runtime itself is copied from the aspnet image further down.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libicu74 \
+        libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=mcr.microsoft.com/dotnet/aspnet:10.0 /usr/share/dotnet /usr/share/dotnet
+RUN ln -sf /usr/share/dotnet/dotnet /usr/local/bin/dotnet
+
+WORKDIR /app
+# LD_LIBRARY_PATH prepends the WSL driver directory and keeps whatever the CUDA base image already exports.
+ENV DOTNET_ROOT=/usr/share/dotnet
+ENV ASPNETCORE_URLS=http://+:5555
+ENV ASPNETCORE_ENVIRONMENT=Production
+ENV MaIN__ModelsPath=/app/Models
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ENV LD_LIBRARY_PATH=/usr/lib/wsl/lib:${LD_LIBRARY_PATH}
+
+VOLUME /app/Models
+EXPOSE 5555
+
+COPY --from=publish /app/out .
+# Register the native backend directories with the loader. A redirect (not a pipe to tee) makes a failing find fail the build.
+RUN find /app/runtimes/linux-x64/native -maxdepth 2 -mindepth 1 -type d \
+        > /etc/ld.so.conf.d/llamasharp.conf \
+    && ldconfig
+
+ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"]
+
+FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-cpu
+# libgomp1 is the GNU OpenMP runtime; presumably required by the native inference libraries - verify.
+RUN apt-get update && apt-get install -y --no-install-recommends libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+ENV ASPNETCORE_URLS=http://+:5555
+ENV ASPNETCORE_ENVIRONMENT=Production
+ENV MaIN__ModelsPath=/app/Models
+# The models path above and the volume below point at the same directory.
+VOLUME /app/Models
+EXPOSE 5555
+
+COPY --from=publish /app/out .
+
+ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"]
+
+FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-ollama
+# No extra OS packages and no models volume: this target talks to an Ollama server over HTTP.
+WORKDIR /app
+# NOTE(review): host.docker.internal is not defined by default on Linux engines (needs --add-host=host.docker.internal:host-gateway) - confirm.
+ENV ASPNETCORE_URLS=http://+:5555
+ENV ASPNETCORE_ENVIRONMENT=Production
+ENV MaIN__OllamaBaseUrl=http://host.docker.internal:11434
+ENV MaIN__BackendType=7
+# NOTE(review): 7 is presumably the Ollama member of the BackendType enum - verify against the enum definition.
+EXPOSE 5555
+
+COPY --from=publish /app/out .
+
+ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"]
+
+FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-ollama-bundled
+# NOTE(review): only the ollama binary is copied and the source tag is unpinned; confirm no companion libraries from that image are needed.
+COPY --from=ollama/ollama:latest /usr/bin/ollama /usr/bin/ollama
+
+WORKDIR /app
+# The app is pointed at an Ollama server on localhost, i.e. inside this same container.
+ENV ASPNETCORE_URLS=http://+:5555
+ENV ASPNETCORE_ENVIRONMENT=Production
+ENV MaIN__OllamaBaseUrl=http://localhost:11434
+ENV MaIN__BackendType=7
+# /root/.ollama is declared as a volume; presumably Ollama's model store - verify.
+VOLUME /root/.ollama
+EXPOSE 5555
+EXPOSE 11434
+
+COPY --from=publish /app/out .
+COPY entrypoint-ollama.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+# The entrypoint script (not shown in this stage) replaces the direct "dotnet MaIN.InferPage.dll" start used by the other targets.
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/src/MaIN.Core/.nuspec b/src/MaIN.Core/.nuspec
index 7923c9f4..8ed72828 100644
--- a/src/MaIN.Core/.nuspec
+++ b/src/MaIN.Core/.nuspec
@@ -2,7 +2,7 @@
 <package>
   <metadata>
     <id>MaIN.NET</id>
-    <version>0.10.5</version>
+    <version>0.10.9</version>
     <authors>Wisedev</authors>
     <owners>Wisedev</owners>
     <icon>favicon.png</icon>
diff --git a/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs b/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs
index b2da7598..59054b10 100644
--- a/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs
+++ b/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs
@@ -1,4 +1,6 @@
-﻿using MaIN.Domain.Models.Abstract;
+﻿using MaIN.Core.Hub.Contexts;
+using MaIN.Domain.Models.Abstract;
+using MaIN.Infrastructure.Models;
 
 namespace MaIN.Core.Hub.Contexts.Interfaces.ModelContext;
 
@@ -51,6 +53,8 @@ public interface IModelContext
     /// returning the context instance implementing <see cref="IModelContext"/> for method chaining.</returns>
     Task<IModelContext> DownloadAsync(string modelId, CancellationToken cancellationToken = default);
 
+    Task<IModelContext> DownloadAsync(string modelId, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken = default);
+
     /// <summary>
     /// Ensures a known local model is downloaded before use. If the model is already present on disk the call
     /// returns immediately; if not, the model is downloaded. Cloud models are silently skipped.
@@ -62,6 +66,8 @@ public interface IModelContext
     /// <see cref="IModelContext"/> for method chaining.</returns>
     Task<IModelContext> EnsureDownloadedAsync(string modelId, CancellationToken cancellationToken = default);
 
+    Task<IModelContext> EnsureDownloadedAsync(string modelId, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken = default);
+
     /// <summary>
     /// Ensures a known local model is downloaded before use using a strongly-typed model reference.
     /// If the model is already present on disk the call returns immediately; if not, the model is downloaded.
diff --git a/src/MaIN.Core/Hub/Contexts/ModelContext.cs b/src/MaIN.Core/Hub/Contexts/ModelContext.cs
index 167887f3..fa45c80f 100644
--- a/src/MaIN.Core/Hub/Contexts/ModelContext.cs
+++ b/src/MaIN.Core/Hub/Contexts/ModelContext.cs
@@ -8,6 +8,7 @@
 using MaIN.Services.Constants;
 using MaIN.Services.Services.LLMService.Utils;
 using System.Diagnostics;
+using MaIN.Infrastructure.Models;
 
 namespace MaIN.Core.Hub.Contexts;
 
@@ -22,6 +23,8 @@ public sealed class ModelContext : IModelContext
     private const int FileStreamBufferSize = 65536;
     private const int ProgressUpdateIntervalMilliseconds = 1000;
     private static readonly TimeSpan DefaultHttpTimeout = TimeSpan.FromMinutes(30);
+    private const int MaxRetryAttempts = 5;
+    private static readonly TimeSpan ReadStallTimeout = TimeSpan.FromSeconds(30);
 
     internal ModelContext(MaINSettings settings, IHttpClientFactory httpClientFactory)
     {
@@ -53,7 +56,10 @@ public bool Exists(string modelId)
         return localModel.IsDownloaded(_defaultModelsPath);
     }
 
-    public async Task<IModelContext> EnsureDownloadedAsync(string modelId, CancellationToken cancellationToken = default)
+    public Task<IModelContext> EnsureDownloadedAsync(string modelId, CancellationToken cancellationToken = default)
+        => EnsureDownloadedAsync(modelId, null, cancellationToken);
+
+    public async Task<IModelContext> EnsureDownloadedAsync(string modelId, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken = default)
     {
         if (string.IsNullOrWhiteSpace(modelId))
         {
@@ -77,7 +83,7 @@ public async Task<IModelContext> EnsureDownloadedAsync(string modelId, Cancellat
             // Double-check
             if (!localModel.IsDownloaded(_defaultModelsPath))
             {
-                await DownloadModelAsync(localModel, cancellationToken);
+                await DownloadModelAsync(localModel, progress, cancellationToken);
             }
         }
 
@@ -90,7 +96,10 @@ public async Task<IModelContext> EnsureDownloadedAsync(string modelId, Cancellat
         return await EnsureDownloadedAsync(model.Id, cancellationToken);
     }
 
-    public async Task<IModelContext> DownloadAsync(string modelId, CancellationToken cancellationToken = default)
+    public Task<IModelContext> DownloadAsync(string modelId, CancellationToken cancellationToken = default)
+        => DownloadAsync(modelId, null, cancellationToken);
+
+    public async Task<IModelContext> DownloadAsync(string modelId, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken = default)
     {
         if (string.IsNullOrWhiteSpace(modelId))
         {
@@ -106,7 +115,7 @@ public async Task<IModelContext> DownloadAsync(string modelId, CancellationToken
 
         using (await _downloadLocks.LockAsync(modelId, cancellationToken))
         {
-            await DownloadModelAsync(localModel, cancellationToken);
+            await DownloadModelAsync(localModel, progress, cancellationToken);
         }
 
         return this;
@@ -131,72 +140,123 @@ public async Task<IModelContext> LoadToCacheAsync(LocalModel model)
         return this;
     }
 
-    private async Task DownloadModelAsync(LocalModel localModel, CancellationToken cancellationToken)
+    private async Task DownloadModelAsync(LocalModel localModel, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken) // downloads with retry/backoff; throws IOException once all attempts fail
     {
-        using var httpClient = CreateConfiguredHttpClient();
-        var filePath = GetModelFilePath(localModel);
-
-        if (localModel.DownloadUrl is null)
-        {
-            throw new DownloadUrlNullOrEmptyException();
-        }
+        if (localModel.DownloadUrl is null) throw new DownloadUrlNullOrEmptyException();
 
+        var filePath = GetModelFilePath(localModel);
         Console.WriteLine($"Starting download of {localModel.FileName}...");
 
-        try
-        {
-            using var response = await httpClient.GetAsync(localModel.DownloadUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
-            response.EnsureSuccessStatusCode();
-
-            await DownloadWithProgressAsync(response, filePath, localModel.FileName, cancellationToken);
-        }
-        catch (Exception ex)
+        for (int attempt = 0; attempt <= MaxRetryAttempts; attempt++) // attempt 0 is the initial try, 1..MaxRetryAttempts are retries
         {
-            Console.WriteLine($"Download failed: {ex.Message}");
+            if (attempt > 0)
+            {
+                var delay = TimeSpan.FromSeconds(Math.Min(2 * Math.Pow(2, attempt - 1), 60)); // exponential backoff: 2s, 4s, 8s, ... capped at 60s
+                Console.WriteLine($"\nRetrying ({attempt}/{MaxRetryAttempts}) in {delay.TotalSeconds:F0}s...");
+                await Task.Delay(delay, cancellationToken);
+            }
 
-            if (File.Exists(filePath))
+            try
             {
-                File.Delete(filePath);
+                await TryDownloadWithResumeAsync(localModel.DownloadUrl, filePath, localModel.FileName, progress, cancellationToken);
+                return;
+            }
+            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
+            {
+                if (File.Exists(filePath)) File.Delete(filePath); // caller cancelled: do not leave a partial file behind
+                throw;
+            }
+            catch (Exception ex) // deliberately unfiltered: a filter on attempt let the last failure escape and skipped the cleanup below
+            {
+                Console.WriteLine($"\nDownload error: {ex.Message}");
             }
-            throw;
         }
+        // Every attempt failed: remove the partial file and report one failure to the caller.
+        if (File.Exists(filePath)) File.Delete(filePath);
+        throw new IOException($"Download of {localModel.FileName} failed after {MaxRetryAttempts} retries.");
     }
 
-    private static async Task DownloadWithProgressAsync(HttpResponseMessage response, string filePath, string fileName, CancellationToken cancellationToken)
+    private async Task TryDownloadWithResumeAsync(Uri url, string filePath, string fileName, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken) // one download attempt; resumes from the bytes already on disk when the server allows it
     {
-        var totalBytes = response.Content.Headers.ContentLength;
-        var totalBytesRead = 0L;
-        var buffer = new byte[DefaultBufferSize];
-        var progressStopwatch = Stopwatch.StartNew();
-        var totalStopwatch = Stopwatch.StartNew();
+        var resumeFrom = File.Exists(filePath) ? new FileInfo(filePath).Length : 0L;
 
-        if (totalBytes.HasValue)
+        using var httpClient = CreateConfiguredHttpClient();
+        using var request = new HttpRequestMessage(HttpMethod.Get, url);
+        if (resumeFrom > 0)
+            request.Headers.Range = new System.Net.Http.Headers.RangeHeaderValue(resumeFrom, null);
+        // NOTE(review): the Range request carries no If-Range/ETag validator, so a changed remote file would be appended to stale bytes - confirm acceptable.
+        using var response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
+        // 416 is treated as "nothing left to fetch"; the local file's integrity is not verified here.
+        if (response.StatusCode == System.Net.HttpStatusCode.RequestedRangeNotSatisfiable)
         {
-            Console.WriteLine($"File size: {FormatBytes(totalBytes.Value)}");
+            Console.WriteLine("\nFile already fully downloaded.");
+            return;
         }
+        // 206 means the server honoured the Range header; any other status restarts the byte count at 0.
-        await using var fileStream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None, FileStreamBufferSize);
+        var isResume = response.StatusCode == System.Net.HttpStatusCode.PartialContent;
+        if (!isResume) resumeFrom = 0;
+
+        response.EnsureSuccessStatusCode();
+        // When resuming, the bytes already on disk are added to the response's Content-Length to form the total.
+        long? totalBytes = response.Content.Headers.ContentLength is { } cl
+            ? cl + (isResume ? resumeFrom : 0)
+            : null;
+
+        if (isResume)
+            Console.WriteLine($"Resuming from {FormatBytes(resumeFrom)}...");
+        else if (totalBytes.HasValue)
+            Console.WriteLine($"File size: {FormatBytes(totalBytes.Value)}");
+        // Append to the partial file when resuming; otherwise FileMode.Create truncates and starts over.
+        var fileMode = isResume ? FileMode.Append : FileMode.Create;
+        await using var fileStream = new FileStream(filePath, fileMode, FileAccess.Write, FileShare.None, FileStreamBufferSize);
         await using var contentStream = await response.Content.ReadAsStreamAsync(cancellationToken);
 
+        await ReadChunksAsync(contentStream, fileStream, resumeFrom, totalBytes, fileName, progress, cancellationToken);
+    }
+
+    private static async Task ReadChunksAsync(Stream content, FileStream file, long startOffset, long? totalBytes, string fileName, IProgress<DownloadProgress>? progress, CancellationToken cancellationToken) // copies content into file, reporting progress; throws IOException when a read stalls
+    {
+        var totalBytesRead = startOffset;
+        var buffer = new byte[DefaultBufferSize];
+        var progressStopwatch = Stopwatch.StartNew();
+        var totalStopwatch = Stopwatch.StartNew();
+        // totalBytesRead starts at startOffset so reported progress covers the whole file when resuming.
         while (true)
         {
-            var bytesRead = await contentStream.ReadAsync(buffer, cancellationToken);
-            if (bytesRead == 0)
+            int bytesRead;
+            using (var stallCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken)) // per-read watchdog linked to the caller's token
             {
-                break;
+                stallCts.CancelAfter(ReadStallTimeout);
+                try
+                {
+                    bytesRead = await content.ReadAsync(buffer, stallCts.Token);
+                }
+                catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested) // the watchdog fired, not the caller
+                {
+                    throw new IOException($"Download stalled: no data received for {ReadStallTimeout.TotalSeconds:F0}s.");
+                }
             }
 
-            await fileStream.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken);
+            if (bytesRead == 0) break;
+            // Write the chunk to disk before advancing the counter.
+            await file.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken);
             totalBytesRead += bytesRead;
 
             if (ShouldUpdateProgress(progressStopwatch))
             {
+                var elapsed = totalStopwatch.Elapsed.TotalSeconds;
+                var speed = elapsed > 0 ? (totalBytesRead - startOffset) / elapsed : 0; // rate of this attempt only; resumed bytes were not transferred now
                 ShowProgress(totalBytesRead, totalBytes, totalStopwatch);
+                progress?.Report(new DownloadProgress(totalBytesRead, totalBytes, speed));
                 progressStopwatch.Restart();
             }
         }
 
-        ShowFinalProgress(totalBytesRead, totalStopwatch, fileName);
+        var totalTime = totalStopwatch.Elapsed;
+        var avgSpeed = totalTime.TotalSeconds > 0 ? (totalBytesRead - startOffset) / totalTime.TotalSeconds : 0; // exclude bytes already on disk before this attempt
+        Console.WriteLine($"\nDownload completed: {fileName}. " +
+                          $"Total: {FormatBytes(totalBytesRead)}, Time: {totalTime:hh\\:mm\\:ss}, Speed: {FormatBytes((long)avgSpeed)}/s");
+        progress?.Report(new DownloadProgress(totalBytesRead, totalBytes, 0));
     }
 
     private HttpClient CreateConfiguredHttpClient()
@@ -241,18 +301,6 @@ private static void ShowProgress(long totalBytesRead, long? totalBytes, Stopwatc
         }
     }
 
-    private static void ShowFinalProgress(long totalBytesRead, Stopwatch totalStopwatch, string fileName)
-    {
-        totalStopwatch.Stop();
-        var totalTime = totalStopwatch.Elapsed;
-        var avgSpeed = totalTime.TotalSeconds > 0 ? totalBytesRead / totalTime.TotalSeconds : 0;
-
-        Console.WriteLine($"\nDownload completed: {fileName}. " +
-                         $"Total size: {FormatBytes(totalBytesRead)}, " +
-                         $"Time: {totalTime:hh\\:mm\\:ss}, " +
-                         $"Average speed: {FormatBytes((long)avgSpeed)}/s");
-    }
-
     private static string FormatBytes(long bytes)
     {
         if (bytes == 0)
diff --git a/src/MaIN.Core/MaIN.Core.csproj b/src/MaIN.Core/MaIN.Core.csproj
index 667eedf9..f0a17974 100644
--- a/src/MaIN.Core/MaIN.Core.csproj
+++ b/src/MaIN.Core/MaIN.Core.csproj
@@ -8,7 +8,7 @@
 
     <ItemGroup>
         <PackageReference Include="AsyncKeyedLock" Version="8.0.2" />
-        <PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.26.0" />
+        <PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.27.0" />
         <PackageReference Include="Tesseract.Data.English" Version="4.0.0" />
     </ItemGroup>
 
diff --git a/src/MaIN.Domain/Configuration/MaINSettings.cs b/src/MaIN.Domain/Configuration/MaINSettings.cs
index 738ee2aa..f53ccc0a 100644
--- a/src/MaIN.Domain/Configuration/MaINSettings.cs
+++ b/src/MaIN.Domain/Configuration/MaINSettings.cs
@@ -13,6 +13,7 @@ public class MaINSettings
     public string? AnthropicKey { get; set; }
     public string? GroqCloudKey { get; set; }
     public string? OllamaKey { get; set; }
+    public string? OllamaBaseUrl { get; set; }
     public string? XaiKey { get; set; }
     public MongoDbSettings? MongoDbSettings { get; set; }
     public FileSystemSettings? FileSystemSettings { get; set; }
diff --git a/src/MaIN.Domain/Entities/Chat.cs b/src/MaIN.Domain/Entities/Chat.cs
index 175a4bf1..a233e94e 100644
--- a/src/MaIN.Domain/Entities/Chat.cs
+++ b/src/MaIN.Domain/Entities/Chat.cs
@@ -1,9 +1,7 @@
 using LLama.Batched;
-using MaIN.Domain.Configuration;
 using MaIN.Domain.Configuration.BackendInferenceParams;
 using MaIN.Domain.Entities.Skills;
 using MaIN.Domain.Entities.Tools;
-using MaIN.Domain.Models.Abstract;
 using Grammar = MaIN.Domain.Models.Grammar;
 
 namespace MaIN.Domain.Entities;
diff --git a/src/MaIN.Domain/MaIN.Domain.csproj b/src/MaIN.Domain/MaIN.Domain.csproj
index 612f95d4..9f995107 100644
--- a/src/MaIN.Domain/MaIN.Domain.csproj
+++ b/src/MaIN.Domain/MaIN.Domain.csproj
@@ -8,7 +8,7 @@
     </PropertyGroup>
 
     <ItemGroup>
-      <PackageReference Include="LLamaSharp" Version="0.26.0" />
+      <PackageReference Include="LLamaSharp" Version="0.27.0" />
     </ItemGroup>
 
 </Project>
diff --git a/src/MaIN.Domain/Models/Concrete/CloudModels.cs b/src/MaIN.Domain/Models/Concrete/CloudModels.cs
index 87b55fb0..797432d4 100644
--- a/src/MaIN.Domain/Models/Concrete/CloudModels.cs
+++ b/src/MaIN.Domain/Models/Concrete/CloudModels.cs
@@ -5,6 +5,61 @@ namespace MaIN.Domain.Models.Concrete;
 
 // ===== OpenAI Models =====
 
+public sealed record Gpt4o() : CloudModel(
+    Models.OpenAi.Gpt4o,
+    BackendType.OpenAi,
+    "GPT-4o",
+    128000,
+    "OpenAI's flagship multimodal model with strong reasoning and vision"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record Gpt4_1() : CloudModel(
+    Models.OpenAi.Gpt4_1,
+    BackendType.OpenAi,
+    "GPT-4.1",
+    1000000,
+    "OpenAI's most capable GPT model with 1M token context window"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record O3() : CloudModel(
+    Models.OpenAi.O3,
+    BackendType.OpenAi,
+    "o3",
+    200000,
+    "OpenAI's most powerful reasoning model for complex multi-step problems"), IVisionModel, IReasoningModel
+{
+    public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record O3Mini() : CloudModel(
+    Models.OpenAi.O3Mini,
+    BackendType.OpenAi,
+    "o3 Mini",
+    200000,
+    "Compact and efficient OpenAI reasoning model"), IReasoningModel
+{
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record O4Mini() : CloudModel(
+    Models.OpenAi.O4Mini,
+    BackendType.OpenAi,
+    "o4 Mini",
+    200000,
+    "OpenAI's fast reasoning model optimised for agentic tool use"), IVisionModel, IReasoningModel
+{
+    public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
 public sealed record Gpt4oMini() : CloudModel(
     Models.OpenAi.Gpt4oMini,
     BackendType.OpenAi,
@@ -25,6 +80,28 @@ public sealed record Gpt4_1Mini() : CloudModel(
     public string? MMProjectName => null;
 }
 
+public sealed record Gpt5() : CloudModel(
+    Models.OpenAi.Gpt5,
+    BackendType.OpenAi,
+    "GPT-5",
+    1000000,
+    "OpenAI's most capable model with native reasoning and multimodal understanding"), IVisionModel, IReasoningModel
+{
+    public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record Gpt5Mini() : CloudModel(
+    Models.OpenAi.Gpt5Mini,
+    BackendType.OpenAi,
+    "GPT-5 Mini",
+    1000000,
+    "Faster and more affordable GPT-5 variant"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
 public sealed record Gpt5Nano() : CloudModel(
     Models.OpenAi.Gpt5Nano,
     BackendType.OpenAi,
@@ -35,14 +112,90 @@ public sealed record Gpt5Nano() : CloudModel(
     public string? MMProjectName => null;
 }
 
+public sealed record Gpt5_1() : CloudModel(
+    Models.OpenAi.Gpt5_1,
+    BackendType.OpenAi,
+    "GPT-5.1",
+    1000000,
+    "OpenAI's flagship model for coding and agentic tasks with configurable reasoning"), IVisionModel, IReasoningModel
+{
+    public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record Gpt5_2() : CloudModel(
+    Models.OpenAi.Gpt5_2,
+    BackendType.OpenAi,
+    "GPT-5.2",
+    1000000,
+    "OpenAI frontier model for complex professional work with long-context reasoning"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record Gpt5_4() : CloudModel(
+    Models.OpenAi.Gpt5_4,
+    BackendType.OpenAi,
+    "GPT-5.4",
+    1000000,
+    "OpenAI frontier model producing smarter and more precise responses"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record Gpt5_4Mini() : CloudModel(
+    Models.OpenAi.Gpt5_4Mini,
+    BackendType.OpenAi,
+    "GPT-5.4 Mini",
+    1000000,
+    "Faster, more efficient GPT-5.4 variant designed for high-volume workloads"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record Gpt5_4Nano() : CloudModel(
+    Models.OpenAi.Gpt5_4Nano,
+    BackendType.OpenAi,
+    "GPT-5.4 Nano",
+    1000000,
+    "Ultra-fast GPT-5.4 variant optimised for classification, extraction, and sub-agents"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
 public sealed record Gpt5_5() : CloudModel(
     Models.OpenAi.Gpt5_5,
     BackendType.OpenAi,
     "GPT-5.5",
-    ModelDefaults.DefaultMaxContextWindow,
-    "OpenAI model required for native Skills API (shell tool + container.skills)"), IVisionModel
+    1000000,
+    "OpenAI's latest flagship model for the most complex professional and agentic work"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record Gpt5_5Pro() : CloudModel(
+    Models.OpenAi.Gpt5_5Pro,
+    BackendType.OpenAi,
+    "GPT-5.5 Pro",
+    1000000,
+    "GPT-5.5 variant that uses extended compute to think harder for maximum precision"), IVisionModel, IReasoningModel
+{
+    public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record CodexMini() : CloudModel(
+    Models.OpenAi.CodexMini,
+    BackendType.OpenAi,
+    "Codex Mini",
+    200000,
+    "OpenAI's Codex model optimised for agentic code generation and software engineering"), IVisionModel, IReasoningModel
 {
     public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
 }
 
 public sealed record DallE3() : CloudModel(
@@ -61,6 +214,38 @@ public sealed record GptImage1() : CloudModel(
 
 // ===== Anthropic Models =====
 
+public sealed record ClaudeOpus4_7() : CloudModel(
+    Models.Anthropic.ClaudeOpus4_7,
+    BackendType.Anthropic,
+    "Claude Opus 4.7",
+    200000,
+    "Anthropic's most capable model with extended thinking and superior reasoning"), IVisionModel, IReasoningModel
+{
+    public string? MMProjectName => null;
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record ClaudeSonnet4_6() : CloudModel(
+    Models.Anthropic.ClaudeSonnet4_6,
+    BackendType.Anthropic,
+    "Claude Sonnet 4.6",
+    200000,
+    "Anthropic's balanced model with strong performance and speed"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record ClaudeHaiku4_5() : CloudModel(
+    Models.Anthropic.ClaudeHaiku4_5,
+    BackendType.Anthropic,
+    "Claude Haiku 4.5",
+    200000,
+    "Anthropic's fastest and most compact model for everyday tasks"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
 public sealed record ClaudeSonnet4() : CloudModel(
     Models.Anthropic.ClaudeSonnet4,
     BackendType.Anthropic,
@@ -81,27 +266,44 @@ public sealed record ClaudeSonnet4_5() : CloudModel(
     public string? MMProjectName => null;
 }
 
-public sealed record ClaudeOpus4_6() : CloudModel(
-    Models.Anthropic.ClaudeOpus4_6,
-    BackendType.Anthropic,
-    "Claude Opus 4.6",
-    200000,
-    "Anthropic flagship reasoning model — supports native Skills API (container.skills + code_execution beta)"), IVisionModel
+// ===== Gemini Models =====
+
+public sealed record Gemini3_5Flash() : CloudModel(
+    Models.Gemini.Gemini3_5Flash,
+    BackendType.Gemini,
+    "Gemini 3.5 Flash",
+    1000000,
+    "Google's latest flagship model for agentic tasks, coding, and multi-step workflows"), IVisionModel
 {
     public string? MMProjectName => null;
 }
 
-public sealed record ClaudeOpus4_7() : CloudModel(
-    Models.Anthropic.ClaudeOpus4_7,
-    BackendType.Anthropic,
-    "Claude Opus 4.7",
-    200000,
-    "Latest Opus revision — recommended for stable Anthropic Skills API usage"), IVisionModel
+public sealed record Gemini3_1FlashLite() : CloudModel(
+    Models.Gemini.Gemini3_1FlashLite,
+    BackendType.Gemini,
+    "Gemini 3.1 Flash Lite",
+    1000000,
+    "Low-latency, cost-efficient Gemini model for high-volume agentic and extraction workloads"), IVisionModel
 {
     public string? MMProjectName => null;
 }
 
-// ===== Gemini Models =====
+public sealed record Gemini3_1ProPreview() : CloudModel(
+    Models.Gemini.Gemini3_1ProPreview,
+    BackendType.Gemini,
+    "Gemini 3.1 Pro (Preview)",
+    1000000,
+    "Google's most capable Pro model with improved thinking and factual consistency"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record Gemini3_1FlashImagePreview() : CloudModel(
+    Models.Gemini.Gemini3_1FlashImagePreview,
+    BackendType.Gemini,
+    "Gemini 3.1 Flash Image (Preview)",
+    4000,
+    "Google's latest image generation model via Gemini Flash"), IImageGenerationModel;
 
 public sealed record Gemini2_5Flash() : CloudModel(
     Models.Gemini.Gemini2_5Flash,
@@ -268,6 +470,37 @@ public sealed record GrokImagineImagePro() : CloudModel(
 
 // ===== GroqCloud Models =====
 
+public sealed record Llama4Scout17b() : CloudModel(
+    Models.Groq.Llama4Scout17b,
+    BackendType.GroqCloud,
+    "Llama 4 Scout 17B (Groq)",
+    131072,
+    "Meta's MoE model with 17B active parameters, vision support, and fast inference on Groq"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record GroqCompound() : CloudModel(
+    Models.Groq.Compound,
+    BackendType.GroqCloud,
+    "Groq Compound",
+    131072,
+    "Groq's AI system with built-in web search and code execution for complex agentic tasks");
+
+public sealed record GroqCompoundMini() : CloudModel(
+    Models.Groq.CompoundMini,
+    BackendType.GroqCloud,
+    "Groq Compound Mini",
+    131072,
+    "Lightweight Groq compound system for fast agentic workflows with tool use");
+
+public sealed record Qwen3_32b() : CloudModel( // Groq-hosted Qwen 3 32B catalog entry
+    Models.Groq.Qwen3_32b,
+    BackendType.GroqCloud,
+    "Qwen 3 32B (Groq)",
+    131072, // same 128K (131,072-token) window as the other Groq entries in this section
+    "Alibaba's Qwen 3 32B model running on Groq infrastructure");
+
 public sealed record Llama3_1_8bInstant() : CloudModel(
     Models.Groq.Llama3_1_8b,
     BackendType.GroqCloud,
@@ -318,12 +551,187 @@ public sealed record DeepSeekChat() : CloudModel(
 
 // ===== Ollama Models =====
 
+// Llama 4
+public sealed record OllamaLlama4Scout() : CloudModel(
+    Models.Ollama.Llama4Scout,
+    BackendType.Ollama,
+    "Llama 4 Scout (Ollama)",
+    131072,
+    "Meta's MoE model with native multimodal understanding running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record OllamaLlama4Maverick() : CloudModel(
+    Models.Ollama.Llama4Maverick,
+    BackendType.Ollama,
+    "Llama 4 Maverick (Ollama)",
+    131072,
+    "Meta's larger Llama 4 MoE variant with multimodal capabilities running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+// Gemma 3
 public sealed record OllamaGemma3_4b() : CloudModel(
     Models.Ollama.Gemma3_4b,
     BackendType.Ollama,
     "Gemma3 4B (Ollama)",
-    8192,
+    128000,
     "Balanced 4B model running on Ollama for writing, analysis, and mathematical reasoning"), IVisionModel
 {
     public string? MMProjectName => null;
 }
+
+public sealed record OllamaGemma3_12b() : CloudModel(
+    Models.Ollama.Gemma3_12b,
+    BackendType.Ollama,
+    "Gemma3 12B (Ollama)",
+    128000,
+    "Google's mid-range Gemma 3 model with strong reasoning running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record OllamaGemma3_27b() : CloudModel(
+    Models.Ollama.Gemma3_27b,
+    BackendType.Ollama,
+    "Gemma3 27B (Ollama)",
+    128000,
+    "Google's largest Gemma 3 model with frontier-level performance running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+// Gemma 4
+public sealed record OllamaGemma4_E4b() : CloudModel(
+    Models.Ollama.Gemma4_E4b,
+    BackendType.Ollama,
+    "Gemma4 E4B (Ollama)",
+    128000,
+    "Efficient 4B Gemma 4 model for agentic and coding tasks running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record OllamaGemma4_26b() : CloudModel(
+    Models.Ollama.Gemma4_26b,
+    BackendType.Ollama,
+    "Gemma4 26B (Ollama)",
+    128000,
+    "Google's powerful Gemma 4 26B model for reasoning and multimodal tasks running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+// Qwen 3
+public sealed record OllamaQwen3_8b() : CloudModel(
+    Models.Ollama.Qwen3_8b,
+    BackendType.Ollama,
+    "Qwen3 8B (Ollama)",
+    128000,
+    "Alibaba's Qwen 3 8B model running on Ollama");
+
+public sealed record OllamaQwen3_14b() : CloudModel(
+    Models.Ollama.Qwen3_14b,
+    BackendType.Ollama,
+    "Qwen3 14B (Ollama)",
+    128000,
+    "Alibaba's Qwen 3 14B model running on Ollama");
+
+public sealed record OllamaQwen3_30b() : CloudModel(
+    Models.Ollama.Qwen3_30b,
+    BackendType.Ollama,
+    "Qwen3 30B (Ollama)",
+    128000,
+    "Alibaba's Qwen 3 30B MoE model running on Ollama");
+
+// Qwen 3.5
+public sealed record OllamaQwen3_5_9b() : CloudModel(
+    Models.Ollama.Qwen3_5_9b,
+    BackendType.Ollama,
+    "Qwen3.5 9B (Ollama)",
+    128000,
+    "Alibaba's multimodal Qwen 3.5 9B model running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+public sealed record OllamaQwen3_5_27b() : CloudModel(
+    Models.Ollama.Qwen3_5_27b,
+    BackendType.Ollama,
+    "Qwen3.5 27B (Ollama)",
+    128000,
+    "Alibaba's multimodal Qwen 3.5 27B model running on Ollama"), IVisionModel
+{
+    public string? MMProjectName => null;
+}
+
+// Qwen 3.6
+public sealed record OllamaQwen3_6_27b() : CloudModel(
+    Models.Ollama.Qwen3_6_27b,
+    BackendType.Ollama,
+    "Qwen3.6 27B (Ollama)",
+    128000,
+    "Alibaba's Qwen 3.6 27B with improved agentic coding running on Ollama");
+
+public sealed record OllamaQwen3_6_35b() : CloudModel(
+    Models.Ollama.Qwen3_6_35b,
+    BackendType.Ollama,
+    "Qwen3.6 35B (Ollama)",
+    128000,
+    "Alibaba's Qwen 3.6 35B with strong coding and reasoning running on Ollama");
+
+// Qwen 3 Coder
+public sealed record OllamaQwen3Coder_8b() : CloudModel(
+    Models.Ollama.Qwen3Coder_8b,
+    BackendType.Ollama,
+    "Qwen3-Coder 8B (Ollama)",
+    128000,
+    "Alibaba's efficient code-focused Qwen 3 model running on Ollama");
+
+public sealed record OllamaQwen3Coder_30b() : CloudModel(
+    Models.Ollama.Qwen3Coder_30b,
+    BackendType.Ollama,
+    "Qwen3-Coder 30B (Ollama)",
+    128000,
+    "Alibaba's most capable agentic code model running on Ollama");
+
+// DeepSeek R1
+public sealed record OllamaDeepSeekR1_7b() : CloudModel(
+    Models.Ollama.DeepSeekR1_7b,
+    BackendType.Ollama,
+    "DeepSeek R1 7B (Ollama)",
+    128000,
+    "DeepSeek's open reasoning model at 7B running on Ollama"), IReasoningModel
+{
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+public sealed record OllamaDeepSeekR1_14b() : CloudModel(
+    Models.Ollama.DeepSeekR1_14b,
+    BackendType.Ollama,
+    "DeepSeek R1 14B (Ollama)",
+    128000,
+    "DeepSeek's open reasoning model at 14B running on Ollama"), IReasoningModel
+{
+    public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
+    public string? AdditionalPrompt => null;
+}
+
+// Microsoft Phi
+public sealed record OllamaPhi4_14b() : CloudModel(
+    Models.Ollama.Phi4_14b,
+    BackendType.Ollama,
+    "Phi4 14B (Ollama)",
+    16000,
+    "Microsoft's state-of-the-art 14B model running on Ollama");
+
+// Mistral
+public sealed record OllamaMistral_7b() : CloudModel(
+    Models.Ollama.Mistral_7b,
+    BackendType.Ollama,
+    "Mistral 7B (Ollama)",
+    32000,
+    "Mistral AI's efficient 7B base model running on Ollama");
diff --git a/src/MaIN.Domain/Models/Models.cs b/src/MaIN.Domain/Models/Models.cs
index cd9604f4..55dfc21e 100644
--- a/src/MaIN.Domain/Models/Models.cs
+++ b/src/MaIN.Domain/Models/Models.cs
@@ -8,24 +8,43 @@ public static class Models
 {
     public static class OpenAi
     {
+        public const string Gpt4o = "gpt-4o";
         public const string Gpt4oMini = "gpt-4o-mini";
+        public const string Gpt4_1 = "gpt-4.1";
         public const string Gpt4_1Mini = "gpt-4.1-mini";
+        public const string Gpt5 = "gpt-5";
+        public const string Gpt5Mini = "gpt-5-mini";
         public const string Gpt5Nano = "gpt-5-nano";
+        public const string Gpt5_1 = "gpt-5.1";
+        public const string Gpt5_2 = "gpt-5.2";
+        public const string Gpt5_4 = "gpt-5.4";
+        public const string Gpt5_4Mini = "gpt-5.4-mini";
+        public const string Gpt5_4Nano = "gpt-5.4-nano";
         public const string Gpt5_5 = "gpt-5.5";
+        public const string Gpt5_5Pro = "gpt-5.5-pro";
+        public const string CodexMini = "codex-mini-latest";
+        public const string O3 = "o3";
+        public const string O3Mini = "o3-mini";
+        public const string O4Mini = "o4-mini";
         public const string DallE3 = "dall-e-3";
         public const string GptImage1 = "gpt-image-1";
     }
 
     public static class Anthropic
     {
+        public const string ClaudeOpus4_7 = "claude-opus-4-7";
+        public const string ClaudeSonnet4_6 = "claude-sonnet-4-6";
+        public const string ClaudeHaiku4_5 = "claude-haiku-4-5-20251001";
         public const string ClaudeSonnet4 = "claude-sonnet-4-20250514";
         public const string ClaudeSonnet4_5 = "claude-sonnet-4-5-20250929";
-        public const string ClaudeOpus4_6 = "claude-opus-4-6";
-        public const string ClaudeOpus4_7 = "claude-opus-4-7";
     }
 
     public static class Gemini
     {
+        public const string Gemini3_5Flash = "gemini-3.5-flash";
+        public const string Gemini3_1FlashLite = "gemini-3.1-flash-lite";
+        public const string Gemini3_1ProPreview = "gemini-3.1-pro-preview";
+        public const string Gemini3_1FlashImagePreview = "gemini-3.1-flash-image-preview";
         public const string Gemini2_5Pro = "gemini-2.5-pro";
         public const string Gemini2_5Flash = "gemini-2.5-flash";
         public const string Gemini2_0Flash = "gemini-2.0-flash";
@@ -47,10 +66,14 @@ public static class Xai
 
     public static class Groq
     {
-        public const string Llama3_1_8b = "llama-3.1-8b-instant";
+        public const string Llama4Scout17b = "meta-llama/llama-4-scout-17b-16e-instruct";
         public const string Llama3_3_70b = "llama-3.3-70b-versatile";
-        public const string GptOss20b = "openai/gpt-oss-20b";
+        public const string Llama3_1_8b = "llama-3.1-8b-instant";
+        public const string Qwen3_32b = "qwen/qwen3-32b";
+        public const string Compound = "groq/compound";
+        public const string CompoundMini = "groq/compound-mini";
         public const string GptOss120b = "openai/gpt-oss-120b";
+        public const string GptOss20b = "openai/gpt-oss-20b";
     }
 
     public static class DeepSeek
@@ -61,7 +84,45 @@ public static class DeepSeek
 
     public static class Ollama
     {
+        // Llama 4
+        public const string Llama4Scout = "llama4:scout";
+        public const string Llama4Maverick = "llama4:maverick";
+
+        // Gemma 3
         public const string Gemma3_4b = "gemma3:4b";
+        public const string Gemma3_12b = "gemma3:12b";
+        public const string Gemma3_27b = "gemma3:27b";
+
+        // Gemma 4
+        public const string Gemma4_E4b = "gemma4:e4b";
+        public const string Gemma4_26b = "gemma4:26b";
+
+        // Qwen 3
+        public const string Qwen3_8b = "qwen3:8b";
+        public const string Qwen3_14b = "qwen3:14b";
+        public const string Qwen3_30b = "qwen3:30b";
+
+        // Qwen 3.5
+        public const string Qwen3_5_9b = "qwen3.5:9b";
+        public const string Qwen3_5_27b = "qwen3.5:27b";
+
+        // Qwen 3.6
+        public const string Qwen3_6_27b = "qwen3.6:27b";
+        public const string Qwen3_6_35b = "qwen3.6:35b";
+
+        // Qwen 3 Coder
+        public const string Qwen3Coder_8b = "qwen3-coder:8b";
+        public const string Qwen3Coder_30b = "qwen3-coder:30b";
+
+        // DeepSeek R1
+        public const string DeepSeekR1_7b = "deepseek-r1:7b";
+        public const string DeepSeekR1_14b = "deepseek-r1:14b";
+
+        // Microsoft Phi
+        public const string Phi4_14b = "phi4:14b";
+
+        // Mistral
+        public const string Mistral_7b = "mistral:7b";
     }
 
     public static class Vertex
diff --git a/src/MaIN.InferPage/Components/Pages/Settings.razor b/src/MaIN.InferPage/Components/Pages/Settings.razor
index d8086e80..c7c832f7 100644
--- a/src/MaIN.InferPage/Components/Pages/Settings.razor
+++ b/src/MaIN.InferPage/Components/Pages/Settings.razor
@@ -1,6 +1,9 @@
 @using MaIN.Domain.Configuration
 @using MaIN.Domain.Configuration.Vertex
 @using MaIN.Domain.Models.Abstract
+@using MaIN.Core.Hub
+@using MaIN.Core.Hub.Contexts
+@using MaIN.Infrastructure.Models
 @inject SettingsService SettingsStorage
 @inject MaINSettings MaINSettings
 @rendermode @(new InteractiveServerRenderMode(prerender: false))
@@ -35,10 +38,25 @@
 
             <div class="settings-field">
                 <label class="settings-label">Model</label>
-                <FluentTextField @bind-Value="_modelName"
-                                 @bind-Value:after="OnModelNameChanged"
-                                 Placeholder="e.g., gpt-4o, gemma3-4b"
-                                 Style="width: 100%;" />
+                @if (_selectedBackend?.BackendType == BackendType.Self)
+                {
+                    <FluentAutocomplete TOption="LocalModel"
+                                        OptionText="@(m => FormatLocalModelName(m))"
+                                        SelectedOptions="@_selectedLocalModels"
+                                        SelectedOptionsChanged="@OnLocalModelSelected"
+                                        OnOptionsSearch="@SearchLocalModels"
+                                        MaximumSelectedOptions="1"
+                                        MaximumOptionsSearch="@_chatLocalModels.Length"
+                                        Placeholder="Search and select a model..."
+                                        Style="width:100%;" />
+                }
+                else
+                {
+                    <FluentTextField @bind-Value="_modelName"
+                                     @bind-Value:after="OnModelNameChanged"
+                                     Placeholder="e.g., gpt-4o, gemma3-4b"
+                                     Style="width: 100%;" />
+                }
             </div>
 
             @if (RequiresApiKey)
@@ -113,12 +131,21 @@
                 <div class="settings-field">
                     <label class="settings-label">Model Path</label>
                     <FluentTextField @bind-Value="_modelPath"
-                                     Placeholder="Optional: model.gguf, C:\dir\model.gguf, or C:\dir"
+                                     @bind-Value:after="OnModelPathChanged"
+                                     Placeholder="@_modelPathPlaceholder"
                                      Style="width: 100%;" />
                     @if (ResolvedModelPathPreview is { } preview)
                     {
                         <span class="api-key-hint">Will load: @preview</span>
                     }
+                    @if (_isDocker)
+                    {
+                        <span class="api-key-hint">
+                            Running in Docker — mount your models directory when starting the container:
+                            <code>-v /your/local/models:/app/Models</code>
+                            The path <code>/app/Models</code> is pre-configured as the default.
+                        </span>
+                    }
                 </div>
             }
 
@@ -164,6 +191,52 @@
                 </div>
             }
 
+            @if (_selectedBackend?.BackendType == BackendType.Self && _isRegisteredModel
+                 && ModelRegistry.TryGetById(_modelName!, out var _dlModel) && _dlModel is LocalModel { DownloadUrl: not null })
+            {
+                <div class="settings-field">
+                    <label class="settings-label">Download</label>
+                    @if (_isDownloading)
+                    {
+                        <div style="display:flex;flex-direction:column;gap:6px;">
+                            @if (_downloadProgress?.Percentage is { } pct)
+                            {
+                                <FluentProgress Value="@((int)pct)" Max="100" Style="width:100%;" />
+                                <span class="api-key-hint">
+                                    @($"{pct:F1}%  ·  {FormatBytes(_downloadProgress.Value.BytesRead)}")
+                                    @if (_downloadProgress.Value.TotalBytes is { } total) { @($" / {FormatBytes(total)}") }
+                                    @($"  ·  {FormatSpeed(_downloadProgress.Value.BytesPerSecond)}")
+                                    @if (_downloadProgress?.Eta is { } eta) { @($"  ·  ETA {eta:hh\\:mm\\:ss}") } @* hh included: "mm" alone is only the 0-59 minutes component, so ETAs of an hour or more were shown wrapped *@
+                                </span>
+                            }
+                            else
+                            {
+                                <div style="display:flex;align-items:center;gap:8px;">
+                                    <FluentProgressRing Style="width:20px;height:20px;" />
+                                    <span class="api-key-hint">Connecting...</span>
+                                </div>
+                            }
+                        </div>
+                    }
+                    else if (_isModelDownloaded)
+                    {
+                        <FluentBadge BackgroundColor="#16a34a" Color="#fff">Downloaded</FluentBadge>
+                    }
+                    else
+                    {
+                        <div style="display:flex;flex-direction:column;gap:4px;">
+                            <FluentButton Appearance="Appearance.Accent" OnClick="@DownloadModelAsync">
+                                Download
+                            </FluentButton>
+                            @if (_downloadError != null)
+                            {
+                                <span class="api-key-hint" style="color:var(--error);">@_downloadError</span>
+                            }
+                        </div>
+                    }
+                </div>
+            }
+
             @if (_selectedBackend?.BackendType == BackendType.Self && !_isRegisteredModel && _manualVision)
             {
                 <div class="settings-field">
@@ -191,6 +264,17 @@
     [Parameter] public EventCallback OnSettingsApplied { get; set; }
     [Parameter] public EventCallback OnClose { get; set; }
 
+    // Models offered in the Self-backend model picker, sorted by Name; built once when the type is initialised.
+    private static readonly LocalModel[] _chatLocalModels = ModelRegistry.GetAllLocal()
+        .Where(m => m.DownloadUrl != null &&  // only entries that can be downloaded
+                    m is not IImageGenerationModel &&  // image generators are not chat models
+                    !m.FileName.EndsWith(".onnx", StringComparison.OrdinalIgnoreCase) &&  // skips ONNX files (presumably TTS and similar; confirm)
+                    !m.Name.Contains("Embed", StringComparison.OrdinalIgnoreCase))  // name-based filter for embedding models
+        .OrderBy(m => m.Name)
+        .ToArray();
+
+    private IEnumerable<LocalModel> _selectedLocalModels = [];
+
     private List<BackendOption> _backendOptions = null!;
     private BackendOption? _selectedBackend;
     private string? _modelName;
@@ -237,13 +321,13 @@
                 fileName = sanitizedModel;
             }
 
-            // Relative path: prepend configured models directory
+            // Relative path: prepend configured models directory (or default)
             if (!Path.IsPathFullyQualified(fileName))
             {
                 var modelsDir = MaINSettings.ModelsPath
-                    ?? Environment.GetEnvironmentVariable("MaIN_ModelsPath");
-                if (!string.IsNullOrEmpty(modelsDir))
-                    fileName = Path.Combine(modelsDir, fileName);
+                    ?? Environment.GetEnvironmentVariable("MaIN_ModelsPath")
+                    ?? Utils.DefaultModelsPath;
+                fileName = Path.Combine(modelsDir, fileName);
             }
 
             return fileName;
@@ -286,6 +370,20 @@
     private bool _manualImageGen;
     private string? _mmProjName;
 
+    // Download state (registered local models only)
+    private bool _isModelDownloaded;
+    private bool _isDownloading;
+    private string? _downloadError;
+    private CancellationTokenSource? _downloadCts;
+    private DownloadProgress? _downloadProgress;
+
+    private static readonly bool _isDocker =
+        Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER") == "true";
+
+    private string _modelPathPlaceholder => _isDocker
+        ? "/app/Models  (or leave blank to use default)"
+        : "Optional: model.gguf, C:\\dir\\model.gguf, or C:\\dir";
+
     private bool RequiresApiKey => _selectedBackend?.RequiresApiKey == true;
     private bool IsVertexBackend => _selectedBackend?.BackendType == BackendType.Vertex;
     private bool HasVertexRequiredFields => !string.IsNullOrWhiteSpace(_vertexProjectId)
@@ -366,6 +464,7 @@
                 _vertexLocation = settings.VertexLocation;
             }
 
+            SyncLocalModelSelection();
             OnModelNameChanged();
         }
         else if (!Utils.NeedsConfiguration)
@@ -382,6 +481,7 @@
             _modelName = Utils.Model;
             _modelPath = Utils.Path;
 
+            SyncLocalModelSelection();
             OnModelNameChanged();
         }
 
@@ -398,6 +498,7 @@
         if (profile != null)
         {
             _modelName = profile.Model;
+            SyncLocalModelSelection();
             OnModelNameChanged();                    // sets _isRegisteredModel + auto-detects caps
             if (!_isRegisteredModel)                // only restore manual caps for unregistered models
             {
@@ -414,8 +515,14 @@
             if (!string.IsNullOrEmpty(lastModel))
             {
                 _modelName = lastModel;
+                SyncLocalModelSelection();
                 OnModelNameChanged();
             }
+            else
+            {
+                _modelName = null;
+                SyncLocalModelSelection();
+            }
         }
 
         if (IsVertexBackend)
@@ -442,8 +549,48 @@
         _apiKeyInput = null;
     }
 
+    // Picker callback: stores the selection and takes the first selected model's Id as the model name.
+    private void OnLocalModelSelected(IEnumerable<LocalModel> selected)
+    {
+        _modelName = (_selectedLocalModels = selected).FirstOrDefault()?.Id;
+        OnModelNameChanged();
+    }
+
+    // Autocomplete search: an empty query lists every model; otherwise Name or Id must contain the text, ignoring case.
+    private void SearchLocalModels(OptionsSearchEventArgs<LocalModel> e)
+    {
+        bool Hit(string field) => field.Contains(e.Text, StringComparison.OrdinalIgnoreCase);
+
+        if (string.IsNullOrEmpty(e.Text)) e.Items = _chatLocalModels;
+        else e.Items = _chatLocalModels.Where(candidate => Hit(candidate.Name) || Hit(candidate.Id));
+    }
+
+    // Picker label: the model name, followed by a bracketed capability list (Vision, Reasoning) when it has any.
+    private static string FormatLocalModelName(LocalModel m)
+    {
+        string?[] tags = [m is IVisionModel ? "Vision" : null, m is IReasoningModel ? "Reasoning" : null];
+        var caps = string.Join(", ", tags.Where(t => t != null));
+        return caps.Length > 0 ? $"{m.Name}  [{caps}]" : m.Name;
+    }
+
+    // Mirrors _modelName into the picker selection. The selection is left empty unless the
+    // backend is Self and the current model id is one of the listed chat models.
+    private void SyncLocalModelSelection()
+    {
+        _selectedLocalModels = [];
+        if (_selectedBackend?.BackendType != BackendType.Self || string.IsNullOrEmpty(_modelName))
+            return;
+
+        var current = _chatLocalModels.FirstOrDefault(m => m.Id == _modelName);
+        if (current != null)
+            _selectedLocalModels = [current];
+    }
+
     private void OnModelNameChanged()
     {
+        _isModelDownloaded = false;
+        _downloadError = null;
+
         if (string.IsNullOrWhiteSpace(_modelName))
         {
             _isRegisteredModel = false;
@@ -456,6 +603,9 @@
             _detectedVision = model is IVisionModel;
             _detectedReasoning = model is IReasoningModel;
             _detectedImageGen = model is IImageGenerationModel;
+
+            if (_selectedBackend?.BackendType == BackendType.Self && model is LocalModel localModel)
+                _isModelDownloaded = localModel.IsDownloaded(GetEffectiveModelsBasePath());
         }
         else
         {
@@ -469,6 +619,74 @@
         }
     }
 
+    // Re-evaluates the "Downloaded" state after the Model Path field changes (registered local models on Self only).
+    private void OnModelPathChanged()
+    {
+        if (!_isRegisteredModel || _selectedBackend?.BackendType != BackendType.Self) return;
+        if (!ModelRegistry.TryGetById(_modelName!, out var entry) || entry is not LocalModel local) return;
+
+        _isModelDownloaded = local.IsDownloaded(GetEffectiveModelsBasePath());
+        _downloadError = null;
+    }
+
+    // Directory used for download and "is downloaded" checks: the folder part of Model Path, else the configured/default dir.
+    private string GetEffectiveModelsBasePath()
+    {
+        var raw = _modelPath?.Trim() ?? "";
+        // A bare file name ("model.gguf") has no directory part; fall through to the default instead of returning "".
+        var dir = raw.EndsWith(".gguf", StringComparison.OrdinalIgnoreCase) ? Path.GetDirectoryName(raw) : raw;
+        if (!string.IsNullOrEmpty(dir))
+            return dir;
+
+        return MaINSettings.ModelsPath
+            ?? Environment.GetEnvironmentVariable("MaIN_ModelsPath")
+            ?? Utils.DefaultModelsPath;
+    }
+
+    // Downloads the selected registered model into the effective models directory and reports progress to the UI.
+    private async Task DownloadModelAsync()
+    {
+        _isDownloading = true;
+        _downloadError = null;
+        _downloadProgress = null;
+        _downloadCts = new CancellationTokenSource();
+        StateHasChanged();
+        try
+        {
+            var basePath = GetEffectiveModelsBasePath();
+            Directory.CreateDirectory(basePath);
+            // NOTE(review): presumably EnsureDownloadedAsync takes its target directory from these two settings; confirm.
+            Environment.SetEnvironmentVariable("MaIN_ModelsPath", basePath);
+            MaINSettings.ModelsPath = basePath;
+            var progress = new Progress<DownloadProgress>(p =>
+            {
+                _downloadProgress = p;
+                _ = InvokeAsync(StateHasChanged);
+            });
+            await AIHub.Model().EnsureDownloadedAsync(_modelName!, progress, _downloadCts.Token);
+            _isModelDownloaded = true;
+        }
+        catch (OperationCanceledException) { }  // cancellation is not reported as an error
+        catch (Exception ex) { _downloadError = ex.Message; }
+        finally
+        {
+            _isDownloading = false;
+            _downloadCts?.Dispose();  // release the token source instead of only dropping the reference
+            _downloadCts = null;
+            StateHasChanged();
+        }
+    }
+
+    // Size with 1024-based units and one decimal place; values under 1 KB are printed as whole bytes.
+    private static string FormatBytes(long bytes)
+    {
+        const double Kb = 1024.0, Mb = Kb * 1024, Gb = Mb * 1024;
+        return bytes >= Gb ? $"{bytes / Gb:F1} GB" : bytes >= Mb ? $"{bytes / Mb:F1} MB"
+             : bytes >= Kb ? $"{bytes / Kb:F1} KB" : $"{bytes} B";
+    }
+
+    private static string FormatSpeed(double bytesPerSecond) => FormatBytes((long)bytesPerSecond) + "/s";  // rate is truncated to whole bytes first
+
     private async Task LoadApiKeyPreview()
     {
         if (_selectedBackend == null || !_selectedBackend.RequiresApiKey)
diff --git a/src/MaIN.InferPage/MaIN.InferPage.csproj b/src/MaIN.InferPage/MaIN.InferPage.csproj
index ba1ff18a..66aafede 100644
--- a/src/MaIN.InferPage/MaIN.InferPage.csproj
+++ b/src/MaIN.InferPage/MaIN.InferPage.csproj
@@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk.Web">
 
     <PropertyGroup>
-        <TargetFrameworks>net8.0;net10.0</TargetFrameworks>
+        <TargetFramework>net10.0</TargetFramework>
         <Nullable>enable</Nullable>
         <ImplicitUsings>enable</ImplicitUsings>
         <IsPackable>false</IsPackable>
diff --git a/src/MaIN.InferPage/Program.cs b/src/MaIN.InferPage/Program.cs
index ef07369d..36a6d31a 100644
--- a/src/MaIN.InferPage/Program.cs
+++ b/src/MaIN.InferPage/Program.cs
@@ -75,16 +75,11 @@
             {
                 if (string.IsNullOrEmpty(modelPathArg))
                 {
-                    Console.WriteLine("Error: A model path must be provided using --path when a local model is specified.");
-                    return;
-                }
-
-                var envModelsPath = Environment.GetEnvironmentVariable("MaIN_ModelsPath");
-                if (string.IsNullOrEmpty(envModelsPath))
-                {
-                    Console.Write("Please enter the MaIN_ModelsPath: ");
-                    envModelsPath = Console.ReadLine();
-                    Environment.SetEnvironmentVariable("MaIN_ModelsPath", envModelsPath);
+                    var defaultPath = Utils.DefaultModelsPath;
+                    Directory.CreateDirectory(defaultPath);
+                    Environment.SetEnvironmentVariable("MaIN_ModelsPath", defaultPath);
+                    Utils.Path = defaultPath;
+                    Console.WriteLine($"No --path provided. Using default models directory: {defaultPath}");
                 }
             }
         }
@@ -129,9 +124,9 @@
 }
 
 app.UseHttpsRedirection();
-app.UseStaticFiles();
 app.UseAntiforgery();
 app.Services.UseMaIN();
+app.MapStaticAssets();
 app.MapRazorComponents<App>()
     .AddInteractiveServerRenderMode();
 
diff --git a/src/MaIN.InferPage/Utils.cs b/src/MaIN.InferPage/Utils.cs
index 582fe267..1fc14820 100644
--- a/src/MaIN.InferPage/Utils.cs
+++ b/src/MaIN.InferPage/Utils.cs
@@ -16,6 +16,8 @@ public static class Utils
 
     public static bool NeedsConfiguration { get; set; }
 
+    public static string DefaultModelsPath => System.IO.Path.Combine(Directory.GetCurrentDirectory(), "models");
+
     // Manual capability overrides for unregistered models (set from Settings UI)
     public static bool? ManualVision { get; set; }
     public static bool? ManualReasoning { get; set; }
@@ -67,6 +69,20 @@ public static void ApplySettings(
         {
             Path = null;
         }
+
+        if (backendType == BackendType.Self && Path == null)
+        {
+            // Prefer an already-configured path (e.g. MaIN_ModelsPath set by Docker ENV) so that
+            // volume mounts are respected without the user having to re-enter the path.
+            var defaultPath = Environment.GetEnvironmentVariable("MaIN_ModelsPath")
+                ?? (!string.IsNullOrEmpty(mainSettings.ModelsPath) ? mainSettings.ModelsPath : null)
+                ?? DefaultModelsPath;
+            Directory.CreateDirectory(defaultPath);
+            Path = defaultPath;
+            mainSettings.ModelsPath = defaultPath;
+            Environment.SetEnvironmentVariable("MaIN_ModelsPath", defaultPath);
+        }
+
         HasApiKey = !string.IsNullOrEmpty(apiKey);
         NeedsConfiguration = false;
 
diff --git a/src/MaIN.InferPage/appsettings.Production.json b/src/MaIN.InferPage/appsettings.Production.json
new file mode 100644
index 00000000..7152d15b
--- /dev/null
+++ b/src/MaIN.InferPage/appsettings.Production.json
@@ -0,0 +1,14 @@
+{
+  "Logging": {
+    "LogLevel": {
+      "LLama": "Debug"
+    }
+  },
+  "Kestrel": {
+    "EndPoints": {
+      "Http": {
+        "Url": "http://+:5555"
+      }
+    }
+  }
+}
diff --git a/src/MaIN.Infrastructure/Models/DownloadProgress.cs b/src/MaIN.Infrastructure/Models/DownloadProgress.cs
new file mode 100644
index 00000000..bcc8e3b4
--- /dev/null
+++ b/src/MaIN.Infrastructure/Models/DownloadProgress.cs
@@ -0,0 +1,45 @@
+namespace MaIN.Infrastructure.Models;
+
+/// <summary>
+/// Snapshot of a download in flight: bytes received so far, the expected total when it is known,
+/// and the current transfer rate.
+/// </summary>
+/// <param name="BytesRead">Number of bytes received so far.</param>
+/// <param name="TotalBytes">Expected total size in bytes, or <c>null</c> when unknown.</param>
+/// <param name="BytesPerSecond">Current transfer rate in bytes per second.</param>
+public readonly record struct DownloadProgress(
+    long BytesRead,
+    long? TotalBytes,
+    double BytesPerSecond)
+{
+    /// <summary>
+    /// Completed share in percent, or <c>null</c> when <see cref="TotalBytes"/> is missing or
+    /// not positive.
+    /// </summary>
+    public double? Percentage => TotalBytes is > 0
+        ? (double)BytesRead / TotalBytes.Value * 100
+        : null;
+
+    /// <summary>
+    /// Estimated time remaining, or <c>null</c> when it cannot be computed (unknown total, no
+    /// positive speed, or an estimate too large to represent as a <see cref="TimeSpan"/>).
+    /// Never negative: if more bytes arrived than announced, the result is <see cref="TimeSpan.Zero"/>.
+    /// </summary>
+    public TimeSpan? Eta
+    {
+        get
+        {
+            if (TotalBytes is not { } total || !(BytesPerSecond > 0))
+            {
+                return null;
+            }
+
+            // Clamp so an overshoot (BytesRead > TotalBytes) yields zero instead of a negative span.
+            var remainingBytes = Math.Max(0L, total - BytesRead);
+            var seconds = remainingBytes / BytesPerSecond;
+
+            // TimeSpan.FromSeconds throws OverflowException for out-of-range values (near-zero speeds).
+            return seconds < TimeSpan.MaxValue.TotalSeconds ? TimeSpan.FromSeconds(seconds) : null;
+        }
+    }
+}
diff --git a/src/MaIN.Services/MaIN.Services.csproj b/src/MaIN.Services/MaIN.Services.csproj
index 50459831..003ba66f 100644
--- a/src/MaIN.Services/MaIN.Services.csproj
+++ b/src/MaIN.Services/MaIN.Services.csproj
@@ -15,9 +15,9 @@
       <PackageReference Include="GTranslate" Version="2.3.1" />
       <PackageReference Include="YamlDotNet" Version="16.3.0" />
       <PackageReference Include="HtmlAgilityPack" Version="1.12.4" />
-      <PackageReference Include="llamasharp.backend.cpu" Version="0.26.0" />
-      <PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.26.0" />
-      <PackageReference Include="LLamaSharp.kernel-memory" Version="0.26.0" />
+      <PackageReference Include="llamasharp.backend.cpu" Version="0.27.0" />
+      <PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.27.0" />
+      <PackageReference Include="LLamaSharp.kernel-memory" Version="0.27.0" />
       <PackageReference Include="Microsoft.Extensions.Http" Version="10.0.1" />
       <PackageReference Include="Microsoft.KernelMemory" Version="0.98.250508.3" />
       <PackageReference Include="Microsoft.SemanticKernel.Connectors.Google" Version="1.64.0-alpha" />
diff --git a/src/MaIN.Services/Services/DataSourceProvider.cs b/src/MaIN.Services/Services/DataSourceProvider.cs
index 1dce392c..686b5bc0 100644
--- a/src/MaIN.Services/Services/DataSourceProvider.cs
+++ b/src/MaIN.Services/Services/DataSourceProvider.cs
@@ -52,11 +52,15 @@ public string FetchTextData(object? sourceDetails)
     public async Task<string> FetchApiData(object? details, string? filter,
         IHttpClientFactory httpClientFactory, Dictionary<string, string> properties)
     {
-        var apiDetails = JsonSerializer.Deserialize<AgentApiSourceDetails>(details!.ToString()!,
-            new JsonSerializerOptions()
-            {
-                PropertyNameCaseInsensitive = true,
-            });
+        // Prefer an already-typed instance; otherwise fall back to the JSON round-trip so that
+        // callers still passing serialized details (e.g. a JsonElement or a JSON string) keep
+        // working instead of silently ending up with a null apiDetails.
+        var apiDetails = details as AgentApiSourceDetails;
+        if (apiDetails is null && details is not null)
+        {
+            apiDetails = JsonSerializer.Deserialize<AgentApiSourceDetails>(details.ToString()!,
+                new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
+        }
 
         var httpClient = httpClientFactory.CreateClient();
 
diff --git a/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs b/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs
index 8dee7c23..e84c5c12 100644
--- a/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs
+++ b/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs
@@ -74,7 +74,7 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
     {
         // Create a fresh context for each embedding call (0.26.0 pattern)
         using var context = _weights.CreateContext(_params, _logger);
-        NativeApi.llama_set_embeddings(context.NativeHandle, true);
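+        // TODO(review): the NativeApi.llama_set_embeddings(context.NativeHandle, true) call is disabled here; confirm embeddings mode is still enabled for this context or restore the call.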
 
         var tokens = context.Tokenize(input, special: true);
         if (tokens.Length > context.ContextSize)
diff --git a/src/MaIN.Services/Services/LLMService/OllamaService.cs b/src/MaIN.Services/Services/LLMService/OllamaService.cs
index 978ff6c8..f1f5e0db 100644
--- a/src/MaIN.Services/Services/LLMService/OllamaService.cs
+++ b/src/MaIN.Services/Services/LLMService/OllamaService.cs
@@ -24,9 +24,14 @@ public sealed class OllamaService(
 
     private bool HasApiKey => !string.IsNullOrEmpty(_settings.OllamaKey) || !string.IsNullOrEmpty(Environment.GetEnvironmentVariable(LLMApiRegistry.Ollama.ApiKeyEnvName));
 
+    // Base URL of the local Ollama instance, without a trailing slash. Falls back to the
+    // default local endpoint when OllamaBaseUrl is null or blank: an empty value would
+    // otherwise produce host-less URLs such as "/v1/models".
+    private string LocalBaseUrl => (string.IsNullOrWhiteSpace(_settings.OllamaBaseUrl) ? "http://localhost:11434" : _settings.OllamaBaseUrl.Trim()).TrimEnd('/');
+
     protected override string HttpClientName => HasApiKey ? ServiceConstants.HttpClients.OllamaClient : ServiceConstants.HttpClients.OllamaLocalClient;
-    protected override string ChatCompletionsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaOpenAiChatCompletions : ServiceConstants.ApiUrls.OllamaLocalOpenAiChatCompletions;
-    protected override string ModelsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaModels : ServiceConstants.ApiUrls.OllamaLocalModels;
+    protected override string ChatCompletionsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaOpenAiChatCompletions : $"{LocalBaseUrl}/v1/chat/completions";
+    protected override string ModelsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaModels : $"{LocalBaseUrl}/v1/models";
     protected override Type ExpectedParamsType => typeof(OllamaInferenceParams);
 
     protected override string GetApiKey()
diff --git a/src/entrypoint-ollama.sh b/src/entrypoint-ollama.sh
new file mode 100644
index 00000000..224548f9
--- /dev/null
+++ b/src/entrypoint-ollama.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Entrypoint for the bundled Ollama + InferPage image: runs both processes in one
+# container, forwards termination signals, and exits as soon as either one stops.
+set -e
+
+# Start Ollama in the background, binding to all interfaces so port 11434
+# is reachable from the host for model management (ollama pull, etc.)
+OLLAMA_HOST=0.0.0.0:11434 ollama serve &
+OLLAMA_PID=$!
+# Set once InferPage has been launched; empty until then so cleanup can skip it.
+INFERPAGE_PID=""
+
+# Send SIGTERM to every child started so far. Errors are ignored because a child
+# may already be gone, and a failed kill must not trip `set -e`.
+_stop_children() {
+  kill "$OLLAMA_PID" 2>/dev/null || true
+  if [ -n "$INFERPAGE_PID" ]; then
+    kill "$INFERPAGE_PID" 2>/dev/null || true
+  fi
+}
+
+# Signal handler: stop both children, let them finish shutting down, then exit
+# with the conventional 128+signal status passed as $1.
+_term() {
+  trap - SIGTERM SIGINT
+  _stop_children
+  wait || true
+  exit "$1"
+}
+trap '_term 143' SIGTERM
+trap '_term 130' SIGINT
+
+# Wait until Ollama's port is accepting TCP connections
+echo "[entrypoint] Waiting for Ollama to be ready..."
+until bash -c 'printf "" 2>/dev/null >> /dev/tcp/localhost/11434' 2>/dev/null; do
+  # Give up instead of polling forever if Ollama died during startup.
+  if ! kill -0 "$OLLAMA_PID" 2>/dev/null; then
+    echo "[entrypoint] Ollama exited before becoming ready." >&2
+    exit 1
+  fi
+  sleep 1
+done
+echo "[entrypoint] Ollama is ready."
+
+# Start InferPage in the background so we can wait on both PIDs
+dotnet MaIN.InferPage.dll &
+INFERPAGE_PID=$!
+
+# Block until either process exits. The `|| EXIT_CODE=$?` form keeps `set -e` from
+# aborting the script on a non-zero child status before cleanup has run.
+EXIT_CODE=0
+wait -n "$OLLAMA_PID" "$INFERPAGE_PID" || EXIT_CODE=$?
+
+# Shut down the remaining process and wait for it before exiting.
+_stop_children
+wait || true
+exit "$EXIT_CODE"