From 893e921a764da92b243d43c90fd3c8297d3dba40 Mon Sep 17 00:00:00 2001 From: Piotr Stachaczynski Date: Mon, 25 May 2026 14:24:58 +0200 Subject: [PATCH] feat: InferPage dockerization, model catalog expansion, and Ollama improvements - Add Dockerfile.inferpage with cpu, cuda, ollama, and ollama-bundled targets - Add entrypoint-ollama.sh for bundled Ollama+InferPage single image - Add docker-compose.inferpage-ollama.yml compose stack - Publish to GHCR via GitHub Actions (no DockerHub secrets needed) - Make Ollama base URL configurable via MaIN__OllamaBaseUrl env var - Add latest Claude (Opus 4.7, Sonnet 4.6, Haiku 4.5), OpenAI (GPT-5.x series, o3, o4-mini, Codex Mini), Gemini (3.x series), Groq (Llama 4, Qwen3, Compound), and Ollama cloud model definitions Co-Authored-By: Claude Sonnet 4.6 --- .../workflows/docker-publish-inferpage.yml | 66 +++ scripts/docker-compose.inferpage-ollama.yml | 33 ++ src/.dockerignore | 10 + src/Dockerfile.inferpage | 111 +++++ src/MaIN.Core/.nuspec | 2 +- .../Interfaces/ModelContext/IModelContext.cs | 8 +- src/MaIN.Core/Hub/Contexts/ModelContext.cs | 148 ++++-- src/MaIN.Core/MaIN.Core.csproj | 2 +- src/MaIN.Domain/Configuration/MaINSettings.cs | 1 + src/MaIN.Domain/Entities/Chat.cs | 2 - src/MaIN.Domain/MaIN.Domain.csproj | 2 +- .../Models/Concrete/CloudModels.cs | 440 +++++++++++++++++- src/MaIN.Domain/Models/Models.cs | 69 ++- .../Components/Pages/Settings.razor | 236 +++++++++- src/MaIN.InferPage/MaIN.InferPage.csproj | 2 +- src/MaIN.InferPage/Program.cs | 17 +- src/MaIN.InferPage/Utils.cs | 16 + .../appsettings.Production.json | 14 + .../Models/DownloadProgress.cs | 15 + src/MaIN.Services/MaIN.Services.csproj | 6 +- .../Services/DataSourceProvider.cs | 6 +- .../Embeddings/LLamaEmbedderMaINClone.cs | 2 +- .../Services/LLMService/OllamaService.cs | 6 +- src/entrypoint-ollama.sh | 31 ++ 24 files changed, 1137 insertions(+), 108 deletions(-) create mode 100644 .github/workflows/docker-publish-inferpage.yml create mode 100644 scripts/docker-compose.inferpage-ollama.yml create mode 100644 src/.dockerignore create mode 100644 src/Dockerfile.inferpage create mode 100644 src/MaIN.InferPage/appsettings.Production.json create mode 100644 src/MaIN.Infrastructure/Models/DownloadProgress.cs create mode 100644 src/entrypoint-ollama.sh diff --git a/.github/workflows/docker-publish-inferpage.yml b/.github/workflows/docker-publish-inferpage.yml new file mode 100644 index 00000000..8affd48d --- /dev/null +++ b/.github/workflows/docker-publish-inferpage.yml @@ -0,0 +1,66 @@ +name: Build and Publish InferPage Docker Images + +on: + push: + branches: [main] + +jobs: + docker: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU (for linux/arm64 cross-build) + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push CPU image (linux/amd64 + linux/arm64) + uses: docker/build-push-action@v6 + with: + context: ./src + file: ./src/Dockerfile.inferpage + target: runtime-cpu + platforms: linux/amd64,linux/arm64 + push: true + tags: | + ghcr.io/${{ github.repository_owner }}/main-inferpage:cpu + ghcr.io/${{ github.repository_owner }}/main-inferpage:latest + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Build and push CUDA image (linux/amd64) + uses: docker/build-push-action@v6 + with: + context: ./src + file: ./src/Dockerfile.inferpage + target: runtime-cuda + platforms: linux/amd64 + push: true + tags: ghcr.io/${{ github.repository_owner }}/main-inferpage:cuda + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Build and push Ollama-bundled image (linux/amd64 + linux/arm64) + uses: docker/build-push-action@v6 + with: + context: ./src + file: ./src/Dockerfile.inferpage + target: runtime-ollama-bundled + platforms: linux/amd64,linux/arm64 + push: true + tags: ghcr.io/${{ github.repository_owner }}/main-inferpage:ollama + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/scripts/docker-compose.inferpage-ollama.yml b/scripts/docker-compose.inferpage-ollama.yml new file mode 100644 index 00000000..099f76b7 --- /dev/null +++ b/scripts/docker-compose.inferpage-ollama.yml @@ -0,0 +1,33 @@ +services: + + ollama: + image: ollama/ollama:latest + container_name: ollama + restart: unless-stopped + ports: + - "11434:11434" + volumes: + - ollama-models:/root/.ollama + # Uncomment to enable NVIDIA GPU pass-through: + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + inferpage: + image: ghcr.io/${GITHUB_OWNER}/main-inferpage:ollama + container_name: inferpage + restart: unless-stopped + ports: + - "5555:5555" + environment: + MaIN__OllamaBaseUrl: http://ollama:11434 + MaIN__BackendType: 7 + depends_on: + - ollama + +volumes: + ollama-models: diff --git a/src/.dockerignore b/src/.dockerignore new file mode 100644 index 00000000..8000416c --- /dev/null +++ b/src/.dockerignore @@ -0,0 +1,10 @@ +**/bin/ +**/obj/ +**/.vs/ +**/.idea/ +**/*.user +**/*.suo +MaIN.Core.UnitTests/ +MaIN.Core.IntegrationTests/ +MaIN.Core.E2ETests/ +**/appsettings.*.local.json diff --git a/src/Dockerfile.inferpage b/src/Dockerfile.inferpage new file mode 100644 index 00000000..392afe4d --- /dev/null +++ b/src/Dockerfile.inferpage @@ -0,0 +1,111 @@ +FROM mcr.microsoft.com/dotnet/sdk:10.0 AS restore +WORKDIR /src + +COPY MaIN.Domain/MaIN.Domain.csproj MaIN.Domain/ +COPY MaIN.Infrastructure/MaIN.Infrastructure.csproj MaIN.Infrastructure/ +COPY MaIN.Services/MaIN.Services.csproj MaIN.Services/ +COPY MaIN.Core/MaIN.Core.csproj MaIN.Core/ +COPY MaIN.InferPage/MaIN.InferPage.csproj MaIN.InferPage/ + +RUN dotnet restore MaIN.InferPage/MaIN.InferPage.csproj + +FROM restore AS publish + +COPY MaIN.Domain/ MaIN.Domain/ +COPY MaIN.Infrastructure/ MaIN.Infrastructure/ +COPY MaIN.Services/ MaIN.Services/ +COPY MaIN.Core/ MaIN.Core/ +COPY MaIN.InferPage/ MaIN.InferPage/ + +RUN dotnet publish MaIN.InferPage/MaIN.InferPage.csproj \ + --framework net10.0 \ + --configuration Release \ + --no-restore \ + --output /app/out + +FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + libicu74 \ + libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=mcr.microsoft.com/dotnet/aspnet:10.0 /usr/share/dotnet /usr/share/dotnet +RUN ln -sf /usr/share/dotnet/dotnet /usr/local/bin/dotnet + +WORKDIR /app + +ENV DOTNET_ROOT=/usr/share/dotnet +ENV ASPNETCORE_URLS=http://+:5555 +ENV ASPNETCORE_ENVIRONMENT=Production +ENV MaIN__ModelsPath=/app/Models +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility +ENV LD_LIBRARY_PATH=/usr/lib/wsl/lib + +VOLUME /app/Models +EXPOSE 5555 + +COPY --from=publish /app/out . + +RUN find /app/runtimes/linux-x64/native -maxdepth 2 -mindepth 1 -type d \ + | tee /etc/ld.so.conf.d/llamasharp.conf \ + && ldconfig + +ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"] + +FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-cpu + +RUN apt-get update && apt-get install -y --no-install-recommends libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +ENV ASPNETCORE_URLS=http://+:5555 +ENV ASPNETCORE_ENVIRONMENT=Production +ENV MaIN__ModelsPath=/app/Models + +VOLUME /app/Models +EXPOSE 5555 + +COPY --from=publish /app/out . + +ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"] + +FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-ollama + +WORKDIR /app + +ENV ASPNETCORE_URLS=http://+:5555 +ENV ASPNETCORE_ENVIRONMENT=Production +ENV MaIN__OllamaBaseUrl=http://host.docker.internal:11434 +ENV MaIN__BackendType=7 + +EXPOSE 5555 + +COPY --from=publish /app/out . + +ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"] + +FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-ollama-bundled + +COPY --from=ollama/ollama:latest /usr/bin/ollama /usr/bin/ollama + +WORKDIR /app + +ENV ASPNETCORE_URLS=http://+:5555 +ENV ASPNETCORE_ENVIRONMENT=Production +ENV MaIN__OllamaBaseUrl=http://localhost:11434 +ENV MaIN__BackendType=7 + +VOLUME /root/.ollama +EXPOSE 5555 +EXPOSE 11434 + +COPY --from=publish /app/out . +COPY entrypoint-ollama.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/src/MaIN.Core/.nuspec b/src/MaIN.Core/.nuspec index 7923c9f4..8ed72828 100644 --- a/src/MaIN.Core/.nuspec +++ b/src/MaIN.Core/.nuspec @@ -2,7 +2,7 @@ MaIN.NET - 0.10.5 + 0.10.9 Wisedev Wisedev favicon.png diff --git a/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs b/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs index b2da7598..59054b10 100644 --- a/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs +++ b/src/MaIN.Core/Hub/Contexts/Interfaces/ModelContext/IModelContext.cs @@ -1,4 +1,6 @@ -using MaIN.Domain.Models.Abstract; +using MaIN.Core.Hub.Contexts; +using MaIN.Domain.Models.Abstract; +using MaIN.Infrastructure.Models; namespace MaIN.Core.Hub.Contexts.Interfaces.ModelContext; @@ -51,6 +53,8 @@ public interface IModelContext /// returning the context instance implementing for method chaining. Task DownloadAsync(string modelId, CancellationToken cancellationToken = default); + Task DownloadAsync(string modelId, IProgress? progress, CancellationToken cancellationToken = default); + /// /// Ensures a known local model is downloaded before use. If the model is already present on disk the call /// returns immediately; if not, the model is downloaded. Cloud models are silently skipped. @@ -62,6 +66,8 @@ public interface IModelContext /// for method chaining. Task EnsureDownloadedAsync(string modelId, CancellationToken cancellationToken = default); + Task EnsureDownloadedAsync(string modelId, IProgress? progress, CancellationToken cancellationToken = default); + /// /// Ensures a known local model is downloaded before use using a strongly-typed model reference. /// If the model is already present on disk the call returns immediately; if not, the model is downloaded. diff --git a/src/MaIN.Core/Hub/Contexts/ModelContext.cs b/src/MaIN.Core/Hub/Contexts/ModelContext.cs index 167887f3..fa45c80f 100644 --- a/src/MaIN.Core/Hub/Contexts/ModelContext.cs +++ b/src/MaIN.Core/Hub/Contexts/ModelContext.cs @@ -8,6 +8,7 @@ using MaIN.Services.Constants; using MaIN.Services.Services.LLMService.Utils; using System.Diagnostics; +using MaIN.Infrastructure.Models; namespace MaIN.Core.Hub.Contexts; @@ -22,6 +23,8 @@ public sealed class ModelContext : IModelContext private const int FileStreamBufferSize = 65536; private const int ProgressUpdateIntervalMilliseconds = 1000; private static readonly TimeSpan DefaultHttpTimeout = TimeSpan.FromMinutes(30); + private const int MaxRetryAttempts = 5; + private static readonly TimeSpan ReadStallTimeout = TimeSpan.FromSeconds(30); internal ModelContext(MaINSettings settings, IHttpClientFactory httpClientFactory) { @@ -53,7 +56,10 @@ public bool Exists(string modelId) return localModel.IsDownloaded(_defaultModelsPath); } - public async Task EnsureDownloadedAsync(string modelId, CancellationToken cancellationToken = default) + public Task EnsureDownloadedAsync(string modelId, CancellationToken cancellationToken = default) + => EnsureDownloadedAsync(modelId, null, cancellationToken); + + public async Task EnsureDownloadedAsync(string modelId, IProgress? progress, CancellationToken cancellationToken = default) { if (string.IsNullOrWhiteSpace(modelId)) { @@ -77,7 +83,7 @@ public async Task EnsureDownloadedAsync(string modelId, Cancellat // Double-check if (!localModel.IsDownloaded(_defaultModelsPath)) { - await DownloadModelAsync(localModel, cancellationToken); + await DownloadModelAsync(localModel, progress, cancellationToken); } } @@ -90,7 +96,10 @@ public async Task EnsureDownloadedAsync(string modelId, Cancellat return await EnsureDownloadedAsync(model.Id, cancellationToken); } - public async Task DownloadAsync(string modelId, CancellationToken cancellationToken = default) + public Task DownloadAsync(string modelId, CancellationToken cancellationToken = default) + => DownloadAsync(modelId, null, cancellationToken); + + public async Task DownloadAsync(string modelId, IProgress? progress, CancellationToken cancellationToken = default) { if (string.IsNullOrWhiteSpace(modelId)) { @@ -106,7 +115,7 @@ public async Task DownloadAsync(string modelId, CancellationToken using (await _downloadLocks.LockAsync(modelId, cancellationToken)) { - await DownloadModelAsync(localModel, cancellationToken); + await DownloadModelAsync(localModel, progress, cancellationToken); } return this; @@ -131,72 +140,123 @@ public async Task LoadToCacheAsync(LocalModel model) return this; } - private async Task DownloadModelAsync(LocalModel localModel, CancellationToken cancellationToken) + private async Task DownloadModelAsync(LocalModel localModel, IProgress? progress, CancellationToken cancellationToken) { - using var httpClient = CreateConfiguredHttpClient(); - var filePath = GetModelFilePath(localModel); - - if (localModel.DownloadUrl is null) - { - throw new DownloadUrlNullOrEmptyException(); - } + if (localModel.DownloadUrl is null) throw new DownloadUrlNullOrEmptyException(); + var filePath = GetModelFilePath(localModel); Console.WriteLine($"Starting download of {localModel.FileName}..."); - try - { - using var response = await httpClient.GetAsync(localModel.DownloadUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken); - response.EnsureSuccessStatusCode(); - - await DownloadWithProgressAsync(response, filePath, localModel.FileName, cancellationToken); - } - catch (Exception ex) + for (int attempt = 0; attempt <= MaxRetryAttempts; attempt++) { - Console.WriteLine($"Download failed: {ex.Message}"); + if (attempt > 0) + { + var delay = TimeSpan.FromSeconds(Math.Min(2 * Math.Pow(2, attempt - 1), 60)); + Console.WriteLine($"\nRetrying ({attempt}/{MaxRetryAttempts}) in {delay.TotalSeconds:F0}s..."); + await Task.Delay(delay, cancellationToken); + } - if (File.Exists(filePath)) + try { - File.Delete(filePath); + await TryDownloadWithResumeAsync(localModel.DownloadUrl, filePath, localModel.FileName, progress, cancellationToken); + return; + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + if (File.Exists(filePath)) File.Delete(filePath); + throw; + } + catch (Exception ex) when (attempt < MaxRetryAttempts) + { + Console.WriteLine($"\nDownload error: {ex.Message}"); } - throw; } + + if (File.Exists(filePath)) File.Delete(filePath); + throw new IOException($"Download of {localModel.FileName} failed after {MaxRetryAttempts} retries."); } - private static async Task DownloadWithProgressAsync(HttpResponseMessage response, string filePath, string fileName, CancellationToken cancellationToken) + private async Task TryDownloadWithResumeAsync(Uri url, string filePath, string fileName, IProgress? progress, CancellationToken cancellationToken) { - var totalBytes = response.Content.Headers.ContentLength; - var totalBytesRead = 0L; - var buffer = new byte[DefaultBufferSize]; - var progressStopwatch = Stopwatch.StartNew(); - var totalStopwatch = Stopwatch.StartNew(); + var resumeFrom = File.Exists(filePath) ? new FileInfo(filePath).Length : 0L; - if (totalBytes.HasValue) + using var httpClient = CreateConfiguredHttpClient(); + using var request = new HttpRequestMessage(HttpMethod.Get, url); + if (resumeFrom > 0) + request.Headers.Range = new System.Net.Http.Headers.RangeHeaderValue(resumeFrom, null); + + using var response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken); + + if (response.StatusCode == System.Net.HttpStatusCode.RequestedRangeNotSatisfiable) { - Console.WriteLine($"File size: {FormatBytes(totalBytes.Value)}"); + Console.WriteLine("\nFile already fully downloaded."); + return; } - await using var fileStream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None, FileStreamBufferSize); + var isResume = response.StatusCode == System.Net.HttpStatusCode.PartialContent; + if (!isResume) resumeFrom = 0; + + response.EnsureSuccessStatusCode(); + + long? totalBytes = response.Content.Headers.ContentLength is { } cl + ? cl + (isResume ? resumeFrom : 0) + : null; + + if (isResume) + Console.WriteLine($"Resuming from {FormatBytes(resumeFrom)}..."); + else if (totalBytes.HasValue) + Console.WriteLine($"File size: {FormatBytes(totalBytes.Value)}"); + + var fileMode = isResume ? FileMode.Append : FileMode.Create; + await using var fileStream = new FileStream(filePath, fileMode, FileAccess.Write, FileShare.None, FileStreamBufferSize); await using var contentStream = await response.Content.ReadAsStreamAsync(cancellationToken); + await ReadChunksAsync(contentStream, fileStream, resumeFrom, totalBytes, fileName, progress, cancellationToken); + } + + private static async Task ReadChunksAsync(Stream content, FileStream file, long startOffset, long? totalBytes, string fileName, IProgress? progress, CancellationToken cancellationToken) + { + var totalBytesRead = startOffset; + var buffer = new byte[DefaultBufferSize]; + var progressStopwatch = Stopwatch.StartNew(); + var totalStopwatch = Stopwatch.StartNew(); + while (true) { - var bytesRead = await contentStream.ReadAsync(buffer, cancellationToken); - if (bytesRead == 0) + int bytesRead; + using (var stallCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken)) { - break; + stallCts.CancelAfter(ReadStallTimeout); + try + { + bytesRead = await content.ReadAsync(buffer, stallCts.Token); + } + catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested) + { + throw new IOException($"Download stalled: no data received for {ReadStallTimeout.TotalSeconds:F0}s."); + } } - await fileStream.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken); + if (bytesRead == 0) break; + + await file.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken); totalBytesRead += bytesRead; if (ShouldUpdateProgress(progressStopwatch)) { + var elapsed = totalStopwatch.Elapsed.TotalSeconds; + var speed = elapsed > 0 ? totalBytesRead / elapsed : 0; ShowProgress(totalBytesRead, totalBytes, totalStopwatch); + progress?.Report(new DownloadProgress(totalBytesRead, totalBytes, speed)); progressStopwatch.Restart(); } } - ShowFinalProgress(totalBytesRead, totalStopwatch, fileName); + var totalTime = totalStopwatch.Elapsed; + var avgSpeed = totalTime.TotalSeconds > 0 ? totalBytesRead / totalTime.TotalSeconds : 0; + Console.WriteLine($"\nDownload completed: {fileName}. " + + $"Total: {FormatBytes(totalBytesRead)}, Time: {totalTime:hh\\:mm\\:ss}, Speed: {FormatBytes((long)avgSpeed)}/s"); + progress?.Report(new DownloadProgress(totalBytesRead, totalBytes, 0)); } private HttpClient CreateConfiguredHttpClient() @@ -241,18 +301,6 @@ private static void ShowProgress(long totalBytesRead, long? totalBytes, Stopwatc } } - private static void ShowFinalProgress(long totalBytesRead, Stopwatch totalStopwatch, string fileName) - { - totalStopwatch.Stop(); - var totalTime = totalStopwatch.Elapsed; - var avgSpeed = totalTime.TotalSeconds > 0 ? totalBytesRead / totalTime.TotalSeconds : 0; - - Console.WriteLine($"\nDownload completed: {fileName}. " + - $"Total size: {FormatBytes(totalBytesRead)}, " + - $"Time: {totalTime:hh\\:mm\\:ss}, " + - $"Average speed: {FormatBytes((long)avgSpeed)}/s"); - } - private static string FormatBytes(long bytes) { if (bytes == 0) diff --git a/src/MaIN.Core/MaIN.Core.csproj b/src/MaIN.Core/MaIN.Core.csproj index 667eedf9..f0a17974 100644 --- a/src/MaIN.Core/MaIN.Core.csproj +++ b/src/MaIN.Core/MaIN.Core.csproj @@ -8,7 +8,7 @@ - + diff --git a/src/MaIN.Domain/Configuration/MaINSettings.cs b/src/MaIN.Domain/Configuration/MaINSettings.cs index 738ee2aa..f53ccc0a 100644 --- a/src/MaIN.Domain/Configuration/MaINSettings.cs +++ b/src/MaIN.Domain/Configuration/MaINSettings.cs @@ -13,6 +13,7 @@ public class MaINSettings public string? AnthropicKey { get; set; } public string? GroqCloudKey { get; set; } public string? OllamaKey { get; set; } + public string? OllamaBaseUrl { get; set; } public string? XaiKey { get; set; } public MongoDbSettings? MongoDbSettings { get; set; } public FileSystemSettings? FileSystemSettings { get; set; } diff --git a/src/MaIN.Domain/Entities/Chat.cs b/src/MaIN.Domain/Entities/Chat.cs index 175a4bf1..a233e94e 100644 --- a/src/MaIN.Domain/Entities/Chat.cs +++ b/src/MaIN.Domain/Entities/Chat.cs @@ -1,9 +1,7 @@ using LLama.Batched; -using MaIN.Domain.Configuration; using MaIN.Domain.Configuration.BackendInferenceParams; using MaIN.Domain.Entities.Skills; using MaIN.Domain.Entities.Tools; -using MaIN.Domain.Models.Abstract; using Grammar = MaIN.Domain.Models.Grammar; namespace MaIN.Domain.Entities; diff --git a/src/MaIN.Domain/MaIN.Domain.csproj b/src/MaIN.Domain/MaIN.Domain.csproj index 612f95d4..9f995107 100644 --- a/src/MaIN.Domain/MaIN.Domain.csproj +++ b/src/MaIN.Domain/MaIN.Domain.csproj @@ -8,7 +8,7 @@ - + diff --git a/src/MaIN.Domain/Models/Concrete/CloudModels.cs b/src/MaIN.Domain/Models/Concrete/CloudModels.cs index 87b55fb0..797432d4 100644 --- a/src/MaIN.Domain/Models/Concrete/CloudModels.cs +++ b/src/MaIN.Domain/Models/Concrete/CloudModels.cs @@ -5,6 +5,61 @@ namespace MaIN.Domain.Models.Concrete; // ===== OpenAI Models ===== +public sealed record Gpt4o() : CloudModel( + Models.OpenAi.Gpt4o, + BackendType.OpenAi, + "GPT-4o", + 128000, + "OpenAI's flagship multimodal model with strong reasoning and vision"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record Gpt4_1() : CloudModel( + Models.OpenAi.Gpt4_1, + BackendType.OpenAi, + "GPT-4.1", + 1000000, + "OpenAI's most capable GPT model with 1M token context window"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record O3() : CloudModel( + Models.OpenAi.O3, + BackendType.OpenAi, + "o3", + 200000, + "OpenAI's most powerful reasoning model for complex multi-step problems"), IVisionModel, IReasoningModel +{ + public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record O3Mini() : CloudModel( + Models.OpenAi.O3Mini, + BackendType.OpenAi, + "o3 Mini", + 200000, + "Compact and efficient OpenAI reasoning model"), IReasoningModel +{ + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record O4Mini() : CloudModel( + Models.OpenAi.O4Mini, + BackendType.OpenAi, + "o4 Mini", + 200000, + "OpenAI's fast reasoning model optimised for agentic tool use"), IVisionModel, IReasoningModel +{ + public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + public sealed record Gpt4oMini() : CloudModel( Models.OpenAi.Gpt4oMini, BackendType.OpenAi, @@ -25,6 +80,28 @@ public sealed record Gpt4_1Mini() : CloudModel( public string? MMProjectName => null; } +public sealed record Gpt5() : CloudModel( + Models.OpenAi.Gpt5, + BackendType.OpenAi, + "GPT-5", + 1000000, + "OpenAI's most capable model with native reasoning and multimodal understanding"), IVisionModel, IReasoningModel +{ + public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record Gpt5Mini() : CloudModel( + Models.OpenAi.Gpt5Mini, + BackendType.OpenAi, + "GPT-5 Mini", + 1000000, + "Faster and more affordable GPT-5 variant"), IVisionModel +{ + public string? MMProjectName => null; +} + public sealed record Gpt5Nano() : CloudModel( Models.OpenAi.Gpt5Nano, BackendType.OpenAi, @@ -35,14 +112,90 @@ public sealed record Gpt5Nano() : CloudModel( public string? MMProjectName => null; } +public sealed record Gpt5_1() : CloudModel( + Models.OpenAi.Gpt5_1, + BackendType.OpenAi, + "GPT-5.1", + 1000000, + "OpenAI's flagship model for coding and agentic tasks with configurable reasoning"), IVisionModel, IReasoningModel +{ + public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record Gpt5_2() : CloudModel( + Models.OpenAi.Gpt5_2, + BackendType.OpenAi, + "GPT-5.2", + 1000000, + "OpenAI frontier model for complex professional work with long-context reasoning"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record Gpt5_4() : CloudModel( + Models.OpenAi.Gpt5_4, + BackendType.OpenAi, + "GPT-5.4", + 1000000, + "OpenAI frontier model producing smarter and more precise responses"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record Gpt5_4Mini() : CloudModel( + Models.OpenAi.Gpt5_4Mini, + BackendType.OpenAi, + "GPT-5.4 Mini", + 1000000, + "Faster, more efficient GPT-5.4 variant designed for high-volume workloads"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record Gpt5_4Nano() : CloudModel( + Models.OpenAi.Gpt5_4Nano, + BackendType.OpenAi, + "GPT-5.4 Nano", + 1000000, + "Ultra-fast GPT-5.4 variant optimised for classification, extraction, and sub-agents"), IVisionModel +{ + public string? MMProjectName => null; +} + public sealed record Gpt5_5() : CloudModel( Models.OpenAi.Gpt5_5, BackendType.OpenAi, "GPT-5.5", - ModelDefaults.DefaultMaxContextWindow, - "OpenAI model required for native Skills API (shell tool + container.skills)"), IVisionModel + 1000000, + "OpenAI's latest flagship model for the most complex professional and agentic work"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record Gpt5_5Pro() : CloudModel( + Models.OpenAi.Gpt5_5Pro, + BackendType.OpenAi, + "GPT-5.5 Pro", + 1000000, + "GPT-5.5 variant that uses extended compute to think harder for maximum precision"), IVisionModel, IReasoningModel +{ + public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record CodexMini() : CloudModel( + Models.OpenAi.CodexMini, + BackendType.OpenAi, + "Codex Mini", + 200000, + "OpenAI's Codex model optimised for agentic code generation and software engineering"), IVisionModel, IReasoningModel { public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; } public sealed record DallE3() : CloudModel( @@ -61,6 +214,38 @@ public sealed record GptImage1() : CloudModel( // ===== Anthropic Models ===== +public sealed record ClaudeOpus4_7() : CloudModel( + Models.Anthropic.ClaudeOpus4_7, + BackendType.Anthropic, + "Claude Opus 4.7", + 200000, + "Anthropic's most capable model with extended thinking and superior reasoning"), IVisionModel, IReasoningModel +{ + public string? MMProjectName => null; + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record ClaudeSonnet4_6() : CloudModel( + Models.Anthropic.ClaudeSonnet4_6, + BackendType.Anthropic, + "Claude Sonnet 4.6", + 200000, + "Anthropic's balanced model with strong performance and speed"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record ClaudeHaiku4_5() : CloudModel( + Models.Anthropic.ClaudeHaiku4_5, + BackendType.Anthropic, + "Claude Haiku 4.5", + 200000, + "Anthropic's fastest and most compact model for everyday tasks"), IVisionModel +{ + public string? MMProjectName => null; +} + public sealed record ClaudeSonnet4() : CloudModel( Models.Anthropic.ClaudeSonnet4, BackendType.Anthropic, @@ -81,27 +266,44 @@ public sealed record ClaudeSonnet4_5() : CloudModel( public string? MMProjectName => null; } -public sealed record ClaudeOpus4_6() : CloudModel( - Models.Anthropic.ClaudeOpus4_6, - BackendType.Anthropic, - "Claude Opus 4.6", - 200000, - "Anthropic flagship reasoning model — supports native Skills API (container.skills + code_execution beta)"), IVisionModel +// ===== Gemini Models ===== + +public sealed record Gemini3_5Flash() : CloudModel( + Models.Gemini.Gemini3_5Flash, + BackendType.Gemini, + "Gemini 3.5 Flash", + 1000000, + "Google's latest flagship model for agentic tasks, coding, and multi-step workflows"), IVisionModel { public string? MMProjectName => null; } -public sealed record ClaudeOpus4_7() : CloudModel( - Models.Anthropic.ClaudeOpus4_7, - BackendType.Anthropic, - "Claude Opus 4.7", - 200000, - "Latest Opus revision — recommended for stable Anthropic Skills API usage"), IVisionModel +public sealed record Gemini3_1FlashLite() : CloudModel( + Models.Gemini.Gemini3_1FlashLite, + BackendType.Gemini, + "Gemini 3.1 Flash Lite", + 1000000, + "Low-latency, cost-efficient Gemini model for high-volume agentic and extraction workloads"), IVisionModel { public string? MMProjectName => null; } -// ===== Gemini Models ===== +public sealed record Gemini3_1ProPreview() : CloudModel( + Models.Gemini.Gemini3_1ProPreview, + BackendType.Gemini, + "Gemini 3.1 Pro (Preview)", + 1000000, + "Google's most capable Pro model with improved thinking and factual consistency"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record Gemini3_1FlashImagePreview() : CloudModel( + Models.Gemini.Gemini3_1FlashImagePreview, + BackendType.Gemini, + "Gemini 3.1 Flash Image (Preview)", + 4000, + "Google's latest image generation model via Gemini Flash"), IImageGenerationModel; public sealed record Gemini2_5Flash() : CloudModel( Models.Gemini.Gemini2_5Flash, @@ -268,6 +470,37 @@ public sealed record GrokImagineImagePro() : CloudModel( // ===== GroqCloud Models ===== +public sealed record Llama4Scout17b() : CloudModel( + Models.Groq.Llama4Scout17b, + BackendType.GroqCloud, + "Llama 4 Scout 17B (Groq)", + 131072, + "Meta's MoE model with 17B active parameters, vision support, and fast inference on Groq"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record GroqCompound() : CloudModel( + Models.Groq.Compound, + BackendType.GroqCloud, + "Groq Compound", + 131072, + "Groq's AI system with built-in web search and code execution for complex agentic tasks"); + +public sealed record GroqCompoundMini() : CloudModel( + Models.Groq.CompoundMini, + BackendType.GroqCloud, + "Groq Compound Mini", + 131072, + "Lightweight Groq compound system for fast agentic workflows with tool use"); + +public sealed record Qwen3_32b() : CloudModel( + Models.Groq.Qwen3_32b, + BackendType.GroqCloud, + "Qwen 3 32B (Groq)", + 128000, + "Alibaba's Qwen 3 32B model running on Groq infrastructure"); + public sealed record Llama3_1_8bInstant() : CloudModel( Models.Groq.Llama3_1_8b, BackendType.GroqCloud, @@ -318,12 +551,187 @@ public sealed record DeepSeekChat() : CloudModel( // ===== Ollama Models ===== +// Llama 4 +public sealed record OllamaLlama4Scout() : CloudModel( + Models.Ollama.Llama4Scout, + BackendType.Ollama, + "Llama 4 Scout (Ollama)", + 131072, + "Meta's MoE model with native multimodal understanding running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record OllamaLlama4Maverick() : CloudModel( + Models.Ollama.Llama4Maverick, + BackendType.Ollama, + "Llama 4 Maverick (Ollama)", + 131072, + "Meta's larger Llama 4 MoE variant with multimodal capabilities running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +// Gemma 3 public sealed record OllamaGemma3_4b() : CloudModel( Models.Ollama.Gemma3_4b, BackendType.Ollama, "Gemma3 4B (Ollama)", - 8192, + 128000, "Balanced 4B model running on Ollama for writing, analysis, and mathematical reasoning"), IVisionModel { public string? MMProjectName => null; } + +public sealed record OllamaGemma3_12b() : CloudModel( + Models.Ollama.Gemma3_12b, + BackendType.Ollama, + "Gemma3 12B (Ollama)", + 128000, + "Google's mid-range Gemma 3 model with strong reasoning running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record OllamaGemma3_27b() : CloudModel( + Models.Ollama.Gemma3_27b, + BackendType.Ollama, + "Gemma3 27B (Ollama)", + 128000, + "Google's largest Gemma 3 model with frontier-level performance running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +// Gemma 4 +public sealed record OllamaGemma4_E4b() : CloudModel( + Models.Ollama.Gemma4_E4b, + BackendType.Ollama, + "Gemma4 E4B (Ollama)", + 128000, + "Efficient 4B Gemma 4 model for agentic and coding tasks running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record OllamaGemma4_26b() : CloudModel( + Models.Ollama.Gemma4_26b, + BackendType.Ollama, + "Gemma4 26B (Ollama)", + 128000, + "Google's powerful Gemma 4 26B model for reasoning and multimodal tasks running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +// Qwen 3 +public sealed record OllamaQwen3_8b() : CloudModel( + Models.Ollama.Qwen3_8b, + BackendType.Ollama, + "Qwen3 8B (Ollama)", + 128000, + "Alibaba's Qwen 3 8B model running on Ollama"); + +public sealed record OllamaQwen3_14b() : CloudModel( + Models.Ollama.Qwen3_14b, + BackendType.Ollama, + "Qwen3 14B (Ollama)", + 128000, + "Alibaba's Qwen 3 14B model running on Ollama"); + +public sealed record OllamaQwen3_30b() : CloudModel( + Models.Ollama.Qwen3_30b, + BackendType.Ollama, + "Qwen3 30B (Ollama)", + 128000, + "Alibaba's Qwen 3 30B MoE model running on Ollama"); + +// Qwen 3.5 +public sealed record OllamaQwen3_5_9b() : CloudModel( + Models.Ollama.Qwen3_5_9b, + BackendType.Ollama, + "Qwen3.5 9B (Ollama)", + 128000, + "Alibaba's multimodal Qwen 3.5 9B model running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +public sealed record OllamaQwen3_5_27b() : CloudModel( + Models.Ollama.Qwen3_5_27b, + BackendType.Ollama, + "Qwen3.5 27B (Ollama)", + 128000, + "Alibaba's multimodal Qwen 3.5 27B model running on Ollama"), IVisionModel +{ + public string? MMProjectName => null; +} + +// Qwen 3.6 +public sealed record OllamaQwen3_6_27b() : CloudModel( + Models.Ollama.Qwen3_6_27b, + BackendType.Ollama, + "Qwen3.6 27B (Ollama)", + 128000, + "Alibaba's Qwen 3.6 27B with improved agentic coding running on Ollama"); + +public sealed record OllamaQwen3_6_35b() : CloudModel( + Models.Ollama.Qwen3_6_35b, + BackendType.Ollama, + "Qwen3.6 35B (Ollama)", + 128000, + "Alibaba's Qwen 3.6 35B with strong coding and reasoning running on Ollama"); + +// Qwen 3 Coder +public sealed record OllamaQwen3Coder_8b() : CloudModel( + Models.Ollama.Qwen3Coder_8b, + BackendType.Ollama, + "Qwen3-Coder 8B (Ollama)", + 128000, + "Alibaba's efficient code-focused Qwen 3 model running on Ollama"); + +public sealed record OllamaQwen3Coder_30b() : CloudModel( + Models.Ollama.Qwen3Coder_30b, + BackendType.Ollama, + "Qwen3-Coder 30B (Ollama)", + 128000, + "Alibaba's most capable agentic code model running on Ollama"); + +// DeepSeek R1 +public sealed record OllamaDeepSeekR1_7b() : CloudModel( + Models.Ollama.DeepSeekR1_7b, + BackendType.Ollama, + "DeepSeek R1 7B (Ollama)", + 128000, + "DeepSeek's open reasoning model at 7B running on Ollama"), IReasoningModel +{ + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +public sealed record OllamaDeepSeekR1_14b() : CloudModel( + Models.Ollama.DeepSeekR1_14b, + BackendType.Ollama, + "DeepSeek R1 14B (Ollama)", + 128000, + "DeepSeek's open reasoning model at 14B running on Ollama"), IReasoningModel +{ + public Func? ReasonFunction => null; + public string? AdditionalPrompt => null; +} + +// Microsoft Phi +public sealed record OllamaPhi4_14b() : CloudModel( + Models.Ollama.Phi4_14b, + BackendType.Ollama, + "Phi4 14B (Ollama)", + 16000, + "Microsoft's state-of-the-art 14B model running on Ollama"); + +// Mistral +public sealed record OllamaMistral_7b() : CloudModel( + Models.Ollama.Mistral_7b, + BackendType.Ollama, + "Mistral 7B (Ollama)", + 32000, + "Mistral AI's efficient 7B base model running on Ollama"); diff --git a/src/MaIN.Domain/Models/Models.cs b/src/MaIN.Domain/Models/Models.cs index cd9604f4..55dfc21e 100644 --- a/src/MaIN.Domain/Models/Models.cs +++ b/src/MaIN.Domain/Models/Models.cs @@ -8,24 +8,43 @@ public static class Models { public static class OpenAi { + public const string Gpt4o = "gpt-4o"; public const string Gpt4oMini = "gpt-4o-mini"; + public const string Gpt4_1 = "gpt-4.1"; public const string Gpt4_1Mini = "gpt-4.1-mini"; + public const string Gpt5 = "gpt-5"; + public const string Gpt5Mini = "gpt-5-mini"; public const string Gpt5Nano = "gpt-5-nano"; + public const string Gpt5_1 = "gpt-5.1"; + public const string Gpt5_2 = "gpt-5.2"; + public const string Gpt5_4 = "gpt-5.4"; + public const string Gpt5_4Mini = "gpt-5.4-mini"; + public const string Gpt5_4Nano = "gpt-5.4-nano"; public const string Gpt5_5 = "gpt-5.5"; + public const string Gpt5_5Pro = "gpt-5.5-pro"; + public const string CodexMini = "codex-mini-latest"; + public const string O3 = "o3"; + public const string O3Mini = "o3-mini"; + public const string O4Mini = "o4-mini"; public const string DallE3 = "dall-e-3"; public const string GptImage1 = "gpt-image-1"; } public static class Anthropic { + public const string ClaudeOpus4_7 = "claude-opus-4-7"; + public const string ClaudeSonnet4_6 = "claude-sonnet-4-6"; + public const string ClaudeHaiku4_5 = "claude-haiku-4-5-20251001"; public const string ClaudeSonnet4 = "claude-sonnet-4-20250514"; public const string ClaudeSonnet4_5 = "claude-sonnet-4-5-20250929"; - public const string ClaudeOpus4_6 = "claude-opus-4-6"; - public const string ClaudeOpus4_7 = "claude-opus-4-7"; } public static class Gemini { + public const string Gemini3_5Flash = "gemini-3.5-flash"; + public const string Gemini3_1FlashLite = "gemini-3.1-flash-lite"; + public const string Gemini3_1ProPreview = "gemini-3.1-pro-preview"; + public const string Gemini3_1FlashImagePreview = "gemini-3.1-flash-image-preview"; public const string Gemini2_5Pro = "gemini-2.5-pro"; public const string Gemini2_5Flash = "gemini-2.5-flash"; public const string Gemini2_0Flash = "gemini-2.0-flash"; @@ -47,10 +66,14 @@ public static class Xai public static class Groq { - public const string Llama3_1_8b = "llama-3.1-8b-instant"; + public const string Llama4Scout17b = "meta-llama/llama-4-scout-17b-16e-instruct"; public const string Llama3_3_70b = "llama-3.3-70b-versatile"; - public const string GptOss20b = "openai/gpt-oss-20b"; + public const string Llama3_1_8b = "llama-3.1-8b-instant"; + public const string Qwen3_32b = "qwen/qwen3-32b"; + public const string Compound = "groq/compound"; + public const string CompoundMini = "groq/compound-mini"; public const string GptOss120b = "openai/gpt-oss-120b"; + public const string GptOss20b = "openai/gpt-oss-20b"; } public static class DeepSeek @@ -61,7 +84,45 @@ public static class DeepSeek public static class Ollama { + // Llama 4 + public const string Llama4Scout = "llama4:scout"; + public const string Llama4Maverick = "llama4:maverick"; + + // Gemma 3 public const string Gemma3_4b = "gemma3:4b"; + public const string Gemma3_12b = "gemma3:12b"; + public const string Gemma3_27b = "gemma3:27b"; + + // Gemma 4 + public const string Gemma4_E4b = "gemma4:e4b"; + public const string Gemma4_26b = "gemma4:26b"; + + // Qwen 3 + public const string Qwen3_8b = "qwen3:8b"; + public const string Qwen3_14b = "qwen3:14b"; + public const string Qwen3_30b = "qwen3:30b"; + + // Qwen 3.5 + public const string Qwen3_5_9b = "qwen3.5:9b"; + public const string Qwen3_5_27b = "qwen3.5:27b"; + + // Qwen 3.6 + public const string Qwen3_6_27b = "qwen3.6:27b"; + public const string Qwen3_6_35b = "qwen3.6:35b"; + + // Qwen 3 Coder + public const string Qwen3Coder_8b = "qwen3-coder:8b"; + public const string Qwen3Coder_30b = "qwen3-coder:30b"; + + // DeepSeek R1 + public const string DeepSeekR1_7b = "deepseek-r1:7b"; + public const string DeepSeekR1_14b = "deepseek-r1:14b"; + + // Microsoft Phi + public const string Phi4_14b = "phi4:14b"; + + // Mistral + public const string Mistral_7b = "mistral:7b"; } public static class Vertex diff --git a/src/MaIN.InferPage/Components/Pages/Settings.razor b/src/MaIN.InferPage/Components/Pages/Settings.razor index d8086e80..c7c832f7 100644 --- a/src/MaIN.InferPage/Components/Pages/Settings.razor +++ b/src/MaIN.InferPage/Components/Pages/Settings.razor @@ -1,6 +1,9 @@ @using MaIN.Domain.Configuration @using MaIN.Domain.Configuration.Vertex @using MaIN.Domain.Models.Abstract +@using MaIN.Core.Hub +@using MaIN.Core.Hub.Contexts +@using MaIN.Infrastructure.Models @inject SettingsService SettingsStorage @inject MaINSettings MaINSettings @rendermode @(new InteractiveServerRenderMode(prerender: false)) @@ -35,10 +38,25 @@
- + @if (_selectedBackend?.BackendType == BackendType.Self) + { + + } + else + { + + }
@if (RequiresApiKey) @@ -113,12 +131,21 @@
@if (ResolvedModelPathPreview is { } preview) { Will load: @preview } + @if (_isDocker) + { + + Running in Docker — mount your models directory when starting the container: + -v /your/local/models:/app/Models + The path /app/Models is pre-configured as the default. + + }
} @@ -164,6 +191,52 @@ } + @if (_selectedBackend?.BackendType == BackendType.Self && _isRegisteredModel + && ModelRegistry.TryGetById(_modelName!, out var _dlModel) && _dlModel is LocalModel { DownloadUrl: not null }) + { +
+ + @if (_isDownloading) + { +
+ @if (_downloadProgress?.Percentage is { } pct) + { + + + @($"{pct:F1}% · {FormatBytes(_downloadProgress.Value.BytesRead)}") + @if (_downloadProgress.Value.TotalBytes is { } total) { @($" / {FormatBytes(total)}") } + @($" · {FormatSpeed(_downloadProgress.Value.BytesPerSecond)}") + @if (_downloadProgress?.Eta is { } eta) { @($" · ETA {eta:mm\\:ss}") } + + } + else + { +
+ + Connecting... +
+ } +
+ } + else if (_isModelDownloaded) + { + Downloaded + } + else + { +
+ + Download + + @if (_downloadError != null) + { + @_downloadError + } +
+ } +
+ } + @if (_selectedBackend?.BackendType == BackendType.Self && !_isRegisteredModel && _manualVision) {
@@ -191,6 +264,17 @@ [Parameter] public EventCallback OnSettingsApplied { get; set; } [Parameter] public EventCallback OnClose { get; set; } + // Chat-appropriate local models (excludes embedding, TTS, image-gen, ONNX-only models) + private static readonly LocalModel[] _chatLocalModels = ModelRegistry.GetAllLocal() + .Where(m => m.DownloadUrl != null && + m is not IImageGenerationModel && + !m.FileName.EndsWith(".onnx", StringComparison.OrdinalIgnoreCase) && + !m.Name.Contains("Embed", StringComparison.OrdinalIgnoreCase)) + .OrderBy(m => m.Name) + .ToArray(); + + private IEnumerable _selectedLocalModels = []; + private List _backendOptions = null!; private BackendOption? _selectedBackend; private string? _modelName; @@ -237,13 +321,13 @@ fileName = sanitizedModel; } - // Relative path: prepend configured models directory + // Relative path: prepend configured models directory (or default) if (!Path.IsPathFullyQualified(fileName)) { var modelsDir = MaINSettings.ModelsPath - ?? Environment.GetEnvironmentVariable("MaIN_ModelsPath"); - if (!string.IsNullOrEmpty(modelsDir)) - fileName = Path.Combine(modelsDir, fileName); + ?? Environment.GetEnvironmentVariable("MaIN_ModelsPath") + ?? Utils.DefaultModelsPath; + fileName = Path.Combine(modelsDir, fileName); } return fileName; @@ -286,6 +370,20 @@ private bool _manualImageGen; private string? _mmProjName; + // Download state (registered local models only) + private bool _isModelDownloaded; + private bool _isDownloading; + private string? _downloadError; + private CancellationTokenSource? _downloadCts; + private DownloadProgress? _downloadProgress; + + private static readonly bool _isDocker = + Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER") == "true"; + + private string _modelPathPlaceholder => _isDocker + ? "/app/Models (or leave blank to use default)" + : "Optional: model.gguf, C:\\dir\\model.gguf, or C:\\dir"; + private bool RequiresApiKey => _selectedBackend?.RequiresApiKey == true; private bool IsVertexBackend => _selectedBackend?.BackendType == BackendType.Vertex; private bool HasVertexRequiredFields => !string.IsNullOrWhiteSpace(_vertexProjectId) @@ -366,6 +464,7 @@ _vertexLocation = settings.VertexLocation; } + SyncLocalModelSelection(); OnModelNameChanged(); } else if (!Utils.NeedsConfiguration) @@ -382,6 +481,7 @@ _modelName = Utils.Model; _modelPath = Utils.Path; + SyncLocalModelSelection(); OnModelNameChanged(); } @@ -398,6 +498,7 @@ if (profile != null) { _modelName = profile.Model; + SyncLocalModelSelection(); OnModelNameChanged(); // sets _isRegisteredModel + auto-detects caps if (!_isRegisteredModel) // only restore manual caps for unregistered models { @@ -414,8 +515,14 @@ if (!string.IsNullOrEmpty(lastModel)) { _modelName = lastModel; + SyncLocalModelSelection(); OnModelNameChanged(); } + else + { + _modelName = null; + SyncLocalModelSelection(); + } } if (IsVertexBackend) @@ -442,8 +549,48 @@ _apiKeyInput = null; } + private void OnLocalModelSelected(IEnumerable selected) + { + _selectedLocalModels = selected; + _modelName = selected.FirstOrDefault()?.Id; + OnModelNameChanged(); + } + + private void SearchLocalModels(OptionsSearchEventArgs e) + { + e.Items = string.IsNullOrEmpty(e.Text) + ? _chatLocalModels + : _chatLocalModels.Where(m => + m.Name.Contains(e.Text, StringComparison.OrdinalIgnoreCase) || + m.Id.Contains(e.Text, StringComparison.OrdinalIgnoreCase)); + } + + private static string FormatLocalModelName(LocalModel m) + { + var caps = new List(); + if (m is IVisionModel) caps.Add("Vision"); + if (m is IReasoningModel) caps.Add("Reasoning"); + return caps.Count > 0 ? $"{m.Name} [{string.Join(", ", caps)}]" : m.Name; + } + + private void SyncLocalModelSelection() + { + if (_selectedBackend?.BackendType == BackendType.Self && !string.IsNullOrEmpty(_modelName)) + { + var match = _chatLocalModels.FirstOrDefault(m => m.Id == _modelName); + _selectedLocalModels = match != null ? [match] : []; + } + else + { + _selectedLocalModels = []; + } + } + private void OnModelNameChanged() { + _isModelDownloaded = false; + _downloadError = null; + if (string.IsNullOrWhiteSpace(_modelName)) { _isRegisteredModel = false; @@ -456,6 +603,9 @@ _detectedVision = model is IVisionModel; _detectedReasoning = model is IReasoningModel; _detectedImageGen = model is IImageGenerationModel; + + if (_selectedBackend?.BackendType == BackendType.Self && model is LocalModel localModel) + _isModelDownloaded = localModel.IsDownloaded(GetEffectiveModelsBasePath()); } else { @@ -469,6 +619,74 @@ } } + private void OnModelPathChanged() + { + if (_isRegisteredModel && _selectedBackend?.BackendType == BackendType.Self + && ModelRegistry.TryGetById(_modelName!, out var model) && model is LocalModel localModel) + { + _isModelDownloaded = localModel.IsDownloaded(GetEffectiveModelsBasePath()); + _downloadError = null; + } + } + + private string GetEffectiveModelsBasePath() + { + var raw = _modelPath?.Trim() ?? ""; + if (!string.IsNullOrEmpty(raw)) + { + return raw.EndsWith(".gguf", StringComparison.OrdinalIgnoreCase) + ? (Path.GetDirectoryName(raw) ?? "") + : raw; + } + return MaINSettings.ModelsPath + ?? Environment.GetEnvironmentVariable("MaIN_ModelsPath") + ?? Utils.DefaultModelsPath; + } + + private async Task DownloadModelAsync() + { + _isDownloading = true; + _downloadError = null; + _downloadProgress = null; + _downloadCts = new CancellationTokenSource(); + StateHasChanged(); + try + { + var basePath = GetEffectiveModelsBasePath(); + Directory.CreateDirectory(basePath); + Environment.SetEnvironmentVariable("MaIN_ModelsPath", basePath); + MaINSettings.ModelsPath = basePath; + var progress = new Progress(p => + { + _downloadProgress = p; + _ = InvokeAsync(StateHasChanged); + }); + await AIHub.Model().EnsureDownloadedAsync(_modelName!, progress, _downloadCts.Token); + _isModelDownloaded = true; + } + catch (OperationCanceledException) { } + catch (Exception ex) + { + _downloadError = ex.Message; + } + finally + { + _isDownloading = false; + _downloadCts = null; + StateHasChanged(); + } + } + + private static string FormatBytes(long bytes) => bytes switch + { + >= 1024L * 1024 * 1024 => $"{bytes / (1024.0 * 1024 * 1024):F1} GB", + >= 1024 * 1024 => $"{bytes / (1024.0 * 1024):F1} MB", + >= 1024 => $"{bytes / 1024.0:F1} KB", + _ => $"{bytes} B" + }; + + private static string FormatSpeed(double bytesPerSecond) => $"{FormatBytes((long)bytesPerSecond)}/s"; + private async Task LoadApiKeyPreview() { if (_selectedBackend == null || !_selectedBackend.RequiresApiKey) diff --git a/src/MaIN.InferPage/MaIN.InferPage.csproj b/src/MaIN.InferPage/MaIN.InferPage.csproj index ba1ff18a..66aafede 100644 --- a/src/MaIN.InferPage/MaIN.InferPage.csproj +++ b/src/MaIN.InferPage/MaIN.InferPage.csproj @@ -1,7 +1,7 @@ - net8.0;net10.0 + net10.0 enable enable false diff --git a/src/MaIN.InferPage/Program.cs b/src/MaIN.InferPage/Program.cs index ef07369d..36a6d31a 100644 --- a/src/MaIN.InferPage/Program.cs +++ b/src/MaIN.InferPage/Program.cs @@ -75,16 +75,11 @@ { if (string.IsNullOrEmpty(modelPathArg)) { - Console.WriteLine("Error: A model path must be provided using --path when a local model is specified."); - return; - } - - var envModelsPath = Environment.GetEnvironmentVariable("MaIN_ModelsPath"); - if (string.IsNullOrEmpty(envModelsPath)) - { - Console.Write("Please enter the MaIN_ModelsPath: "); - envModelsPath = Console.ReadLine(); - Environment.SetEnvironmentVariable("MaIN_ModelsPath", envModelsPath); + var defaultPath = Utils.DefaultModelsPath; + Directory.CreateDirectory(defaultPath); + Environment.SetEnvironmentVariable("MaIN_ModelsPath", defaultPath); + Utils.Path = defaultPath; + Console.WriteLine($"No --path provided. Using default models directory: {defaultPath}"); } } } @@ -129,9 +124,9 @@ } app.UseHttpsRedirection(); -app.UseStaticFiles(); app.UseAntiforgery(); app.Services.UseMaIN(); +app.MapStaticAssets(); app.MapRazorComponents() .AddInteractiveServerRenderMode(); diff --git a/src/MaIN.InferPage/Utils.cs b/src/MaIN.InferPage/Utils.cs index 582fe267..1fc14820 100644 --- a/src/MaIN.InferPage/Utils.cs +++ b/src/MaIN.InferPage/Utils.cs @@ -16,6 +16,8 @@ public static class Utils public static bool NeedsConfiguration { get; set; } + public static string DefaultModelsPath => System.IO.Path.Combine(Directory.GetCurrentDirectory(), "models"); + // Manual capability overrides for unregistered models (set from Settings UI) public static bool? ManualVision { get; set; } public static bool? ManualReasoning { get; set; } @@ -67,6 +69,20 @@ public static void ApplySettings( { Path = null; } + + if (backendType == BackendType.Self && Path == null) + { + // Prefer an already-configured path (e.g. MaIN_ModelsPath set by Docker ENV) so that + // volume mounts are respected without the user having to re-enter the path. + var defaultPath = Environment.GetEnvironmentVariable("MaIN_ModelsPath") + ?? (!string.IsNullOrEmpty(mainSettings.ModelsPath) ? mainSettings.ModelsPath : null) + ?? DefaultModelsPath; + Directory.CreateDirectory(defaultPath); + Path = defaultPath; + mainSettings.ModelsPath = defaultPath; + Environment.SetEnvironmentVariable("MaIN_ModelsPath", defaultPath); + } + HasApiKey = !string.IsNullOrEmpty(apiKey); NeedsConfiguration = false; diff --git a/src/MaIN.InferPage/appsettings.Production.json b/src/MaIN.InferPage/appsettings.Production.json new file mode 100644 index 00000000..7152d15b --- /dev/null +++ b/src/MaIN.InferPage/appsettings.Production.json @@ -0,0 +1,14 @@ +{ + "Logging": { + "LogLevel": { + "LLama": "Debug" + } + }, + "Kestrel": { + "EndPoints": { + "Http": { + "Url": "http://+:5555" + } + } + } +} diff --git a/src/MaIN.Infrastructure/Models/DownloadProgress.cs b/src/MaIN.Infrastructure/Models/DownloadProgress.cs new file mode 100644 index 00000000..bcc8e3b4 --- /dev/null +++ b/src/MaIN.Infrastructure/Models/DownloadProgress.cs @@ -0,0 +1,15 @@ +namespace MaIN.Infrastructure.Models; + +public readonly record struct DownloadProgress( + long BytesRead, + long? TotalBytes, + double BytesPerSecond) +{ + public double? Percentage => TotalBytes is > 0 + ? (double)BytesRead / TotalBytes.Value * 100 + : null; + + public TimeSpan? Eta => TotalBytes.HasValue && BytesPerSecond > 0 + ? TimeSpan.FromSeconds((TotalBytes.Value - BytesRead) / BytesPerSecond) + : null; +} diff --git a/src/MaIN.Services/MaIN.Services.csproj b/src/MaIN.Services/MaIN.Services.csproj index 50459831..003ba66f 100644 --- a/src/MaIN.Services/MaIN.Services.csproj +++ b/src/MaIN.Services/MaIN.Services.csproj @@ -15,9 +15,9 @@ - - - + + + diff --git a/src/MaIN.Services/Services/DataSourceProvider.cs b/src/MaIN.Services/Services/DataSourceProvider.cs index 1dce392c..686b5bc0 100644 --- a/src/MaIN.Services/Services/DataSourceProvider.cs +++ b/src/MaIN.Services/Services/DataSourceProvider.cs @@ -52,11 +52,7 @@ public string FetchTextData(object? sourceDetails) public async Task FetchApiData(object? details, string? filter, IHttpClientFactory httpClientFactory, Dictionary properties) { - var apiDetails = JsonSerializer.Deserialize(details!.ToString()!, - new JsonSerializerOptions() - { - PropertyNameCaseInsensitive = true, - }); + var apiDetails = details as AgentApiSourceDetails; var httpClient = httpClientFactory.CreateClient(); diff --git a/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs b/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs index 8dee7c23..e84c5c12 100644 --- a/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs +++ b/src/MaIN.Services/Services/LLMService/Memory/Embeddings/LLamaEmbedderMaINClone.cs @@ -74,7 +74,7 @@ public async Task> GetEmbeddings(string input, Cancellati { // Create a fresh context for each embedding call (0.26.0 pattern) using var context = _weights.CreateContext(_params, _logger); - NativeApi.llama_set_embeddings(context.NativeHandle, true); + //NativeApi.em.llama_set_embeddings(context.NativeHandle, true); var tokens = context.Tokenize(input, special: true); if (tokens.Length > context.ContextSize) diff --git a/src/MaIN.Services/Services/LLMService/OllamaService.cs b/src/MaIN.Services/Services/LLMService/OllamaService.cs index 978ff6c8..f1f5e0db 100644 --- a/src/MaIN.Services/Services/LLMService/OllamaService.cs +++ b/src/MaIN.Services/Services/LLMService/OllamaService.cs @@ -24,9 +24,11 @@ public sealed class OllamaService( private bool HasApiKey => !string.IsNullOrEmpty(_settings.OllamaKey) || !string.IsNullOrEmpty(Environment.GetEnvironmentVariable(LLMApiRegistry.Ollama.ApiKeyEnvName)); + private string LocalBaseUrl => (_settings.OllamaBaseUrl ?? "http://localhost:11434").TrimEnd('/'); + protected override string HttpClientName => HasApiKey ? ServiceConstants.HttpClients.OllamaClient : ServiceConstants.HttpClients.OllamaLocalClient; - protected override string ChatCompletionsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaOpenAiChatCompletions : ServiceConstants.ApiUrls.OllamaLocalOpenAiChatCompletions; - protected override string ModelsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaModels : ServiceConstants.ApiUrls.OllamaLocalModels; + protected override string ChatCompletionsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaOpenAiChatCompletions : $"{LocalBaseUrl}/v1/chat/completions"; + protected override string ModelsUrl => HasApiKey ? ServiceConstants.ApiUrls.OllamaModels : $"{LocalBaseUrl}/v1/models"; protected override Type ExpectedParamsType => typeof(OllamaInferenceParams); protected override string GetApiKey() diff --git a/src/entrypoint-ollama.sh b/src/entrypoint-ollama.sh new file mode 100644 index 00000000..224548f9 --- /dev/null +++ b/src/entrypoint-ollama.sh @@ -0,0 +1,31 @@ +#!/bin/bash +set -e + +# Start Ollama in the background, binding to all interfaces so port 11434 +# is reachable from the host for model management (ollama pull, etc.) +OLLAMA_HOST=0.0.0.0:11434 ollama serve & +OLLAMA_PID=$! + +# Forward SIGTERM/SIGINT to both child processes +_term() { + kill "$OLLAMA_PID" 2>/dev/null + kill "$INFERPAGE_PID" 2>/dev/null +} +trap _term SIGTERM SIGINT + +# Wait until Ollama's HTTP API is accepting connections +echo "[entrypoint] Waiting for Ollama to be ready..." +until bash -c 'printf "" 2>/dev/null >> /dev/tcp/localhost/11434' 2>/dev/null; do + sleep 1 +done +echo "[entrypoint] Ollama is ready." + +# Start InferPage in the background so we can wait on both PIDs +dotnet MaIN.InferPage.dll & +INFERPAGE_PID=$! + +# Block until either process exits, then shut down the other +wait -n "$OLLAMA_PID" "$INFERPAGE_PID" +EXIT_CODE=$? +kill "$OLLAMA_PID" "$INFERPAGE_PID" 2>/dev/null +exit $EXIT_CODE