Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions app/MindWork AI Studio/Assistants/I18N/allTexts.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"

-- Estimated amount of tokens:
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"

-- Start new chat in workspace '{0}'
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace '{0}'"

Expand Down Expand Up @@ -5095,6 +5098,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."

-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."

-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."

Expand Down Expand Up @@ -5329,6 +5335,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"

-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."

-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."

Expand Down
6 changes: 4 additions & 2 deletions app/MindWork AI Studio/Components/ChatComponent.razor
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
</ChildContent>
<FooterContent>
<MudElement Style="flex: 0 0 auto;">
<MudTextField
<UserPromptComponent
T="string"
@ref="@this.inputField"
@bind-Text="@this.userInput"
Expand All @@ -50,8 +50,11 @@
Disabled="@this.IsInputForbidden()"
Immediate="@true"
OnKeyUp="@this.InputKeyEvent"
WhenTextChangedAsync="@(_ =>this.CalculateTokenCount())"
UserAttributes="@USER_INPUT_ATTRIBUTES"
Class="@this.UserInputClass"
DebounceTime="TimeSpan.FromSeconds(1)"
HelperText="@this.TokenCountMessage"
Style="@this.UserInputStyle"/>
</MudElement>
<MudToolBar WrapContent="true" Gutters="@false" Class="border border-solid rounded" Style="border-color: lightgrey;">
Expand Down Expand Up @@ -124,7 +127,6 @@
<MudIconButton Icon="@Icons.Material.Filled.Error" Color="Color.Error"/>
</MudTooltip>
}
<MudIconButton />
</MudToolBar>
</FooterContent>
</InnerScrolling>
26 changes: 25 additions & 1 deletion app/MindWork AI Studio/Components/ChatComponent.razor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
using AIStudio.Tools.Services;

using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Web;
Expand Down Expand Up @@ -36,6 +37,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable

[Inject]
private IDialogService DialogService { get; init; } = null!;

[Inject]
private RustService RustService { get; init; } = null!;

private const Placement TOOLBAR_TOOLTIP_PLACEMENT = Placement.Top;
private static readonly Dictionary<string, object?> USER_INPUT_ATTRIBUTES = new();
Expand All @@ -58,10 +62,12 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
private Guid currentWorkspaceId = Guid.Empty;
private CancellationTokenSource? cancellationTokenSource;
private HashSet<FileAttachment> chatDocumentPaths = [];
private string tokenCount = "0";
private string TokenCountMessage => $"{this.T("Estimated amount of tokens:")} {this.tokenCount}";

// Unfortunately, we need the input field reference to blur the focus away. Without
// this, we cannot clear the input field.
private MudTextField<string> inputField = null!;
private UserPromptComponent<string> inputField = null!;

#region Overrides of ComponentBase

Expand Down Expand Up @@ -405,6 +411,9 @@ private async Task InputKeyEvent(KeyboardEventArgs keyEvent)
// Was a modifier key pressed as well?
var isModifier = keyEvent.AltKey || keyEvent.CtrlKey || keyEvent.MetaKey || keyEvent.ShiftKey;

if (isEnter)
await this.CalculateTokenCount();

// Depending on the user's settings, might react to shortcuts:
switch (this.SettingsManager.ConfigurationData.Chat.ShortcutSendBehavior)
{
Expand Down Expand Up @@ -523,6 +532,7 @@ private async Task SendMessage(bool reuseLastUserPrompt = false)
this.userInput = string.Empty;
this.chatDocumentPaths.Clear();
await this.inputField.BlurAsync();
this.tokenCount = "0";

// Enable the stream state for the chat component:
this.isStreaming = true;
Expand Down Expand Up @@ -901,6 +911,20 @@ private Task EditLastBlock(IContent block)
return Task.CompletedTask;
}

private async Task CalculateTokenCount()
{
if (this.inputField.Value is null)
{
this.tokenCount = "0";
return;
}
var response = await this.RustService.GetTokenCount(this.inputField.Value);
if (response is null)
return;
this.tokenCount = response.TokenCount.ToString();
this.StateHasChanged();
}

#region Overrides of MSGComponentBase

protected override async Task ProcessIncomingMessage<T>(ComponentBase? sendingComponent, Event triggeredEvent, T? data) where T : default
Expand Down
68 changes: 68 additions & 0 deletions app/MindWork AI Studio/Components/UserPromptComponent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using Microsoft.AspNetCore.Components;
using Timer = System.Timers.Timer;

namespace AIStudio.Components;

/// <summary>
/// Debounced multi-line text input built on <see cref="MudTextField{T}"/>.
/// Keeps the base API while adding a debounce timer.
/// Callers can override any property as usual.
/// </summary>
public class UserPromptComponent<T> : MudTextField<T>
{
[Parameter]
public TimeSpan DebounceTime { get; set; } = TimeSpan.FromMilliseconds(800);

[Parameter]
public Func<string, Task> WhenTextChangedAsync { get; set; } = _ => Task.CompletedTask;

private readonly Timer debounceTimer = new();
private string text = string.Empty;
private string lastParameterText = string.Empty;
private string lastNotifiedText = string.Empty;
private bool isInitialized;

protected override async Task OnInitializedAsync()
{
this.text = this.Text ?? string.Empty;
this.lastParameterText = this.text;
this.lastNotifiedText = this.text;
this.debounceTimer.AutoReset = false;
this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
this.debounceTimer.Elapsed += (_, _) =>
{
this.debounceTimer.Stop();
if (this.text == this.lastNotifiedText)
return;

this.lastNotifiedText = this.text;
this.InvokeAsync(async () => await this.TextChanged.InvokeAsync(this.text));
this.InvokeAsync(async () => await this.WhenTextChangedAsync(this.text));
};

this.isInitialized = true;
await base.OnInitializedAsync();
}

protected override async Task OnParametersSetAsync()
{
// Ensure the timer uses the latest debouncing interval:
if (!this.isInitialized)
return;

if(Math.Abs(this.debounceTimer.Interval - this.DebounceTime.TotalMilliseconds) > 1)
this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;

// Only sync when the parent's parameter actually changed since the last change:
if (this.Text != this.lastParameterText)
{
this.text = this.Text ?? string.Empty;
this.lastParameterText = this.text;
}

this.debounceTimer.Stop();
this.debounceTimer.Start();

await base.OnParametersSetAsync();
}
}
2 changes: 2 additions & 0 deletions app/MindWork AI Studio/Pages/Information.razor
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@
<ThirdPartyComponent Name="sysinfo" Developer="Guillaume Gomez & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/GuillaumeGomez/sysinfo/blob/main/LICENSE" RepositoryUrl="https://github.com/GuillaumeGomez/sysinfo" UseCase="@T("This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.")"/>
<ThirdPartyComponent Name="tempfile" Developer="Steven Allen, Ashley Mannix & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/Stebalien/tempfile/blob/master/LICENSE-MIT" RepositoryUrl="https://github.com/Stebalien/tempfile" UseCase="@T("This library is used to create temporary folders for saving the certificate and private key for communication with Qdrant.")"/>
<ThirdPartyComponent Name="Lua-CSharp" Developer="Yusuke Nakada & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/nuskey8/Lua-CSharp/blob/main/LICENSE" RepositoryUrl="https://github.com/nuskey8/Lua-CSharp" UseCase="@T("We use Lua as the language for plugins. Lua-CSharp lets Lua scripts communicate with AI Studio and vice versa. Thank you, Yusuke Nakada, for this great library.")" />
<ThirdPartyComponent Name="DeepSeek-V3.2 Tokenizer" Developer="DeepSeek-AI" LicenseName="MIT" LicenseUrl="https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/mit.md" RepositoryUrl="https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main" UseCase="@T("We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.")" />
<ThirdPartyComponent Name="Tokenizer" Developer="Anthony Moi, Nicolas Patry, Pierric Cistac, Arthur Zucker & Open Source Community" LicenseName="Apache-2.0" LicenseUrl="https://github.com/huggingface/tokenizers/blob/main/LICENSE" RepositoryUrl="https://github.com/huggingface/tokenizers" UseCase="@T("The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.")" />
<ThirdPartyComponent Name="HtmlAgilityPack" Developer="ZZZ Projects & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE" RepositoryUrl="https://github.com/zzzprojects/html-agility-pack" UseCase="@T("We use the HtmlAgilityPack to extract content from the web. This is necessary, e.g., when you provide a URL as input for an assistant.")"/>
<ThirdPartyComponent Name="ReverseMarkdown" Developer="Babu Annamalai & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/mysticmind/reversemarkdown-net/blob/master/LICENSE" RepositoryUrl="https://github.com/mysticmind/reversemarkdown-net" UseCase="@T("This library is used to convert HTML to Markdown. This is necessary, e.g., when you provide a URL as input for an assistant.")"/>
<ThirdPartyComponent Name="wikEd diff" Developer="Cacycle & Open Source Community" LicenseName="None (public domain)" LicenseUrl="https://en.wikipedia.org/wiki/User:Cacycle/diff#License" RepositoryUrl="https://en.wikipedia.org/wiki/User:Cacycle/diff" UseCase="@T("This library is used to display the differences between two texts. This is necessary, e.g., for the grammar and spelling assistant.")"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1620,6 +1620,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "Der ausge
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Wähle zuerst einen Anbieter aus"

-- Estimated amount of tokens:
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Geschätzte Anzahl an Tokens:"

-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Neuen Chat im Arbeitsbereich \"{0}\" starten"

Expand Down Expand Up @@ -5097,6 +5100,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startprotokollda
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Sehen Sie sich den Quellcode von AI Studio auf GitHub an – wir freuen uns über ihre Beiträge."

-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "Die Tokenizer‑Bibliothek dient als Basis‑Framework für die Integration des DeepSeek‑Tokenizers."

-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID-Konflikt: Die Plugin-ID stimmt nicht mit der ID der Unternehmenskonfiguration überein."

Expand Down Expand Up @@ -5331,6 +5337,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "Dies ist eine Bib
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Verwendetes .NET SDK"

-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "Wir verwenden den DeepSeek‑Tokenizer, um die Token‑Anzahl einer Eingabe zu schätzen."

-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "Diese Bibliothek wird verwendet, um Sidecar-Prozesse zu verwalten und sicherzustellen, dass veraltete oder Zombie-Sidecars erkannt und beendet werden."

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1620,6 +1620,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"

-- Estimated amount of tokens:
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"

-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace \"{0}\""

Expand Down Expand Up @@ -5097,6 +5100,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."

-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."

-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."

Expand Down Expand Up @@ -5331,6 +5337,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"

-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."

-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."

Expand Down
6 changes: 6 additions & 0 deletions app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace AIStudio.Tools.Rust;

public sealed class TokenCountInfo
{
public int TokenCount { get; set; }
}
27 changes: 27 additions & 0 deletions app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using AIStudio.Tools.Rust;

namespace AIStudio.Tools.Services;

public sealed partial class RustService
{
public async Task<TokenCountInfo?> GetTokenCount(string text)
{
try
{
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
var payload = new { text };
var response = await this.http.PostAsJsonAsync("/system/tokenizer/count", payload, this.jsonRustSerializerOptions, cts.Token);
response.EnsureSuccessStatusCode();
return await response.Content.ReadFromJsonAsync<TokenCountInfo>(this.jsonRustSerializerOptions, cancellationToken: cts.Token);
}
catch (Exception e)
{
if(this.logger is not null)
this.logger.LogError(e, "Error while getting token count from Rust service.");
else
Console.WriteLine($"Error while getting token count from Rust service: '{e}'.");

return null;
}
}
}
1 change: 1 addition & 0 deletions runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pptx-to-md = "0.4.0"
tempfile = "3.8"
strum_macros = "0.27"
sysinfo = "0.38.0"
tokenizers = "0.22.2"

# Fixes security vulnerability downstream, where the upstream is not fixed yet:
time = "0.3.47" # -> Rocket
Expand Down
3 changes: 2 additions & 1 deletion runtime/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ pub mod qdrant;
pub mod certificate_factory;
pub mod runtime_api_token;
pub mod stale_process_cleanup;
mod sidecar_types;
mod sidecar_types;
pub mod tokenizer;
8 changes: 6 additions & 2 deletions runtime/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use mindwork_ai_studio::environment::is_dev;
use mindwork_ai_studio::log::init_logging;
use mindwork_ai_studio::metadata::MetaData;
use mindwork_ai_studio::runtime_api::start_runtime_api;

use mindwork_ai_studio::tokenizer::{init_tokenizer};

#[tokio::main]
async fn main() {
Expand Down Expand Up @@ -43,8 +43,12 @@ async fn main() {
info!("Running in production mode.");
}

if let Err(e) = init_tokenizer() {
warn!(Source = "Tokenizer"; "Error during the initialisation of the tokenizer: {}", e);
}

generate_runtime_certificate();
start_runtime_api();

start_tauri();
}
}
1 change: 1 addition & 0 deletions runtime/src/runtime_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ pub fn start_runtime_api() {
crate::file_data::extract_data,
crate::log::get_log_paths,
crate::log::log_event,
crate::tokenizer::tokenizer_count,
crate::app_window::register_shortcut,
crate::app_window::validate_shortcut,
crate::app_window::suspend_shortcuts,
Expand Down
Loading