diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
index 022339505..8ef24cc03 100644
--- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
+++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
@@ -1618,6 +1618,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
+-- Estimated amount of tokens:
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
+
-- Start new chat in workspace '{0}'
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace '{0}'"
@@ -5095,6 +5098,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
+-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
+
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
@@ -5329,6 +5335,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
+-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
+
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor b/app/MindWork AI Studio/Components/ChatComponent.razor
index 3c49a4b5e..07ecd7a3c 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor
@@ -34,7 +34,7 @@
-
@@ -124,7 +127,6 @@
}
-
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
index c7bd4dce4..986f37c50 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
@@ -3,6 +3,7 @@
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
+using AIStudio.Tools.Services;
using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Web;
@@ -36,6 +37,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
[Inject]
private IDialogService DialogService { get; init; } = null!;
+
+ [Inject]
+ private RustService RustService { get; init; } = null!;
private const Placement TOOLBAR_TOOLTIP_PLACEMENT = Placement.Top;
private static readonly Dictionary<string, object?> USER_INPUT_ATTRIBUTES = new();
@@ -58,10 +62,12 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
private Guid currentWorkspaceId = Guid.Empty;
private CancellationTokenSource? cancellationTokenSource;
private HashSet<string> chatDocumentPaths = [];
+ private string tokenCount = "0";
+ private string TokenCountMessage => $"{this.T("Estimated amount of tokens:")} {this.tokenCount}";
// Unfortunately, we need the input field reference to blur the focus away. Without
// this, we cannot clear the input field.
- private MudTextField<string> inputField = null!;
+ private UserPromptComponent inputField = null!;
#region Overrides of ComponentBase
@@ -405,6 +411,9 @@ private async Task InputKeyEvent(KeyboardEventArgs keyEvent)
// Was a modifier key pressed as well?
var isModifier = keyEvent.AltKey || keyEvent.CtrlKey || keyEvent.MetaKey || keyEvent.ShiftKey;
+ if (isEnter)
+ await this.CalculateTokenCount();
+
// Depending on the user's settings, might react to shortcuts:
switch (this.SettingsManager.ConfigurationData.Chat.ShortcutSendBehavior)
{
@@ -523,6 +532,7 @@ private async Task SendMessage(bool reuseLastUserPrompt = false)
this.userInput = string.Empty;
this.chatDocumentPaths.Clear();
await this.inputField.BlurAsync();
+ this.tokenCount = "0";
// Enable the stream state for the chat component:
this.isStreaming = true;
@@ -901,6 +911,20 @@ private Task EditLastBlock(IContent block)
return Task.CompletedTask;
}
+ private async Task CalculateTokenCount()
+ {
+ if (this.inputField.Value is null)
+ {
+ this.tokenCount = "0";
+ return;
+ }
+ var response = await this.RustService.GetTokenCount(this.inputField.Value);
+ if (response is null)
+ return;
+ this.tokenCount = response.TokenCount.ToString();
+ this.StateHasChanged();
+ }
+
#region Overrides of MSGComponentBase
protected override async Task ProcessIncomingMessage<T>(ComponentBase? sendingComponent, Event triggeredEvent, T? data) where T : default
diff --git a/app/MindWork AI Studio/Components/UserPromptComponent.cs b/app/MindWork AI Studio/Components/UserPromptComponent.cs
new file mode 100644
index 000000000..03139a525
--- /dev/null
+++ b/app/MindWork AI Studio/Components/UserPromptComponent.cs
@@ -0,0 +1,68 @@
+using Microsoft.AspNetCore.Components;
+using Timer = System.Timers.Timer;
+
+namespace AIStudio.Components;
+
+/// <summary>
+/// Debounced multi-line text input built on <see cref="MudTextField{T}"/>.
+/// Keeps the base API while adding a debounce timer.
+/// Callers can override any property as usual.
+/// </summary>
+public class UserPromptComponent : MudTextField<string>
+{
+ [Parameter]
+ public TimeSpan DebounceTime { get; set; } = TimeSpan.FromMilliseconds(800);
+
+ [Parameter]
+ public Func<string, Task> WhenTextChangedAsync { get; set; } = _ => Task.CompletedTask;
+
+ private readonly Timer debounceTimer = new();
+ private string text = string.Empty;
+ private string lastParameterText = string.Empty;
+ private string lastNotifiedText = string.Empty;
+ private bool isInitialized;
+
+ protected override async Task OnInitializedAsync()
+ {
+ this.text = this.Text ?? string.Empty;
+ this.lastParameterText = this.text;
+ this.lastNotifiedText = this.text;
+ this.debounceTimer.AutoReset = false;
+ this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
+ this.debounceTimer.Elapsed += (_, _) =>
+ {
+ this.debounceTimer.Stop();
+ if (this.text == this.lastNotifiedText)
+ return;
+
+ this.lastNotifiedText = this.text;
+ this.InvokeAsync(async () => await this.TextChanged.InvokeAsync(this.text));
+ this.InvokeAsync(async () => await this.WhenTextChangedAsync(this.text));
+ };
+
+ this.isInitialized = true;
+ await base.OnInitializedAsync();
+ }
+
+ protected override async Task OnParametersSetAsync()
+ {
+ // Ensure the timer uses the latest debouncing interval:
+ if (!this.isInitialized)
+ return;
+
+ if(Math.Abs(this.debounceTimer.Interval - this.DebounceTime.TotalMilliseconds) > 1)
+ this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
+
+ // Only sync when the parent's parameter actually changed since the last change:
+ if (this.Text != this.lastParameterText)
+ {
+ this.text = this.Text ?? string.Empty;
+ this.lastParameterText = this.text;
+ }
+
+ this.debounceTimer.Stop();
+ this.debounceTimer.Start();
+
+ await base.OnParametersSetAsync();
+ }
+}
diff --git a/app/MindWork AI Studio/Pages/Information.razor b/app/MindWork AI Studio/Pages/Information.razor
index a859a142e..2c1838ebd 100644
--- a/app/MindWork AI Studio/Pages/Information.razor
+++ b/app/MindWork AI Studio/Pages/Information.razor
@@ -291,6 +291,8 @@
+
+
diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
index c518d4393..8fd50401f 100644
--- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
@@ -1620,6 +1620,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "Der ausge
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Wähle zuerst einen Anbieter aus"
+-- Estimated amount of tokens:
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Geschätzte Anzahl an Tokens:"
+
-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Neuen Chat im Arbeitsbereich \"{0}\" starten"
@@ -5097,6 +5100,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startprotokollda
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Sehen Sie sich den Quellcode von AI Studio auf GitHub an – wir freuen uns über ihre Beiträge."
+-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "Die Tokenizer‑Bibliothek dient als Basis‑Framework für die Integration des DeepSeek‑Tokenizers."
+
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID-Konflikt: Die Plugin-ID stimmt nicht mit der ID der Unternehmenskonfiguration überein."
@@ -5331,6 +5337,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "Dies ist eine Bib
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Verwendetes .NET SDK"
+-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "Wir verwenden den DeepSeek‑Tokenizer, um die Token‑Anzahl einer Eingabe zu schätzen."
+
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "Diese Bibliothek wird verwendet, um Sidecar-Prozesse zu verwalten und sicherzustellen, dass veraltete oder Zombie-Sidecars erkannt und beendet werden."
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
index a4fdfd5c1..216f8eab6 100644
--- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
@@ -1620,6 +1620,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
+-- Estimated amount of tokens:
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
+
-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace \"{0}\""
@@ -5097,6 +5100,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
+-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
+
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
@@ -5331,6 +5337,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
+-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
+
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."
diff --git a/app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs b/app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
new file mode 100644
index 000000000..c0e491bf4
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Rust/TokenCountInfo.cs
@@ -0,0 +1,6 @@
+namespace AIStudio.Tools.Rust;
+
+public sealed class TokenCountInfo
+{
+ public int TokenCount { get; set; }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs b/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs
new file mode 100644
index 000000000..e01272dbe
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs
@@ -0,0 +1,27 @@
+using AIStudio.Tools.Rust;
+
+namespace AIStudio.Tools.Services;
+
+public sealed partial class RustService
+{
+ public async Task<TokenCountInfo?> GetTokenCount(string text)
+ {
+ try
+ {
+ var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
+ var payload = new { text };
+ var response = await this.http.PostAsJsonAsync("/system/tokenizer/count", payload, this.jsonRustSerializerOptions, cts.Token);
+ response.EnsureSuccessStatusCode();
+ return await response.Content.ReadFromJsonAsync<TokenCountInfo>(this.jsonRustSerializerOptions, cancellationToken: cts.Token);
+ }
+ catch (Exception e)
+ {
+ if(this.logger is not null)
+ this.logger.LogError(e, "Error while getting token count from Rust service.");
+ else
+ Console.WriteLine($"Error while getting token count from Rust service: '{e}'.");
+
+ return null;
+ }
+ }
+}
\ No newline at end of file
diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml
index b3c1b32e7..c2b19342a 100644
--- a/runtime/Cargo.toml
+++ b/runtime/Cargo.toml
@@ -42,6 +42,7 @@ pptx-to-md = "0.4.0"
tempfile = "3.8"
strum_macros = "0.27"
sysinfo = "0.38.0"
+tokenizers = "0.22.2"
# Fixes security vulnerability downstream, where the upstream is not fixed yet:
time = "0.3.47" # -> Rocket
diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs
index 1b13e0991..102efbe2e 100644
--- a/runtime/src/lib.rs
+++ b/runtime/src/lib.rs
@@ -17,4 +17,5 @@ pub mod qdrant;
pub mod certificate_factory;
pub mod runtime_api_token;
pub mod stale_process_cleanup;
-mod sidecar_types;
\ No newline at end of file
+mod sidecar_types;
+pub mod tokenizer;
\ No newline at end of file
diff --git a/runtime/src/main.rs b/runtime/src/main.rs
index 00a7ba905..a210de540 100644
--- a/runtime/src/main.rs
+++ b/runtime/src/main.rs
@@ -11,7 +11,7 @@ use mindwork_ai_studio::environment::is_dev;
use mindwork_ai_studio::log::init_logging;
use mindwork_ai_studio::metadata::MetaData;
use mindwork_ai_studio::runtime_api::start_runtime_api;
-
+use mindwork_ai_studio::tokenizer::{init_tokenizer};
#[tokio::main]
async fn main() {
@@ -43,8 +43,12 @@ async fn main() {
info!("Running in production mode.");
}
+ if let Err(e) = init_tokenizer() {
+ warn!(Source = "Tokenizer"; "Error during the initialisation of the tokenizer: {}", e);
+ }
+
generate_runtime_certificate();
start_runtime_api();
start_tauri();
-}
\ No newline at end of file
+}
diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs
index 64bc8174a..6ceeb1e2b 100644
--- a/runtime/src/runtime_api.rs
+++ b/runtime/src/runtime_api.rs
@@ -89,6 +89,7 @@ pub fn start_runtime_api() {
crate::file_data::extract_data,
crate::log::get_log_paths,
crate::log::log_event,
+ crate::tokenizer::tokenizer_count,
crate::app_window::register_shortcut,
crate::app_window::validate_shortcut,
crate::app_window::suspend_shortcuts,
diff --git a/runtime/src/tokenizer.rs b/runtime/src/tokenizer.rs
new file mode 100644
index 000000000..3614b3968
--- /dev/null
+++ b/runtime/src/tokenizer.rs
@@ -0,0 +1,54 @@
+use std::fs;
+use std::path::{PathBuf};
+use std::sync::OnceLock;
+use rocket::{post};
+use rocket::serde::json::Json;
+use rocket::serde::Serialize;
+use serde::Deserialize;
+use tokenizers::Error;
+use tokenizers::tokenizer::Tokenizer;
+use crate::api_token::APIToken;
+
+static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new();
+
+static TEXT: &str = "";
+
+pub fn init_tokenizer() -> Result<(), Error>{
+ let mut target_dir = PathBuf::from("target");
+ target_dir.push("tokenizers");
+ fs::create_dir_all(&target_dir)?;
+
+ let mut local_tokenizer_path = target_dir.clone();
+ local_tokenizer_path.push("tokenizer.json");
+
+ TOKENIZER.set(Tokenizer::from_file(local_tokenizer_path)?).expect("Could not set the tokenizer.");
+ Ok(())
+}
+
+pub fn get_token_count(mut text: &str) -> usize {
+ if text.is_empty() {
+ text = TEXT;
+ }
+ match TOKENIZER.get().unwrap().encode(text, true) {
+ Ok(encoding) => encoding.len(),
+ Err(_) => 0,
+ }
+}
+
+#[derive(Deserialize)]
+pub struct SetTokenText {
+ pub text: String,
+}
+
+#[derive(Serialize)]
+pub struct GetTokenCount{
+ token_count: usize,
+}
+
+
+#[post("/system/tokenizer/count", data = "<req>")]
+pub fn tokenizer_count(_token: APIToken, req: Json<SetTokenText>) -> Json<GetTokenCount> {
+ Json(GetTokenCount {
+ token_count: get_token_count(&req.text),
+ })
+}
\ No newline at end of file