diff --git a/.github/workflows/build-python-steps.yml b/.github/workflows/build-python-steps.yml
new file mode 100644
index 0000000..63901e8
--- /dev/null
+++ b/.github/workflows/build-python-steps.yml
@@ -0,0 +1,128 @@
+name: Build Python SDK
+
+on:
+  workflow_call:
+    inputs:
+      version:
+        required: true
+        type: string
+      useWinML:
+        required: false
+        type: boolean
+        default: false
+      platform:
+        required: false
+        type: string
+        default: 'windows'
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ${{ inputs.platform }}-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          clean: true
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      # Needed to download Foundry Local Core from Azure Artifacts
+      - name: Setup .NET SDK for NuGet authentication
+        uses: actions/setup-dotnet@v5
+        with:
+          dotnet-version: '9.0.x'
+        env:
+          NUGET_AUTH_TOKEN: ${{ secrets.AZURE_DEVOPS_PAT }}
+
+      # Clone test-data-shared from Azure DevOps (models for integration tests)
+      - name: Checkout test-data-shared from Azure DevOps
+        shell: pwsh
+        working-directory: ${{ github.workspace }}/..
+        env:
+          # Hand the secret to the script via the environment rather than pasting it into the script text.
+          AZURE_DEVOPS_PAT: ${{ secrets.AZURE_DEVOPS_PAT }}
+        run: |
+          $encodedPat = [Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes(":$($env:AZURE_DEVOPS_PAT)"))
+
+          git config --global http.https://dev.azure.com.extraheader "AUTHORIZATION: Basic $encodedPat"
+
+          git lfs install
+          git clone --depth 1 https://dev.azure.com/microsoft/windows.ai.toolkit/_git/test-data-shared test-data-shared
+
+          Write-Host "Clone completed successfully to ${{ github.workspace }}/../test-data-shared"
+
+      - name: Checkout specific commit in test-data-shared
+        shell: pwsh
+        working-directory: ${{ github.workspace }}/../test-data-shared
+        run: |
+          # The clone above is shallow (--depth 1), so the pinned commit only exists locally
+          # while it is still the branch tip. Fetch it explicitly before checking it out.
+          $commit = '231f820fe285145b7ea4a449b112c1228ce66a41'
+          git fetch --depth 1 origin $commit
+          if ($LASTEXITCODE -ne 0) {
+            Write-Error "Git fetch of $commit failed."
+            exit 1
+          }
+          git checkout $commit
+          if ($LASTEXITCODE -ne 0) {
+            Write-Error "Git checkout failed."
+            exit 1
+          }
+
+      - name: Install dependencies
+        working-directory: sdk_v2/python
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements-dev.txt
+          python -m pip install -e .
+
+      - name: Download native binaries (WinML)
+        if: ${{ inputs.useWinML == true }}
+        working-directory: sdk_v2/python
+        run: foundry-local-install --winml
+
+      - name: Download native binaries (Standard)
+        if: ${{ inputs.useWinML == false }}
+        working-directory: sdk_v2/python
+        run: foundry-local-install
+
+      - name: Set package version
+        shell: pwsh
+        working-directory: sdk_v2/python
+        env:
+          # Read the caller-supplied version from the environment so it is never spliced into the script source.
+          PACKAGE_VERSION: ${{ inputs.version }}
+        run: |
+          $versionFile = "src/version.py"
+          $content = Get-Content $versionFile -Raw
+          $content = $content -replace '__version__\s*=\s*"[^"]*"', "__version__ = `"$($env:PACKAGE_VERSION)`""
+          Set-Content -Path $versionFile -Value $content
+          Write-Host "Updated version to $($env:PACKAGE_VERSION)"
+
+      - name: Run tests
+        working-directory: sdk_v2/python
+        run: python -m pytest test/ -v --junitxml=test-results.xml
+
+      - name: Build wheel
+        working-directory: sdk_v2/python
+        run: python -m build --wheel --outdir dist/
+
+      - name: Upload Python packages
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-sdk-${{ inputs.platform }}${{ inputs.useWinML == true && '-winml' || '' }}
+          path: sdk_v2/python/dist/*
+
+      - name: Upload flcore logs
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: python-sdk-${{ inputs.platform }}${{ inputs.useWinML == true && '-winml' || '' }}-logs
+          path: sdk_v2/python/logs/**
diff --git a/.github/workflows/foundry-local-sdk-build.yml b/.github/workflows/foundry-local-sdk-build.yml
index 1190ac9..a22ec8a 100644
--- a/.github/workflows/foundry-local-sdk-build.yml
+++ b/.github/workflows/foundry-local-sdk-build.yml
@@ -29,6 +29,12 @@ jobs:
       version: '0.9.0.${{ github.run_number }}'
       platform: 'windows'
     secrets: inherit
+  build-python-windows:
+    uses: ./.github/workflows/build-python-steps.yml
+    with:
+      version: '0.9.0.${{ github.run_number }}'
platform: 'windows' + secrets: inherit build-cs-windows-WinML: uses: ./.github/workflows/build-cs-steps.yml @@ -44,6 +50,13 @@ jobs: platform: 'windows' useWinML: true secrets: inherit + build-python-windows-WinML: + uses: ./.github/workflows/build-python-steps.yml + with: + version: '0.9.0.${{ github.run_number }}' + platform: 'windows' + useWinML: true + secrets: inherit build-cs-macos: uses: ./.github/workflows/build-cs-steps.yml @@ -56,4 +69,11 @@ jobs: with: version: '0.9.0.${{ github.run_number }}' platform: 'macos' - secrets: inherit \ No newline at end of file + secrets: inherit + build-python-macos: + uses: ./.github/workflows/build-python-steps.yml + with: + version: '0.9.0.${{ github.run_number }}' + platform: 'macos' + secrets: inherit + diff --git a/sdk_v2/python/.gitignore b/sdk_v2/python/.gitignore new file mode 100644 index 0000000..543c109 --- /dev/null +++ b/sdk_v2/python/.gitignore @@ -0,0 +1,20 @@ +# Native binaries downloaded from NuGet (per-platform) +packages/ + +# Build / egg info +*.egg-info/ +dist/ +build/ +*.whl +*.tar.gz +__pycache__/ + +# Logs +logs/ + +# IDE +.vscode/ +.idea/ + +# pytest +.pytest_cache/ diff --git a/sdk_v2/python/LICENSE.txt b/sdk_v2/python/LICENSE.txt new file mode 100644 index 0000000..48bc6bb --- /dev/null +++ b/sdk_v2/python/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/sdk_v2/python/README.md b/sdk_v2/python/README.md new file mode 100644 index 0000000..e00212b --- /dev/null +++ b/sdk_v2/python/README.md @@ -0,0 +1,209 @@ +# Foundry Local Python SDK + +The Foundry Local Python SDK provides a Python interface for interacting with local AI models via the Foundry Local Core native library. It allows you to discover, download, load, and run inference on models directly on your local machine — no cloud required. + +## Features + +- **Model Discovery** – browse and search the model catalog +- **Model Management** – download, cache, load, and unload models +- **Chat Completions** – OpenAI-compatible chat API (non-streaming and streaming) +- **Tool Calling** – function-calling support with chat completions +- **Audio Transcription** – Whisper-based speech-to-text (non-streaming and streaming) +- **Built-in Web Service** – optional HTTP endpoint for multi-process scenarios +- **Native Performance** – ctypes FFI to AOT-compiled Foundry Local Core + +## Installation + +```bash +pip install foundry-local-sdk +``` + +### Building from source + +```bash +cd sdk_v2/python +pip install -e . +``` + +### WinML Binaries (Windows Only) + +To use WinML execution providers, install the winml native binaries instead of the default cross-plat ones: + +```bash +foundry-local-install --winml +``` + +This downloads the winml variants of the Foundry Local Core, OnnxRuntime, and OnnxRuntimeGenAI packages. WinML is only supported on Windows. 
+ +You can also combine flags: + +```bash +# winml with nightly builds +foundry-local-install --winml --nightly +``` + +> **Note:** winml and cross-plat binaries cannot coexist in the same target directory. Running `foundry-local-install --winml` will overwrite any previously installed cross-plat binaries (and vice versa). + +## Quick Start + +```python +from foundry_local_sdk import Configuration, FoundryLocalManager + +# 1. Initialize +config = Configuration(app_name="MyApp") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# 2. Discover models +catalog = manager.catalog +models = catalog.list_models() +for m in models: + print(f" {m.alias}") + +# 3. Load a model +model = catalog.get_model("phi-3.5-mini") +model.load() + +# 4. Chat +client = model.get_chat_client() +response = client.complete_chat([ + {"role": "user", "content": "Why is the sky blue?"} +]) +print(response.choices[0].message.content) + +# 5. Cleanup +model.unload() +``` + +## Usage + +### Initialization + +Create a `Configuration` and initialize the singleton `FoundryLocalManager`. 
+ +```python +from foundry_local_sdk import Configuration, FoundryLocalManager +from foundry_local_sdk.configuration import LogLevel + +config = Configuration( + app_name="MyApp", + model_cache_dir="/path/to/cache", # optional + log_level=LogLevel.INFORMATION, # optional (default: Warning) + additional_settings={"Bootstrap": "false"}, # optional +) +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance +``` + +### Discovering Models + +```python +catalog = manager.catalog + +# List all models in the catalog +models = catalog.list_models() + +# Get a specific model by alias +model = catalog.get_model("qwen2.5-0.5b") + +# Get a specific variant by ID +variant = catalog.get_model_variant("qwen2.5-0.5b-instruct-generic-cpu:4") + +# List locally cached models +cached = catalog.get_cached_models() + +# List currently loaded models +loaded = catalog.get_loaded_models() +``` + +### Loading and Running a Model + +```python +model = catalog.get_model("qwen2.5-0.5b") + +# Select a specific variant (optional – defaults to highest-priority cached variant) +cached = catalog.get_cached_models() +variant = next(v for v in cached if v.alias == "qwen2.5-0.5b") +model.select_variant(variant) + +# Load into memory +model.load() + +# Non-streaming chat +client = model.get_chat_client() +client.settings.temperature = 0.0 +client.settings.max_completion_tokens = 500 + +result = client.complete_chat([ + {"role": "user", "content": "What is 7 multiplied by 6?"} +]) +print(result.choices[0].message.content) # "42" + +# Streaming chat +messages = [{"role": "user", "content": "Tell me a joke"}] + +def on_chunk(chunk): + delta = chunk.choices[0].delta + if delta and delta.content: + print(delta.content, end="", flush=True) + +client.complete_streaming_chat(messages, on_chunk) + +# Unload when done +model.unload() +``` + +### Web Service (Optional) + +Start a built-in HTTP server for multi-process access. 
+ +```python +manager.start_web_service() +print(f"Listening on: {manager.urls}") + +# ... use the service ... + +manager.stop_web_service() +``` + +## API Reference + +### Core Classes + +| Class | Description | +|---|---| +| `Configuration` | SDK configuration (app name, cache dir, log level, web service settings) | +| `FoundryLocalManager` | Singleton entry point – initialization, catalog access, web service | +| `Catalog` | Model discovery – listing, lookup by alias/ID, cached/loaded queries | +| `Model` | Groups variants under one alias – select, load, unload, create clients | +| `ModelVariant` | Specific model variant – download, cache, load/unload, create clients | + +### OpenAI Clients + +| Class | Description | +|---|---| +| `ChatClient` | Chat completions (non-streaming and streaming) with tool calling | +| `AudioClient` | Audio transcription (non-streaming and streaming) | + +### Internal / Detail + +| Class | Description | +|---|---| +| `CoreInterop` | ctypes FFI layer to the native Foundry Local Core library | +| `ModelLoadManager` | Load/unload via core interop or external web service | +| `ModelInfo` | Pydantic model for catalog entries | + +## Running Tests + +```bash +pip install -r requirements-dev.txt +python -m pytest test/ -v +``` + +See [test/README.md](test/README.md) for detailed test setup and structure. + +## Running Examples + +```bash +python examples/chat_completion.py +``` \ No newline at end of file diff --git a/sdk_v2/python/examples/chat_completion.py b/sdk_v2/python/examples/chat_completion.py new file mode 100644 index 0000000..7d9c687 --- /dev/null +++ b/sdk_v2/python/examples/chat_completion.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# --------------------------------------------------------------------------
+
+"""Example: Chat completion using Foundry Local Python SDK.
+
+Demonstrates basic chat completion with the Foundry Local runtime,
+including model discovery, loading, and inference.
+"""
+
+from foundry_local_sdk import Configuration, FoundryLocalManager
+
+def main():
+    # 1. Initialize the SDK. Logs and the model cache go to the SDK's default,
+    #    per-app locations; pass logs_dir= / model_cache_dir= to override them.
+    #    (Hard-coded absolute paths would only work on one developer's machine.)
+    config = Configuration(app_name="ChatCompletionExample")
+    print("Initializing Foundry Local Manager...", end="")
+    FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
+    print("initialized!")
+
+    # 2. Print available models in the catalog and cache
+    models = manager.catalog.list_models()
+    print("Available models in catalog:")
+    for m in models:
+        print(f" - {m.alias} ({m.id})")
+
+    cached_models = manager.catalog.get_cached_models()
+    print("\nCached models:")
+    for m in cached_models:
+        print(f" - {m.alias} ({m.id})")
+
+    CACHED_MODEL_ALIAS = "qwen2.5-0.5b"
+
+    # 3. Find a model from the cache (+ download if not cached)
+    model = manager.catalog.get_model(CACHED_MODEL_ALIAS)
+    if model is None:
+        print(f"Model '{CACHED_MODEL_ALIAS}' not found in catalog.")
+        print("Available models:")
+        for m in manager.catalog.list_models():
+            print(f" - {m.alias} ({m.id})")
+        return
+
+    if not model.is_cached:
+        print(f"Downloading {model.alias}...")
+        model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
+        print()
+
+    # 4. Load the model
+    print(f"Loading {model.alias}...", end="")
+    model.load()
+    print("loaded!")
+
+    try:
+        # 5. Create a chat client and send a message
+        client = model.get_chat_client()
+
+        print("\n--- Non-streaming ---")
+        response = client.complete_chat(
+            messages=[{"role": "user", "content": "What is the capital of France? Reply briefly."}]
+        )
+        print(f"Response: {response.choices[0].message.content}")
+
+        # 6. Streaming example
+        print("\n--- Streaming ---")
+
+        def on_chunk(chunk):
+            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
+                print(chunk.choices[0].delta.content, end="", flush=True)
+
+        client.complete_streaming_chat(
+            messages=[{"role": "user", "content": "Tell me a short joke."}],
+            user_callback=on_chunk,
+        )
+        print()  # newline after streaming
+
+    finally:
+        # 7. Cleanup
+        model.unload()
+        print("\nModel unloaded.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sdk_v2/python/pyproject.toml b/sdk_v2/python/pyproject.toml
new file mode 100644
index 0000000..f0229b7
--- /dev/null
+++ b/sdk_v2/python/pyproject.toml
@@ -0,0 +1,56 @@
+[build-system]
+requires = ["setuptools>=77.0.3", "wheel"]  # SPDX `license` + `license-files` (PEP 639) need setuptools 77+
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "foundry-local-sdk"
+dynamic = ["version", "dependencies"]
+description = "Foundry Local Manager Python SDK: Control-plane SDK for Foundry Local."
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+license-files = ["LICENSE.txt"]
+authors = [
+    {name = "Microsoft Corporation", email = "foundrylocaldevs@microsoft.com"},
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development",
+    "Topic :: Software Development :: Libraries",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+]
+
+[project.urls]
+Homepage = "https://github.com/microsoft/Foundry-Local"
+
+[project.scripts]
+foundry-local-install = "foundry_local_sdk.detail.native_downloader:main"
+
+[tool.setuptools.package-dir]
+foundry_local_sdk = "src"
+"foundry_local_sdk.detail" = "src/detail"
+"foundry_local_sdk.openai" = "src/openai"
+
+[tool.setuptools]
+packages = ["foundry_local_sdk", "foundry_local_sdk.detail", "foundry_local_sdk.openai"]
+
+[tool.setuptools.dynamic]
+version = {attr = "foundry_local_sdk.version.__version__"}
+dependencies = {file = ["requirements.txt"]}
+
+[tool.pytest.ini_options]
+testpaths = ["test"]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+timeout = 60
diff --git a/sdk_v2/python/requirements-dev.txt b/sdk_v2/python/requirements-dev.txt
new file mode 100644
index 0000000..7d44fb3
--- /dev/null
+++ b/sdk_v2/python/requirements-dev.txt
@@ -0,0 +1,5 @@
+-r requirements.txt
+build
+coverage
+pytest>=7.0.0
+pytest-timeout>=2.1.0
diff --git a/sdk_v2/python/requirements.txt b/sdk_v2/python/requirements.txt
new file mode 100644
index 0000000..fd6b922
--- /dev/null
+++
b/sdk_v2/python/requirements.txt
@@ -0,0 +1,3 @@
+pydantic>=2.0.0
+requests>=2.32.4
+openai>=2.24.0
\ No newline at end of file
diff --git a/sdk_v2/python/src/__init__.py b/sdk_v2/python/src/__init__.py
new file mode 100644
index 0000000..14534d1
--- /dev/null
+++ b/sdk_v2/python/src/__init__.py
@@ -0,0 +1,23 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import logging
+import sys
+
+from .configuration import Configuration
+from .foundry_local_manager import FoundryLocalManager
+from .version import __version__
+
+_logger = logging.getLogger(__name__)
+_logger.setLevel(logging.WARNING)
+
+_sc = logging.StreamHandler(stream=sys.stdout)
+_formatter = logging.Formatter(
+    "[foundry-local] | %(asctime)s | %(levelname)-8s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
+)
+_sc.setFormatter(_formatter)
+_logger.addHandler(_sc)
+_logger.propagate = False
+
+__all__ = ["Configuration", "FoundryLocalManager", "__version__"]
diff --git a/sdk_v2/python/src/catalog.py b/sdk_v2/python/src/catalog.py
new file mode 100644
index 0000000..4e67d0e
--- /dev/null
+++ b/sdk_v2/python/src/catalog.py
@@ -0,0 +1,154 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import datetime
+import logging
+import threading
+from typing import List, Optional
+from pydantic import TypeAdapter
+
+from .model import Model
+from .model_variant import ModelVariant
+
+from .detail.core_interop import CoreInterop
+from .detail.model_data_types import ModelInfo
+from .detail.utils import get_cached_model_ids
+from .detail.model_load_manager import ModelLoadManager
+from .exception import FoundryLocalException
+
+logger = logging.getLogger(__name__)
+
+class Catalog:
+    """Model catalog for discovering and querying available models.
+
+    Provides methods to list models, look up by alias or ID, and query
+    cached or loaded models. The model list is refreshed every 6 hours.
+    """
+
+    def __init__(self, model_load_manager: ModelLoadManager, core_interop: CoreInterop):
+        """Initialize the Catalog.
+
+        Args:
+            model_load_manager: Manager for loading/unloading models.
+            core_interop: Native interop layer for Foundry Local Core.
+        """
+        self._core_interop = core_interop
+        self._model_load_manager = model_load_manager
+        self._lock = threading.Lock()
+
+        self._models: List[ModelInfo] = []
+        self._model_alias_to_model = {}
+        self._model_id_to_model_variant = {}
+        self._last_fetch = datetime.datetime.min
+
+        response = core_interop.execute_command("get_catalog_name")
+        if response.error is not None:
+            raise FoundryLocalException(f"Failed to get catalog name: {response.error}")
+
+        self.name = response.data
+
+    def _update_models(self):
+        """Refresh the cached model list if it is older than 6 hours.
+
+        The lookup tables are rebuilt in local dictionaries and swapped in
+        together, so readers on other threads never observe a cleared or
+        half-populated catalog while a refresh is in progress.
+
+        Raises:
+            FoundryLocalException: If the native ``get_model_list`` command fails.
+        """
+        with self._lock:
+            # Checked under the lock so concurrent callers trigger at most one refresh.
+            if (datetime.datetime.now() - self._last_fetch) < datetime.timedelta(hours=6):
+                return
+
+            response = self._core_interop.execute_command("get_model_list")
+            if response.error is not None:
+                raise FoundryLocalException(f"Failed to get model list: {response.error}")
+
+            models: List[ModelInfo] = TypeAdapter(list[ModelInfo]).validate_json(response.data)
+
+            alias_to_model = {}
+            id_to_variant = {}
+            for model_info in models:
+                variant = ModelVariant(model_info, self._model_load_manager, self._core_interop)
+
+                model = alias_to_model.get(model_info.alias)
+                if model is None:
+                    alias_to_model[model_info.alias] = Model(variant, self._core_interop)
+                else:
+                    model._add_variant(variant)
+
+                id_to_variant[variant.id] = variant
+
+            # Publish the new tables only once they are complete.
+            self._model_alias_to_model = alias_to_model
+            self._model_id_to_model_variant = id_to_variant
+            self._models = models
+            self._last_fetch = datetime.datetime.now()
+
+    def list_models(self) -> List[Model]:
+        """
+        List the available models in the catalog.
+        :return: List of Model instances.
+        """
+        self._update_models()
+        return list(self._model_alias_to_model.values())
+
+    def get_model(self, model_alias: str) -> Optional[Model]:
+        """
+        Lookup a model by its alias.
+        :param model_alias: Model alias.
+        :return: Model if found, otherwise None.
+        """
+        self._update_models()
+        return self._model_alias_to_model.get(model_alias)
+
+    def get_model_variant(self, model_id: str) -> Optional[ModelVariant]:
+        """
+        Lookup a model variant by its unique model id.
+        :param model_id: Model id.
+        :return: Model variant if found, otherwise None.
+        """
+        self._update_models()
+        return self._model_id_to_model_variant.get(model_id)
+
+    def get_cached_models(self) -> List[ModelVariant]:
+        """
+        Get a list of currently downloaded models from the model cache.
+        Cached ids that are not present in the catalog are skipped.
+        :return: List of ModelVariant instances.
+        """
+        self._update_models()
+
+        cached_model_ids = get_cached_model_ids(self._core_interop)
+
+        cached_models = []
+        for model_id in cached_model_ids:
+            model_variant = self._model_id_to_model_variant.get(model_id)
+            if model_variant is not None:
+                cached_models.append(model_variant)
+
+        return cached_models
+
+    def get_loaded_models(self) -> List[ModelVariant]:
+        """
+        Get a list of the currently loaded models.
+        Loaded ids that are not present in the catalog are skipped.
+        :return: List of ModelVariant instances.
+        """
+        self._update_models()
+
+        loaded_model_ids = self._model_load_manager.list_loaded()
+        loaded_models = []
+
+        for model_id in loaded_model_ids:
+            model_variant = self._model_id_to_model_variant.get(model_id)
+            if model_variant is not None:
+                loaded_models.append(model_variant)
+
+        return loaded_models
\ No newline at end of file
diff --git a/sdk_v2/python/src/configuration.py b/sdk_v2/python/src/configuration.py
new file mode 100644
index 0000000..23967ef
--- /dev/null
+++ b/sdk_v2/python/src/configuration.py
@@ -0,0 +1,163 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import logging
+import re
+
+from typing import Optional, Dict
+from urllib.parse import urlparse
+
+from .exception import FoundryLocalException
+
+from .logging_helper import LogLevel
+
+logger = logging.getLogger(__name__)
+
+
+class Configuration:
+    """Configuration for Foundry Local SDK.
+
+    Configuration values:
+        app_name: Your application name. MUST be set to a valid name.
+        foundry_local_core_path: Path to the Foundry Local Core native library.
+        app_data_dir: Application data directory.
+                      Default: {home}/.{appname}, where {home} is the user's home directory
+                      and {appname} is the app_name value.
+        model_cache_dir: Model cache directory.
+                         Default: {appdata}/cache/models, where {appdata} is the app_data_dir value.
+        logs_dir: Log directory.
+                  Default: {appdata}/logs
+        log_level: Logging level.
+                   Valid values are: Verbose, Debug, Information, Warning, Error, Fatal.
+                   Default: LogLevel.WARNING
+        web: Optional configuration for the built-in web service.
+             NOTE: This is not included in all builds.
+        additional_settings: Additional settings that Foundry Local Core can consume.
+                             Keys and values are strings.
+    """
+
+    class WebService:
+        """Configuration settings if the optional web service is used."""
+
+        def __init__(self, urls: Optional[str] = None, external_url: Optional[str] = None):
+            """Initialize WebService configuration.
+
+            Args:
+                urls: Url/s to bind to the web service when
+                    FoundryLocalManager.start_web_service() is called.
+                    After startup, FoundryLocalManager.urls will contain the actual URL/s
+                    the service is listening on.
+                    Default: 127.0.0.1:0, which binds to a random ephemeral port.
+                    Multiple URLs can be specified as a semi-colon separated list.
+                external_url: If the web service is running in a separate process,
+                    it will be accessed using this URI.
+                    Both processes should be using the same version of the SDK.
+                    If a random port is assigned when creating the web service in the
+                    external process the actual port must be provided here.
+            """
+            self.urls = urls
+            self.external_url = external_url
+
+    def __init__(
+        self,
+        app_name: str,
+        foundry_local_core_path: Optional[str] = None,
+        app_data_dir: Optional[str] = None,
+        model_cache_dir: Optional[str] = None,
+        logs_dir: Optional[str] = None,
+        log_level: Optional[LogLevel] = LogLevel.WARNING,
+        web: Optional['Configuration.WebService'] = None,
+        additional_settings: Optional[Dict[str, str]] = None
+    ):
+        """Initialize Configuration.
+
+        Args:
+            app_name: Your application name. MUST be set to a valid name.
+            foundry_local_core_path: Path to the Foundry Local Core native library. Optional.
+            app_data_dir: Application data directory. Optional.
+            model_cache_dir: Model cache directory. Optional.
+            logs_dir: Log directory. Optional.
+            log_level: Logging level. Default: LogLevel.WARNING
+            web: Optional configuration for the built-in web service.
+            additional_settings: Additional settings dictionary. Optional.
+        """
+        self.app_name = app_name
+        self.foundry_local_core_path = foundry_local_core_path
+        self.app_data_dir = app_data_dir
+        self.model_cache_dir = model_cache_dir
+        self.logs_dir = logs_dir
+        self.log_level = log_level
+        self.web = web
+        self.additional_settings = additional_settings
+
+        # make sure app name only has safe characters as it's used as a directory name
+        self._safe_app_name_chars = re.compile(r'^[A-Za-z0-9._-]+$')
+
+    def validate(self) -> None:
+        """Validate the configuration.
+
+        Raises:
+            FoundryLocalException: If configuration is invalid.
+        """
+        if not self.app_name:
+            raise FoundryLocalException(
+                "Configuration AppName must be set to a valid application name."
+            )
+
+        # Check for invalid filename characters
+        if not bool(self._safe_app_name_chars.match(self.app_name)):
+            raise FoundryLocalException("Configuration AppName value contains invalid characters.")
+
+        if self.web is not None and self.web.external_url is not None:
+            try:
+                port = urlparse(self.web.external_url).port
+            except ValueError:  # malformed URL, or a non-numeric / out-of-range port
+                port = None
+            if not port:
+                raise FoundryLocalException("Configuration Web.ExternalUrl has invalid port.")
+
+    def as_dictionary(self) -> Dict[str, str]:
+        """Convert configuration to a dictionary of string key-value pairs.
+
+        Returns:
+            Dictionary containing configuration values as strings.
+
+        Raises:
+            FoundryLocalException: If AppName is not set to a valid value.
+        """
+        if not self.app_name:
+            raise FoundryLocalException(
+                "Configuration AppName must be set to a valid application name."
+            )
+
+        config_values = {
+            "AppName": self.app_name,
+            "LogLevel": str(self.log_level)
+        }
+
+        if self.app_data_dir:
+            config_values["AppDataDir"] = self.app_data_dir
+
+        if self.model_cache_dir:
+            config_values["ModelCacheDir"] = self.model_cache_dir
+
+        if self.logs_dir:
+            config_values["LogsDir"] = self.logs_dir
+
+        if self.foundry_local_core_path:
+            config_values["FoundryLocalCorePath"] = self.foundry_local_core_path
+
+        if self.web is not None:
+            if self.web.urls is not None:
+                config_values["WebServiceUrls"] = self.web.urls
+
+        # Emit any additional settings.
+        if self.additional_settings is not None:
+            for key, value in self.additional_settings.items():
+                if not key:
+                    continue  # skip empty keys
+                config_values[key] = value if value is not None else ""
+
+        return config_values
diff --git a/sdk_v2/python/src/detail/__init__.py b/sdk_v2/python/src/detail/__init__.py
new file mode 100644
index 0000000..d9eadf3
--- /dev/null
+++ b/sdk_v2/python/src/detail/__init__.py
@@ -0,0 +1,19 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""Internal implementation details for Foundry Local SDK."""
+
+from .core_interop import CoreInterop, InteropRequest, Response
+from .model_data_types import ModelInfo, DeviceType, Runtime
+from .model_load_manager import ModelLoadManager
+
+__all__ = [
+    "CoreInterop",
+    "DeviceType",
+    "InteropRequest",
+    "ModelInfo",
+    "ModelLoadManager",
+    "Response",
+    "Runtime",
+]
diff --git a/sdk_v2/python/src/detail/core_interop.py b/sdk_v2/python/src/detail/core_interop.py
new file mode 100644
index 0000000..2876ef7
--- /dev/null
+++ b/sdk_v2/python/src/detail/core_interop.py
@@ -0,0 +1,326 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import ctypes
+import json
+import logging
+import os
+import sys
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, Dict, Optional
+from ..configuration import Configuration
+from ..exception import FoundryLocalException
+from .native_downloader import get_native_path, download_native_binaries
+
+logger = logging.getLogger(__name__)
+
+class InteropRequest:
+    """Request payload for a Foundry Local Core command.
+
+    Args:
+        params: Dictionary of key-value string parameters.
+    """
+
+    def __init__(self, params: Optional[Dict[str, str]] = None):
+        self.params = params or {}
+
+    def to_json(self) -> str:
+        """Serialize the request to a JSON string."""
+        return json.dumps({"Params": self.params}, ensure_ascii=False)
+
+
+class RequestBuffer(ctypes.Structure):
+    """ctypes Structure matching the native ``RequestBuffer`` C struct."""
+
+    _fields_ = [
+        ("Command", ctypes.c_void_p),
+        ("CommandLength", ctypes.c_int),
+        ("Data", ctypes.c_void_p),
+        ("DataLength", ctypes.c_int),
+    ]
+
+
+class ResponseBuffer(ctypes.Structure):
+    """ctypes Structure matching the native ``ResponseBuffer`` C struct."""
+
+    _fields_ = [
+        ("Data", ctypes.c_void_p),
+        ("DataLength", ctypes.c_int),
+        ("Error", ctypes.c_void_p),
+        ("ErrorLength", ctypes.c_int),
+    ]
+
+
+@dataclass
+class Response:
+    """Result from a Foundry Local Core command.
+
+    Either ``data`` or ``error`` will be set, never both.
+    ``error`` is ``None`` when the command succeeded.
+    """
+
+    data: Optional[str] = None
+    error: Optional[str] = None
+
+
+class CallbackHelper:
+    """Internal helper class to convert the callback from ctypes to a str and call the python callback."""
+    @staticmethod
+    def callback(data_ptr, length, self_ptr):
+        self = None
+        try:
+            self = ctypes.cast(self_ptr, ctypes.POINTER(ctypes.py_object)).contents.value
+
+            # convert to a string and pass to the python callback
+            data_bytes = ctypes.string_at(data_ptr, length)
+            data_str = data_bytes.decode('utf-8')
+            self._py_callback(data_str)
+        except Exception as e:
+            # Never let an exception unwind into the native caller; remember it so
+            # that _execute_command can re-raise it once the native call returns.
+            if self is not None and self.exception is None:
+                self.exception = e  # keep the first only as they are likely all the same
+
+    def __init__(self, py_callback: Callable[[str], None]):
+        self._py_callback = py_callback
+        self.exception = None
+
+
+class CoreInterop:
+    """ctypes FFI layer for the Foundry Local Core native library.
+
+    Provides ``execute_command`` and ``execute_command_with_callback`` to
+    invoke native commands exposed by ``Microsoft.AI.Foundry.Local.Core``.
+    """
+
+    _initialized = False
+    _flcore_library = None
+    _ort_library = None
+    _genai_library = None
+
+    instance = None
+
+    # Callback function for native interop.
+    # This returns a string and its length, and an optional user provided object.
+    CALLBACK_TYPE = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
+
+    @staticmethod
+    def _add_library_extension(name: str) -> str:
+        if sys.platform.startswith("win"):
+            return f"{name}.dll"
+        elif sys.platform.startswith("linux"):
+            return f"{name}.so"
+        elif sys.platform.startswith("darwin"):
+            return f"{name}.dylib"
+        else:
+            raise NotImplementedError("Unsupported platform")
+
+    @staticmethod
+    def _initialize_native_libraries(base_path: Optional[str] = None) -> Path:
+        """Load the native Foundry Local Core library and its dependencies.
+
+        Resolution order:
+          1. If ``base_path`` is provided explicitly, use that directory. The
+             path of the Core library file itself is also accepted; its parent
+             directory is used.
+          2. Check the ``packages/{platform-key}/`` directory for previously
+             downloaded binaries (via ``get_native_path``).
+          3. Download the native NuGet packages on the fly (lazy install)
+             and then load from the downloaded location.
+
+        Returns:
+            Path to the directory containing the native libraries.
+        """
+        if base_path:
+            resolved = Path(base_path).expanduser().resolve()
+            # Accept the full path to the Core library as well as its directory.
+            if resolved.is_file():
+                resolved = resolved.parent
+        else:
+            # Check for pre-downloaded binaries
+            resolved = get_native_path()
+            if resolved is None:
+                # Lazy download on first use
+                logger.info("Native libraries not found — downloading from NuGet...")
+                resolved = download_native_binaries()
+                logger.info("Native libraries installed at %s", resolved)
+
+        flcore_lib_name = CoreInterop._add_library_extension("Microsoft.AI.Foundry.Local.Core")
+        flcore_dll_path = resolved / flcore_lib_name
+        if not flcore_dll_path.exists():
+            raise FileNotFoundError(f"Could not find the Foundry Local Core library at {flcore_dll_path}")
+
+        if sys.platform.startswith("win"):
+            # Add the native directory to PATH so that P/Invoke within the .NET
+            # AOT Core library can find sibling DLLs (e.g. Bootstrap DLL for WinML).
+            native_dir_str = str(resolved)
+            current_path = os.environ.get("PATH", "")
+            if native_dir_str not in current_path.split(os.pathsep):
+                os.environ["PATH"] = f"{native_dir_str}{os.pathsep}{current_path}"
+
+            # we need to explicitly load the ORT and GenAI libraries first to ensure its dependencies load correctly
+            ort_lib_name = CoreInterop._add_library_extension("onnxruntime")
+            genai_lib_name = CoreInterop._add_library_extension("onnxruntime-genai")
+            CoreInterop._ort_library = ctypes.CDLL(str(resolved / ort_lib_name))
+            CoreInterop._genai_library = ctypes.CDLL(str(resolved / genai_lib_name))
+
+        CoreInterop._flcore_library = ctypes.CDLL(str(flcore_dll_path))
+
+        # Set the function signatures
+        lib = CoreInterop._flcore_library
+        lib.execute_command.argtypes = [ctypes.POINTER(RequestBuffer),
+                                        ctypes.POINTER(ResponseBuffer)]
+        lib.execute_command.restype = None
+
+        lib.free_response.argtypes = [ctypes.POINTER(ResponseBuffer)]
+        lib.free_response.restype = None
+
+        # Set the callback function signature and delegate info
+        lib.execute_command_with_callback.argtypes = [ctypes.POINTER(RequestBuffer),
+                                                      ctypes.POINTER(ResponseBuffer),
+                                                      ctypes.c_void_p,  # callback_fn
+                                                      ctypes.c_void_p]  # user_data
+        lib.execute_command_with_callback.restype = None
+
+        return resolved
+
+    @staticmethod
+    def _to_c_buffer(s: Optional[str]):
+        """Encode ``s`` as UTF-8 into a ctypes buffer for the native call.
+
+        Returns ``(pointer, byte_length, buffer)``. The caller must keep ``buffer``
+        referenced while ``pointer`` is in use; ``None`` yields a null pointer.
+        """
+        if s is None:
+            return ctypes.c_void_p(0), 0, None
+
+        buf = s.encode("utf-8")
+        ptr = ctypes.create_string_buffer(buf)  # keeps memory alive in Python
+        return ctypes.cast(ptr, ctypes.c_void_p), len(buf), ptr
+
+    def __init__(self, config: Configuration):
+        """Load the native libraries on first construction and initialize the Core.
+
+        Raises:
+            FoundryLocalException: If the native ``initialize`` command reports an error.
+        """
+        if not CoreInterop._initialized:
+            lib_path = config.foundry_local_core_path
+            native_dir = CoreInterop._initialize_native_libraries(lib_path)
+            CoreInterop._initialized = True
+
+            # Pass the full path to the Core DLL so the native layer can
+            # discover sibling DLLs (e.g. the WinML Bootstrap DLL) via
+            # Path.GetDirectoryName(FoundryLocalCorePath).
+            if not config.foundry_local_core_path:
+                flcore_lib_name = CoreInterop._add_library_extension("Microsoft.AI.Foundry.Local.Core")
+                config.foundry_local_core_path = str(native_dir / flcore_lib_name)
+
+            # Auto-detect WinML Bootstrap: if the Bootstrap DLL is present
+            # in the native binaries directory and the user hasn't explicitly
+            # set the Bootstrap config, enable it automatically.
+            if sys.platform.startswith("win"):
+                bootstrap_dll = native_dir / "Microsoft.WindowsAppRuntime.Bootstrap.dll"
+                if bootstrap_dll.exists():
+                    if config.additional_settings is None:
+                        config.additional_settings = {}
+                    if "Bootstrap" not in config.additional_settings:
+                        logger.info("WinML Bootstrap DLL detected — enabling Bootstrap")
+                        config.additional_settings["Bootstrap"] = "true"
+
+        request = InteropRequest(params=config.as_dictionary())
+        response = self.execute_command("initialize", request)
+        if response.error is not None:
+            raise FoundryLocalException(f"Failed to initialize Foundry.Local.Core: {response.error}")
+
+        logger.info("Foundry.Local.Core initialized successfully: %s", response.data)
+
+    def _execute_command(self, command: str, interop_request: Optional[InteropRequest] = None,
+                         callback: Optional[Callable[[str], None]] = None) -> Response:
+        """Marshal a command to the native library and collect its response.
+
+        When ``callback`` is given the command is run through
+        ``execute_command_with_callback`` and each chunk is forwarded to it.
+        The first exception raised by ``callback`` is re-raised once the native
+        call has returned and the native response has been freed.
+        """
+        cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
+        data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
+
+        req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
+        resp = ResponseBuffer()
+        lib = CoreInterop._flcore_library
+        callback_helper = None
+
+        if callback is not None:
+            # If a callback is provided, use the execute_command_with_callback method
+            # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
+            # provided callback function
+            callback_helper = CallbackHelper(callback)
+            callback_py_obj = ctypes.py_object(callback_helper)
+            callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
+            callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
+
+            lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
+        else:
+            lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
+
+        req = None  # Free Python reference to request
+
+        try:
+            response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
+            error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
+        finally:
+            # C# owns the memory in the response so we need to free it explicitly,
+            # including when decoding fails or the Python callback raised.
+            lib.free_response(resp)
+
+        if callback_helper is not None and callback_helper.exception is not None:
+            raise callback_helper.exception
+
+        return Response(data=response_str, error=error_str)
+
+    def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
+        """Execute a command synchronously.
+
+        Args:
+            command_name: The native command name (e.g. ``"get_model_list"``).
+            command_input: Optional request parameters.
+
+        Returns:
+            A ``Response`` with ``data`` on success or ``error`` on failure.
+        """
+        logger.debug("Executing command: %s Input: %s", command_name,
+                     command_input.params if command_input else None)
+
+        response = self._execute_command(command_name, command_input)
+        return response
+
+    def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
+                                      callback: Callable[[str], None]) -> Response:
+        """Execute a command with a streaming callback.
+
+        The ``callback`` receives incremental string data from the native layer
+        (e.g. streaming chat tokens or download progress).
+
+        Args:
+            command_name: The native command name.
+            command_input: Optional request parameters.
+            callback: Called with each incremental string response.
+
+        Returns:
+            A ``Response`` with ``data`` on success or ``error`` on failure.
+        """
+        logger.debug("Executing command with callback: %s Input: %s", command_name,
+                     command_input.params if command_input else None)
+        response = self._execute_command(command_name, command_input, callback)
+        return response
+
+
+
diff --git a/sdk_v2/python/src/detail/model_data_types.py b/sdk_v2/python/src/detail/model_data_types.py
new file mode 100644
index 0000000..5f367c0
--- /dev/null
+++ b/sdk_v2/python/src/detail/model_data_types.py
@@ -0,0 +1,93 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from typing import Optional, List
+from pydantic import BaseModel, Field
+
+from .utils import StrEnum
+
+# ---------- ENUMS ----------
+class DeviceType(StrEnum):
+    """Device types supported by model variants."""
+
+    CPU = "CPU"
+    GPU = "GPU"
+    NPU = "NPU"
+
+# TODO: Do we need this any more?
+# class ExecutionProvider(StrEnum):
+#     """Enumeration of common execution providers supported by the model."""
+
+#     CPU = "CPUExecutionProvider"
+#     WEBGPU = "WebGpuExecutionProvider"
+#     CUDA = "CUDAExecutionProvider"
+
+#     def get_alias(self) -> str:
+#         """
+#         Get the alias for the execution provider.
+
+#         Returns:
+#             str: Alias of the execution provider.
+#         """
+#         return self.value.replace("ExecutionProvider", "").lower()
+
+# ---------- DATA MODELS ----------
+
+class PromptTemplate(BaseModel):
+    """Prompt template strings for system, user, assistant, and raw prompt roles."""
+
+    system: Optional[str] = Field(default=None, alias="system")
+    user: Optional[str] = Field(default=None, alias="user")
+    assistant: Optional[str] = Field(default=None, alias="assistant")
+    prompt: Optional[str] = Field(default=None, alias="prompt")
+
+
+class Runtime(BaseModel):
+    """Runtime configuration specifying the device type and execution provider."""
+
+    device_type: DeviceType = Field(alias="deviceType")
+    execution_provider: str = Field(alias="executionProvider")
+
+
+class Parameter(BaseModel):
+    """A named parameter with an optional string value."""
+
+    name: str
+    value: Optional[str] = None
+
+
+class ModelSettings(BaseModel):
+    """Model-specific settings containing a list of parameters."""
+
+    parameters: Optional[List[Parameter]] = Field(default=None, alias="parameters")
+
+
+class ModelInfo(BaseModel):
+    """Catalog metadata for a single model variant.
+
+    Fields are populated from the JSON response of the ``get_model_list`` command.
+    """
+
+    id: str = Field(alias="id", description="Unique identifier of the model. Generally :")
+    name: str = Field(alias="name", description="Model variant name")
+    version: int = Field(alias="version")
+    alias: str = Field(..., description="Alias of the model")
+    display_name: Optional[str] = Field(alias="displayName")
+    provider_type: str = Field(alias="providerType")
+    uri: str = Field(alias="uri")
+    model_type: str = Field(alias="modelType")
+    prompt_template: Optional[PromptTemplate] = Field(default=None, alias="promptTemplate")
+    publisher: Optional[str] = Field(alias="publisher")
+    model_settings: Optional[ModelSettings] = Field(default=None, alias="modelSettings")
+    license: Optional[str] = Field(alias="license")
+    license_description: Optional[str] = Field(alias="licenseDescription")
+    cached: bool = Field(alias="cached")
+    task: Optional[str] = Field(alias="task")
+    runtime: Optional[Runtime] = Field(alias="runtime")
+    file_size_mb: Optional[int] = Field(alias="fileSizeMb")
+    supports_tool_calling: Optional[bool] = Field(alias="supportsToolCalling")
+    max_output_tokens: Optional[int] = Field(alias="maxOutputTokens")
+    min_fl_version: Optional[str] = Field(alias="minFLVersion")
+    created_at_unix: int = Field(alias="createdAt")
diff --git a/sdk_v2/python/src/detail/model_load_manager.py b/sdk_v2/python/src/detail/model_load_manager.py
new file mode 100644
index 0000000..ccf9086
--- /dev/null
+++ b/sdk_v2/python/src/detail/model_load_manager.py
@@ -0,0 +1,158 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+from __future__ import annotations
+
+import json
+import logging
+import requests
+
+from typing import List, Optional
+from urllib.parse import quote
+
+from ..exception import FoundryLocalException
+from ..version import __version__ as sdk_version
+from .core_interop import CoreInterop, InteropRequest
+
+logger = logging.getLogger(__name__)
+
+
+class ModelLoadManager:
+    """Manages loading and unloading of models in Foundry Local.
+
+    Can operate in two modes: direct interop with Foundry Local Core, or via
+    an external web service if the configuration provides a
+    ``WebServiceExternalUrl`` value.
+    """
+
+    _headers = {"user-agent": f"foundry-local-python-sdk/{sdk_version}"}
+
+    def __init__(self, core_interop: CoreInterop, external_service_url: Optional[str] = None):
+        self._core_interop = core_interop
+        self._external_service_url = external_service_url
+
+    def load(self, model_id: str) -> None:
+        """
+        Load a model by its ID.
+        :param model_id: The ID of the model to load.
+        :raises FoundryLocalException: If the Core or the external service fails to load the model.
+        """
+        if self._external_service_url:
+            self._web_load_model(model_id)
+            return
+
+        request = InteropRequest({"Model": model_id})
+        response = self._core_interop.execute_command("load_model", request)
+        if response.error is not None:
+            raise FoundryLocalException(f"Failed to load model {model_id}: {response.error}")
+
+    def unload(self, model_id: str) -> None:
+        """
+        Unload a model by its ID.
+        :param model_id: The ID of the model to unload.
+        """
+        if self._external_service_url:
+            self._web_unload_model(model_id)
+            return
+
+        request = InteropRequest({"Model": model_id})
+        response = self._core_interop.execute_command("unload_model", request)
+        if response.error is not None:
+            raise FoundryLocalException(f"Failed to unload model {model_id}: {response.error}")
+
+    def list_loaded(self) -> list[str]:
+        """
+        List loaded models.
+        :return: List of loaded model IDs
+        """
+        if self._external_service_url:
+            return self._web_list_loaded_models()
+
+        response = self._core_interop.execute_command("list_loaded_models")
+        if response.error is not None:
+            raise FoundryLocalException(f"Failed to list loaded models: {response.error}")
+
+        try:
+            model_ids = json.loads(response.data)
+        except (json.JSONDecodeError, TypeError) as e:  # TypeError: response.data was None
+            raise FoundryLocalException(f"Failed to decode JSON response: Response was: {response.data}") from e
+
+        return model_ids
+
+    def _web_list_loaded_models(self) -> List[str]:
+        try:
+            response = requests.get(f"{self._external_service_url}/models/loaded", headers=self._headers, timeout=10)
+
+            if not response.ok:
+                raise FoundryLocalException(
+                    f"Error listing loaded models from {self._external_service_url}: {response.reason}"
+                )
+
+            content = response.text
+            logger.debug("Loaded models json from %s: %s", self._external_service_url, content)
+
+            model_list = json.loads(content)
+            return model_list if model_list is not None else []
+        except requests.RequestException as e:
+            raise FoundryLocalException(
+                f"HTTP request failed when listing loaded models from {self._external_service_url}: {e}"
+            ) from e
+        except json.JSONDecodeError as e:
+            # requests.Response exposes the body as ``text``; it has no ``data`` attribute.
+            raise FoundryLocalException(f"Failed to decode JSON response: Response was: {response.text}") from e
+
+    def _web_load_model(self, model_id: str) -> None:
+        """
+        Load a model via the external web service.
+
+        :param model_id: The ID of the model to load
+        :raises FoundryLocalException: If the HTTP request fails or response is invalid
+        """
+        try:
+            encoded_model_id = quote(model_id)
+            url = f"{self._external_service_url}/models/load/{encoded_model_id}"
+
+            # Future: add query params like load timeout
+            # query_params = {
+            #     # "timeout": "30"
+            # }
+            # response = requests.get(url, params=query_params)
+
+            response = requests.get(url, headers=self._headers, timeout=10)
+
+            if not response.ok:
+                raise FoundryLocalException(
+                    f"Error loading model {model_id} from {self._external_service_url}: "
+                    f"{response.reason}"
+                )
+
+            content = response.text
+            logger.info("Model %s loaded successfully from %s: %s",
+                        model_id, self._external_service_url, content)
+
+        except requests.RequestException as e:
+            raise FoundryLocalException(
+                f"HTTP request failed when loading model {model_id} from {self._external_service_url}: {e}"
+            ) from e
+
+    def _web_unload_model(self, model_id: str) -> None:
+        try:
+            encoded_model_id = quote(model_id)
+            url = f"{self._external_service_url}/models/unload/{encoded_model_id}"
+
+            response = requests.get(url, headers=self._headers, timeout=10)
+
+            if not response.ok:
+                raise FoundryLocalException(
+                    f"Error unloading model {model_id} from {self._external_service_url}: "
+                    f"{response.reason}"
+                )
+
+            content = response.text
+            logger.info("Model %s unloaded successfully from %s: %s",
+                        model_id, self._external_service_url, content)
+
+        except requests.RequestException as e:
+            raise FoundryLocalException(
+                f"HTTP request failed when unloading model {model_id} from {self._external_service_url}: {e}"
+            ) from e
diff --git a/sdk_v2/python/src/detail/native_downloader.py b/sdk_v2/python/src/detail/native_downloader.py
new file mode 100644
index 0000000..7569870
--- /dev/null
+++ b/sdk_v2/python/src/detail/native_downloader.py
@@ -0,0 +1,511 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft
Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""Native library downloader for Foundry Local SDK.
+
+Downloads the native Microsoft.AI.Foundry.Local.Core libraries and their
+dependencies (OnnxRuntime, OnnxRuntimeGenAI) from NuGet feeds.
+
+Can be invoked as:
+    foundry-local-install [--nightly] [--winml] [--target DIR]
+
+Or programmatically:
+    from foundry_local_sdk.detail.native_downloader import download_native_binaries
+    download_native_binaries()
+    download_native_binaries(use_winml=True)  # for WinML binaries
+
+Native binaries are also downloaded automatically on first SDK use,
+so manual installation is only needed to pre-download for offline
+scenarios or CI.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import platform
+import shutil
+import sys
+import tempfile
+import zipfile
+from pathlib import Path
+from typing import Optional
+
+import requests
+
+# ---------------------------------------------------------------------------
+# Platform / RID mapping
+# ---------------------------------------------------------------------------
+
+# Maps Python (sys.platform, platform.machine()) to NuGet Runtime Identifier
+PLATFORM_MAP: dict[str, str] = {
+    "win32-AMD64": "win-x64",
+    "win32-ARM64": "win-arm64",
+    "linux-x86_64": "linux-x64",
+    "darwin-arm64": "osx-arm64",
+}
+
+# Maps Python sys.platform to native shared library extension
+EXT_MAP: dict[str, str] = {
+    "win32": ".dll",
+    "linux": ".so",
+    "darwin": ".dylib",
+}
+
+
+def _get_process_arch() -> str:
+    """Return the architecture of the running Python process.
+
+    On Windows, ``platform.machine()`` prefers the ``PROCESSOR_ARCHITEW6432``
+    environment variable, which contains the *hardware* architecture even when
+    an x64 Python process is running under ARM64 emulation. We use
+    ``PROCESSOR_ARCHITECTURE`` instead, which always reflects the process.
+    """
+    if sys.platform == "win32":
+        return os.environ.get("PROCESSOR_ARCHITECTURE", platform.machine())
+    return platform.machine()
+
+
+def _get_platform_key() -> str:
+    """Get the current platform key (e.g. 'win32-AMD64').
+
+    Uses the Python **process** architecture (not the underlying hardware)
+    because ctypes can only load libraries that match the process bitness
+    (e.g. x64 Python needs x64 DLLs, even on ARM64 hardware).
+
+    To get native ARM64 performance, install an ARM64 Python interpreter.
+    """
+    return f"{sys.platform}-{_get_process_arch()}"
+
+
+def _get_rid() -> str | None:
+    """Get the NuGet Runtime Identifier for the current platform."""
+    return PLATFORM_MAP.get(_get_platform_key())
+
+
+def _get_ext() -> str:
+    """Get the native library file extension for the current platform."""
+    for plat_prefix, ext in EXT_MAP.items():
+        if sys.platform.startswith(plat_prefix):
+            return ext
+    raise RuntimeError(f"Unsupported platform: {sys.platform}")
+
+
+def _get_project_root() -> Path:
+    """Get the Python SDK project root (sdk_v2/python/)."""
+    # __file__ is src/detail/native_downloader.py
+    return Path(__file__).resolve().parent.parent.parent
+
+
+def _get_native_dir() -> Path:
+    """Get the directory where native binaries should be stored.
+
+    Binaries are placed under ``packages/{platform-key}/`` relative to the
+    Python SDK project root (e.g. ``sdk_v2/python/packages/win32-ARM64/``).
+    """
+    return _get_project_root() / "packages" / _get_platform_key()
+
+
+def _get_required_files() -> list[str]:
+    """Get the list of required native library files."""
+    ext = _get_ext()
+    # On Linux/macOS the ORT libraries are shipped with a "lib" prefix
+    ort_prefix = "" if sys.platform == "win32" else "lib"
+    return [
+        f"Microsoft.AI.Foundry.Local.Core{ext}",
+        f"{ort_prefix}onnxruntime{ext}",
+        f"{ort_prefix}onnxruntime-genai{ext}",
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Artifact definitions
+#
+# To update a package version, just change the value here.
+# ---------------------------------------------------------------------------
+
+NUGET_FEED = "https://api.nuget.org/v3/index.json"
+ORT_NIGHTLY_FEED = "https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json"
+
+
+def _get_artifacts(use_winml: bool = False) -> list[dict[str, str]]:
+    """Build the list of NuGet packages to download.
+
+    Args:
+        use_winml: When True, download WinML-specific packages instead of
+            the default Foundry packages. WinML packages use the DirectML
+            execution provider and are only supported on Windows.
+
+    Returns:
+        List of artifact dicts with 'name', 'version', and 'feed' keys.
+    """
+    linux = sys.platform.startswith("linux")
+
+    if use_winml:
+        return [
+            {
+                "name": "Microsoft.AI.Foundry.Local.Core.WinML",
+                "version": "0.9.0.6-rc2",
+                "feed": ORT_NIGHTLY_FEED,
+            },
+            {
+                "name": "Microsoft.ML.OnnxRuntime.Foundry",
+                "version": "1.23.2.3",
+                "feed": NUGET_FEED,
+            },
+            {
+                "name": "Microsoft.ML.OnnxRuntimeGenAI.WinML",
+                "version": "0.12.1",
+                "feed": NUGET_FEED,
+            },
+        ]
+
+    return [
+        {
+            "name": "Microsoft.AI.Foundry.Local.Core",
+            "version": "0.9.0-dev-20260227T222239-2a3af92",
+            "feed": ORT_NIGHTLY_FEED,
+        },
+        {
+            "name": "Microsoft.ML.OnnxRuntime.Foundry" if not linux else "Microsoft.ML.OnnxRuntime.Gpu.Linux",
+            "version": "1.24.1" if linux else "1.24.1.1",
+            "feed": NUGET_FEED,
+        },
+        {
+            "name": "Microsoft.ML.OnnxRuntimeGenAI.Foundry",
+            "version": "0.12.1",
+            "feed": NUGET_FEED,
+        },
+    ]
+
+
+# ---------------------------------------------------------------------------
+# NuGet V3 API helpers
+# ---------------------------------------------------------------------------
+
+# Modified from js sdk's approach to downloading nuget files via http
+_service_index_cache: dict[str, dict] = {}
+
+
+def _get_base_address(feed_url: str) -> str:
+    """Get the PackageBaseAddress from a NuGet V3 service index.
+
+    Args:
+        feed_url: The NuGet V3 feed service index URL.
+
+    Returns:
+        The base address URL for package content.
+    """
+    if feed_url not in _service_index_cache:
+        resp = requests.get(feed_url, timeout=30)
+        resp.raise_for_status()
+        _service_index_cache[feed_url] = resp.json()
+
+    service_index = _service_index_cache[feed_url]
+    resources = service_index.get("resources", [])
+
+    for r in resources:
+        rtype = r.get("@type", "")
+        if rtype.startswith("PackageBaseAddress/3.0.0"):
+            base = r["@id"]
+            return base if base.endswith("/") else base + "/"
+
+    raise RuntimeError(f"Could not find PackageBaseAddress/3.0.0 in NuGet feed: {feed_url}")
+
+
+def _resolve_latest_version(feed_url: str, package_name: str) -> str:
+    """Resolve the latest version of a package from a NuGet feed.
+
+    Used for nightly builds where the version is not pinned.
+
+    Args:
+        feed_url: NuGet V3 feed URL.
+        package_name: Package name to look up.
+
+    Returns:
+        The latest version string.
+    """
+    base_address = _get_base_address(feed_url)
+    name_lower = package_name.lower()
+    versions_url = f"{base_address}{name_lower}/index.json"
+
+    resp = requests.get(versions_url, timeout=30)
+    resp.raise_for_status()
+    data = resp.json()
+
+    versions = data.get("versions", [])
+    if not versions:
+        raise RuntimeError(f"No versions found for {package_name} at {versions_url}")
+
+    # Sort descending — lexicographic sort picks latest date-stamped dev versions
+    versions.sort(reverse=True)
+    latest = versions[0]
+    print(f"  Resolved latest version: {latest}")
+    return latest
+
+
+def _resolve_nupkg_url(feed_url: str, package_name: str, version: str) -> str:
+    """Construct the direct download URL for a NuGet package.
+
+    Args:
+        feed_url: NuGet V3 feed URL.
+        package_name: Package name.
+        version: Package version.
+
+    Returns:
+        Direct URL to the .nupkg file.
+    """
+    base_address = _get_base_address(feed_url)
+    name_lower = package_name.lower()
+    ver_lower = version.lower()
+    return f"{base_address}{name_lower}/{ver_lower}/{name_lower}.{ver_lower}.nupkg"
+
+
+def _download_file(url: str, dest: Path):
+    """Download a file from a URL, following redirects.
+
+    Args:
+        url: URL to download.
+        dest: Destination file path.
+    """
+    with requests.get(url, stream=True, timeout=120, allow_redirects=True) as resp:  # always close the stream
+        resp.raise_for_status()
+
+        with open(dest, "wb") as f:
+            for chunk in resp.iter_content(chunk_size=8192):
+                f.write(chunk)
+
+
+def _extract_native_binaries(nupkg_path: Path, rid: str, ext: str, bin_dir: Path) -> list[str]:
+    """Extract native binaries from a .nupkg (ZIP) file.
+
+    Extracts files matching: runtimes/{rid}/native/*{ext}
+
+    Args:
+        nupkg_path: Path to the .nupkg file.
+        rid: NuGet Runtime Identifier (e.g. 'win-x64').
+        ext: File extension to look for (e.g. '.dll').
+        bin_dir: Directory to extract files into.
+
+    Returns:
+        List of extracted file names.
+    """
+    target_prefix = f"runtimes/{rid}/native/".lower()
+    extracted = []
+
+    with zipfile.ZipFile(nupkg_path, "r") as zf:
+        for entry in zf.namelist():
+            entry_lower = entry.lower()
+            if entry_lower.startswith(target_prefix) and entry_lower.endswith(ext):
+                # Extract just the filename (flat, no directory structure)
+                filename = Path(entry).name
+                target_path = bin_dir / filename
+                with zf.open(entry) as src, open(target_path, "wb") as dst:
+                    shutil.copyfileobj(src, dst)
+                extracted.append(filename)
+                print(f"  Extracted {filename}")
+
+    return extracted
+
+
+def _create_ort_symlinks(bin_dir: Path):
+    """Create OnnxRuntime symlinks on Linux/macOS.
+
+    Workaround for ORT issue https://github.com/microsoft/onnxruntime/issues/27263.
+    The native Core library expects 'onnxruntime.dll' but on Linux/macOS
+    the actual file is named 'libonnxruntime.so/.dylib'.
+    """
+    if sys.platform == "win32":
+        return
+
+    ext = ".dylib" if sys.platform == "darwin" else ".so"
+    lib_name = f"libonnxruntime{ext}"
+    link_name = "onnxruntime.dll"
+    lib_path = bin_dir / lib_name
+    link_path = bin_dir / link_name
+
+    if lib_path.exists() and not link_path.exists():
+        os.symlink(lib_name, link_path)
+        print(f"  Created symlink: {link_name} -> {lib_name}")
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def get_native_path() -> Path | None:
+    """Check if native libraries are already downloaded and return their path.
+
+    Returns:
+        Path to the native libraries directory, or None if not found.
+    """
+    native_dir = _get_native_dir()
+    required = _get_required_files()
+
+    if native_dir.exists() and all((native_dir / f).exists() for f in required):
+        return native_dir
+
+    return None
+
+
+def download_native_binaries(
+    use_nightly: bool = False,
+    target_dir: Path | None = None,
+    use_winml: bool = False,
+) -> Path:
+    """Download native libraries from NuGet feeds.
+
+    Args:
+        use_nightly: Whether to use nightly builds.
+        target_dir: Override target directory. Defaults to package-local packages/ dir.
+        use_winml: Download WinML-specific packages (DirectML execution provider).
+            Only supported on Windows.
+
+    Returns:
+        Path to the directory containing the downloaded binaries.
+
+    Raises:
+        RuntimeError: If the current platform is not supported or download fails.
+    """
+    if use_winml and sys.platform != "win32":
+        raise RuntimeError("WinML packages are only supported on Windows.")
+    rid = _get_rid()
+    if not rid:
+        raise RuntimeError(
+            f"Unsupported platform: {_get_platform_key()}. "
+            f"Supported platforms: {', '.join(PLATFORM_MAP.keys())}"
+        )
+
+    ext = _get_ext()
+    bin_dir = target_dir or _get_native_dir()
+    required = _get_required_files()
+
+    # Check if already installed
+    if bin_dir.exists() and all((bin_dir / f).exists() for f in required):
+        if use_nightly:
+            print("[foundry-local] Nightly requested. Forcing reinstall...")
+            shutil.rmtree(bin_dir)
+        else:
+            print("[foundry-local] Native libraries already installed.")
+            return bin_dir
+
+    variant = "winml" if use_winml else "cross-plat"
+    print(f"[foundry-local] Installing native libraries for {rid} ({variant})...")
+    bin_dir.mkdir(parents=True, exist_ok=True)
+
+    artifacts = _get_artifacts(use_winml=use_winml)
+    with tempfile.TemporaryDirectory(prefix="foundry-install-") as temp_dir:
+        temp_path = Path(temp_dir)
+        for artifact in artifacts:
+            if use_nightly and artifact["feed"] == ORT_NIGHTLY_FEED:
+                artifact = {**artifact, "version": None}
+            _install_package(artifact, rid, ext, bin_dir, temp_path)
+
+    _create_ort_symlinks(bin_dir)
+
+    # Verify required files
+    missing = [f for f in required if not (bin_dir / f).exists()]
+    if missing:
+        raise RuntimeError(
+            f"Installation incomplete. Missing files: {', '.join(missing)}. "
+            f"Directory contents: {[f.name for f in bin_dir.iterdir()]}"
+        )
+
+    print("[foundry-local] Installation complete.")
+    return bin_dir
+
+
+def _install_package(
+    artifact: dict[str, str | None],
+    rid: str,
+    ext: str,
+    bin_dir: Path,
+    temp_dir: Path,
+):
+    """Download and extract a single NuGet package.
+
+    Args:
+        artifact: Dict with 'name', 'version', 'feed' keys.
+        rid: NuGet Runtime Identifier.
+        ext: Library file extension.
+        bin_dir: Directory to extract binaries into.
+        temp_dir: Temporary directory for downloads.
+    """
+    pkg_name = artifact["name"]
+    feed_url = artifact["feed"]
+    pkg_ver = artifact.get("version")
+
+    # Resolve version if not specified (nightly)
+    if not pkg_ver:
+        print(f"  Resolving latest version for {pkg_name}...")
+        pkg_ver = _resolve_latest_version(feed_url, pkg_name)
+
+    print(f"  Downloading {pkg_name} {pkg_ver}...")
+    download_url = _resolve_nupkg_url(feed_url, pkg_name, pkg_ver)
+
+    nupkg_path = temp_dir / f"{pkg_name}.{pkg_ver}.nupkg"
+    _download_file(download_url, nupkg_path)
+
+    print(f"  Extracting {pkg_name}...")
+    extracted = _extract_native_binaries(nupkg_path, rid, ext, bin_dir)
+
+    if not extracted:
+        print(f"  Warning: No files found for RID {rid} in {pkg_name}")
+
+
+# ---------------------------------------------------------------------------
+# CLI entry point
+# ---------------------------------------------------------------------------
+
+
+def main(args: list[str] | None = None):
+    """CLI entry point for downloading native binaries.
+
+    Usage:
+        foundry-local-install [--nightly] [--winml] [--target DIR]
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Download platform-specific native libraries (Foundry Local Core, "
+            "OnnxRuntime, OnnxRuntimeGenAI) required by the Foundry Local SDK. "
+            "This is optional — libraries are also downloaded automatically on first use."
+        ),
+        prog="foundry-local-install",
+    )
+    parser.add_argument(
+        "--nightly",
+        action="store_true",
+        help="Download latest nightly build (resolves latest version from ORT-Nightly feed)",
+    )
+    parser.add_argument(
+        "--target",
+        type=str,
+        default=None,
+        help="Override target directory for native libraries",
+    )
+    parser.add_argument(
+        "--winml",
+        action="store_true",
+        help="Download WinML-specific packages (DirectML execution provider, Windows only)",
+    )
+
+    parsed = parser.parse_args(args)
+
+    target = Path(parsed.target) if parsed.target else None
+
+    try:
+        path = download_native_binaries(
+            use_nightly=parsed.nightly,
+            target_dir=target,
+            use_winml=parsed.winml,
+        )
+        print(f"[foundry-local] Native libraries installed at: {path}")
+    except Exception as e:
+        print(f"[foundry-local] Installation failed: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sdk_v2/python/src/detail/utils.py b/sdk_v2/python/src/detail/utils.py
new file mode 100644
index 0000000..0d51a88
--- /dev/null
+++ b/sdk_v2/python/src/detail/utils.py
@@ -0,0 +1,40 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
# --- sdk_v2/python/src/detail/utils.py ---

# ``enum.StrEnum`` exists from Python 3.11; older interpreters get an equivalent shim.
if sys.version_info >= (3, 11):
    from enum import StrEnum
else:
    from enum import Enum

    class StrEnum(str, Enum):
        def __str__(self) -> str:
            return self.value


def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
    """Get the list of models that have been downloaded and are cached.

    Args:
        core_interop: Interop object whose ``execute_command`` is used to run
            the ``get_cached_models`` command.

    Returns:
        The model ids decoded from the command's JSON array response.

    Raises:
        FoundryLocalException: If the command reports an error, or if its
            payload is missing, is not valid JSON, or is not a JSON array.
    """
    response = core_interop.execute_command("get_cached_models")
    if response.error is not None:
        raise FoundryLocalException(f"Failed to get cached models: {response.error}")

    # response is json array of strings
    try:
        model_ids = json.loads(response.data)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers a missing payload (``json.loads(None)``), which would
        # otherwise escape as a non-SDK exception.
        raise FoundryLocalException(f"Failed to decode JSON response: Response was: {response.data}") from e

    # Callers test membership with ``in``; reject anything that is not a list
    # (e.g. JSON ``null``) here instead of failing later with an unrelated error.
    if not isinstance(model_ids, list):
        raise FoundryLocalException(
            f"Unexpected cached models response (expected a JSON array): {response.data}"
        )

    return model_ids


# --- sdk_v2/python/src/exception.py ---

class FoundryLocalException(Exception):
    """Base exception for Foundry Local SDK errors."""
class FoundryLocalManager:
    """Singleton manager for Foundry Local SDK operations.

    Call ``FoundryLocalManager.initialize(config)`` once at startup, then access
    the singleton via ``FoundryLocalManager.instance``.

    Attributes:
        instance: The singleton ``FoundryLocalManager`` instance (set after ``initialize``).
        catalog: The model ``Catalog`` for discovering and managing models.
        urls: Bound URL(s) after ``start_web_service()`` is called, or ``None``.
    """

    # Guards singleton creation and web service start/stop.
    _lock = threading.Lock()
    instance: FoundryLocalManager | None = None

    @staticmethod
    def initialize(config: Configuration):
        """Initialize the Foundry Local SDK with the given configuration.

        This method must be called before using any other part of the SDK.

        Args:
            config: Configuration object for the SDK.

        Raises:
            FoundryLocalException: If the manager has already been initialized.
        """
        # Delegate singleton creation to the constructor, which enforces
        # the singleton invariant under a lock and sets `instance`.
        FoundryLocalManager(config)

    def __init__(self, config: Configuration):
        """Create the singleton; raises ``FoundryLocalException`` if one already exists."""
        # Enforce singleton creation under a class-level lock and ensure
        # that `FoundryLocalManager.instance` is set exactly once.
        with FoundryLocalManager._lock:
            if FoundryLocalManager.instance is not None:
                raise FoundryLocalException(
                    "FoundryLocalManager is a singleton and has already been initialized."
                )
            config.validate()
            self.config = config
            # Every attribute must exist before the instance is published, so
            # code reading `FoundryLocalManager.instance.urls` never hits a
            # half-constructed object.
            self.urls = None
            self._initialize()
            FoundryLocalManager.instance = self

    def _initialize(self):
        """Apply the configured log level and build the interop, load manager and catalog."""
        set_default_logger_severity(self.config.log_level)

        external_service_url = self.config.web.external_url if self.config.web else None

        self._core_interop = CoreInterop(self.config)
        self._model_load_manager = ModelLoadManager(self._core_interop, external_service_url)
        self.catalog = Catalog(self._model_load_manager, self._core_interop)

    def ensure_eps_downloaded(self) -> None:
        """Ensure execution providers are downloaded and registered (synchronous).
        Only relevant when using WinML.

        Raises:
            FoundryLocalException: If execution provider download fails.
        """
        result = self._core_interop.execute_command("ensure_eps_downloaded")

        if result.error is not None:
            raise FoundryLocalException(f"Error ensuring execution providers downloaded: {result.error}")

    def start_web_service(self):
        """Start the optional web service.

        If provided, the service will be bound to the value of Configuration.web.urls.
        The default of http://127.0.0.1:0 will be used otherwise, which binds to a random ephemeral port.

        FoundryLocalManager.urls will be updated with the actual URL/s the service is listening on.

        Raises:
            FoundryLocalException: If the service fails to start, or its response
                cannot be decoded or contains no bound URLs.
        """
        with FoundryLocalManager._lock:
            response = self._core_interop.execute_command("start_service")

            if response.error is not None:
                raise FoundryLocalException(f"Error starting web service: {response.error}")

            try:
                bound_urls = json.loads(response.data)
            except (json.JSONDecodeError, TypeError) as e:
                # Surface a malformed/missing payload as an SDK error, not a raw decode error.
                raise FoundryLocalException(
                    f"Failed to decode web service start response: {response.data}"
                ) from e

            if not bound_urls:
                raise FoundryLocalException("Failed to get bound URLs from web service start response.")

            self.urls = bound_urls

    def stop_web_service(self):
        """Stop the optional web service.

        Raises:
            FoundryLocalException: If the service is not running (``urls`` is
                ``None``) or the stop command reports an error.
        """
        with FoundryLocalManager._lock:
            if self.urls is None:
                raise FoundryLocalException("Web service is not running.")

            response = self._core_interop.execute_command("stop_service")

            if response.error is not None:
                raise FoundryLocalException(f"Error stopping web service: {response.error}")

            self.urls = None
# --- sdk_v2/python/src/imodel.py ---

class IModel(ABC):
    """Abstract interface for a model that can be downloaded, loaded, and used for inference."""

    @property
    @abstractmethod
    def id(self) -> str:
        """Unique model id."""

    @property
    @abstractmethod
    def alias(self) -> str:
        """Model alias."""

    @property
    @abstractmethod
    def is_cached(self) -> bool:
        """True if the model is present in the local cache."""

    @property
    @abstractmethod
    def is_loaded(self) -> bool:
        """True if the model is loaded into memory."""

    @abstractmethod
    def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
        """
        Download the model to local cache if not already present.
        :param progress_callback: Optional callback function for download progress as a percentage (0.0 to 100.0).
        """

    @abstractmethod
    def get_path(self) -> str:
        """
        Gets the model path if cached.
        :return: Path of model directory.
        """

    @abstractmethod
    def load(self) -> None:
        """
        Load the model into memory if not already loaded.
        """

    @abstractmethod
    def remove_from_cache(self) -> None:
        """
        Remove the model from the local cache.
        """

    @abstractmethod
    def unload(self) -> None:
        """
        Unload the model if loaded.
        """

    @abstractmethod
    def get_chat_client(self) -> ChatClient:
        """
        Get an OpenAI API based ChatClient.
        :return: ChatClient instance.
        """

    @abstractmethod
    def get_audio_client(self) -> AudioClient:
        """
        Get an OpenAI API based AudioClient.
        :return: AudioClient instance.
        """


# --- sdk_v2/python/src/logging_helper.py ---

# Inline StrEnum compat shim to avoid importing detail (which triggers core_interop → configuration circular import)
if sys.version_info >= (3, 11):
    from enum import StrEnum
else:
    from enum import Enum

    class StrEnum(str, Enum):
        def __str__(self) -> str:
            return self.value


# Map the python logging levels to the Foundry Local Core names
class LogLevel(StrEnum):
    """Log level names as used by Foundry Local Core."""

    VERBOSE = "Verbose"
    DEBUG = "Debug"
    INFORMATION = "Information"
    WARNING = "Warning"
    ERROR = "Error"
    FATAL = "Fatal"


LOG_LEVEL_MAP = {
    LogLevel.VERBOSE: logging.DEBUG,  # Python logging has no level below DEBUG, so Verbose shares it
    LogLevel.DEBUG: logging.DEBUG,
    LogLevel.INFORMATION: logging.INFO,
    LogLevel.WARNING: logging.WARNING,
    LogLevel.ERROR: logging.ERROR,
    LogLevel.FATAL: logging.CRITICAL,
}


def set_default_logger_severity(config_level: LogLevel):
    """Set the level of this package's top-level logger from a ``LogLevel``.

    The logger is looked up by the first component of this module's dotted
    name. Levels missing from ``LOG_LEVEL_MAP`` fall back to ``logging.INFO``.
    """
    py_level = LOG_LEVEL_MAP.get(config_level, logging.INFO)
    logger = logging.getLogger(__name__.split(".", maxsplit=1)[0])
    logger.setLevel(py_level)
# --- sdk_v2/python/src/model.py ---

class Model(IModel):
    """A model identified by an alias that groups one or more ``ModelVariant`` instances.

    Operations are delegated to the currently selected variant.
    """

    def __init__(self, model_variant: ModelVariant, core_interop: CoreInterop):
        """Create a model from its first variant, which becomes the selected one."""
        self._alias = model_variant.alias
        self._variants: List[ModelVariant] = [model_variant]
        # Variants are sorted by Core, so the first one added is the default
        self._selected_variant = model_variant
        self._core_interop = core_interop

    def _add_variant(self, variant: ModelVariant) -> None:
        """Register another variant of this alias.

        :raises FoundryLocalException: If the variant's alias differs from this model's alias
        """
        if variant.alias != self._alias:
            raise FoundryLocalException(
                f"Variant alias {variant.alias} does not match model alias {self._alias}"
            )

        self._variants.append(variant)

        # Prefer the highest priority locally cached variant
        if variant.info.cached and not self._selected_variant.info.cached:
            self._selected_variant = variant

    def select_variant(self, variant: ModelVariant) -> None:
        """
        Select a specific model variant by its ModelVariant object.
        The selected variant will be used for IModel operations.

        :param variant: ModelVariant to select
        :raises FoundryLocalException: If variant is not valid for this model
        """
        if variant not in self._variants:
            raise FoundryLocalException(
                f"Model {self._alias} does not have a {variant.id} variant."
            )

        self._selected_variant = variant

    def get_latest_version(self, variant: ModelVariant) -> ModelVariant:
        """
        Get the latest version of the specified model variant.

        :param variant: Model variant
        :return: ModelVariant for latest version. Same as variant if that is the latest version
        :raises FoundryLocalException: If variant is not valid for this model
        """
        # Variants are sorted by version, so the first one matching the name is the latest version
        for v in self._variants:
            if v.info.name == variant.info.name:
                return v

        raise FoundryLocalException(
            f"Model {self._alias} does not have a {variant.id} variant."
        )

    @property
    def variants(self) -> List[ModelVariant]:
        """List of all variants for this model."""
        return self._variants.copy()  # Return a copy to prevent external modification

    @property
    def selected_variant(self) -> ModelVariant:
        """Currently selected variant."""
        return self._selected_variant

    @property
    def id(self) -> str:
        """Model Id of the currently selected variant."""
        return self._selected_variant.id

    @property
    def alias(self) -> str:
        """Alias of this model."""
        return self._alias

    @property
    def is_cached(self) -> bool:
        """Is the currently selected variant cached locally?"""
        cached_models = get_cached_model_ids(self._core_interop)
        return self.id in cached_models

    @property
    def is_loaded(self) -> bool:
        """Is the currently selected variant loaded in memory?"""
        return self._selected_variant.is_loaded

    def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
        """Download the currently selected variant."""
        self._selected_variant.download(progress_callback)

    def get_path(self) -> str:
        """Get the path to the currently selected variant."""
        return self._selected_variant.get_path()

    def load(self) -> None:
        """Load the currently selected variant into memory."""
        self._selected_variant.load()

    def unload(self) -> None:
        """Unload the currently selected variant from memory."""
        self._selected_variant.unload()

    def remove_from_cache(self) -> None:
        """Remove the currently selected variant from the local cache."""
        self._selected_variant.remove_from_cache()

    def get_chat_client(self) -> ChatClient:
        """Get a chat client for the currently selected variant."""
        return self._selected_variant.get_chat_client()

    def get_audio_client(self) -> AudioClient:
        """Get an audio client for the currently selected variant."""
        return self._selected_variant.get_audio_client()


# --- sdk_v2/python/src/model_variant.py ---

class ModelVariant(IModel):
    """A specific variant of a model (e.g. a particular device type, version, or quantization).

    Implements ``IModel`` and provides download, cache, load/unload, and
    client-creation operations for a single model variant.
    """

    def __init__(self, model_info: ModelInfo, model_load_manager: ModelLoadManager, core_interop: CoreInterop):
        """Initialize a ModelVariant.

        Args:
            model_info: Catalog metadata for this variant.
            model_load_manager: Manager for loading/unloading models.
            core_interop: Native interop layer for Foundry Local Core.
        """
        self._model_info = model_info
        self._model_load_manager = model_load_manager
        self._core_interop = core_interop

        self._id = model_info.id
        self._alias = model_info.alias

    @staticmethod
    def _raise_if_failed(response, failure_prefix: str) -> None:
        """Raise ``FoundryLocalException`` if *response* carries an error.

        The message is ``"<failure_prefix>: <response.error>"``.
        """
        if response.error is not None:
            # model_variant.py's import block does not import the exception, so
            # the error paths used to fail with NameError; import it at the
            # point of use.
            from .exception import FoundryLocalException

            raise FoundryLocalException(f"{failure_prefix}: {response.error}")

    @property
    def id(self) -> str:
        """Unique model variant ID (e.g. ``name:version``)."""
        return self._id

    @property
    def alias(self) -> str:
        """Model alias shared across variants."""
        return self._alias

    @property
    def info(self) -> ModelInfo:
        """Full catalog metadata for this variant."""
        return self._model_info

    @property
    def is_cached(self) -> bool:
        """``True`` if this variant is present in the local model cache."""
        cached_model_ids = get_cached_model_ids(self._core_interop)
        return self.id in cached_model_ids

    @property
    def is_loaded(self) -> bool:
        """``True`` if this variant is currently loaded into memory."""
        loaded_model_ids = self._model_load_manager.list_loaded()
        return self.id in loaded_model_ids

    def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
        """Download this variant to the local cache.

        Args:
            progress_callback: Optional callback receiving download progress as a
                percentage (0.0 to 100.0).

        Raises:
            FoundryLocalException: If the download command reports an error.
        """
        request = InteropRequest(params={"Model": self.id})
        if progress_callback is None:
            response = self._core_interop.execute_command("download_model", request)
        else:
            # The callback receives a string and converts it to float before
            # handing it to the caller's callback.
            response = self._core_interop.execute_command_with_callback(
                "download_model", request,
                lambda pct_str: progress_callback(float(pct_str))
            )

        logger.info("Download response: %s", response)
        self._raise_if_failed(response, "Failed to download model")

    def get_path(self, ct: Optional[object] = None) -> str:
        """Get the local file-system path to this variant if cached.

        Args:
            ct: Unused by this implementation.

        Returns:
            Path to the model directory.

        Raises:
            FoundryLocalException: If the model path cannot be retrieved.
        """
        request = InteropRequest(params={"Model": self.id})
        response = self._core_interop.execute_command("get_model_path", request)
        self._raise_if_failed(response, "Failed to get model path")

        return response.data

    def load(self, ct: Optional[object] = None) -> None:
        """Load this variant into memory for inference. ``ct`` is unused."""
        self._model_load_manager.load(self.id)

    def remove_from_cache(self, ct: Optional[object] = None) -> None:
        """Remove this variant from the local model cache. ``ct`` is unused.

        Raises:
            FoundryLocalException: If the removal command reports an error.
        """
        request = InteropRequest(params={"Model": self.id})
        response = self._core_interop.execute_command("remove_cached_model", request)
        self._raise_if_failed(response, "Failed to remove model from cache")

    def unload(self, ct: Optional[object] = None) -> None:
        """Unload this variant from memory. ``ct`` is unused."""
        self._model_load_manager.unload(self.id)

    def get_chat_client(self) -> ChatClient:
        """Create an OpenAI-compatible ``ChatClient`` for this variant."""
        return ChatClient(self.id, self._core_interop)

    def get_audio_client(self) -> AudioClient:
        """Create an OpenAI-compatible ``AudioClient`` for this variant."""
        return AudioClient(self.id, self._core_interop)
class AudioSettings:
    """Settings supported by Foundry Local for audio transcription.

    Attributes:
        language: Language of the audio (e.g. ``"en"``).
        temperature: Sampling temperature (0.0 for deterministic results).
    """

    def __init__(
        self,
        language: Optional[str] = None,
        temperature: Optional[float] = None,
    ):
        self.language = language
        self.temperature = temperature


@dataclass
class AudioTranscriptionResponse:
    """Response from an audio transcription request.

    Attributes:
        text: The transcribed text.
    """

    text: str


class AudioClient:
    """OpenAI-compatible audio transcription client backed by Foundry Local Core.

    Supports non-streaming and streaming transcription of audio files.

    Attributes:
        model_id: The ID of the loaded Whisper model variant.
        settings: Tunable ``AudioSettings`` (language, temperature).
    """

    def __init__(self, model_id: str, core_interop: CoreInterop):
        self.model_id = model_id
        self.settings = AudioSettings()
        self._core_interop = core_interop

    @staticmethod
    def _sdk_error(message: str) -> Exception:
        """Build a ``FoundryLocalException`` carrying *message*.

        audio_client.py's import block does not import the exception, so the
        error paths used to fail with NameError; import it at the point of use.
        """
        from ..exception import FoundryLocalException

        return FoundryLocalException(message)

    @staticmethod
    def _validate_audio_file_path(audio_file_path: str) -> None:
        """Validate that the audio file path is a non-empty string.

        Raises:
            ValueError: If the path is not a string or is empty/whitespace-only.
        """
        if not isinstance(audio_file_path, str) or audio_file_path.strip() == "":
            raise ValueError("Audio file path must be a non-empty string.")

    def _create_request_json(self, audio_file_path: str) -> str:
        """Build the JSON payload for the ``audio_transcribe`` native command.

        Language and temperature, when set, are written both as top-level
        fields and (as strings) under ``metadata``.
        """
        request: dict = {
            "Model": self.model_id,
            "FileName": audio_file_path,
        }

        metadata: dict[str, str] = {}

        if self.settings.language is not None:
            request["Language"] = self.settings.language
            metadata["language"] = self.settings.language

        if self.settings.temperature is not None:
            request["Temperature"] = self.settings.temperature
            metadata["temperature"] = str(self.settings.temperature)

        if metadata:
            request["metadata"] = metadata

        return json.dumps(request)

    def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
        """Transcribe an audio file (non-streaming).

        Args:
            audio_file_path: Path to the audio file to transcribe.

        Returns:
            An ``AudioTranscriptionResponse`` containing the transcribed text
            (empty string if the response has no ``text`` field).

        Raises:
            ValueError: If *audio_file_path* is not a non-empty string.
            FoundryLocalException: If the native command fails.
        """
        self._validate_audio_file_path(audio_file_path)

        request_json = self._create_request_json(audio_file_path)
        request = InteropRequest(params={"OpenAICreateRequest": request_json})

        response = self._core_interop.execute_command("audio_transcribe", request)
        if response.error is not None:
            raise self._sdk_error(
                f"Audio transcription failed for model '{self.model_id}': {response.error}"
            )

        data = json.loads(response.data)
        return AudioTranscriptionResponse(text=data.get("text", ""))

    def transcribe_streaming(
        self,
        audio_file_path: str,
        callback: Callable[[AudioTranscriptionResponse], None],
    ) -> None:
        """Transcribe an audio file with streaming chunks.

        Each chunk is passed to *callback* as an ``AudioTranscriptionResponse``.

        Args:
            audio_file_path: Path to the audio file to transcribe.
            callback: Called with each incremental transcription chunk.

        Raises:
            ValueError: If *audio_file_path* is not a non-empty string.
            TypeError: If *callback* is not callable.
            FoundryLocalException: If the native command fails.
        """
        self._validate_audio_file_path(audio_file_path)

        if not callable(callback):
            raise TypeError("Callback must be a valid function.")

        request_json = self._create_request_json(audio_file_path)
        request = InteropRequest(params={"OpenAICreateRequest": request_json})

        def callback_handler(chunk_str: str):
            # Each chunk is a JSON document; only its "text" field is forwarded.
            chunk_data = json.loads(chunk_str)
            chunk = AudioTranscriptionResponse(text=chunk_data.get("text", ""))
            callback(chunk)

        response = self._core_interop.execute_command_with_callback(
            "audio_transcribe", request, callback_handler
        )
        if response.error is not None:
            raise self._sdk_error(
                f"Streaming audio transcription failed for model '{self.model_id}': {response.error}"
            )
class ChatSettings:
    """Optional generation settings supported by Foundry Local.

    Every setting defaults to ``None``; ``ChatClient`` only sends the settings
    that have been given a value.
    """

    def __init__(
        self,
        frequency_penalty: Optional[float] = None,
        max_completion_tokens: Optional[int] = None,
        n: Optional[int] = None,
        temperature: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        random_seed: Optional[int] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None
    ):
        self.frequency_penalty = frequency_penalty
        self.max_completion_tokens = max_completion_tokens
        self.n = n
        self.temperature = temperature
        self.presence_penalty = presence_penalty
        self.random_seed = random_seed
        self.top_k = top_k
        self.top_p = top_p


class ChatClient:
    """OpenAI-compatible chat completions client backed by Foundry Local Core.

    Offers a non-streaming call (``complete_chat``) and a streaming call
    (``complete_streaming_chat``) that run the ``chat_completions`` command.

    Attributes:
        model_id: The ID of the loaded model variant.
        settings: Tunable ``ChatSettings`` (temperature, max tokens, etc.).
    """

    def __init__(self, model_id: str, core_interop: CoreInterop):
        self.model_id = model_id
        self.settings = ChatSettings()
        self._core_interop = core_interop

    def _validate_messages(self, messages: List[ChatCompletionMessageParam]) -> None:
        """Check the message list before it is handed to the native layer.

        Raises:
            ValueError: If *messages* is empty/``None``, or an entry is not a
                dict or lacks the ``role`` or ``content`` key.
        """
        if not messages:
            raise ValueError("messages must be a non-empty list.")

        for index, entry in enumerate(messages):
            if not isinstance(entry, dict):
                raise ValueError(f"messages[{index}] must be a dict, got {type(entry).__name__}.")
            # 'role' is checked before 'content', so a message missing both reports 'role'.
            for required_key in ("role", "content"):
                if required_key not in entry:
                    raise ValueError(f"messages[{index}] is missing required key '{required_key}'.")

    def _apply_settings(self, chat_request: CompletionCreateParamsBase):
        """Copy every non-``None`` setting into *chat_request* (mutated in place).

        ``top_k`` and ``random_seed`` go under ``chat_request["metadata"]`` as
        strings; the other settings become top-level request fields.
        """
        # Top-level fields, in the order they are written to the request.
        top_level_fields = (
            "frequency_penalty",
            "max_completion_tokens",
            "n",
            "temperature",
            "presence_penalty",
            "top_p",
        )
        for field_name in top_level_fields:
            value = getattr(self.settings, field_name)
            if value is not None:
                chat_request[field_name] = value

        # metadata is treated as Record by the core — values must be strings
        for extra_name in ("top_k", "random_seed"):
            value = getattr(self.settings, extra_name)
            if value is not None:
                chat_request["metadata"][extra_name] = str(value)

    def _create_request(self, messages: List[ChatCompletionMessageParam], streaming: bool) -> str:
        """Build the JSON request string for the ``chat_completions`` command."""
        base_request = CompletionCreateParamsBase(
            {
                "model": self.model_id,
                "messages": messages,
                "metadata": {}
            }
        )
        self._apply_settings(base_request)

        # Wrap the base request in the params type matching the call mode.
        params_type = CompletionCreateParamsStreaming if streaming else CompletionCreateParamsNonStreaming
        return json.dumps(params_type(base_request))

    def complete_chat(self, messages: List[ChatCompletionMessageParam]):
        """Perform a non-streaming chat completion.

        Args:
            messages: Conversation history as a list of OpenAI message dicts.

        Returns:
            A ``ChatCompletion`` response.

        Raises:
            ValueError: If messages is None, empty, or contains malformed entries.
            FoundryLocalException: If the native command returns an error.
        """
        self._validate_messages(messages)
        payload = self._create_request(messages, streaming=False)

        # Send the request to the chat API
        interop_request = InteropRequest(params={"OpenAICreateRequest": payload})
        result = self._core_interop.execute_command("chat_completions", interop_request)
        if result.error is not None:
            raise FoundryLocalException(f"Error during chat completion: {result.error}")

        return ChatCompletion.model_validate_json(result.data)

    def complete_streaming_chat(self, messages: List[ChatCompletionMessageParam],
                                user_callback: Callable[[ChatCompletionChunk], None]):
        """Perform a streaming chat completion.

        Each incremental ``ChatCompletionChunk`` is passed to *user_callback*.

        Args:
            messages: Conversation history.
            user_callback: Called with each streaming chunk.

        Raises:
            ValueError: If messages is None, empty, or contains malformed entries.
            TypeError: If user_callback is not callable.
            FoundryLocalException: If the native command returns an error.
        """
        self._validate_messages(messages)
        if not callable(user_callback):
            raise TypeError("user_callback must be a callable.")
        payload = self._create_request(messages, streaming=True)

        def forward_chunk(raw_chunk: str):
            # Parse each raw chunk into a ChatCompletionChunk before forwarding it.
            user_callback(ChatCompletionChunk.model_validate_json(raw_chunk))

        interop_request = InteropRequest(params={"OpenAICreateRequest": payload})
        result = self._core_interop.execute_command_with_callback(
            "chat_completions", interop_request, forward_chunk
        )
        if result.error is not None:
            raise FoundryLocalException(f"Error during streaming chat completion: {result.error}")
+ +## Running the tests + +From the `sdk_v2/python` directory: + +```bash +# Run all tests +python -m pytest test/ + +# Run with verbose output +python -m pytest test/ -v + +# Run a specific test file +python -m pytest test/test_catalog.py + +# Run a specific test class or function +python -m pytest test/test_catalog.py::TestCatalog::test_should_list_models + +# List all collected tests without running them +python -m pytest test/ --collect-only +``` + +## Test structure + +``` +test/ +├── conftest.py # Shared fixtures & config (equivalent to testUtils.ts) +├── test_foundry_local_manager.py # FoundryLocalManager initialization (2 tests) +├── test_catalog.py # Catalog listing, lookup, error cases (9 tests) +├── test_model.py # Model caching & load/unload lifecycle (2 tests) +├── detail/ +│ └── test_model_load_manager.py # ModelLoadManager core interop & web service (5 tests) +└── openai/ + ├── test_chat_client.py # Chat completions, streaming, error validation (7 tests) + └── test_audio_client.py # Audio transcription (7 tests) +``` + +**Total: 32 tests** + +## Key conventions + +| Concept | Python (pytest) | JS (Mocha) | C# (TUnit) | +|---|---|---|---| +| Shared setup | `conftest.py` (auto-discovered) | `testUtils.ts` (explicit import) | `Utils.cs` (`[Before(Assembly)]`) | +| Session fixture | `@pytest.fixture(scope="session")` | manual singleton | `[Before(Assembly)]` static | +| Teardown | `yield` + cleanup in fixture | `after()` hook | `[After(Assembly)]` | +| Skip in CI | `@skip_in_ci` marker | `IS_RUNNING_IN_CI` + `this.skip()` | `[SkipInCI]` attribute | +| Expected failure | `@pytest.mark.xfail` | N/A | N/A | +| Timeout | `@pytest.mark.timeout(30)` | `this.timeout(30000)` | `[Timeout(30000)]` | + +## CI environment detection + +Tests that require the web service are skipped when either `TF_BUILD=true` (Azure DevOps) or +`GITHUB_ACTIONS=true` is set. 
+ +## Test models + +| Alias | Use | Variant | +|---|---|---| +| `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` | +| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` | diff --git a/sdk_v2/python/test/__init__.py b/sdk_v2/python/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sdk_v2/python/test/conftest.py b/sdk_v2/python/test/conftest.py new file mode 100644 index 0000000..e8ba439 --- /dev/null +++ b/sdk_v2/python/test/conftest.py @@ -0,0 +1,146 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Shared test configuration and fixtures for Foundry Local Python SDK tests. + +NOTE: "conftest.py" is a special filename that pytest uses to auto-discover +fixtures and shared utilities. All fixtures defined here are automatically +available to every test file without needing an explicit import. +This serves the same role as testUtils.ts in the JS SDK. 
+""" + +from __future__ import annotations + +import os +import logging + +import pytest + +from pathlib import Path + +from foundry_local_sdk.configuration import Configuration, LogLevel +from foundry_local_sdk.foundry_local_manager import FoundryLocalManager + +logger = logging.getLogger(__name__) + +TEST_MODEL_ALIAS = "qwen2.5-0.5b" +AUDIO_MODEL_ALIAS = "whisper-tiny" + +def get_git_repo_root() -> Path: + """Walk upward from __file__ until we find a .git directory.""" + current = Path(__file__).resolve().parent + while True: + if (current / ".git").exists(): + return current + parent = current.parent + if parent == current: + raise RuntimeError("Could not find git repo root") + current = parent + + +def get_test_data_shared_path() -> str: + """Return absolute path to the test-data-shared folder (sibling of the repo root).""" + repo_root = get_git_repo_root() + return str(repo_root.parent / "test-data-shared") + + +def is_running_in_ci() -> bool: + """Check TF_BUILD (Azure DevOps) and GITHUB_ACTIONS env vars.""" + azure_devops = os.environ.get("TF_BUILD", "false").lower() == "true" + github_actions = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true" + return azure_devops or github_actions + + +IS_RUNNING_IN_CI = is_running_in_ci() + +skip_in_ci = pytest.mark.skipif(IS_RUNNING_IN_CI, reason="Skipped in CI environments") + + +def get_test_config() -> Configuration: + """Build a Configuration suitable for integration tests.""" + repo_root = get_git_repo_root() + return Configuration( + app_name="FoundryLocalTest", + # foundry_local_core_path = str(repo_root / "sdk_v2" / "python" / "native" / "win32-x64"), + model_cache_dir=get_test_data_shared_path(), + log_level=LogLevel.WARNING, + logs_dir=str(repo_root / "sdk_v2" / "python" / "logs"), + additional_settings={"Bootstrap": "false"}, + ) + + +def get_multiply_tool(): + """Tool definition for the multiply_numbers function-calling test.""" + return { + "type": "function", + "function": { + "name": 
"multiply_numbers", + "description": "A tool for multiplying two numbers.", + "parameters": { + "type": "object", + "properties": { + "first": { + "type": "integer", + "description": "The first number in the operation", + }, + "second": { + "type": "integer", + "description": "The second number in the operation", + }, + }, + "required": ["first", "second"], + }, + }, + } + + +# --------------------------------------------------------------------------- +# Session-scoped fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def manager(): + """Initialize FoundryLocalManager once for the entire test session.""" + # Reset singleton in case a previous run left state + FoundryLocalManager.instance = None + + config = get_test_config() + FoundryLocalManager.initialize(config) + mgr = FoundryLocalManager.instance + assert mgr is not None, "FoundryLocalManager.initialize did not set instance" + + yield mgr + + # Teardown: unload all loaded models + try: + catalog = mgr.catalog + loaded = catalog.get_loaded_models() + for model_variant in loaded: + try: + model_variant.unload() + except Exception as e: + logger.warning("Failed to unload model %s during teardown: %s", model_variant.id, e) + except Exception as e: + logger.warning("Failed to get loaded models during teardown: %s", e) + + # Reset the singleton so that other test sessions start clean + FoundryLocalManager.instance = None + + +@pytest.fixture(scope="session") +def catalog(manager): + """Return the Catalog from the session-scoped manager.""" + return manager.catalog + + +@pytest.fixture(scope="session") +def core_interop(manager): + """Return the CoreInterop from the session-scoped manager (internal, for component tests).""" + return manager._core_interop + + +@pytest.fixture(scope="session") +def model_load_manager(manager): + """Return the ModelLoadManager from the session-scoped manager (internal, for component tests).""" + return 
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Tests for ModelLoadManager – mirrors modelLoadManager.test.ts."""

from __future__ import annotations

import pytest

from foundry_local_sdk.detail.model_load_manager import ModelLoadManager
from test.conftest import TEST_MODEL_ALIAS, IS_RUNNING_IN_CI, skip_in_ci


def _cached_test_model_id(catalog) -> str:
    """Return the id of the cached variant whose alias is ``TEST_MODEL_ALIAS``.

    Fails the calling test when no cached variant carries that alias.
    """
    cached = catalog.get_cached_models()
    variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
    assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached"
    return variant.id


def _stop_web_service_quietly(manager) -> None:
    """Stop the web service, deliberately ignoring errors (cleanup only)."""
    try:
        manager.stop_web_service()
    except Exception:
        pass


def _start_web_service_or_skip(manager) -> str:
    """Start the web service and return its first URL, or skip the test.

    The test is skipped when the service cannot be started or reports no
    URLs. In the second case the service is stopped first, so a skipped test
    does not leave it running for the rest of the session.
    """
    try:
        manager.start_web_service()
    except Exception as e:
        pytest.skip(f"Failed to start web service: {e}")

    urls = manager.urls
    if not urls:
        _stop_web_service_quietly(manager)
        pytest.skip("Web service started but no URLs returned")

    return urls[0]


class TestModelLoadManagerCoreInterop:
    """ModelLoadManager tests using Core Interop (no external URL)."""

    def test_should_load_model(self, catalog, core_interop):
        """Load model via core interop and verify it appears in loaded list."""
        model_id = _cached_test_model_id(catalog)
        mlm = ModelLoadManager(core_interop)

        mlm.load(model_id)
        try:
            assert model_id in mlm.list_loaded()
        finally:
            # Unload even when the assertion fails so later tests start clean.
            mlm.unload(model_id)

    def test_should_unload_model(self, catalog, core_interop):
        """Load then unload model via core interop."""
        model_id = _cached_test_model_id(catalog)
        mlm = ModelLoadManager(core_interop)

        mlm.load(model_id)
        assert model_id in mlm.list_loaded()

        mlm.unload(model_id)
        assert model_id not in mlm.list_loaded()

    def test_should_list_loaded_models(self, catalog, core_interop):
        """list_loaded() should return a list containing the loaded model."""
        model_id = _cached_test_model_id(catalog)
        mlm = ModelLoadManager(core_interop)

        mlm.load(model_id)
        try:
            loaded = mlm.list_loaded()
            assert isinstance(loaded, list)
            assert model_id in loaded
        finally:
            # Unload even when an assertion fails so later tests start clean.
            mlm.unload(model_id)


class TestModelLoadManagerExternalService:
    """ModelLoadManager tests using external web service URL (skipped in CI)."""

    @skip_in_ci
    def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop):
        """Load via core interop, unload through the web service endpoint."""
        model_id = _cached_test_model_id(catalog)
        service_url = _start_web_service_or_skip(manager)

        try:
            # Setup: load via core interop
            setup_mlm = ModelLoadManager(core_interop)
            setup_mlm.load(model_id)
            assert model_id in setup_mlm.list_loaded()

            # Unload via external service
            ext_mlm = ModelLoadManager(core_interop, service_url)
            ext_mlm.unload(model_id)

            # Verify via core interop
            assert model_id not in setup_mlm.list_loaded()
        finally:
            _stop_web_service_quietly(manager)

    @skip_in_ci
    def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop):
        """list_loaded() through the web service endpoint should include the loaded model."""
        model_id = _cached_test_model_id(catalog)
        service_url = _start_web_service_or_skip(manager)

        try:
            # Setup: load via core
            setup_mlm = ModelLoadManager(core_interop)
            setup_mlm.load(model_id)
            try:
                # Verify via external service
                ext_mlm = ModelLoadManager(core_interop, service_url)
                loaded = ext_mlm.list_loaded()
                assert isinstance(loaded, list)
                assert model_id in loaded
            finally:
                # Unload even when an assertion above fails.
                setup_mlm.unload(model_id)
        finally:
            _stop_web_service_quietly(manager)
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Tests for AudioClient – mirrors audioClient.test.ts."""

from __future__ import annotations

import pytest

from test.conftest import AUDIO_MODEL_ALIAS, get_git_repo_root

# Recording.mp3 lives at sdk_v2/testdata/Recording.mp3 relative to the repo root
AUDIO_FILE_PATH = str(get_git_repo_root() / "sdk_v2" / "testdata" / "Recording.mp3")
EXPECTED_TEXT = (
    " And lots of times you need to give people more than one link at a time."
    " You a band could give their fans a couple new videos from the live concert"
    " behind the scenes photo gallery and album to purchase like these next few links."
)


def _get_loaded_audio_model(catalog):
    """Select the cached whisper variant, load it, and return the ``Model``.

    The caller is responsible for unloading the returned model.
    """
    cached_variants = catalog.get_cached_models()
    assert len(cached_variants) > 0

    whisper_variant = next((v for v in cached_variants if v.alias == AUDIO_MODEL_ALIAS), None)
    assert whisper_variant is not None, f"{AUDIO_MODEL_ALIAS} should be cached"

    audio_model = catalog.get_model(AUDIO_MODEL_ALIAS)
    assert audio_model is not None

    audio_model.select_variant(whisper_variant)
    audio_model.load()
    return audio_model


def _new_configured_client(model):
    """Return the model's audio client with language ``en`` and temperature 0.0."""
    client = model.get_audio_client()
    assert client is not None

    client.settings.language = "en"
    client.settings.temperature = 0.0
    return client


def _unloaded_audio_client(catalog):
    """Return an audio client for the whisper model without loading the model."""
    audio_model = catalog.get_model(AUDIO_MODEL_ALIAS)
    assert audio_model is not None
    return audio_model.get_audio_client()


class TestAudioClient:
    """Audio Client Tests."""

    def test_should_transcribe_audio(self, catalog):
        """Transcribing Recording.mp3 in one call yields the expected text."""
        model = _get_loaded_audio_model(catalog)
        try:
            client = _new_configured_client(model)

            result = client.transcribe(AUDIO_FILE_PATH)

            assert result is not None
            assert hasattr(result, "text")
            assert isinstance(result.text, str)
            assert len(result.text) > 0
            assert result.text == EXPECTED_TEXT
        finally:
            model.unload()

    def test_should_transcribe_audio_with_temperature(self, catalog):
        """Same transcription with the temperature setting applied explicitly."""
        model = _get_loaded_audio_model(catalog)
        try:
            client = _new_configured_client(model)

            result = client.transcribe(AUDIO_FILE_PATH)

            assert result is not None
            assert isinstance(result.text, str)
            assert len(result.text) > 0
            assert result.text == EXPECTED_TEXT
        finally:
            model.unload()

    def test_should_transcribe_audio_streaming(self, catalog):
        """Concatenated streaming chunks for Recording.mp3 equal the expected text."""
        model = _get_loaded_audio_model(catalog)
        try:
            client = _new_configured_client(model)
            received = []

            def collect(chunk):
                assert chunk is not None
                assert hasattr(chunk, "text")
                assert isinstance(chunk.text, str)
                assert len(chunk.text) > 0
                received.append(chunk.text)

            client.transcribe_streaming(AUDIO_FILE_PATH, collect)

            assert "".join(received) == EXPECTED_TEXT
        finally:
            model.unload()

    def test_should_transcribe_audio_streaming_with_temperature(self, catalog):
        """Streaming transcription with the temperature setting applied explicitly."""
        model = _get_loaded_audio_model(catalog)
        try:
            client = _new_configured_client(model)
            received = []

            def collect(chunk):
                assert chunk is not None
                assert isinstance(chunk.text, str)
                received.append(chunk.text)

            client.transcribe_streaming(AUDIO_FILE_PATH, collect)

            assert "".join(received) == EXPECTED_TEXT
        finally:
            model.unload()

    def test_should_raise_for_empty_audio_file_path(self, catalog):
        """transcribe('') is rejected with ValueError."""
        client = _unloaded_audio_client(catalog)

        with pytest.raises(ValueError, match="Audio file path must be a non-empty string"):
            client.transcribe("")

    def test_should_raise_for_streaming_empty_audio_file_path(self, catalog):
        """transcribe_streaming('') is rejected with ValueError."""
        client = _unloaded_audio_client(catalog)

        with pytest.raises(ValueError, match="Audio file path must be a non-empty string"):
            client.transcribe_streaming("", lambda chunk: None)

    def test_should_raise_for_streaming_invalid_callback(self, catalog):
        """transcribe_streaming rejects every non-callable callback with TypeError."""
        client = _unloaded_audio_client(catalog)

        for bad_callback in [None, 42, {}, "not a function"]:
            with pytest.raises(TypeError, match="Callback must be a valid function"):
                client.transcribe_streaming(AUDIO_FILE_PATH, bad_callback)
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Tests for ChatClient – mirrors chatClient.test.ts."""

from __future__ import annotations

import pytest

from test.conftest import TEST_MODEL_ALIAS


def _get_loaded_chat_model(catalog):
    """Select the cached test-model variant, load it, and return the ``Model``.

    The caller is responsible for unloading the returned model.
    """
    cached = catalog.get_cached_models()
    assert len(cached) > 0

    cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
    assert cached_variant is not None, f"{TEST_MODEL_ALIAS} should be cached"

    model = catalog.get_model(TEST_MODEL_ALIAS)
    assert model is not None

    model.select_variant(cached_variant)
    model.load()
    return model


def _new_deterministic_client(model):
    """Return the model's chat client capped at 500 completion tokens, temperature 0.0."""
    client = model.get_chat_client()
    client.settings.max_completion_tokens = 500
    client.settings.temperature = 0.0  # deterministic
    return client


def _unloaded_chat_client(catalog):
    """Return a chat client for the test model without loading the model."""
    model = catalog.get_model(TEST_MODEL_ALIAS)
    assert model is not None
    return model.get_chat_client()


def _stream_chat(client, messages):
    """Run one streaming completion and return ``(text, chunk_count)``.

    ``text`` joins the ``delta.content`` of every chunk that carries one;
    ``chunk_count`` counts every chunk delivered to the callback.
    """
    pieces = []
    chunk_count = 0

    def on_chunk(chunk):
        nonlocal chunk_count
        chunk_count += 1
        delta = getattr(chunk.choices[0], "delta", None) if chunk.choices else None
        if delta and delta.content:
            pieces.append(delta.content)

    client.complete_streaming_chat(messages, on_chunk)
    return "".join(pieces), chunk_count


class TestChatClient:
    """Chat Client Tests."""

    def test_should_perform_chat_completion(self, catalog):
        """Non-streaming chat: 7 * 6 should include '42' in the response."""
        model = _get_loaded_chat_model(catalog)
        try:
            client = _new_deterministic_client(model)

            result = client.complete_chat([
                {"role": "user",
                 "content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"}
            ])

            assert result is not None
            assert result.choices is not None
            assert len(result.choices) > 0
            assert result.choices[0].message is not None
            content = result.choices[0].message.content
            assert isinstance(content, str)
            assert "42" in content
        finally:
            model.unload()

    def test_should_perform_streaming_chat_completion(self, catalog):
        """Streaming chat: 7 * 6 = 42, then follow-up +25 = 67."""
        model = _get_loaded_chat_model(catalog)
        try:
            client = _new_deterministic_client(model)

            messages = [
                {"role": "user",
                 "content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"}
            ]

            # ---- First question ----
            first_response, chunk_count = _stream_chat(client, messages)

            assert chunk_count > 0
            assert isinstance(first_response, str)
            assert "42" in first_response

            # ---- Follow-up question ----
            messages.append({"role": "assistant", "content": first_response})
            messages.append({"role": "user", "content": "Add 25 to the previous answer. Think hard to be sure of the answer."})

            second_response, chunk_count = _stream_chat(client, messages)

            assert chunk_count > 0
            assert isinstance(second_response, str)
            assert "67" in second_response
        finally:
            model.unload()

    # The validation tests below pin the exception types the chat client
    # documents (ValueError for None/empty messages, TypeError for a
    # non-callable callback). A bare ``Exception`` would also pass on an
    # unrelated failure such as an AttributeError.

    def test_should_raise_for_empty_messages(self, catalog):
        """complete_chat with empty list should raise ValueError."""
        client = _unloaded_chat_client(catalog)

        with pytest.raises(ValueError):
            client.complete_chat([])

    def test_should_raise_for_none_messages(self, catalog):
        """complete_chat with None should raise ValueError."""
        client = _unloaded_chat_client(catalog)

        with pytest.raises(ValueError):
            client.complete_chat(None)

    def test_should_raise_for_streaming_empty_messages(self, catalog):
        """complete_streaming_chat with empty list should raise ValueError."""
        client = _unloaded_chat_client(catalog)

        with pytest.raises(ValueError):
            client.complete_streaming_chat([], lambda chunk: None)

    def test_should_raise_for_streaming_none_messages(self, catalog):
        """complete_streaming_chat with None should raise ValueError."""
        client = _unloaded_chat_client(catalog)

        with pytest.raises(ValueError):
            client.complete_streaming_chat(None, lambda chunk: None)

    def test_should_raise_for_streaming_invalid_callback(self, catalog):
        """complete_streaming_chat with a non-callable callback should raise TypeError."""
        client = _unloaded_chat_client(catalog)
        messages = [{"role": "user", "content": "Hello"}]

        for invalid_callback in [None, 42, {}, "not a function"]:
            with pytest.raises(TypeError):
                client.complete_streaming_chat(messages, invalid_callback)
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Tests for Catalog – mirrors catalog.test.ts."""

from __future__ import annotations

from test.conftest import TEST_MODEL_ALIAS


class TestCatalog:
    """Catalog Tests."""

    def test_should_initialize_with_catalog_name(self, catalog):
        """The catalog name is a string with at least one character."""
        name = catalog.name
        assert isinstance(name, str)
        assert len(name) > 0

    def test_should_list_models(self, catalog):
        """list_models() returns a non-empty list that includes the test model alias."""
        listed = catalog.list_models()
        assert isinstance(listed, list)
        assert len(listed) > 0

        # The test model must be among the listed aliases.
        assert TEST_MODEL_ALIAS in {entry.alias for entry in listed}

    def test_should_get_model_by_alias(self, catalog):
        """get_model() returns a model carrying the requested alias."""
        found = catalog.get_model(TEST_MODEL_ALIAS)
        assert found is not None
        assert found.alias == TEST_MODEL_ALIAS

    def test_should_return_none_for_empty_alias(self, catalog):
        """get_model('') yields None."""
        assert catalog.get_model("") is None

    def test_should_return_none_for_unknown_alias(self, catalog):
        """get_model() yields None for an alias that matches no model."""
        assert catalog.get_model("definitely-not-a-real-model-alias-12345") is None

    def test_should_get_cached_models(self, catalog):
        """get_cached_models() returns a non-empty list that includes the test model alias."""
        cached_variants = catalog.get_cached_models()
        assert isinstance(cached_variants, list)
        assert len(cached_variants) > 0

        # The test model must be among the cached aliases.
        assert TEST_MODEL_ALIAS in {entry.alias for entry in cached_variants}

    def test_should_get_model_variant_by_id(self, catalog):
        """get_model_variant() returns the variant whose id was requested."""
        cached_variants = catalog.get_cached_models()
        assert len(cached_variants) > 0
        expected_id = cached_variants[0].id

        found = catalog.get_model_variant(expected_id)
        assert found is not None
        assert found.id == expected_id

    def test_should_return_none_for_empty_variant_id(self, catalog):
        """get_model_variant('') yields None."""
        assert catalog.get_model_variant("") is None

    def test_should_return_none_for_unknown_variant_id(self, catalog):
        """get_model_variant() yields None for an id that matches no variant."""
        assert catalog.get_model_variant("definitely-not-a-real-model-id-12345") is None
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Tests for FoundryLocalManager – mirrors foundryLocalManager.test.ts."""

from __future__ import annotations


class TestFoundryLocalManager:
    """Foundry Local Manager Tests."""

    def test_should_initialize_successfully(self, manager):
        """The session ``manager`` fixture provides a non-None manager."""
        assert manager is not None

    def test_should_return_catalog(self, manager):
        """``manager.catalog`` is set and its name is a non-empty string."""
        exposed_catalog = manager.catalog
        assert exposed_catalog is not None

        name = exposed_catalog.name
        assert isinstance(name, str)
        assert len(name) > 0
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Tests for Model – mirrors model.test.ts."""

from __future__ import annotations

from test.conftest import TEST_MODEL_ALIAS, AUDIO_MODEL_ALIAS


def _first_with_alias(variants, alias):
    """Return the first variant whose ``alias`` matches, or None."""
    return next((v for v in variants if v.alias == alias), None)


class TestModel:
    """Model Tests."""

    def test_should_verify_cached_models(self, catalog):
        """Both the chat and the audio test models appear in the cache and report is_cached."""
        cached_variants = catalog.get_cached_models()
        assert isinstance(cached_variants, list)
        assert len(cached_variants) > 0

        # Chat model (qwen)
        chat_variant = _first_with_alias(cached_variants, TEST_MODEL_ALIAS)
        assert chat_variant is not None, f"{TEST_MODEL_ALIAS} should be cached"
        assert chat_variant.is_cached is True

        # Audio model (whisper)
        audio_variant = _first_with_alias(cached_variants, AUDIO_MODEL_ALIAS)
        assert audio_variant is not None, f"{AUDIO_MODEL_ALIAS} should be cached"
        assert audio_variant.is_cached is True

    def test_should_load_and_unload_model(self, catalog):
        """load() and unload() flip ``is_loaded`` on the model with the cached variant selected."""
        cached_variants = catalog.get_cached_models()
        assert len(cached_variants) > 0

        chat_variant = _first_with_alias(cached_variants, TEST_MODEL_ALIAS)
        assert chat_variant is not None

        model = catalog.get_model(TEST_MODEL_ALIAS)
        assert model is not None

        model.select_variant(chat_variant)

        # Start from the unloaded state, unloading first if needed.
        if model.is_loaded:
            model.unload()
        assert model.is_loaded is False

        try:
            model.load()
            assert model.is_loaded is True

            model.unload()
            assert model.is_loaded is False
        finally:
            # Do not leave the model loaded if an assertion above failed.
            if model.is_loaded:
                model.unload()