From e9f31457fea013efa3f9a087d1da7427145a891f Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 23 Feb 2026 16:34:26 +0100
Subject: [PATCH 1/9] updated integration tests and guidance

---
 .github/workflows/python-merge-tests.yml      | 314 +++++++++++++++---
 python/.github/skills/python-testing/SKILL.md |  63 +++-
 python/CODING_STANDARD.md                     |  28 ++
 python/DEV_SETUP.md                           |  14 +-
 python/packages/ag-ui/pyproject.toml          |   2 +-
 python/packages/anthropic/pyproject.toml      |   2 +-
 .../anthropic/tests/test_anthropic_client.py  |   8 +
 .../azure-ai/tests/test_agent_provider.py     |   6 +
 .../tests/test_azure_ai_agent_client.py       |  14 +
 .../azure-ai/tests/test_azure_ai_client.py    |   6 +
 .../packages/azure-ai/tests/test_provider.py  |   1 +
 .../integration_tests/test_01_single_agent.py |   2 +
 .../integration_tests/test_02_multi_agent.py  |   2 +
 .../test_03_reliable_streaming.py             |   2 +
 ..._04_single_agent_orchestration_chaining.py |   2 +
 ...5_multi_agent_orchestration_concurrency.py |   2 +
 ..._multi_agent_orchestration_conditionals.py |   2 +
 ...test_07_single_agent_orchestration_hitl.py |   2 +
 .../test_09_workflow_shared_state.py          |   2 +
 .../test_10_workflow_no_shared_state.py       |   2 +
 .../test_11_workflow_parallel.py              |   2 +
 .../test_12_workflow_hitl.py                  |   2 +
 .../core/agent_framework/observability.py     |   6 +-
 .../agent_framework/openai/_chat_client.py    |   8 +-
 python/packages/core/pyproject.toml           |   2 +-
 .../azure/test_azure_assistants_client.py     |  11 +
 .../tests/azure/test_azure_chat_client.py     |   9 +
 .../azure/test_azure_responses_client.py      |   7 +
 python/packages/core/tests/core/test_mcp.py   |   2 +
 .../tests/openai/test_assistant_provider.py   |   2 +
 .../openai/test_openai_assistants_client.py   |  13 +
 .../tests/openai/test_openai_chat_client.py   |   2 +
 .../openai/test_openai_responses_client.py    |   4 +
 .../test_01_dt_single_agent.py                |   2 +
 .../test_02_dt_multi_agent.py                 |   2 +
 .../test_03_dt_single_agent_streaming.py      |   2 +
 ..._dt_single_agent_orchestration_chaining.py |   2 +
 ...t_multi_agent_orchestration_concurrency.py |   2 +
 ..._multi_agent_orchestration_conditionals.py |   2 +
 ...t_07_dt_single_agent_orchestration_hitl.py |   2 +
 .../ollama/tests/test_ollama_chat_client.py   |   8 +
 python/packages/orchestrations/pyproject.toml |   2 +-
 python/pyproject.toml                         |   3 +-
 43 files changed, 510 insertions(+), 63 deletions(-)

diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index 0c09ff394e..45e3d68d50 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -16,6 +16,7 @@ permissions:
 env:
   # Configure a constant location for the uv cache
   UV_CACHE_DIR: /tmp/.uv-cache
+  UV_PYTHON: "3.13"
   RUN_INTEGRATION_TESTS: "true"
   RUN_SAMPLES_TESTS: ${{ vars.RUN_SAMPLES_TESTS }}
 
@@ -27,6 +28,12 @@ jobs:
       pull-requests: read
     outputs:
       pythonChanges: ${{ steps.filter.outputs.python }}
+      coreChanged: ${{ steps.filter.outputs.core }}
+      openaiChanged: ${{ steps.filter.outputs.openai }}
+      azureChanged: ${{ steps.filter.outputs.azure }}
+      miscChanged: ${{ steps.filter.outputs.misc }}
+      functionsChanged: ${{ steps.filter.outputs.functions }}
+      azureAiChanged: ${{ steps.filter.outputs.azure-ai }}
     steps:
       - uses: actions/checkout@v6
       - uses: dorny/paths-filter@v3
@@ -35,6 +42,27 @@ jobs:
           filters: |
             python:
               - 'python/**'
+            core:
+              - 'python/packages/core/agent_framework/_*.py'
+              - 'python/packages/core/agent_framework/_workflows/**'
+              - 'python/packages/core/agent_framework/exceptions.py'
+              - 'python/packages/core/agent_framework/observability.py'
+            openai:
+              - 'python/packages/core/agent_framework/openai/**'
+              - 'python/packages/core/tests/openai/**'
+            azure:
+              - 'python/packages/core/agent_framework/azure/**'
+              - 'python/packages/core/tests/azure/**'
+            misc:
+              - 'python/packages/anthropic/**'
+              - 'python/packages/ollama/**'
+              - 'python/packages/core/agent_framework/_mcp.py'
+              - 'python/packages/core/tests/core/test_mcp.py'
+            functions:
+              - 'python/packages/azurefunctions/**'
+              - 'python/packages/durabletask/**'
+            azure-ai:
+              - 'python/packages/azure-ai/**'
       # run only if 'python' files were changed
       - name: python tests
         if: steps.filter.outputs.python == 'true'
@@ -43,50 +71,244 @@ jobs:
       - name: not python tests
         if: steps.filter.outputs.python != 'true'
         run: echo "NOT python file"
-  python-tests-core:
-    name: Python Tests - Core
+  # Unit tests: always run all non-integration tests across all packages
+  python-tests-unit:
+    name: Python Tests - Unit
     needs: paths-filter
-    if: github.event_name != 'pull_request' && needs.paths-filter.outputs.pythonChanges == 'true'
-    runs-on: ${{ matrix.os }}
-    environment: ${{ matrix.environment }}
-    strategy:
-      fail-fast: true
-      matrix:
-        python-version: ["3.10"]
-        os: [ubuntu-latest]
-        environment: ["integration"]
+    if: >
+      github.event_name != 'pull_request' &&
+      needs.paths-filter.outputs.pythonChanges == 'true'
+    runs-on: ubuntu-latest
+    environment: integration
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Test with pytest (unit tests only)
+        run: >  # worksteal only: repeating --dist keeps just the last value
+          uv run poe all-tests
+          -m "not integration"
+          -n logical --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+        working-directory: ./python
+      - name: Surface failing tests
+        if: always()
+        uses: pmeier/pytest-results-action@v0.7.2
+        with:
+          path: ./python/**.xml
+          summary: true
+          display-options: fEX
+          fail-on-empty: false
+          title: Unit test results
+
+  # OpenAI integration tests
+  python-tests-openai:
+    name: Python Tests - OpenAI Integration
+    needs: paths-filter
+    if: >
+      github.event_name != 'pull_request' &&
+      needs.paths-filter.outputs.pythonChanges == 'true' &&
+      (github.event_name != 'merge_group' ||
+       needs.paths-filter.outputs.openaiChanged == 'true' ||
+       needs.paths-filter.outputs.coreChanged == 'true')
+    runs-on: ubuntu-latest
+    environment: integration
     env:
-      UV_PYTHON: ${{ matrix.python-version }}
       OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
       OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+      OPENAI_EMBEDDINGS_MODEL_ID: ${{ vars.OPENAI_EMBEDDING_MODEL_ID }}  # NOTE(review): was vars.OPENAI__EMBEDDINGMODELID; confirm new var exists
       OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
-      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-      ANTHROPIC_CHAT_MODEL_ID: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
-      OPENAI_EMBEDDING_MODEL_ID: ${{ vars.OPENAI__EMBEDDINGMODELID }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Test with pytest (OpenAI integration)
+        run: >  # worksteal only: repeating --dist keeps just the last value
+          uv run pytest --import-mode=importlib
+          packages/core/tests/openai
+          -m integration
+          -n logical --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+        working-directory: ./python
+      - name: Test OpenAI samples
+        timeout-minutes: 10
+        if: env.RUN_SAMPLES_TESTS == 'true'
+        run: uv run pytest tests/samples/ -m "openai"
+        working-directory: ./python
+      - name: Surface failing tests
+        if: always()
+        uses: pmeier/pytest-results-action@v0.7.2
+        with:
+          path: ./python/**.xml
+          summary: true
+          display-options: fEX
+          fail-on-empty: false
+          title: OpenAI integration test results
+
+  # Azure OpenAI integration tests
+  python-tests-azure-openai:
+    name: Python Tests - Azure OpenAI Integration
+    needs: paths-filter
+    if: >
+      github.event_name != 'pull_request' &&
+      needs.paths-filter.outputs.pythonChanges == 'true' &&
+      (github.event_name != 'merge_group' ||
+       needs.paths-filter.outputs.azureChanged == 'true' ||
+       needs.paths-filter.outputs.coreChanged == 'true')
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
       AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
       AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
       AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__EMBEDDINGDEPLOYMENTNAME }}
       AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Azure CLI Login
+        if: github.event_name != 'pull_request'
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      - name: Test with pytest (Azure OpenAI integration)
+        run: >  # worksteal only: repeating --dist keeps just the last value
+          uv run pytest --import-mode=importlib
+          packages/core/tests/azure
+          -m integration
+          -n logical --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+        working-directory: ./python
+      - name: Test Azure samples
+        timeout-minutes: 10
+        if: env.RUN_SAMPLES_TESTS == 'true'
+        run: uv run pytest tests/samples/ -m "azure"
+        working-directory: ./python
+      - name: Surface failing tests
+        if: always()
+        uses: pmeier/pytest-results-action@v0.7.2
+        with:
+          path: ./python/**.xml
+          summary: true
+          display-options: fEX
+          fail-on-empty: false
+          title: Azure OpenAI integration test results
+
+  # Misc integration tests (Anthropic, Ollama, MCP)
+  python-tests-misc-integration:
+    name: Python Tests - Misc Integration
+    needs: paths-filter
+    if: >
+      github.event_name != 'pull_request' &&
+      needs.paths-filter.outputs.pythonChanges == 'true' &&
+      (github.event_name != 'merge_group' ||
+       needs.paths-filter.outputs.miscChanged == 'true' ||
+       needs.paths-filter.outputs.coreChanged == 'true')
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+      ANTHROPIC_CHAT_MODEL_ID: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
       LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
-      # For Azure Functions integration tests
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Test with pytest (Anthropic, Ollama, MCP integration)
+        run: >  # worksteal only: repeating --dist keeps just the last value
+          uv run pytest --import-mode=importlib
+          packages/anthropic/tests
+          packages/ollama/tests
+          packages/core/tests/core/test_mcp.py
+          -m integration
+          -n logical --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+        working-directory: ./python
+      - name: Surface failing tests
+        if: always()
+        uses: pmeier/pytest-results-action@v0.7.2
+        with:
+          path: ./python/**.xml
+          summary: true
+          display-options: fEX
+          fail-on-empty: false
+          title: Misc integration test results
+
+  # Azure Functions + Durable Task integration tests
+  python-tests-functions:
+    name: Python Tests - Functions Integration
+    needs: paths-filter
+    if: >
+      github.event_name != 'pull_request' &&
+      needs.paths-filter.outputs.pythonChanges == 'true' &&
+      (github.event_name != 'merge_group' ||
+       needs.paths-filter.outputs.functionsChanged == 'true' ||
+       needs.paths-filter.outputs.coreChanged == 'true')
+    runs-on: ubuntu-latest
+    environment: integration
+    env:
+      OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
+      OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
+      AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
+      AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
       FUNCTIONS_WORKER_RUNTIME: "python"
       DURABLE_TASK_SCHEDULER_CONNECTION_STRING: "Endpoint=http://localhost:8080;TaskHub=default;Authentication=None"
       AzureWebJobsStorage: "UseDevelopmentStorage=true"
-
     defaults:
       run:
         working-directory: python
     steps:
       - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ env.UV_PYTHON }}
           os: ${{ runner.os }}
-        env:
-          # Configure a constant location for the uv cache
-          UV_CACHE_DIR: /tmp/.uv-cache
       - name: Azure CLI Login
         if: github.event_name != 'pull_request'
         uses: azure/login@v2
@@ -97,13 +319,15 @@ jobs:
       - name: Set up Azure Functions Integration Test Emulators
         uses: ./.github/actions/azure-functions-integration-setup
         id: azure-functions-setup
-      - name: Test with pytest
-        run: uv run poe all-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
-        working-directory: ./python
-      - name: Test core samples
-        timeout-minutes: 10
-        if: env.RUN_SAMPLES_TESTS == 'true'
-        run: uv run pytest tests/samples/ -m "openai" -m "azure"
+      - name: Test with pytest (Functions + Durable Task integration)
+        run: >  # worksteal only: repeating --dist keeps just the last value
+          uv run pytest --import-mode=importlib
+          packages/azurefunctions/tests/integration_tests
+          packages/durabletask/tests/integration_tests
+          -m integration
+          -n logical --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
         working-directory: ./python
       - name: Surface failing tests
         if: always()
@@ -113,22 +337,20 @@ jobs:
           summary: true
           display-options: fEX
           fail-on-empty: false
-          title: Test results
+          title: Functions integration test results
 
   python-tests-azure-ai:
     name: Python Tests - Azure AI
     needs: paths-filter
-    if: github.event_name != 'pull_request' && needs.paths-filter.outputs.pythonChanges == 'true'
-    runs-on: ${{ matrix.os }}
-    environment: ${{ matrix.environment }}
-    strategy:
-      fail-fast: true
-      matrix:
-        python-version: ["3.10"]
-        os: [ubuntu-latest]
-        environment: ["integration"]
+    if: >
+      github.event_name != 'pull_request' &&
+      needs.paths-filter.outputs.pythonChanges == 'true' &&
+      (github.event_name != 'merge_group' ||
+       needs.paths-filter.outputs.azureAiChanged == 'true' ||
+       needs.paths-filter.outputs.coreChanged == 'true')
+    runs-on: ubuntu-latest
+    environment: integration
     env:
-      UV_PYTHON: ${{ matrix.python-version }}
       AZURE_AI_PROJECT_ENDPOINT: ${{ secrets.AZUREAI__ENDPOINT }}
       AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREAI__DEPLOYMENTNAME }}
       LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
@@ -141,11 +363,8 @@ jobs:
         id: python-setup
         uses: ./.github/actions/python-setup
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ env.UV_PYTHON }}
           os: ${{ runner.os }}
-        env:
-          # Configure a constant location for the uv cache
-          UV_CACHE_DIR: /tmp/.uv-cache
       - name: Azure CLI Login
         if: github.event_name != 'pull_request'
         uses: azure/login@v2
@@ -179,11 +398,14 @@ jobs:
     runs-on: ubuntu-latest
     needs:
       [
-        python-tests-core,
-        python-tests-azure-ai
+        python-tests-unit,
+        python-tests-openai,
+        python-tests-azure-openai,
+        python-tests-misc-integration,
+        python-tests-functions,
+        python-tests-azure-ai,
       ]
     steps:
-
       - name: Fail workflow if tests failed
         id: check_tests_failed
         if: contains(join(needs.*.result, ','), 'failure')
diff --git a/python/.github/skills/python-testing/SKILL.md b/python/.github/skills/python-testing/SKILL.md
index d38423b72d..ff0d9167b5 100644
--- a/python/.github/skills/python-testing/SKILL.md
+++ b/python/.github/skills/python-testing/SKILL.md
@@ -25,6 +25,12 @@ uv run poe all-tests
 
 # With coverage
 uv run poe all-tests-cov
+
+# Run only unit tests (exclude integration tests)
+uv run poe all-tests -m "not integration"
+
+# Run only integration tests
+uv run poe all-tests -m integration
 ```
 
 ## Test Configuration
@@ -32,6 +38,7 @@ uv run poe all-tests-cov
 - **Async mode**: `asyncio_mode = "auto"` is enabled — do NOT use `@pytest.mark.asyncio`, but do mark tests with `async def` and use `await` for async calls
 - **Timeout**: Default 60 seconds per test
 - **Import mode**: `importlib` for cross-package isolation
+- **Parallelization**: Large packages (core, ag-ui, orchestrations, anthropic) use `pytest-xdist` (`-n auto --dist worksteal`) in their `poe test` task. The `all-tests` task also uses xdist across all packages.
 
 ## Test Directory Structure
 
@@ -72,9 +79,59 @@ packages/core/
 
 ## Integration Tests
 
-Tests marked with `@skip_if_..._integration_tests_disabled` require:
-- `RUN_INTEGRATION_TESTS=true` environment variable
-- Appropriate API keys in environment or `.env` file
+Integration tests require external services (OpenAI, Azure, etc.) and are controlled by three markers:
+
+1. **`@pytest.mark.flaky`** — marks the test as potentially flaky since it depends on external services
+2. **`@pytest.mark.integration`** — used for test selection, so integration tests can be included/excluded with `-m integration` / `-m "not integration"`
+3. **`@skip_if_..._integration_tests_disabled`** decorator — skips the test when `RUN_INTEGRATION_TESTS` is not `true` or the required API keys are missing
+
+### Adding New Integration Tests
+
+All three markers must be applied to every new integration test:
+
+```python
+@pytest.mark.flaky
+@pytest.mark.integration
+@skip_if_openai_integration_tests_disabled
+async def test_openai_chat_completion() -> None:
+    ...
+```
+
+For test files where all tests are integration tests (e.g., Azure Functions, Durable Task), use the module-level `pytestmark` list:
+
+```python
+pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
+    pytest.mark.sample("01_single_agent"),
+    pytest.mark.usefixtures("function_app_for_test"),
+]
+```
+
+### CI Workflow
+
+The merge CI workflow (`python-merge-tests.yml`) splits integration tests into parallel jobs by provider with change-based detection:
+
+- **Unit tests** — always run all non-integration tests
+- **OpenAI integration** — runs when `packages/core/agent_framework/openai/`, `packages/core/tests/openai/`, or core infrastructure changes
+- **Azure OpenAI integration** — runs when `packages/core/agent_framework/azure/`, `packages/core/tests/azure/`, or core infrastructure changes
+- **Misc integration** — Anthropic, Ollama, MCP tests; runs when their packages or core change
+- **Functions integration** — Azure Functions + Durable Task; runs when their packages or core change
+- **Azure AI integration** — runs when `packages/azure-ai/` or core changes
+
+Core infrastructure changes (e.g., `_agents.py`, `_types.py`) trigger all integration test jobs. Scheduled and manual runs always execute all jobs.
+
+### Updating the CI When Adding Integration Tests for a New Provider
+
+When adding integration tests for a new provider package, you must update **two things** in `python-merge-tests.yml`:
+
+1. **Add a path filter** for the new provider in the `paths-filter` job so the CI knows which file changes should trigger those tests.
+2. **Assign the tests to a CI job** — either add them to the existing `python-tests-misc-integration` job, or create a dedicated job if the provider:
+   - Has a large number of integration tests
+   - Requires special infrastructure setup (emulators, Docker containers, etc.)
+   - Has long-running tests that would slow down the misc job
+
+The `python-tests-misc-integration` job is intended for small integration test suites that don't need dedicated infrastructure. When a provider's integration tests grow large or gain special requirements, split them out into their own job (like `python-tests-functions` was split out for Azure Functions + Durable Task).
 
 ## Best Practices
 
diff --git a/python/CODING_STANDARD.md b/python/CODING_STANDARD.md
index 617698030a..ff8b012ec9 100644
--- a/python/CODING_STANDARD.md
+++ b/python/CODING_STANDARD.md
@@ -664,3 +664,31 @@ packages/core/
 - Factory functions with parameters should be regular functions, not fixtures (fixtures can't accept arguments)
 - Import factory functions explicitly: `from conftest import create_test_request`
 - Fixtures should use simple names that describe what they provide: `mapper`, `test_request`, `mock_client`
+
+### Integration Test Markers
+
+New integration tests that call external services must have all three markers:
+
+```python
+@pytest.mark.flaky
+@pytest.mark.integration
+@skip_if_openai_integration_tests_disabled
+async def test_chat_completion() -> None:
+    ...
+```
+
+- `@pytest.mark.flaky` — marks the test as potentially flaky since it depends on external services
+- `@pytest.mark.integration` — enables selecting/excluding integration tests with `-m integration` / `-m "not integration"`
+- `@skip_if_..._integration_tests_disabled` — skips the test when `RUN_INTEGRATION_TESTS` is not `true` or the required API keys are missing
+
+For test modules where all tests are integration tests, use `pytestmark`:
+
+```python
+pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
+    pytest.mark.sample("01_single_agent"),
+]
+```
+
+When adding integration tests for a new provider, update the path filters and job assignments in `python-merge-tests.yml`. See the `python-testing` skill for details.
diff --git a/python/DEV_SETUP.md b/python/DEV_SETUP.md
index 025e3ce36e..370b4ffd2d 100644
--- a/python/DEV_SETUP.md
+++ b/python/DEV_SETUP.md
@@ -121,10 +121,20 @@ client = OpenAIChatClient(env_file_path="openai.env")
 
 ## Tests
 
-All the tests are located in the `tests` folder of each package. There are tests that are marked with a `@skip_if_..._integration_tests_disabled` decorator, these are integration tests that require an external service to be running, like OpenAI or Azure OpenAI.
+All the tests are located in the `tests` folder of each package. There are tests that are marked with `@pytest.mark.integration` and `@skip_if_..._integration_tests_disabled` decorators — these are integration tests that require an external service to be running, like OpenAI or Azure OpenAI.
 
 If you want to run these tests, you need to set the environment variable `RUN_INTEGRATION_TESTS` to `true` and have the appropriate key per services set in your environment or in a `.env` file.
 
+You can select or exclude integration tests using pytest markers:
+
+```bash
+# Run only unit tests (exclude integration tests)
+uv run poe all-tests -m "not integration"
+
+# Run only integration tests
+uv run poe all-tests -m integration
+```
+
 Alternatively, you can run them using VSCode Tasks. Open the command palette
 (`Ctrl+Shift+P`) and type `Tasks: Run Task`. Select `Test` from the list.
 
@@ -134,6 +144,8 @@ If you want to run the tests for a single package, you can use the `uv run poe t
 uv run poe --directory packages/core test
 ```
 
+Large packages (core, ag-ui, orchestrations, anthropic) use `pytest-xdist` for parallel test execution within the package. The `all-tests` task also uses xdist across all packages.
+
 These commands also output the coverage report.
 
 ## Code quality checks
diff --git a/python/packages/ag-ui/pyproject.toml b/python/packages/ag-ui/pyproject.toml
index f29a986de7..461467d616 100644
--- a/python/packages/ag-ui/pyproject.toml
+++ b/python/packages/ag-ui/pyproject.toml
@@ -70,4 +70,4 @@ include = "../../shared_tasks.toml"
 
 [tool.poe.tasks]
 mypy = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_ag_ui"
-test = "pytest --cov=agent_framework_ag_ui --cov-report=term-missing:skip-covered tests/ag_ui"
+test = "pytest --cov=agent_framework_ag_ui --cov-report=term-missing:skip-covered -n auto --dist worksteal tests/ag_ui"
diff --git a/python/packages/anthropic/pyproject.toml b/python/packages/anthropic/pyproject.toml
index 76aa7e043f..48064681c9 100644
--- a/python/packages/anthropic/pyproject.toml
+++ b/python/packages/anthropic/pyproject.toml
@@ -82,7 +82,7 @@ executor.type = "uv"
 include = "../../shared_tasks.toml"
 [tool.poe.tasks]
 mypy = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_anthropic"
-test = "pytest --cov=agent_framework_anthropic --cov-report=term-missing:skip-covered tests"
+test = "pytest --cov=agent_framework_anthropic --cov-report=term-missing:skip-covered -n auto --dist worksteal tests"
 
 [build-system]
 requires = ["flit-core >= 3.11,<4.0"]
diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py
index 78f4b7de8d..1c27efefdc 100644
--- a/python/packages/anthropic/tests/test_anthropic_client.py
+++ b/python/packages/anthropic/tests/test_anthropic_client.py
@@ -915,6 +915,7 @@ def get_weather(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_basic_chat() -> None:
     """Integration test for basic chat completion."""
@@ -932,6 +933,7 @@ async def test_anthropic_client_integration_basic_chat() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_streaming_chat() -> None:
     """Integration test for streaming chat completion."""
@@ -948,6 +950,7 @@ async def test_anthropic_client_integration_streaming_chat() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_function_calling() -> None:
     """Integration test for function calling."""
@@ -968,6 +971,7 @@ async def test_anthropic_client_integration_function_calling() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_hosted_tools() -> None:
     """Integration test for hosted tools."""
@@ -993,6 +997,7 @@ async def test_anthropic_client_integration_hosted_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_with_system_message() -> None:
     """Integration test with system message."""
@@ -1010,6 +1015,7 @@ async def test_anthropic_client_integration_with_system_message() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_temperature_control() -> None:
     """Integration test with temperature control."""
@@ -1027,6 +1033,7 @@ async def test_anthropic_client_integration_temperature_control() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_ordering() -> None:
     """Integration test with ordering."""
@@ -1047,6 +1054,7 @@ async def test_anthropic_client_integration_ordering() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_anthropic_integration_tests_disabled
 async def test_anthropic_client_integration_images() -> None:
     """Integration test with images."""
diff --git a/python/packages/azure-ai/tests/test_agent_provider.py b/python/packages/azure-ai/tests/test_agent_provider.py
index 3e25e1506d..4ff183a72b 100644
--- a/python/packages/azure-ai/tests/test_agent_provider.py
+++ b/python/packages/azure-ai/tests/test_agent_provider.py
@@ -779,6 +779,8 @@ def test_from_azure_ai_agent_tools_unknown_dict() -> None:
 # region Integration Tests
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_create_agent() -> None:
     """Integration test: Create an agent using the provider."""
@@ -801,6 +803,8 @@ async def test_integration_create_agent() -> None:
                 await provider._agents_client.delete_agent(agent.id)  # type: ignore
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_get_agent() -> None:
     """Integration test: Get an existing agent using the provider."""
@@ -825,6 +829,8 @@ async def test_integration_get_agent() -> None:
             await provider._agents_client.delete_agent(created.id)  # type: ignore
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_create_and_run() -> None:
     """Integration test: Create an agent and run a conversation."""
diff --git a/python/packages/azure-ai/tests/test_azure_ai_agent_client.py b/python/packages/azure-ai/tests/test_azure_ai_agent_client.py
index 1daf611bef..360b502f1b 100644
--- a/python/packages/azure-ai/tests/test_azure_ai_agent_client.py
+++ b/python/packages/azure-ai/tests/test_azure_ai_agent_client.py
@@ -1379,6 +1379,7 @@ def get_weather(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_get_response() -> None:
     """Test Azure AI Chat Client response."""
@@ -1404,6 +1405,7 @@ async def test_azure_ai_chat_client_get_response() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_get_response_tools() -> None:
     """Test Azure AI Chat Client response with tools."""
@@ -1425,6 +1427,7 @@ async def test_azure_ai_chat_client_get_response_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_streaming() -> None:
     """Test Azure AI Chat Client streaming response."""
@@ -1456,6 +1459,7 @@ async def test_azure_ai_chat_client_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_streaming_tools() -> None:
     """Test Azure AI Chat Client streaming response with tools."""
@@ -1483,6 +1487,7 @@ async def test_azure_ai_chat_client_streaming_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_basic_run() -> None:
     """Test Agent basic run functionality with AzureAIAgentClient."""
@@ -1500,6 +1505,7 @@ async def test_azure_ai_chat_client_agent_basic_run() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_basic_run_streaming() -> None:
     """Test Agent basic streaming functionality with AzureAIAgentClient."""
@@ -1520,6 +1526,7 @@ async def test_azure_ai_chat_client_agent_basic_run_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_thread_persistence() -> None:
     """Test Agent session persistence across runs with AzureAIAgentClient."""
@@ -1546,6 +1553,7 @@ async def test_azure_ai_chat_client_agent_thread_persistence() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_existing_thread_id() -> None:
     """Test Agent existing thread ID functionality with AzureAIAgentClient."""
@@ -1584,6 +1592,7 @@ async def test_azure_ai_chat_client_agent_existing_thread_id() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_code_interpreter():
     """Test Agent with code interpreter through AzureAIAgentClient."""
@@ -1604,6 +1613,7 @@ async def test_azure_ai_chat_client_agent_code_interpreter():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_file_search():
     """Test Agent with file search through AzureAIAgentClient."""
@@ -1651,6 +1661,7 @@ async def test_azure_ai_chat_client_agent_file_search():
             await client.close()
 
 
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_hosted_mcp_tool() -> None:
     """Integration test for MCP tool with Azure AI Agent using Microsoft Learn MCP."""
@@ -1686,6 +1697,7 @@ async def test_azure_ai_chat_client_agent_hosted_mcp_tool() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_level_tool_persistence():
     """Test that agent-level tools persist across multiple runs with AzureAIAgentClient."""
@@ -1711,6 +1723,7 @@ async def test_azure_ai_chat_client_agent_level_tool_persistence():
         assert any(term in second_response.text.lower() for term in ["miami", "sunny", "25"])
 
 
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_chat_options_run_level() -> None:
     """Test ChatOptions parameter coverage at run level."""
@@ -1735,6 +1748,7 @@ async def test_azure_ai_chat_client_agent_chat_options_run_level() -> None:
         assert len(response.text) > 0
 
 
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_azure_ai_chat_client_agent_chat_options_agent_level() -> None:
     """Test ChatOptions parameter coverage agent level."""
diff --git a/python/packages/azure-ai/tests/test_azure_ai_client.py b/python/packages/azure-ai/tests/test_azure_ai_client.py
index 5e08ddc066..bf818f9408 100644
--- a/python/packages/azure-ai/tests/test_azure_ai_client.py
+++ b/python/packages/azure-ai/tests/test_azure_ai_client.py
@@ -1329,6 +1329,7 @@ async def client() -> AsyncGenerator[AzureAIClient, None]:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 @pytest.mark.parametrize(
     "option_name,option_value,needs_validation",
@@ -1443,6 +1444,7 @@ async def test_integration_options(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 @pytest.mark.parametrize(
     "option_name,option_value,needs_validation",
@@ -1541,6 +1543,7 @@ async def test_integration_agent_options(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_web_search() -> None:
     async with temporary_chat_client(agent_name="af-int-test-web-search") as client:
@@ -1586,6 +1589,7 @@ async def test_integration_web_search() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_agent_hosted_mcp_tool() -> None:
     """Integration test for MCP tool with Azure Response Agent using Microsoft Learn MCP."""
@@ -1610,6 +1614,7 @@ async def test_integration_agent_hosted_mcp_tool() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_agent_hosted_code_interpreter_tool():
     """Test Azure Responses Client agent with code interpreter tool through AzureAIClient."""
@@ -1628,6 +1633,7 @@ async def test_integration_agent_hosted_code_interpreter_tool():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_integration_agent_existing_session():
     """Test Azure Responses Client agent with existing session to continue conversations across agent instances."""
diff --git a/python/packages/azure-ai/tests/test_provider.py b/python/packages/azure-ai/tests/test_provider.py
index 44d98b4397..e960ec986c 100644
--- a/python/packages/azure-ai/tests/test_provider.py
+++ b/python/packages/azure-ai/tests/test_provider.py
@@ -698,6 +698,7 @@ def mock_normalize_tools(tools):
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_ai_integration_tests_disabled
 async def test_provider_create_and_get_agent_integration() -> None:
     """Integration test for provider create_agent and get_agent."""
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_01_single_agent.py b/python/packages/azurefunctions/tests/integration_tests/test_01_single_agent.py
index fe9308dee3..00dd096b56 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_01_single_agent.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_01_single_agent.py
@@ -19,6 +19,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("01_single_agent"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_02_multi_agent.py b/python/packages/azurefunctions/tests/integration_tests/test_02_multi_agent.py
index 9d326d801d..1e0465264f 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_02_multi_agent.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_02_multi_agent.py
@@ -18,6 +18,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("02_multi_agent"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_03_reliable_streaming.py b/python/packages/azurefunctions/tests/integration_tests/test_03_reliable_streaming.py
index 8c348f45ce..23c58a2a95 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_03_reliable_streaming.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_03_reliable_streaming.py
@@ -22,6 +22,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("03_reliable_streaming"),
     pytest.mark.usefixtures("function_app_for_test"),
     pytest.mark.skip(reason="Temp disabled to fix test instability - needs investigation into root cause"),
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_04_single_agent_orchestration_chaining.py b/python/packages/azurefunctions/tests/integration_tests/test_04_single_agent_orchestration_chaining.py
index 2ca2812800..9c52c9a937 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_04_single_agent_orchestration_chaining.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_04_single_agent_orchestration_chaining.py
@@ -22,6 +22,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("04_single_agent_orchestration_chaining"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_05_multi_agent_orchestration_concurrency.py b/python/packages/azurefunctions/tests/integration_tests/test_05_multi_agent_orchestration_concurrency.py
index 061ccde730..7759b528cd 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_05_multi_agent_orchestration_concurrency.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_05_multi_agent_orchestration_concurrency.py
@@ -22,6 +22,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.orchestration,
     pytest.mark.sample("05_multi_agent_orchestration_concurrency"),
     pytest.mark.usefixtures("function_app_for_test"),
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_06_multi_agent_orchestration_conditionals.py b/python/packages/azurefunctions/tests/integration_tests/test_06_multi_agent_orchestration_conditionals.py
index f1fc725c9e..c40e4ab408 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_06_multi_agent_orchestration_conditionals.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_06_multi_agent_orchestration_conditionals.py
@@ -22,6 +22,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.orchestration,
     pytest.mark.sample("06_multi_agent_orchestration_conditionals"),
     pytest.mark.usefixtures("function_app_for_test"),
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_07_single_agent_orchestration_hitl.py b/python/packages/azurefunctions/tests/integration_tests/test_07_single_agent_orchestration_hitl.py
index 16bae905ea..17356fac50 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_07_single_agent_orchestration_hitl.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_07_single_agent_orchestration_hitl.py
@@ -24,6 +24,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("07_single_agent_orchestration_hitl"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_09_workflow_shared_state.py b/python/packages/azurefunctions/tests/integration_tests/test_09_workflow_shared_state.py
index 26bb20e5b4..66527134a5 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_09_workflow_shared_state.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_09_workflow_shared_state.py
@@ -23,6 +23,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("09_workflow_shared_state"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_10_workflow_no_shared_state.py b/python/packages/azurefunctions/tests/integration_tests/test_10_workflow_no_shared_state.py
index 88b610ac70..88739057f0 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_10_workflow_no_shared_state.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_10_workflow_no_shared_state.py
@@ -23,6 +23,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("10_workflow_no_shared_state"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_11_workflow_parallel.py b/python/packages/azurefunctions/tests/integration_tests/test_11_workflow_parallel.py
index 81f7466e5d..683ab7e0be 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_11_workflow_parallel.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_11_workflow_parallel.py
@@ -25,6 +25,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("11_workflow_parallel"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/azurefunctions/tests/integration_tests/test_12_workflow_hitl.py b/python/packages/azurefunctions/tests/integration_tests/test_12_workflow_hitl.py
index 8f3c87e339..2b31c17c7a 100644
--- a/python/packages/azurefunctions/tests/integration_tests/test_12_workflow_hitl.py
+++ b/python/packages/azurefunctions/tests/integration_tests/test_12_workflow_hitl.py
@@ -25,6 +25,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("12_workflow_hitl"),
     pytest.mark.usefixtures("function_app_for_test"),
 ]
diff --git a/python/packages/core/agent_framework/observability.py b/python/packages/core/agent_framework/observability.py
index fbc9cb0f53..11023aef67 100644
--- a/python/packages/core/agent_framework/observability.py
+++ b/python/packages/core/agent_framework/observability.py
@@ -28,7 +28,7 @@
 from opentelemetry import metrics, trace
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.semconv.attributes import service_attributes
-from opentelemetry.semconv_ai import Meters, SpanAttributes
+from opentelemetry.semconv_ai import Meters
 
 from . import __version__ as version_info
 from ._settings import load_settings
@@ -1826,9 +1826,7 @@ def _capture_response(
     span.set_attributes(attributes)
     attrs: dict[str, Any] = {k: v for k, v in attributes.items() if k in GEN_AI_METRIC_ATTRIBUTES}
     if token_usage_histogram and (input_tokens := attributes.get(OtelAttr.INPUT_TOKENS)):
-        token_usage_histogram.record(
-            input_tokens, attributes={**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_INPUT}
-        )
+        token_usage_histogram.record(input_tokens, attributes={**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_INPUT})
     if token_usage_histogram and (output_tokens := attributes.get(OtelAttr.OUTPUT_TOKENS)):
         token_usage_histogram.record(output_tokens, {**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_OUTPUT})
     if operation_duration_histogram and duration is not None:
diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py
index 5d6f66491c..f08d80e990 100644
--- a/python/packages/core/agent_framework/openai/_chat_client.py
+++ b/python/packages/core/agent_framework/openai/_chat_client.py
@@ -411,9 +411,7 @@ def _parse_response_update_from_openai(
         # See https://github.com/microsoft/agent-framework/issues/3434
         if chunk.usage:
             contents.append(
-                Content.from_usage(
-                    usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
-                )
+                Content.from_usage(usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk)
             )
 
         for choice in chunk.choices:
@@ -591,7 +589,9 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]:
         # See https://github.com/microsoft/agent-framework/issues/4084
         for msg in all_messages:
             msg_content: Any = msg.get("content")
-            if isinstance(msg_content, list) and all(isinstance(c, dict) and c.get("type") == "text" for c in msg_content):
+            if isinstance(msg_content, list) and all(
+                isinstance(c, dict) and c.get("type") == "text" for c in msg_content
+            ):
                 msg["content"] = "\n".join(c.get("text", "") for c in msg_content)
 
         return all_messages
diff --git a/python/packages/core/pyproject.toml b/python/packages/core/pyproject.toml
index ec1c2dd40e..9b0c9e60a8 100644
--- a/python/packages/core/pyproject.toml
+++ b/python/packages/core/pyproject.toml
@@ -125,7 +125,7 @@ executor.type = "uv"
 include = "../../shared_tasks.toml"
 [tool.poe.tasks]
 mypy = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework"
-test = "pytest --cov=agent_framework --cov-report=term-missing:skip-covered tests"
+test = "pytest --cov=agent_framework --cov-report=term-missing:skip-covered -n auto --dist worksteal tests"
 
 [tool.flit.module]
 name = "agent_framework"
diff --git a/python/packages/core/tests/azure/test_azure_assistants_client.py b/python/packages/core/tests/azure/test_azure_assistants_client.py
index 98ed522250..b8fe809d28 100644
--- a/python/packages/core/tests/azure/test_azure_assistants_client.py
+++ b/python/packages/core/tests/azure/test_azure_assistants_client.py
@@ -260,6 +260,7 @@ def get_weather(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_client_get_response() -> None:
     """Test Azure Assistants Client response."""
@@ -285,6 +286,7 @@ async def test_azure_assistants_client_get_response() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_client_get_response_tools() -> None:
     """Test Azure Assistants Client response with tools."""
@@ -306,6 +308,7 @@ async def test_azure_assistants_client_get_response_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_client_streaming() -> None:
     """Test Azure Assistants Client streaming response."""
@@ -337,6 +340,7 @@ async def test_azure_assistants_client_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_client_streaming_tools() -> None:
     """Test Azure Assistants Client streaming response with tools."""
@@ -364,6 +368,7 @@ async def test_azure_assistants_client_streaming_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_client_with_existing_assistant() -> None:
     """Test Azure Assistants Client with existing assistant ID."""
@@ -392,6 +397,7 @@ async def test_azure_assistants_client_with_existing_assistant() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_agent_basic_run():
     """Test Agent basic run functionality with AzureOpenAIAssistantsClient."""
@@ -409,6 +415,7 @@ async def test_azure_assistants_agent_basic_run():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_agent_basic_run_streaming():
     """Test Agent basic streaming functionality with AzureOpenAIAssistantsClient."""
@@ -429,6 +436,7 @@ async def test_azure_assistants_agent_basic_run_streaming():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_agent_session_persistence():
     """Test Agent session persistence across runs with AzureOpenAIAssistantsClient."""
@@ -458,6 +466,7 @@ async def test_azure_assistants_agent_session_persistence():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_agent_existing_session_id():
     """Test Agent with existing session ID to continue conversations across agent instances."""
@@ -503,6 +512,7 @@ async def test_azure_assistants_agent_existing_session_id():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_agent_code_interpreter():
     """Test Agent with code interpreter through AzureOpenAIAssistantsClient."""
@@ -523,6 +533,7 @@ async def test_azure_assistants_agent_code_interpreter():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_assistants_client_agent_level_tool_persistence():
     """Test that agent-level tools persist across multiple runs with Azure Assistants Client."""
diff --git a/python/packages/core/tests/azure/test_azure_chat_client.py b/python/packages/core/tests/azure/test_azure_chat_client.py
index 6752d01144..9459c1f3c4 100644
--- a/python/packages/core/tests/azure/test_azure_chat_client.py
+++ b/python/packages/core/tests/azure/test_azure_chat_client.py
@@ -647,6 +647,7 @@ def get_weather(location: str) -> str:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_response() -> None:
     """Test Azure OpenAI chat completion responses."""
@@ -677,6 +678,7 @@ async def test_azure_openai_chat_client_response() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_response_tools() -> None:
     """Test AzureOpenAI chat completion responses."""
@@ -698,6 +700,7 @@ async def test_azure_openai_chat_client_response_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_streaming() -> None:
     """Test Azure OpenAI chat completion responses."""
@@ -733,6 +736,7 @@ async def test_azure_openai_chat_client_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_streaming_tools() -> None:
     """Test AzureOpenAI chat completion responses."""
@@ -760,6 +764,7 @@ async def test_azure_openai_chat_client_streaming_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_agent_basic_run():
     """Test Azure OpenAI chat client agent basic run functionality with AzureOpenAIChatClient."""
@@ -776,6 +781,7 @@ async def test_azure_openai_chat_client_agent_basic_run():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_agent_basic_run_streaming():
     """Test Azure OpenAI chat client agent basic streaming functionality with AzureOpenAIChatClient."""
@@ -794,6 +800,7 @@ async def test_azure_openai_chat_client_agent_basic_run_streaming():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_agent_session_persistence():
     """Test Azure OpenAI chat client agent session persistence across runs with AzureOpenAIChatClient."""
@@ -819,6 +826,7 @@ async def test_azure_openai_chat_client_agent_session_persistence():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_openai_chat_client_agent_existing_session():
     """Test Azure OpenAI chat client agent with existing session to continue conversations across agent instances."""
@@ -854,6 +862,7 @@ async def test_azure_openai_chat_client_agent_existing_session():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_azure_chat_client_agent_level_tool_persistence():
     """Test that agent-level tools persist across multiple runs with Azure Chat Client."""
diff --git a/python/packages/core/tests/azure/test_azure_responses_client.py b/python/packages/core/tests/azure/test_azure_responses_client.py
index 2a718d0ce5..8d244940f9 100644
--- a/python/packages/core/tests/azure/test_azure_responses_client.py
+++ b/python/packages/core/tests/azure/test_azure_responses_client.py
@@ -254,6 +254,7 @@ def test_serialize(azure_openai_unit_test_env: dict[str, str]) -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 @pytest.mark.parametrize(
     "option_name,option_value,needs_validation",
@@ -392,6 +393,7 @@ async def test_integration_options(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_integration_web_search() -> None:
     client = AzureOpenAIResponsesClient(credential=AzureCliCredential())
@@ -440,6 +442,7 @@ async def test_integration_web_search() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_integration_client_file_search() -> None:
     """Test Azure responses client with file search tool."""
@@ -469,6 +472,7 @@ async def test_integration_client_file_search() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_integration_client_file_search_streaming() -> None:
     """Test Azure responses client with file search tool and streaming."""
@@ -500,6 +504,7 @@ async def test_integration_client_file_search_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_integration_client_agent_hosted_mcp_tool() -> None:
     """Integration test for MCP tool with Azure Response Agent using Microsoft Learn MCP."""
@@ -524,6 +529,7 @@ async def test_integration_client_agent_hosted_mcp_tool() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_integration_client_agent_hosted_code_interpreter_tool():
     """Test Azure Responses Client agent with code interpreter tool."""
@@ -543,6 +549,7 @@ async def test_integration_client_agent_hosted_code_interpreter_tool():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_integration_client_agent_existing_session():
     """Test Azure Responses Client agent with existing session to continue conversations across agent instances."""
diff --git a/python/packages/core/tests/core/test_mcp.py b/python/packages/core/tests/core/test_mcp.py
index 09d5b11754..514a0ab51c 100644
--- a/python/packages/core/tests/core/test_mcp.py
+++ b/python/packages/core/tests/core/test_mcp.py
@@ -1105,6 +1105,7 @@ def test_local_mcp_streamable_http_tool_init():
 
 # Integration test
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_mcp_integration_tests_disabled
 async def test_streamable_http_integration():
     """Test MCP StreamableHTTP integration."""
@@ -1133,6 +1134,7 @@ async def test_streamable_http_integration():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_mcp_integration_tests_disabled
 async def test_mcp_connection_reset_integration():
     """Test that connection reset works correctly with a real MCP server.
diff --git a/python/packages/core/tests/openai/test_assistant_provider.py b/python/packages/core/tests/openai/test_assistant_provider.py
index b33ebda17d..35f7267d48 100644
--- a/python/packages/core/tests/openai/test_assistant_provider.py
+++ b/python/packages/core/tests/openai/test_assistant_provider.py
@@ -763,6 +763,8 @@ def test_merge_single_user_tool(self, mock_async_openai: MagicMock) -> None:
 )
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 class TestOpenAIAssistantProviderIntegration:
     """Integration tests requiring real OpenAI API."""
diff --git a/python/packages/core/tests/openai/test_openai_assistants_client.py b/python/packages/core/tests/openai/test_openai_assistants_client.py
index da14a810a1..e0bf89ba5a 100644
--- a/python/packages/core/tests/openai/test_openai_assistants_client.py
+++ b/python/packages/core/tests/openai/test_openai_assistants_client.py
@@ -1075,6 +1075,7 @@ def get_weather(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_get_response() -> None:
     """Test OpenAI Assistants Client response."""
@@ -1100,6 +1101,7 @@ async def test_get_response() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_get_response_tools() -> None:
     """Test OpenAI Assistants Client response with tools."""
@@ -1121,6 +1123,7 @@ async def test_get_response_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_streaming() -> None:
     """Test OpenAI Assistants Client streaming response."""
@@ -1152,6 +1155,7 @@ async def test_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_streaming_tools() -> None:
     """Test OpenAI Assistants Client streaming response with tools."""
@@ -1182,6 +1186,7 @@ async def test_streaming_tools() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_with_existing_assistant() -> None:
     """Test OpenAI Assistants Client with existing assistant ID."""
@@ -1210,6 +1215,7 @@ async def test_with_existing_assistant() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 @pytest.mark.skip(reason="OpenAI file search functionality is currently broken - tracked in GitHub issue")
 async def test_file_search() -> None:
@@ -1236,6 +1242,7 @@ async def test_file_search() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 @pytest.mark.skip(reason="OpenAI file search functionality is currently broken - tracked in GitHub issue")
 async def test_file_search_streaming() -> None:
@@ -1270,6 +1277,7 @@ async def test_file_search_streaming() -> None:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_openai_assistants_agent_basic_run():
     """Test Agent basic run functionality with OpenAIAssistantsClient."""
@@ -1287,6 +1295,7 @@ async def test_openai_assistants_agent_basic_run():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_openai_assistants_agent_basic_run_streaming():
     """Test Agent basic streaming functionality with OpenAIAssistantsClient."""
@@ -1307,6 +1316,7 @@ async def test_openai_assistants_agent_basic_run_streaming():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_openai_assistants_agent_session_persistence():
     """Test Agent session persistence across runs with OpenAIAssistantsClient."""
@@ -1336,6 +1346,7 @@ async def test_openai_assistants_agent_session_persistence():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_openai_assistants_agent_existing_session_id():
     """Test Agent with existing session ID to continue conversations across agent instances."""
@@ -1381,6 +1392,7 @@ async def test_openai_assistants_agent_existing_session_id():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_openai_assistants_agent_code_interpreter():
     """Test Agent with code interpreter through OpenAIAssistantsClient."""
@@ -1401,6 +1413,7 @@ async def test_openai_assistants_agent_code_interpreter():
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_agent_level_tool_persistence():
     """Test that agent-level tools persist across multiple runs with OpenAI Assistants Client."""
diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py
index 8aa2c1f890..4d3f3fd9da 100644
--- a/python/packages/core/tests/openai/test_openai_chat_client.py
+++ b/python/packages/core/tests/openai/test_openai_chat_client.py
@@ -1087,6 +1087,7 @@ class OutputStruct(BaseModel):
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 @pytest.mark.parametrize(
     "option_name,option_value,needs_validation",
@@ -1225,6 +1226,7 @@ async def test_integration_options(
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_integration_web_search() -> None:
     client = OpenAIChatClient(model_id="gpt-4o-search-preview")
diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index 6c98f3bdfa..ff154d7aaf 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -2362,6 +2362,7 @@ async def get_api_key() -> str:
 
 
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 @pytest.mark.parametrize(
     "option_name,option_value,needs_validation",
@@ -2500,6 +2501,7 @@ async def test_integration_options(
 
 @pytest.mark.timeout(300)
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_integration_web_search() -> None:
     client = OpenAIResponsesClient(model_id="gpt-5")
@@ -2553,6 +2555,7 @@ async def test_integration_web_search() -> None:
     "race condition. See https://github.com/microsoft/agent-framework/issues/1669"
 )
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_integration_file_search() -> None:
     openai_responses_client = OpenAIResponsesClient()
@@ -2586,6 +2589,7 @@ async def test_integration_file_search() -> None:
     "potential race condition. See https://github.com/microsoft/agent-framework/issues/1669"
 )
 @pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_openai_integration_tests_disabled
 async def test_integration_streaming_file_search() -> None:
     openai_responses_client = OpenAIResponsesClient()
diff --git a/python/packages/durabletask/tests/integration_tests/test_01_dt_single_agent.py b/python/packages/durabletask/tests/integration_tests/test_01_dt_single_agent.py
index 43795f9ef1..736c7022e8 100644
--- a/python/packages/durabletask/tests/integration_tests/test_01_dt_single_agent.py
+++ b/python/packages/durabletask/tests/integration_tests/test_01_dt_single_agent.py
@@ -14,6 +14,8 @@
 
 # Module-level markers - applied to all tests in this module
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("01_single_agent"),
     pytest.mark.integration_test,
     pytest.mark.requires_azure_openai,
diff --git a/python/packages/durabletask/tests/integration_tests/test_02_dt_multi_agent.py b/python/packages/durabletask/tests/integration_tests/test_02_dt_multi_agent.py
index 9d7d8588ac..2dad4e6b7a 100644
--- a/python/packages/durabletask/tests/integration_tests/test_02_dt_multi_agent.py
+++ b/python/packages/durabletask/tests/integration_tests/test_02_dt_multi_agent.py
@@ -18,6 +18,8 @@
 
 # Module-level markers - applied to all tests in this module
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("02_multi_agent"),
     pytest.mark.integration_test,
     pytest.mark.requires_azure_openai,
diff --git a/python/packages/durabletask/tests/integration_tests/test_03_dt_single_agent_streaming.py b/python/packages/durabletask/tests/integration_tests/test_03_dt_single_agent_streaming.py
index 41e8bf15bb..1e311ecdce 100644
--- a/python/packages/durabletask/tests/integration_tests/test_03_dt_single_agent_streaming.py
+++ b/python/packages/durabletask/tests/integration_tests/test_03_dt_single_agent_streaming.py
@@ -34,6 +34,8 @@
 
 # Module-level markers - applied to all tests in this file
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("03_single_agent_streaming"),
     pytest.mark.integration_test,
     pytest.mark.requires_azure_openai,
diff --git a/python/packages/durabletask/tests/integration_tests/test_04_dt_single_agent_orchestration_chaining.py b/python/packages/durabletask/tests/integration_tests/test_04_dt_single_agent_orchestration_chaining.py
index 27508a6ddd..de70f9eada 100644
--- a/python/packages/durabletask/tests/integration_tests/test_04_dt_single_agent_orchestration_chaining.py
+++ b/python/packages/durabletask/tests/integration_tests/test_04_dt_single_agent_orchestration_chaining.py
@@ -23,6 +23,8 @@
 
 # Module-level markers - applied to all tests in this module
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("04_single_agent_orchestration_chaining"),
     pytest.mark.integration_test,
     pytest.mark.requires_azure_openai,
diff --git a/python/packages/durabletask/tests/integration_tests/test_05_dt_multi_agent_orchestration_concurrency.py b/python/packages/durabletask/tests/integration_tests/test_05_dt_multi_agent_orchestration_concurrency.py
index c13b07c01e..88fe96487e 100644
--- a/python/packages/durabletask/tests/integration_tests/test_05_dt_multi_agent_orchestration_concurrency.py
+++ b/python/packages/durabletask/tests/integration_tests/test_05_dt_multi_agent_orchestration_concurrency.py
@@ -24,6 +24,8 @@
 
 # Module-level markers
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("05_multi_agent_orchestration_concurrency"),
     pytest.mark.integration_test,
     pytest.mark.requires_dts,
diff --git a/python/packages/durabletask/tests/integration_tests/test_06_dt_multi_agent_orchestration_conditionals.py b/python/packages/durabletask/tests/integration_tests/test_06_dt_multi_agent_orchestration_conditionals.py
index 1fc59279f9..bf4700824b 100644
--- a/python/packages/durabletask/tests/integration_tests/test_06_dt_multi_agent_orchestration_conditionals.py
+++ b/python/packages/durabletask/tests/integration_tests/test_06_dt_multi_agent_orchestration_conditionals.py
@@ -24,6 +24,8 @@
 
 # Module-level markers
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("06_multi_agent_orchestration_conditionals"),
     pytest.mark.integration_test,
     pytest.mark.requires_dts,
diff --git a/python/packages/durabletask/tests/integration_tests/test_07_dt_single_agent_orchestration_hitl.py b/python/packages/durabletask/tests/integration_tests/test_07_dt_single_agent_orchestration_hitl.py
index fa713aaec7..2d4a07a98f 100644
--- a/python/packages/durabletask/tests/integration_tests/test_07_dt_single_agent_orchestration_hitl.py
+++ b/python/packages/durabletask/tests/integration_tests/test_07_dt_single_agent_orchestration_hitl.py
@@ -24,6 +24,8 @@
 
 # Module-level markers
 pytestmark = [
+    pytest.mark.flaky,
+    pytest.mark.integration,
     pytest.mark.sample("07_single_agent_orchestration_hitl"),
     pytest.mark.integration_test,
     pytest.mark.requires_dts,
diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py
index 6e10830bf4..03006125d1 100644
--- a/python/packages/ollama/tests/test_ollama_chat_client.py
+++ b/python/packages/ollama/tests/test_ollama_chat_client.py
@@ -470,6 +470,8 @@ async def test_cmc_with_invalid_content_type(
         await ollama_client.get_response(messages=chat_history)
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_cmc_integration_with_tool_call(
     chat_history: list[Message],
@@ -485,6 +487,8 @@ async def test_cmc_integration_with_tool_call(
     assert tool_result.result == "Hello World"
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_cmc_integration_with_chat_completion(
     chat_history: list[Message],
@@ -497,6 +501,8 @@ async def test_cmc_integration_with_chat_completion(
     assert "hello" in result.text.lower()
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_cmc_streaming_integration_with_tool_call(
     chat_history: list[Message],
@@ -522,6 +528,8 @@ async def test_cmc_streaming_integration_with_tool_call(
                 assert tool_call.name == "hello_world"
 
 
+@pytest.mark.flaky
+@pytest.mark.integration
 @skip_if_azure_integration_tests_disabled
 async def test_cmc_streaming_integration_with_chat_completion(
     chat_history: list[Message],
diff --git a/python/packages/orchestrations/pyproject.toml b/python/packages/orchestrations/pyproject.toml
index 8284482993..d4bd40c411 100644
--- a/python/packages/orchestrations/pyproject.toml
+++ b/python/packages/orchestrations/pyproject.toml
@@ -80,7 +80,7 @@ executor.type = "uv"
 include = "../../shared_tasks.toml"
 [tool.poe.tasks]
 mypy = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_orchestrations"
-test = "pytest --cov=agent_framework_orchestrations --cov-report=term-missing:skip-covered tests"
+test = "pytest --cov=agent_framework_orchestrations --cov-report=term-missing:skip-covered -n auto --dist worksteal tests"
 
 [build-system]
 requires = ["flit-core >= 3.11,<4.0"]
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 06bd32f0f0..a16624a0e7 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -171,6 +171,7 @@ markers = [
     "azure: marks tests as Azure provider specific",
     "azure-ai: marks tests as Azure AI provider specific",
     "openai: marks tests as OpenAI provider specific",
+    "integration: marks tests as integration tests that require external services",
 ]
 
 [tool.coverage.run]
@@ -229,7 +230,7 @@ build-meta = "python -m flit build"
 build = ["build-packages", "build-meta"]
 publish = "uv publish"
 # combined checks
-check-packages = "python scripts/run_tasks_in_packages_if_exists.py fmt lint pyright mypy"
+check-packages = "python scripts/run_tasks_in_packages_if_exists.py fmt lint pyright"
 check = ["check-packages", "samples-lint", "samples-syntax", "test", "markdown-code-lint"]
 
 [tool.poe.tasks.all-tests-cov]

From 5df4cf1b05e5ff94c67a5f2b6ee48c96706cc202 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 23 Feb 2026 16:40:27 +0100
Subject: [PATCH 2/9] fix merge tests: remove checkout-ref from checkout steps

---
 .github/workflows/python-merge-tests.yml | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index 45e3d68d50..1e98f315c3 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -131,8 +131,6 @@ jobs:
         working-directory: python
     steps:
       - uses: actions/checkout@v6
-        with:
-          ref: ${{ inputs.checkout-ref }}
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup
@@ -185,8 +183,6 @@ jobs:
         working-directory: python
     steps:
       - uses: actions/checkout@v6
-        with:
-          ref: ${{ inputs.checkout-ref }}
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup
@@ -245,8 +241,6 @@ jobs:
         working-directory: python
     steps:
       - uses: actions/checkout@v6
-        with:
-          ref: ${{ inputs.checkout-ref }}
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup
@@ -301,8 +295,6 @@ jobs:
         working-directory: python
     steps:
       - uses: actions/checkout@v6
-        with:
-          ref: ${{ inputs.checkout-ref }}
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup

From f859eb43a0b3d1f9691c93e43ecaac512d8e53e9 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 23 Feb 2026 16:58:43 +0100
Subject: [PATCH 3/9] split integration tests into parallel per-provider jobs

---
 .../workflows/python-integration-tests.yml    | 177 +++++++++++++++---
 1 file changed, 155 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index 22af38d9c2..b1966032ee 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -1,6 +1,6 @@
 #
 # Dedicated Python integration tests workflow, called from the manual integration test orchestrator.
-# Runs all tests (unit + integration).
+# Runs all tests (unit + integration) split into parallel jobs by provider.
 #
 
 name: python-integration-tests
@@ -19,25 +19,158 @@ permissions:
 
 env:
   UV_CACHE_DIR: /tmp/.uv-cache
+  UV_PYTHON: "3.13"
   RUN_INTEGRATION_TESTS: "true"
 
 jobs:
-  python-tests-core:
-    name: Python Integration Tests - Core
+  # Unit tests: all non-integration tests across all packages
+  python-tests-unit:
+    name: Python Integration Tests - Unit
+    runs-on: ubuntu-latest
+    environment: integration
+    timeout-minutes: 60
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+          persist-credentials: false
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Test with pytest (unit tests only)
+        run: >
+          uv run poe all-tests
+          -m "not integration"
+          -n logical --dist loadfile --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+
+  # OpenAI integration tests
+  python-tests-openai:
+    name: Python Integration Tests - OpenAI
     runs-on: ubuntu-latest
     environment: integration
     timeout-minutes: 60
     env:
-      UV_PYTHON: "3.10"
       OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
       OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
       OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+          persist-credentials: false
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Test with pytest (OpenAI integration)
+        run: >
+          uv run pytest --import-mode=importlib
+          packages/core/tests/openai
+          -m integration
+          -n logical --dist loadfile --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+
+  # Azure OpenAI integration tests
+  python-tests-azure-openai:
+    name: Python Integration Tests - Azure OpenAI
+    runs-on: ubuntu-latest
+    environment: integration
+    timeout-minutes: 60
+    env:
+      AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
+      AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+          persist-credentials: false
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Azure CLI Login
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      - name: Test with pytest (Azure OpenAI integration)
+        run: >
+          uv run pytest --import-mode=importlib
+          packages/core/tests/azure
+          -m integration
+          -n logical --dist loadfile --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+
+  # Misc integration tests (Anthropic, Ollama, MCP)
+  python-tests-misc-integration:
+    name: Python Integration Tests - Misc
+    runs-on: ubuntu-latest
+    environment: integration
+    timeout-minutes: 60
+    env:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       ANTHROPIC_CHAT_MODEL_ID: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
+      LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.checkout-ref }}
+          persist-credentials: false
+      - name: Set up python and install the project
+        id: python-setup
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: ${{ env.UV_PYTHON }}
+          os: ${{ runner.os }}
+      - name: Test with pytest (Anthropic, Ollama, MCP integration)
+        run: >
+          uv run pytest --import-mode=importlib
+          packages/anthropic/tests
+          packages/ollama/tests
+          packages/core/tests/core/test_mcp.py
+          -m integration
+          -n logical --dist loadfile --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
+
+  # Azure Functions + Durable Task integration tests
+  python-tests-functions:
+    name: Python Integration Tests - Functions
+    runs-on: ubuntu-latest
+    environment: integration
+    timeout-minutes: 60
+    env:
+      OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
+      OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
       AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
       AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
       AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
-      LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
       FUNCTIONS_WORKER_RUNTIME: "python"
       DURABLE_TASK_SCHEDULER_CONNECTION_STRING: "Endpoint=http://localhost:8080;TaskHub=default;Authentication=None"
       AzureWebJobsStorage: "UseDevelopmentStorage=true"
@@ -49,37 +182,38 @@ jobs:
         with:
           ref: ${{ inputs.checkout-ref }}
           persist-credentials: false
-
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup
         with:
-          python-version: "3.10"
+          python-version: ${{ env.UV_PYTHON }}
           os: ${{ runner.os }}
-        env:
-          UV_CACHE_DIR: /tmp/.uv-cache
-
       - name: Azure CLI Login
         uses: azure/login@v2
         with:
           client-id: ${{ secrets.AZURE_CLIENT_ID }}
           tenant-id: ${{ secrets.AZURE_TENANT_ID }}
           subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
-
       - name: Set up Azure Functions Integration Test Emulators
         uses: ./.github/actions/azure-functions-integration-setup
         id: azure-functions-setup
+      - name: Test with pytest (Functions + Durable Task integration)
+        run: >
+          uv run pytest --import-mode=importlib
+          packages/azurefunctions/tests/integration_tests
+          packages/durabletask/tests/integration_tests
+          -m integration
+          -n logical --dist loadfile --dist worksteal
+          --timeout=120 --session-timeout=900 --timeout_method thread
+          --retries 2 --retry-delay 5
 
-      - name: Test with pytest
-        run: uv run poe all-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
-
+  # Azure AI integration tests
   python-tests-azure-ai:
     name: Python Integration Tests - Azure AI
     runs-on: ubuntu-latest
     environment: integration
     timeout-minutes: 60
     env:
-      UV_PYTHON: "3.10"
       AZURE_AI_PROJECT_ENDPOINT: ${{ secrets.AZUREAI__ENDPOINT }}
       AZURE_AI_MODEL_DEPLOYMENT_NAME: ${{ vars.AZUREAI__DEPLOYMENTNAME }}
       LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
@@ -91,23 +225,18 @@ jobs:
         with:
           ref: ${{ inputs.checkout-ref }}
           persist-credentials: false
-
       - name: Set up python and install the project
         id: python-setup
         uses: ./.github/actions/python-setup
         with:
-          python-version: "3.10"
+          python-version: ${{ env.UV_PYTHON }}
           os: ${{ runner.os }}
-        env:
-          UV_CACHE_DIR: /tmp/.uv-cache
-
       - name: Azure CLI Login
         uses: azure/login@v2
         with:
           client-id: ${{ secrets.AZURE_CLIENT_ID }}
           tenant-id: ${{ secrets.AZURE_TENANT_ID }}
           subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
-
       - name: Test with pytest
         timeout-minutes: 15
         run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
@@ -117,7 +246,11 @@ jobs:
     runs-on: ubuntu-latest
     needs:
       [
-        python-tests-core,
+        python-tests-unit,
+        python-tests-openai,
+        python-tests-azure-openai,
+        python-tests-misc-integration,
+        python-tests-functions,
         python-tests-azure-ai
       ]
     steps:

From 156a159861338499ac8b81085418b44242cbe30e Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 23 Feb 2026 17:08:07 +0100
Subject: [PATCH 4/9] fix: remove duplicate --dist loadfile flag from
 pytest-xdist config

Only one --dist mode can be active at a time; the second value silently
overrides the first. Keep --dist worksteal (dynamic load balancing) and
remove the redundant --dist loadfile from all workflow files and
pyproject.toml configs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/python-integration-tests.yml | 12 ++++++------
 .github/workflows/python-merge-tests.yml       | 12 ++++++------
 python/packages/azure-ai/pyproject.toml        |  2 +-
 python/pyproject.toml                          |  4 ++--
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index b1966032ee..57b35b44a7 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -47,7 +47,7 @@ jobs:
         run: >
           uv run poe all-tests
           -m "not integration"
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
 
@@ -80,7 +80,7 @@ jobs:
           uv run pytest --import-mode=importlib
           packages/core/tests/openai
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
 
@@ -119,7 +119,7 @@ jobs:
           uv run pytest --import-mode=importlib
           packages/core/tests/azure
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
 
@@ -154,7 +154,7 @@ jobs:
           packages/ollama/tests
           packages/core/tests/core/test_mcp.py
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
 
@@ -203,7 +203,7 @@ jobs:
           packages/azurefunctions/tests/integration_tests
           packages/durabletask/tests/integration_tests
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
 
@@ -239,7 +239,7 @@ jobs:
           subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
       - name: Test with pytest
         timeout-minutes: 15
-        run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
+        run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
 
   python-integration-tests-check:
     if: always()
diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index 1e98f315c3..6d93d0b3ae 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -95,7 +95,7 @@ jobs:
         run: >
           uv run poe all-tests
           -m "not integration"
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
         working-directory: ./python
@@ -142,7 +142,7 @@ jobs:
           uv run pytest --import-mode=importlib
           packages/core/tests/openai
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
         working-directory: ./python
@@ -201,7 +201,7 @@ jobs:
           uv run pytest --import-mode=importlib
           packages/core/tests/azure
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
         working-directory: ./python
@@ -254,7 +254,7 @@ jobs:
           packages/ollama/tests
           packages/core/tests/core/test_mcp.py
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
         working-directory: ./python
@@ -317,7 +317,7 @@ jobs:
           packages/azurefunctions/tests/integration_tests
           packages/durabletask/tests/integration_tests
           -m integration
-          -n logical --dist loadfile --dist worksteal
+          -n logical --dist worksteal
           --timeout=120 --session-timeout=900 --timeout_method thread
           --retries 2 --retry-delay 5
         working-directory: ./python
@@ -366,7 +366,7 @@ jobs:
           subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
       - name: Test with pytest
         timeout-minutes: 15
-        run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist loadfile --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
+        run: uv run --directory packages/azure-ai poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
         working-directory: ./python
       - name: Test Azure AI samples
         timeout-minutes: 10
diff --git a/python/packages/azure-ai/pyproject.toml b/python/packages/azure-ai/pyproject.toml
index ba4a95b8b8..8fb0adfbd1 100644
--- a/python/packages/azure-ai/pyproject.toml
+++ b/python/packages/azure-ai/pyproject.toml
@@ -85,7 +85,7 @@ test = "pytest --cov=agent_framework_azure_ai --cov-report=term-missing:skip-cov
 [tool.poe.tasks.integration-tests]
 cmd = """
 pytest --import-mode=importlib
--n logical --dist loadfile --dist worksteal
+-n logical --dist worksteal
 tests
 """
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index a16624a0e7..259caffaf9 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -256,7 +256,7 @@ pytest --import-mode=importlib
 --ignore-glob=packages/lab/**
 --ignore-glob=packages/devui/**
 -rs
--n logical --dist loadfile --dist worksteal
+-n logical --dist worksteal
     packages/**/tests
 """
 
@@ -266,7 +266,7 @@ pytest --import-mode=importlib
 --ignore-glob=packages/lab/**
 --ignore-glob=packages/devui/**
 -rs
--n logical --dist loadfile --dist worksteal
+-n logical --dist worksteal
     packages/**/tests
 """
 

From 2040fadc65f3b01afbf89ac71c11e2018d4d3dc9 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 23 Feb 2026 17:30:53 +0100
Subject: [PATCH 5/9] docs: add keep-in-sync notes for merge and integration
 test workflows

Both python-merge-tests.yml and python-integration-tests.yml share the
same parallel job structure. Added sync reminders in workflow file
comments, the python-testing SKILL.md, and CODING_STANDARD.md.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/python-integration-tests.yml |  4 ++++
 .github/workflows/python-merge-tests.yml       |  5 +++++
 python/.github/skills/python-testing/SKILL.md  | 15 ++++++++++++---
 python/CODING_STANDARD.md                      |  2 +-
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index 57b35b44a7..ef7cae2503 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -2,6 +2,10 @@
 # Dedicated Python integration tests workflow, called from the manual integration test orchestrator.
 # Runs all tests (unit + integration) split into parallel jobs by provider.
 #
+# NOTE: This workflow and python-merge-tests.yml share the same set of parallel
+# test jobs. Keep them in sync — when adding, removing, or modifying a job here,
+# apply the same change to python-merge-tests.yml.
+#
 
 name: python-integration-tests
 
diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index 6d93d0b3ae..edd539847e 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -1,4 +1,9 @@
 name: Python - Merge - Tests
+#
+# NOTE: This workflow and python-integration-tests.yml share the same set of
+# parallel test jobs. Keep them in sync — when adding, removing, or modifying a
+# job here, apply the same change to python-integration-tests.yml.
+#
 
 on:
   workflow_dispatch:
diff --git a/python/.github/skills/python-testing/SKILL.md b/python/.github/skills/python-testing/SKILL.md
index ff0d9167b5..b07ec6f8fc 100644
--- a/python/.github/skills/python-testing/SKILL.md
+++ b/python/.github/skills/python-testing/SKILL.md
@@ -121,12 +121,21 @@ The merge CI workflow (`python-merge-tests.yml`) splits integration tests into p
 
 Core infrastructure changes (e.g., `_agents.py`, `_types.py`) trigger all integration test jobs. Scheduled and manual runs always execute all jobs.
 
+### Keeping CI Workflows in Sync
+
+Two workflow files define the same set of parallel test jobs:
+
+- **`python-merge-tests.yml`** — runs on PRs, merge queue, schedule, and manual dispatch. Uses path-based change detection to skip unaffected integration jobs.
+- **`python-integration-tests.yml`** — called from the manual integration test orchestrator (`integration-tests-manual.yml`). Always runs all jobs (no path filtering).
+
+These workflows must be kept in sync. When you add, remove, or modify a test job, update **both** files. The job structure, pytest commands, and xdist flags should match between them. The only difference is that `python-merge-tests.yml` has path filters and conditional job execution, while `python-integration-tests.yml` does not.
+
 ### Updating the CI When Adding Integration Tests for a New Provider
 
-When adding integration tests for a new provider package, you must update **two things** in `python-merge-tests.yml`:
+When adding integration tests for a new provider package, you must update **both** `python-merge-tests.yml` and `python-integration-tests.yml`:
 
-1. **Add a path filter** for the new provider in the `paths-filter` job so the CI knows which file changes should trigger those tests.
-2. **Assign the tests to a CI job** — either add them to the existing `python-tests-misc-integration` job, or create a dedicated job if the provider:
+1. **Add a path filter** for the new provider in the `paths-filter` job in `python-merge-tests.yml` so the CI knows which file changes should trigger those tests.
+2. **Add the tests to a job in both workflow files** — either add them to the existing `python-tests-misc-integration` job, or create a dedicated job if the provider:
    - Has a large number of integration tests
    - Requires special infrastructure setup (emulators, Docker containers, etc.)
    - Has long-running tests that would slow down the misc job
diff --git a/python/CODING_STANDARD.md b/python/CODING_STANDARD.md
index ff8b012ec9..778d2b4965 100644
--- a/python/CODING_STANDARD.md
+++ b/python/CODING_STANDARD.md
@@ -691,4 +691,4 @@ pytestmark = [
 ]
 ```
 
-When adding integration tests for a new provider, update the path filters and job assignments in `python-merge-tests.yml`. See the `python-testing` skill for details.
+When adding integration tests for a new provider, update the path filters in `python-merge-tests.yml` and the job assignments in **both** `python-merge-tests.yml` and `python-integration-tests.yml` — these workflows must be kept in sync. See the `python-testing` skill for details.

From cf48785760a41feb6cb8b7926a96ddf7bd33c2e4 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Mon, 23 Feb 2026 20:50:05 +0100
Subject: [PATCH 6/9] refactor: remove RUN_INTEGRATION_TESTS flag

Integration test gating now uses two mechanisms:
- `@pytest.mark.integration` for test selection via `-m` filtering
- `skip_if_*_disabled` for credential/service availability checks

The RUN_INTEGRATION_TESTS env var was redundant since the marker handles
selection and the skip decorators already check for actual credentials.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/python-integration-tests.yml           | 1 -
 .github/workflows/python-merge-tests.yml                 | 1 -
 python/.github/skills/python-testing/SKILL.md            | 4 ++--
 python/CODING_STANDARD.md                                | 2 +-
 python/DEV_SETUP.md                                      | 4 +---
 python/packages/anthropic/tests/test_anthropic_client.py | 7 ++-----
 python/packages/azure-ai/tests/test_agent_provider.py    | 7 ++-----
 .../azure-ai/tests/test_azure_ai_agent_client.py         | 7 ++-----
 python/packages/azure-ai/tests/test_azure_ai_client.py   | 9 ++-------
 python/packages/azure-ai/tests/test_provider.py          | 9 ++-------
 .../azurefunctions/tests/integration_tests/.env.example  | 1 -
 .../azurefunctions/tests/integration_tests/conftest.py   | 7 -------
 .../core/tests/azure/test_azure_assistants_client.py     | 7 ++-----
 .../packages/core/tests/azure/test_azure_chat_client.py  | 7 ++-----
 .../core/tests/azure/test_azure_responses_client.py      | 7 ++-----
 python/packages/core/tests/core/test_mcp.py              | 8 ++------
 .../core/tests/openai/test_assistant_provider.py         | 7 ++-----
 .../core/tests/openai/test_openai_assistants_client.py   | 7 ++-----
 .../core/tests/openai/test_openai_chat_client.py         | 7 ++-----
 .../core/tests/openai/test_openai_responses_client.py    | 7 ++-----
 .../durabletask/tests/integration_tests/.env.example     | 4 ----
 .../durabletask/tests/integration_tests/README.md        | 3 +--
 .../durabletask/tests/integration_tests/conftest.py      | 5 -----
 python/packages/ollama/tests/test_ollama_chat_client.py  | 7 ++-----
 24 files changed, 33 insertions(+), 102 deletions(-)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index ef7cae2503..3ec8d456b3 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -24,7 +24,6 @@ permissions:
 env:
   UV_CACHE_DIR: /tmp/.uv-cache
   UV_PYTHON: "3.13"
-  RUN_INTEGRATION_TESTS: "true"
 
 jobs:
   # Unit tests: all non-integration tests across all packages
diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index edd539847e..1ad170a6b0 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -22,7 +22,6 @@ env:
   # Configure a constant location for the uv cache
   UV_CACHE_DIR: /tmp/.uv-cache
   UV_PYTHON: "3.13"
-  RUN_INTEGRATION_TESTS: "true"
   RUN_SAMPLES_TESTS: ${{ vars.RUN_SAMPLES_TESTS }}
 
 jobs:
diff --git a/python/.github/skills/python-testing/SKILL.md b/python/.github/skills/python-testing/SKILL.md
index b07ec6f8fc..4b61f27a55 100644
--- a/python/.github/skills/python-testing/SKILL.md
+++ b/python/.github/skills/python-testing/SKILL.md
@@ -9,7 +9,7 @@ description: >
 
 We strive for at least 85% test coverage across the codebase, with a focus on core packages and critical paths. Tests should be fast, reliable, and maintainable.
 When adding new code, check that the relevant sections of the codebase are covered by tests, and add new tests as needed. When modifying existing code, update or add tests to cover the changes.
-We run tests in two stages, for a PR each commit is tested with `RUN_INTEGRATION_TESTS=false` (unit tests only), and the full suite with `RUN_INTEGRATION_TESTS=true` is run when merging.
+We run tests in two stages: for a PR, each commit is tested with unit tests only (using `-m "not integration"`), and the full suite, including integration tests, is run when merging.
 
 ## Running Tests
 
@@ -83,7 +83,7 @@ Integration tests require external services (OpenAI, Azure, etc.) and are contro
 
 1. **`@pytest.mark.flaky`** — marks the test as potentially flaky since it depends on external services
 2. **`@pytest.mark.integration`** — used for test selection, so integration tests can be included/excluded with `-m integration` / `-m "not integration"`
-3. **`@skip_if_..._integration_tests_disabled`** decorator — skips the test when `RUN_INTEGRATION_TESTS` is not `true` or the required API keys are missing
+3. **`@skip_if_..._integration_tests_disabled`** decorator — skips the test when the required API keys or service endpoints are missing
 
 ### Adding New Integration Tests
 
diff --git a/python/CODING_STANDARD.md b/python/CODING_STANDARD.md
index 778d2b4965..21d87e5b8c 100644
--- a/python/CODING_STANDARD.md
+++ b/python/CODING_STANDARD.md
@@ -679,7 +679,7 @@ async def test_chat_completion() -> None:
 
 - `@pytest.mark.flaky` — marks the test as potentially flaky since it depends on external services
 - `@pytest.mark.integration` — enables selecting/excluding integration tests with `-m integration` / `-m "not integration"`
-- `@skip_if_..._integration_tests_disabled` — skips the test when `RUN_INTEGRATION_TESTS` is not set or API keys are missing
+- `@skip_if_..._integration_tests_disabled` — skips the test when required API keys or service endpoints are missing
 
 For test modules where all tests are integration tests, use `pytestmark`:
 
diff --git a/python/DEV_SETUP.md b/python/DEV_SETUP.md
index 370b4ffd2d..3769a5df9e 100644
--- a/python/DEV_SETUP.md
+++ b/python/DEV_SETUP.md
@@ -121,9 +121,7 @@ client = OpenAIChatClient(env_file_path="openai.env")
 
 ## Tests
 
-All the tests are located in the `tests` folder of each package. There are tests that are marked with `@pytest.mark.integration` and `@skip_if_..._integration_tests_disabled` decorators — these are integration tests that require an external service to be running, like OpenAI or Azure OpenAI.
-
-If you want to run these tests, you need to set the environment variable `RUN_INTEGRATION_TESTS` to `true` and have the appropriate key per services set in your environment or in a `.env` file.
+All the tests are located in the `tests` folder of each package. Tests marked with `@pytest.mark.integration` and `@skip_if_..._integration_tests_disabled` are integration tests that require external services (e.g., OpenAI, Azure OpenAI). They are automatically skipped when the required API keys or service endpoints are not configured in your environment or `.env` file.
 
 You can select or exclude integration tests using pytest markers:
 
diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py
index 1c27efefdc..d7c4c9afc7 100644
--- a/python/packages/anthropic/tests/test_anthropic_client.py
+++ b/python/packages/anthropic/tests/test_anthropic_client.py
@@ -29,11 +29,8 @@
 VALID_PNG_BASE64 = b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
 
 skip_if_anthropic_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("ANTHROPIC_API_KEY", "") in ("", "test-api-key-12345"),
-    reason="No real ANTHROPIC_API_KEY provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("ANTHROPIC_API_KEY", "") in ("", "test-api-key-12345"),
+    reason="No real ANTHROPIC_API_KEY provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/azure-ai/tests/test_agent_provider.py b/python/packages/azure-ai/tests/test_agent_provider.py
index 4ff183a72b..b394c15b4a 100644
--- a/python/packages/azure-ai/tests/test_agent_provider.py
+++ b/python/packages/azure-ai/tests/test_agent_provider.py
@@ -29,11 +29,8 @@
 )
 
 skip_if_azure_ai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/"),
-    reason="No real AZURE_AI_PROJECT_ENDPOINT provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/"),
+    reason="No real AZURE_AI_PROJECT_ENDPOINT provided; skipping integration tests.",
 )
 
 # region Provider Initialization Tests
diff --git a/python/packages/azure-ai/tests/test_azure_ai_agent_client.py b/python/packages/azure-ai/tests/test_azure_ai_agent_client.py
index 360b502f1b..b35efb6268 100644
--- a/python/packages/azure-ai/tests/test_azure_ai_agent_client.py
+++ b/python/packages/azure-ai/tests/test_azure_ai_agent_client.py
@@ -50,11 +50,8 @@
 from agent_framework_azure_ai import AzureAIAgentClient, AzureAISettings
 
 skip_if_azure_ai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/"),
-    reason="No real AZURE_AI_PROJECT_ENDPOINT provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/"),
+    reason="No real AZURE_AI_PROJECT_ENDPOINT provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/azure-ai/tests/test_azure_ai_client.py b/python/packages/azure-ai/tests/test_azure_ai_client.py
index bf818f9408..4ec1b90971 100644
--- a/python/packages/azure-ai/tests/test_azure_ai_client.py
+++ b/python/packages/azure-ai/tests/test_azure_ai_client.py
@@ -47,14 +47,9 @@
 from agent_framework_azure_ai._shared import from_azure_ai_tools
 
 skip_if_azure_ai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/")
+    os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/")
     or os.getenv("AZURE_AI_MODEL_DEPLOYMENT_NAME", "") == "",
-    reason=(
-        "No real AZURE_AI_PROJECT_ENDPOINT or AZURE_AI_MODEL_DEPLOYMENT_NAME provided; skipping integration tests."
-        if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-        else "Integration tests are disabled."
-    ),
+    reason="No real AZURE_AI_PROJECT_ENDPOINT or AZURE_AI_MODEL_DEPLOYMENT_NAME provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/azure-ai/tests/test_provider.py b/python/packages/azure-ai/tests/test_provider.py
index e960ec986c..3765f17f1c 100644
--- a/python/packages/azure-ai/tests/test_provider.py
+++ b/python/packages/azure-ai/tests/test_provider.py
@@ -20,14 +20,9 @@
 from agent_framework_azure_ai import AzureAIProjectAgentProvider
 
 skip_if_azure_ai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/")
+    os.getenv("AZURE_AI_PROJECT_ENDPOINT", "") in ("", "https://test-project.cognitiveservices.azure.com/")
     or os.getenv("AZURE_AI_MODEL_DEPLOYMENT_NAME", "") == "",
-    reason=(
-        "No real AZURE_AI_PROJECT_ENDPOINT or AZURE_AI_MODEL_DEPLOYMENT_NAME provided; skipping integration tests."
-        if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-        else "Integration tests are disabled."
-    ),
+    reason="No real AZURE_AI_PROJECT_ENDPOINT or AZURE_AI_MODEL_DEPLOYMENT_NAME provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/azurefunctions/tests/integration_tests/.env.example b/python/packages/azurefunctions/tests/integration_tests/.env.example
index a8dc5d88b4..072a0de92c 100644
--- a/python/packages/azurefunctions/tests/integration_tests/.env.example
+++ b/python/packages/azurefunctions/tests/integration_tests/.env.example
@@ -2,7 +2,6 @@
 AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
 AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=your-deployment-name
 FUNCTIONS_WORKER_RUNTIME=python
-RUN_INTEGRATION_TESTS=true
 
 # Azure Functions Configuration
 AzureWebJobsStorage=UseDevelopmentStorage=true
diff --git a/python/packages/azurefunctions/tests/integration_tests/conftest.py b/python/packages/azurefunctions/tests/integration_tests/conftest.py
index cec3758aff..3f6060d93d 100644
--- a/python/packages/azurefunctions/tests/integration_tests/conftest.py
+++ b/python/packages/azurefunctions/tests/integration_tests/conftest.py
@@ -90,13 +90,6 @@ def _should_skip_azure_functions_integration_tests() -> tuple[bool, str]:
     """Determine whether Azure Functions integration tests should be skipped."""
     _load_env_file_if_present()
 
-    run_integration_tests = os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    if not run_integration_tests:
-        return (
-            True,
-            "Integration tests are disabled. Set RUN_INTEGRATION_TESTS=true to enable Azure Functions sample tests.",
-        )
-
     # Check for Azure Functions Core Tools
     if not _check_func_cli_available():
         return (
diff --git a/python/packages/core/tests/azure/test_azure_assistants_client.py b/python/packages/core/tests/azure/test_azure_assistants_client.py
index b8fe809d28..3c51881279 100644
--- a/python/packages/core/tests/azure/test_azure_assistants_client.py
+++ b/python/packages/core/tests/azure/test_azure_assistants_client.py
@@ -23,11 +23,8 @@
 from agent_framework.azure import AzureOpenAIAssistantsClient
 
 skip_if_azure_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_OPENAI_ENDPOINT", "") in ("", "https://test-endpoint.com"),
-    reason="No real AZURE_OPENAI_ENDPOINT provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("AZURE_OPENAI_ENDPOINT", "") in ("", "https://test-endpoint.com"),
+    reason="No real AZURE_OPENAI_ENDPOINT provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/core/tests/azure/test_azure_chat_client.py b/python/packages/core/tests/azure/test_azure_chat_client.py
index 9459c1f3c4..3e88504493 100644
--- a/python/packages/core/tests/azure/test_azure_chat_client.py
+++ b/python/packages/core/tests/azure/test_azure_chat_client.py
@@ -37,11 +37,8 @@
 # region Service Setup
 
 skip_if_azure_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_OPENAI_ENDPOINT", "") in ("", "https://test-endpoint.com"),
-    reason="No real AZURE_OPENAI_ENDPOINT provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("AZURE_OPENAI_ENDPOINT", "") in ("", "https://test-endpoint.com"),
+    reason="No real AZURE_OPENAI_ENDPOINT provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/core/tests/azure/test_azure_responses_client.py b/python/packages/core/tests/azure/test_azure_responses_client.py
index 8d244940f9..4e9b25ca6a 100644
--- a/python/packages/core/tests/azure/test_azure_responses_client.py
+++ b/python/packages/core/tests/azure/test_azure_responses_client.py
@@ -23,11 +23,8 @@
 from agent_framework.azure import AzureOpenAIResponsesClient
 
 skip_if_azure_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("AZURE_OPENAI_ENDPOINT", "") in ("", "https://test-endpoint.com"),
-    reason="No real AZURE_OPENAI_ENDPOINT provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("AZURE_OPENAI_ENDPOINT", "") in ("", "https://test-endpoint.com"),
+    reason="No real AZURE_OPENAI_ENDPOINT provided; skipping integration tests.",
 )
 
 logger = logging.getLogger(__name__)
diff --git a/python/packages/core/tests/core/test_mcp.py b/python/packages/core/tests/core/test_mcp.py
index 514a0ab51c..65b4015093 100644
--- a/python/packages/core/tests/core/test_mcp.py
+++ b/python/packages/core/tests/core/test_mcp.py
@@ -34,12 +34,8 @@
 
 # Integration test skip condition
 skip_if_mcp_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true" or os.getenv("LOCAL_MCP_URL", "") == "",
-    reason=(
-        "No LOCAL_MCP_URL provided; skipping integration tests."
-        if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-        else "Integration tests are disabled."
-    ),
+    os.getenv("LOCAL_MCP_URL", "") == "",
+    reason="No LOCAL_MCP_URL provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/core/tests/openai/test_assistant_provider.py b/python/packages/core/tests/openai/test_assistant_provider.py
index 35f7267d48..2aa8c89f84 100644
--- a/python/packages/core/tests/openai/test_assistant_provider.py
+++ b/python/packages/core/tests/openai/test_assistant_provider.py
@@ -755,11 +755,8 @@ def test_merge_single_user_tool(self, mock_async_openai: MagicMock) -> None:
 # region Integration Tests
 
 skip_if_openai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
-    reason="No real OPENAI_API_KEY provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
+    reason="No real OPENAI_API_KEY provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/core/tests/openai/test_openai_assistants_client.py b/python/packages/core/tests/openai/test_openai_assistants_client.py
index e0bf89ba5a..cf8d74f959 100644
--- a/python/packages/core/tests/openai/test_openai_assistants_client.py
+++ b/python/packages/core/tests/openai/test_openai_assistants_client.py
@@ -25,11 +25,8 @@
 from agent_framework.openai import OpenAIAssistantsClient
 
 skip_if_openai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
-    reason="No real OPENAI_API_KEY provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
+    reason="No real OPENAI_API_KEY provided; skipping integration tests.",
 )
 
 INTEGRATION_TEST_MODEL = "gpt-4.1-nano"
diff --git a/python/packages/core/tests/openai/test_openai_chat_client.py b/python/packages/core/tests/openai/test_openai_chat_client.py
index 4d3f3fd9da..fae303ed22 100644
--- a/python/packages/core/tests/openai/test_openai_chat_client.py
+++ b/python/packages/core/tests/openai/test_openai_chat_client.py
@@ -24,11 +24,8 @@
 from agent_framework.openai._exceptions import OpenAIContentFilterException
 
 skip_if_openai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
-    reason="No real OPENAI_API_KEY provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
+    reason="No real OPENAI_API_KEY provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index ff154d7aaf..12e5b42d6d 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -40,11 +40,8 @@
 from agent_framework.openai._exceptions import OpenAIContentFilterException
 
 skip_if_openai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
-    reason="No real OPENAI_API_KEY provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
+    reason="No real OPENAI_API_KEY provided; skipping integration tests.",
 )
 
 
diff --git a/python/packages/durabletask/tests/integration_tests/.env.example b/python/packages/durabletask/tests/integration_tests/.env.example
index a36cf771f8..4e5abff232 100644
--- a/python/packages/durabletask/tests/integration_tests/.env.example
+++ b/python/packages/durabletask/tests/integration_tests/.env.example
@@ -11,7 +11,3 @@ TASKHUB=default
 # Redis Configuration (for streaming tests)
 REDIS_CONNECTION_STRING=redis://localhost:6379
 REDIS_STREAM_TTL_MINUTES=10
-
-# Integration Test Control
-# Set to 'true' to enable integration tests
-RUN_INTEGRATION_TESTS=true
diff --git a/python/packages/durabletask/tests/integration_tests/README.md b/python/packages/durabletask/tests/integration_tests/README.md
index 59da266460..6946cec665 100644
--- a/python/packages/durabletask/tests/integration_tests/README.md
+++ b/python/packages/durabletask/tests/integration_tests/README.md
@@ -16,7 +16,6 @@ Required variables:
 - `AZURE_OPENAI_ENDPOINT`
 - `AZURE_OPENAI_CHAT_DEPLOYMENT_NAME`
 - `AZURE_OPENAI_API_KEY` (optional if using Azure CLI authentication)
-- `RUN_INTEGRATION_TESTS` (set to `true`)
 - `ENDPOINT` (default: http://localhost:8080)
 - `TASKHUB` (default: default)
 
@@ -75,7 +74,7 @@ pytestmark = [
 ## Troubleshooting
 
 **Tests are skipped:**
-Ensure `RUN_INTEGRATION_TESTS=true` is set in your `.env` file.
+Ensure the required environment variables (e.g., `AZURE_OPENAI_ENDPOINT`) are set in your `.env` file.
 
 **DTS connection failed:**
 Check that the DTS emulator container is running: `docker ps | grep dts-emulator`
diff --git a/python/packages/durabletask/tests/integration_tests/conftest.py b/python/packages/durabletask/tests/integration_tests/conftest.py
index 014128cf37..475963c057 100644
--- a/python/packages/durabletask/tests/integration_tests/conftest.py
+++ b/python/packages/durabletask/tests/integration_tests/conftest.py
@@ -289,9 +289,6 @@ def pytest_configure(config: pytest.Config) -> None:
 
 def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None:
     """Skip tests based on markers and environment availability."""
-    run_integration = os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    skip_integration = pytest.mark.skip(reason="RUN_INTEGRATION_TESTS not set to 'true'")
-
     # Check Azure OpenAI environment variables
     azure_openai_vars = ["AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
     azure_openai_available = all(os.getenv(var) for var in azure_openai_vars)
@@ -308,8 +305,6 @@ def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item
     skip_redis = pytest.mark.skip(reason="Redis is not available at redis://localhost:6379")
 
     for item in items:
-        if "integration_test" in item.keywords and not run_integration:
-            item.add_marker(skip_integration)
         if "requires_azure_openai" in item.keywords and not azure_openai_available:
             item.add_marker(skip_azure_openai)
         if "requires_dts" in item.keywords and not dts_available:
diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py
index 03006125d1..490bbc0a15 100644
--- a/python/packages/ollama/tests/test_ollama_chat_client.py
+++ b/python/packages/ollama/tests/test_ollama_chat_client.py
@@ -26,11 +26,8 @@
 # region Service Setup
 
 skip_if_azure_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("OLLAMA_MODEL_ID", "") in ("", "test-model"),
-    reason="No real Ollama chat model provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("OLLAMA_MODEL_ID", "") in ("", "test-model"),
+    reason="No real Ollama chat model provided; skipping integration tests.",
 )
 
 

From 31353d60c302ee90412d64fe635af99498031517 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 24 Feb 2026 09:15:18 +0100
Subject: [PATCH 7/9] fix: sync missing env vars from merge-tests to
 integration-tests

Add OPENAI_EMBEDDINGS_MODEL_ID and AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME
to python-integration-tests.yml to match python-merge-tests.yml.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/python-integration-tests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index 3ec8d456b3..ad9258c20f 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -63,6 +63,7 @@ jobs:
     env:
       OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
       OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
+      OPENAI_EMBEDDINGS_MODEL_ID: ${{ vars.OPENAI_EMBEDDING_MODEL_ID }}
       OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
     defaults:
       run:
@@ -96,6 +97,7 @@ jobs:
     env:
       AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__CHATDEPLOYMENTNAME }}
       AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__RESPONSESDEPLOYMENTNAME }}
+      AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZUREOPENAI__EMBEDDINGDEPLOYMENTNAME }}
       AZURE_OPENAI_ENDPOINT: ${{ vars.AZUREOPENAI__ENDPOINT }}
     defaults:
       run:

From 8b670ff384a0fe7fd4a955b3e1d735f124686419 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 24 Feb 2026 09:17:02 +0100
Subject: [PATCH 8/9] fix: remove remaining RUN_INTEGRATION_TESTS from
 embedding tests and docs

Missed test_openai_embedding_client.py and vector-stores README in the
earlier cleanup.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../vector-stores-and-embeddings/README.md         |  2 +-
 .../tests/openai/test_openai_embedding_client.py   | 14 ++++----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/docs/features/vector-stores-and-embeddings/README.md b/docs/features/vector-stores-and-embeddings/README.md
index 42ffc98041..02e71ab028 100644
--- a/docs/features/vector-stores-and-embeddings/README.md
+++ b/docs/features/vector-stores-and-embeddings/README.md
@@ -167,7 +167,7 @@ This feature ports the vector store abstractions, embedding generator abstractio
 
 #### 1.4 — Tests and samples
 - Unit tests for types, protocol, base class, OpenAI client, Azure OpenAI client
-- Integration tests for OpenAI and Azure OpenAI (gated behind `RUN_INTEGRATION_TESTS` + credentials, `@pytest.mark.flaky`)
+- Integration tests for OpenAI and Azure OpenAI (gated behind a credentials check, `@pytest.mark.flaky`)
 - Samples in `samples/02-agents/embeddings/` — `openai_embeddings.py`, `azure_openai_embeddings.py`
 
 ---
diff --git a/python/packages/core/tests/openai/test_openai_embedding_client.py b/python/packages/core/tests/openai/test_openai_embedding_client.py
index f4a9d6052b..f6fbbad6aa 100644
--- a/python/packages/core/tests/openai/test_openai_embedding_client.py
+++ b/python/packages/core/tests/openai/test_openai_embedding_client.py
@@ -259,20 +259,14 @@ def test_azure_otel_provider_name() -> None:
 # --- Integration tests ---
 
 skip_if_openai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
-    reason="No real OPENAI_API_KEY provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"),
+    reason="No real OPENAI_API_KEY provided; skipping integration tests.",
 )
 
 skip_if_azure_openai_integration_tests_disabled = pytest.mark.skipif(
-    os.getenv("RUN_INTEGRATION_TESTS", "false").lower() != "true"
-    or not os.getenv("AZURE_OPENAI_ENDPOINT")
+    not os.getenv("AZURE_OPENAI_ENDPOINT")
     or (not os.getenv("AZURE_OPENAI_API_KEY") and not os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")),
-    reason="No Azure OpenAI credentials provided; skipping integration tests."
-    if os.getenv("RUN_INTEGRATION_TESTS", "false").lower() == "true"
-    else "Integration tests are disabled.",
+    reason="No Azure OpenAI credentials provided; skipping integration tests.",
 )
 
 

From 8f72b3f9450b99516ab88d65666a8810d6cdb3e4 Mon Sep 17 00:00:00 2001
From: eavanvalkenburg <github@vanvalkenburg.eu>
Date: Tue, 24 Feb 2026 10:26:28 +0100
Subject: [PATCH 9/9] set functions tests to Python 3.10

---
 .github/workflows/python-integration-tests.yml | 1 +
 .github/workflows/python-merge-tests.yml       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index ad9258c20f..56525b442e 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -170,6 +170,7 @@ jobs:
     environment: integration
     timeout-minutes: 60
     env:
+      UV_PYTHON: "3.10"
       OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
       OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
       OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}
diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index 1ad170a6b0..6d169948db 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -285,6 +285,7 @@ jobs:
     runs-on: ubuntu-latest
     environment: integration
     env:
+      UV_PYTHON: "3.10"
       OPENAI_CHAT_MODEL_ID: ${{ vars.OPENAI__CHATMODELID }}
       OPENAI_RESPONSES_MODEL_ID: ${{ vars.OPENAI__RESPONSESMODELID }}
       OPENAI_API_KEY: ${{ secrets.OPENAI__APIKEY }}