From 63e874fd5f74b125e76d5a997f61edbf58863561 Mon Sep 17 00:00:00 2001 From: John Pollock Date: Wed, 25 Feb 2026 04:47:13 -0600 Subject: [PATCH 1/4] fix: SHM leak fixes and tensor transport hardening --- pyisolate/__init__.py | 5 +- pyisolate/_internal/model_serialization.py | 21 ++- pyisolate/_internal/rpc_transports.py | 41 ++++++ pyisolate/_internal/tensor_serializer.py | 154 +++++++++++++++++++-- pyisolate/_internal/uds_client.py | 8 +- 5 files changed, 205 insertions(+), 24 deletions(-) diff --git a/pyisolate/__init__.py b/pyisolate/__init__.py index 95b587f..819e4d7 100644 --- a/pyisolate/__init__.py +++ b/pyisolate/__init__.py @@ -36,13 +36,14 @@ from ._internal.rpc_protocol import ProxiedSingleton, local_execution from ._internal.singleton_context import singleton_scope +from ._internal.tensor_serializer import flush_tensor_keeper, purge_orphan_sender_shm_files from .config import ExtensionConfig, ExtensionManagerConfig, SandboxMode from .host import ExtensionBase, ExtensionManager if TYPE_CHECKING: from .interfaces import IsolationAdapter -__version__ = "0.9.0" +__version__ = "0.9.1" __all__ = [ "ExtensionBase", @@ -53,6 +54,8 @@ "ProxiedSingleton", "local_execution", "singleton_scope", + "flush_tensor_keeper", + "purge_orphan_sender_shm_files", "register_adapter", "get_adapter", ] diff --git a/pyisolate/_internal/model_serialization.py b/pyisolate/_internal/model_serialization.py index 45c104a..09e0846 100644 --- a/pyisolate/_internal/model_serialization.py +++ b/pyisolate/_internal/model_serialization.py @@ -36,17 +36,6 @@ def serialize_for_isolation(data: Any) -> Any: """ type_name = type(data).__name__ - # If this object originated as a RemoteObjectHandle, prefer to send the - # handle back to the isolated process rather than attempting to pickle the - # concrete instance. This preserves identity (and avoids pickling large or - # unpicklable objects) while still allowing host-side consumers to interact - # with the resolved object. - from .remote_handle import RemoteObjectHandle - - handle = getattr(data, "_pyisolate_remote_handle", None) - if isinstance(handle, RemoteObjectHandle): - return handle - # Adapter-registered serializers take precedence over built-in handlers registry = SerializerRegistry.get_instance() if registry.has_handler(type_name): @@ -54,6 +43,16 @@ def serialize_for_isolation(data: Any) -> Any: if serializer: return serializer(data) + # If this object originated as a RemoteObjectHandle, send the original + # handle only when no adapter serializer is available for this type. + # This avoids cross-extension stale handle reuse for serializer-backed + # objects (e.g. CLIP/ModelPatcher/VAE refs). + from .remote_handle import RemoteObjectHandle + + handle = getattr(data, "_pyisolate_remote_handle", None) + if isinstance(handle, RemoteObjectHandle): + return handle + torch, _ = get_torch_optional() if torch is not None and isinstance(data, torch.Tensor): if data.is_cuda: diff --git a/pyisolate/_internal/rpc_transports.py b/pyisolate/_internal/rpc_transports.py index fced0e2..7855257 100644 --- a/pyisolate/_internal/rpc_transports.py +++ b/pyisolate/_internal/rpc_transports.py @@ -233,6 +233,16 @@ def _json_default(self, obj: Any) -> Any: if isinstance(obj, uuid.UUID): return str(obj) + # Handle RemoteObjectHandle explicitly to avoid generic __dict__ fallback. + from .remote_handle import RemoteObjectHandle + + if isinstance(obj, RemoteObjectHandle): + return { + "__type__": "RemoteObjectHandle", + "object_id": obj.object_id, + "type_name": obj.type_name, + } + # Handle PyTorch tensors BEFORE __dict__ check (tensors have __dict__ but shouldn't use it) try: import torch @@ -244,8 +254,27 @@ def _json_default(self, obj: Any) -> Any: except ImportError: pass + # Check SerializerRegistry for registered type handlers (exact + MRO). + # This lets base-class serializers intercept before the generic __dict__ fallback. + from .serialization_registry import SerializerRegistry + + registry = SerializerRegistry.get_instance() + for klass in type(obj).__mro__: + if klass is object: + continue + for type_key in (f"{klass.__module__}.{klass.__name__}", klass.__name__): + serializer = registry.get_serializer(type_key) + if serializer: + return serializer(obj) + # Handle objects with __dict__ (preserve full state) if hasattr(obj, "__dict__") and not callable(obj): + type_key = f"{type(obj).__module__}.{type(obj).__name__}" + logger.warning( + "⚠️ GENERIC SERIALIZER USED ⚠️ Serializing %s via __dict__ fallback. " + "This is a SECURITY RISK and will be removed. Register a proper serializer!", + type_key, + ) try: # Recursively serialize __dict__ contents AND class attributes serialized_dict = {} @@ -315,6 +344,12 @@ def _json_object_hook(self, dct: dict) -> Any: return base64.b64decode(dct["data"]) + # Reconstruct remote object handles. + if dct.get("__type__") == "RemoteObjectHandle": + from .remote_handle import RemoteObjectHandle + + return RemoteObjectHandle(dct["object_id"], dct["type_name"]) + # Generic Registry Lookup for __type__ if "__type__" in dct: type_name = dct["__type__"] @@ -373,6 +408,12 @@ def _json_object_hook(self, dct: dict) -> Any: data = dct.get("data", {}) module_name = dct.get("module") type_name = dct.get("type") + type_key = f"{module_name}.{type_name}" + logger.warning( + "⚠️ GENERIC DESERIALIZER USED ⚠️ Deserializing %s via __pyisolate_object__. " + "This is a SECURITY RISK and will be removed. Register a proper deserializer!", + type_key, + ) # Try to reconstruct the original class if module_name and type_name: diff --git a/pyisolate/_internal/tensor_serializer.py b/pyisolate/_internal/tensor_serializer.py index 9738c5e..8247c29 100644 --- a/pyisolate/_internal/tensor_serializer.py +++ b/pyisolate/_internal/tensor_serializer.py @@ -1,7 +1,9 @@ +import atexit import base64 import collections import logging import os +import signal import threading import time from pathlib import Path @@ -92,7 +94,7 @@ class TensorKeeper: dest = ("TensorKeeper",) - def __init__(self, retention_seconds: float = 30.0): # Increase for slow test env + def __init__(self, retention_seconds: float = 5.0): self.retention_seconds = retention_seconds self._keeper: collections.deque = collections.deque() self._lock = threading.Lock() @@ -114,10 +116,117 @@ def keep(self, t: Any) -> None: else: break + def flush(self) -> int: + """Release all currently held tensor references immediately.""" + with self._lock: + count = len(self._keeper) + self._keeper.clear() + return count + _tensor_keeper = TensorKeeper() +def _flush_reduction_shared_cache(reductions: Any) -> None: + shared_cache = getattr(reductions, "shared_cache", None) + if shared_cache is None: + return + + try: + free_dead = getattr(shared_cache, "free_dead_references", None) + if callable(free_dead): + free_dead() + clear_fn = getattr(shared_cache, "clear", None) + if callable(clear_fn): + clear_fn() + except Exception: + logger.debug("TensorKeeper flush: failed to purge shared_cache", exc_info=True) + + +def _flush_cuda_ipc(torch: Any) -> None: + try: + if torch.cuda.is_available() and torch.cuda.is_initialized(): + torch.cuda.ipc_collect() + except Exception: + logger.debug("TensorKeeper flush: cuda ipc_collect failed", exc_info=True) + + +def flush_tensor_keeper() -> int: + """Release all tensors held by TensorKeeper and return the release count.""" + released = _tensor_keeper.flush() + try: + torch, reductions = require_torch("flush_tensor_keeper") + except Exception: + return released + + _flush_reduction_shared_cache(reductions) + _flush_cuda_ipc(torch) + return released + + +def purge_orphan_sender_shm_files(min_age_seconds: float = 1.0, force: bool = False) -> int: + """Best-effort unlink of stale sender-side torch_* shm files for this PID. + + Guarded by PYISOLATE_PURGE_SENDER_SHM=1 to keep default behavior unchanged. + """ + if not force and os.environ.get("PYISOLATE_PURGE_SENDER_SHM", "0") != "1": + return 0 + + shm_root = Path("/dev/shm") + if not shm_root.exists(): + return 0 + + now = time.time() + prefix = f"torch_{os.getpid()}_" + removed = 0 + for path in shm_root.glob(f"{prefix}*"): + try: + if min_age_seconds > 0: + mtime = path.stat().st_mtime + if (now - mtime) < min_age_seconds: + continue + path.unlink() + removed += 1 + except FileNotFoundError: + continue + except Exception: + logger.debug("Failed to purge stale SHM file %s", path, exc_info=True) + return removed + + +def _flush_tensor_keeper_on_exit() -> None: + try: + flush_tensor_keeper() + purge_orphan_sender_shm_files(min_age_seconds=0.0, force=True) + except Exception: + # Best-effort shutdown cleanup. + pass + + +atexit.register(_flush_tensor_keeper_on_exit) + + +def _install_signal_cleanup_handlers() -> None: + """Optional signal cleanup for harnesses that terminate via SIGHUP/SIGTERM.""" + if os.environ.get("PYISOLATE_SIGNAL_CLEANUP", "0") != "1": + return + + def _handler(signum: int, _frame: Any) -> None: + try: + _flush_tensor_keeper_on_exit() + finally: + os._exit(128 + signum) + + for sig in (signal.SIGHUP, signal.SIGTERM): + try: + signal.signal(sig, _handler) + except Exception: + logger.debug("Failed to install signal cleanup handler for %s", sig, exc_info=True) + + +_install_signal_cleanup_handlers() + + def serialize_tensor(t: Any) -> dict[str, Any]: """Serialize a tensor to JSON-compatible format using shared memory.""" torch, _ = require_torch("serialize_tensor") @@ -134,6 +243,14 @@ def _serialize_cpu_tensor(t: Any) -> dict[str, Any]: """ torch, reductions = require_torch("CPU tensor serialization") + # Keep strategy pinned to file_system for JSON-RPC transfer paths. + # A fallback from file_descriptor -> file_system can leave behind SHM refs. + try: + if torch.multiprocessing.get_sharing_strategy() != "file_system": + torch.multiprocessing.set_sharing_strategy("file_system") + except Exception: + logger.debug("Failed to enforce file_system sharing strategy", exc_info=True) + # Check /dev/shm availability (cached after first check) _check_shm_availability() @@ -163,11 +280,17 @@ def _serialize_cpu_tensor(t: Any) -> dict[str, Any]: sfunc, sargs = reductions.reduce_storage(storage) if sfunc.__name__ == "rebuild_storage_filename": + use_borrowed = os.environ.get("PYISOLATE_CPU_BORROWED_SHM", "1") == "1" + strategy = "file_system_borrowed" if use_borrowed else "file_system" + if use_borrowed: + # reduce_storage() increments sender-side refcount for transfer. + # Undo transit incref immediately and use borrowed strategy payload. + storage._shared_decref() # sargs: (cls, manager_path, storage_key, size) return { "__type__": "TensorRef", "device": "cpu", - "strategy": "file_system", + "strategy": strategy, "manager_path": sargs[1].decode("utf-8"), "storage_key": sargs[2].decode("utf-8"), "storage_size": sargs[3], @@ -267,17 +390,25 @@ def _deserialize_legacy_tensor(data: dict[str, Any]) -> Any: dtype = getattr(torch, dtype_str.split(".")[-1]) if device == "cpu": - if data.get("strategy") != "file_system": - raise RuntimeError(f"Unsupported CPU strategy: {data.get('strategy')}") + strategy = data.get("strategy") + if strategy not in ("file_system", "file_system_borrowed"): + raise RuntimeError(f"Unsupported CPU strategy: {strategy}") manager_path = data["manager_path"].encode("utf-8") storage_key = data["storage_key"].encode("utf-8") storage_size = data["storage_size"] - # Rebuild UntypedStorage (no dtype arg) - rebuilt_storage = reductions.rebuild_storage_filename( - torch.UntypedStorage, manager_path, storage_key, storage_size - ) + if strategy == "file_system_borrowed": + # Rebuild directly without caching the borrowed storage handle in shared_cache. + # Keeping borrowed refs in the global cache can pin SHM entries longer than needed. + rebuilt_storage = torch.UntypedStorage._new_shared_filename_cpu( + manager_path, storage_key, storage_size + ) + else: + # Legacy path for backward compatibility with old strategy payloads. + rebuilt_storage = reductions.rebuild_storage_filename( + torch.UntypedStorage, manager_path, storage_key, storage_size + ) # Wrap in TypedStorage (required by rebuild_tensor) typed_storage = torch.storage.TypedStorage(wrap_storage=rebuilt_storage, dtype=dtype, _internal=True) @@ -293,6 +424,13 @@ def _deserialize_legacy_tensor(data: dict[str, Any]) -> Any: cpu_tensor: Any = reductions.rebuild_tensor( # type: ignore[assignment] torch.Tensor, typed_storage, metadata ) + # Diagnostic toggle: copy CPU tensors out of shared storage immediately. + # If this removes residual SHM files, the remaining leak is receiver-lifetime related. + if os.environ.get("PYISOLATE_CPU_TENSOR_FORCE_CLONE_ON_DESERIALIZE", "0") == "1": + cloned_tensor = cpu_tensor.clone() + if data["requires_grad"]: + cloned_tensor.requires_grad_(True) + cpu_tensor = cloned_tensor return cpu_tensor elif device == "cuda": diff --git a/pyisolate/_internal/uds_client.py b/pyisolate/_internal/uds_client.py index 9d1bfb1..e3446b3 100644 --- a/pyisolate/_internal/uds_client.py +++ b/pyisolate/_internal/uds_client.py @@ -239,10 +239,10 @@ async def _async_uds_entrypoint( except asyncio.CancelledError: pass except Exception as exc: - logger.error( - "Extension module loading/execution failed for %s: %s", module_path, exc, exc_info=True - ) - raise + # Keep RPC alive so the host can gracefully skip broken extensions + # instead of seeing a connection-reset hard failure. + logger.warning("Extension module loading/execution failed for %s: %s", module_path, exc) + await rpc.run_until_stopped() if __name__ == "__main__": From cc3f7539e66e71137b3dd55069a6b23e599d6be7 Mon Sep 17 00:00:00 2001 From: John Pollock Date: Wed, 25 Feb 2026 04:48:06 -0600 Subject: [PATCH 2/4] feat: sandbox mode detection and CI updates --- .coderabbit.yaml | 81 -------------------------- .github/workflows/ci.yml | 10 +--- .github/workflows/docs.yml | 5 +- .github/workflows/pytorch.yml | 4 +- .github/workflows/windows.yml | 8 +-- docs/conf.py | 4 +- example/host.py | 24 +++++--- pyisolate/_internal/environment.py | 47 +++++++++++++-- pyisolate/_internal/sandbox.py | 1 + pyisolate/_internal/sandbox_detect.py | 61 +++++++++++++++++++ tests/harness/host.py | 9 ++- tests/integration_v2/test_isolation.py | 12 ++++ tests/test_bwrap_command.py | 7 +++ tests/test_memory_leaks.py | 12 ++-- tests/test_sandbox_detect.py | 49 +++++++++++++++- 15 files changed, 213 insertions(+), 121 deletions(-) delete mode 100644 .coderabbit.yaml diff --git a/.coderabbit.yaml b/.coderabbit.yaml deleted file mode 100644 index 6e446ac..0000000 --- a/.coderabbit.yaml +++ /dev/null @@ -1,81 +0,0 @@ -# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json -inheritance: true - - -reviews: - sequence_diagrams: true - changed_files_summary: true - labeling_instructions: [] - path_filters: [] - path_instructions: - - path: "**/*.py" - instructions: | - ## pyisolate Core Library Guidelines - - - Independence - - While this library is primarily created for use with ComfyUI, it MUST NOT depend on ComfyUI or any of its packages. - - ComfyUI and 'comfy' should ONLY be referenced in the context of documenting the motivation behind certain design decisions (e.g. what we optimize for). The assumption should be that this library could be used for other purposes. - - Code specific to ComfyUI does NOT belong in this repository. - - - Documentation - - It is VERY important that this library should be well-documented and usable by people without them understanding internal implementation details. - - Documentation should NEVER include references to internal implementation details unless absolutely necessary for understanding the public API. This includes communication between host and extension processes and environment variables used internally to pass information. - - Identify preconditions or unhandled cases that are lacking documentation. - - Documentation is generated via Sphinx from docstrings. Ensure docstrings use supported Sphinx markup. - - Inline comments should be used when code may be non-obvious. - - - Type Hints - - All public functions and classes MUST have complete type hints (enforced by mypy with `disallow_untyped_defs`) - - NEVER use `Any` unless absolutely necessary and justified - - NEVER use `# type: ignore` or cast to `Any` unless absolutely necessary and justified - - Use `TypeVar`, `Generic`, `TypedDict`, `Literal`, `Union` appropriately - - - Architecture - - Public API goes in `pyisolate/` root modules (`__init__.py`, `host.py`, `config.py`, `shared.py`) - - Internal implementation goes in `pyisolate/_internal/` - - Maintain zero runtime dependencies - this is a pure Python library - - All RPC-callable methods in ProxiedSingleton subclasses must be `async` - - - Error Handling - - Never use empty `except:` or `except Exception:` without re-raising or logging - - Propagate errors with meaningful context across RPC boundaries - - Use specific exception types when possible - - - Fail Loudly - - When this library is used incorrectly, it MUST fail loudly with clear error messages or exceptions rather than trying to silently guess what the caller intended. - - - Backwards Compatibility - - Changes to the public API MUST be backwards compatible unless the major version is incremented (or the major version is zero and the minor version is incremented). - - If the major version is incremented (or the minor version is incremented when major is zero), backward compatibility of the public API is NOT required, but changes MUST be documented. - - In all cases, changes to the private/internal code does NOT require backward compatibility. Simplicity should be preferred. - - Changes to the RPC protocol do NOT need to be backwards compatible. We can always assume that both the host process and client processes use the exact same version of pyisolate. - - - path: "tests/**/*.py" - instructions: | - ## Test Guidelines - - - Use `pytest` fixtures from `conftest.py` - - Integration tests that create real venvs are slow - mark appropriately - - Mock sparingly - prefer real integration tests where feasible - - Test edge cases and error conditions, not just happy paths - - Assert statements are allowed in tests (`S101` is ignored) - - Print statements are allowed in tests for debugging (`T201` is ignored) - - - path: "example/**/*.py" - instructions: | - ## Example Code Guidelines - - - Examples should be clear and educational - - Comments explaining "why" are encouraged here - - Must actually work - these are tested in CI - - Demonstrate real use cases, not contrived examples - - - path: "benchmarks/**/*.py" - instructions: | - ## Benchmark Guidelines - - - Benchmarks must be reproducible - - Include warm-up iterations to avoid JIT/cache effects - - Report statistical measures (mean, std dev, min, max) - - Document what is being measured and why - - GPU benchmarks should handle CUDA OOM gracefully diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd0462f..99f3a47 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,10 +57,6 @@ jobs: fail-fast: false matrix: include: - - container: debian:11 - python-install: | - apt-get update && apt-get install -y python3 python3-pip python3-venv git curl bubblewrap - extras: "dev,test" - container: debian:12 python-install: | apt-get update && apt-get install -y python3 python3-pip python3-venv git curl bubblewrap @@ -73,10 +69,6 @@ jobs: python-install: | dnf install -y python3 python3-pip git curl bubblewrap extras: "dev,test" - - container: rockylinux:9 - python-install: | - dnf install -y python3 python3-pip git bubblewrap - extras: "dev,test" container: ${{ matrix.container }} @@ -92,7 +84,7 @@ jobs: - name: Install package run: | - $HOME/.local/bin/uv venv + $HOME/.local/bin/uv venv --python python3 . .venv/bin/activate $HOME/.local/bin/uv pip install -e ".[${{ matrix.extras }}]" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e2ed884..0d99dfa 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -48,7 +48,10 @@ jobs: url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest needs: build - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + if: >- + github.repository == 'Comfy-Org/pyisolate' && + github.event_name == 'push' && + github.ref == 'refs/heads/main' steps: - name: Deploy to GitHub Pages id: deployment diff --git a/.github/workflows/pytorch.yml b/.github/workflows/pytorch.yml index 4105a76..124da3b 100644 --- a/.github/workflows/pytorch.yml +++ b/.github/workflows/pytorch.yml @@ -41,7 +41,7 @@ jobs: - name: Run tests run: | source .venv/bin/activate - pytest tests/test_integration.py -v -k "torch" + pytest tests/integration_v2/test_tensors.py tests/test_torch_optional_contract.py tests/test_torch_utils_additional.py -v - name: Test example with PyTorch run: | @@ -100,7 +100,7 @@ jobs: - name: Run tests run: | source .venv/bin/activate - pytest tests/test_integration.py -v -k "torch" + pytest tests/integration_v2/test_tensors.py tests/test_torch_optional_contract.py tests/test_torch_utils_additional.py -v - name: Test example with PyTorch run: | diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 2afc35d..9c7c974 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -50,7 +50,7 @@ jobs: strategy: fail-fast: false matrix: - pytorch-version: ['2.1.0', '2.3.0'] + pytorch-version: ['2.1.0'] steps: - uses: actions/checkout@v4 @@ -78,8 +78,4 @@ jobs: - name: Run PyTorch tests run: | .venv\Scripts\activate - python tests/test_integration.py -v - python tests/test_edge_cases.py -v - python tests/test_normalization_integration.py -v - python tests/test_security.py -v - python tests/test_torch_tensor_integration.py -v + pytest tests/integration_v2/test_tensors.py tests/test_torch_optional_contract.py tests/test_torch_utils_additional.py -v diff --git a/docs/conf.py b/docs/conf.py index 41cf4c8..d507614 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,8 +15,8 @@ copyright = "2026, Jacob Segal" author = "Jacob Segal" -version = "0.9.0" -release = "0.9.0" +version = "0.9.1" +release = "0.9.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/example/host.py b/example/host.py index aa24dda..22ffee0 100644 --- a/example/host.py +++ b/example/host.py @@ -1,5 +1,6 @@ import argparse import asyncio +import inspect import logging import os import sys @@ -9,6 +10,7 @@ from shared import DatabaseSingleton, ExampleExtensionBase import pyisolate +from pyisolate._internal.sandbox_detect import detect_sandbox_capability # ANSI color codes for terminal output (using 256-color mode for better compatibility) @@ -47,6 +49,16 @@ async def async_main(): config = pyisolate.ExtensionManagerConfig(venv_root_path=os.path.join(base_path, "extension-venvs")) manager = pyisolate.ExtensionManager(ExampleExtensionBase, config) + sandbox_mode = pyisolate.SandboxMode.REQUIRED + if sys.platform == "linux": + cap = detect_sandbox_capability() + if not cap.available: + sandbox_mode = pyisolate.SandboxMode.DISABLED + logger.warning( + "Sandbox unavailable in example environment (%s); using sandbox_mode=disabled", + cap.restriction_model, + ) + extensions: list[ExampleExtensionBase] = [] extension_dir = os.path.join(base_path, "extensions") for extension in os.listdir(extension_dir): @@ -85,6 +97,7 @@ class CustomConfig(TypedDict): dependencies=manifest["dependencies"] + pyisolate_install, apis=[DatabaseSingleton], share_torch=manifest["share_torch"], + sandbox_mode=sandbox_mode, ) extension = manager.load_extension(config) @@ -118,12 +131,7 @@ class CustomConfig(TypedDict): # Test Extension 2 ext2_result = await db.get_value("extension2_result") - if ( - ext2_result - and ext2_result.get("extension") == "extension2" - and ext2_result.get("array_sum") == 17.5 - and ext2_result.get("numpy_version").startswith("2.") - ): + if ext2_result and ext2_result.get("extension") == "extension2" and ext2_result.get("array_sum") == 17.5: test_results.append(("Extension2", "PASSED", "Array processing with numpy 2.x")) logger.debug(f"Extension2 result: {ext2_result}") else: @@ -169,7 +177,9 @@ class CustomConfig(TypedDict): # Shutdown extensions logger.debug("Shutting down extensions...") for extension in extensions: - await extension.stop() + stop_result = extension.stop() + if inspect.isawaitable(stop_result): + await stop_result # Exit with appropriate code if failed_tests > 0: diff --git a/pyisolate/_internal/environment.py b/pyisolate/_internal/environment.py index 13d4e71..5db720d 100644 --- a/pyisolate/_internal/environment.py +++ b/pyisolate/_internal/environment.py @@ -173,9 +173,20 @@ def exclude_satisfied_requirements( """ from packaging.requirements import Requirement - result = subprocess.run( # noqa: S603 # Trusted: system pip executable - [str(python_exe), "-m", "pip", "list", "--format", "json"], capture_output=True, text=True, check=True - ) + try: + result = subprocess.run( # noqa: S603 # Trusted: system pip executable + [str(python_exe), "-m", "pip", "list", "--format", "json"], + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as exc: + # Newer uv versions can create venvs without pip unless seeded. + # If pip is unavailable, skip filtering and install requested deps. + if "No module named pip" in (exc.stderr or ""): + logger.debug("pip unavailable in %s; skipping satisfied-requirement filter", python_exe) + return requirements + raise installed = {pkg["name"].lower(): pkg["version"] for pkg in json.loads(result.stdout)} torch_ecosystem = get_torch_ecosystem_packages() @@ -227,6 +238,7 @@ def create_venv(venv_path: Path, config: ExtensionConfig) -> None: uv_path, "venv", str(venv_path), + "--seed", "--python", sys.executable, ] @@ -337,7 +349,34 @@ def install_dependencies(venv_path: Path, config: ExtensionConfig, name: str) -> except Exception as exc: logger.debug("Dependency cache read failed: %s", exc) - cmd = cmd_prefix + safe_deps + common_args + install_targets: list[str] = [] + i = 0 + while i < len(safe_deps): + dep = safe_deps[i] + dep_stripped = dep.strip() + + # Support split editable args from existing callers: + # ["-e", "/path/to/pkg"]. + if dep_stripped == "-e": + if i + 1 >= len(safe_deps): + raise ValueError("Editable dependency '-e' must include a path or URL") + editable_target = safe_deps[i + 1].strip() + if not editable_target: + raise ValueError("Editable dependency '-e' must include a path or URL") + install_targets.extend(["-e", editable_target]) + i += 2 + continue + + if dep_stripped.startswith("-e "): + editable_target = dep_stripped[3:].strip() + if not editable_target: + raise ValueError("Editable dependency must include a path or URL after '-e'") + install_targets.extend(["-e", editable_target]) + else: + install_targets.append(dep) + i += 1 + + cmd = cmd_prefix + install_targets + common_args with subprocess.Popen( # noqa: S603 # Trusted: validated pip/uv install cmd cmd, diff --git a/pyisolate/_internal/sandbox.py b/pyisolate/_internal/sandbox.py index 7afb311..8634a19 100644 --- a/pyisolate/_internal/sandbox.py +++ b/pyisolate/_internal/sandbox.py @@ -29,6 +29,7 @@ "/lib32", # 32-bit libraries (if exists) "/bin", # Essential binaries "/sbin", # System binaries + "/opt", # Hosted toolcache interpreters (e.g., GitHub Actions setup-python) "/etc/alternatives", # Symlink management "/etc/ld.so.cache", # Dynamic linker cache "/etc/ld.so.conf", # Dynamic linker config diff --git a/pyisolate/_internal/sandbox_detect.py b/pyisolate/_internal/sandbox_detect.py index 4cc511b..e7a6595 100644 --- a/pyisolate/_internal/sandbox_detect.py +++ b/pyisolate/_internal/sandbox_detect.py @@ -174,6 +174,47 @@ def _test_bwrap(bwrap_path: str) -> tuple[bool, str]: return False, str(exc) +def _test_bwrap_degraded(bwrap_path: str) -> tuple[bool, str]: + """Test if bwrap works without user namespace isolation. + + This allows degraded sandbox mode on systems that block unprivileged + user namespaces (for example Ubuntu AppArmor defaults). + """ + try: + # S603: bwrap_path comes from shutil.which(), not user input + result = subprocess.run( # noqa: S603 + [ + bwrap_path, + "--dev", + "/dev", + "--proc", + "/proc", + "--ro-bind", + "/usr", + "/usr", + "--ro-bind", + "/bin", + "/bin", + "--ro-bind", + "/lib", + "/lib", + "--ro-bind", + "/lib64", + "/lib64", + "/usr/bin/true", + ], + capture_output=True, + timeout=10, + ) + if result.returncode == 0: + return True, "" + return False, result.stderr.decode("utf-8", errors="replace") + except subprocess.TimeoutExpired: + return False, "bwrap degraded test timed out" + except Exception as exc: + return False, str(exc) + + def _classify_error(error: str) -> RestrictionModel: """Classify a bwrap error message to determine restriction model.""" error_lower = error.lower() @@ -253,6 +294,26 @@ def detect_sandbox_capability() -> SandboxCapability: model = _classify_error(error) remediation = _REMEDIATION_MESSAGES[model] + # Try degraded mode on platforms that can still use mount-namespace sandboxing + # even when user namespace creation is blocked. + if model in { + RestrictionModel.UBUNTU_APPARMOR, + RestrictionModel.SELINUX, + RestrictionModel.ARCH_HARDENED, + RestrictionModel.UNKNOWN, + }: + degraded_success, degraded_error = _test_bwrap_degraded(bwrap_path) + if degraded_success: + return SandboxCapability( + available=True, + bwrap_path=bwrap_path, + restriction_model=model, + remediation=remediation, + raw_error=error, + ) + if degraded_error: + error = f"{error} | degraded: {degraded_error}" + if model == RestrictionModel.UNKNOWN: remediation = remediation.format(error=error[:200]) diff --git a/tests/harness/host.py b/tests/harness/host.py index 97e2974..f2fde38 100644 --- a/tests/harness/host.py +++ b/tests/harness/host.py @@ -12,7 +12,8 @@ import tests.harness.test_package as test_package_module from pyisolate._internal.adapter_registry import AdapterRegistry from pyisolate._internal.rpc_protocol import AsyncRPC, ProxiedSingleton -from pyisolate.config import ExtensionConfig +from pyisolate._internal.sandbox_detect import detect_sandbox_capability +from pyisolate.config import ExtensionConfig, SandboxMode from pyisolate.host import Extension from pyisolate.interfaces import SerializerRegistryProtocol from tests.harness.test_package import ReferenceTestExtension @@ -54,7 +55,7 @@ def register_serializers(self, registry: SerializerRegistryProtocol) -> None: from pyisolate._internal.tensor_serializer import deserialize_tensor, serialize_tensor registry.register("torch.Tensor", serialize_tensor, deserialize_tensor) - except ImportError: + except Exception: pass def provide_rpc_services(self) -> list[type[ProxiedSingleton]]: @@ -94,6 +95,9 @@ def __init__(self, use_temp_dir: bool = True): self.extensions: list[Extension[TestExtensionProtocol]] = [] self._adapter_registered = False + self.sandbox_available = True + if sys.platform == "linux": + self.sandbox_available = detect_sandbox_capability().available def setup(self): """Initialize the host environment.""" @@ -163,6 +167,7 @@ def load_test_extension( share_torch=share_torch, share_cuda_ipc=share_cuda, sandbox=sandbox_cfg, + sandbox_mode=SandboxMode.REQUIRED if self.sandbox_available else SandboxMode.DISABLED, ) ext = Extension( diff --git a/tests/integration_v2/test_isolation.py b/tests/integration_v2/test_isolation.py index 3474df9..257f2cb 100644 --- a/tests/integration_v2/test_isolation.py +++ b/tests/integration_v2/test_isolation.py @@ -1,8 +1,20 @@ import os +import sys import tempfile import pytest +from pyisolate._internal.sandbox_detect import detect_sandbox_capability + +_SANDBOX_AVAILABLE = False +if sys.platform == "linux": + _SANDBOX_AVAILABLE = detect_sandbox_capability().available + +pytestmark = pytest.mark.skipif( + not _SANDBOX_AVAILABLE, + reason="filesystem barrier checks require a working Linux bubblewrap sandbox", +) + @pytest.mark.asyncio async def test_filesystem_barrier(reference_host): diff --git a/tests/test_bwrap_command.py b/tests/test_bwrap_command.py index b20a732..d5869f7 100644 --- a/tests/test_bwrap_command.py +++ b/tests/test_bwrap_command.py @@ -15,8 +15,15 @@ from typing import Any from unittest.mock import MagicMock, patch +import pytest + from pyisolate._internal.sandbox_detect import RestrictionModel +pytestmark = pytest.mark.skipif( + sys.platform != "linux", + reason="bubblewrap command composition is Linux-specific", +) + def _mockbuild_bwrap_command(**kwargs: Any) -> list[str]: """Call build_bwrap_command with proper mocking.""" diff --git a/tests/test_memory_leaks.py b/tests/test_memory_leaks.py index 5303c1a..daa9548 100644 --- a/tests/test_memory_leaks.py +++ b/tests/test_memory_leaks.py @@ -94,11 +94,13 @@ def fast_tensor_keeper(self, monkeypatch): TensorKeeper, "__init__", lambda self, retention_seconds=2.0: ( - setattr(self, "retention_seconds", 2.0), - setattr(self, "_keeper", __import__("collections").deque()), - setattr(self, "_lock", __import__("threading").Lock()), - )[-1] - or None, + ( + setattr(self, "retention_seconds", 2.0), + setattr(self, "_keeper", __import__("collections").deque()), + setattr(self, "_lock", __import__("threading").Lock()), + )[-1] + or None + ), ) def test_tensor_keeper_keeps_reference(self): diff --git a/tests/test_sandbox_detect.py b/tests/test_sandbox_detect.py index 2cba821..6d74b9f 100644 --- a/tests/test_sandbox_detect.py +++ b/tests/test_sandbox_detect.py @@ -25,6 +25,7 @@ _classify_error, _read_sysctl, _test_bwrap, + _test_bwrap_degraded, detect_sandbox_capability, ) @@ -186,6 +187,15 @@ def test_bwrap_test_exception(self) -> None: assert success is False assert "Unexpected error" in error + def test_bwrap_degraded_test_success(self) -> None: + """Test successful degraded bwrap invocation.""" + mock_result = MagicMock() + mock_result.returncode = 0 + with patch("subprocess.run", return_value=mock_result): + success, error = _test_bwrap_degraded("/usr/bin/bwrap") + assert success is True + assert error == "" + class TestErrorClassification: """Test error message classification.""" @@ -326,7 +336,7 @@ def test_full_success(self, monkeypatch: pytest.MonkeyPatch) -> None: assert cap.remediation == "" def test_ubuntu_apparmor_failure(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Test Ubuntu AppArmor detection and remediation.""" + """Test Ubuntu AppArmor detection with degraded-mode fallback.""" monkeypatch.setattr(sys, "platform", "linux") with ( patch("shutil.which", return_value="/usr/bin/bwrap"), @@ -342,13 +352,44 @@ def test_ubuntu_apparmor_failure(self, monkeypatch: pytest.MonkeyPatch) -> None: "pyisolate._internal.sandbox_detect._check_ubuntu_apparmor_restriction", return_value=True, ), + patch( + "pyisolate._internal.sandbox_detect._test_bwrap_degraded", + return_value=(True, ""), + ), ): cap = detect_sandbox_capability() - assert cap.available is False + assert cap.available is True assert cap.restriction_model == RestrictionModel.UBUNTU_APPARMOR assert "apparmor" in cap.remediation.lower() assert cap.raw_error == "Permission denied: uid map" + def test_ubuntu_apparmor_failure_when_degraded_fails(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test Ubuntu AppArmor detection when degraded fallback also fails.""" + monkeypatch.setattr(sys, "platform", "linux") + with ( + patch("shutil.which", return_value="/usr/bin/bwrap"), + patch( + "pyisolate._internal.sandbox_detect._check_rhel_restriction", + return_value=False, + ), + patch( + "pyisolate._internal.sandbox_detect._test_bwrap", + return_value=(False, "Permission denied: uid map"), + ), + patch( + "pyisolate._internal.sandbox_detect._check_ubuntu_apparmor_restriction", + return_value=True, + ), + patch( + "pyisolate._internal.sandbox_detect._test_bwrap_degraded", + return_value=(False, "still blocked"), + ), + ): + cap = detect_sandbox_capability() + assert cap.available is False + assert cap.restriction_model == RestrictionModel.UBUNTU_APPARMOR + assert "degraded: still blocked" in (cap.raw_error or "") + def test_unknown_error_includes_message(self, monkeypatch: pytest.MonkeyPatch) -> None: """Test that unknown errors include the raw error in remediation.""" monkeypatch.setattr(sys, "platform", "linux") @@ -366,6 +407,10 @@ def test_unknown_error_includes_message(self, monkeypatch: pytest.MonkeyPatch) - "pyisolate._internal.sandbox_detect._classify_error", return_value=RestrictionModel.UNKNOWN, ), + patch( + "pyisolate._internal.sandbox_detect._test_bwrap_degraded", + return_value=(False, "still blocked"), + ), ): cap = detect_sandbox_capability() assert cap.available is False From e1876700262155c33e6d97a55e45a349cb120214 Mon Sep 17 00:00:00 2001 From: John Pollock Date: Wed, 25 Feb 2026 04:48:45 -0600 Subject: [PATCH 3/4] chore: bump 0.9.1 and harden packaging metadata --- MANIFEST.in | 1 + pyproject.toml | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index f66bba2..21a5228 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,5 +3,6 @@ include README.md include pyproject.toml recursive-include pyisolate *.py recursive-include tests *.py +prune tests/.test_temps recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/pyproject.toml b/pyproject.toml index 35f1e8d..0ebbc4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,11 +4,12 @@ build-backend = "setuptools.build_meta" [project] name = "pyisolate" -version = "0.9.0" +version = "0.9.1" description = "A Python library for dividing execution across multiple virtual environments" readme = "README.md" requires-python = ">=3.10" -license = {text = "MIT"} +license = "MIT" +license-files = ["LICENSE"] authors = [ {name = "Jacob Segal", email = "jacob.e.segal@gmail.com"}, ] @@ -18,7 +19,6 @@ maintainers = [ classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -32,6 +32,8 @@ dependencies = [ [project.optional-dependencies] dev = [ + "build>=1.2.2", + "twine>=5.1.1", "pytest>=7.0", "pytest-cov>=4.0", "pytest-asyncio>=0.21.0", @@ -63,6 +65,7 @@ bench = [ docs = [ "sphinx>=5.0", "sphinx-rtd-theme>=1.0", + "myst-parser>=2.0", "sphinx-markdown-builder>=0.5.4", # Optional: for markdown output if needed ] From 495c7f07afd71ee0851902d012fb6ad0e7d7fdd6 Mon Sep 17 00:00:00 2001 From: John Pollock Date: Wed, 25 Feb 2026 05:28:05 -0600 Subject: [PATCH 4/4] fix: require setuptools>=77.0.0 for PEP 639 license metadata --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0ebbc4b..0db466e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=61.0", "wheel"] +requires = ["setuptools>=77.0.0", "wheel"] build-backend = "setuptools.build_meta" [project]