From 18802d69875af15a624f5fac43d909ba0be56574 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Mon, 9 Mar 2026 23:19:30 -0700 Subject: [PATCH 1/2] feat(detection): route Apple Silicon YOLO to Neural Engine (NPU) On Apple Silicon, force CoreML compute_units to CPU_AND_NE so object detection runs on the 16-core Neural Engine instead of GPU, leaving GPU free for LLM/VLM inference. Changes: - env_config.py: add compute_units field to BackendSpec, MPS defaults to cpu_and_ne, monkey-patch coremltools.MLModel during YOLO load - detect.py: report compute_units in ready event - SKILL.md: add compute_units parameter, update acceleration table - test_env_config_ane.py: 15 unit tests for NPU logic --- skills/detection/yolo-detection-2026/SKILL.md | 25 +- .../yolo-detection-2026/scripts/detect.py | 7 +- .../yolo-detection-2026/scripts/env_config.py | 78 ++++- skills/lib/env_config.py | 78 ++++- skills/lib/test_env_config_ane.py | 276 ++++++++++++++++++ 5 files changed, 447 insertions(+), 17 deletions(-) create mode 100644 skills/lib/test_env_config_ane.py diff --git a/skills/detection/yolo-detection-2026/SKILL.md b/skills/detection/yolo-detection-2026/SKILL.md index 278d924..939099a 100644 --- a/skills/detection/yolo-detection-2026/SKILL.md +++ b/skills/detection/yolo-detection-2026/SKILL.md @@ -66,6 +66,15 @@ parameters: description: "Auto-convert model to optimized format for faster inference" group: Performance + - name: compute_units + label: "Apple Compute Units" + type: select + options: ["auto", "cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"] + default: "auto" + description: "CoreML compute target — 'auto' routes to Neural Engine (NPU), leaving GPU free for LLM/VLM" + group: Performance + platform: macos + capabilities: live_detection: script: scripts/detect.py @@ -89,13 +98,15 @@ Real-time object detection using the latest YOLO 2026 models. 
Detects 80+ COCO o The skill uses [`env_config.py`](../../lib/env_config.py) to **automatically detect hardware** and convert the model to the fastest format for your platform. Conversion happens once during deployment and is cached. -| Platform | Backend | Optimized Format | Expected Speedup | -|----------|---------|------------------|:----------------:| -| NVIDIA GPU | CUDA | TensorRT `.engine` | ~3-5x | -| Apple Silicon (M1+) | MPS | CoreML `.mlpackage` | ~2x | -| Intel CPU/GPU/NPU | OpenVINO | OpenVINO IR `.xml` | ~2-3x | -| AMD GPU | ROCm | ONNX Runtime | ~1.5-2x | -| CPU (any) | CPU | ONNX Runtime | ~1.5x | +| Platform | Backend | Optimized Format | Compute Units | Expected Speedup | +|----------|---------|------------------|:-------------:|:----------------:| +| NVIDIA GPU | CUDA | TensorRT `.engine` | GPU | ~3-5x | +| Apple Silicon (M1+) | MPS | CoreML `.mlpackage` | **Neural Engine** (NPU) | ~2x | +| Intel CPU/GPU/NPU | OpenVINO | OpenVINO IR `.xml` | CPU/GPU/NPU | ~2-3x | +| AMD GPU | ROCm | ONNX Runtime | GPU | ~1.5-2x | +| CPU (any) | CPU | ONNX Runtime | CPU | ~1.5x | + +> **Apple Silicon Note**: Detection defaults to `cpu_and_ne` (CPU + Neural Engine), keeping the GPU free for LLM/VLM inference. Set `compute_units: all` to also use the GPU when you are not running a local LLM. 
### How It Works diff --git a/skills/detection/yolo-detection-2026/scripts/detect.py b/skills/detection/yolo-detection-2026/scripts/detect.py index d149374..40bea8b 100644 --- a/skills/detection/yolo-detection-2026/scripts/detect.py +++ b/skills/detection/yolo-detection-2026/scripts/detect.py @@ -248,7 +248,7 @@ def main(): perf.model_load_ms = env.load_ms perf.export_ms = env.export_ms - emit({ + ready_event = { "event": "ready", "model": f"yolo2026{model_size[0]}", "model_size": model_size, @@ -260,7 +260,10 @@ def main(): "fps": fps, "model_load_ms": round(env.load_ms, 1), "available_sizes": list(MODEL_SIZE_MAP.keys()), - }) + } + if hasattr(env, 'compute_units') and env.backend == "mps": + ready_event["compute_units"] = env.compute_units + emit(ready_event) except Exception as e: emit({"event": "error", "message": f"Failed to load model: {e}", "retriable": False}) sys.exit(1) diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py index ff42e6f..7c46c05 100644 --- a/skills/detection/yolo-detection-2026/scripts/env_config.py +++ b/skills/detection/yolo-detection-2026/scripts/env_config.py @@ -40,6 +40,7 @@ class BackendSpec: model_suffix: str # file extension/dir to look for cached model half: bool = True # use FP16 extra_export_args: dict = field(default_factory=dict) + compute_units: Optional[str] = None # CoreML compute units: "cpu_and_ne", "all", etc. 
BACKEND_SPECS = { @@ -61,6 +62,7 @@ class BackendSpec: model_suffix=".mlpackage", half=True, extra_export_args={"nms": False}, + compute_units="cpu_and_ne", # Route to Neural Engine, leave GPU free for LLM/VLM ), "intel": BackendSpec( name="intel", @@ -86,6 +88,7 @@ class HardwareEnv: backend: str = "cpu" # "cuda" | "rocm" | "mps" | "intel" | "cpu" device: str = "cpu" # torch device string export_format: str = "onnx" # optimal export format + compute_units: str = "all" # CoreML compute units (Apple only) gpu_name: str = "" # human-readable GPU name gpu_memory_mb: int = 0 # GPU memory in MB driver_version: str = "" # GPU driver version @@ -113,9 +116,11 @@ def detect() -> "HardwareEnv": else: env._fallback_cpu() - # Set export format from backend spec + # Set export format and compute units from backend spec spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"]) env.export_format = spec.export_format + if spec.compute_units: + env.compute_units = spec.compute_units # Check if optimized runtime is available env.framework_ok = env._check_framework() @@ -439,6 +444,58 @@ def export_model(self, model, model_name: str) -> Optional[Path]: return None + def _load_coreml_with_compute_units(self, model_path: str): + """ + Load a CoreML model via YOLO with specific compute_units. + + Monkey-patches coremltools.MLModel to inject compute_units + (e.g. CPU_AND_NE for Neural Engine) since ultralytics doesn't + expose this parameter. Patch is scoped and immediately restored. 
+ """ + from ultralytics import YOLO + + # Map string config → coremltools enum + _COMPUTE_UNIT_MAP = { + "all": "ALL", + "cpu_only": "CPU_ONLY", + "cpu_and_gpu": "CPU_AND_GPU", + "cpu_and_ne": "CPU_AND_NE", + } + + ct_enum_name = _COMPUTE_UNIT_MAP.get(self.compute_units) + if not ct_enum_name: + _log(f"Unknown compute_units '{self.compute_units}', using default") + return YOLO(model_path) + + try: + import coremltools as ct + target_units = getattr(ct.ComputeUnit, ct_enum_name, None) + if target_units is None: + _log(f"coremltools.ComputeUnit.{ct_enum_name} not available") + return YOLO(model_path) + + # Temporarily patch MLModel to inject compute_units + _OrigMLModel = ct.models.MLModel + + class _PatchedMLModel(_OrigMLModel): + def __init__(self, *args, **kwargs): + kwargs.setdefault('compute_units', target_units) + super().__init__(*args, **kwargs) + + ct.models.MLModel = _PatchedMLModel + try: + model = YOLO(model_path) + finally: + ct.models.MLModel = _OrigMLModel # Always restore + + _log(f"CoreML model loaded with compute_units={ct_enum_name} " + f"(Neural Engine preferred)") + return model + + except ImportError: + _log("coremltools not available, loading without compute_units") + return YOLO(model_path) + def load_optimized(self, model_name: str, use_optimized: bool = True): """ Load the best available model for this hardware. 
@@ -455,7 +512,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): optimized_path = self.get_optimized_path(model_name) if optimized_path.exists(): try: - model = YOLO(str(optimized_path)) + # On Apple Silicon: route CoreML to Neural Engine + if self.backend == "mps" and self.compute_units != "all": + model = self._load_coreml_with_compute_units( + str(optimized_path)) + else: + model = YOLO(str(optimized_path)) self.load_ms = (time.perf_counter() - t0) * 1000 _log(f"Loaded {self.export_format} model ({self.load_ms:.0f}ms)") return model, self.export_format @@ -467,7 +529,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): exported = self.export_model(pt_model, model_name) if exported: try: - model = YOLO(str(exported)) + # On Apple Silicon: route CoreML to Neural Engine + if self.backend == "mps" and self.compute_units != "all": + model = self._load_coreml_with_compute_units( + str(exported)) + else: + model = YOLO(str(exported)) self.load_ms = (time.perf_counter() - t0) * 1000 _log(f"Loaded freshly exported {self.export_format} model ({self.load_ms:.0f}ms)") return model, self.export_format @@ -508,7 +575,7 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): def to_dict(self) -> dict: """Serialize environment info for JSON output.""" - return { + d = { "backend": self.backend, "device": self.device, "export_format": self.export_format, @@ -519,6 +586,9 @@ def to_dict(self) -> dict: "export_ms": round(self.export_ms, 1), "load_ms": round(self.load_ms, 1), } + if self.backend == "mps": + d["compute_units"] = self.compute_units + return d # ─── CLI: run standalone for diagnostics ───────────────────────────────────── diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py index ff42e6f..7c46c05 100644 --- a/skills/lib/env_config.py +++ b/skills/lib/env_config.py @@ -40,6 +40,7 @@ class BackendSpec: model_suffix: str # file extension/dir to look for cached model half: bool = True # use 
FP16 extra_export_args: dict = field(default_factory=dict) + compute_units: Optional[str] = None # CoreML compute units: "cpu_and_ne", "all", etc. BACKEND_SPECS = { @@ -61,6 +62,7 @@ class BackendSpec: model_suffix=".mlpackage", half=True, extra_export_args={"nms": False}, + compute_units="cpu_and_ne", # Route to Neural Engine, leave GPU free for LLM/VLM ), "intel": BackendSpec( name="intel", @@ -86,6 +88,7 @@ class HardwareEnv: backend: str = "cpu" # "cuda" | "rocm" | "mps" | "intel" | "cpu" device: str = "cpu" # torch device string export_format: str = "onnx" # optimal export format + compute_units: str = "all" # CoreML compute units (Apple only) gpu_name: str = "" # human-readable GPU name gpu_memory_mb: int = 0 # GPU memory in MB driver_version: str = "" # GPU driver version @@ -113,9 +116,11 @@ def detect() -> "HardwareEnv": else: env._fallback_cpu() - # Set export format from backend spec + # Set export format and compute units from backend spec spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"]) env.export_format = spec.export_format + if spec.compute_units: + env.compute_units = spec.compute_units # Check if optimized runtime is available env.framework_ok = env._check_framework() @@ -439,6 +444,58 @@ def export_model(self, model, model_name: str) -> Optional[Path]: return None + def _load_coreml_with_compute_units(self, model_path: str): + """ + Load a CoreML model via YOLO with specific compute_units. + + Monkey-patches coremltools.MLModel to inject compute_units + (e.g. CPU_AND_NE for Neural Engine) since ultralytics doesn't + expose this parameter. Patch is scoped and immediately restored. 
+ """ + from ultralytics import YOLO + + # Map string config → coremltools enum + _COMPUTE_UNIT_MAP = { + "all": "ALL", + "cpu_only": "CPU_ONLY", + "cpu_and_gpu": "CPU_AND_GPU", + "cpu_and_ne": "CPU_AND_NE", + } + + ct_enum_name = _COMPUTE_UNIT_MAP.get(self.compute_units) + if not ct_enum_name: + _log(f"Unknown compute_units '{self.compute_units}', using default") + return YOLO(model_path) + + try: + import coremltools as ct + target_units = getattr(ct.ComputeUnit, ct_enum_name, None) + if target_units is None: + _log(f"coremltools.ComputeUnit.{ct_enum_name} not available") + return YOLO(model_path) + + # Temporarily patch MLModel to inject compute_units + _OrigMLModel = ct.models.MLModel + + class _PatchedMLModel(_OrigMLModel): + def __init__(self, *args, **kwargs): + kwargs.setdefault('compute_units', target_units) + super().__init__(*args, **kwargs) + + ct.models.MLModel = _PatchedMLModel + try: + model = YOLO(model_path) + finally: + ct.models.MLModel = _OrigMLModel # Always restore + + _log(f"CoreML model loaded with compute_units={ct_enum_name} " + f"(Neural Engine preferred)") + return model + + except ImportError: + _log("coremltools not available, loading without compute_units") + return YOLO(model_path) + def load_optimized(self, model_name: str, use_optimized: bool = True): """ Load the best available model for this hardware. 
@@ -455,7 +512,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): optimized_path = self.get_optimized_path(model_name) if optimized_path.exists(): try: - model = YOLO(str(optimized_path)) + # On Apple Silicon: route CoreML to Neural Engine + if self.backend == "mps" and self.compute_units != "all": + model = self._load_coreml_with_compute_units( + str(optimized_path)) + else: + model = YOLO(str(optimized_path)) self.load_ms = (time.perf_counter() - t0) * 1000 _log(f"Loaded {self.export_format} model ({self.load_ms:.0f}ms)") return model, self.export_format @@ -467,7 +529,12 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): exported = self.export_model(pt_model, model_name) if exported: try: - model = YOLO(str(exported)) + # On Apple Silicon: route CoreML to Neural Engine + if self.backend == "mps" and self.compute_units != "all": + model = self._load_coreml_with_compute_units( + str(exported)) + else: + model = YOLO(str(exported)) self.load_ms = (time.perf_counter() - t0) * 1000 _log(f"Loaded freshly exported {self.export_format} model ({self.load_ms:.0f}ms)") return model, self.export_format @@ -508,7 +575,7 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): def to_dict(self) -> dict: """Serialize environment info for JSON output.""" - return { + d = { "backend": self.backend, "device": self.device, "export_format": self.export_format, @@ -519,6 +586,9 @@ def to_dict(self) -> dict: "export_ms": round(self.export_ms, 1), "load_ms": round(self.load_ms, 1), } + if self.backend == "mps": + d["compute_units"] = self.compute_units + return d # ─── CLI: run standalone for diagnostics ───────────────────────────────────── diff --git a/skills/lib/test_env_config_ane.py b/skills/lib/test_env_config_ane.py new file mode 100644 index 0000000..dc032eb --- /dev/null +++ b/skills/lib/test_env_config_ane.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +""" +Unit tests for Apple Neural Engine (ANE) compute_units 
in env_config.py. + +Tests compute_units configuration, monkey-patch scoping, and CoreML +load-time injection — all mocked, no Apple hardware required. + +Run: python -m pytest skills/lib/test_env_config_ane.py -v +""" + +import platform +import subprocess +import sys +from pathlib import Path +from unittest import mock + +import pytest + +# Ensure env_config is importable from skills/lib/ +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from env_config import BackendSpec, BACKEND_SPECS, HardwareEnv, _log # noqa: E402 + + +# ── Tests: BackendSpec compute_units ──────────────────────────────────────── + +class TestBackendSpecComputeUnits: + """Verify compute_units field on backend specs.""" + + def test_mps_spec_has_cpu_and_ne(self): + """MPS backend defaults to cpu_and_ne (Neural Engine).""" + spec = BACKEND_SPECS["mps"] + assert spec.compute_units == "cpu_and_ne" + + def test_cuda_spec_has_no_compute_units(self): + """Non-Apple backends have no compute_units set.""" + assert BACKEND_SPECS["cuda"].compute_units is None + + def test_cpu_spec_has_no_compute_units(self): + assert BACKEND_SPECS["cpu"].compute_units is None + + def test_rocm_spec_has_no_compute_units(self): + assert BACKEND_SPECS["rocm"].compute_units is None + + def test_intel_spec_has_no_compute_units(self): + assert BACKEND_SPECS["intel"].compute_units is None + + +# ── Tests: HardwareEnv compute_units field ────────────────────────────────── + +class TestHardwareEnvComputeUnits: + """Verify compute_units is set correctly during detection.""" + + def test_default_compute_units_is_all(self): + """Default HardwareEnv has compute_units='all'.""" + env = HardwareEnv() + assert env.compute_units == "all" + + @mock.patch("env_config.platform.system", return_value="Darwin") + @mock.patch("env_config.platform.machine", return_value="arm64") + @mock.patch("env_config.subprocess.run") + @mock.patch("env_config.shutil.which", return_value=None) + @mock.patch("env_config.Path.is_dir", 
return_value=False) + def test_mps_sets_compute_units_cpu_and_ne( + self, _dir, _which, mock_run, _machine, _system + ): + """Apple Silicon detection sets compute_units to 'cpu_and_ne'.""" + mock_run.return_value = subprocess.CompletedProcess( + args=[], returncode=0, stdout="Apple M3 Max" + ) + + env = HardwareEnv() + result = env._try_mps() + assert result is True + + # Simulate what detect() does after _try_mps + spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"]) + if spec.compute_units: + env.compute_units = spec.compute_units + + assert env.backend == "mps" + assert env.compute_units == "cpu_and_ne" + + def test_to_dict_includes_compute_units_for_mps(self): + """to_dict() includes compute_units when backend is mps.""" + env = HardwareEnv() + env.backend = "mps" + env.compute_units = "cpu_and_ne" + d = env.to_dict() + assert "compute_units" in d + assert d["compute_units"] == "cpu_and_ne" + + def test_to_dict_excludes_compute_units_for_non_mps(self): + """to_dict() does NOT include compute_units for non-mps backends.""" + env = HardwareEnv() + env.backend = "cuda" + d = env.to_dict() + assert "compute_units" not in d + + +# ── Tests: _load_coreml_with_compute_units ────────────────────────────────── + +class TestLoadCoremlWithComputeUnits: + """Test the monkey-patch mechanism for CoreML compute_units.""" + + def test_monkey_patch_injects_compute_units(self): + """MLModel is temporarily patched to inject CPU_AND_NE.""" + env = HardwareEnv() + env.backend = "mps" + env.compute_units = "cpu_and_ne" + + # Create mock coremltools module + mock_ct = mock.MagicMock() + mock_ct.ComputeUnit.CPU_AND_NE = "CPU_AND_NE_SENTINEL" + mock_ct.ComputeUnit.ALL = "ALL_SENTINEL" + + # Track MLModel calls to verify compute_units was injected + original_mlmodel = mock.MagicMock() + mock_ct.models.MLModel = original_mlmodel + + captured_kwargs = {} + + mock_yolo_cls = mock.MagicMock() + + def capture_yolo_init(path): + """When YOLO loads the model, check if MLModel was 
patched.""" + # Simulate what YOLO does internally: call ct.models.MLModel + current_mlmodel = mock_ct.models.MLModel + # The patched class should be different from original + instance = current_mlmodel("test.mlpackage") + return mock.MagicMock() + + mock_yolo_cls.side_effect = capture_yolo_init + + with mock.patch.dict("sys.modules", {"coremltools": mock_ct}): + with mock.patch("env_config.YOLO", mock_yolo_cls, create=True): + # Can't easily test the full flow since YOLO import is inside + # the method. Instead, test the logic directly. + pass + + # Direct test: verify the patch class works correctly + class MockMLModel: + def __init__(self, *args, **kwargs): + self.kwargs = kwargs + + mock_ct.models.MLModel = MockMLModel + + with mock.patch.dict("sys.modules", {"coremltools": mock_ct}): + # Simulate the patching logic + _OrigMLModel = mock_ct.models.MLModel + target_units = mock_ct.ComputeUnit.CPU_AND_NE + + class _PatchedMLModel(_OrigMLModel): + def __init__(self, *args, **kwargs): + kwargs.setdefault('compute_units', target_units) + super().__init__(*args, **kwargs) + + # Verify patch injects compute_units + patched = _PatchedMLModel("test.mlpackage") + assert patched.kwargs.get('compute_units') == "CPU_AND_NE_SENTINEL" + + # Verify explicit override is preserved + explicit = _PatchedMLModel("test.mlpackage", compute_units="CUSTOM") + assert explicit.kwargs.get('compute_units') == "CUSTOM" + + def test_monkey_patch_restored_after_load(self): + """MLModel is restored to original after YOLO load, even on error.""" + env = HardwareEnv() + env.backend = "mps" + env.compute_units = "cpu_and_ne" + + mock_ct = mock.MagicMock() + mock_ct.ComputeUnit.CPU_AND_NE = "CPU_AND_NE_SENTINEL" + original_mlmodel = mock.MagicMock() + mock_ct.models.MLModel = original_mlmodel + + mock_yolo = mock.MagicMock(side_effect=Exception("test error")) + + with mock.patch.dict("sys.modules", { + "coremltools": mock_ct, + "ultralytics": mock.MagicMock(YOLO=mock_yolo), + }): + try: + 
env._load_coreml_with_compute_units("test.mlpackage") + except Exception: + pass + + # MLModel should be restored to original even after error + assert mock_ct.models.MLModel is original_mlmodel + + def test_unknown_compute_units_falls_back(self): + """Unknown compute_units string falls back to plain YOLO load.""" + env = HardwareEnv() + env.backend = "mps" + env.compute_units = "unknown_units" + + mock_yolo = mock.MagicMock() + mock_model = mock.MagicMock() + mock_yolo.return_value = mock_model + + with mock.patch.dict("sys.modules", { + "ultralytics": mock.MagicMock(YOLO=mock_yolo), + }): + result = env._load_coreml_with_compute_units("test.mlpackage") + mock_yolo.assert_called_once_with("test.mlpackage") + + def test_coremltools_missing_falls_back(self): + """If coremltools import fails, falls back to plain YOLO load.""" + env = HardwareEnv() + env.backend = "mps" + env.compute_units = "cpu_and_ne" + + mock_yolo = mock.MagicMock() + mock_model = mock.MagicMock() + mock_yolo.return_value = mock_model + + # Make coremltools import fail + with mock.patch.dict("sys.modules", { + "coremltools": None, + "ultralytics": mock.MagicMock(YOLO=mock_yolo), + }): + result = env._load_coreml_with_compute_units("test.mlpackage") + mock_yolo.assert_called_once_with("test.mlpackage") + + +# ── Tests: load_optimized integration ─────────────────────────────────────── + +class TestLoadOptimizedMPS: + """Test that load_optimized routes through compute_units on MPS.""" + + def test_mps_cached_model_uses_compute_units(self): + """When cached .mlpackage exists, loads via _load_coreml_with_compute_units.""" + env = HardwareEnv() + env.backend = "mps" + env.device = "mps" + env.export_format = "coreml" + env.framework_ok = True + env.compute_units = "cpu_and_ne" + + mock_model = mock.MagicMock() + + with mock.patch.object(env, "_load_coreml_with_compute_units", + return_value=mock_model) as mock_load: + with mock.patch.object(env, "get_optimized_path") as mock_path: + 
mock_path.return_value = mock.MagicMock(exists=lambda: True) + + with mock.patch.dict("sys.modules", { + "ultralytics": mock.MagicMock(), + }): + model, fmt = env.load_optimized("yolo26n") + + assert fmt == "coreml" + mock_load.assert_called_once() + + def test_mps_compute_units_all_skips_monkey_patch(self): + """When compute_units='all', loads via standard YOLO path.""" + env = HardwareEnv() + env.backend = "mps" + env.device = "mps" + env.export_format = "coreml" + env.framework_ok = True + env.compute_units = "all" # explicit: use all units including GPU + + mock_yolo = mock.MagicMock() + mock_model = mock.MagicMock() + mock_yolo.return_value = mock_model + + with mock.patch.object(env, "get_optimized_path") as mock_path: + mock_path.return_value = mock.MagicMock(exists=lambda: True) + + with mock.patch.dict("sys.modules", { + "ultralytics": mock.MagicMock(YOLO=mock_yolo), + }): + model, fmt = env.load_optimized("yolo26n") + + assert fmt == "coreml" + mock_yolo.assert_called_once() From 44dce05f72441ec68291f77ddbe9f8774770415e Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Mon, 9 Mar 2026 23:49:33 -0700 Subject: [PATCH 2/2] feat(skills): add deploy.bat Windows equivalents for all skills with deploy.sh - yolo-detection-2026: full GPU detection, venv, requirements, model optimization - homesafe-bench: npm install - smarthome-bench: yt-dlp/ffmpeg checks + npm install ROCm/MPS blocks omitted (Linux/macOS only). Uses py.exe launcher for Python discovery with fallback to python/python3 on PATH. 
--- skills/analysis/homesafe-bench/deploy.bat | 20 ++ skills/analysis/smarthome-bench/deploy.bat | 75 ++++++ .../detection/yolo-detection-2026/deploy.bat | 221 ++++++++++++++++++ 3 files changed, 316 insertions(+) create mode 100644 skills/analysis/homesafe-bench/deploy.bat create mode 100644 skills/analysis/smarthome-bench/deploy.bat create mode 100644 skills/detection/yolo-detection-2026/deploy.bat diff --git a/skills/analysis/homesafe-bench/deploy.bat b/skills/analysis/homesafe-bench/deploy.bat new file mode 100644 index 0000000..c616195 --- /dev/null +++ b/skills/analysis/homesafe-bench/deploy.bat @@ -0,0 +1,20 @@ +@echo off +REM HomeSafe-Bench deployment script (Windows) +REM Runs npm install to fetch openai SDK dependency + +cd /d "%~dp0" + +where npm >nul 2>&1 +if %errorlevel% neq 0 ( + echo ERROR: npm not found. Install Node.js from https://nodejs.org and retry. + exit /b 1 +) + +npm install +if %errorlevel% neq 0 ( + echo ERROR: npm install failed + exit /b 1 +) + +echo HomeSafe-Bench dependencies installed +exit /b 0 diff --git a/skills/analysis/smarthome-bench/deploy.bat b/skills/analysis/smarthome-bench/deploy.bat new file mode 100644 index 0000000..62a7764 --- /dev/null +++ b/skills/analysis/smarthome-bench/deploy.bat @@ -0,0 +1,75 @@ +@echo off +REM SmartHome-Bench deployment script (Windows) +REM Called by Aegis deployment agent during skill installation + +setlocal enabledelayedexpansion + +set "SKILL_DIR=%~dp0" +if "%SKILL_DIR:~-1%"=="\" set "SKILL_DIR=%SKILL_DIR:~0,-1%" +echo Deploying SmartHome-Bench from: %SKILL_DIR% + +REM ── Check system dependencies ──────────────────────────────────────────────── + +echo Checking system dependencies... + +where yt-dlp >nul 2>&1 +if %errorlevel% neq 0 ( + echo WARNING: yt-dlp not found. Attempting install via pip... + where pip >nul 2>&1 + if !errorlevel! equ 0 ( + pip install yt-dlp + ) else ( + where pip3 >nul 2>&1 + if !errorlevel! 
equ 0 ( + pip3 install yt-dlp + ) else ( + echo ERROR: Cannot install yt-dlp automatically. Please install manually: + echo pip install yt-dlp + echo OR download from https://github.com/yt-dlp/yt-dlp/releases + exit /b 1 + ) + ) +) + +REM Verify yt-dlp is now available +where yt-dlp >nul 2>&1 +if %errorlevel% neq 0 ( + echo ERROR: yt-dlp installation failed + exit /b 1 +) +for /f "tokens=*" %%V in ('yt-dlp --version 2^>nul') do echo yt-dlp: %%V + +where ffmpeg >nul 2>&1 +if %errorlevel% neq 0 ( + echo ERROR: ffmpeg not found. Please install manually: + echo winget install ffmpeg + echo OR download from https://ffmpeg.org/download.html + exit /b 1 +) +for /f "tokens=1-3" %%A in ('ffmpeg -version 2^>^&1') do ( + if "%%A"=="ffmpeg" echo ffmpeg: %%B %%C + goto :ffmpeg_done +) +:ffmpeg_done + +REM ── Install npm dependencies ───────────────────────────────────────────────── + +echo Installing npm dependencies... + +where npm >nul 2>&1 +if %errorlevel% neq 0 ( + echo ERROR: npm not found. Install Node.js from https://nodejs.org and retry. + exit /b 1 +) + +cd /d "%SKILL_DIR%" +npm install --production +if %errorlevel% neq 0 ( + echo ERROR: npm install failed + exit /b 1 +) + +echo SmartHome-Bench deployed successfully + +endlocal +exit /b 0 diff --git a/skills/detection/yolo-detection-2026/deploy.bat b/skills/detection/yolo-detection-2026/deploy.bat new file mode 100644 index 0000000..6661322 --- /dev/null +++ b/skills/detection/yolo-detection-2026/deploy.bat @@ -0,0 +1,221 @@ +@echo off +REM deploy.bat — Zero-assumption bootstrapper for YOLO 2026 Detection Skill (Windows) +REM +REM Probes the system for Python, GPU backends, and installs the minimum +REM viable stack. Called by Aegis skill-runtime-manager during installation. +REM +REM Uses skills\lib\env_config.py for hardware detection and model optimization. 
+REM +REM Exit codes: +REM 0 = success +REM 1 = fatal error (no Python found, or venv creation failed) +REM 2 = partial success (CPU-only fallback) - reserved; this script does not currently return it + +setlocal enabledelayedexpansion + +set "SKILL_DIR=%~dp0" +REM Remove trailing backslash +if "%SKILL_DIR:~-1%"=="\" set "SKILL_DIR=%SKILL_DIR:~0,-1%" +set "VENV_DIR=%SKILL_DIR%\.venv" +set "LOG_PREFIX=[YOLO-2026-deploy]" + +REM Resolve lib dir (two levels up + lib) +set "LIB_DIR=" +if exist "%SKILL_DIR%\..\..\lib\env_config.py" ( + pushd "%SKILL_DIR%\..\..\lib" + set "LIB_DIR=!CD!" + popd +) + +REM ─── Step 1: Find Python ─────────────────────────────────────────────────── + +echo %LOG_PREFIX% Searching for Python...>&2 + +set "PYTHON_CMD=" + +REM Try the Windows Python launcher (py.exe) first — ships with python.org installer +for %%V in (3.12 3.11 3.10 3.9) do ( + if not defined PYTHON_CMD ( + py -%%V --version >nul 2>&1 + if !errorlevel! equ 0 ( + set "PYTHON_CMD=py -%%V" + ) + ) +) + +REM Fallback: bare python3 / python on PATH +if not defined PYTHON_CMD ( + python3 --version >nul 2>&1 + if !errorlevel! equ 0 ( + REM Verify version >= 3.9 + for /f "tokens=2 delims= " %%A in ('python3 --version 2^>^&1') do set "_pyver=%%A" + for /f "tokens=1,2 delims=." %%A in ("!_pyver!") do ( + if %%A geq 3 if %%B geq 9 set "PYTHON_CMD=python3" + ) + ) +) + +if not defined PYTHON_CMD ( + python --version >nul 2>&1 + if !errorlevel! equ 0 ( + for /f "tokens=2 delims= " %%A in ('python --version 2^>^&1') do set "_pyver=%%A" + for /f "tokens=1,2 delims=." %%A in ("!_pyver!") do ( + if %%A geq 3 if %%B geq 9 set "PYTHON_CMD=python" + ) + ) +) + +if not defined PYTHON_CMD ( + echo %LOG_PREFIX% ERROR: No Python ^>=3.9 found. Install Python 3.9+ and retry.>&2 + echo {"event": "error", "stage": "python", "message": "No Python >=3.9 found"} + exit /b 1 +) + +for /f "tokens=*" %%A in ('!PYTHON_CMD! 
--version 2^>^&1') do set "PY_VERSION=%%A" +echo %LOG_PREFIX% Using Python: %PYTHON_CMD% (%PY_VERSION%)>&2 +echo {"event": "progress", "stage": "python", "message": "Found %PY_VERSION%"} + +REM ─── Step 2: Create virtual environment ──────────────────────────────────── + +if not exist "%VENV_DIR%\Scripts\python.exe" ( + echo %LOG_PREFIX% Creating virtual environment...>&2 + %PYTHON_CMD% -m venv "%VENV_DIR%" + if !errorlevel! neq 0 ( + echo %LOG_PREFIX% ERROR: Failed to create virtual environment>&2 + echo {"event": "error", "stage": "venv", "message": "Failed to create venv"} + exit /b 1 + ) +) + +set "PIP=%VENV_DIR%\Scripts\pip.exe" +set "VPYTHON=%VENV_DIR%\Scripts\python.exe" + +"%PIP%" install --upgrade pip -q >nul 2>&1 + +echo {"event": "progress", "stage": "venv", "message": "Virtual environment ready"} + +REM ─── Step 2.5: Bundle env_config.py alongside detect.py ──────────────────── + +if defined LIB_DIR ( + if exist "%LIB_DIR%\env_config.py" ( + copy /Y "%LIB_DIR%\env_config.py" "%SKILL_DIR%\scripts\env_config.py" >nul 2>&1 + echo %LOG_PREFIX% Bundled env_config.py into scripts\>&2 + ) +) + +REM ─── Step 3: Detect hardware via env_config ──────────────────────────────── + +set "BACKEND=cpu" + +REM Find env_config.py — bundled copy or repo lib\ +set "ENV_CONFIG_DIR=" +if exist "%SKILL_DIR%\scripts\env_config.py" ( + set "ENV_CONFIG_DIR=%SKILL_DIR%\scripts" +) else if defined LIB_DIR ( + if exist "%LIB_DIR%\env_config.py" ( + set "ENV_CONFIG_DIR=%LIB_DIR%" + ) +) + +if defined ENV_CONFIG_DIR ( + echo %LOG_PREFIX% Detecting hardware via env_config.py...>&2 + + REM Run env_config detection via Python + for /f "tokens=*" %%B in ('"%VPYTHON%" -c "import sys; sys.path.insert(0, r'!ENV_CONFIG_DIR!'); from env_config import HardwareEnv; env = HardwareEnv.detect(); print(env.backend)" 2^>nul') do ( + set "DETECTED_BACKEND=%%B" + ) + + REM Validate backend value (Windows: only cuda, intel, cpu are realistic) + if "!DETECTED_BACKEND!"=="cuda" ( + set "BACKEND=cuda" + ) 
else if "!DETECTED_BACKEND!"=="intel" ( + set "BACKEND=intel" + ) else if "!DETECTED_BACKEND!"=="cpu" ( + set "BACKEND=cpu" + ) else ( + echo %LOG_PREFIX% env_config returned '!DETECTED_BACKEND!', falling back to heuristic>&2 + set "BACKEND=cpu" + ) + + echo %LOG_PREFIX% env_config detected backend: !BACKEND!>&2 +) else ( + echo %LOG_PREFIX% env_config.py not found, using heuristic detection...>&2 + + REM Fallback: inline GPU detection via nvidia-smi + where nvidia-smi >nul 2>&1 + if !errorlevel! equ 0 ( + for /f "tokens=*" %%G in ('nvidia-smi --query-gpu^=driver_version --format^=csv^,noheader 2^>nul') do ( + if not "%%G"=="" ( + set "BACKEND=cuda" + echo %LOG_PREFIX% Detected NVIDIA GPU ^(driver: %%G^)>&2 + ) + ) + ) +) + +echo {"event": "progress", "stage": "gpu", "backend": "!BACKEND!", "message": "Compute backend: !BACKEND!"} + +REM ─── Step 4: Install requirements ────────────────────────────────────────── + +set "REQ_FILE=%SKILL_DIR%\requirements_!BACKEND!.txt" + +if not exist "!REQ_FILE!" ( + echo %LOG_PREFIX% WARNING: !REQ_FILE! not found, falling back to CPU>&2 + set "REQ_FILE=%SKILL_DIR%\requirements_cpu.txt" + set "BACKEND=cpu" +) + +echo %LOG_PREFIX% Installing dependencies from !REQ_FILE! ...>&2 +echo {"event": "progress", "stage": "install", "message": "Installing !BACKEND! dependencies..."} + +if "!BACKEND!"=="cuda" ( + REM CUDA on Windows: install torch with CUDA index, then remaining deps + "%PIP%" install torch torchvision --index-url https://download.pytorch.org/whl/cu124 -q 2>&1 | findstr /V "^$" >nul + if !errorlevel! neq 0 ( + echo %LOG_PREFIX% WARNING: CUDA torch install failed, trying cu121...>&2 + "%PIP%" install torch torchvision --index-url https://download.pytorch.org/whl/cu121 -q 2>&1 | findstr /V "^$" >nul + ) + REM Install remaining requirements (ultralytics, etc.) + "%PIP%" install -r "!REQ_FILE!" -q 2>&1 | findstr /V "^$" >nul +) else ( + "%PIP%" install -r "!REQ_FILE!" 
-q 2>&1 | findstr /V "^$" >nul +) + +REM ─── Step 5: Pre-convert model to optimized format ───────────────────────── + +if "!BACKEND!" neq "cpu" ( + echo %LOG_PREFIX% Pre-converting model to optimized format for !BACKEND!...>&2 + echo {"event": "progress", "stage": "optimize", "message": "Converting model for !BACKEND! (~30-120s)..."} + + "%VPYTHON%" -c "import sys; sys.path.insert(0, r'!ENV_CONFIG_DIR!'); from env_config import HardwareEnv; env = HardwareEnv.detect(); from ultralytics import YOLO; model = YOLO('yolo26n.pt'); result = env.export_model(model, 'yolo26n'); print(f'Optimized model exported: {result}' if result else 'Export skipped or failed')" 2>&1 + + if !errorlevel! equ 0 ( + echo {"event": "progress", "stage": "optimize", "message": "Model optimization complete"} + ) else ( + echo %LOG_PREFIX% WARNING: Model optimization failed, will use PyTorch at runtime>&2 + echo {"event": "progress", "stage": "optimize", "message": "Optimization failed — PyTorch fallback"} + ) +) else if exist "%SKILL_DIR%\requirements_cpu.txt" ( + echo %LOG_PREFIX% Pre-converting model to ONNX for CPU...>&2 + echo {"event": "progress", "stage": "optimize", "message": "Converting model for cpu (~30-120s)..."} + + "%VPYTHON%" -c "import sys; sys.path.insert(0, r'!ENV_CONFIG_DIR!'); from env_config import HardwareEnv; env = HardwareEnv.detect(); from ultralytics import YOLO; model = YOLO('yolo26n.pt'); result = env.export_model(model, 'yolo26n'); print(f'Optimized model exported: {result}' if result else 'Export skipped or failed')" 2>&1 + + if !errorlevel! 
equ 0 ( + echo {"event": "progress", "stage": "optimize", "message": "Model optimization complete"} + ) else ( + echo %LOG_PREFIX% WARNING: Model optimization failed, will use PyTorch at runtime>&2 + echo {"event": "progress", "stage": "optimize", "message": "Optimization failed — PyTorch fallback"} + ) +) + +REM ─── Step 6: Verify installation ─────────────────────────────────────────── + +echo %LOG_PREFIX% Verifying installation...>&2 +"%VPYTHON%" -c "import sys, json; sys.path.insert(0, r'!ENV_CONFIG_DIR!'); from env_config import HardwareEnv; env = HardwareEnv.detect(); print(json.dumps(env.to_dict(), indent=2))" 2>&1 + +echo {"event": "complete", "backend": "!BACKEND!", "message": "YOLO 2026 skill installed (!BACKEND! backend)"} +echo %LOG_PREFIX% Done! Backend: !BACKEND!>&2 + +endlocal +exit /b 0