From 89f02d9d7059dacfe6ccd565ea81654c49828b80 Mon Sep 17 00:00:00 2001 From: Songhao Jia Date: Thu, 12 Feb 2026 12:35:35 -0800 Subject: [PATCH] add cuda backend to backend test infra (#17403) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/17403 Differential Revision: D93019490 --- .ci/scripts/test_backend.sh | 9 +++ .github/workflows/test-backend-cuda.yml | 30 ++++++++++ backends/cuda/test/tester.py | 71 +++++++++++++++++++++++ backends/test/harness/stages/serialize.py | 28 ++++++++- backends/test/suite/flow.py | 9 +++ backends/test/suite/flows/cuda.py | 26 +++++++++ 6 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test-backend-cuda.yml create mode 100644 backends/cuda/test/tester.py create mode 100644 backends/test/suite/flows/cuda.py diff --git a/.ci/scripts/test_backend.sh b/.ci/scripts/test_backend.sh index 1a8e3219be0..37dd8a70859 100755 --- a/.ci/scripts/test_backend.sh +++ b/.ci/scripts/test_backend.sh @@ -56,6 +56,15 @@ if [[ "$FLOW" == *vulkan* ]]; then EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON" fi +if [[ "$FLOW" == *cuda* ]]; then + # Fix libstdc++ GLIBCXX version for CUDA backend. + # The embedded .so files in the CUDA blob require GLIBCXX_3.4.30 + # which the default conda libstdc++ doesn't have. + echo "Installing newer libstdc++ for CUDA backend..." + conda install -y -c conda-forge 'libstdcxx-ng>=12' + export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}" +fi + if [[ "$FLOW" == *arm* ]]; then # Setup ARM deps. diff --git a/.github/workflows/test-backend-cuda.yml b/.github/workflows/test-backend-cuda.yml new file mode 100644 index 00000000000..ac5c9b97c43 --- /dev/null +++ b/.github/workflows/test-backend-cuda.yml @@ -0,0 +1,30 @@ +name: Test CUDA Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-cuda.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-cuda: + uses: ./.github/workflows/_test_backend.yml + with: + backend: cuda + flows: '["cuda"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true + runner-linux: linux.g5.4xlarge.nvidia.gpu diff --git a/backends/cuda/test/tester.py b/backends/cuda/test/tester.py new file mode 100644 index 00000000000..e4ac2b366d4 --- /dev/null +++ b/backends/cuda/test/tester.py @@ -0,0 +1,71 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Any, List, Optional, Tuple + +import executorch +import executorch.backends.test.harness.stages as BaseStages +import torch +from executorch.backends.cuda.cuda_backend import CudaBackend +from executorch.backends.cuda.cuda_partitioner import CudaPartitioner +from executorch.backends.test.harness import Tester as TesterBase +from executorch.backends.test.harness.stages import StageType +from executorch.exir import EdgeCompileConfig +from executorch.exir.backend.partitioner import Partitioner + + +def _create_default_partitioner() -> CudaPartitioner: + """Create a CudaPartitioner with default compile specs.""" + compile_specs = [CudaBackend.generate_method_name_compile_spec("forward")] + return CudaPartitioner(compile_specs) + + +class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower): + """CUDA-specific ToEdgeTransformAndLower stage.""" + + def __init__( + self, + partitioners: Optional[List[Partitioner]] = None, + edge_compile_config: Optional[EdgeCompileConfig] = None, + ): + if partitioners is None: + partitioners = [_create_default_partitioner()] + + super().__init__( + default_partitioner_cls=_create_default_partitioner, + partitioners=partitioners, + edge_compile_config=edge_compile_config + or EdgeCompileConfig(_check_ir_validity=False), + ) + + +class CudaTester(TesterBase): + """ + Tester subclass for CUDA backend. + + This tester defines the recipe for lowering models to the CUDA backend + using AOTInductor compilation. + """ + + def __init__( + self, + module: torch.nn.Module, + example_inputs: Tuple[torch.Tensor], + dynamic_shapes: Optional[Tuple[Any]] = None, + ): + stage_classes = ( + executorch.backends.test.harness.Tester.default_stage_classes() + | { + StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower, + } + ) + + super().__init__( + module=module, + stage_classes=stage_classes, + example_inputs=example_inputs, + dynamic_shapes=dynamic_shapes, + ) diff --git a/backends/test/harness/stages/serialize.py b/backends/test/harness/stages/serialize.py index a5be1631d98..e30da5c34c8 100644 --- a/backends/test/harness/stages/serialize.py +++ b/backends/test/harness/stages/serialize.py @@ -1,7 +1,7 @@ import copy import logging -from typing import Optional +from typing import Dict, Optional from executorch.backends.test.harness.stages.stage import Stage, StageType from executorch.exir import ExecutorchProgramManager @@ -23,12 +23,15 @@ class Serialize(Stage): def __init__(self): self.buffer = None + self.data_files: Dict[str, bytes] = {} def stage_type(self) -> StageType: return StageType.SERIALIZE def run(self, artifact: ExecutorchProgramManager, inputs=None) -> None: self.buffer = artifact.buffer + # Capture external data files (e.g., .ptd files for CUDA backend) + self.data_files = artifact.data_files @property def artifact(self) -> bytes: @@ -40,8 +43,29 @@ def graph_module(self) -> None: def run_artifact(self, inputs): inputs_flattened, _ = tree_flatten(inputs) + + # Combine all external data files into a single buffer for data_map_buffer + # Most backends have at most one external data file, but we concatenate + # in case there are multiple (though this may not be fully supported) + data_map_buffer = None + if self.data_files: + # If there's exactly one data file, use it directly + # Otherwise, log a warning - multiple external files may need special handling + if len(self.data_files) == 1: + data_map_buffer = list(self.data_files.values())[0] + else: + # For multiple files, we use the first one and warn + # This is a limitation - 
proper handling would need runtime support + logger.warning( + f"Multiple external data files found ({list(self.data_files.keys())}). " + f"Using the first one. This may not work correctly for all backends." + ) + data_map_buffer = list(self.data_files.values())[0] + executorch_module = _load_for_executorch_from_buffer( - self.buffer, program_verification=Verification.Minimal + self.buffer, + data_map_buffer=data_map_buffer, + program_verification=Verification.Minimal, ) executorch_output = copy.deepcopy( executorch_module.run_method("forward", tuple(inputs_flattened)) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index f3c9ee75083..2209a0ce11f 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -147,4 +147,13 @@ def all_flows() -> dict[str, TestFlow]: except Exception as e: logger.info(f"Skipping ARM flow registration: {e}") + try: + from executorch.backends.test.suite.flows.cuda import CUDA_TEST_FLOW + + flows += [ + CUDA_TEST_FLOW, + ] + except Exception as e: + logger.info(f"Skipping CUDA flow registration: {e}") + return {f.name: f for f in flows if f is not None} diff --git a/backends/test/suite/flows/cuda.py b/backends/test/suite/flows/cuda.py new file mode 100644 index 00000000000..0c9c1d8bd63 --- /dev/null +++ b/backends/test/suite/flows/cuda.py @@ -0,0 +1,26 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from executorch.backends.cuda.test.tester import CudaTester +from executorch.backends.test.suite.flow import TestFlow + + +def _create_cuda_flow(name: str = "cuda") -> TestFlow: + """Create a test flow for the CUDA backend. + + The CUDA backend saves data externally (.so and weights blob in .ptd file). + The test harness serialize stage has been updated to support loading external + data via the data_map_buffer parameter of _load_for_executorch_from_buffer. + """ + return TestFlow( + name, + backend="cuda", + tester_factory=CudaTester, + quantize=False, + ) + + +CUDA_TEST_FLOW = _create_cuda_flow("cuda")
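
---
Usage sketch (not part of the patch): a minimal example of how the new CudaTester is expected to be driven. It assumes a CUDA-capable machine and that the shared harness Tester exposes the same chained stage methods the other backend testers use (export, to_edge_transform_and_lower, to_executorch, serialize, run_method_and_compare_outputs); the SmallAdd module and tensor shapes below are illustrative only.

    # Illustrative only; assumes a CUDA-capable runner and the standard
    # harness stage pipeline. SmallAdd is a hypothetical test module.
    import torch

    from executorch.backends.cuda.test.tester import CudaTester


    class SmallAdd(torch.nn.Module):
        def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
            return torch.nn.functional.relu(x + y)


    example_inputs = (torch.randn(2, 8), torch.randn(2, 8))

    (
        CudaTester(SmallAdd(), example_inputs)
        .export()                          # torch.export the eager module
        .to_edge_transform_and_lower()     # CUDA partitioner + AOTInductor lowering (stage added above)
        .to_executorch()
        .serialize()                       # captures .pte buffer and external data_files (.ptd)
        .run_method_and_compare_outputs()  # loads via data_map_buffer, compares against eager output
    )

The serialize/run path above is what the serialize.py changes in this patch enable: the external weights blob produced by the CUDA backend is captured from artifact.data_files and passed to _load_for_executorch_from_buffer as data_map_buffer.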