From 0befdc792142c42b1bc72e435a6ab5e5cf4362f7 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 20 May 2026 07:41:25 -0700 Subject: [PATCH] Revert "Bump PyTorch pins to 2.12 (#19643)" This reverts commit d66a37c3f709c48f397f03f1e8b8ba80be8e4d62. --- .ci/docker/build.sh | 5 -- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- .ci/docker/common/install_cache.sh | 3 - .ci/docker/common/install_pytorch.sh | 8 +- .ci/docker/ubuntu/Dockerfile | 3 - .ci/scripts/utils.sh | 7 +- .github/workflows/mlx.yml | 4 +- backends/arm/_passes/arm_pass.py | 38 --------- .../test_per_channel_conversion.py | 15 ++-- .../llama3_2_vision/text_decoder/model.py | 14 ++-- .../text_decoder/test/test_text_decoder.py | 1 + .../models/parakeet/export_parakeet_tdt.py | 4 +- extension/llm/modules/attention.py | 8 +- install_requirements.py | 4 +- .../c10/torch/headeronly/macros/Macros.h | 79 ++++--------------- .../c10/torch/headeronly/util/BFloat16.h | 13 ++- torch_pin.py | 2 +- 17 files changed, 51 insertions(+), 159 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 24c4efce24a..0a4c6a6f757 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -97,10 +97,6 @@ esac TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt) BUILD_DOCS=1 -if [[ "${GCC_VERSION:-}" == "11" && -z "${SKIP_PYTORCH:-}" ]]; then - PYTORCH_BUILD_MAX_JOBS=6 -fi - # Copy requirements-lintrunner.txt from root to here cp ../../requirements-lintrunner.txt ./ @@ -113,7 +109,6 @@ docker build \ --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \ --build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \ --build-arg "TORCH_VERSION=${TORCH_VERSION}" \ - --build-arg "PYTORCH_BUILD_MAX_JOBS=${PYTORCH_BUILD_MAX_JOBS:-}" \ --build-arg "BUCK2_VERSION=${BUCK2_VERSION}" \ --build-arg "LINTRUNNER=${LINTRUNNER:-}" \ --build-arg "BUILD_DOCS=${BUILD_DOCS}" \ diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 242371cbebe..f6e39a63b92 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -release/2.12 +release/2.11 \ No newline at end of file diff --git a/.ci/docker/common/install_cache.sh b/.ci/docker/common/install_cache.sh index 82be8697320..7b7d39994ca 100755 --- a/.ci/docker/common/install_cache.sh +++ b/.ci/docker/common/install_cache.sh @@ -76,9 +76,6 @@ init_sccache() { # This is the remote cache bucket export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2 export SCCACHE_S3_KEY_PREFIX=executorch - export SCCACHE_REGION=us-east-1 - export AWS_REGION=us-east-1 - export AWS_DEFAULT_REGION=us-east-1 export SCCACHE_IDLE_TIMEOUT=0 export SCCACHE_ERROR_LOG=/tmp/sccache_error.log export RUST_LOG=sccache::server=error diff --git a/.ci/docker/common/install_pytorch.sh b/.ci/docker/common/install_pytorch.sh index 3c80d093ab2..548a24f885d 100755 --- a/.ci/docker/common/install_pytorch.sh +++ b/.ci/docker/common/install_pytorch.sh @@ -27,12 +27,6 @@ install_pytorch_and_domains() { chown -R ci-user . export _GLIBCXX_USE_CXX11_ABI=1 - if [[ "$(uname -m)" == "aarch64" ]]; then - export BUILD_IGNORE_SVE_UNAVAILABLE=1 - fi - if [[ -n "${PYTORCH_BUILD_MAX_JOBS:-}" ]]; then - export MAX_JOBS="${PYTORCH_BUILD_MAX_JOBS}" - fi # Then build and install PyTorch conda_run python setup.py bdist_wheel pip_install "$(echo dist/*.whl)" @@ -40,7 +34,7 @@ install_pytorch_and_domains() { # Grab the pinned audio and vision commits from PyTorch TORCHAUDIO_VERSION=release/2.11 export TORCHAUDIO_VERSION - TORCHVISION_VERSION=release/0.27 + TORCHVISION_VERSION=release/0.26 export TORCHVISION_VERSION install_domains diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index 9a5b2536df0..0e2d7e48eb9 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -62,12 +62,9 @@ RUN bash ./install_cache.sh && rm install_cache.sh utils.sh ENV SCCACHE_BUCKET ossci-compiler-cache-circleci-v2 ENV SCCACHE_S3_KEY_PREFIX executorch ENV SCCACHE_REGION us-east-1 -ENV AWS_REGION us-east-1 -ENV AWS_DEFAULT_REGION us-east-1 ARG TORCH_VERSION ARG SKIP_PYTORCH -ARG PYTORCH_BUILD_MAX_JOBS COPY ./common/install_pytorch.sh install_pytorch.sh COPY ./common/utils.sh utils.sh RUN if [ -z "${SKIP_PYTORCH}" ]; then bash ./install_pytorch.sh; fi && rm install_pytorch.sh utils.sh diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index b312d0ede83..486745f4bf6 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -107,7 +107,7 @@ install_pytorch_and_domains() { local torch_release=$(cat version.txt) # Download key must match the upload key below (basename of dist/*.whl, # which always carries setup.py's resolved +gitHASH). Branch-ref pins - # like `release/2.12` would otherwise produce `+gitrelease` here and + # like `release/2.11` would otherwise produce `+gitrelease` here and # never hit the cache. local torch_short_hash=$(git rev-parse --short=7 HEAD) local torch_wheel_path="cached_artifacts/pytorch/executorch/pytorch_wheels/${system_name}/${python_version}" @@ -132,9 +132,6 @@ install_pytorch_and_domains() { # (e.g. executorch's requirements-ci.txt). pip install -r requirements-build.txt git submodule update --init --recursive - if [[ "$(uname -m)" == "aarch64" ]]; then - export BUILD_IGNORE_SVE_UNAVAILABLE=1 - fi USE_DISTRIBUTED=1 python setup.py bdist_wheel pip install "$(echo dist/*.whl)" @@ -178,7 +175,7 @@ install_pytorch_and_domains() { # Grab the pinned audio and vision commits from PyTorch TORCHAUDIO_VERSION=release/2.11 export TORCHAUDIO_VERSION - TORCHVISION_VERSION=release/0.27 + TORCHVISION_VERSION=release/0.26 export TORCHVISION_VERSION install_domains diff --git a/.github/workflows/mlx.yml b/.github/workflows/mlx.yml index c4be146f862..d429db16053 100644 --- a/.github/workflows/mlx.yml +++ b/.github/workflows/mlx.yml @@ -120,10 +120,10 @@ jobs: --prompt-len 4 \ --max-new-tokens 5 2>&1) echo "$OUTPUT" - if echo "$OUTPUT" | grep -q "Generated token ids: \[167, 94, 253, 88, 227\]"; then + if echo "$OUTPUT" | grep -q "Generated token ids: \[167, 167, 81, 167, 81\]"; then echo "Success: Qwen 3.5 MoE MLX export + inference completed with expected output" else - echo "Failed: unexpected output (expected [167, 94, 253, 88, 227])" + echo "Failed: unexpected output (expected [167, 167, 81, 167, 81])" exit 1 fi echo "::endgroup::" diff --git a/backends/arm/_passes/arm_pass.py b/backends/arm/_passes/arm_pass.py index add0f3aeb20..1a1a179f456 100644 --- a/backends/arm/_passes/arm_pass.py +++ b/backends/arm/_passes/arm_pass.py @@ -9,14 +9,12 @@ from abc import abstractmethod from typing import Any, List, Optional, Set, Type -import torch from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY from executorch.backends.arm.tosa.mapping import TosaSpecialDtype from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.pass_base import ExportPass, NodeMetadata, ProxyValue from torch.fx import GraphModule from torch.fx.passes.infra.pass_base import PassResult -from torch.utils import _pytree as pytree class ArmPass(ExportPass): @@ -81,13 +79,6 @@ def get_name(pass_) -> str: ) def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False): - if ( - op == exir_ops.edge.aten.bmm.default - and isinstance(meta, NodeMetadata) - and len(meta.data.get("input_qparams", {})) > 0 - ): - return self._call_quantized_bmm_without_fake_kernel(op, args, kwargs, meta) - if not updated: return super().call_operator(op, args, kwargs, meta) @@ -100,35 +91,6 @@ def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False) new_meta["stack_trace"] = f"{old_stack_trace}\n{traceback.format_stack()[-2]}" return super().call_operator(op, args, kwargs, NodeMetadata(new_meta)) - def _call_quantized_bmm_without_fake_kernel( - self, - op, - args: tuple[ProxyValue, ...], - kwargs: dict[str, Any], - meta: NodeMetadata, - ) -> ProxyValue: - old_val = meta.data["val"] - output_qparams = meta.data.get("output_qparams", {}) - dtype = ( - next(iter(output_qparams.values())).dtype - if len(output_qparams) > 0 - else old_val.dtype - ) - res_data = torch.empty_like(old_val, dtype=dtype) - - args_proxy, kwargs_proxy = pytree.tree_map_only( - ProxyValue, lambda x: x.proxy, (args, kwargs) - ) - res_proxy = self.tracer.create_proxy( - "call_function", - op, - args_proxy, - kwargs_proxy, - ) - res_proxy.node.meta.update(meta.data) - self.tracer.set_metadata(res_proxy.node, res_data) - return ProxyValue(res_data, res_proxy) - def call_submodule( self, graph_module: GraphModule, inputs: tuple[Any, ...] ) -> PassResult: diff --git a/backends/nxp/tests/generic_tests/test_per_channel_conversion.py b/backends/nxp/tests/generic_tests/test_per_channel_conversion.py index 706d8ed3e14..b3034ff17ed 100644 --- a/backends/nxp/tests/generic_tests/test_per_channel_conversion.py +++ b/backends/nxp/tests/generic_tests/test_per_channel_conversion.py @@ -169,19 +169,14 @@ def test_per_channel_convolution(self, _, use_qat: bool): atol=1.0, ) - conv_nodes = [ - node - for node in exported_program.graph.nodes - if node.target == exir_ops.edge.aten.convolution.default - ] - assert len(conv_nodes) == 1 - - conv_node = conv_nodes[0] + nodes = list(exported_program.graph.nodes) + assert ( - conv_node.args[1].target + nodes[8].target == exir_ops.edge.quantized_decomposed.dequantize_per_channel.default ) assert ( - conv_node.args[2].target + nodes[9].target == exir_ops.edge.quantized_decomposed.dequantize_per_channel.default ) + assert nodes[10].target == exir_ops.edge.aten.convolution.default diff --git a/examples/models/llama3_2_vision/text_decoder/model.py b/examples/models/llama3_2_vision/text_decoder/model.py index 9f15f777045..8f3a620affc 100644 --- a/examples/models/llama3_2_vision/text_decoder/model.py +++ b/examples/models/llama3_2_vision/text_decoder/model.py @@ -181,19 +181,19 @@ def get_example_kwarg_inputs(self): return None def get_dynamic_shapes(self): - static = torch.export.Dim.STATIC + batch_size = 1 dim_seq_len = torch.export.Dim("token_dim", min=1, max=self.max_seq_len) # Hardcoding # of tiles to be 2. image tokens per tile is 1601. if self.use_kv_cache: dynamic_shapes = { - "tokens": {0: static, 1: dim_seq_len}, - "encoder_input": {0: static, 1: static, 2: static}, - "encoder_mask": {0: static, 1: dim_seq_len, 2: static}, - "mask": {0: static, 1: dim_seq_len, 2: static}, - "input_pos": {0: static, 1: dim_seq_len}, + "tokens": {0: batch_size, 1: dim_seq_len}, + "encoder_input": None, + "encoder_mask": {0: 1, 1: dim_seq_len, 2: None}, + "mask": {0: batch_size, 1: dim_seq_len, 2: None}, + "input_pos": {0: batch_size, 1: dim_seq_len}, } else: dynamic_shapes = { - "tokens": {0: static, 1: dim_seq_len}, + "tokens": {0: batch_size, 1: dim_seq_len}, } return dynamic_shapes diff --git a/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py b/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py index 0ef7b298139..4af637212a8 100644 --- a/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py +++ b/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py @@ -69,6 +69,7 @@ def test_llama3_2_text_decoder_aoti(self) -> None: encoder, model.get_example_inputs(), kwargs=model.get_example_kwarg_inputs(), + dynamic_shapes=model.get_dynamic_shapes(), strict=True, ) with tempfile.TemporaryDirectory() as tmpdir: diff --git a/examples/models/parakeet/export_parakeet_tdt.py b/examples/models/parakeet/export_parakeet_tdt.py index 75943f6c4ae..6a18cd58218 100644 --- a/examples/models/parakeet/export_parakeet_tdt.py +++ b/examples/models/parakeet/export_parakeet_tdt.py @@ -360,8 +360,8 @@ def export_all( preprocessor_wrapper, (sample_audio, sample_length), dynamic_shapes={ - # min=10 frames = 0.1 sec @ 16kHz, max aligned with encoder limit. - "audio": {0: Dim.AUTO(min=1600, max=max_audio_samples)}, + # min=1600 samples = 0.1 sec @ 16kHz, max aligned with encoder limit + "audio": {0: Dim("audio_len", min=1600, max=max_audio_samples)}, "length": {}, }, strict=False, diff --git a/extension/llm/modules/attention.py b/extension/llm/modules/attention.py index 8869553875e..f9446ea3aa7 100644 --- a/extension/llm/modules/attention.py +++ b/extension/llm/modules/attention.py @@ -302,9 +302,11 @@ def false_fn(y): k, v = calculate_kv(y) else: # Expecting the k, v returning here to be the same size of self.kv_cache - # In eager, we expect this predicate to specialize. In export, keep it - # as a tensor predicate so AOTI does not introduce unbacked symbols. - k, v, cache_pos = torch.cond(torch.isnan(y).all(), true_fn, false_fn, (y,)) + # In eager, we expect this predicate to specialize. In export, this will + # become a SymBool so it's not specialized. + k, v, cache_pos = torch.cond( + torch.isnan(y).all().item(), true_fn, false_fn, (y,) + ) # Update key-value cache self.kv_cache.k_cache.copy_(k) self.kv_cache.v_cache.copy_(v) diff --git a/install_requirements.py b/install_requirements.py index 53204ffd3ee..b30068cbdb8 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -49,7 +49,7 @@ def install_requirements(use_pytorch_nightly): # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them - ("torch==2.12.0" if use_pytorch_nightly else "torch"), + ("torch==2.11.0" if use_pytorch_nightly else "torch"), ] # Install the requirements for core ExecuTorch package. @@ -112,7 +112,7 @@ def install_optional_example_requirements(use_pytorch_nightly): print("Installing torch domain libraries") DOMAIN_LIBRARIES = [ - ("torchvision==0.27.0" if use_pytorch_nightly else "torchvision"), + ("torchvision==0.26.0" if use_pytorch_nightly else "torchvision"), ("torchaudio==2.11.0" if use_pytorch_nightly else "torchaudio"), ] # Then install domain libraries diff --git a/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h b/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h index cef99df3f56..63aa0d20d8e 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +++ b/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h @@ -325,88 +325,41 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; #define C10_HIP_HOST_DEVICE #endif +#if defined(USE_ROCM) // C10_WARP_SIZE is only allowed for device code. -// Host code dynamically-sized launch configs _must_ use at::cuda::warp_size(). -// Host or device statically-sized arrays _must_ use either -// C10_WARP_SIZE_UPPER_BOUND or C10_WARP_SIZE_LOWER_BOUND, as needed. -// +// Host code _must_ use at::cuda::warp_size() // HIP header used to define warpSize as a constexpr that was either 32 or 64 // depending on the target device, and then always set it to 64 for host code. -// For a time, that allowed C10_WARP_SIZE to be defined like so: -// -// #ifdef USE_ROCM -// #define C10_WARP_SIZE warpSize -// #else -// #define C10_WARP_SIZE 32 -// #endif -// -// In ROCm 7, warpSize is no longer constexpr, matching CUDA behavior. -// We can now only use warpSize for C10_WARP_SIZE in device code and this is -// enforced by using __device__ in its definition. In host code where -// C10_WARP_SIZE was previously used as a compile-time constant, this will now -// cause a compile-time error. -// -// If an array was previously expected to be sized at compile-time using -// C10_WARP_SIZE, users must now use either C10_WARP_SIZE_UPPER_BOUND or -// C10_WARP_SIZE_LOWER_BOUND depending on the situation. -// -// If C10_WARP_SIZE was previously used to determine kernel launch sizes, users -// must now use at::cuda::warp_size() for the dynamic runtime query. -// -// Unfortunately, C10_WARP_SIZE has been public and available for both host and -// device since approximately 2019, so forcing it to be device-only would break -// existing code in the wild. -#if defined(USE_ROCM) +// Host pass of HIP compiler needs C10_WARP_SIZE defined to _something_ so we +// set it to something unreasonable to trigger obvious host code errors. + namespace at::cuda { TORCH_CUDA_CPP_API int warp_size(); } -#if defined(__HIPCC__) -static __host__ inline int C10_WARP_SIZE_INTERNAL() { +#ifdef __HIPCC__ +static inline int __host__ C10_WARP_SIZE_INTERNAL() { return at::cuda::warp_size(); } -// NOTE: __device__ C10_WARP_SIZE_INTERNAL -// For __SPIRV__, we must use dynamic warpSize. When not targeting __SPIRV__, -// we can use constexpr. This matches prior behavior. We preserve this for -// backward compatibility instead of forcing old code to use dynamic warpSize -// and losing constexpr. However, compiling for --offload-arch=amdgcnspirv -// could expose where C10_WARP_SIZE was used incorrectly where the dynamic -// warpSize is not allowed. -#if defined(__SPIRV__) -static __device__ inline int C10_WARP_SIZE_INTERNAL() { - return warpSize; -} -#else // __SPIRV__ -static __device__ inline constexpr int C10_WARP_SIZE_INTERNAL() { + +static inline constexpr int __device__ C10_WARP_SIZE_INTERNAL() { #if defined(__GFX9__) return 64; #else // __GFX9__ return 32; #endif // __GFX9__ } -#endif // __SPIRV__ -#if defined(__SPIRV__) -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 64 -#elif defined(__GFX9__) -#define C10_WARP_SIZE_LOWER_BOUND 64 -#define C10_WARP_SIZE_UPPER_BOUND 64 -#else -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 32 -#endif -#else // !__HIPCC__ +#else // __HIPCC__ static inline int C10_WARP_SIZE_INTERNAL() { return at::cuda::warp_size(); } -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 64 #endif // __HIPCC__ + #define C10_WARP_SIZE (C10_WARP_SIZE_INTERNAL()) -#else // !USE_ROCM +#define C10_WARP_SIZE_STATIC 64 + +#else // defined(USE_ROCM) #define C10_WARP_SIZE 32 -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 32 -#endif // USE_ROCM +#endif #if defined(_MSC_VER) && _MSC_VER <= 1900 #define __func__ __FUNCTION__ @@ -676,7 +629,7 @@ __host__ __device__ // This macro is used to find older C++ compilers // that don't support move optimization for return values. -#if (defined(__GNUC__) && __GNUC__ < 13 && __cplusplus < 202002L) || \ +#if (defined(__GNUC__) && __GNUC__ < 13) || \ (defined(__clang_major__) && __clang_major__ < 13) #define C10_RETURN_MOVE_IF_OLD_COMPILER 1 #else diff --git a/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h b/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h index 9aa08c265bd..64479ba36f1 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +++ b/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h @@ -12,7 +12,7 @@ #include #include -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) #include #endif @@ -46,7 +46,7 @@ struct alignas(2) BFloat16 { /* implicit */ inline C10_HOST_DEVICE BFloat16(float value); inline C10_HOST_DEVICE operator float() const; -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) inline C10_HOST_DEVICE BFloat16(const __nv_bfloat16& value); explicit inline C10_HOST_DEVICE operator __nv_bfloat16() const; #endif @@ -124,9 +124,8 @@ C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") /// Constructors inline C10_HOST_DEVICE BFloat16::BFloat16(float value) : -#if defined(__CUDACC__) && \ - (!defined(USE_ROCM) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 || \ - defined(USE_ROCM) && (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) && defined(__CUDA_ARCH__) && \ + __CUDA_ARCH__ >= 800 x(__bfloat16_as_ushort(__float2bfloat16(value))) #elif defined(__SYCL_DEVICE_ONLY__) && \ defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) @@ -140,7 +139,7 @@ inline C10_HOST_DEVICE BFloat16::BFloat16(float value) /// Implicit conversions inline C10_HOST_DEVICE BFloat16::operator float() const { -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) return __bfloat162float(*reinterpret_cast(&x)); #elif defined(__SYCL_DEVICE_ONLY__) && \ defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) @@ -150,7 +149,7 @@ inline C10_HOST_DEVICE BFloat16::operator float() const { #endif } -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) inline C10_HOST_DEVICE BFloat16::BFloat16(const __nv_bfloat16& value) { x = *reinterpret_cast(&value); } diff --git a/torch_pin.py b/torch_pin.py index 0c5cd50fe6d..3575d9a376d 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ -TORCH_VERSION = "2.12.0" +TORCH_VERSION = "2.11.0" # NIGHTLY_VERSION = "dev20260318" Temporarily pinning to stable release candidate. Revert https://github.com/pytorch/executorch/pull/18287