From fd907d52203a47a392f63cfc98b3a3a73b00f238 Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Wed, 11 Feb 2026 14:01:35 -0800
Subject: [PATCH 1/2] [et device 1/n] introduce cuda device type

This diff introduces a new device type, CUDA, which will be used as the basis
for further device type support.

Differential Revision: [D92928772](https://our.internmc.facebook.com/intern/diff/D92928772/)

[ghstack-poisoned]
---
 runtime/core/portable_type/device.h          | 30 ++++--
 .../core/portable_type/test/device_test.cpp  | 95 +++++++++++++++++++
 runtime/core/portable_type/test/targets.bzl  |  8 ++
 3 files changed, 124 insertions(+), 9 deletions(-)
 create mode 100644 runtime/core/portable_type/test/device_test.cpp

diff --git a/runtime/core/portable_type/device.h b/runtime/core/portable_type/device.h
index d789df8a84d..3214dcd9eaf 100644
--- a/runtime/core/portable_type/device.h
+++ b/runtime/core/portable_type/device.h
@@ -8,7 +8,8 @@
 
 #pragma once
 
-#include <executorch/runtime/platform/assert.h>
+#include <cstddef>
+#include <cstdint>
 
 namespace executorch {
 namespace runtime {
@@ -18,18 +19,22 @@ namespace etensor {
 /// Subset of https://github.com/pytorch/pytorch/blob/main/c10/core/Device.h
 enum class DeviceType : int8_t {
   CPU = 0,
+  CUDA = 1,
 };
 
-/// An index representing a specific device; For cpu it should always be -1 or 0
+/// Total number of device types, used for fixed-size registry arrays.
+constexpr size_t kNumDeviceTypes = 2;
+
+/// An index representing a specific device; e.g. GPU 0 vs GPU 1.
+/// -1 means the default/unspecified device for that type.
 using DeviceIndex = int8_t;
 
 /**
  * An abstraction for the compute device on which a tensor is located.
- * ExecuTorch doesn't allow dynamic dispatching based on device, so this type is
- * just a skeleton to allow certain kernels that expect device as an
- * argument to still be run.
  *
- * In ExecuTorch this is always expected to be CPU.
+ * Tensors carry a Device to express where their underlying data resides
+ * (e.g. CPU host memory vs CUDA device memory). The runtime uses this to
+ * dispatch memory allocation to the appropriate device allocator.
  */
 struct Device final {
   using Type = DeviceType;
@@ -39,7 +44,7 @@ struct Device final {
   /* implicit */ Device(DeviceType type, DeviceIndex index = -1)
       : type_(type), index_(index) {}
 
-  /// Returns the type of device this is. Only CPU is supported.
+  /// Returns the type of device the tensor data resides on.
   DeviceType type() const noexcept {
     return type_;
   }
@@ -49,12 +54,19 @@ struct Device final {
     return type_ == DeviceType::CPU;
   }
 
-  /// Returns the device index. Always 0 if specified or -1 if not provided.
+  /// Returns the optional device index. -1 means default/unspecified.
   DeviceIndex index() const noexcept {
-    ET_CHECK(index_ == 0 || index_ == -1);
     return index_;
   }
 
+  bool operator==(const Device& other) const noexcept {
+    return type_ == other.type_ && index_ == other.index_;
+  }
+
+  bool operator!=(const Device& other) const noexcept {
+    return !(*this == other);
+  }
+
  private:
   DeviceType type_;
   DeviceIndex index_ = -1;
diff --git a/runtime/core/portable_type/test/device_test.cpp b/runtime/core/portable_type/test/device_test.cpp
new file mode 100644
index 00000000000..d9359b2f866
--- /dev/null
+++ b/runtime/core/portable_type/test/device_test.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/runtime/core/portable_type/device.h>
+
+#include <gtest/gtest.h>
+
+using executorch::runtime::etensor::Device;
+using executorch::runtime::etensor::DeviceIndex;
+using executorch::runtime::etensor::DeviceType;
+using executorch::runtime::etensor::kNumDeviceTypes;
+
+// --- DeviceType enum ---
+
+TEST(DeviceTypeTest, EnumValues) {
+  EXPECT_EQ(static_cast<int>(DeviceType::CPU), 0);
+  EXPECT_EQ(static_cast<int>(DeviceType::CUDA), 1);
+}
+
+TEST(DeviceTypeTest, NumDeviceTypesCoversAllEnums) {
+  // kNumDeviceTypes must be large enough to index all defined device types.
+  EXPECT_GT(kNumDeviceTypes, static_cast<size_t>(DeviceType::CPU));
+  EXPECT_GT(kNumDeviceTypes, static_cast<size_t>(DeviceType::CUDA));
+}
+
+// --- Device: CPU ---
+
+TEST(DeviceTest, CpuDefaultIndex) {
+  Device d(DeviceType::CPU);
+  EXPECT_TRUE(d.is_cpu());
+  EXPECT_EQ(d.type(), DeviceType::CPU);
+  EXPECT_EQ(d.index(), -1);
+}
+
+TEST(DeviceTest, CpuExplicitIndex) {
+  Device d(DeviceType::CPU, 0);
+  EXPECT_TRUE(d.is_cpu());
+  EXPECT_EQ(d.index(), 0);
+}
+
+// --- Device: CUDA ---
+
+TEST(DeviceTest, CudaDefaultIndex) {
+  Device d(DeviceType::CUDA);
+  EXPECT_FALSE(d.is_cpu());
+  EXPECT_EQ(d.type(), DeviceType::CUDA);
+  EXPECT_EQ(d.index(), -1);
+}
+
+TEST(DeviceTest, CudaExplicitIndex) {
+  Device d(DeviceType::CUDA, 0);
+  EXPECT_EQ(d.index(), 0);
+}
+
+// --- Device: equality ---
+
+TEST(DeviceTest, EqualitySameTypeAndIndex) {
+  EXPECT_EQ(Device(DeviceType::CPU, 0), Device(DeviceType::CPU, 0));
+  EXPECT_EQ(Device(DeviceType::CUDA, 1), Device(DeviceType::CUDA, 1));
+}
+
+TEST(DeviceTest, InequalityDifferentType) {
+  EXPECT_NE(Device(DeviceType::CPU, 0), Device(DeviceType::CUDA, 0));
+}
+
+TEST(DeviceTest, InequalityDifferentIndex) {
+  EXPECT_NE(Device(DeviceType::CUDA, 0), Device(DeviceType::CUDA, 1));
+}
+
+TEST(DeviceTest, EqualityDefaultIndices) {
+  EXPECT_EQ(Device(DeviceType::CPU), Device(DeviceType::CPU));
+  EXPECT_EQ(Device(DeviceType::CUDA), Device(DeviceType::CUDA));
+  EXPECT_NE(Device(DeviceType::CPU), Device(DeviceType::CUDA));
+}
+
+// --- Device: implicit construction ---
+
+TEST(DeviceTest, ImplicitConstructionFromDeviceType) {
+  // Device constructor is implicit, allowing DeviceType → Device conversion.
+  Device d = DeviceType::CUDA;
+  EXPECT_EQ(d.index(), -1);
+}
+
+// --- Deprecated namespace aliases ---
+
+TEST(DeviceTest, DeprecatedNamespaceAliases) {
+  // Verify the torch::executor aliases still work.
+  torch::executor::Device d(torch::executor::DeviceType::CUDA, 0);
+  EXPECT_EQ(d.index(), 0);
+}
diff --git a/runtime/core/portable_type/test/targets.bzl b/runtime/core/portable_type/test/targets.bzl
index d8e82a15fba..a6671d7d400 100644
--- a/runtime/core/portable_type/test/targets.bzl
+++ b/runtime/core/portable_type/test/targets.bzl
@@ -47,6 +47,14 @@ def define_common_targets():
         ],
     )
 
+    runtime.cxx_test(
+        name = "device_test",
+        srcs = ["device_test.cpp"],
+        deps = [
+            "//executorch/runtime/core/portable_type:portable_type",
+        ],
+    )
+
     runtime.cxx_test(
         name = "tensor_impl_test",
         srcs = ["tensor_impl_test.cpp"],

From caf913edb1666ae71ab43d605e68104b079932ea Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Wed, 11 Feb 2026 14:34:04 -0800
Subject: [PATCH 2/2] Update on "[et device 1/n] introduce cuda device type"

This diff introduces a new device type, CUDA, which will be used as the basis
for further device type support.
Differential Revision: [D92928772](https://our.internmc.facebook.com/intern/diff/D92928772/) [ghstack-poisoned] --- runtime/core/portable_type/device.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/core/portable_type/device.h b/runtime/core/portable_type/device.h index 3214dcd9eaf..cd15acb0cfe 100644 --- a/runtime/core/portable_type/device.h +++ b/runtime/core/portable_type/device.h @@ -15,8 +15,8 @@ namespace executorch { namespace runtime { namespace etensor { -/// Denotes the specific genre of compute device. -/// Subset of https://github.com/pytorch/pytorch/blob/main/c10/core/Device.h +/// Represents the type of compute device. +/// Note: ExecuTorch Device is distinct from PyTorch Device. enum class DeviceType : int8_t { CPU = 0, CUDA = 1, @@ -54,7 +54,7 @@ struct Device final { return type_ == DeviceType::CPU; } - /// Returns the optional device index. -1 means default/unspecified. + /// Returns the device index, or -1 if default/unspecified. DeviceIndex index() const noexcept { return index_; }
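
For context, a minimal usage sketch of the API added in this stack; it is not
part of either patch and assumes only what the diffs above introduce
(DeviceType::CUDA, kNumDeviceTypes, and the Device equality operators). The
AllocatorStub type, allocator_registry array, and allocator_for() helper are
hypothetical names used purely for illustration of the intended registry-style
dispatch.

#include <cstddef>

#include <executorch/runtime/core/portable_type/device.h>

using executorch::runtime::etensor::Device;
using executorch::runtime::etensor::DeviceType;
using executorch::runtime::etensor::kNumDeviceTypes;

// Hypothetical per-device-type allocator registry, sized by kNumDeviceTypes
// so every DeviceType value has exactly one slot.
struct AllocatorStub {};
static AllocatorStub* allocator_registry[kNumDeviceTypes] = {nullptr, nullptr};

// Look up the allocator for a device by indexing on its type.
AllocatorStub* allocator_for(const Device& device) {
  return allocator_registry[static_cast<size_t>(device.type())];
}

int main() {
  Device cpu(DeviceType::CPU);        // index defaults to -1 (unspecified)
  Device cuda0(DeviceType::CUDA, 0);  // explicit device index 0

  // The new operators compare both type and index.
  bool same_device = (cuda0 == Device(DeviceType::CUDA, 0));  // true
  bool different = (cpu != cuda0);                            // true

  (void)same_device;
  (void)different;
  (void)allocator_for(cuda0);
  return 0;
}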