From 08a37f2378ffbb73f92ddc9c89f3d98f255b4442 Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:00:48 -0600
Subject: [PATCH 1/3] query hsa for number of chiplets

---
 .../gpu/include/migraphx/gpu/context.hpp      | 75 +++++++++++++++++++
 src/targets/gpu/mlir.cpp                      |  5 +-
 2 files changed, 79 insertions(+), 1 deletion(-)
diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index db5bb7373f0..dbc2e58d7d4 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -42,6 +42,12 @@
 #include <unordered_map>
 #include <memory>
 
+// HSA is only available on non-Windows platforms
+#ifndef _WIN32
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#endif
+
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -210,6 +216,75 @@ struct hip_device
 
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
+    // Number of chiplets (XCCs) that HSA reports for this device's GPU agent. Returns 1 if
+    // HSA cannot be initialized, the agent is not found, or the query fails or yields zero.
+    // When built for Windows (no HSA), always returns 1.
+    std::size_t get_chiplet_count() const
+    {
+#ifndef _WIN32
+        // Structure to pass data through HSA agent iteration
+        struct agent_info
+        {
+            std::size_t target_device_id;
+            std::size_t gpu_count;
+            uint32_t num_chiplets;
+            bool found;
+        };
+
+        hsa_status_t status = hsa_init();
+        if(status != HSA_STATUS_SUCCESS)
+        {
+            // If HSA init fails, return 1 as default (single chiplet)
+            return 1;
+        }
+
+        agent_info info{}; // value-initialized: counters are 0 and found is false
+        info.target_device_id = device_id;
+
+        // Callback function for hsa_iterate_agents
+        // NOTE(review): assumes HSA enumerates GPUs in HIP device ID order - confirm
+        auto agent_callback = [](hsa_agent_t agent, void* data) -> hsa_status_t {
+            auto* info = static_cast<agent_info*>(data);
+
+            hsa_device_type_t device_type;
+            hsa_status_t err =
+                hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+            if(err != HSA_STATUS_SUCCESS)
+                return err;
+
+            if(device_type == HSA_DEVICE_TYPE_GPU)
+            {
+                // Check if this is the GPU we're looking for (by enumeration order)
+                if(info->gpu_count == info->target_device_id)
+                {
+                    err = hsa_agent_get_info(
+                        agent,
+                        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
+                        &info->num_chiplets);
+                    if(err != HSA_STATUS_SUCCESS)
+                        return err;
+
+                    info->found = true;
+                    return HSA_STATUS_INFO_BREAK; // Stop iteration
+                }
+                info->gpu_count++;
+            }
+
+            return HSA_STATUS_SUCCESS;
+        };
+
+        hsa_iterate_agents(agent_callback, &info); // outcome is recorded in info.found
+        hsa_shut_down();
+        // As with init failure, report one chiplet if the GPU was not found or reported 0
+        info.num_chiplets = (info.found and info.num_chiplets > 0) ? info.num_chiplets : 1U;
+        return info.num_chiplets;
+#else
+        // HSA not available on Windows, assume single chiplet
+        return 1;
+#endif
+    }
+
+
     std::size_t get_max_workitems_per_cu() const
     {
         return device_props.maxThreadsPerMultiProcessor;
diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp
index 9ca5c35330c..fc1438457c1 100644
--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -638,7 +638,8 @@ struct mlir_program
                             {"sym_name", sym_name},
                             {"kernel", std::string("mixr")},
                             {"arch", target_arch},
-                            {"num_cu", num_cu}});
+                            {"num_cu", num_cu},
+                            {"num_chiplets", num_chiplets}});
         if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
         {
             ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}});
@@ -899,6 +900,7 @@ struct mlir_program
         const auto& device = migraphx_ctx.get_current_device();
         target_arch        = device.get_device_name();
         num_cu             = device.get_cu_count();
+        num_chiplets        = device.get_chiplet_count();
     }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const
@@ -1067,6 +1069,7 @@ struct mlir_program
     std::deque<std::string> strings{};
     std::string target_arch = "";
     std::size_t num_cu      = 0;
+    std::size_t num_chiplets = 0;
     std::string sym_name;
 };
 

From f0a44cbe93f8b6fb1f3392923d0421338310cdaa Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:01:11 -0600
Subject: [PATCH 2/3] formatting

---
 src/targets/gpu/include/migraphx/gpu/context.hpp | 4 +---
 src/targets/gpu/mlir.cpp                         | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index dbc2e58d7d4..712bd4c7814 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -247,8 +247,7 @@ struct hip_device
             auto* info = static_cast<agent_info*>(data);
 
             hsa_device_type_t device_type;
-            hsa_status_t err =
-                hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+            hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
             if(err != HSA_STATUS_SUCCESS)
                 return err;
 
@@ -284,7 +283,6 @@ struct hip_device
 #endif
     }
 
-
     std::size_t get_max_workitems_per_cu() const
     {
         return device_props.maxThreadsPerMultiProcessor;
diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp
index fc1438457c1..3d041ef1ba8 100644
--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -900,7 +900,7 @@ struct mlir_program
         const auto& device = migraphx_ctx.get_current_device();
         target_arch        = device.get_device_name();
         num_cu             = device.get_cu_count();
-        num_chiplets        = device.get_chiplet_count();
+        num_chiplets       = device.get_chiplet_count();
     }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const

From f9a07c3b719ed2d5af4e2115519f572f0988441a Mon Sep 17 00:00:00 2001
From: kahmed10 <15948690+kahmed10@users.noreply.github.com>
Date: Wed, 10 Dec 2025 14:32:14 -0600
Subject: [PATCH 3/3] add TODO for future archs

---
 src/targets/gpu/include/migraphx/gpu/context.hpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index 712bd4c7814..52989a270de 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -279,6 +279,9 @@ struct hip_device
         return info.num_chiplets;
 #else
         // HSA not available on Windows, assume single chiplet
+        // TODO: for future archs that have multiple chiplets,
+        // need a way to query on Windows or just hardcode
+        // based on gfx number
         return 1;
 #endif
     }