diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp
index db5bb7373f0..52989a270de 100644
--- a/src/targets/gpu/include/migraphx/gpu/context.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/context.hpp
@@ -42,6 +42,12 @@
 #include
 #include
 
+// HSA is only available on non-Windows platforms
+#ifndef _WIN32
+#include "hsa/hsa.h"
+#include "hsa/hsa_ext_amd.h"
+#endif
+
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -210,6 +216,84 @@ struct hip_device
 
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
+    /// Number of chiplets (XCCs) on this device as reported by HSA; 1 if it cannot be
+    /// determined (HSA init/query failure, device not found, or Windows).
+    std::size_t get_chiplet_count() const
+    {
+#ifndef _WIN32
+        // Structure to pass data through HSA agent iteration
+        struct agent_info
+        {
+            std::size_t target_device_id;
+            std::size_t gpu_count;
+            uint32_t num_chiplets;
+            bool found;
+        };
+
+        hsa_status_t status = hsa_init();
+        if(status != HSA_STATUS_SUCCESS)
+        {
+            // If HSA init fails, return 1 as default (single chiplet)
+            return 1;
+        }
+
+        agent_info info{};
+        info.target_device_id = device_id;
+        info.gpu_count        = 0;
+        info.num_chiplets     = 0;
+        info.found            = false;
+
+        // Callback function for hsa_iterate_agents
+        // GPUs are enumerated in the same order as HIP device IDs
+        auto agent_callback = [](hsa_agent_t agent, void* data) -> hsa_status_t {
+            auto* info = static_cast<agent_info*>(data);
+
+            hsa_device_type_t device_type;
+            hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+            if(err != HSA_STATUS_SUCCESS)
+                return err;
+
+            if(device_type == HSA_DEVICE_TYPE_GPU)
+            {
+                // Check if this is the GPU we're looking for (by enumeration order)
+                if(info->gpu_count == info->target_device_id)
+                {
+                    err = hsa_agent_get_info(
+                        agent,
+                        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
+                        &info->num_chiplets);
+                    if(err != HSA_STATUS_SUCCESS)
+                        return err;
+
+                    info->found = true;
+                    return HSA_STATUS_INFO_BREAK; // Stop iteration
+                }
+                info->gpu_count++;
+            }
+
+            return HSA_STATUS_SUCCESS;
+        };
+
+        // Iterate through all HSA agents to find matching GPU. The traversal status is
+        // not needed: `found` is set only after the chiplet query succeeds, so it
+        // already distinguishes success from "no such GPU" and from HSA errors.
+        hsa_iterate_agents(agent_callback, &info);
+
+        hsa_shut_down();
+
+        // Never report zero chiplets: fall back to the single-chiplet default
+        if(not info.found or info.num_chiplets == 0)
+            return 1;
+        return info.num_chiplets;
+#else
+        // HSA not available on Windows, assume single chiplet
+        // TODO: for future archs that have multiple chiplets,
+        // need a way to query on Windows or just hardcode
+        // based on gfx number
+        return 1;
+#endif
+    }
+
     std::size_t get_max_workitems_per_cu() const
     {
         return device_props.maxThreadsPerMultiProcessor;
diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp
index 9ca5c35330c..3d041ef1ba8 100644
--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -638,7 +638,8 @@ struct mlir_program
                           {"sym_name", sym_name},
                           {"kernel", std::string("mixr")},
                           {"arch", target_arch},
-                          {"num_cu", num_cu}});
+                          {"num_cu", num_cu},
+                          {"num_chiplets", num_chiplets}});
         if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{}))
         {
             ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}});
@@ -899,6 +900,7 @@ struct mlir_program
         const auto& device = migraphx_ctx.get_current_device();
         target_arch        = device.get_device_name();
         num_cu             = device.get_cu_count();
+        num_chiplets       = device.get_chiplet_count();
     }
 
     std::pair get_launch_params() const
@@ -1067,6 +1069,7 @@ struct mlir_program
     std::deque strings{};
     std::string target_arch = "";
     std::size_t num_cu      = 0;
+    std::size_t num_chiplets = 0;
     std::string sym_name;
 };
 