-
Notifications
You must be signed in to change notification settings - Fork 111
AIMIGRAPHX-414 Use HSA runtime to query number of chiplets (Linux only) #4496
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,12 @@ | |
| #include <unordered_map> | ||
| #include <memory> | ||
|
|
||
| // HSA is only available on non-Windows platforms | ||
| #ifndef _WIN32 | ||
| #include "hsa/hsa.h" | ||
| #include "hsa/hsa_ext_amd.h" | ||
| #endif | ||
|
|
||
| namespace migraphx { | ||
| inline namespace MIGRAPHX_INLINE_NS { | ||
| namespace gpu { | ||
|
|
@@ -210,6 +216,76 @@ | |
|
|
||
| std::size_t get_cu_count() const { return device_props.multiProcessorCount; } | ||
|
|
||
| std::size_t get_chiplet_count() const | ||
| { | ||
| #ifndef _WIN32 | ||
| // Structure to pass data through HSA agent iteration | ||
| struct agent_info | ||
| { | ||
| std::size_t target_device_id; | ||
| std::size_t gpu_count; | ||
| uint32_t num_chiplets; | ||
| bool found; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like `found` is set but not used. Do we want to check if |
||
| }; | ||
|
|
||
| hsa_status_t status = hsa_init(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't do that here: https://github.com/ROCm/rocMLIR/blob/1f672e426c688289265eb94f2ab092d48b0690e4/mlir/lib/Dialect/Rock/IR/AmdArchDb.cpp#L214 Is it needed? |
||
| if(status != HSA_STATUS_SUCCESS) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would refactor this into a macro called |
||
| { | ||
| // If HSA init fails, return 1 as default (single chiplet) | ||
| return 1; | ||
| } | ||
|
|
||
| agent_info info{}; | ||
| info.target_device_id = device_id; | ||
| info.gpu_count = 0; | ||
| info.num_chiplets = 0; | ||
| info.found = false; | ||
|
|
||
| // Callback function for hsa_iterate_agents | ||
| // GPUs are enumerated in the same order as HIP device IDs | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this always the case? Any link to the docs? |
||
| auto agent_callback = [](hsa_agent_t agent, void* data) -> hsa_status_t { | ||
| auto* info = static_cast<agent_info*>(data); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I find it confusing that we have |
||
|
|
||
| hsa_device_type_t device_type; | ||
| hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); | ||
| if(err != HSA_STATUS_SUCCESS) | ||
| return err; | ||
|
|
||
| if(device_type == HSA_DEVICE_TYPE_GPU) | ||
| { | ||
| // Check if this is the GPU we're looking for (by enumeration order) | ||
| if(info->gpu_count == info->target_device_id) | ||
| { | ||
| err = hsa_agent_get_info( | ||
| agent, | ||
| static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC), | ||
| &info->num_chiplets); | ||
| if(err != HSA_STATUS_SUCCESS) | ||
| return err; | ||
|
|
||
| info->found = true; | ||
| return HSA_STATUS_INFO_BREAK; // Stop iteration | ||
| } | ||
| info->gpu_count++; | ||
| } | ||
|
|
||
| return HSA_STATUS_SUCCESS; | ||
| }; | ||
|
|
||
| // Iterate through all HSA agents to find matching GPU | ||
| status = hsa_iterate_agents(agent_callback, &info); | ||
|
Check warning on line 276 in src/targets/gpu/include/migraphx/gpu/context.hpp
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Check the status. |
||
|
|
||
| hsa_shut_down(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to call init and shut_down every time? That might be expensive. We should probably collect the chiplet counts for all devices and store them in a vector so we only query once. Also, if this is necessary (I am not sure this is the case, as HIP still needs to use HSA), then this should be called in a destructor so it's always called. Could make a class that calls |
||
| return info.num_chiplets; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we move all the HSA code to a separate function in a .cpp file? This header gets included by everyone. |
||
| #else | ||
| // HSA not available on Windows, assume single chiplet | ||
| // TODO: for future archs that have multiple chiplets, | ||
| // need a way to query on Windows or just hardcode | ||
| // based on gfx number | ||
| return 1; | ||
| #endif | ||
| } | ||
|
|
||
| std::size_t get_max_workitems_per_cu() const | ||
| { | ||
| return device_props.maxThreadsPerMultiProcessor; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Includes should use angle brackets <..>.