From c9128516975b7c1793b462d9700bac565b422e73 Mon Sep 17 00:00:00 2001 From: ssjia Date: Wed, 11 Feb 2026 12:16:12 -0800 Subject: [PATCH] [ET-VK][profiling] Add additional profiling blocks This adds fine-grained ET_EVENT_TRACER profiling blocks to the Vulkan backend's execute() method in VulkanBackend.cpp. Previously, only GPU shader timestamps were logged. Now the following phases are individually traced: ETVK_COPY_INPUTS (host-to-GPU input transfer), ETVK_RESIZE (graph resize propagation), ETVK_COMPUTE_GRAPH_EXECUTE (GPU compute dispatch), ETVK_COPY_OUTPUTS (GPU-to-host output transfer), and ETVK_EXECUTE (overall delegate execution). The GPU shader timestamp extraction is also moved to occur right after execute() completes rather than at the end of the function, so it falls within the ETVK_EXECUTE span. Differential Revision: [D93000163](https://our.internmc.facebook.com/intern/diff/D93000163/) [ghstack-poisoned] --- backends/vulkan/runtime/VulkanBackend.cpp | 82 +++++++++++++++++++---- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 261585c381b..fbca5af5100 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -641,6 +641,21 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { const size_t num_inputs = compute_graph->inputs().size(); bool should_propagate_resize = false; +#ifdef ET_EVENT_TRACER_ENABLED + runtime::EventTracer* event_tracer = context.event_tracer(); + runtime::EventTracerEntry overall_event_tracer_entry = + event_tracer_start_profiling_delegate( + event_tracer, + "ETVK_EXECUTE", + /* delegate_debug_id = */ -1); +#endif // ET_EVENT_TRACER_ENABLED +#ifdef ET_EVENT_TRACER_ENABLED + runtime::EventTracerEntry copy_inputs_event_tracer_entry = + event_tracer_start_profiling_delegate( + event_tracer, + "ETVK_COPY_INPUTS", + /* delegate_debug_id = */ -1); +#endif // ET_EVENT_TRACER_ENABLED for (size_t i = 0; i < num_inputs; i++) { const ValueRef iref = compute_graph->inputs()[i].value; if (compute_graph->val_is_tensor(iref)) { @@ -669,13 +684,61 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { compute_graph->get_val_type(iref)); } } +#ifdef ET_EVENT_TRACER_ENABLED + event_tracer_end_profiling_delegate( + event_tracer, copy_inputs_event_tracer_entry); +#endif // ET_EVENT_TRACER_ENABLED if (should_propagate_resize || compute_graph->has_data_dependent_shapes()) { +#ifdef ET_EVENT_TRACER_ENABLED + runtime::EventTracerEntry resize_event_tracer_entry = + event_tracer_start_profiling_delegate( + event_tracer, + "ETVK_RESIZE", + /* delegate_debug_id = */ -1); +#endif // ET_EVENT_TRACER_ENABLED compute_graph->propagate_resize(); +#ifdef ET_EVENT_TRACER_ENABLED + event_tracer_end_profiling_delegate( + event_tracer, resize_event_tracer_entry); +#endif // ET_EVENT_TRACER_ENABLED } +#ifdef ET_EVENT_TRACER_ENABLED + runtime::EventTracerEntry execute_event_tracer_entry = + event_tracer_start_profiling_delegate( + event_tracer, + "ETVK_COMPUTE_GRAPH_EXECUTE", + /* delegate_debug_id = */ -1); +#endif // ET_EVENT_TRACER_ENABLED compute_graph->execute(); +#ifdef ET_EVENT_TRACER_ENABLED + event_tracer_end_profiling_delegate( + event_tracer, execute_event_tracer_entry); +#endif // ET_EVENT_TRACER_ENABLED +#ifdef ET_EVENT_TRACER_ENABLED + compute_graph->context()->querypool().extract_results(); + for (const auto& r : + compute_graph->context()->querypool().get_shader_timestamp_data()) { + std::string event_name = "{" + r.kernel_name + + ", \"dispatch_id\": " + std::to_string(r.dispatch_id) + "}"; + event_tracer_log_profiling_delegate( + event_tracer, + event_name.c_str(), + /* delegate_debug_id = */ -1, + r.start_time_ns, + r.end_time_ns); + } +#endif // ET_EVENT_TRACER_ENABLED + +#ifdef ET_EVENT_TRACER_ENABLED + runtime::EventTracerEntry copy_outputs_event_tracer_entry = + event_tracer_start_profiling_delegate( + event_tracer, + "ETVK_COPY_OUTPUTS", + /* delegate_debug_id = */ -1); +#endif // ET_EVENT_TRACER_ENABLED for (size_t i = 0; i < compute_graph->outputs().size(); i++) { const size_t o = i + num_inputs; const ValueRef oref = compute_graph->outputs()[i].value; @@ -701,21 +764,14 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { compute_graph->get_val_type(oref)); } } +#ifdef ET_EVENT_TRACER_ENABLED + event_tracer_end_profiling_delegate( + event_tracer, copy_outputs_event_tracer_entry); +#endif // ET_EVENT_TRACER_ENABLED #ifdef ET_EVENT_TRACER_ENABLED - runtime::EventTracer* event_tracer = context.event_tracer(); - compute_graph->context()->querypool().extract_results(); - for (const auto& r : - compute_graph->context()->querypool().get_shader_timestamp_data()) { - std::string event_name = "{" + r.kernel_name + - ", \"dispatch_id\": " + std::to_string(r.dispatch_id) + "}"; - event_tracer_log_profiling_delegate( - event_tracer, - event_name.c_str(), - /* delegate_debug_id = */ -1, - r.start_time_ns, - r.end_time_ns); - } + event_tracer_end_profiling_delegate( + event_tracer, overall_event_tracer_entry); #endif // ET_EVENT_TRACER_ENABLED return Error::Ok;