From 8786ba69ce78fe8ea9e7626ffff59fb1565559bf Mon Sep 17 00:00:00 2001 From: ssjia Date: Fri, 27 Mar 2026 11:22:01 -0700 Subject: [PATCH] [ET-VK][conv1d] Route conv1d to height-packed implementations in export pipeline Pull Request resolved: https://github.com/pytorch/executorch/pull/18334 Integrate the new height-packed conv1d_pw and conv1d_dw operators into the aten.convolution.default dispatch path so they are automatically used during model export. In op_registry.py, add a pick_conv_storage function that inspects the convolution node at partition time. For 1D convolutions where the op is pointwise (kernel_size=1) or depthwise (groups=C_in) and channels are 4-aligned, it selects HEIGHT_PACKED_TEXTURE for input/output instead of the default CHANNELS_PACKED_TEXTURE. All other cases (conv2d, grouped conv1d with K>1, unaligned channels) retain channels-packed behavior. In Convolution.cpp, add a height-packed routing block at the top of the conv1d path. When the input tensor is height-packed, it dispatches to et_vk.conv1d_pw.default or et_vk.conv1d_dw.default via VK_GET_OP_FN. Falls through to the existing channels-packed add_conv1d_node path otherwise. 
ghstack-source-id: 358903217 @exported-using-ghexport Differential Revision: [D97344090](https://our.internmc.facebook.com/intern/diff/D97344090/) --- backends/vulkan/op_registry.py | 42 ++++++++++++++++ .../runtime/graph/ops/impl/Convolution.cpp | 50 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py index ddb843e2335..38215c2d827 100644 --- a/backends/vulkan/op_registry.py +++ b/backends/vulkan/op_registry.py @@ -802,6 +802,47 @@ def check_conv_node(node: torch.fx.Node) -> bool: return True + def pick_conv_storage( + node: torch.fx.Node, + ) -> Tuple[List[utils.TensorRepSet], utils.TensorRepSet]: + x = node.args[0] + assert isinstance(x, torch.fx.Node) + x_shape = x.meta["val"].size() + + # Default: channels-packed texture (conv2d and fallback conv1d) + input_storage = utils.CHANNELS_PACKED_TEXTURE + output_storage = utils.CHANNELS_PACKED_TEXTURE + + if len(x_shape) == 3: + # Conv1d: check if we can use height-packed + weight = node.args[1] + assert isinstance(weight, torch.fx.Node) + w_shape = weight.meta["val"].size() + groups = node.args[8] + + c_in = x_shape[1] + c_out = w_shape[0] + kernel_size = w_shape[2] + + is_pointwise = kernel_size == 1 + is_depthwise = ( + isinstance(groups, int) + and groups == c_in + and c_out == c_in + and w_shape[1] == 1 + ) + if is_pointwise or is_depthwise: + input_storage = utils.HEIGHT_PACKED_TEXTURE + output_storage = utils.HEIGHT_PACKED_TEXTURE + + # Build per-input storage list. 
The convolution op has variable args: + # aten.convolution.default: input, weight, bias, stride, padding, + # dilation, transposed, output_padding, groups + # et_vk.conv_with_clamp.default: + output_min, output_max + # All args after input are NO_STORAGE (prepacked or non-tensor) + inputs = [input_storage] + [utils.NO_STORAGE] * 10 + return inputs, output_storage + return OpFeatures( inputs_storage=[ utils.CHANNELS_PACKED_TEXTURE, # input @@ -820,6 +861,7 @@ def check_conv_node(node: torch.fx.Node) -> bool: supports_resize=True, supports_prepacking=True, are_node_inputs_supported_fn=check_conv_node, + pick_io_storage_fn=pick_conv_storage, ) diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 2da98926fad..9c518678502 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -686,6 +686,56 @@ void conv(ComputeGraph& graph, const std::vector<ValueRef>& args) { true); } } else { + // Conv1d path + if (graph.packed_dim_of(args[0]) == WHCN::kHeightDim) { + // Height-packed: route to optimized conv1d implementations + const auto weight_sizes = graph.sizes_of(args[1]); + const int64_t groups_val = graph.get_int(args[8]); + const bool is_pointwise = weight_sizes.at(2) == 1; + const bool is_depthwise = + groups_val == weight_sizes.at(0) && weight_sizes.at(1) == 1; + + // Build unified 10-arg vector: + // in, weight, bias, stride, padding, dilation, groups, + // output_min, output_max, out + // For non-clamp (args.size() == 10): output_min/max = kDummyValueRef + // For clamp (args.size() == 12): output_min/max from args[9]/args[10] + ValueRef output_min = kDummyValueRef; + ValueRef output_max = kDummyValueRef; + ValueRef out; + if (args.size() == 10) { + out = args[9]; + } else { + output_min = args[9]; + output_max = args[10]; + out = args[11]; + } + + std::vector<ValueRef> conv1d_args = { + args[0], + args[1], + args[2], + args[3], + args[4],
+ args[5], + args[8], + output_min, + output_max, + out}; + + if (is_pointwise) { + VK_GET_OP_FN("et_vk.conv1d_pw.default")(graph, conv1d_args); + } else if (is_depthwise) { + VK_GET_OP_FN("et_vk.conv1d_dw.default")(graph, conv1d_args); + } else { + VK_THROW( + "Height-packed conv1d only supports pointwise (K=1) or " + "depthwise (groups=C)"); + } + return; + } + + // Existing channels-packed fallback if (args.size() == 10) { // ordinary conv1d return add_conv1d_node(