InfiniTensor · PanZezhong1725 · Jun 10, 2026
diff --git a/include/infinicore/ops.hpp b/include/infinicore/ops.hpp
@@ -20,6 +20,7 @@
 #include "ops/blas_dot.hpp"
 #include "ops/causal_softmax.hpp"
 #include "ops/cdist.hpp"
+#include "ops/conv1d.hpp"
 #include "ops/conv2d.hpp"
 #include "ops/cross_entropy.hpp"
 #include "ops/embedding.hpp"

diff --git a/include/infinicore/ops/conv1d.hpp b/include/infinicore/ops/conv1d.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "../device.hpp"
+#include "common/op.hpp"
+
+#include <cstddef>
+#include <optional>
+
+namespace infinicore::op {
+// Dispatch entry point for the convolution backend that conv1d() and conv1d_() run on.
+class Conv1d {
+public:
+    // Backend signature: (output, input, weight, bias, pads, strides, dilations, n); n is the entry count of each array.
+    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, const size_t *, const size_t *, const size_t *, size_t);
+
+    // Calls the backend registered for the output tensor's device type with these arguments.
+    // Throws std::runtime_error when no backend is registered for that device type.
+    static void execute(Tensor output, Tensor input, Tensor weight, Tensor bias,
+                        const size_t *pads, const size_t *strides, const size_t *dilations, size_t n);
+
+    // Registry in which backends install their `schema` implementation.
+    static common::OpDispatcher<schema> &dispatcher();
+};
+
+// Out-of-place 1-D convolution. Expects input [N, C_in, L], weight [C_out, C_in/groups, K] and, when present,
+// bias [C_out]; stride, padding and dilation each apply to the length axis.
+// Returns a newly allocated [N, C_out, L_out] tensor with the input's dtype and device, where
+// L_out = (L + 2*padding - dilation*(K-1) - 1) / stride + 1 using integer division.
+// Throws std::runtime_error when a shape, groups, stride, dilation or kernel size is invalid.
+Tensor conv1d(Tensor input, Tensor weight, std::optional<Tensor> bias,
+              size_t stride, size_t padding, size_t dilation, size_t groups);
+// In-place variant of conv1d: writes the result into the caller-provided `output`.
+// Expects input [N, C_in, L], weight [C_out, C_in/groups, K], optional bias [C_out], and output
+// [N, C_out, L_out] with L_out = (L + 2*padding - dilation*(K-1) - 1) / stride + 1 (integer division).
+// For groups > 1 the work is done one group at a time and each partial result is copied into the
+// matching channel slice of `output`.
+// Throws std::runtime_error when any shape, groups, stride, dilation or kernel size is invalid.
+void conv1d_(Tensor output, Tensor input, Tensor weight, std::optional<Tensor> bias,
+             size_t stride, size_t padding, size_t dilation, size_t groups);
+} // namespace infinicore::op
diff --git a/python/infinicore/nn/functional/__init__.py b/python/infinicore/nn/functional/__init__.py
@@ -5,6 +5,7 @@
 from .avg_pool1d import avg_pool1d
 from .binary_cross_entropy_with_logits import binary_cross_entropy_with_logits
 from .causal_softmax import causal_softmax
+from .conv1d import conv1d
 from .embedding import embedding
 from .flash_attention import flash_attention
 from .gaussian_nll_loss import gaussian_nll_loss
@@ -41,6 +42,7 @@
 __all__ = [
     "adaptive_max_pool1d",
     "causal_softmax",
+    "conv1d",
     "embedding",
     "flash_attention",
     "gaussian_nll_loss",

diff --git a/python/infinicore/nn/functional/conv1d.py b/python/infinicore/nn/functional/conv1d.py
@@ -0,0 +1,41 @@
+from infinicore.lib import _infinicore
+from infinicore.tensor import Tensor
+
+
+def conv1d(
+    input: Tensor,
+    weight: Tensor,
+    bias: Tensor | None = None,
+    stride: int = 1,
+    padding: int = 0,
+    dilation: int = 1,
+    groups: int = 1,
+    *,
+    out: Tensor | None = None,
+) -> Tensor:
+    bias_tensor = bias._underlying if bias is not None else None
+
+    if out is None:
+        return Tensor(
+            _infinicore.conv1d(
+                input._underlying,
+                weight._underlying,
+                bias_tensor,
+                stride,
+                padding,
+                dilation,
+                groups,
+            )
+        )
+
+    _infinicore.conv1d_(
+        out._underlying,
+        input._underlying,
+        weight._underlying,
+        bias_tensor,
+        stride,
+        padding,
+        dilation,
+        groups,
+    )
+    return out
diff --git a/src/infinicore/ops/conv1d/conv1d.cc b/src/infinicore/ops/conv1d/conv1d.cc
@@ -0,0 +1,154 @@
+#include "infinicore/ops/conv1d.hpp"
+
+#include "../../utils.hpp"
+
+#include <stdexcept>
+
+namespace infinicore::op {
+
+common::OpDispatcher<Conv1d::schema> &Conv1d::dispatcher() {
+    static common::OpDispatcher<Conv1d::schema> registry; // function-local static: one instance handed to every caller
+    return registry;
+}
+
+// Checks device placement, makes the output tensor's device current, and forwards the call to the
+// implementation registered for that device type.
+//   bias                   - may be an empty Tensor; it is device-checked only when non-empty.
+//   pads/strides/dilations - forwarded as-is together with n.
+// Throws std::runtime_error when the dispatcher has no implementation for the output's device type.
+void Conv1d::execute(Tensor output, Tensor input, Tensor weight, Tensor bias,
+                     const size_t *pads, const size_t *strides, const size_t *dilations, size_t n) {
+    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input, weight);
+    if (bias) {
+        INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, bias);
+    }
+
+    infinicore::context::setDevice(output->device());
+    const auto backend_kind = output->device().getType();
+
+    // A null lookup result is treated as "nothing registered for this device type".
+    const auto kernel = dispatcher().lookup(backend_kind);
+    if (kernel == nullptr) {
+        throw std::runtime_error("No Conv1d implementation found for device type: " + std::to_string(static_cast<int>(backend_kind)));
+    }
+    kernel(output, input, weight, bias, pads, strides, dilations, n);
+}
+
+// Length of the convolution output along one axis:
+//   (input + 2*padding - window) / stride + 1, with window = dilation * (kernel - 1) + 1 and integer division.
+// Throws std::runtime_error when stride, dilation or kernel is zero, when the padded length or the window would
+// wrap around size_t, or when the window is longer than the padded input.
+static size_t conv1d_out_size(size_t input, size_t padding, size_t dilation, size_t kernel, size_t stride) {
+    if (stride == 0 || dilation == 0 || kernel == 0) {
+        throw std::runtime_error("conv1d: stride, dilation, and kernel size must be greater than zero");
+    }
+    // Unsigned arithmetic wraps silently, so reject operands whose sum/product does not fit before computing it.
+    constexpr size_t kMax = static_cast<size_t>(-1);
+    if (padding > (kMax - input) / 2 || kernel - 1 > (kMax - 1) / dilation) {
+        throw std::runtime_error("conv1d: padding, dilation, or kernel size is too large");
+    }
+    const size_t window = dilation * (kernel - 1) + 1;
+    const size_t padded = input + 2 * padding;
+    if (padded < window) {
+        throw std::runtime_error("Invalid conv1d output shape (negative or zero)");
+    }
+    return (padded - window) / stride + 1;
+}
+
+// Shape checks for conv1d_; throws std::runtime_error on the first failing rule, tested in this order: all three
+// tensors are rank 3; groups != 0; C_in and C_out are multiples of groups; weight dim 1 == C_in / groups; output
+// dims 0 and 1 == (N, C_out); bias, when present, is rank 1 with C_out elements. Output dim 2 is not examined here.
+static void validate_conv1d_shapes(Tensor output, Tensor input, Tensor weight, std::optional<Tensor> bias, size_t groups) {
+    const auto &y_dims = output->shape();
+    const auto &x_dims = input->shape();
+    const auto &w_dims = weight->shape();
+    if (!(x_dims.size() == 3 && w_dims.size() == 3 && y_dims.size() == 3)) {
+        throw std::runtime_error("conv1d expects input [N, C_in, L], weight [C_out, C_in/groups, K], and output [N, C_out, L_out]");
+    }
+    if (groups == 0) {
+        throw std::runtime_error("conv1d: groups must be greater than zero");
+    }
+    const auto c_in = x_dims[1];
+    const auto c_out = w_dims[0];
+    if (c_in % groups != 0 || c_out % groups != 0) {
+        throw std::runtime_error("conv1d: input channels and output channels must be divisible by groups");
+    }
+    if (w_dims[1] != c_in / groups) {
+        throw std::runtime_error("conv1d: weight input channels must equal input channels divided by groups");
+    }
+    if (y_dims[0] != x_dims[0] || y_dims[1] != c_out) {
+        throw std::runtime_error("conv1d: output batch or channel dimension is invalid");
+    }
+    if (bias) {
+        const auto &b_dims = (*bias)->shape();
+        if (!(b_dims.size() == 1 && b_dims[0] == c_out)) {
+            throw std::runtime_error("conv1d: bias must have shape [C_out]");
+        }
+    }
+}
+
+// Out-of-place 1-D convolution: derives the output shape [N, C_out, L_out] from input [N, C_in, L] and weight
+// [C_out, C_in/groups, K], allocates it with the input's dtype and device, and lets conv1d_ fill it.
+// Throws std::runtime_error if input or weight is not rank 3; conv1d_out_size and conv1d_ raise the remaining errors.
+Tensor conv1d(Tensor input, Tensor weight, std::optional<Tensor> bias,
+              size_t stride, size_t padding, size_t dilation, size_t groups) {
+    const auto &x_dims = input->shape();
+    const auto &w_dims = weight->shape();
+
+    // Ranks are checked up front because dims 0 and 2 are indexed below, before conv1d_ performs full validation.
+    if (!(x_dims.size() == 3 && w_dims.size() == 3)) {
+        throw std::runtime_error("conv1d expects input [N, C_in, L] and weight [C_out, C_in/groups, K]");
+    }
+
+    const size_t out_length = conv1d_out_size(x_dims[2], padding, dilation, w_dims[2], stride);
+    Shape result_shape = {x_dims[0], w_dims[0], out_length};
+
+    auto result = Tensor::empty(result_shape, input->dtype(), input->device());
+    conv1d_(result, input, weight, bias, stride, padding, dilation, groups);
+    return result;
+}
+
+// In-place 1-D convolution into `output`.
+//   output  - destination, must be [N, C_out, L_out] for the given stride/padding/dilation
+//   input   - [N, C_in, L]
+//   weight  - [C_out, C_in/groups, K]
+//   bias    - optional [C_out]; an empty Tensor is passed to the backend when absent
+// The backend is always invoked with n == 1, i.e. single-entry pads/strides/dilations arrays.
+// groups == 1 runs the backend directly on the caller's tensors. For groups > 1 each group is computed
+// separately: the input channel slice is made contiguous, the result goes to a temporary tensor, and that
+// temporary is copied into the matching channel slice of `output`.
+// Throws std::runtime_error on any shape violation or an unexpected output length.
+void conv1d_(Tensor output, Tensor input, Tensor weight, std::optional<Tensor> bias,
+             size_t stride, size_t padding, size_t dilation, size_t groups) {
+    validate_conv1d_shapes(output, input, weight, bias, groups);
+
+    const size_t required_len = conv1d_out_size(input->shape()[2], padding, dilation, weight->shape()[2], stride);
+    if (output->shape()[2] != required_len) {
+        throw std::runtime_error("conv1d: output length is invalid");
+    }
+
+    // Single group: no slicing or temporaries, the caller's tensors go straight to the backend.
+    if (groups == 1) {
+        Tensor whole_bias;
+        if (bias) {
+            whole_bias = *bias;
+        }
+        Conv1d::execute(output, input, weight, whole_bias, &padding, &stride, &dilation, 1);
+        return;
+    }
+
+    const size_t c_in_per_group = input->shape()[1] / groups;
+    const size_t c_out_per_group = weight->shape()[0] / groups;
+    for (size_t g = 0; g < groups; ++g) {
+        const size_t in_begin = g * c_in_per_group;
+        const size_t out_begin = g * c_out_per_group;
+
+        // The narrowed input view is passed through contiguous() before it reaches the backend.
+        Tensor x_part = input->narrow({{1, in_begin, c_in_per_group}})->contiguous();
+        Tensor w_part = weight->narrow({{0, out_begin, c_out_per_group}});
+        Tensor y_part = Tensor::empty({output->shape()[0], c_out_per_group, output->shape()[2]},
+                                      output->dtype(),
+                                      output->device());
+        Tensor b_part;
+        if (bias) {
+            b_part = (*bias)->narrow({{0, out_begin, c_out_per_group}});
+        }
+
+        Conv1d::execute(y_part, x_part, w_part, b_part, &padding, &stride, &dilation, 1);
+
+        // Copy this group's result into its channel range of the caller's output.
+        output->narrow({{1, out_begin, c_out_per_group}})->copy_from(y_part);
+    }
+}
+} // namespace infinicore::op
diff --git a/src/infinicore/ops/conv1d/conv1d_infiniop.cc b/src/infinicore/ops/conv1d/conv1d_infiniop.cc
@@ -0,0 +1,69 @@
+#include "../../utils.hpp"
+#include "infinicore/common/hash.hpp"
+#include "infinicore/ops/common/cache.hpp"
+#include "infinicore/ops/conv1d.hpp"
+#include <infiniop.h>
+
+namespace infinicore::op::conv1d_impl::infiniop {
+
+// Per-thread cache of infiniop conv descriptors keyed by the hash built in calculate(); the lambda destroys a descriptor and nulls it (100 is presumably the capacity — confirm against OpCache).
+thread_local common::OpCache<size_t, infiniopConvDescriptor_t> caches(100, [](infiniopConvDescriptor_t &desc) {
+    if (desc == nullptr) {
+        return;
+    }
+    INFINICORE_CHECK_ERROR(infiniopDestroyConvDescriptor(desc));
+    desc = nullptr;
+});
+
+// infiniop-backed implementation of Conv1d::schema.
+// A conv descriptor is fetched from (or created for and stored in) the current device's per-thread cache; the key
+// hashes the four tensors, n and every pads/strides/dilations entry. A workspace of the size reported by infiniop is
+// then allocated and infiniopConv is issued with the stream returned by context::getStream().
+// NOTE(review): the hash value itself is the cache key, so colliding configurations would share a descriptor.
+// NOTE(review): what hash_combine reads from a Tensor is not visible here — presumably its descriptor metadata.
+// NOTE(review): INFINICORE_CHECK_ERROR presumably throws on a non-success status — confirm in utils.hpp.
+void calculate(Tensor output, Tensor input, Tensor weight, Tensor bias,
+               const size_t *pads, const size_t *strides, const size_t *dilations, size_t n) {
+    size_t key = hash_combine(output, input, weight, bias, n);
+    for (size_t axis = 0; axis < n; ++axis) {
+        hash_combine(key, pads[axis], strides[axis], dilations[axis]);
+    }
+
+    auto device = context::getDevice();
+    auto &cache = caches.getCache(device);
+
+    infiniopConvDescriptor_t desc = nullptr;
+    if (auto cached = cache.get(key)) {
+        desc = *cached;
+    } else {
+        // The const_casts suggest the infiniop C API takes non-const pointers for these arrays.
+        INFINICORE_CHECK_ERROR(infiniopCreateConvDescriptor(
+            context::getInfiniopHandle(device), &desc,
+            output->desc(), input->desc(), weight->desc(),
+            bias ? bias->desc() : nullptr,
+            const_cast<size_t *>(pads),
+            const_cast<size_t *>(strides),
+            const_cast<size_t *>(dilations),
+            n));
+        cache.put(key, desc);
+    }
+
+    size_t scratch_size = 0;
+    INFINICORE_CHECK_ERROR(infiniopGetConvWorkspaceSize(desc, &scratch_size));
+    std::shared_ptr<Memory> scratch = context::allocateMemory(scratch_size);
+
+    INFINICORE_CHECK_ERROR(infiniopConv(
+        desc, scratch->data(), scratch_size,
+        output->data(),
+        input->data(),
+        weight->data(),
+        bias ? bias->data() : nullptr,
+        context::getStream()));
+}
+
+static bool registered = [] { // invoked once while this translation unit's statics are initialized
+    Conv1d::dispatcher().registerAll(&calculate, false); // `false` is presumably the override flag — confirm
+    return true;
+}();
+
+} // namespace infinicore::op::conv1d_impl::infiniop
diff --git a/src/infinicore/pybind11/ops.hpp b/src/infinicore/pybind11/ops.hpp
@@ -34,6 +34,7 @@
 #include "ops/cat.hpp"
 #include "ops/causal_softmax.hpp"
 #include "ops/cdist.hpp"
+#include "ops/conv1d.hpp"
 #include "ops/cross_entropy.hpp"
 #include "ops/diff.hpp"
 #include "ops/digamma.hpp"
@@ -238,6 +239,7 @@ inline void bind(py::module &m) {
     bind_atanh(m);
     bind_addcmul(m);
     bind_cdist(m);
+    bind_conv1d(m);
     bind_binary_cross_entropy_with_logits(m);
     bind_reciprocal(m);
     bind_upsample_bilinear(m);