// Review notes for this patch. Commentary only: everything from the first
// "diff --git" header onward is unchanged.
//
// Purpose: adds a 1-D convolution operator ("conv1d") to infinicore.
//
// Files touched, in patch order:
//   include/infinicore/ops.hpp                      - adds the ops/conv1d.hpp include.
//   include/infinicore/ops/conv1d.hpp (new)         - declares class Conv1d (schema, execute,
//                                                     dispatcher) and free functions conv1d / conv1d_.
//   python/infinicore/nn/functional/__init__.py     - imports conv1d and lists it in __all__.
//   python/infinicore/nn/functional/conv1d.py (new) - Python wrapper: passes the tensors'
//                                                     `_underlying` handles (bias may be None); calls
//                                                     _infinicore.conv1d when `out` is None, otherwise
//                                                     _infinicore.conv1d_ and returns `out`.
//   src/infinicore/ops/conv1d/conv1d.cc (new)       - dispatch, shape validation, per-group loop.
//   src/infinicore/ops/conv1d/conv1d_infiniop.cc (new) - infiniop-backed `calculate`, registered on
//                                                     Conv1d::dispatcher() via registerAll.
//   src/infinicore/pybind11/ops.hpp, ops/conv1d.hpp - pybind11 bindings "conv1d" and "conv1d_".
//   test/infinicore/ops/conv1d.py                   - un-comments infinicore_operator.
//
// Behavior established by the added code:
//   * Conv1d::execute asserts output/input/weight (and bias, when set) share a device, sets that
//     device, looks up the implementation for the device type, and throws std::runtime_error when
//     none is registered.
//   * conv1d_out_size returns (input + 2*padding - (dilation*(kernel-1)+1)) / stride + 1. It throws
//     std::runtime_error when stride, dilation or kernel is 0, or when the padded input is shorter
//     than the effective kernel.
//   * validate_conv1d_shapes requires rank-3 input/weight/output, groups > 0, input and output
//     channels divisible by groups, weight dim 1 == input channels / groups, output batch/channel
//     dims matching input batch / weight dim 0, and (when bias is set) a rank-1 bias of length
//     weight dim 0.
//   * conv1d allocates the output with Tensor::empty using the input's dtype and device, then
//     delegates to conv1d_.
//   * conv1d_ validates shapes and the output length, then calls Conv1d::execute once per group
//     with n = 1 and pointers to the scalar padding/stride/dilation arguments. With groups == 1
//     the tensors are passed through as-is. Otherwise each group uses a narrowed slice (the input
//     slice is additionally made contiguous()), writes into a freshly allocated per-group output,
//     and that result is copied into the matching channel slice of `output`.
//   * calculate keys its thread_local descriptor cache by a hash of (output, input, weight, bias, n)
//     combined with each pads/strides/dilations entry. On a miss it creates a conv descriptor and
//     stores it. It then queries the workspace size, allocates that much memory, and calls
//     infiniopConv with context::getStream().
//
// NOTE(review): this copy of the patch appears to have lost text inside angle brackets (bare
//   "#include" lines, "std::optional bias", "static_cast(device_type)", "const_cast(pads)",
//   "std::shared_ptr workspace") and its original line breaks/indentation - presumably an
//   extraction artifact. Confirm against the real patch before applying.
// NOTE(review): calculate passes `bias` to hash_combine unconditionally, while execute guards bias
//   with `if (bias)`. Confirm hash_combine accepts an empty Tensor.
// NOTE(review): conv1d_out_size does its arithmetic in size_t; very large padding/dilation values
//   could wrap before the range check. Confirm whether callers bound these.
diff --git a/include/infinicore/ops.hpp b/include/infinicore/ops.hpp index 747735bcd..a13c5f7ba 100644 --- a/include/infinicore/ops.hpp +++ b/include/infinicore/ops.hpp @@ -20,6 +20,7 @@ #include "ops/blas_dot.hpp" #include "ops/causal_softmax.hpp" #include "ops/cdist.hpp" +#include "ops/conv1d.hpp" #include "ops/conv2d.hpp" #include "ops/cross_entropy.hpp" #include "ops/embedding.hpp" diff --git a/include/infinicore/ops/conv1d.hpp b/include/infinicore/ops/conv1d.hpp new file mode 100644 index 000000000..dbed8968a --- /dev/null +++ b/include/infinicore/ops/conv1d.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include "../device.hpp" +#include "common/op.hpp" + +#include +#include + +namespace infinicore::op { +class Conv1d { +public: + using schema = void (*)(Tensor, Tensor, Tensor, Tensor, + const size_t *, const size_t *, const size_t *, size_t); + static void execute(Tensor output, + Tensor input, + Tensor weight, + Tensor bias, + const size_t *pads, + const size_t *strides, + const size_t *dilations, + size_t n); + static common::OpDispatcher &dispatcher(); +}; + +Tensor conv1d(Tensor input, + Tensor weight, + std::optional bias, + size_t stride, + size_t padding, + size_t dilation, + size_t groups); +void conv1d_(Tensor output, + Tensor input, + Tensor weight, + std::optional bias, + size_t stride, + size_t padding, + size_t dilation, + size_t groups); +} // namespace infinicore::op diff --git a/python/infinicore/nn/functional/__init__.py b/python/infinicore/nn/functional/__init__.py index a28128a1d..640d7d955 100644 --- a/python/infinicore/nn/functional/__init__.py +++ b/python/infinicore/nn/functional/__init__.py @@ -5,6 +5,7 @@ from .avg_pool1d import avg_pool1d from .binary_cross_entropy_with_logits import binary_cross_entropy_with_logits from .causal_softmax import causal_softmax +from .conv1d import conv1d from .embedding import embedding from .flash_attention import flash_attention from .gaussian_nll_loss import gaussian_nll_loss @@ -41,6 +42,7 @@ __all__ = [ 
"adaptive_max_pool1d", "causal_softmax", + "conv1d", "embedding", "flash_attention", "gaussian_nll_loss", diff --git a/python/infinicore/nn/functional/conv1d.py b/python/infinicore/nn/functional/conv1d.py new file mode 100644 index 000000000..dc7748105 --- /dev/null +++ b/python/infinicore/nn/functional/conv1d.py @@ -0,0 +1,41 @@ +from infinicore.lib import _infinicore +from infinicore.tensor import Tensor + + +def conv1d( + input: Tensor, + weight: Tensor, + bias: Tensor | None = None, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + *, + out: Tensor | None = None, +) -> Tensor: + bias_tensor = bias._underlying if bias is not None else None + + if out is None: + return Tensor( + _infinicore.conv1d( + input._underlying, + weight._underlying, + bias_tensor, + stride, + padding, + dilation, + groups, + ) + ) + + _infinicore.conv1d_( + out._underlying, + input._underlying, + weight._underlying, + bias_tensor, + stride, + padding, + dilation, + groups, + ) + return out diff --git a/src/infinicore/ops/conv1d/conv1d.cc b/src/infinicore/ops/conv1d/conv1d.cc new file mode 100644 index 000000000..b11e9fb0f --- /dev/null +++ b/src/infinicore/ops/conv1d/conv1d.cc @@ -0,0 +1,154 @@ +#include "infinicore/ops/conv1d.hpp" + +#include "../../utils.hpp" + +#include + +namespace infinicore::op { + +common::OpDispatcher &Conv1d::dispatcher() { + static common::OpDispatcher dispatcher_; + return dispatcher_; +} + +void Conv1d::execute(Tensor output, + Tensor input, + Tensor weight, + Tensor bias, + const size_t *pads, + const size_t *strides, + const size_t *dilations, + size_t n) { + INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input, weight); + if (bias) { + INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, bias); + } + infinicore::context::setDevice(output->device()); + auto device_type = output->device().getType(); + auto func = dispatcher().lookup(device_type); + + if (func == nullptr) { + throw std::runtime_error("No Conv1d implementation found for 
device type: " + std::to_string(static_cast(device_type))); + } + + func(output, input, weight, bias, pads, strides, dilations, n); +} + +static size_t conv1d_out_size(size_t input, size_t padding, size_t dilation, size_t kernel, size_t stride) { + if (stride == 0 || dilation == 0 || kernel == 0) { + throw std::runtime_error("conv1d: stride, dilation, and kernel size must be greater than zero"); + } + size_t effective_kernel = dilation * (kernel - 1) + 1; + size_t padded_input = input + 2 * padding; + if (padded_input < effective_kernel) { + throw std::runtime_error("Invalid conv1d output shape (negative or zero)"); + } + return (padded_input - effective_kernel) / stride + 1; +} + +static void validate_conv1d_shapes(Tensor output, + Tensor input, + Tensor weight, + std::optional bias, + size_t groups) { + const auto &out_shape = output->shape(); + const auto &in_shape = input->shape(); + const auto &w_shape = weight->shape(); + + if (in_shape.size() != 3 || w_shape.size() != 3 || out_shape.size() != 3) { + throw std::runtime_error("conv1d expects input [N, C_in, L], weight [C_out, C_in/groups, K], and output [N, C_out, L_out]"); + } + if (groups == 0) { + throw std::runtime_error("conv1d: groups must be greater than zero"); + } + if (in_shape[1] % groups != 0 || w_shape[0] % groups != 0) { + throw std::runtime_error("conv1d: input channels and output channels must be divisible by groups"); + } + if (w_shape[1] != in_shape[1] / groups) { + throw std::runtime_error("conv1d: weight input channels must equal input channels divided by groups"); + } + if (out_shape[0] != in_shape[0] || out_shape[1] != w_shape[0]) { + throw std::runtime_error("conv1d: output batch or channel dimension is invalid"); + } + if (bias) { + const auto &b_shape = (*bias)->shape(); + if (b_shape.size() != 1 || b_shape[0] != w_shape[0]) { + throw std::runtime_error("conv1d: bias must have shape [C_out]"); + } + } +} + +Tensor conv1d(Tensor input, + Tensor weight, + std::optional bias, + size_t 
stride, + size_t padding, + size_t dilation, + size_t groups) { + const auto &in_shape = input->shape(); + const auto &w_shape = weight->shape(); + if (in_shape.size() != 3 || w_shape.size() != 3) { + throw std::runtime_error("conv1d expects input [N, C_in, L] and weight [C_out, C_in/groups, K]"); + } + + size_t l_out = conv1d_out_size(in_shape[2], padding, dilation, w_shape[2], stride); + Shape out_shape = {in_shape[0], w_shape[0], l_out}; + + auto output = Tensor::empty(out_shape, input->dtype(), input->device()); + conv1d_(output, input, weight, bias, stride, padding, dilation, groups); + return output; +} + +void conv1d_(Tensor output, + Tensor input, + Tensor weight, + std::optional bias, + size_t stride, + size_t padding, + size_t dilation, + size_t groups) { + validate_conv1d_shapes(output, input, weight, bias, groups); + + size_t expected_l_out = conv1d_out_size(input->shape()[2], padding, dilation, weight->shape()[2], stride); + if (output->shape()[2] != expected_l_out) { + throw std::runtime_error("conv1d: output length is invalid"); + } + + size_t in_channels_per_group = input->shape()[1] / groups; + size_t out_channels_per_group = weight->shape()[0] / groups; + + for (size_t group = 0; group < groups; ++group) { + Tensor group_input = groups == 1 + ? input + : input->narrow({{1, group * in_channels_per_group, in_channels_per_group}})->contiguous(); + Tensor group_weight = groups == 1 + ? weight + : weight->narrow({{0, group * out_channels_per_group, out_channels_per_group}}); + Tensor group_output = groups == 1 + ? output + : Tensor::empty({output->shape()[0], out_channels_per_group, output->shape()[2]}, + output->dtype(), + output->device()); + Tensor group_bias; + if (bias) { + group_bias = groups == 1 + ? 
*bias + : (*bias)->narrow({{0, group * out_channels_per_group, out_channels_per_group}}); + } + + Conv1d::execute(group_output, + group_input, + group_weight, + group_bias, + &padding, + &stride, + &dilation, + 1); + + if (groups != 1) { + output->narrow({{1, group * out_channels_per_group, out_channels_per_group}}) + ->copy_from(group_output); + } + } +} +} // namespace infinicore::op diff --git a/src/infinicore/ops/conv1d/conv1d_infiniop.cc b/src/infinicore/ops/conv1d/conv1d_infiniop.cc new file mode 100644 index 000000000..aa90106b2 --- /dev/null +++ b/src/infinicore/ops/conv1d/conv1d_infiniop.cc @@ -0,0 +1,69 @@ +#include "../../utils.hpp" +#include "infinicore/common/hash.hpp" +#include "infinicore/ops/common/cache.hpp" +#include "infinicore/ops/conv1d.hpp" +#include + +namespace infinicore::op::conv1d_impl::infiniop { + +thread_local common::OpCache caches( + 100, + [](infiniopConvDescriptor_t &desc) { + if (desc != nullptr) { + INFINICORE_CHECK_ERROR(infiniopDestroyConvDescriptor(desc)); + desc = nullptr; + } + }); + +void calculate(Tensor output, + Tensor input, + Tensor weight, + Tensor bias, + const size_t *pads, + const size_t *strides, + const size_t *dilations, + size_t n) { + size_t seed = hash_combine(output, input, weight, bias, n); + for (size_t i = 0; i < n; ++i) { + hash_combine(seed, pads[i], strides[i], dilations[i]); + } + + auto device = context::getDevice(); + auto &cache = caches.getCache(device); + + auto desc_opt = cache.get(seed); + infiniopConvDescriptor_t desc = nullptr; + + if (!desc_opt) { + INFINICORE_CHECK_ERROR(infiniopCreateConvDescriptor( + context::getInfiniopHandle(device), &desc, + output->desc(), input->desc(), weight->desc(), + bias ? 
bias->desc() : nullptr, + const_cast(pads), + const_cast(strides), + const_cast(dilations), + n)); + cache.put(seed, desc); + } else { + desc = *desc_opt; + } + + size_t workspace_size = 0; + INFINICORE_CHECK_ERROR(infiniopGetConvWorkspaceSize(desc, &workspace_size)); + std::shared_ptr workspace = context::allocateMemory(workspace_size); + + INFINICORE_CHECK_ERROR(infiniopConv( + desc, workspace->data(), workspace_size, + output->data(), + input->data(), + weight->data(), + bias ? bias->data() : nullptr, + context::getStream())); +} + +static bool registered = []() { + Conv1d::dispatcher().registerAll(&calculate, false); + return true; +}(); + +} // namespace infinicore::op::conv1d_impl::infiniop diff --git a/src/infinicore/pybind11/ops.hpp b/src/infinicore/pybind11/ops.hpp index 0eb4fef98..ad944ed47 100644 --- a/src/infinicore/pybind11/ops.hpp +++ b/src/infinicore/pybind11/ops.hpp @@ -34,6 +34,7 @@ #include "ops/cat.hpp" #include "ops/causal_softmax.hpp" #include "ops/cdist.hpp" +#include "ops/conv1d.hpp" #include "ops/cross_entropy.hpp" #include "ops/diff.hpp" #include "ops/digamma.hpp" @@ -238,6 +239,7 @@ inline void bind(py::module &m) { bind_atanh(m); bind_addcmul(m); bind_cdist(m); + bind_conv1d(m); bind_binary_cross_entropy_with_logits(m); bind_reciprocal(m); bind_upsample_bilinear(m); diff --git a/src/infinicore/pybind11/ops/conv1d.hpp b/src/infinicore/pybind11/ops/conv1d.hpp new file mode 100644 index 000000000..baf2b3a6d --- /dev/null +++ b/src/infinicore/pybind11/ops/conv1d.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include + +#include "infinicore/ops/conv1d.hpp" + +namespace py = pybind11; + +namespace infinicore::ops { + +inline void bind_conv1d(py::module &m) { + m.def( + "conv1d", + [](::infinicore::Tensor input, + ::infinicore::Tensor weight, + std::optional<::infinicore::Tensor> bias, + size_t stride, + size_t padding, + size_t dilation, + size_t groups) { + return op::conv1d(input, weight, bias, stride, padding, dilation, groups); + }, + 
py::arg("input"), + py::arg("weight"), + py::arg("bias") = py::none(), + py::arg("stride") = 1, + py::arg("padding") = 0, + py::arg("dilation") = 1, + py::arg("groups") = 1, + R"doc(Conv1d out-of-place.)doc"); + + m.def( + "conv1d_", + [](::infinicore::Tensor output, + ::infinicore::Tensor input, + ::infinicore::Tensor weight, + std::optional<::infinicore::Tensor> bias, + size_t stride, + size_t padding, + size_t dilation, + size_t groups) { + op::conv1d_(output, input, weight, bias, stride, padding, dilation, groups); + }, + py::arg("output"), + py::arg("input"), + py::arg("weight"), + py::arg("bias") = py::none(), + py::arg("stride") = 1, + py::arg("padding") = 0, + py::arg("dilation") = 1, + py::arg("groups") = 1, + R"doc(Conv1d in-place variant writing to provided output tensor.)doc"); +} + +} // namespace infinicore::ops diff --git a/test/infinicore/ops/conv1d.py b/test/infinicore/ops/conv1d.py index 0d4e22895..36853eaa5 100644 --- a/test/infinicore/ops/conv1d.py +++ b/test/infinicore/ops/conv1d.py @@ -89,9 +89,9 @@ def get_test_cases(self): def torch_operator(self, *args, **kwargs): return torch.nn.functional.conv1d(*args, **kwargs) - # def infinicore_operator(self, *args, **kwargs): - # """InfiniCore implementation (operator not yet available).""" - # return infinicore.nn.functional.conv1d(*args, **kwargs) + def infinicore_operator(self, *args, **kwargs): + """InfiniCore implementation.""" + return infinicore.nn.functional.conv1d(*args, **kwargs) def main():