Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "ops/blas_dot.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/conv1d.hpp"
#include "ops/conv2d.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
Expand Down
40 changes: 40 additions & 0 deletions include/infinicore/ops/conv1d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

#include <cstddef>
#include <optional>

namespace infinicore::op {
/// Dispatch entry point for the convolution kernel backing conv1d.
///
/// The class exposes only static members: a per-device-type dispatch table
/// and an `execute` helper that forwards to the implementation registered
/// for the output tensor's device type.
class Conv1d {
public:
/// Function-pointer signature every registered implementation must match:
/// (output, input, weight, bias, pads, strides, dilations, n).
using schema = void (*)(Tensor, Tensor, Tensor, Tensor,
const size_t *, const size_t *, const size_t *, size_t);
/// Runs the implementation registered for `output`'s device type.
///
/// @param output    Destination tensor; its device selects the implementation.
/// @param input     Input tensor; must be on the same device as `output`.
/// @param weight    Filter tensor; must be on the same device as `output`.
/// @param bias      Optional bias; only device-checked when it is non-empty.
/// @param pads      Array of `n` padding values.
/// @param strides   Array of `n` stride values.
/// @param dilations Array of `n` dilation values.
/// @param n         Number of entries in `pads`, `strides` and `dilations`.
/// @throws std::runtime_error if no implementation is registered for the
///         device type.
static void execute(Tensor output,
Tensor input,
Tensor weight,
Tensor bias,
const size_t *pads,
const size_t *strides,
const size_t *dilations,
size_t n);
/// Returns the dispatch table that implementations register themselves in.
static common::OpDispatcher<schema> &dispatcher();
};

/// 1-D convolution that allocates and returns its result.
///
/// The result has shape [N, C_out, L_out] with
///   L_out = (L + 2 * padding - (dilation * (K - 1) + 1)) / stride + 1
/// and is created with `input`'s dtype and device.
///
/// @param input    Tensor of shape [N, C_in, L].
/// @param weight   Tensor of shape [C_out, C_in / groups, K].
/// @param bias     Optional tensor of shape [C_out].
/// @param stride   Step between kernel applications; must be non-zero.
/// @param padding  Amount added to both ends of the length axis when
///                 computing L_out.
/// @param dilation Spacing between kernel taps; must be non-zero.
/// @param groups   Number of channel groups; must be non-zero and divide
///                 both C_in and C_out.
/// @throws std::runtime_error on any shape or parameter violation.
Tensor conv1d(Tensor input,
Tensor weight,
std::optional<Tensor> bias,
size_t stride,
size_t padding,
size_t dilation,
size_t groups);
/// 1-D convolution that writes into a caller-provided `output` tensor.
///
/// `output` must already have shape [N, C_out, L_out], where
///   L_out = (L + 2 * padding - (dilation * (K - 1) + 1)) / stride + 1.
/// When `groups` is greater than one, each group is computed separately and
/// its result is copied into the matching channel slice of `output`.
///
/// @param output   Destination tensor of shape [N, C_out, L_out].
/// @param input    Tensor of shape [N, C_in, L].
/// @param weight   Tensor of shape [C_out, C_in / groups, K].
/// @param bias     Optional tensor of shape [C_out].
/// @param stride   Step between kernel applications; must be non-zero.
/// @param padding  Amount added to both ends of the length axis when
///                 computing L_out.
/// @param dilation Spacing between kernel taps; must be non-zero.
/// @param groups   Number of channel groups; must be non-zero and divide
///                 both C_in and C_out.
/// @throws std::runtime_error on any shape or parameter violation.
void conv1d_(Tensor output,
Tensor input,
Tensor weight,
std::optional<Tensor> bias,
size_t stride,
size_t padding,
size_t dilation,
size_t groups);
} // namespace infinicore::op
2 changes: 2 additions & 0 deletions python/infinicore/nn/functional/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .avg_pool1d import avg_pool1d
from .binary_cross_entropy_with_logits import binary_cross_entropy_with_logits
from .causal_softmax import causal_softmax
from .conv1d import conv1d
from .embedding import embedding
from .flash_attention import flash_attention
from .gaussian_nll_loss import gaussian_nll_loss
Expand Down Expand Up @@ -41,6 +42,7 @@
__all__ = [
"adaptive_max_pool1d",
"causal_softmax",
"conv1d",
"embedding",
"flash_attention",
"gaussian_nll_loss",
Expand Down
41 changes: 41 additions & 0 deletions python/infinicore/nn/functional/conv1d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def conv1d(
    input: Tensor,
    weight: Tensor,
    bias: Tensor | None = None,
    stride: int = 1,
    padding: int = 0,
    dilation: int = 1,
    groups: int = 1,
    *,
    out: Tensor | None = None,
) -> Tensor:
    """Apply a 1-D convolution through the native ``_infinicore`` bindings.

    Args:
        input: Input tensor.
        weight: Filter tensor.
        bias: Optional bias tensor; ``None`` is forwarded as ``None``.
        stride: Forwarded unchanged to the binding.
        padding: Forwarded unchanged to the binding.
        dilation: Forwarded unchanged to the binding.
        groups: Forwarded unchanged to the binding.
        out: Optional destination tensor. When given, the in-place binding
            ``conv1d_`` writes into it and the same object is returned.

    Returns:
        ``out`` when it was supplied, otherwise a new ``Tensor`` wrapping the
        value returned by ``_infinicore.conv1d``.
    """
    raw_bias = None if bias is None else bias._underlying
    # Arguments that follow (input, weight) are identical for both bindings.
    trailing = (raw_bias, stride, padding, dilation, groups)

    if out is not None:
        _infinicore.conv1d_(
            out._underlying,
            input._underlying,
            weight._underlying,
            *trailing,
        )
        return out

    result = _infinicore.conv1d(input._underlying, weight._underlying, *trailing)
    return Tensor(result)
154 changes: 154 additions & 0 deletions src/infinicore/ops/conv1d/conv1d.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#include "infinicore/ops/conv1d.hpp"

#include "../../utils.hpp"

#include <stdexcept>

namespace infinicore::op {

/// Returns the single dispatch table shared by all Conv1d callers.
/// The table is a function-local static, so it is constructed on first use.
common::OpDispatcher<Conv1d::schema> &Conv1d::dispatcher() {
    static common::OpDispatcher<Conv1d::schema> table;
    return table;
}

/// Forwards a convolution request to the implementation registered for the
/// device type of `output`.
///
/// All tensors (and `bias`, when non-empty) must live on the same device as
/// `output`. The current device is switched to `output`'s device before the
/// lookup.
///
/// @throws std::runtime_error if the dispatcher has no entry for the device
///         type.
void Conv1d::execute(Tensor output,
                     Tensor input,
                     Tensor weight,
                     Tensor bias,
                     const size_t *pads,
                     const size_t *strides,
                     const size_t *dilations,
                     size_t n) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input, weight);
    // An empty bias tensor means "no bias" and is exempt from the device check.
    if (bias) {
        INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, bias);
    }

    infinicore::context::setDevice(output->device());
    auto target_type = output->device().getType();
    auto kernel = dispatcher().lookup(target_type);

    if (kernel != nullptr) {
        kernel(output, input, weight, bias, pads, strides, dilations, n);
        return;
    }

    throw std::runtime_error("No Conv1d implementation found for device type: " + std::to_string(static_cast<int>(target_type)));
}

/// Computes the output length of a 1-D convolution:
///
///   L_out = (input + 2 * padding - (dilation * (kernel - 1) + 1)) / stride + 1
///
/// @param input    Length of the input along the convolved axis.
/// @param padding  Padding counted on each side of the input.
/// @param dilation Spacing between kernel taps; must be non-zero.
/// @param kernel   Number of kernel taps; must be non-zero.
/// @param stride   Step between kernel applications; must be non-zero.
/// @return The number of output positions (always at least one).
/// @throws std::runtime_error if stride, dilation or kernel is zero, if an
///         intermediate value would not fit in size_t, or if the padded input
///         is shorter than the dilated kernel.
static size_t conv1d_out_size(size_t input, size_t padding, size_t dilation, size_t kernel, size_t stride) {
    if (stride == 0 || dilation == 0 || kernel == 0) {
        throw std::runtime_error("conv1d: stride, dilation, and kernel size must be greater than zero");
    }

    // Unsigned arithmetic wraps silently, so a huge padding or dilation could
    // otherwise produce a small, plausible-looking but wrong output length.
    const size_t max_size = static_cast<size_t>(-1);
    const size_t tap_gaps = kernel - 1;
    if (tap_gaps != 0 && dilation > (max_size - 1) / tap_gaps) {
        throw std::runtime_error("conv1d: dilation and kernel size are too large");
    }
    if (padding > max_size / 2 || input > max_size - 2 * padding) {
        throw std::runtime_error("conv1d: input length and padding are too large");
    }

    const size_t effective_kernel = dilation * tap_gaps + 1;
    const size_t padded_input = input + 2 * padding;
    if (padded_input < effective_kernel) {
        throw std::runtime_error("Invalid conv1d output shape (negative or zero)");
    }
    return (padded_input - effective_kernel) / stride + 1;
}

/// Checks that the tensors handed to conv1d_ agree with each other.
///
/// Expected layouts: input [N, C_in, L], weight [C_out, C_in / groups, K],
/// output [N, C_out, L_out] and, when present, bias [C_out]. The output
/// length itself is not checked here.
///
/// @throws std::runtime_error describing the first violated constraint.
static void validate_conv1d_shapes(Tensor output,
                                   Tensor input,
                                   Tensor weight,
                                   std::optional<Tensor> bias,
                                   size_t groups) {
    const auto &y_dims = output->shape();
    const auto &x_dims = input->shape();
    const auto &k_dims = weight->shape();

    const bool all_rank3 = x_dims.size() == 3 && k_dims.size() == 3 && y_dims.size() == 3;
    if (!all_rank3) {
        throw std::runtime_error("conv1d expects input [N, C_in, L], weight [C_out, C_in/groups, K], and output [N, C_out, L_out]");
    }

    // Must be rejected before groups is used as a divisor below.
    if (groups == 0) {
        throw std::runtime_error("conv1d: groups must be greater than zero");
    }

    const bool channels_divisible = x_dims[1] % groups == 0 && k_dims[0] % groups == 0;
    if (!channels_divisible) {
        throw std::runtime_error("conv1d: input channels and output channels must be divisible by groups");
    }

    if (k_dims[1] != x_dims[1] / groups) {
        throw std::runtime_error("conv1d: weight input channels must equal input channels divided by groups");
    }

    const bool batch_and_channels_match = y_dims[0] == x_dims[0] && y_dims[1] == k_dims[0];
    if (!batch_and_channels_match) {
        throw std::runtime_error("conv1d: output batch or channel dimension is invalid");
    }

    if (!bias.has_value()) {
        return;
    }
    const auto &bias_dims = (*bias)->shape();
    const bool bias_ok = bias_dims.size() == 1 && bias_dims[0] == k_dims[0];
    if (!bias_ok) {
        throw std::runtime_error("conv1d: bias must have shape [C_out]");
    }
}

/// Allocating form of the 1-D convolution.
///
/// Derives the result shape [N, C_out, L_out] from `input` and `weight`,
/// creates a tensor of that shape with `input`'s dtype and device, and lets
/// conv1d_ fill it.
///
/// @throws std::runtime_error if `input` or `weight` is not 3-dimensional, or
///         for any failure raised while computing the output length or in
///         conv1d_.
Tensor conv1d(Tensor input,
              Tensor weight,
              std::optional<Tensor> bias,
              size_t stride,
              size_t padding,
              size_t dilation,
              size_t groups) {
    const auto &x_dims = input->shape();
    const auto &k_dims = weight->shape();

    const bool ranks_ok = x_dims.size() == 3 && k_dims.size() == 3;
    if (!ranks_ok) {
        throw std::runtime_error("conv1d expects input [N, C_in, L] and weight [C_out, C_in/groups, K]");
    }

    size_t out_len = conv1d_out_size(x_dims[2], padding, dilation, k_dims[2], stride);
    Shape result_dims = {x_dims[0], k_dims[0], out_len};

    auto result = Tensor::empty(result_dims, input->dtype(), input->device());
    // Remaining validation (groups, channel counts, bias) happens in conv1d_.
    conv1d_(result, input, weight, bias, stride, padding, dilation, groups);
    return result;
}

/// In-place form of the 1-D convolution: writes the result into `output`.
///
/// After validating shapes and the output length, the work is handed to
/// Conv1d::execute once per group with single-element pad/stride/dilation
/// arrays. With `groups == 1` the caller's tensors are used directly;
/// otherwise each group runs on channel slices and its result is copied back
/// into the matching slice of `output`.
///
/// @throws std::runtime_error on any shape, parameter or output-length
///         violation.
void conv1d_(Tensor output,
             Tensor input,
             Tensor weight,
             std::optional<Tensor> bias,
             size_t stride,
             size_t padding,
             size_t dilation,
             size_t groups) {
    validate_conv1d_shapes(output, input, weight, bias, groups);

    size_t required_len = conv1d_out_size(input->shape()[2], padding, dilation, weight->shape()[2], stride);
    if (output->shape()[2] != required_len) {
        throw std::runtime_error("conv1d: output length is invalid");
    }

    // Ungrouped case: one call straight on the caller's tensors, no slicing
    // and no temporary output.
    if (groups == 1) {
        Tensor whole_bias;
        if (bias) {
            whole_bias = *bias;
        }
        Conv1d::execute(output, input, weight, whole_bias, &padding, &stride, &dilation, 1);
        return;
    }

    size_t cin_per_group = input->shape()[1] / groups;
    size_t cout_per_group = weight->shape()[0] / groups;

    for (size_t g = 0; g < groups; ++g) {
        size_t cin_begin = g * cin_per_group;
        size_t cout_begin = g * cout_per_group;

        // The channel slice of the input is made contiguous before use.
        Tensor x_slice = input->narrow({{1, cin_begin, cin_per_group}})->contiguous();
        Tensor w_slice = weight->narrow({{0, cout_begin, cout_per_group}});
        // Each group writes to a fresh temporary that is copied back below.
        Tensor y_slice = Tensor::empty({output->shape()[0], cout_per_group, output->shape()[2]},
                                       output->dtype(),
                                       output->device());
        Tensor b_slice;
        if (bias) {
            b_slice = (*bias)->narrow({{0, cout_begin, cout_per_group}});
        }

        Conv1d::execute(y_slice, x_slice, w_slice, b_slice, &padding, &stride, &dilation, 1);

        output->narrow({{1, cout_begin, cout_per_group}})
            ->copy_from(y_slice);
    }
}
} // namespace infinicore::op
69 changes: 69 additions & 0 deletions src/infinicore/ops/conv1d/conv1d_infiniop.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/conv1d.hpp"
#include <infiniop.h>

namespace infinicore::op::conv1d_impl::infiniop {

// Per-thread cache of convolution descriptors keyed by a size_t hash.
// The callback destroys a stored descriptor and resets the handle to null.
// NOTE(review): the first argument (100) is presumably the cache capacity —
// confirm against common::OpCache.
thread_local common::OpCache<size_t, infiniopConvDescriptor_t> caches(
    100,
    [](infiniopConvDescriptor_t &cached) {
        if (cached == nullptr) {
            return;
        }
        INFINICORE_CHECK_ERROR(infiniopDestroyConvDescriptor(cached));
        cached = nullptr;
    });

/// infiniop-backed convolution implementation registered with Conv1d.
///
/// Builds a cache key from the tensors, `n` and every pad/stride/dilation
/// entry, reuses a cached descriptor for that key when one exists (creating
/// and storing it otherwise), allocates the workspace the descriptor asks
/// for, and launches the convolution on the current stream.
///
/// @param pads, strides, dilations Arrays of `n` entries each.
/// @param bias Optional; an empty tensor is passed to infiniop as nullptr.
void calculate(Tensor output,
               Tensor input,
               Tensor weight,
               Tensor bias,
               const size_t *pads,
               const size_t *strides,
               const size_t *dilations,
               size_t n) {
    size_t key = hash_combine(output, input, weight, bias, n);
    for (size_t axis = 0; axis < n; ++axis) {
        hash_combine(key, pads[axis], strides[axis], dilations[axis]);
    }

    auto device = context::getDevice();
    auto &device_cache = caches.getCache(device);

    infiniopConvDescriptor_t desc = nullptr;
    auto cached = device_cache.get(key);
    if (cached) {
        desc = *cached;
    } else {
        // The infiniop entry point takes non-const pointers, hence the casts.
        INFINICORE_CHECK_ERROR(infiniopCreateConvDescriptor(
            context::getInfiniopHandle(device), &desc,
            output->desc(), input->desc(), weight->desc(),
            bias ? bias->desc() : nullptr,
            const_cast<size_t *>(pads),
            const_cast<size_t *>(strides),
            const_cast<size_t *>(dilations),
            n));
        device_cache.put(key, desc);
    }

    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetConvWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);

    INFINICORE_CHECK_ERROR(infiniopConv(
        desc, workspace->data(), workspace_size,
        output->data(),
        input->data(),
        weight->data(),
        bias ? bias->data() : nullptr,
        context::getStream()));
}

// Registers `calculate` with the Conv1d dispatcher while this translation
// unit's statics are initialized; the bool only exists so the lambda runs.
// NOTE(review): the `false` argument is presumably an "override existing
// entry" flag — confirm against OpDispatcher::registerAll.
static bool registered = []() {
Conv1d::dispatcher().registerAll(&calculate, false);
return true;
}();

} // namespace infinicore::op::conv1d_impl::infiniop
2 changes: 2 additions & 0 deletions src/infinicore/pybind11/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "ops/cat.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/conv1d.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/diff.hpp"
#include "ops/digamma.hpp"
Expand Down Expand Up @@ -238,6 +239,7 @@ inline void bind(py::module &m) {
bind_atanh(m);
bind_addcmul(m);
bind_cdist(m);
bind_conv1d(m);
bind_binary_cross_entropy_with_logits(m);
bind_reciprocal(m);
bind_upsample_bilinear(m);
Expand Down
Loading
Loading