Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
#include "ops/add_rms_norm.hpp"
#include "ops/asinh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/fmod.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
Expand Down
18 changes: 18 additions & 0 deletions include/infinicore/ops/avg_pool1d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// 1-D average pooling operator: dispatch entry for backend implementations.
class AvgPool1d {
public:
// Backend kernel signature: (output, input, kernel_size, stride, padding).
using schema = void (*)(Tensor, Tensor, size_t, size_t, size_t);
// Runs the kernel registered for the current device on (output, input).
static void execute(Tensor output, Tensor input, size_t kernel_size, size_t stride, size_t padding);
// Per-device-type dispatcher holding the registered backend kernels.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the pooled output tensor.
// NOTE(review): stride = 0 presumably means "default to kernel_size"
// (PyTorch convention) — confirm against the backend implementation.
Tensor avg_pool1d(Tensor input, size_t kernel_size, size_t stride = 0, size_t padding = 0);
// Explicit-output variant: writes the result into a caller-provided tensor.
void avg_pool1d_(Tensor output, Tensor input, size_t kernel_size, size_t stride = 0, size_t padding = 0);

} // namespace infinicore::op
35 changes: 35 additions & 0 deletions include/infinicore/ops/cross_entropy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Cross-entropy loss operator: dispatch entry for backend implementations.
class CrossEntropy {
public:
// Schema: backend kernel function-pointer type.
// CrossEntropy takes three tensors: output (loss), input (logits), target (labels).
using schema = void (*)(Tensor, Tensor, Tensor);

// Execution entry point: runs the kernel registered for the current device.
static void execute(Tensor output, Tensor input, Tensor target);

// Accessor for the per-device-type dispatcher.
static common::OpDispatcher<schema> &dispatcher();
};

// ==================================================================
// Public functional API
// ==================================================================

// 1. Out-of-place interface:
// takes logits and target, creates the output tensor internally and returns it.
Tensor cross_entropy(Tensor input, Tensor target);

// 2. Explicit-output interface (in-place style):
// the caller provides the output tensor that receives the result.
// Note: despite the trailing underscore, cross-entropy cannot truly modify
// `input` in place — the suffix only means "write into the given output memory".
void cross_entropy_(Tensor output, Tensor input, Tensor target);

} // namespace infinicore::op
19 changes: 19 additions & 0 deletions include/infinicore/ops/equal.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Element-wise equality comparison operator: dispatch entry for backends.
class Equal {
public:
// Backend kernel signature: (out, a, b).
using schema = void (*)(Tensor, Tensor, Tensor);

// Runs the kernel registered for the current device.
static void execute(Tensor out, Tensor a, Tensor b);
// Per-device-type dispatcher holding the registered backend kernels.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the comparison result tensor.
Tensor equal(Tensor a, Tensor b);
// Explicit-output variant: writes the result into a caller-provided tensor.
void equal_(Tensor out, Tensor a, Tensor b);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/hardswish.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Hardswish activation operator: dispatch entry for backend implementations.
class Hardswish {
public:
// Backend kernel signature: (output, input).
using schema = void (*)(Tensor, Tensor);
// Runs the kernel registered for the current device.
static void execute(Tensor output, Tensor input);
// Per-device-type dispatcher holding the registered backend kernels.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the activated output tensor.
Tensor hardswish(Tensor input);
// Explicit-output variant: writes the result into a caller-provided tensor.
void hardswish_(Tensor output, Tensor input);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/hardtanh.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// HardTanh activation operator (clamp to [min_val, max_val]):
// dispatch entry for backend implementations.
class HardTanh {
public:
// Backend kernel signature: (output, input, min_val, max_val).
using schema = void (*)(Tensor, Tensor, float, float);
// Runs the kernel registered for the current device.
static void execute(Tensor output, Tensor input, float min_val, float max_val);
// Per-device-type dispatcher holding the registered backend kernels.
static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the activated output tensor.
// Defaults match the conventional hardtanh range [-1, 1].
Tensor hardtanh(Tensor input, float min_val = -1.0f, float max_val = 1.0f);
// Explicit-output variant: writes the result into a caller-provided tensor.
void hardtanh_(Tensor output, Tensor input, float min_val = -1.0f, float max_val = 1.0f);

} // namespace infinicore::op
6 changes: 6 additions & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,10 @@
#include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h"

#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardtanh.h"

#endif // __INFINIOP_API_H__
32 changes: 32 additions & 0 deletions include/infiniop/ops/avg_pool1d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef __INFINIOP_AVG_POOL1D_API_H__
#define __INFINIOP_AVG_POOL1D_API_H__

#include "../operator_descriptor.h"

/* Opaque descriptor handle for the 1-D average pooling operator. */
typedef struct InfiniopDescriptor *infiniopAvgPool1dDescriptor_t;

/* Creates a descriptor binding the output/input tensor layouts and the
 * pooling hyper-parameters (kernel_size, stride, padding). */
__INFINI_C __export infiniStatus_t infiniopCreateAvgPool1dDescriptor(
    infiniopHandle_t handle,
    infiniopAvgPool1dDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output,
    infiniopTensorDescriptor_t input,
    size_t kernel_size,
    size_t stride,
    size_t padding);

/* Queries the scratch workspace size (bytes) required by infiniopAvgPool1d. */
__INFINI_C __export infiniStatus_t infiniopGetAvgPool1dWorkspaceSize(
    infiniopAvgPool1dDescriptor_t desc,
    size_t *size);

/* Executes the pooling; `stream` is the backend execution stream (may be
 * NULL for synchronous backends — confirm per backend). */
__INFINI_C __export infiniStatus_t infiniopAvgPool1d(
    infiniopAvgPool1dDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *output,
    const void *input,
    void *stream);

/* Releases all resources owned by the descriptor. */
__INFINI_C __export infiniStatus_t infiniopDestroyAvgPool1dDescriptor(
    infiniopAvgPool1dDescriptor_t desc);

#endif
31 changes: 31 additions & 0 deletions include/infiniop/ops/cross_entropy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef __INFINIOP_CROSS_ENTROPY_API_H__
#define __INFINIOP_CROSS_ENTROPY_API_H__

#include "../operator_descriptor.h"

/* Opaque descriptor handle for the cross-entropy loss operator. */
typedef struct InfiniopDescriptor *infiniopCrossEntropyDescriptor_t;

/* Creates a descriptor binding the tensor layouts:
 * y = loss output, x = logits input, target = labels. */
__INFINI_C __export infiniStatus_t infiniopCreateCrossEntropyDescriptor(
    infiniopHandle_t handle,
    infiniopCrossEntropyDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    infiniopTensorDescriptor_t target_desc);

/* Queries the scratch workspace size (bytes) required by infiniopCrossEntropy. */
__INFINI_C __export infiniStatus_t infiniopGetCrossEntropyWorkspaceSize(
    infiniopCrossEntropyDescriptor_t desc,
    size_t *size);

/* Computes the loss into y from logits x and labels target on `stream`. */
__INFINI_C __export infiniStatus_t infiniopCrossEntropy(
    infiniopCrossEntropyDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    const void *target,
    void *stream);

/* Releases all resources owned by the descriptor. */
__INFINI_C __export infiniStatus_t infiniopDestroyCrossEntropyDescriptor(
    infiniopCrossEntropyDescriptor_t desc);

#endif
31 changes: 31 additions & 0 deletions include/infiniop/ops/equal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef __INFINIOP_EQUAL_API_H__
#define __INFINIOP_EQUAL_API_H__

#include "../operator_descriptor.h"

/* Opaque descriptor handle for the element-wise equality operator. */
typedef struct InfiniopDescriptor *infiniopEqualDescriptor_t;

/* Creates a descriptor binding the tensor layouts: c = result, a/b = operands. */
__INFINI_C __export infiniStatus_t infiniopCreateEqualDescriptor(
    infiniopHandle_t handle,
    infiniopEqualDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t c,
    infiniopTensorDescriptor_t a,
    infiniopTensorDescriptor_t b);

/* Queries the scratch workspace size (bytes) required by infiniopEqual. */
__INFINI_C __export infiniStatus_t infiniopGetEqualWorkspaceSize(
    infiniopEqualDescriptor_t desc,
    size_t *size);

/* Computes c = (a == b) element-wise on `stream`. */
__INFINI_C __export infiniStatus_t infiniopEqual(
    infiniopEqualDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *c,
    const void *a,
    const void *b,
    void *stream);

/* Releases all resources owned by the descriptor. */
__INFINI_C __export infiniStatus_t infiniopDestroyEqualDescriptor(
    infiniopEqualDescriptor_t desc);

#endif
29 changes: 29 additions & 0 deletions include/infiniop/ops/hardswish.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef __INFINIOP_HARDSWISH_API_H__
#define __INFINIOP_HARDSWISH_API_H__

#include "../operator_descriptor.h"

/* Opaque descriptor handle for the Hardswish activation operator. */
typedef struct InfiniopDescriptor *infiniopHardSwishDescriptor_t;

/* Creates a descriptor binding the output/input tensor layouts. */
__INFINI_C __export infiniStatus_t infiniopCreateHardSwishDescriptor(
    infiniopHandle_t handle,
    infiniopHardSwishDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output,
    infiniopTensorDescriptor_t input);

/* Queries the scratch workspace size (bytes) required by infiniopHardSwish. */
__INFINI_C __export infiniStatus_t infiniopGetHardSwishWorkspaceSize(
    infiniopHardSwishDescriptor_t desc,
    size_t *size);

/* Applies the activation element-wise from input to output on `stream`. */
__INFINI_C __export infiniStatus_t infiniopHardSwish(
    infiniopHardSwishDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *output,
    const void *input,
    void *stream);

/* Releases all resources owned by the descriptor. */
__INFINI_C __export infiniStatus_t infiniopDestroyHardSwishDescriptor(
    infiniopHardSwishDescriptor_t desc);

#endif
27 changes: 27 additions & 0 deletions include/infiniop/ops/hardtanh.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef __INFINIOP_HARDTANH_API_H__
#define __INFINIOP_HARDTANH_API_H__

#include "../operator_descriptor.h"

/* Opaque descriptor handle for the HardTanh activation operator. */
typedef struct InfiniopDescriptor *infiniopHardTanhDescriptor_t;

/* Creates a descriptor binding the output/input tensor layouts and the
 * clamp bounds [min_val, max_val]. */
__INFINI_C __export infiniStatus_t infiniopCreateHardTanhDescriptor(infiniopHandle_t handle,
                                                                    infiniopHardTanhDescriptor_t *desc_ptr,
                                                                    infiniopTensorDescriptor_t output,
                                                                    infiniopTensorDescriptor_t input,
                                                                    float min_val,
                                                                    float max_val);

/* Queries the scratch workspace size (bytes) required by infiniopHardTanh. */
__INFINI_C __export infiniStatus_t infiniopGetHardTanhWorkspaceSize(infiniopHardTanhDescriptor_t desc,
                                                                    size_t *size);

/* Applies the activation element-wise from input to output on `stream`. */
__INFINI_C __export infiniStatus_t infiniopHardTanh(infiniopHardTanhDescriptor_t desc,
                                                    void *workspace,
                                                    size_t workspace_size,
                                                    void *output,
                                                    const void *input,
                                                    void *stream);

/* Releases all resources owned by the descriptor. */
__INFINI_C __export infiniStatus_t infiniopDestroyHardTanhDescriptor(infiniopHardTanhDescriptor_t desc);

#endif
4 changes: 4 additions & 0 deletions python/infinicore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
from infinicore.ops.attention import attention
from infinicore.ops.baddbmm import baddbmm
from infinicore.ops.bilinear import bilinear
from infinicore.ops.cross_entropy import cross_entropy
from infinicore.ops.equal import equal
from infinicore.ops.fmod import fmod
from infinicore.ops.kv_caching import kv_caching
from infinicore.ops.matmul import matmul
Expand Down Expand Up @@ -132,11 +134,13 @@
"bilinear",
"fmod",
"matmul",
"equal",
"mul",
"narrow",
"squeeze",
"unsqueeze",
"rearrange",
"cross_entropy",
"empty",
"empty_like",
"from_blob",
Expand Down
6 changes: 6 additions & 0 deletions python/infinicore/nn/functional/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from .adaptive_max_pool1d import adaptive_max_pool1d
from .avg_pool1d import avg_pool1d
from .causal_softmax import causal_softmax
from .embedding import embedding
from .flash_attention import flash_attention
from .hardswish import hardswish
from .hardtanh import hardtanh
from .linear import linear
from .linear_w8a8i8 import linear_w8a8i8
from .random_sample import random_sample
Expand All @@ -22,6 +25,9 @@
"RopeAlgo",
"rope",
"silu",
"hardswish",
"hardtanh",
"avg_pool1d",
"swiglu",
"linear_w8a8i8",
"silu_and_mul",
Expand Down
24 changes: 24 additions & 0 deletions python/infinicore/nn/functional/avg_pool1d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def avg_pool1d(
    input: Tensor,
    kernel_size: int,
    stride: int | None = None,
    padding: int = 0,
    *,
    out=None,
) -> Tensor:
    """Apply 1-D average pooling over ``input``.

    When ``stride`` is ``None`` it is forwarded to the backend as 0
    (the backend's "use default" sentinel). If ``out`` is given, the
    result is written into it and ``out`` is returned; otherwise a new
    tensor is allocated and returned.
    """
    effective_stride = 0 if stride is None else stride

    if out is not None:
        _infinicore.avg_pool1d_(
            out._underlying, input._underlying, kernel_size, effective_stride, padding
        )
        return out

    raw_result = _infinicore.avg_pool1d(
        input._underlying, kernel_size, effective_stride, padding
    )
    return Tensor(raw_result)
28 changes: 28 additions & 0 deletions python/infinicore/nn/functional/hardswish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def hardswish(input: Tensor, inplace: bool = False, *, out=None) -> Tensor:
    r"""Apply the Hardswish activation function element-wise."""

    # Prefer the ntops fast path when it is available for this device
    # and the caller did not request an explicit output tensor.
    ntops_eligible = (
        infinicore.use_ntops
        and input.device.type in ("cuda", "musa")
        and out is None
        and hasattr(infinicore.ntops.torch, "hardswish")
    )
    if ntops_eligible:
        try:
            return infinicore.ntops.torch.hardswish(input, inplace=inplace)
        except AttributeError:
            # Fall back to the native implementation below.
            pass

    if inplace:
        # Write the result back into the input buffer.
        _infinicore.hardswish_(input._underlying, input._underlying)
        return input

    if out is not None:
        _infinicore.hardswish_(out._underlying, input._underlying)
        return out

    return Tensor(_infinicore.hardswish(input._underlying))
Loading