Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
#pragma once

#include "ops/adaptive_max_pool1d.hpp"
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/asinh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/fmod.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
Expand Down
16 changes: 16 additions & 0 deletions include/infinicore/ops/adaptive_max_pool1d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the adaptive_max_pool1d operator.
///
/// NOTE(review): presumably 1-D adaptive max pooling; the kernels are not
/// visible from this header — confirm against the implementation.
class AdaptiveMaxPool1d {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor y, Tensor x, size_t output_size);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point.
    /// `y` is presumably the destination and `x` the source (compare the
    /// functional form below, which takes only `x` and returns a Tensor).
    static void execute(Tensor y, Tensor x, size_t output_size);
};

/// Explicit-output form: takes the tensor `y` as first argument and
/// returns nothing.
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size);

/// Functional form: returns the result as a Tensor.
Tensor adaptive_max_pool1d(Tensor x, size_t output_size);

} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/asinh.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the asinh operator.
///
/// NOTE(review): presumably element-wise inverse hyperbolic sine; the kernels
/// are not visible from this header — confirm against the implementation.
class Asinh {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor y, Tensor x);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point.
    /// `y` is presumably the destination and `x` the source (compare
    /// asinh(x) below, which returns a Tensor).
    static void execute(Tensor y, Tensor x);
};

/// Explicit-output form: takes the tensor `y` as first argument and
/// returns nothing.
void asinh_(Tensor y, Tensor x);

/// Functional form: returns the result as a Tensor.
Tensor asinh(Tensor x);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/avg_pool1d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the avg_pool1d operator.
///
/// NOTE(review): presumably 1-D average pooling; the kernels are not visible
/// from this header — confirm against the implementation.
class AvgPool1d {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor output,
                            Tensor input,
                            size_t kernel_size,
                            size_t stride,
                            size_t padding);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point; unlike the free functions below, every
    /// argument is mandatory here.
    static void execute(Tensor output,
                        Tensor input,
                        size_t kernel_size,
                        size_t stride,
                        size_t padding);
};

/// Explicit-output form: takes the `output` tensor as first argument and
/// returns nothing.
///
/// NOTE(review): `stride` defaults to 0, which is presumably interpreted as
/// "use kernel_size" by the implementation — confirm.
void avg_pool1d_(Tensor output,
                 Tensor input,
                 size_t kernel_size,
                 size_t stride = 0,
                 size_t padding = 0);

/// Functional form: returns the result as a Tensor. Same defaults as
/// avg_pool1d_().
Tensor avg_pool1d(Tensor input,
                  size_t kernel_size,
                  size_t stride = 0,
                  size_t padding = 0);

} // namespace infinicore::op
15 changes: 15 additions & 0 deletions include/infinicore/ops/baddbmm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>

namespace infinicore::op {

// NOTE(review): presumably out = beta * input + alpha * (batch1 @ batch2),
// as in torch.baddbmm — the implementation is not visible here; confirm.

/// Explicit-output form: takes the `out` tensor as first argument and
/// returns nothing. Both scaling factors default to 1.0f.
void baddbmm_(Tensor out, Tensor input, Tensor batch1, Tensor batch2, float beta = 1.0f, float alpha = 1.0f);

/// Functional form: returns the result as a Tensor. Both scaling factors
/// default to 1.0f.
Tensor baddbmm(Tensor input, Tensor batch1, Tensor batch2, float beta = 1.0f, float alpha = 1.0f);

} // namespace infinicore::op
12 changes: 12 additions & 0 deletions include/infinicore/ops/bilinear.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>

namespace infinicore::op {

// NOTE(review): presumably the bilinear transform of torch.nn.functional.bilinear
// (x1^T * weight * x2 + bias) — the implementation is not visible here; confirm.
//
// `bias` is optional. It defaults to std::nullopt so callers without a bias
// do not have to spell the empty optional out; passing it explicitly keeps
// working unchanged. The default must live only in this declaration — do not
// repeat it on the out-of-line definition.

/// Functional form: returns the result as a Tensor.
Tensor bilinear(Tensor x1,
                Tensor x2,
                Tensor weight,
                std::optional<Tensor> bias = std::nullopt);

/// Explicit-output form: takes the `out` tensor as first argument and
/// returns nothing.
void bilinear_(Tensor out,
               Tensor x1,
               Tensor x2,
               Tensor weight,
               std::optional<Tensor> bias = std::nullopt);

} // namespace infinicore::op
35 changes: 35 additions & 0 deletions include/infinicore/ops/cross_entropy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the cross_entropy operator.
class CrossEntropy {
public:
    // Schema definition: the kernel function-pointer type.
    // CrossEntropy takes three tensors: output (loss), input (logits) and
    // target (labels).
    using schema = void (*)(Tensor output, Tensor input, Tensor target);

    // Accessor for the dispatcher.
    static common::OpDispatcher<schema> &dispatcher();

    // Execution entry point.
    static void execute(Tensor output, Tensor input, Tensor target);
};

// ------------------------------------------------------------------
// Public functional API
// ------------------------------------------------------------------

// Explicit-output interface (in-place-like style):
// the caller supplies the output tensor that receives the result.
// Note: despite the trailing underscore, CrossEntropy generally cannot
// modify `input` in place; the suffix only means "write into the given
// output memory".
void cross_entropy_(Tensor output, Tensor input, Tensor target);

// Out-of-place interface:
// takes the logits and the target, creates the output tensor internally
// and returns it.
Tensor cross_entropy(Tensor input, Tensor target);

} // namespace infinicore::op
19 changes: 19 additions & 0 deletions include/infinicore/ops/equal.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the equal operator.
///
/// NOTE(review): this header does not show whether the comparison is
/// element-wise (torch.eq-like) or whole-tensor (torch.equal-like) —
/// confirm against the implementation.
class Equal {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor out, Tensor a, Tensor b);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point; `out` is presumably the destination of the
    /// comparison of `a` and `b`.
    static void execute(Tensor out, Tensor a, Tensor b);
};

/// Explicit-output form: takes the `out` tensor as first argument and
/// returns nothing.
void equal_(Tensor out, Tensor a, Tensor b);

/// Functional form: returns the result as a Tensor.
Tensor equal(Tensor a, Tensor b);

} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/fmod.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the fmod operator.
///
/// NOTE(review): presumably the element-wise floating-point remainder of
/// `a` divided by `b`; the kernels are not visible from this header — confirm.
class Fmod {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor c, Tensor a, Tensor b);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point; `c` is presumably the destination (compare
    /// fmod(a, b) below, which returns a Tensor).
    static void execute(Tensor c, Tensor a, Tensor b);
};

/// Explicit-output form: takes the tensor `c` as first argument and
/// returns nothing.
void fmod_(Tensor c, Tensor a, Tensor b);

/// Functional form: returns the result as a Tensor.
Tensor fmod(Tensor a, Tensor b);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/hardswish.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the hardswish operator.
///
/// NOTE(review): presumably the element-wise hard-swish activation; the
/// kernels are not visible from this header — confirm.
class Hardswish {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor output, Tensor input);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point taking the `output` and `input` tensors.
    static void execute(Tensor output, Tensor input);
};

/// Explicit-output form: takes the `output` tensor as first argument and
/// returns nothing.
void hardswish_(Tensor output, Tensor input);

/// Functional form: returns the result as a Tensor.
Tensor hardswish(Tensor input);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/hardtanh.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the hardtanh operator.
///
/// NOTE(review): presumably clamps each element of `input` to
/// [min_val, max_val]; the kernels are not visible from this header — confirm.
class HardTanh {
public:
    /// Kernel function-pointer type; arguments are ordered like execute().
    using schema = void (*)(Tensor output, Tensor input, float min_val, float max_val);

    /// Accessor for the OpDispatcher parameterized on `schema`.
    static common::OpDispatcher<schema> &dispatcher();

    /// Operator entry point; unlike the free functions below, both bounds
    /// are mandatory here.
    static void execute(Tensor output, Tensor input, float min_val, float max_val);
};

/// Explicit-output form: takes the `output` tensor as first argument and
/// returns nothing. Bounds default to -1.0f and 1.0f.
void hardtanh_(Tensor output,
               Tensor input,
               float min_val = -1.0f,
               float max_val = 1.0f);

/// Functional form: returns the result as a Tensor. Bounds default to
/// -1.0f and 1.0f.
Tensor hardtanh(Tensor input,
                float min_val = -1.0f,
                float max_val = 1.0f);

} // namespace infinicore::op
9 changes: 9 additions & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
#define __INFINIOP_API_H__

#include "infiniop/handle.h"
#include "infiniop/ops/adaptive_max_pool1d.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/asinh.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/embedding.h"
#include "infiniop/ops/flash_attention.h"
#include "infiniop/ops/fmod.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/int8_gemm.h"
Expand Down Expand Up @@ -42,4 +45,10 @@
#include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h"

#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardtanh.h"

#endif // __INFINIOP_API_H__
22 changes: 22 additions & 0 deletions include/infiniop/ops/adaptive_max_pool1d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// C API for the adaptive_max_pool1d operator.
// The include guard follows the __INFINIOP_<OP>_API_H__ naming pattern.
#ifndef __INFINIOP_ADAPTIVE_MAX_POOL1D_API_H__
#define __INFINIOP_ADAPTIVE_MAX_POOL1D_API_H__

#include "../operator_descriptor.h"

// Descriptor handle for the adaptive_max_pool1d operator.
typedef struct InfiniopDescriptor *infiniopAdaptiveMaxPool1dDescriptor_t;

// Creates an operator descriptor from the output (y_desc) and input (x_desc)
// tensor descriptors and the requested output_size.
// Presumably stores the new descriptor in *desc — confirm against the
// implementation.
__INFINI_C __export infiniStatus_t infiniopCreateAdaptiveMaxPool1dDescriptor(
    infiniopHandle_t handle,
    infiniopAdaptiveMaxPool1dDescriptor_t *desc,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    size_t output_size);

// Queries the workspace size the operator needs; reported through *size.
__INFINI_C __export infiniStatus_t infiniopGetAdaptiveMaxPool1dWorkspaceSize(
    infiniopAdaptiveMaxPool1dDescriptor_t desc,
    size_t *size);

// Runs the operator: reads from x (const) and writes to y, using the
// caller-provided workspace. `stream` is an opaque void pointer whose meaning
// is backend-defined (not visible from this header).
__INFINI_C __export infiniStatus_t infiniopAdaptiveMaxPool1d(
    infiniopAdaptiveMaxPool1dDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream);

// Destroys a descriptor previously obtained from the create call.
__INFINI_C __export infiniStatus_t infiniopDestroyAdaptiveMaxPool1dDescriptor(
    infiniopAdaptiveMaxPool1dDescriptor_t desc);

#endif // __INFINIOP_ADAPTIVE_MAX_POOL1D_API_H__
24 changes: 24 additions & 0 deletions include/infiniop/ops/asinh.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ASINH_API_H__
#define __INFINIOP_ASINH_API_H__

#include "../operator_descriptor.h"

// Descriptor handle for the asinh operator.
typedef struct InfiniopDescriptor *infiniopAsinhDescriptor_t;

// Creates an operator descriptor from the output (y) and input (x) tensor
// descriptors. Presumably stores the new descriptor in *desc_ptr — confirm
// against the implementation.
__INFINI_C __export infiniStatus_t infiniopCreateAsinhDescriptor(
    infiniopHandle_t handle,
    infiniopAsinhDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y,
    infiniopTensorDescriptor_t x);

// Queries the workspace size the operator needs; reported through *size.
__INFINI_C __export infiniStatus_t infiniopGetAsinhWorkspaceSize(
    infiniopAsinhDescriptor_t desc,
    size_t *size);

// Runs the operator: reads from x (const) and writes to y, using the
// caller-provided workspace. `stream` is an opaque void pointer whose meaning
// is backend-defined (not visible from this header).
__INFINI_C __export infiniStatus_t infiniopAsinh(
    infiniopAsinhDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream);

// Destroys a descriptor previously obtained from the create call.
__INFINI_C __export infiniStatus_t infiniopDestroyAsinhDescriptor(
    infiniopAsinhDescriptor_t desc);

#endif
32 changes: 32 additions & 0 deletions include/infiniop/ops/avg_pool1d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef __INFINIOP_AVG_POOL1D_API_H__
#define __INFINIOP_AVG_POOL1D_API_H__

#include "../operator_descriptor.h"

// Descriptor handle for the avg_pool1d operator.
typedef struct InfiniopDescriptor *infiniopAvgPool1dDescriptor_t;

// Creates an operator descriptor from the output/input tensor descriptors and
// the pooling parameters (kernel_size, stride, padding). Presumably stores the
// new descriptor in *desc_ptr — confirm against the implementation.
__INFINI_C __export infiniStatus_t infiniopCreateAvgPool1dDescriptor(infiniopHandle_t handle,
                                                                     infiniopAvgPool1dDescriptor_t *desc_ptr,
                                                                     infiniopTensorDescriptor_t output,
                                                                     infiniopTensorDescriptor_t input,
                                                                     size_t kernel_size,
                                                                     size_t stride,
                                                                     size_t padding);

// Queries the workspace size the operator needs; reported through *size.
__INFINI_C __export infiniStatus_t infiniopGetAvgPool1dWorkspaceSize(infiniopAvgPool1dDescriptor_t desc, size_t *size);

// Runs the operator: reads from input (const) and writes to output, using the
// caller-provided workspace. `stream` is an opaque void pointer whose meaning
// is backend-defined (not visible from this header).
__INFINI_C __export infiniStatus_t infiniopAvgPool1d(infiniopAvgPool1dDescriptor_t desc,
                                                     void *workspace,
                                                     size_t workspace_size,
                                                     void *output,
                                                     const void *input,
                                                     void *stream);

// Destroys a descriptor previously obtained from the create call.
__INFINI_C __export infiniStatus_t infiniopDestroyAvgPool1dDescriptor(infiniopAvgPool1dDescriptor_t desc);

#endif
31 changes: 31 additions & 0 deletions include/infiniop/ops/cross_entropy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef __INFINIOP_CROSS_ENTROPY_API_H__
#define __INFINIOP_CROSS_ENTROPY_API_H__

#include "../operator_descriptor.h"

// Descriptor handle for the cross_entropy operator.
typedef struct InfiniopDescriptor *infiniopCrossEntropyDescriptor_t;

// Creates an operator descriptor from the output (y_desc), input (x_desc) and
// target (target_desc) tensor descriptors. Presumably stores the new
// descriptor in *desc_ptr — confirm against the implementation.
__INFINI_C __export infiniStatus_t infiniopCreateCrossEntropyDescriptor(infiniopHandle_t handle,
                                                                        infiniopCrossEntropyDescriptor_t *desc_ptr,
                                                                        infiniopTensorDescriptor_t y_desc,
                                                                        infiniopTensorDescriptor_t x_desc,
                                                                        infiniopTensorDescriptor_t target_desc);

// Queries the workspace size the operator needs; reported through *size.
__INFINI_C __export infiniStatus_t infiniopGetCrossEntropyWorkspaceSize(infiniopCrossEntropyDescriptor_t desc, size_t *size);

// Runs the operator: reads from x and target (both const) and writes to y,
// using the caller-provided workspace. `stream` is an opaque void pointer
// whose meaning is backend-defined (not visible from this header).
__INFINI_C __export infiniStatus_t infiniopCrossEntropy(infiniopCrossEntropyDescriptor_t desc,
                                                        void *workspace,
                                                        size_t workspace_size,
                                                        void *y,
                                                        const void *x,
                                                        const void *target,
                                                        void *stream);

// Destroys a descriptor previously obtained from the create call.
__INFINI_C __export infiniStatus_t infiniopDestroyCrossEntropyDescriptor(infiniopCrossEntropyDescriptor_t desc);

#endif
31 changes: 31 additions & 0 deletions include/infiniop/ops/equal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef __INFINIOP_EQUAL_API_H__
#define __INFINIOP_EQUAL_API_H__

#include "../operator_descriptor.h"

// Descriptor handle for the equal operator.
typedef struct InfiniopDescriptor *infiniopEqualDescriptor_t;

// Creates an operator descriptor from the result (c) and operand (a, b)
// tensor descriptors. Presumably stores the new descriptor in *desc_ptr —
// confirm against the implementation.
__INFINI_C __export infiniStatus_t infiniopCreateEqualDescriptor(infiniopHandle_t handle,
                                                                 infiniopEqualDescriptor_t *desc_ptr,
                                                                 infiniopTensorDescriptor_t c,
                                                                 infiniopTensorDescriptor_t a,
                                                                 infiniopTensorDescriptor_t b);

// Queries the workspace size the operator needs; reported through *size.
__INFINI_C __export infiniStatus_t infiniopGetEqualWorkspaceSize(infiniopEqualDescriptor_t desc, size_t *size);

// Runs the operator: reads from a and b (both const) and writes to c, using
// the caller-provided workspace. `stream` is an opaque void pointer whose
// meaning is backend-defined (not visible from this header).
__INFINI_C __export infiniStatus_t infiniopEqual(infiniopEqualDescriptor_t desc,
                                                 void *workspace,
                                                 size_t workspace_size,
                                                 void *c,
                                                 const void *a,
                                                 const void *b,
                                                 void *stream);

// Destroys a descriptor previously obtained from the create call.
__INFINI_C __export infiniStatus_t infiniopDestroyEqualDescriptor(infiniopEqualDescriptor_t desc);

#endif
Loading