Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
b3f2a4a
adding anchor layout for load/store/prefetch_nd and dpas
Jianhui-Li Nov 22, 2025
bfae01f
propagation honors pre-defined layout at anchor op
Jianhui-Li Nov 22, 2025
0482234
adding documentation
Jianhui-Li Nov 24, 2025
d1652af
address feedback and add more documentation
Jianhui-Li Nov 25, 2025
b186bc2
rename anchor_layout to layout
Jianhui-Li Nov 26, 2025
04f3edf
git merge main
Jianhui-Li Nov 26, 2025
60f5396
fix test
Jianhui-Li Nov 26, 2025
72fa240
fix clang-format
Jianhui-Li Nov 26, 2025
5f25c89
fix missing space in .clang-format
Jianhui-Li Nov 26, 2025
ae01e29
simplify: load/store/prefetch/loadmatrix/storematrix use anchor layou…
Jianhui-Li Nov 26, 2025
4795f24
simplify: load_nd/store_nd/prefetch_nd use anchor layout for blocking
Jianhui-Li Nov 27, 2025
227c15d
simplify: Avoid calling getTileShape(Result&Operand) from getTileShap…
Jianhui-Li Nov 27, 2025
0e44d07
simplify getDistributeLayoutAttr
Jianhui-Li Nov 29, 2025
c3bc085
add interface getAnchorLayout
Jianhui-Li Nov 30, 2025
84f89ed
git merge main
Jianhui-Li Dec 10, 2025
5d3e20e
remove permanentLayout parameter in setDistributeLayout function
Jianhui-Li Dec 10, 2025
1025b57
add setAnchorLayout interface
Jianhui-Li Dec 10, 2025
3221cc8
refactor setDistributeLayoutAttr
Jianhui-Li Dec 11, 2025
5448d6b
adjusting tests to use anchor layout
Jianhui-Li Dec 12, 2025
efcc1b7
fixing XeGPU tests
Jianhui-Li Dec 12, 2025
ab0ccf0
add get/setTempDistributeLayoutAttr
Jianhui-Li Dec 13, 2025
2c80b0e
remove debug print
Jianhui-Li Dec 13, 2025
2a57b03
clean up setDistributeLayoutAttr in sg distribution
Jianhui-Li Dec 13, 2025
1dbd1e8
clean up setDistributeLayoutAttr in load optimization pass
Jianhui-Li Dec 13, 2025
52d560e
clean up setDistributeLayoutAttr in sg distribution pass
Jianhui-Li Dec 13, 2025
cc60c1b
replace get/setTempDistributeLayoutAttr to get/setLayoutAttr
Jianhui-Li Dec 13, 2025
9436b7b
shorten get/setTempLayoutAttr to get/setTempLayout
Jianhui-Li Dec 13, 2025
5beaa24
address feedbacks
Jianhui-Li Dec 16, 2025
86b3340
changing func names
Jianhui-Li Dec 16, 2025
8cb195d
fixing names and use setLayoutAttr APIs
Jianhui-Li Dec 16, 2025
a31948b
git merge main
Jianhui-Li Dec 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,8 @@ set(LLVM_TARGET_DEFINITIONS XeGPUAttrs.td)
mlir_tablegen(XeGPUAttrInterface.h.inc -gen-attr-interface-decls)
mlir_tablegen(XeGPUAttrInterface.cpp.inc -gen-attr-interface-defs)
add_mlir_dialect_tablegen_target(MLIRXeGPUAttrInterfaceIncGen)

# Generate the op-interface declarations/definitions (e.g. AnchorLayoutInterface)
# from XeGPUAttrs.td. The interface is defined alongside the attributes in
# XeGPUAttrs.td, hence the same LLVM_TARGET_DEFINITIONS source as above.
set(LLVM_TARGET_DEFINITIONS XeGPUAttrs.td)
mlir_tablegen(XeGPUOpInterface.h.inc -gen-op-interface-decls)
mlir_tablegen(XeGPUOpInterface.cpp.inc -gen-op-interface-defs)
add_mlir_dialect_tablegen_target(MLIRXeGPUOpInterfaceIncGen)
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class SliceAttr;
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUOpInterface.h.inc>
// clang-format on

#define GET_ATTRDEF_CLASSES
Expand Down
26 changes: 26 additions & 0 deletions mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -770,4 +770,30 @@ def XeGPU_MemLayoutAttr : XeGPUAttr<"MemLayout", "mem_layout"> {

}

def AnchorLayoutInterface : OpInterface<"AnchorLayoutInterface"> {
  let cppNamespace = "::mlir::xegpu";

  let description = [{
    An op interface for accessing the anchor layout of an operation.
    Operations implementing this interface expose a distribute layout
    attribute that acts as the anchor for layout propagation, and
    provide methods to retrieve and update it.
  }];

  let methods = [
    InterfaceMethod<
      /*desc=*/"Get the anchor layout attribute, or null if none is set.",
      /*retTy=*/"xegpu::DistributeLayoutAttr",
      /*methodName=*/"getAnchorLayout",
      /*args=*/(ins)
    >,
    InterfaceMethod<
      /*desc=*/"Set the anchor layout attribute.",
      /*retTy=*/"void",
      /*methodName=*/"setAnchorLayout",
      /*args=*/(ins "xegpu::DistributeLayoutAttr":$layout)
    >,
  ];

}

#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
121 changes: 109 additions & 12 deletions mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ include "mlir/Interfaces/ShapedOpInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"


// Base class for dialect operations. This operation inherits from the base
// `Op` class in OpBase.td, and provides:
// * The parent dialect of the operation.
Expand Down Expand Up @@ -247,7 +248,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
}];
}

def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", [AnchorLayoutInterface]> {
let summary = "prefetches a n-D block to cache";
let description = [{
It issues an instruction to prefetch a block of data from continuous
Expand Down Expand Up @@ -296,6 +297,14 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
return getTensorDesc().getType();
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

SmallVector<OpFoldResult> getMixedOffsets() {
auto statics = getConstOffsets().value_or(SmallVector<int64_t>());
auto dynamics = getOffsets();
Expand Down Expand Up @@ -338,7 +347,7 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {


def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemRead]>
AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemRead]>, AnchorLayoutInterface
]> {
let summary = "loads a n-D block from memory (represented by TensorDesc)"
"to registers (represented by vector)";
Expand Down Expand Up @@ -419,6 +428,14 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
return getTensorDesc().getType();
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

SmallVector<OpFoldResult> getMixedOffsets() {
auto statics = getConstOffsets().value_or(SmallVector<int64_t>());
auto dynamics = getOffsets();
Expand Down Expand Up @@ -462,7 +479,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
}

def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemWrite]>
AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemWrite]>, AnchorLayoutInterface
]> {
let summary = "stores a n-D block register region back to memory, currently only supports 2D";

Expand Down Expand Up @@ -532,6 +549,14 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
return getTensorDesc().getType();
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

SmallVector<OpFoldResult> getMixedOffsets() {
auto statics = getConstOffsets().value_or(SmallVector<int64_t>());
auto dynamics = getOffsets();
Expand Down Expand Up @@ -724,7 +749,7 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
let hasVerifier = 1;
}

def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", [AnchorLayoutInterface]> {
let summary = "prefetches a set of scattered data points to cache";

let description = [{
Expand Down Expand Up @@ -813,6 +838,14 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
return getSource().getType();
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

TypedValue<xegpu::TensorDescType> getTensorDesc() {
if (auto tdescType = getTensorDescType()) {
return llvm::cast<TypedValue<xegpu::TensorDescType>>(getSource());
Expand Down Expand Up @@ -843,7 +876,7 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
let hasVerifier = 1;
}

def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>, AnchorLayoutInterface]> {
let summary = "load a set of scattered data points from memory.";

let description = [{ It (aka. load) load data per each lane. The output
Expand Down Expand Up @@ -949,6 +982,14 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
return getSource().getType();
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

TypedValue<xegpu::TensorDescType> getTensorDesc() {
if (auto tdescType = getTensorDescType()) {
return llvm::cast<TypedValue<xegpu::TensorDescType>>(getSource());
Expand Down Expand Up @@ -1005,7 +1046,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
let hasVerifier = 1;
}

def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>, AnchorLayoutInterface]> {
let summary = "store data to scattered memory locations.";
let description =
[{ It (aka. store) stores data to scattered memory locations. The value is
Expand Down Expand Up @@ -1078,7 +1119,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
l2_hint = #xegpu.cache_hint<cached>,
l3_hint = #xegpu.cache_hint<cached>,
layout = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
: memref<1024xf32>, vector<16xi1>, vector<16xindex> -> vector<16xf32>
: vector<16xf32>, memref<1024xf32>, vector<16xi1>, vector<16xindex>
```

Example 4 (Lane level):
Expand Down Expand Up @@ -1107,6 +1148,14 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
return getDest().getType();
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

TypedValue<xegpu::TensorDescType> getTensorDesc() {
if (auto tdescType = getTensorDescType()) {
return llvm::cast<TypedValue<xegpu::TensorDescType>>(getDest());
Expand Down Expand Up @@ -1217,7 +1266,7 @@ def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset",
let hasVerifier = 1;
}

def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]> {
def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>, AnchorLayoutInterface]> {
let summary = "It performs mma computation";

let description = [{DPAS performs matrix multiplication on matrix A of `mxk`
Expand Down Expand Up @@ -1278,6 +1327,15 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
let results = (outs XeGPU_DpasResType: $result);

let extraClassDeclaration = [{

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayoutCd().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutCdAttr(anchorLayout);
}

VectorType getLhsType() {
return getLhs().getType();
}
Expand Down Expand Up @@ -1311,7 +1369,8 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
def XeGPU_AtomicRMWOp: XeGPU_Op<"atomic_rmw", [Pure,
MemoryEffects<[MemRead, MemWrite]>,
AllElementTypesMatch<["tensorDesc", "value", "result"]>,
AllShapesMatch<["tensorDesc", "value", "result"]>]> {
AllShapesMatch<["tensorDesc", "value", "result"]>,
AnchorLayoutInterface]> {
let summary = "Atomic read-modify-write operation on the TensorDesc. ";

let description = [{
Expand Down Expand Up @@ -1351,6 +1410,17 @@ def XeGPU_AtomicRMWOp: XeGPU_Op<"atomic_rmw", [Pure,
XeGPU_ValueType:$value,
OptionalAttr<DistributeLayoutAttr>:$layout);

let extraClassDeclaration = [{
xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

}];

let results = (outs XeGPU_ValueType:$result);

let assemblyFormat = [{
Expand Down Expand Up @@ -1424,7 +1494,7 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
let extraClassDeclaration = extraBaseClassDeclaration;
}

def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["source", "result"]>]> {
def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["source", "result"]>, AnchorLayoutInterface]> {
let summary = "Convert the layout of the input operand";
let description = [{
`convert_layout` redistribute data across subgroups and/or lanes from the `input_layout` to
Expand Down Expand Up @@ -1458,6 +1528,16 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
let assemblyFormat = [{
$source prop-dict attr-dict `:` type($source)
}];
let extraClassDeclaration = [{
xegpu::DistributeLayoutAttr getAnchorLayout() {
return getTargetLayout();
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setTargetLayoutAttr(anchorLayout);
}

}];

let hasFolder = 1;
let hasVerifier = 1;
Expand Down Expand Up @@ -1499,7 +1579,7 @@ def XeGPU_CreateMemDescOp: XeGPU_Op<"create_mem_desc", [Pure,
}

def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
AllElementTypesMatch<["mem_desc", "res"]>]> {
AllElementTypesMatch<["mem_desc", "res"]>, AnchorLayoutInterface]> {
let arguments = (ins XeGPU_MemDesc:$mem_desc,
Variadic<Index>: $offsets,
DenseI64ArrayAttr: $const_offsets,
Expand Down Expand Up @@ -1558,13 +1638,22 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
return vecTy.getShape();
return {};
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

}];

let hasVerifier = 1;
}

def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
AllElementTypesMatch<["mem_desc", "data"]>]> {
AllElementTypesMatch<["mem_desc", "data"]>, AnchorLayoutInterface]> {
let arguments = (ins
AnyTypeOf<[XeGPU_ValueType, XeGPU_ScalarType]>:$data,
XeGPU_MemDesc:$mem_desc,
Expand Down Expand Up @@ -1617,6 +1706,14 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
return {};
}

xegpu::DistributeLayoutAttr getAnchorLayout() {
return getLayout().value_or(nullptr);
}

void setAnchorLayout(xegpu::DistributeLayoutAttr anchorLayout) {
setLayoutAttr(anchorLayout);
}

}];

let hasVerifier = 1;
Expand Down
Loading