Skip to content

Commit 402f74e

Browse files
fangliu2020igcbot
authored and committed
Fix ACC restriction in HWConformity
Fix the ACC restriction: an ACC used as an implicit src/dst or an explicit src must be aligned to the dst GRF.
1 parent 7a8ef5c commit 402f74e

File tree

3 files changed

+25
-10
lines changed

3 files changed

+25
-10
lines changed

visa/HWConformity.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,14 @@ G4_DstRegRegion *HWConformity::insertMovAfter(INST_LIST_ITER &it,
9191
G4_DstRegRegion *dst,
9292
G4_Type type, G4_BB *bb,
9393
G4_SubReg_Align dstAlign) {
94+
return insertMovAfter(it, dst, type, bb, /*stride*/0, dstAlign);
95+
}
96+
97+
G4_DstRegRegion *HWConformity::insertMovAfter(INST_LIST_ITER &it,
98+
G4_DstRegRegion *dst,
99+
G4_Type type, G4_BB *bb,
100+
uint16_t stride,
101+
G4_SubReg_Align dstAlign) {
94102
G4_INST *inst = *it;
95103

96104
if (!dst) {
@@ -130,14 +138,14 @@ G4_DstRegRegion *HWConformity::insertMovAfter(INST_LIST_ITER &it,
130138
type = (type == Type_UB ? Type_UW : Type_W);
131139
}
132140
uint16_t dstWidthBytes = newExecSize * TypeSize(type);
133-
uint16_t scale = TypeSize(execType) / TypeSize(type);
141+
uint16_t scale = stride ? stride : TypeSize(execType) / TypeSize(type);
134142
/* so according to comments in function that call it MAD needs to have
135143
packed format. It ends up with hStride 2, due to DefHoisting. So it is
136144
trying to undo it. For every other type if srcType > dstType we need to
137145
adjust regions. This is not necessary for HF. It's already packed.
138146
139147
The src region of move is wrong. Since for HF it is packed, unlike other
140-
data types. mad (8) r56.0.xyzw:hf -r37.0.xyzw:f r59.0.xyzw:hf r58.0.xyzw:hf
148+
data types. mad (8) r56.0.xyzw:hf -r37.0.xyzw:f r59.0.xyzw:hf r58.0.xyzw:hf
141149
{Align16, NoMask} mov (16) r44.0<2>:hf r56.0<16;8,2>:hf {Align1, H1} //
142150
#??:$39:%66
143151
*/
@@ -2087,9 +2095,11 @@ bool HWConformity::fixIndirectOpnd(INST_LIST_ITER i, G4_BB *bb) {
20872095
return spill_dst;
20882096
}
20892097

2090-
// If an accumulator is a source operand, its register region must match that of
2091-
// the destination register (which means GRF-aligned since we always GRF-align
2092-
// Acc) also check for restrictions on explicit acc dst
2098+
// If an accumulator is an implicit src/dst or explicit src operand, its
2099+
// register region must match that of the destination register, which means
2100+
// dst must be GRF-aligned and hstride must be 1 since we always GRF-align
2101+
// Acc and use <1;1,0> for Acc source.
2102+
// Also check for restrictions on explicit acc dst.
20932103
bool HWConformity::fixAcc(INST_LIST_ITER iter, G4_BB *bb) {
20942104
G4_INST *inst = *iter;
20952105

@@ -2106,7 +2116,7 @@ bool HWConformity::fixAcc(INST_LIST_ITER iter, G4_BB *bb) {
21062116
}
21072117
}
21082118

2109-
// implicit acc src/dst get its offset from dst
2119+
// Implicit acc src/dst and explicit acc src must be aligned with dst GRF
21102120
bool useAcc = inst->hasImplicitAccSrc() || inst->hasImplicitAccDst();
21112121
if (!useAcc) {
21122122
for (int i = 0; i < inst->getNumSrc(); ++i) {
@@ -2119,9 +2129,10 @@ bool HWConformity::fixAcc(INST_LIST_ITER iter, G4_BB *bb) {
21192129
}
21202130

21212131
if (useAcc && dst && dst->getBase() && dst->getBase()->isRegVar()) {
2122-
if (!builder.tryToAlignOperand(dst, kernel.numEltPerGRF<Type_UB>())) {
2123-
inst->setDest(
2124-
insertMovAfter(iter, dst, dst->getType(), bb, builder.getGRFAlign()));
2132+
if (!builder.tryToAlignOperand(dst, kernel.numEltPerGRF<Type_UB>()) ||
2133+
dst->getHorzStride() != 1) {
2134+
inst->setDest(insertMovAfter(iter, dst, dst->getType(), bb, /*stride*/ 1,
2135+
builder.getGRFAlign()));
21252136
changed = true;
21262137
}
21272138
}

visa/HWConformity.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,9 @@ class HWConformity {
225225
G4_DstRegRegion *insertMovAfter(INST_LIST_ITER &it, G4_DstRegRegion *dst,
226226
G4_Type type, G4_BB *bb,
227227
G4_SubReg_Align dstAlign = Any);
228+
G4_DstRegRegion *insertMovAfter(INST_LIST_ITER &it, G4_DstRegRegion *dst,
229+
G4_Type type, G4_BB *bb, uint16_t stride,
230+
G4_SubReg_Align dstAlign = Any);
228231
G4_Operand *insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type,
229232
G4_BB *bb, G4_SubReg_Align tmpAlign = Any);
230233
G4_Operand *insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type,

visa/VisaToG4/TranslateALU.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ int IR_Builder::translateVISAArithmeticInst(
105105
createSrc(phyregpool.getAcc0Reg(), 0, 0, getRegionStride1(),
106106
dstOpnd->getType());
107107

108-
createMov(exsize, carryBorrow, accSrcOpnd, instOpt, true);
108+
createMov(duplicateOperand(predOpnd), exsize, carryBorrow, accSrcOpnd,
109+
instOpt, true);
109110
} else if (opcode == ISA_PLANE) {
110111
const RegionDesc *rd = createRegionDesc(0, 4, 1);
111112
auto src0 = inst->getSrc(0);

0 commit comments

Comments
 (0)