@@ -91,6 +91,14 @@ G4_DstRegRegion *HWConformity::insertMovAfter(INST_LIST_ITER &it,
9191 G4_DstRegRegion *dst,
9292 G4_Type type, G4_BB *bb,
9393 G4_SubReg_Align dstAlign) {
94+ return insertMovAfter (it, dst, type, bb, /* stride*/ 0 , dstAlign);
95+ }
96+
97+ G4_DstRegRegion *HWConformity::insertMovAfter (INST_LIST_ITER &it,
98+ G4_DstRegRegion *dst,
99+ G4_Type type, G4_BB *bb,
100+ uint16_t stride,
101+ G4_SubReg_Align dstAlign) {
94102 G4_INST *inst = *it;
95103
96104 if (!dst) {
@@ -130,14 +138,14 @@ G4_DstRegRegion *HWConformity::insertMovAfter(INST_LIST_ITER &it,
130138 type = (type == Type_UB ? Type_UW : Type_W);
131139 }
132140 uint16_t dstWidthBytes = newExecSize * TypeSize (type);
133- uint16_t scale = TypeSize (execType) / TypeSize (type);
141+ uint16_t scale = stride ? stride : TypeSize (execType) / TypeSize (type);
134142 /* so according to comments in function that call it MAD needs to have
135143 packed format. It ends up with hStride 2, due to DefHoisting. So it is
136144 trying to undo it. For every other type if srcType > dstType we need to
137145 adjust regions. This is not necessary for HF. It's already packed.
138146
139147 The src region of move is wrong. Since for HF it is packed, unlike other
140- data types. mad (8) r56.0.xyzw:hf -r37.0.xyzw:f r59.0.xyzw:hf r58.0.xyzw:hf
148+ data types. mad (8) r56.0.xyzw:hf -r37.0.xyzw:f r59.0.xyzw:hf r58.0.xyzw:hf
141149 {Align16, NoMask} mov (16) r44.0<2>:hf r56.0<16;8,2>:hf {Align1, H1} //
142150 #??:$39:%66
143151 */
@@ -2087,9 +2095,11 @@ bool HWConformity::fixIndirectOpnd(INST_LIST_ITER i, G4_BB *bb) {
20872095 return spill_dst;
20882096}
20892097
2090- // If an accumulator is a source operand, its register region must match that of
2091- // the destination register (which means GRF-aligned since we always GRF-align
2092- // Acc) also check for restrictions on explicit acc dst
2098+ // If an accumulator is an implicit src/dst or explicit src operand, its
2099+ // register region must match that of the destination register, which means
2100+ // dst must be GRF-aligned and hstride must be 1 since we always GRF-align
2101+ // Acc and use <1;1,0> for Acc source.
2102+ // Also check for restrictions on explicit acc dst.
20932103bool HWConformity::fixAcc (INST_LIST_ITER iter, G4_BB *bb) {
20942104 G4_INST *inst = *iter;
20952105
@@ -2106,7 +2116,7 @@ bool HWConformity::fixAcc(INST_LIST_ITER iter, G4_BB *bb) {
21062116 }
21072117 }
21082118
2109- // implicit acc src/dst get its offset from dst
2119+ // Implicit acc src/dst and explicit acc src must be aligned with dst GRF
21102120 bool useAcc = inst->hasImplicitAccSrc () || inst->hasImplicitAccDst ();
21112121 if (!useAcc) {
21122122 for (int i = 0 ; i < inst->getNumSrc (); ++i) {
@@ -2119,9 +2129,10 @@ bool HWConformity::fixAcc(INST_LIST_ITER iter, G4_BB *bb) {
21192129 }
21202130
21212131 if (useAcc && dst && dst->getBase () && dst->getBase ()->isRegVar ()) {
2122- if (!builder.tryToAlignOperand (dst, kernel.numEltPerGRF <Type_UB>())) {
2123- inst->setDest (
2124- insertMovAfter (iter, dst, dst->getType (), bb, builder.getGRFAlign ()));
2132+ if (!builder.tryToAlignOperand (dst, kernel.numEltPerGRF <Type_UB>()) ||
2133+ dst->getHorzStride () != 1 ) {
2134+ inst->setDest (insertMovAfter (iter, dst, dst->getType (), bb, /* stride*/ 1 ,
2135+ builder.getGRFAlign ()));
21252136 changed = true ;
21262137 }
21272138 }
0 commit comments