Skip to content

Commit 15614f2

Browse files
jgu222 authored and igcbot committed
Fix layout struct copy
When copying some fields of one layout struct to another layout struct, the source was treated as a normal non-struct value, so the copy was done incorrectly. This PR adds a dedicated function that performs a layout-struct-to-layout-struct copy.
1 parent 5d31a3e commit 15614f2

File tree

2 files changed

+80
-3
lines changed

2 files changed

+80
-3
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2845,9 +2845,11 @@ void EmitPass::EmitInsertValueToLayoutStruct(InsertValueInst *IVI) {
28452845
}
28462846
} else {
28472847
CVariable *SrcV = GetSymbol(src0);
2848-
if (DstV != SrcV && DstV->IsUniform() && SrcV->IsUniform()) {
2848+
Value *DstRoot = m_deSSA->getRootValue(IVI);
2849+
Value *SrcRoot = m_deSSA->getRootValue(src0);
2850+
if (DstRoot != SrcRoot && DstV->IsUniform() && SrcV->IsUniform()) {
28492851
emitCopyToOrFromLayoutStruct(IVI, src0);
2850-
} else if (DstV != SrcV) {
2852+
} else if (DstRoot != SrcRoot) {
28512853
// Most often, SrcV has just one defined value and calling
28522854
// emitCopyToOrFromLayoutStruct() would copy all, thus special
28532855
// handling here to avoid copy undefined values.
@@ -2869,7 +2871,7 @@ void EmitPass::EmitInsertValueToLayoutStruct(InsertValueInst *IVI) {
28692871
n * (SrcV->IsUniform() ? 1 : nLanes));
28702872
if (II.size() == 2) {
28712873
uint32_t AOSStBytes = (uint32_t)m_DL->getTypeStoreSize(ty0);
2872-
emitVectorCopyToAOS(AOSStBytes, eltDst, eltSrc, n);
2874+
emitLayoutStructCopyAOSToAOS(AOSStBytes, eltDst, eltSrc, n);
28732875
} else {
28742876
emitVectorCopy(eltDst, eltSrc, n);
28752877
}
@@ -19289,6 +19291,79 @@ void EmitPass::emitVectorCopyToOrFromAOS(uint32_t AOSBytes, CVariable *Dst, CVar
1928919291
}
1929019292
}
1929119293

19294+
// This is to copy an AOS field of a struct to an AOS field of another struct.
19295+
// AOSBytes: the size of AOS struct (its members are laid out in AOS format).
19296+
// Dst: the start of Destination
19297+
// Src: the start of Source
19298+
// nElts: the number of elements to copy
19299+
// DstSubRegOffset : offset from Dst as the beginning location to be copied to
19300+
// SrcSubRegOffset : offset from Src as the beginning location to copy from
19301+
//
19302+
// For example, the following packed struct:
19303+
// __StructSOALayout_ {
19304+
// i32 s0
19305+
// __StructAOSLayout_ {
19306+
// <4xi8> s1;
19307+
// }
19308+
// i32 s2;
19309+
// } dst, src;
19310+
// Assume that the number of lanes is 16. dst(src)'s layout in GRFs:
19311+
// Lane 15 14 13 1 0
19312+
// ----------------------------------
19313+
// r10: s0 s0 s0 ...... s0 s0
19314+
// r11: s1 s1 s1 ...... s1 s1 // <4xi8> s1 is in AOS format
19315+
// r12: s2 s2 s2 ...... s2 s2
19316+
//
19317+
// For the following copy:
19318+
// dst.s1 = src.s1;
19319+
// the arguments are (numLanes = 16)
19320+
// AOSBytes = 4; nElts = 4
19321+
// (Dst, DstSubRegOffset) = (dst, 16*12) or (dst + 16*4, 0)
19322+
// (Src, SrcSubRegOffset) = (src, 16*12) or (src + 16*4, 0)
19323+
// the function generates 4 mov instructions:
19324+
// mov dst(1,0)<4>:b src(1,0)<4,1,0>:b
19325+
// mov dst(1,1)<4>:b src(1,1)<4,1,0>:b
19326+
// mov dst(1,2)<4>:b src(1,2)<4,1,0>:b
19327+
// mov dst(1,3)<4>:b src(1,3)<4,1,0>:b
19328+
//
19329+
// Note: for dst.s2 = src.s2, using emitVectorCopy()
19330+
//
19331+
void EmitPass::emitLayoutStructCopyAOSToAOS(uint32_t AOSBytes, CVariable *Dst, CVariable *Src, uint32_t nElts,
19332+
uint32_t DstSubRegOffset, uint32_t SrcSubRegOffset) {
19333+
assert(Dst->GetType() == Src->GetType());
19334+
19335+
bool srcUniform = Src->IsUniform();
19336+
bool dstUniform = Dst->IsUniform();
19337+
19338+
// Uniform vector copy.
19339+
if (srcUniform && dstUniform) {
19340+
emitUniformVectorCopy(Dst, Src, nElts, DstSubRegOffset, SrcSubRegOffset);
19341+
return;
19342+
}
19343+
19344+
const uint32_t nLanes = numLanes(m_currShader->m_SIMDSize);
19345+
unsigned doff = DstSubRegOffset, soff = SrcSubRegOffset;
19346+
uint32_t eltBytes = Dst->GetElemSize();
19347+
uint32_t stride = AOSBytes / eltBytes;
19348+
IGC_ASSERT(stride <= 4 && stride > 0);
19349+
IGC_ASSERT((AOSBytes % eltBytes) == 0);
19350+
19351+
uint DstVStride = dstUniform ? 1 : stride;
19352+
uint SrcVStride = srcUniform ? 0 : stride;
19353+
for (uint32_t i = 0; i < nElts; ++i) {
19354+
// Copy AOS field to AOS field
19355+
uint SrcSubReg = soff + i;
19356+
uint DstSubReg = doff + i;
19357+
19358+
m_encoder->SetDstRegion(DstVStride);
19359+
m_encoder->SetDstSubReg(DstSubReg);
19360+
m_encoder->SetSrcSubReg(0, SrcSubReg);
19361+
m_encoder->SetSrcRegion(0, SrcVStride, 1, 0);
19362+
m_encoder->Copy(Dst, Src);
19363+
m_encoder->Push();
19364+
}
19365+
}
19366+
1929219367
// Push a new frame onto the stack by:
1929319368
// Update FP to the current SP
1929419369
// Increment SP by pushSize

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ class EmitPass : public llvm::FunctionPass {
189189
void emitVectorCopyToOrFromAOS(uint32_t AOSBytes, CVariable *Dst, CVariable *Src, uint32_t nElts,
190190
uint32_t DstSubRegOffset, uint32_t SrcSubRegOffset, bool IsToAOS);
191191
void emitCopyToOrFromLayoutStruct(llvm::Value *D, llvm::Value *S);
192+
void emitLayoutStructCopyAOSToAOS(uint32_t AOSBytes, CVariable *Dst, CVariable *Src, uint32_t nElts,
193+
uint32_t DstSubRegOffset = 0, uint32_t SrcSubRegOffset = 0);
192194

193195
/// stack-call code-gen functions
194196
void emitStackCall(llvm::CallInst *inst);

0 commit comments

Comments
 (0)