Skip to content

Commit e45241a

Browse files
authored
[AMDGPU] Hoist s_set_vgpr_msb past SALU program state instructions (#172108)
Hoisting past the program state instructions is legal and allows for better coissue.
1 parent 9878bac commit e45241a

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ class AMDGPULowerVGPREncoding {
137137
/// instruction to extend it or drop the clause if it cannot be adjusted.
138138
MachineBasicBlock::instr_iterator
139139
handleClause(MachineBasicBlock::instr_iterator I);
140+
141+
/// Check whether the instruction at \p I immediately follows program state
142+
/// instructions it cannot coissue with. If so, return an earlier insertion
143+
/// point ahead of them to encourage more coissuing.
144+
MachineBasicBlock::instr_iterator
145+
handleCoissue(MachineBasicBlock::instr_iterator I);
140146
};
141147

142148
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
@@ -167,6 +173,7 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
167173
int64_t OldModeBits = CurrentMode << ModeWidth;
168174

169175
I = handleClause(I);
176+
I = handleCoissue(I);
170177
MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
171178
.addImm(NewMode | OldModeBits);
172179

@@ -283,6 +290,31 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
283290
return I;
284291
}
285292

293+
/// Choose where a new S_SET_VGPR_MSB should be inserted, given that it would
/// otherwise be placed directly before \p I.
///
/// If \p I is immediately preceded by a contiguous run of barrier, waitcnt or
/// other program state SALU instructions (S_SET_VGPR_MSB itself is excluded
/// from the run), the returned iterator points at the first instruction of
/// that run, so the caller inserts ahead of the whole run. In every other case
/// (end iterator, first instruction of the block, or no such predecessor)
/// \p I is returned unchanged.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
  if (I.isEnd())
    return I;

  if (I == I->getParent()->begin())
    return I;

  // Each predicate is evaluated on the opcode on its own; the three results
  // are OR-ed together rather than being folded into a single argument.
  auto isProgramStateSALU = [this](const MachineInstr &MI) {
    const unsigned Opc = MI.getOpcode();
    return TII->isBarrier(Opc) || TII->isWaitcnt(Opc) ||
           (SIInstrInfo::isProgramStateSALU(MI) &&
            Opc != AMDGPU::S_SET_VGPR_MSB);
  };

  MachineBasicBlock::instr_iterator InsertPt = std::prev(I);
  if (!isProgramStateSALU(*InsertPt))
    return I;

  // Extend the run backwards only while the *preceding* instruction also
  // qualifies. InsertPt therefore always names a member of the run and never
  // steps onto the unrelated instruction in front of it, which must keep
  // executing under the old mode.
  while (InsertPt != InsertPt->getParent()->begin() &&
         isProgramStateSALU(*std::prev(InsertPt)))
    --InsertPt;

  return InsertPt;
}
317+
286318
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
287319
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
288320
if (!ST.has1024AddressableVGPRs())

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
457457
return get(Opcode).TSFlags & SIInstrFlags::SALU;
458458
}
459459

460+
static bool isProgramStateSALU(const MachineInstr &MI) {
461+
return MI.getOpcode() == AMDGPU::S_DELAY_ALU ||
462+
MI.getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
463+
MI.getOpcode() == AMDGPU::ATOMIC_FENCE;
464+
}
465+
460466
static bool isVALU(const MachineInstr &MI) {
461467
return MI.getDesc().TSFlags & SIInstrFlags::VALU;
462468
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
3+
4+
---
5+
name: multi
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
10+
; CHECK-LABEL: name: multi
11+
; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
14+
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
15+
; CHECK-NEXT: S_WAIT_DSCNT 0
16+
; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
17+
; CHECK-NEXT: S_BARRIER_WAIT -1
18+
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
19+
; CHECK-NEXT: S_ENDPGM 0
20+
$vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
21+
S_WAIT_DSCNT 0
22+
S_BARRIER_SIGNAL_IMM -1
23+
S_BARRIER_WAIT -1
24+
$vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
25+
S_ENDPGM 0
26+
...
27+
28+
---
29+
name: high_vgprs
30+
tracksRegLiveness: true
31+
body: |
32+
bb.0:
33+
liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
34+
; CHECK-LABEL: name: high_vgprs
35+
; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
36+
; CHECK-NEXT: {{ $}}
37+
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
38+
; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
39+
; CHECK-NEXT: S_BARRIER_WAIT -1
40+
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
41+
; CHECK-NEXT: S_ENDPGM 0
42+
S_BARRIER_SIGNAL_IMM -1
43+
S_BARRIER_WAIT -1
44+
$vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
45+
S_ENDPGM 0
46+
...
47+
48+
---
49+
name: no_control
50+
tracksRegLiveness: true
51+
body: |
52+
bb.0:
53+
liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
54+
; CHECK-LABEL: name: no_control
55+
; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
58+
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
59+
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
60+
; CHECK-NEXT: S_ENDPGM 0
61+
$vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
62+
$vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
63+
S_ENDPGM 0
64+
...

0 commit comments

Comments
 (0)