diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index d01afee331025..6c5f2d79e5851 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -916,6 +916,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}}); + addRulesForGOpcs({G_BITREVERSE}, Standard) + .Uni(S32, {{Sgpr32}, {Sgpr32}}) + .Div(S32, {{Vgpr32}, {Vgpr32}}) + .Uni(S64, {{Sgpr64}, {Sgpr64}}) + .Div(S64, {{Vgpr64}, {Vgpr64}}); + addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}}); addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir index e82a492dbec20..6ae6d95ce3931 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: bitreverse_i32_s @@ -61,10 +60,7 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV1]] - ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[BITREVERSE]](s32), [[BITREVERSE1]](s32) + ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s64) = G_BITREVERSE [[COPY]] %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_BITREVERSE %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll index e33b9ab0eda9e..5e5e6a6b19284 100644 --- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll +++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll @@ -2,11 +2,11 @@ ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti | FileCheck %s --check-prefix=SI ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=FLAT ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=FLAT -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -global-isel | FileCheck %s --check-prefix=GISEL +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -global-isel -new-reg-bank-select | FileCheck %s --check-prefix=GISEL ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX11-FLAT,GFX11-FLAT-TRUE16 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX11-FLAT,GFX11-FLAT-FAKE16 -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -global-isel | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16 -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -global-isel | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16 +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -global-isel -new-reg-bank-select | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16 +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -global-isel -new-reg-bank-select | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16 declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -151,9 +151,11 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa ; GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GISEL-NEXT: flat_load_ushort v0, v[0:1] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_bfrev_b32_e32 v0, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GISEL-NEXT: v_readfirstlane_b32 s2, v0 +; GISEL-NEXT: s_brev_b32 s2, s2 +; GISEL-NEXT: s_lshr_b32 s2, s2, 16 ; GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: flat_store_short v[0:1], v2 ; GISEL-NEXT: s_endpgm @@ -176,14 +178,16 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa ; GFX11-GISEL-TRUE16-LABEL: v_brev_i16: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_bfrev_b32_e32 v1, v1 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s2, v0 +; GFX11-GISEL-TRUE16-NEXT: s_brev_b32 s2, s2 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-GISEL-TRUE16-NEXT: s_endpgm ; ; GFX11-GISEL-FAKE16-LABEL: v_brev_i16: @@ -193,8 +197,12 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_bfrev_b32_e32 v1, v1 -; GFX11-GISEL-FAKE16-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1] +; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX11-GISEL-FAKE16-NEXT: s_brev_b32 s2, s2 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX11-GISEL-FAKE16-NEXT: s_endpgm %val = load i16, ptr addrspace(1) %valptr %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1 @@ -641,8 +649,8 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa ; GISEL-NEXT: v_mov_b32_e32 v4, s1 ; GISEL-NEXT: v_mov_b32_e32 v3, s0 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_bfrev_b32_e32 v1, v1 ; GISEL-NEXT: v_bfrev_b32_e32 v2, v0 +; GISEL-NEXT: v_bfrev_b32_e32 v1, v1 ; GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2] ; GISEL-NEXT: s_endpgm ; @@ -671,8 +679,8 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v1, v1 ; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v2, v0 +; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v1, v1 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1] ; GFX11-GISEL-NEXT: s_endpgm @@ -819,11 +827,11 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_bfrev_b32_e32 v4, v1 ; GISEL-NEXT: v_bfrev_b32_e32 v5, v0 +; GISEL-NEXT: v_bfrev_b32_e32 v4, v1 ; GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GISEL-NEXT: v_bfrev_b32_e32 v6, v3 ; GISEL-NEXT: v_bfrev_b32_e32 v7, v2 +; GISEL-NEXT: v_bfrev_b32_e32 v6, v3 ; GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] ; GISEL-NEXT: s_endpgm @@ -855,10 +863,10 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: global_load_b128 v[0:3], v0, s[2:3] ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v4, v1 ; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v5, v0 -; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v6, v3 +; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v4, v1 ; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v7, v2 +; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v6, v3 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-GISEL-NEXT: global_store_b128 v0, v[4:7], s[0:1] ; GFX11-GISEL-NEXT: s_endpgm