1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[AMDGPU] Process V_MOV_B32_indirect in SET_GPR_IDX optimization

Differential Revision: https://reviews.llvm.org/D80256
This commit is contained in:
Stanislav Mekhanoshin 2020-05-19 15:50:49 -07:00
parent b70e05f3f5
commit b7a6af4686
3 changed files with 25 additions and 13 deletions

View File

@ -177,10 +177,12 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
return MO.isReg() &&
TRI->isVectorRegister(MRI, MO.getReg());
})) {
// The only exception allowed here is another indirect V_MOV_B32_e32
// The only exception allowed here is another indirect vector move
// with the same mode.
if (!IdxOn || I->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
!I->hasRegisterImplicitUseOperand(AMDGPU::M0))
if (!IdxOn ||
!((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
return false;
}
}

View File

@ -1066,8 +1066,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
@ -1138,8 +1136,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, s2
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, s3
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
@ -1422,8 +1418,6 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
@ -3046,8 +3040,6 @@ define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %ve
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2
@ -3262,8 +3254,6 @@ define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inr
; GPRIDX-NEXT: v_mov_b32_e32 v2, s0
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v2, v0
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v3, v1
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2

View File

@ -336,3 +336,23 @@ body: |
$vgpr15 = V_MOV_B32_e32 undef $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
S_SET_GPR_IDX_OFF
...
---
name: indirect_mov
body: |
bb.0:
; GCN-LABEL: name: indirect_mov
; GCN: S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
; GCN: $vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
; GCN: V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
; GCN: S_SET_GPR_IDX_OFF
S_SET_GPR_IDX_ON $sgpr2, 1, implicit-def $m0, implicit undef $m0
$vgpr16 = V_MOV_B32_e32 undef $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
S_SET_GPR_IDX_OFF
S_SET_GPR_IDX_ON killed $sgpr2, 1, implicit-def $m0, implicit undef $m0
V_MOV_B32_indirect undef $vgpr0, undef $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3(tied-def 3)
S_SET_GPR_IDX_OFF
...